This project has retired. For details please refer to its Attic page.
ExcelExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.plugin.officescraper;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.apache.any23.extractor.ExtractionException;
22  import org.apache.any23.extractor.ExtractionParameters;
23  import org.apache.any23.extractor.ExtractionResult;
24  import org.apache.any23.extractor.ExtractionResultImpl;
25  import org.apache.any23.rdf.RDFUtils;
26  import org.apache.any23.vocab.Excel;
27  import org.apache.any23.writer.CompositeTripleHandler;
28  import org.apache.any23.writer.CountingTripleHandler;
29  import org.apache.any23.writer.NTriplesWriter;
30  import org.apache.any23.writer.TripleHandler;
31  import org.apache.any23.writer.TripleHandlerException;
32  import org.junit.Assert;
33  import org.junit.Before;
34  import org.junit.Test;
35  import org.mockito.Mockito;
36  import org.eclipse.rdf4j.model.IRI;
37  import org.eclipse.rdf4j.model.vocabulary.RDF;
38  import org.slf4j.Logger;
39  import org.slf4j.LoggerFactory;
40  
41  import java.io.ByteArrayOutputStream;
42  import java.io.IOException;
43  import java.io.InputStream;
44  
45  /**
46   * Test case for {@link ExcelExtractor}.
47   *
48   * @author Michele Mostarda (mostarda@fbk.eu)
49   */
50  public class ExcelExtractorTest {
51  
52      private static final Logger logger = LoggerFactory.getLogger(ExcelExtractorTest.class);
53  
54      private ExcelExtractor extractor;
55  
56      @Before
57      public void setUp() {
58          extractor = new ExcelExtractorFactory().createExtractor();
59      }
60  
61      @Test
62      public void testGetDescription() {
63          Assert.assertNotNull( extractor.getDescription() );
64      }
65  
66      @Test
67      public void testExtractXLSX() throws IOException, ExtractionException, TripleHandlerException {
68          final String FILE = "test1-workbook.xlsx";
69          processFile(FILE);
70      }
71  
72      @Test
73      public void testExtractXLS() throws IOException, ExtractionException, TripleHandlerException {
74          final String FILE = "test2-workbook.xls";
75          processFile(FILE);
76      }
77  
78      private void processFile(String resource) throws IOException, ExtractionException, TripleHandlerException {
79          final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
80          final ExtractionContext extractionContext = new ExtractionContext(
81                  extractor.getDescription().getExtractorName(),
82                  RDFUtils.iri("file://" + resource)
83          );
84          final InputStream is = this.getClass().getResourceAsStream(resource);
85          final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
86          final TripleHandler verifierTripleHandler = Mockito.mock(TripleHandler.class);
87          compositeTripleHandler.addChild(verifierTripleHandler);
88          final CountingTripleHandler countingTripleHandler = new CountingTripleHandler();
89          compositeTripleHandler.addChild(countingTripleHandler);
90          final ByteArrayOutputStream out = new ByteArrayOutputStream();
91          compositeTripleHandler.addChild( new NTriplesWriter(out) );
92          final ExtractionResult extractionResult = new ExtractionResultImpl(
93                  extractionContext, extractor, compositeTripleHandler
94          );
95          extractor.run(extractionParameters, extractionContext, is, extractionResult);
96          compositeTripleHandler.close();
97          logger.debug(out.toString());
98  
99          verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsSheet, 2 );
100         verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsRow  , 6 );
101         verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsCell , 18);
102 
103         verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().sheet, 2 );
104         verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().row  , 6 );
105         verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().cell , 18);
106     }
107 
108     private void verifyPredicateOccurrence(TripleHandler mock, IRI predicate, int occurrence)
109     throws TripleHandlerException {
110         Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
111                 Mockito.any(),
112                 Mockito.eq(predicate),
113                 Mockito.any(),
114                 Mockito.any(),
115                 Mockito.any()
116         );
117     }
118 
119     private void verifyTypeOccurrence(TripleHandler mock, IRI type, int occurrence)
120     throws TripleHandlerException {
121         Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
122                 Mockito.any(),
123                 Mockito.eq(RDF.TYPE),
124                 Mockito.eq(type),
125                 Mockito.any(),
126                 Mockito.any()
127         );
128     }
129 
130 }