This project has retired. For details please refer to its Attic page.
CSVExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.csv;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.extractor.html.AbstractExtractorTestCase;
22  import org.apache.any23.vocab.CSV;
23  import org.junit.Test;
24  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
25  import org.eclipse.rdf4j.model.vocabulary.RDF;
26  import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
27  import org.slf4j.Logger;
28  import org.slf4j.LoggerFactory;
29  
30  /**
31   * Reference test case for {@link CSVExtractor}.
32   *
33   * @author Davide Palmisano ( dpalmisano@gmail.com )
34   */
35  public class CSVExtractorTest extends AbstractExtractorTestCase {
36  
37      private static final Logger logger = LoggerFactory.getLogger(CSVExtractorTest.class);
38  
39      @Override
40      protected ExtractorFactory<?> getExtractorFactory() {
41          return new CSVExtractorFactory();
42      }
43  
44      @Test
45      public void testExtractionCommaSeparated() throws Exception {
46          CSV csv = CSV.getInstance();
47          assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
48          logger.debug(dumpModelToRDFXML());
49  
50          assertModelNotEmpty();
51          assertStatementsSize(null, null, null, 28);
52          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
53          assertContains(null, csv.numberOfColumns,
54                  SimpleValueFactory.getInstance().createLiteral("4", XMLSchema.INTEGER));
55          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XMLSchema.INTEGER));
56      }
57  
58      @Test
59      public void testExtractionSemicolonSeparated() throws Exception {
60          CSV csv = CSV.getInstance();
61          assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
62          logger.debug(dumpModelToRDFXML());
63  
64          assertModelNotEmpty();
65          assertStatementsSize(null, null, null, 28);
66          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
67          assertContains(null, csv.numberOfColumns,
68                  SimpleValueFactory.getInstance().createLiteral("4", XMLSchema.INTEGER));
69          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XMLSchema.INTEGER));
70      }
71  
72      @Test
73      public void testExtractionTabSeparated() throws Exception {
74          CSV csv = CSV.getInstance();
75          assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
76          logger.debug(dumpModelToRDFXML());
77  
78          assertModelNotEmpty();
79          assertStatementsSize(null, null, null, 28);
80          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
81          assertContains(null, csv.numberOfColumns,
82                  SimpleValueFactory.getInstance().createLiteral("4", XMLSchema.INTEGER));
83          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XMLSchema.INTEGER));
84      }
85  
86      @Test
87      public void testTypeManagement() throws Exception {
88          CSV csv = CSV.getInstance();
89          assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
90          logger.debug(dumpModelToRDFXML());
91  
92          assertModelNotEmpty();
93          assertStatementsSize(null, null, null, 21);
94          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
95          assertContains(null, csv.numberOfColumns,
96                  SimpleValueFactory.getInstance().createLiteral("2", XMLSchema.INTEGER));
97          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XMLSchema.INTEGER));
98          assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
99          assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
100         assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
101     }
102 
103     @Test
104     public void testExtractionEmptyValue() throws Exception {
105         CSV csv = CSV.getInstance();
106         assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
107         logger.debug(dumpModelToRDFXML());
108 
109         assertModelNotEmpty();
110         assertStatementsSize(null, null, null, 25);
111         assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
112         assertContains(null, csv.numberOfColumns,
113                 SimpleValueFactory.getInstance().createLiteral("4", XMLSchema.INTEGER));
114         assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XMLSchema.INTEGER));
115         assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
116         assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
117     }
118 
119 }