This project has retired. For details please refer to its Attic page.
RDFaExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.rdfa;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.junit.Assert;
22  import org.junit.Test;
23  import org.eclipse.rdf4j.model.Statement;
24  import org.eclipse.rdf4j.repository.RepositoryException;
25  import org.eclipse.rdf4j.rio.RDFHandlerException;
26  import org.eclipse.rdf4j.rio.RDFParseException;
27  
28  import java.io.IOException;
29  import java.util.List;
30  
31  /**
32   * Reference Test Class for {@link RDFaExtractor}.
33   */
34  public class RDFaExtractorTest extends AbstractRDFaExtractorTestCase {
35  
36      /**
37       * Taken from the <a href="http://www.heppnetz.de/rdfa4google/testcases.html">GoodRelations test cases</a>. It
38       * checks if the extraction is the same when the namespaces are defined in <i>RDFa1.0</i> or <i>RDFa1.1</i>
39       * respectively.
40       *
41       * @throws org.eclipse.rdf4j.repository.RepositoryException
42       *             if an error is encountered whilst loading content from a storage connection
43       * @throws java.io.IOException
44       *             if there is an error interpreting the input data
45       * @throws org.eclipse.rdf4j.rio.RDFHandlerException
46       *             if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
47       * @throws org.eclipse.rdf4j.rio.RDFParseException
48       *             if there is an exception parsing an RDF Stream
49       */
50      @Test
51      public void testRDFa11PrefixBackwardCompatibility()
52              throws RepositoryException, RDFHandlerException, IOException, RDFParseException {
53          final int EXPECTED_STATEMENTS = 31;
54  
55          assertExtract("/html/rdfa/goodrelations-rdfa10.html");
56          logger.debug("Model 1 " + dumpHumanReadableTriples());
57          Assert.assertEquals(EXPECTED_STATEMENTS, dumpAsListOfStatements().size());
58          List<Statement> rdfa10Stmts = dumpAsListOfStatements();
59  
60          // assertContainsModel("/html/rdfa/goodrelations-rdfa10-expected.nq");
61  
62          assertExtract("/html/rdfa/goodrelations-rdfa11.html");
63          logger.debug("Model 2 " + dumpHumanReadableTriples());
64          Assert.assertTrue(dumpAsListOfStatements().size() >= EXPECTED_STATEMENTS);
65  
66          for (Statement stmt : rdfa10Stmts) {
67              assertContains(stmt);
68          }
69      }
70  
71      @Test
72      public void testRDFa11CURIEs() throws Exception {
73      }
74  
75      /**
76       * Tests that the default parser settings enable tolerance in data type parsing.
77       */
78      @Test
79      public void testTolerantParsing() {
80          assertExtract("/html/rdfa/oreilly-invalid-datatype.html");
81      }
82  
83      @Override
84      protected ExtractorFactory<?> getExtractorFactory() {
85          return new RDFaExtractorFactory();
86      }
87  
88  }