1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.extractor.rdfa;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.junit.Assert;
22 import org.junit.Test;
23 import org.eclipse.rdf4j.model.Statement;
24 import org.eclipse.rdf4j.repository.RepositoryException;
25 import org.eclipse.rdf4j.rio.RDFHandlerException;
26 import org.eclipse.rdf4j.rio.RDFParseException;
27
28 import java.io.IOException;
29 import java.util.List;
30
31 /**
32 * Reference Test Class for {@link RDFaExtractor}.
33 */
34 public class RDFaExtractorTest extends AbstractRDFaExtractorTestCase {
35
36 /**
37 * Taken from the <a href="http://www.heppnetz.de/rdfa4google/testcases.html">GoodRelations test cases</a>. It
38 * checks if the extraction is the same when the namespaces are defined in <i>RDFa1.0</i> or <i>RDFa1.1</i>
39 * respectively.
40 *
41 * @throws org.eclipse.rdf4j.repository.RepositoryException
42 * if an error is encountered whilst loading content from a storage connection
43 * @throws java.io.IOException
44 * if there is an error interpreting the input data
45 * @throws org.eclipse.rdf4j.rio.RDFHandlerException
46 * if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
47 * @throws org.eclipse.rdf4j.rio.RDFParseException
48 * if there is an exception parsing an RDF Stream
49 */
50 @Test
51 public void testRDFa11PrefixBackwardCompatibility()
52 throws RepositoryException, RDFHandlerException, IOException, RDFParseException {
53 final int EXPECTED_STATEMENTS = 31;
54
55 assertExtract("/html/rdfa/goodrelations-rdfa10.html");
56 logger.debug("Model 1 " + dumpHumanReadableTriples());
57 Assert.assertEquals(EXPECTED_STATEMENTS, dumpAsListOfStatements().size());
58 List<Statement> rdfa10Stmts = dumpAsListOfStatements();
59
60 // assertContainsModel("/html/rdfa/goodrelations-rdfa10-expected.nq");
61
62 assertExtract("/html/rdfa/goodrelations-rdfa11.html");
63 logger.debug("Model 2 " + dumpHumanReadableTriples());
64 Assert.assertTrue(dumpAsListOfStatements().size() >= EXPECTED_STATEMENTS);
65
66 for (Statement stmt : rdfa10Stmts) {
67 assertContains(stmt);
68 }
69 }
70
71 @Test
72 public void testRDFa11CURIEs() throws Exception {
73 }
74
75 /**
76 * Tests that the default parser settings enable tolerance in data type parsing.
77 */
78 @Test
79 public void testTolerantParsing() {
80 assertExtract("/html/rdfa/oreilly-invalid-datatype.html");
81 }
82
83 @Override
84 protected ExtractorFactory<?> getExtractorFactory() {
85 return new RDFaExtractorFactory();
86 }
87
88 }