1/*2 * Licensed to the Apache Software Foundation (ASF) under one or more3 * contributor license agreements. See the NOTICE file distributed with4 * this work for additional information regarding copyright ownership.5 * The ASF licenses this file to You under the Apache License, Version 2.06 * (the "License"); you may not use this file except in compliance with7 * the License. You may obtain a copy of the License at8 *9 * http://www.apache.org/licenses/LICENSE-2.010 *11 * Unless required by applicable law or agreed to in writing, software12 * distributed under the License is distributed on an "AS IS" BASIS,13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14 * See the License for the specific language governing permissions and15 * limitations under the License.16 */1718package org.apache.any23.extractor.rdfa;
1920import org.apache.any23.extractor.html.AbstractExtractorTestCase;
21import org.apache.any23.rdf.RDFUtils;
22import org.apache.any23.vocab.DCTerms;
23import org.apache.any23.vocab.FOAF;
24import org.junit.Test;
25import org.slf4j.Logger;
26import org.slf4j.LoggerFactory;
2728/**29 * This class provides a common set of tests for an <i>RDFa</i> extractor.30 *31 * @author Michele Mostarda (mostarda@fbk.eu)32 */33publicabstractclassAbstractRDFaExtractorTestCaseextendsAbstractExtractorTestCase {
3435protectedstaticfinal DCTerms vDCTERMS = DCTerms.getInstance();
36protectedstaticfinal FOAF vFOAF = FOAF.getInstance();
3738 Logger logger = LoggerFactory.getLogger(RDFaExtractorTest.class);
3940/**41 * Verify the basic RDFa support.42 *43 * @throws Exception44 * if there is an issue asserting test values.45 */46 @Test
47publicvoid testBasic() throws Exception {
48 assertExtract("/html/rdfa/basic.html");
49 logger.info(dumpModelToNQuads());
50 assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice", "en"));
51 assertContains(null, vDCTERMS.title, RDFUtils.literal("The trouble with Bob", "en"));
52 assertContains(null, RDFUtils.iri("http://fake.org/prop"), RDFUtils.literal("Mary", "en"));
53 }
5455/**56 * This test check if the <a href="https://www.w3.org/TR/rdfa-core/#s_curieprocessing">RDFa1.1 CURIEs</a> expansion57 * is correct and backward compatible with <a href="http://www.w3.org/TR/rdfa-syntax/#s_curieprocessing">RDFa58 * 1.0</a>.59 *60 * @throws Exception61 * if there is an issue asserting test values.62 */63 @Test
64publicvoid testRDFa11CURIEs() throws Exception {
65 assertExtract("/html/rdfa/rdfa-11-curies.html");
66 assertModelNotEmpty();
67 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
68 RDFUtils.iri("http://dbpedia.org/name"), RDFUtils.literal("Albert Einstein"));
69 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
70 RDFUtils.iri("http://dbpedia.org/knows"),
71 RDFUtils.iri("http://dbpedia.org/resource/Franklin_Roosevlet"));
72 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
73 RDFUtils.iri("http://database.org/description"), RDFUtils.literal("Tables listing departments"));
74 assertContains(RDFUtils.iri("http://database.org/table/Departments"), RDFUtils.iri("http://database.org/owner"),
75 RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
76 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
77 RDFUtils.iri("http://xmlns.com/foaf/0.1/author"),
78 RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
79 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
80 RDFUtils.iri("http://purl.org/dc/01/name"), RDFUtils.literal("Departments"));
81 assertStatementsSize(null, null, null, 6);
82 logger.debug(dumpHumanReadableTriples());
83 }
8485/**86 * This test checks if the subject of a property modeled as <i>RDFa</i> in a <i>XHTML</i> document where the subject87 * contains inner <i>XML</i> tags is represented as a plain <i>Literal</i> stripping all the inner tags. For details88 * see the <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa in XHTML: Syntax and Processing</a> recommendation.89 *90 * @throws Exception91 * if there an error asserting test values.92 */93 @Test
94publicvoid testEmptyDatatypeDeclarationWithInnerXMLTags() throws Exception {
95 assertExtract("/html/rdfa/null-datatype-test.html");
96 logger.debug(dumpModelToRDFXML());
9798 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"), vFOAF.name,
99 RDFUtils.literal("Albert Einstein", "en"));
100101 }
102103/**104 * This test checks if the <i>RDF</i> extraction is compliant to the105 * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa in XHTML: Syntax and Processing</a> specification against the106 * <a href="http://files.openspring.net/tmp/drupal-test-frontpage.html">Drupal test page</a>.107 *108 * @throws Exception109 * if there an error asserting test values.110 */111 @Test
112publicvoid testDrupalTestPage() throws Exception {
113 assertExtract("/html/rdfa/drupal-test-frontpage.html");
114 logger.debug(dumpModelToTurtle());
115 assertContains(RDFUtils.iri("http://bob.example.com/node/3"), vDCTERMS.title,
116 RDFUtils.literal("A blog post...", "en"));
117 }
118119/**120 * See RDFa 1.1 Specification section 6.2 .121 *122 * @throws Exception123 * if there an error asserting test values.124 */125 @Test
126publicvoid testIncompleteTripleManagement() throws Exception {
127 assertExtract("/html/rdfa/incomplete-triples.html");
128 logger.debug(dumpModelToTurtle());
129130 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
131 RDFUtils.iri("http://dbpedia.org/property/birthPlace"),
132 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
133 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Germany"),
134 RDFUtils.iri("http://dbpedia.org/property/conventionalLongName"),
135 RDFUtils.literal("Federal Republic of Germany"));
136 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
137 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
138 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
139 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
140 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
141 RDFUtils.iri("http://dbpedia.org/resource/United_States"));
142 }
143144 }