This project has retired. For details please refer to its Attic page.
AbstractRDFaExtractorTestCase xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.rdfa;
19  
20  import org.apache.any23.extractor.html.AbstractExtractorTestCase;
21  import org.apache.any23.rdf.RDFUtils;
22  import org.apache.any23.vocab.DCTerms;
23  import org.apache.any23.vocab.FOAF;
24  import org.junit.Test;
25  import org.slf4j.Logger;
26  import org.slf4j.LoggerFactory;
27  
28  /**
29   * This class provides a common set of tests for an <i>RDFa</i> extractor.
30   *
31   * @author Michele Mostarda (mostarda@fbk.eu)
32   */
33  public abstract class AbstractRDFaExtractorTestCase extends AbstractExtractorTestCase {
34  
35      protected static final DCTerms vDCTERMS = DCTerms.getInstance();
36      protected static final FOAF vFOAF = FOAF.getInstance();
37  
38      Logger logger = LoggerFactory.getLogger(RDFaExtractorTest.class);
39  
40      /**
41       * Verify the basic RDFa support.
42       *
43       * @throws Exception
44       *             if there is an issue asserting test values.
45       */
46      @Test
47      public void testBasic() throws Exception {
48          assertExtract("/html/rdfa/basic.html");
49          logger.info(dumpModelToNQuads());
50          assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice", "en"));
51          assertContains(null, vDCTERMS.title, RDFUtils.literal("The trouble with Bob", "en"));
52          assertContains(null, RDFUtils.iri("http://fake.org/prop"), RDFUtils.literal("Mary", "en"));
53      }
54  
55      /**
56       * This test check if the <a href="https://www.w3.org/TR/rdfa-core/#s_curieprocessing">RDFa1.1 CURIEs</a> expansion
57       * is correct and backward compatible with <a href="http://www.w3.org/TR/rdfa-syntax/#s_curieprocessing">RDFa
58       * 1.0</a>.
59       *
60       * @throws Exception
61       *             if there is an issue asserting test values.
62       */
63      @Test
64      public void testRDFa11CURIEs() throws Exception {
65          assertExtract("/html/rdfa/rdfa-11-curies.html");
66          assertModelNotEmpty();
67          assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
68                  RDFUtils.iri("http://dbpedia.org/name"), RDFUtils.literal("Albert Einstein"));
69          assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
70                  RDFUtils.iri("http://dbpedia.org/knows"),
71                  RDFUtils.iri("http://dbpedia.org/resource/Franklin_Roosevlet"));
72          assertContains(RDFUtils.iri("http://database.org/table/Departments"),
73                  RDFUtils.iri("http://database.org/description"), RDFUtils.literal("Tables listing departments"));
74          assertContains(RDFUtils.iri("http://database.org/table/Departments"), RDFUtils.iri("http://database.org/owner"),
75                  RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
76          assertContains(RDFUtils.iri("http://database.org/table/Departments"),
77                  RDFUtils.iri("http://xmlns.com/foaf/0.1/author"),
78                  RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
79          assertContains(RDFUtils.iri("http://database.org/table/Departments"),
80                  RDFUtils.iri("http://purl.org/dc/01/name"), RDFUtils.literal("Departments"));
81          assertStatementsSize(null, null, null, 6);
82          logger.debug(dumpHumanReadableTriples());
83      }
84  
85      /**
86       * This test checks if the subject of a property modeled as <i>RDFa</i> in a <i>XHTML</i> document where the subject
87       * contains inner <i>XML</i> tags is represented as a plain <i>Literal</i> stripping all the inner tags. For details
88       * see the <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa in XHTML: Syntax and Processing</a> recommendation.
89       *
90       * @throws Exception
91       *             if there an error asserting test values.
92       */
93      @Test
94      public void testEmptyDatatypeDeclarationWithInnerXMLTags() throws Exception {
95          assertExtract("/html/rdfa/null-datatype-test.html");
96          logger.debug(dumpModelToRDFXML());
97  
98          assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"), vFOAF.name,
99                  RDFUtils.literal("Albert Einstein", "en"));
100 
101     }
102 
103     /**
104      * This test checks if the <i>RDF</i> extraction is compliant to the
105      * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa in XHTML: Syntax and Processing</a> specification against the
106      * <a href="http://files.openspring.net/tmp/drupal-test-frontpage.html">Drupal test page</a>.
107      *
108      * @throws Exception
109      *             if there an error asserting test values.
110      */
111     @Test
112     public void testDrupalTestPage() throws Exception {
113         assertExtract("/html/rdfa/drupal-test-frontpage.html");
114         logger.debug(dumpModelToTurtle());
115         assertContains(RDFUtils.iri("http://bob.example.com/node/3"), vDCTERMS.title,
116                 RDFUtils.literal("A blog post...", "en"));
117     }
118 
119     /**
120      * See RDFa 1.1 Specification section 6.2 .
121      *
122      * @throws Exception
123      *             if there an error asserting test values.
124      */
125     @Test
126     public void testIncompleteTripleManagement() throws Exception {
127         assertExtract("/html/rdfa/incomplete-triples.html");
128         logger.debug(dumpModelToTurtle());
129 
130         assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
131                 RDFUtils.iri("http://dbpedia.org/property/birthPlace"),
132                 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
133         assertContains(RDFUtils.iri("http://dbpedia.org/resource/Germany"),
134                 RDFUtils.iri("http://dbpedia.org/property/conventionalLongName"),
135                 RDFUtils.literal("Federal Republic of Germany"));
136         assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
137                 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
138                 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
139         assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
140                 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
141                 RDFUtils.iri("http://dbpedia.org/resource/United_States"));
142     }
143 
144 }