This project has retired. For details please refer to its Attic page.
HTMLMetaExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.vocab.SINDICE;
22  import org.junit.Test;
23  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
24  
25  /**
26   * Reference Test class for the {@link HTMLMetaExtractor} extractor.
27   *
28   * @author Davide Palmisano ( dpalmisano@gmail.com )
29   */
30  public class HTMLMetaExtractorTest extends AbstractExtractorTestCase {
31  
32      private static final SINDICE vSINDICE = SINDICE.getInstance();
33  
34      protected ExtractorFactory<?> getExtractorFactory() {
35          return new HTMLMetaExtractorFactory();
36      }
37  
38      @Test
39      public void testExtractPageMeta() throws Exception {
40          assertExtract("/html/html-head-meta-extractor.html");
41          assertModelNotEmpty();
42          assertStatementsSize(null, null, null, 10);
43          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
44                  SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/title"),
45                  "XHTML+RDFa example", "en");
46          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
47                  SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/language"), "en", "en");
48          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
49                  SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/subject"),
50                  "XHTML+RDFa, semantic web", "en");
51          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
52                  SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/format"),
53                  "application/xhtml+xml", "en");
54          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
55                  SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/description"),
56                  "Example for Extensible Hypertext Markup Language + Resource Description Framework – in – attributes.",
57                  "en");
58          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
59                  SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "robots"), "index, follow",
60                  "en");
61          assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
62                  SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "content-language"), "en",
63                  "en");
64      }
65  
66      @Test
67      public void testNoMeta() throws Exception {
68          assertExtract("/html/html-head-link-extractor.html");
69          assertModelEmpty();
70      }
71  
72      @Test
73      public void testExtractPageMetaWithExtensionsPerMozillaSpecification() throws Exception {
74          assertExtract("/html/html-head-meta-extractor-with-mozilla-extensions.html");
75          assertModelNotEmpty();
76          assertStatementsSize(null, null, null, 2);
77      }
78  
79  }