/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.any23.extractor.html;
1920import org.apache.any23.extractor.ExtractorFactory;
21import org.apache.any23.vocab.SINDICE;
22import org.junit.Test;
23import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
2425/**26 * Reference Test class for the {@link HTMLMetaExtractor} extractor.27 *28 * @author Davide Palmisano ( dpalmisano@gmail.com )29 */30publicclassHTMLMetaExtractorTestextendsAbstractExtractorTestCase {
3132privatestaticfinal SINDICE vSINDICE = SINDICE.getInstance();
3334protected ExtractorFactory<?> getExtractorFactory() {
35returnnew HTMLMetaExtractorFactory();
36 }
3738 @Test
39publicvoid testExtractPageMeta() throws Exception {
40 assertExtract("/html/html-head-meta-extractor.html");
41 assertModelNotEmpty();
42 assertStatementsSize(null, null, null, 10);
43 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
44 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/title"),
45"XHTML+RDFa example", "en");
46 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
47 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/language"), "en", "en");
48 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
49 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/subject"),
50"XHTML+RDFa, semantic web", "en");
51 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
52 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/format"),
53"application/xhtml+xml", "en");
54 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
55 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/description"),
56"Example for Extensible Hypertext Markup Language + Resource Description Framework – in – attributes.",
57"en");
58 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
59 SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "robots"), "index, follow",
60"en");
61 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
62 SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "content-language"), "en",
63"en");
64 }
6566 @Test
67publicvoid testNoMeta() throws Exception {
68 assertExtract("/html/html-head-link-extractor.html");
69 assertModelEmpty();
70 }
7172 @Test
73publicvoid testExtractPageMetaWithExtensionsPerMozillaSpecification() throws Exception {
74 assertExtract("/html/html-head-meta-extractor-with-mozilla-extensions.html");
75 assertModelNotEmpty();
76 assertStatementsSize(null, null, null, 2);
77 }
7879 }