/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.any23.extractor.html;
1920import org.apache.any23.extractor.ExtractorFactory;
21import org.apache.any23.rdf.RDFUtils;
22import org.apache.any23.vocab.DCTerms;
23import org.apache.any23.vocab.SINDICE;
24import org.junit.Test;
25import org.eclipse.rdf4j.model.Literal;
26import org.eclipse.rdf4j.repository.RepositoryException;
2728/**29 * Reference Test class for the {@link TitleExtractor} extractor.30 * 31 */32publicclassTitleExtractorTestextendsAbstractExtractorTestCase {
3334privatestaticfinal DCTerms vDCTERMS = DCTerms.getInstance();
35privatestaticfinal SINDICE vSINDICE = SINDICE.getInstance();
3637private Literal helloLiteral = RDFUtils.literal("Hello World!");
3839 @Override
40protected ExtractorFactory<?> getExtractorFactory() {
41returnnew TitleExtractorFactory();
42 }
4344 @Test
45publicvoid testExtractPageTitle() throws RepositoryException {
46 assertExtract("/microformats/xfn/simple-me.html");
47 assertContains(baseIRI, vDCTERMS.title, helloLiteral);
48 }
4950 @Test
51publicvoid testStripSpacesFromTitle() throws RepositoryException {
52 assertExtract("/microformats/xfn/strip-spaces.html");
53 assertContains(baseIRI, vDCTERMS.title, helloLiteral);
54 }
5556 @Test
57publicvoid testNoPageTitle() throws RepositoryException {
58 assertExtract("/microformats/xfn/tagsoup.html");
59 assertModelEmpty();
60 }
6162 @Test
63publicvoid testMixedCaseTitleTag() throws RepositoryException {
64 assertExtract("/microformats/xfn/mixed-case.html");
65 assertContains(baseIRI, vDCTERMS.title, helloLiteral);
66 }
6768/**69 * This test verifies that when present the default language this is adopted by the title literal.70 * 71 * @throws org.eclipse.rdf4j.repository.RepositoryException72 * if an error is encountered whilst loading content from a storage connection73 */74 @Test
75publicvoid testTitleWithDefaultLanguage() throws RepositoryException {
76 assertExtract("/html/default-language.html");
77 assertContains(baseIRI, vDCTERMS.title, RDFUtils.literal("Welcome to mydomain.net", "en"));
78 assertNotContains(baseIRI, vDCTERMS.title, RDFUtils.literal("Welcome to mydomain.net", (String) null));
79 }
8081 }