/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.any23.extractor.html;
1819import org.apache.any23.extractor.ExtractorFactory;
20import org.apache.any23.extractor.IssueReport;
21import org.apache.any23.rdf.RDFUtils;
22import org.apache.any23.vocab.FOAF;
23import org.junit.Test;
2425/**26 * @author lmcgibbn27 *28 */29publicclassEmbeddedJSONLDExtractorTestextendsAbstractExtractorTestCase {
3031 @Test
32publicvoid testEmbeddedJSONLDInHead() throws Exception {
33 assertExtract("/html/html-embedded-jsonld-extractor.html");
34 assertModelNotEmpty();
35 assertStatementsSize(null, null, null, 3);
36 }
3738 @Test
39publicvoid testSeveralEmbeddedJSONLDInHead() throws Exception {
40 assertExtract("/html/html-embedded-jsonld-extractor-multiple.html");
41 assertModelNotEmpty();
42 assertStatementsSize(null, null, null, 7);
43 }
4445 @Test
46publicvoid testEmbeddedJSONLDInBody() throws Exception {
47 assertExtract("/html/html-body-embedded-jsonld-extractor.html");
48 assertModelNotEmpty();
49 assertStatementsSize(null, null, null, 3);
50 }
5152 @Test
53publicvoid testEmbeddedJSONLDInHeadAndBody() throws Exception {
54 assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
55 assertModelNotEmpty();
56 assertStatementsSize(null, null, null, 7);
57 }
5859 @Test
60publicvoid testJSONLDCommentStripping() throws Exception {
61 assertExtract("/html/html-jsonld-strip-comments.html");
62 assertModelNotEmpty();
63 assertStatementsSize(null, null, null, 3);
64 assertContains(RDFUtils.iri(FOAF.NS, "name"), "Robert\\\" Millar\\\\\"\"\\\\");
65 }
6667 @Test
68publicvoid testJSONLDCommaNormalization() {
69 assertExtract("/html/html-jsonld-commas.html");
70 assertModelNotEmpty();
71 assertStatementsSize(null, null, null, 30);
72 }
7374 @Test
75publicvoid testJSONLDUnescapedCharacters() {
76 assertExtract("/html/html-jsonld-unescaped-characters.html");
77 assertModelNotEmpty();
78 assertStatementsSize(null, null, null, 375);
79 assertContains(RDFUtils.iri("http://schema.org/name"), "Weezer & Pixies\u0008");
80 assertContains(RDFUtils.iri("http://schema.org/description"),
81"#1 MAGIC SHOW IN L.A.\nThe current WINNER of the CW’s Penn & Teller’s FOOL US, Illusionist "82 + "extraordinaire Ivan Amodei is on a national tour with his show INTIMATE ILLUSIONS."83 + "\n\nCurrently, on an ei...");
84 }
8586 @Test
87publicvoid testJSONLDFatalError() {
88 assertExtract("/html/html-jsonld-fatal-error.html", false);
89 assertIssue(IssueReport.IssueLevel.FATAL,
90".*Unexpected character .* was expecting comma to separate Object entries.*");
91 assertStatementsSize(null, null, null, 4);
92 }
9394 @Test
95publicvoid testJSONLDBadCharacter() throws Exception {
96 assertExtract("/html/html-jsonld-bad-character.html");
97 assertStatementsSize(null, null, null, 12);
98 }
99100 @Override
101protected ExtractorFactory<?> getExtractorFactory() {
102returnnew EmbeddedJSONLDExtractorFactory();
103 }
104105 }