This project has retired. For details please refer to its Attic page.
EmbeddedJSONLDExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.any23.extractor.html;
18  
19  import org.apache.any23.extractor.ExtractorFactory;
20  import org.apache.any23.extractor.IssueReport;
21  import org.apache.any23.rdf.RDFUtils;
22  import org.apache.any23.vocab.FOAF;
23  import org.junit.Test;
24  
25  /**
26   * @author lmcgibbn
27   *
28   */
29  public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
30  
31      @Test
32      public void testEmbeddedJSONLDInHead() throws Exception {
33          assertExtract("/html/html-embedded-jsonld-extractor.html");
34          assertModelNotEmpty();
35          assertStatementsSize(null, null, null, 3);
36      }
37  
38      @Test
39      public void testSeveralEmbeddedJSONLDInHead() throws Exception {
40          assertExtract("/html/html-embedded-jsonld-extractor-multiple.html");
41          assertModelNotEmpty();
42          assertStatementsSize(null, null, null, 7);
43      }
44  
45      @Test
46      public void testEmbeddedJSONLDInBody() throws Exception {
47          assertExtract("/html/html-body-embedded-jsonld-extractor.html");
48          assertModelNotEmpty();
49          assertStatementsSize(null, null, null, 3);
50      }
51  
52      @Test
53      public void testEmbeddedJSONLDInHeadAndBody() throws Exception {
54          assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
55          assertModelNotEmpty();
56          assertStatementsSize(null, null, null, 7);
57      }
58  
59      @Test
60      public void testJSONLDCommentStripping() throws Exception {
61          assertExtract("/html/html-jsonld-strip-comments.html");
62          assertModelNotEmpty();
63          assertStatementsSize(null, null, null, 3);
64          assertContains(RDFUtils.iri(FOAF.NS, "name"), "Robert\\\" Millar\\\\\"\"\\\\");
65      }
66  
67      @Test
68      public void testJSONLDCommaNormalization() {
69          assertExtract("/html/html-jsonld-commas.html");
70          assertModelNotEmpty();
71          assertStatementsSize(null, null, null, 30);
72      }
73  
74      @Test
75      public void testJSONLDUnescapedCharacters() {
76          assertExtract("/html/html-jsonld-unescaped-characters.html");
77          assertModelNotEmpty();
78          assertStatementsSize(null, null, null, 375);
79          assertContains(RDFUtils.iri("http://schema.org/name"), "Weezer & Pixies\u0008");
80          assertContains(RDFUtils.iri("http://schema.org/description"),
81                  "#1 MAGIC SHOW IN L.A.\nThe current WINNER of the CW’s Penn & Teller’s FOOL US, Illusionist "
82                          + "extraordinaire Ivan Amodei is on a national tour with his show INTIMATE ILLUSIONS."
83                          + "\n\nCurrently, on an ei...");
84      }
85  
86      @Test
87      public void testJSONLDFatalError() {
88          assertExtract("/html/html-jsonld-fatal-error.html", false);
89          assertIssue(IssueReport.IssueLevel.FATAL,
90                  ".*Unexpected character .* was expecting comma to separate Object entries.*");
91          assertStatementsSize(null, null, null, 4);
92      }
93  
94      @Test
95      public void testJSONLDBadCharacter() throws Exception {
96          assertExtract("/html/html-jsonld-bad-character.html");
97          assertStatementsSize(null, null, null, 12);
98      }
99  
100     @Override
101     protected ExtractorFactory<?> getExtractorFactory() {
102         return new EmbeddedJSONLDExtractorFactory();
103     }
104 
105 }