This project has retired. For details please refer to its Attic page.
JSONLDExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.any23.extractor.rdf;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.nio.charset.StandardCharsets;
24  
25  import com.fasterxml.jackson.core.JsonFactory;
26  import com.fasterxml.jackson.core.JsonParser;
27  import org.apache.any23.extractor.ExtractionContext;
28  import org.apache.any23.extractor.ExtractionException;
29  import org.apache.any23.extractor.ExtractionParameters;
30  import org.apache.any23.extractor.ExtractionResult;
31  import org.apache.any23.extractor.ExtractionResultImpl;
32  import org.apache.any23.rdf.RDFUtils;
33  import org.apache.any23.writer.RDFXMLWriter;
34  import org.apache.any23.writer.TripleHandler;
35  import org.apache.any23.writer.TripleHandlerException;
36  import org.junit.After;
37  import org.junit.Assert;
38  import org.junit.Before;
39  import org.junit.Test;
40  import org.eclipse.rdf4j.model.IRI;
41  import org.slf4j.Logger;
42  import org.slf4j.LoggerFactory;
43  
44  /**
45   * Test case for {@link JSONLDExtractor}.
46   *
47   */
48  public class JSONLDExtractorTest {
49  
50      private static final Logger logger = LoggerFactory.getLogger(JSONLDExtractorTest.class);
51  
52      private JSONLDExtractor extractor;
53  
54      @Before
55      public void setUp() throws Exception {
56          extractor = new JSONLDExtractor();
57      }
58  
59      @After
60      public void tearDown() throws Exception {
61          extractor = null;
62      }
63  
64      @Test
65      public void testExtractFromJSONLDDocument() throws IOException, ExtractionException, TripleHandlerException {
66          final IRI uri = RDFUtils.iri("http://host.com/place-example.jsonld");
67          extract(uri, "/org/apache/any23/extractor/rdf/place-example.jsonld");
68      }
69  
70      @Test
71      public void testWhitespaceCleaning() throws Exception {
72          for (int i = 0; i <= Character.MAX_CODE_POINT; i++) {
73              if (Character.isWhitespace(i) || Character.isSpaceChar(i)) {
74                  byte[] bytes = new String(Character.toChars(i)).getBytes(StandardCharsets.UTF_8);
75                  @SuppressWarnings("resource")
76                  InputStream stream = new JsonCleaningInputStream(new ByteArrayInputStream(bytes));
77                  if (i == '\r' || i == '\n') {
78                      Assert.assertEquals(stream.read(), i);
79                  } else {
80                      Assert.assertEquals(stream.read(), ' ');
81                  }
82                  Assert.assertEquals(stream.read(), -1);
83              }
84          }
85      }
86  
87      @Test
88      public void testJsonCleaning() throws Exception {
89          JsonCleaningInputStream stream = new JsonCleaningInputStream(
90                  getClass().getResourceAsStream("/html/json-cleaning-test.json"));
91  
92          JsonParser parser = new JsonFactory().createParser(stream);
93  
94          int numTokens = 0;
95          while (parser.nextToken() != null) {
96              numTokens++;
97          }
98  
99          Assert.assertEquals(numTokens, 41);
100 
101     }
102 
103     public void extract(IRI uri, String filePath) throws IOException, ExtractionException, TripleHandlerException {
104         ByteArrayOutputStream baos = new ByteArrayOutputStream();
105         final TripleHandler tHandler = new RDFXMLWriter(baos);
106         final ExtractionContext extractionContext = new ExtractionContext("rdf-jsonld", uri);
107         final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
108         try {
109             extractor.run(ExtractionParameters.newDefault(), extractionContext,
110                     this.getClass().getResourceAsStream(filePath), result);
111         } finally {
112             logger.debug(baos.toString("UTF-8"));
113             tHandler.close();
114             result.close();
115         }
116     }
117 
118 }