/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.any23.extractor.rdf;
19  
20  import com.fasterxml.jackson.core.JsonLocation;
21  import com.fasterxml.jackson.core.JsonProcessingException;
22  import com.fasterxml.jackson.core.StreamReadFeature;
23  import com.fasterxml.jackson.core.json.JsonReadFeature;
24  import com.fasterxml.jackson.databind.ObjectMapper;
25  import com.fasterxml.jackson.databind.json.JsonMapper;
26  import com.github.jsonldjava.core.JsonLdOptions;
27  import com.github.jsonldjava.core.JsonLdProcessor;
28  import com.github.jsonldjava.utils.JsonUtils;
29  import org.apache.any23.extractor.ExtractionContext;
30  import org.apache.any23.extractor.ExtractionException;
31  import org.apache.any23.extractor.ExtractionParameters;
32  import org.apache.any23.extractor.ExtractionResult;
33  import org.apache.any23.extractor.ExtractorDescription;
34  import org.apache.any23.extractor.IssueReport;
35  import org.apache.any23.rdf.Any23ValueFactoryWrapper;
36  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
37  import org.eclipse.rdf4j.rio.RDFParser;
38  
39  import java.io.IOException;
40  import java.io.InputStream;
41  
42  /**
43   * Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor} handling
44   * <a href="http://www.w3.org/TR/json-ld/">JSON-LD</a> format.
45   *
46   */
47  public class JSONLDExtractor extends BaseRDFExtractor {
48  
49      private static final ObjectMapper OBJECT_MAPPER = JsonMapper.builder()
50              .enable(JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER).disable(JsonReadFeature.ALLOW_JAVA_COMMENTS)
51              .disable(JsonReadFeature.ALLOW_MISSING_VALUES).enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS)
52              .enable(JsonReadFeature.ALLOW_LEADING_ZEROS_FOR_NUMBERS).disable(JsonReadFeature.ALLOW_SINGLE_QUOTES)
53              .disable(JsonReadFeature.ALLOW_TRAILING_COMMA).enable(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS)
54              .enable(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES).disable(JsonReadFeature.ALLOW_YAML_COMMENTS)
55              .enable(StreamReadFeature.IGNORE_UNDEFINED).enable(StreamReadFeature.INCLUDE_SOURCE_IN_LOCATION)
56              .disable(StreamReadFeature.STRICT_DUPLICATE_DETECTION).build();
57  
58      /**
59       * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #JSONLDExtractor()} instead.
60       * 
61       * @param verifyDataType
62       *            has no effect
63       * @param stopAtFirstError
64       *            has no effect
65       */
66      @Deprecated
67      public JSONLDExtractor(boolean verifyDataType, boolean stopAtFirstError) {
68          super(verifyDataType, stopAtFirstError);
69      }
70  
71      public JSONLDExtractor() {
72          super(false, false);
73      }
74  
75      @Override
76      public ExtractorDescription getDescription() {
77          return JSONLDExtractorFactory.getDescriptionInstance();
78      }
79  
80      @Override
81      public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream in,
82              ExtractionResult extractionResult) throws IOException, ExtractionException {
83          JSONLDJavaSinkNLDJavaSink.html#JSONLDJavaSink">JSONLDJavaSink handler = new JSONLDJavaSink(extractionResult, new Any23ValueFactoryWrapper(
84                  SimpleValueFactory.getInstance(), extractionResult, extractionContext.getDefaultLanguage()));
85  
86          JsonLdOptions options = new JsonLdOptions(extractionContext.getDocumentIRI().stringValue());
87          options.useNamespaces = true;
88  
89          try {
90              Object json = JsonUtils
91                      .fromJsonParser(OBJECT_MAPPER.getFactory().createParser(new JsonCleaningInputStream(in)));
92              JsonLdProcessor.toRDF(json, handler, options);
93          } catch (JsonProcessingException e) {
94              JsonLocation loc = e.getLocation();
95              if (loc == null) {
96                  extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, e.getOriginalMessage(), -1L, -1L);
97              } else {
98                  extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, e.getOriginalMessage(), loc.getLineNr(),
99                          loc.getColumnNr());
100             }
101         } catch (Exception e) {
102             // ANY23-420: jsonld-java can sometimes throw IllegalArgumentException
103             extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(e), -1, -1);
104         }
105     }
106 
107     /* DEPRECATED METHODS */
108 
109     /**
110      * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
111      * 
112      * @param stopAtFirstError
113      *            has no effect
114      */
115     @Deprecated
116     @Override
117     public void setStopAtFirstError(boolean stopAtFirstError) {
118         super.setStopAtFirstError(stopAtFirstError);
119     }
120 
121     /**
122      * @deprecated since 2.4. This extractor has never supported this setting. Do not use.
123      * 
124      * @param verifyDataType
125      *            has no effect
126      */
127     @Deprecated
128     @Override
129     public void setVerifyDataType(boolean verifyDataType) {
130         super.setVerifyDataType(verifyDataType);
131     }
132 
133     /**
134      * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method.
135      * 
136      * @param extractionContext
137      *            the extraction context
138      * @param extractionResult
139      *            the extraction result
140      * 
141      * @return a {@link RDFParser}
142      */
143     @Deprecated
144     @Override
145     protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
146         return RDFParserFactory.getInstance().getJSONLDParser(isVerifyDataType(), isStopAtFirstError(),
147                 extractionContext, extractionResult);
148     }
149 
150 }