View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.configuration.Settings;
21  import org.apache.any23.extractor.ExtractionContext;
22  import org.apache.any23.rdf.RDFUtils;
23  import org.eclipse.rdf4j.model.Resource;
24  import org.eclipse.rdf4j.model.IRI;
25  import org.eclipse.rdf4j.model.Value;
26  import org.eclipse.rdf4j.rio.RDFHandlerException;
27  import org.eclipse.rdf4j.rio.RDFWriter;
28  import org.eclipse.rdf4j.rio.RDFWriterFactory;
29  import org.eclipse.rdf4j.rio.WriterConfig;
30  
31  import java.io.BufferedWriter;
32  import java.io.Flushable;
33  import java.io.IOException;
34  import java.io.OutputStream;
35  import java.io.OutputStreamWriter;
36  import java.nio.charset.Charset;
37  import java.util.Locale;
38  import java.util.Optional;
39  
40  /**
41   * A {@link TripleHandler} that writes triples to a Sesame {@link org.eclipse.rdf4j.rio.RDFWriter}, eg for serialization
42   * using one of Sesame's writers.
43   *
44   * @author Richard Cyganiak (richard@cyganiak.de)
45   * @author Michele Mostarda (mostarda@fbk.eu)
46   * @author Hans Brende (hansbrende@apache.org)
47   */
48  public abstract class RDFWriterTripleHandler extends TripleWriterHandler implements FormatWriter {
49  
50      private RDFWriter _writer;
51      private boolean writerStarted;
52      private final Flushable out;
53      private final TripleFormat format;
54  
55      /**
56       * The annotation flag.
57       */
58      private boolean annotated = false;
59  
60      static TripleFormat format(RDFWriterFactory rdf4j) {
61          return TripleFormat.of(rdf4j.getRDFFormat());
62      }
63  
64      RDFWriterTripleHandler(RDFWriterFactory rdf4j, TripleFormat format, OutputStream out, Settings settings) {
65          this.format = format;
66          Optional<Charset> charset = format.getCharset();
67          RDFWriter w;
68          if (!charset.isPresent()) {
69              this.out = out;
70              w = _writer = rdf4j.getWriter(out);
71          } else {
72              // use buffered writer if format supports encoding
73              BufferedWriter buf = new BufferedWriter(new OutputStreamWriter(out, charset.get()));
74              this.out = buf;
75              w = _writer = rdf4j.getWriter(buf);
76          }
77          configure(w.getWriterConfig(), settings);
78      }
79  
80      abstract void configure(WriterConfig config, Settings settings);
81  
82      RDFWriter writer() throws TripleHandlerException {
83          RDFWriter w = _writer;
84          if (w == null) {
85              throw new TripleHandlerException("writer has been closed!");
86          }
87          if (!writerStarted) {
88              writerStarted = true;
89              try {
90                  w.startRDF();
91              } catch (RDFHandlerException e) {
92                  throw new TripleHandlerException("Error while starting document", e);
93              }
94          }
95          return w;
96      }
97  
98      /**
99       * If <code>true</code> then the produced <b>RDF</b> is annotated with the extractors used to generate the specific
100      * statements.
101      *
102      * @return the annotation flag value.
103      */
104     @Override
105     public boolean isAnnotated() {
106         return annotated;
107     }
108 
109     /**
110      * Sets the <i>annotation</i> flag.
111      *
112      * @param f
113      *            If <code>true</code> then the produced <b>RDF</b> is annotated with the extractors used to generate
114      *            the specific statements.
115      */
116     @Override
117     public void setAnnotated(boolean f) {
118         annotated = f;
119     }
120 
121     @Override
122     public void startDocument(IRI documentIRI) throws TripleHandlerException {
123         handleComment("OUTPUT FORMAT: " + format);
124     }
125 
126     @Override
127     public void openContext(ExtractionContext context) throws TripleHandlerException {
128         handleComment("BEGIN: " + context);
129     }
130 
131     @Override
132     public void writeTriple(Resource s, IRI p, Value o, Resource g) throws TripleHandlerException {
133         try {
134             writer().handleStatement(RDFUtils.quad(s, p, o, g));
135         } catch (RDFHandlerException ex) {
136             throw new TripleHandlerException(
137                     String.format(Locale.ROOT, "Error while receiving triple: %s %s %s %s", s, p, o, g), ex);
138         }
139     }
140 
141     @Override
142     public void writeNamespace(String prefix, String uri) throws TripleHandlerException {
143         try {
144             writer().handleNamespace(prefix, uri);
145         } catch (RDFHandlerException ex) {
146             throw new TripleHandlerException(
147                     String.format(Locale.ROOT, "Error while receiving namespace: %s:%s", prefix, uri), ex);
148         }
149     }
150 
151     @Override
152     public void closeContext(ExtractionContext context) throws TripleHandlerException {
153         handleComment("END: " + context);
154     }
155 
156     @Override
157     public void close() throws TripleHandlerException {
158         RDFWriter writer = _writer;
159         if (writer == null) {
160             return;
161         }
162         _writer = null;
163         try {
164             if (!writerStarted) {
165                 writer.startRDF();
166             }
167             writer.endRDF(); // calls flush()
168         } catch (RDFHandlerException e) {
169             throw new TripleHandlerException("Error closing writer", e);
170         }
171     }
172 
173     @Override
174     public void endDocument(IRI documentIRI) throws TripleHandlerException {
175         try {
176             out.flush();
177         } catch (IOException e) {
178             throw new TripleHandlerException("Error ending document", e);
179         }
180     }
181 
182     private void handleComment(String comment) throws TripleHandlerException {
183         if (!annotated)
184             return;
185         try {
186             writer().handleComment(comment);
187         } catch (RDFHandlerException rdfhe) {
188             throw new TripleHandlerException("Error while handing comment.", rdfhe);
189         }
190     }
191 }