View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.configuration.Settings;
21  import org.apache.any23.extractor.ExtractionContext;
22  import org.apache.any23.rdf.RDFUtils;
23  import org.eclipse.rdf4j.model.Resource;
24  import org.eclipse.rdf4j.model.IRI;
25  import org.eclipse.rdf4j.model.Value;
26  import org.eclipse.rdf4j.rio.RDFHandlerException;
27  import org.eclipse.rdf4j.rio.RDFWriter;
28  import org.eclipse.rdf4j.rio.RDFWriterFactory;
29  import org.eclipse.rdf4j.rio.WriterConfig;
30  
31  import java.io.BufferedWriter;
32  import java.io.Flushable;
33  import java.io.IOException;
34  import java.io.OutputStream;
35  import java.io.OutputStreamWriter;
36  import java.nio.charset.Charset;
37  import java.util.Optional;
38  
39  /**
40   * A {@link TripleHandler} that writes
41   * triples to a Sesame {@link org.eclipse.rdf4j.rio.RDFWriter},
42   * eg for serialization using one of Sesame's writers.
43   *
44   * @author Richard Cyganiak (richard@cyganiak.de)
45   * @author Michele Mostarda (mostarda@fbk.eu)
46   * @author Hans Brende (hansbrende@apache.org)
47   */
48  public abstract class RDFWriterTripleHandler extends TripleWriterHandler implements FormatWriter {
49  
50      private RDFWriter _writer;
51      private boolean writerStarted;
52      private final Flushable out;
53      private final TripleFormat format;
54  
55      /**
56       * The annotation flag.
57       */
58      private boolean annotated = false;
59  
60      static TripleFormat format(RDFWriterFactory rdf4j) {
61          return TripleFormat.of(rdf4j.getRDFFormat());
62      }
63  
64      RDFWriterTripleHandler(RDFWriterFactory rdf4j, TripleFormat format, OutputStream out, Settings settings) {
65          this.format = format;
66          Optional<Charset> charset = format.getCharset();
67          RDFWriter w;
68          if (!charset.isPresent()) {
69              this.out = out;
70              w = _writer = rdf4j.getWriter(out);
71          } else {
72              //use buffered writer if format supports encoding
73              BufferedWriter buf = new BufferedWriter(new OutputStreamWriter(out, charset.get()));
74              this.out = buf;
75              w = _writer = rdf4j.getWriter(buf);
76          }
77          configure(w.getWriterConfig(), settings);
78      }
79  
80      abstract void configure(WriterConfig config, Settings settings);
81  
82      RDFWriter writer() throws TripleHandlerException {
83          RDFWriter w = _writer;
84          if (w == null) {
85              throw new TripleHandlerException("writer has been closed!");
86          }
87          if (!writerStarted) {
88              writerStarted = true;
89              try {
90                  w.startRDF();
91              } catch (RDFHandlerException e) {
92                  throw new TripleHandlerException("Error while starting document", e);
93              }
94          }
95          return w;
96      }
97  
98      /**
99       * If <code>true</code> then the produced <b>RDF</b> is annotated with
100      * the extractors used to generate the specific statements.
101      *
102      * @return the annotation flag value.
103      */
104     @Override
105     public boolean isAnnotated() {
106         return annotated;
107     }
108 
109     /**
110      * Sets the <i>annotation</i> flag.
111      *
112      * @param f If <code>true</code> then the produced <b>RDF</b> is annotated with
113      *          the extractors used to generate the specific statements.
114      */
115     @Override
116     public void setAnnotated(boolean f) {
117         annotated = f;
118     }
119 
120     @Override
121     public void startDocument(IRI documentIRI) throws TripleHandlerException {
122         handleComment("OUTPUT FORMAT: " + format);
123     }
124 
125     @Override
126     public void openContext(ExtractionContext context) throws TripleHandlerException {
127         handleComment("BEGIN: " + context );
128     }
129 
130     @Override
131     public void writeTriple(Resource s, IRI p, Value o, Resource g)
132     throws TripleHandlerException {
133         try {
134             writer().handleStatement(RDFUtils.quad(s, p, o, g));
135         } catch (RDFHandlerException ex) {
136             throw new TripleHandlerException(
137                     String.format("Error while receiving triple: %s %s %s %s", s, p, o, g),
138                     ex
139             );
140         }
141     }
142 
143     @Override
144     public void writeNamespace(String prefix, String uri)
145     throws TripleHandlerException {
146         try {
147             writer().handleNamespace(prefix, uri);
148         } catch (RDFHandlerException ex) {
149             throw new TripleHandlerException(String.format("Error while receiving namespace: %s:%s", prefix, uri),
150                     ex
151             );
152         }
153     }
154 
155     @Override
156     public void closeContext(ExtractionContext context) throws TripleHandlerException {
157         handleComment( "END: " + context );
158     }
159 
160     @Override
161     public void close() throws TripleHandlerException {
162         RDFWriter writer = _writer;
163         if (writer == null) {
164             return;
165         }
166         _writer = null;
167         try {
168             if (!writerStarted) {
169                 writer.startRDF();
170             }
171             writer.endRDF(); //calls flush()
172         } catch (RDFHandlerException e) {
173             throw new TripleHandlerException("Error closing writer", e);
174         }
175     }
176 
177     @Override
178     public void endDocument(IRI documentIRI) throws TripleHandlerException {
179         try {
180             out.flush();
181         } catch (IOException e) {
182             throw new TripleHandlerException("Error ending document", e);
183         }
184     }
185 
186     private void handleComment(String comment) throws TripleHandlerException {
187         if( !annotated ) return;
188         try {
189             writer().handleComment(comment);
190         } catch (RDFHandlerException rdfhe) {
191             throw new TripleHandlerException("Error while handing comment.", rdfhe);
192         }
193     }
194 }