View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.eclipse.rdf4j.model.Resource;
21  import org.eclipse.rdf4j.model.IRI;
22  import org.eclipse.rdf4j.model.Value;
23  
24  import java.io.BufferedWriter;
25  import java.io.OutputStream;
26  import java.io.OutputStreamWriter;
27  import java.io.PrintWriter;
28  import java.nio.charset.Charset;
29  import java.nio.charset.StandardCharsets;
30  import java.util.Collections;
31  import java.util.TreeSet;
32  
33  /**
34   * This writer simply produces a list of unique <i>IRI</i> present in the subject or in the object of every single
35   * extracted <i>RDF Statement</i>.
36   * 
37   * @author Davide Palmisano (palmisano@fbk.eu)
38   * @author Hans Brende (hansbrende@apache.org)
39   */
40  public class URIListWriter extends TripleWriterHandler implements FormatWriter {
41  
42      private static final Charset charset = StandardCharsets.UTF_8;
43  
44      static final TripleFormat FORMAT = TripleFormat.of("URIList", Collections.singleton(URIListWriterFactory.MIME_TYPE),
45              charset, Collections.singleton("txt"), null, TripleFormat.NONSTANDARD);
46  
47      private final TreeSet<String> resources = new TreeSet<>();
48  
49      private PrintWriter writer;
50  
51      public URIListWriter(OutputStream outputStream) {
52          writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(outputStream, charset)));
53      }
54  
55      @Override
56      public void writeTriple(Resource s, IRI p, Value o, Resource g) throws TripleHandlerException {
57          String string;
58          if (s instanceof IRI && resources.add(string = s.stringValue())) {
59              writer.println(string);
60          }
61          if (o instanceof IRI && resources.add(string = o.stringValue())) {
62              writer.println(string);
63          }
64      }
65  
66      @Override
67      public void writeNamespace(String prefix, String uri) throws TripleHandlerException {
68      }
69  
70      @Override
71      public void endDocument(IRI documentIRI) throws TripleHandlerException {
72          writer.flush();
73      }
74  
75      @Override
76      public void close() throws TripleHandlerException {
77          writer.flush();
78          writer = null;
79          resources.clear();
80      }
81  
82      @Override
83      public boolean isAnnotated() {
84          return false;
85      }
86  
87      @Override
88      public void setAnnotated(boolean f) {
89          // Empty.
90      }
91  
92  }