View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.eclipse.rdf4j.model.Resource;
21  import org.eclipse.rdf4j.model.IRI;
22  import org.eclipse.rdf4j.model.Value;
23  
24  import java.io.BufferedWriter;
25  import java.io.OutputStream;
26  import java.io.OutputStreamWriter;
27  import java.io.PrintWriter;
28  import java.nio.charset.Charset;
29  import java.nio.charset.StandardCharsets;
30  import java.util.Collections;
31  import java.util.TreeSet;
32  
33  /**
34   * This writer simply produces a list of unique <i>IRI</i> present in the
35   * subject or in the object of every single extracted <i>RDF Statement</i>.
36   * 
37   * @author Davide Palmisano (palmisano@fbk.eu)
38   * @author Hans Brende (hansbrende@apache.org)
39   */
40  public class URIListWriter extends TripleWriterHandler implements FormatWriter {
41  
42      private static final Charset charset = StandardCharsets.UTF_8;
43  
44      static final TripleFormat FORMAT = TripleFormat.of("URIList",
45              Collections.singleton(URIListWriterFactory.MIME_TYPE), charset, Collections.singleton("txt"), null,
46              TripleFormat.NONSTANDARD);
47  
48      private final TreeSet<String> resources = new TreeSet<>();
49  
50      private PrintWriter writer;
51  
52      public URIListWriter(OutputStream outputStream) {
53          writer = new PrintWriter(new BufferedWriter(
54                  new OutputStreamWriter(outputStream, charset)));
55      }
56  
57      @Override
58      public void writeTriple(Resource s, IRI p, Value o, Resource g)
59              throws TripleHandlerException {
60          String string;
61          if (s instanceof IRI && resources.add(string = s.stringValue())) {
62              writer.println(string);
63          }
64          if (o instanceof IRI && resources.add(string = o.stringValue())) {
65              writer.println(string);
66          }
67      }
68  
69      @Override
70      public void writeNamespace(String prefix, String uri)
71              throws TripleHandlerException {
72      }
73  
74      @Override
75      public void endDocument(IRI documentIRI) throws TripleHandlerException {
76          writer.flush();
77      }
78  
79      @Override
80      public void close() throws TripleHandlerException {
81          writer.flush();
82          writer = null;
83          resources.clear();
84      }
85  
86      @Override
87      public boolean isAnnotated() {
88          return false;
89      }
90  
91      @Override
92      public void setAnnotated(boolean f) {
93          // Empty.
94      }
95  
96  }