View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.eclipse.rdf4j.model.Resource;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Value;
24  
25  /**
26   * Defines a document based triple handler.
27   */
28  public interface TripleHandler extends AutoCloseable {
29  
30      void startDocument(IRI documentIRI) throws TripleHandlerException;
31  
32      /**
33       * Informs the handler that a new context has been established. Contexts are not guaranteed to receive any triples,
34       * so they might be closed without any triple.
35       * 
36       * @param context
37       *            an instantiated {@link org.apache.any23.extractor.ExtractionContext}
38       * 
39       * @throws TripleHandlerException
40       *             if there is an errr opening the {@link org.apache.any23.extractor.ExtractionContext}
41       */
42      void openContext(ExtractionContext context) throws TripleHandlerException;
43  
44      /**
45       * Invoked with a currently open context, notifies the detection of a triple.
46       *
47       * @param s
48       *            triple subject, cannot be <code>null</code>.
49       * @param p
50       *            triple predicate, cannot be <code>null</code>.
51       * @param o
52       *            triple object, cannot be <code>null</code>.
53       * @param g
54       *            triple graph, can be <code>null</code>.
55       * @param context
56       *            extraction context.
57       * 
58       * @throws TripleHandlerException
59       *             if there is an error receiving the triple.
60       */
61      void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException;
62  
63      /**
64       * Invoked with a currently open context, notifies the detection of a namespace.
65       *
66       * @param prefix
67       *            namespace prefix.
68       * @param uri
69       *            namespace <i>IRI</i>.
70       * @param context
71       *            namespace context.
72       * 
73       * @throws TripleHandlerException
74       *             if there is an error receiving the namespace.
75       */
76      void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException;
77  
78      /**
79       * Informs the handler that no more triples will come from a previously opened context. All contexts are guaranteed
80       * to be closed before the final close(). The document context for each document is guaranteed to be closed after
81       * all local contexts of that document.
82       *
83       * @param context
84       *            the context to be closed.
85       * 
86       * @throws TripleHandlerException
87       *             if there is an error closing the {@link org.apache.any23.extractor.ExtractionContext}.
88       */
89      void closeContext(ExtractionContext context) throws TripleHandlerException;
90  
91      /**
92       * Informs the handler that the end of the document has been reached.
93       *
94       * @param documentIRI
95       *            document IRI.
96       * 
97       * @throws TripleHandlerException
98       *             if there is an error ending the document.
99       */
100     void endDocument(IRI documentIRI) throws TripleHandlerException;
101 
102     /**
103      * Sets the length of the content to be processed.
104      *
105      * @param contentLength
106      *            length of the content being processed.
107      */
108     void setContentLength(long contentLength);
109 
110     /**
111      * Will be called last and exactly once.
112      * 
113      * @throws TripleHandlerException
114      *             if there is an error closing the {@link org.apache.any23.writer.TripleHandler} implementation.
115      */
116     void close() throws TripleHandlerException;
117 
118 }