View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.writer;
19  
20  import org.apache.any23.extractor.ExtractionContext;
21  import org.eclipse.rdf4j.model.Resource;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Value;
24  
25  /**
26   * Defines a document based triple handler, notified of documents, extraction contexts, triples and namespaces.
27   */
28  public interface TripleHandler extends AutoCloseable {
29      /** Start-of-document notification for {@code documentIRI}; presumably paired with {@link #endDocument(IRI)}. */
30      void startDocument(IRI documentIRI) throws TripleHandlerException;
31  
32      /**
33       * Informs the handler that a new context has been established.
34       * Contexts are not guaranteed to receive any triples, so they
35       * might be closed without any triple.
36       * @param context an instantiated {@link org.apache.any23.extractor.ExtractionContext}
37       * @throws TripleHandlerException if there is an error opening the
38       * {@link org.apache.any23.extractor.ExtractionContext}
39       */
40      void openContext(ExtractionContext context) throws TripleHandlerException;
41  
42      /**
43       * Invoked with a currently open context,
44       * notifies the detection of a triple.
45       *
46       * @param s triple subject, cannot be <code>null</code>.
47       * @param p triple predicate, cannot be <code>null</code>.
48       * @param o triple object, cannot be <code>null</code>.
49       * @param g triple graph, can be <code>null</code>.
50       * @param context extraction context.
51       * @throws TripleHandlerException if there is an error receiving the triple.
52       */
53      void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException;
54  
55      /**
56       * Invoked with a currently open context, notifies the detection of a
57       * namespace.
58       *
59       * @param prefix namespace prefix.
60       * @param uri namespace <i>IRI</i>.
61       * @param context namespace context.
62       * @throws TripleHandlerException if there is an error receiving the namespace.
63       */
64      void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException;
65  
66      /**
67       * Informs the handler that no more triples will come from a
68       * previously opened context. All contexts are guaranteed to
69       * be closed before the final {@link #close()}. The document context
70       * for each document is guaranteed to be closed after all
71       * local contexts of that document.
72       *
73       * @param context the context to be closed.
74       * @throws TripleHandlerException if there is an error closing the
75       * {@link org.apache.any23.extractor.ExtractionContext}.
76       */
77      void closeContext(ExtractionContext context) throws TripleHandlerException;
78  
79      /**
80       * Informs the handler that the end of the document
81       * has been reached.
82       *
83       * @param documentIRI document IRI.
84       * @throws TripleHandlerException if there is an error ending the document.
85       */
86      void endDocument(IRI documentIRI) throws TripleHandlerException;
87  
88      /**
89       * Sets the length of the content to be processed.
90       *
91       * @param contentLength length of the content being processed.
92       */
93      void setContentLength(long contentLength);
94  
95      /**
96       * Will be called last and exactly once. Redeclares {@link AutoCloseable#close()} with a narrower checked exception.
97       * @throws TripleHandlerException if there is an error closing the
98       * {@link org.apache.any23.writer.TripleHandler} implementation.
99       */
100     void close() throws TripleHandlerException;
101 
102 }