View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.source;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  
/**
 * A source of input streams for a document. Mostly intended for situations where the opening of an input stream is
 * to be delayed.
 *
 * @author Richard Cyganiak (richard@cyganiak.de)
 */
public interface DocumentSource {

    /**
     * Opens an input stream for accessing the content of the document.
     *
     * @return a non-<code>null</code> input stream for accessing the document data.
     *
     * @throws IOException
     *             if there is an error opening the {@link java.io.InputStream} of this
     *             {@link org.apache.any23.source.DocumentSource}
     */
    InputStream openInputStream() throws IOException;

    /**
     * Returns the content type of the document.
     *
     * @return a string describing the content type of the provided document.
     */
    String getContentType();

    /**
     * Returns the length of the document content.
     *
     * @return the content length in bytes.
     */
    long getContentLength();

    /**
     * Returns the IRI of the document.
     *
     * @return the actual, final, canonical IRI of the document, taking into account any redirects that occur.
     */
    String getDocumentIRI();

    /**
     * Tells whether the document is available locally.
     * <p>
     * A value of <i>false</i> indicates that the document resides remotely, and that multiple successive accesses to
     * it should be avoided by copying it to local storage. A value of <i>false</i> can also be used for sources that
     * do not support multiple calls to {@link #openInputStream()}.
     *
     * @return <code>true</code> if the {@link org.apache.any23.source.DocumentSource} is cached locally.
     */
    boolean isLocal();
}