View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.source;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  
/**
 * Supplies {@link java.io.InputStream}s over the content of a document.
 * The abstraction is primarily meant for cases in which the stream
 * should not be opened until it is actually needed.
 *
 * @author Richard Cyganiak (richard@cyganiak.de)
 */
public interface DocumentSource {

    /**
     * Opens a stream over the document content.
     *
     * @return an input stream, never <code>null</code>, from which the
     *         document data can be read.
     * @throws IOException if the {@link java.io.InputStream} of this
     *         {@link org.apache.any23.source.DocumentSource} cannot be opened.
     */
    InputStream openInputStream() throws IOException;

    /**
     * Describes the content type of the document.
     *
     * @return a string describing the content type of the provided document.
     */
    String getContentType();

    /**
     * Reports the length of the document content.
     *
     * @return the content length, expressed in bytes.
     */
    long getContentLength();

    /**
     * Identifies the document by its IRI.
     *
     * @return the actual, final, canonical IRI of the document; this is the
     *         IRI that results if redirects occur.
     */
    String getDocumentIRI();

    /**
     * Tells whether the document is available locally.
     * <p>
     * A result of <i>false</i> means that the document resides remotely and
     * that repeated successive accesses should be avoided by first copying
     * it to local storage. Sources that do not support more than one call
     * to {@link #openInputStream()} can use the same signal.
     *
     * @return true if this {@link org.apache.any23.source.DocumentSource}
     *         is cached locally.
     */
    boolean isLocal();
}