This project has retired. For details please refer to its Attic page.
HTMLFixture xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.source.DocumentSource;
21  import org.apache.any23.source.FileDocumentSource;
22  import org.junit.Assert;
23  import org.w3c.dom.Node;
24  
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.IOException;
28  
29  /**
30   * This class is a wrapper around an HTML document providing a simply facade.
31   */
32  public class HTMLFixture {
33  
34      private final File file;
35  
36      public HTMLFixture(File file) {
37          Assert.assertNotNull("Test resource file was null", file);
38          Assert.assertTrue("Test resource file does not exist", file.exists());
39          this.file = file;
40      }
41  
42      private File getFile() {
43          return file;
44      }
45  
46      public DocumentSource getOpener(String baseIRI) {
47          return new FileDocumentSource(getFile(), baseIRI);
48      }
49  
50      /**
51       * @return the DOM root {@link org.w3c.dom.Node} of the whole document.
52       */
53      public Node getDOM() {
54          try {
55              return new TagSoupParser(new FileInputStream(getFile()), "http://example.org/").getDOM();
56          } catch (IOException ex) {
57              throw new RuntimeException(ex);
58          }
59      }
60  
61      /**
62       * @return an {@link HTMLDocument} object of the whole HTML document.
63       */
64      public HTMLDocument getHTMLDocument() {
65          return new HTMLDocument(getDOM());
66      }
67  }