1/*2 * Licensed to the Apache Software Foundation (ASF) under one or more3 * contributor license agreements. See the NOTICE file distributed with4 * this work for additional information regarding copyright ownership.5 * The ASF licenses this file to You under the Apache License, Version 2.06 * (the "License"); you may not use this file except in compliance with7 * the License. You may obtain a copy of the License at8 *9 * http://www.apache.org/licenses/LICENSE-2.010 *11 * Unless required by applicable law or agreed to in writing, software12 * distributed under the License is distributed on an "AS IS" BASIS,13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14 * See the License for the specific language governing permissions and15 * limitations under the License.16 */1718package org.apache.any23.extractor.html;
1920import org.apache.any23.source.DocumentSource;
21import org.apache.any23.source.FileDocumentSource;
22import org.junit.Assert;
23import org.w3c.dom.Node;
2425import java.io.File;
26import java.io.FileInputStream;
27import java.io.IOException;
2829/**30 * This class is a wrapper around an HTML document providing a simply facade.31 */32publicclassHTMLFixture {
3334privatefinal File file;
3536publicHTMLFixture(File file) {
37 Assert.assertNotNull("Test resource file was null", file);
38 Assert.assertTrue("Test resource file does not exist", file.exists());
39this.file = file;
40 }
4142private File getFile() {
43return file;
44 }
4546public DocumentSource getOpener(String baseIRI) {
47returnnew FileDocumentSource(getFile(), baseIRI);
48 }
4950/**51 * @return the DOM root {@link org.w3c.dom.Node} of the whole document.52 */53public Node getDOM() {
54try {
55returnnew TagSoupParser(new FileInputStream(getFile()), "http://example.org/").getDOM();
56 } catch (IOException ex) {
57thrownew RuntimeException(ex);
58 }
59 }
6061/**62 * @return an {@link HTMLDocument} object of the whole HTML document.63 */64public HTMLDocument getHTMLDocument() {
65returnnew HTMLDocument(getDOM());
66 }
67 }