/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.any23.extractor;
import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
import org.apache.any23.extractor.rdf.NQuadsExtractorFactory;
import org.apache.any23.extractor.rdf.NTriplesExtractorFactory;
import org.apache.any23.extractor.rdf.RDFXMLExtractorFactory;
import org.apache.any23.extractor.rdf.TriXExtractorFactory;
import org.apache.any23.extractor.rdf.TurtleExtractorFactory;
import org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory;
import org.apache.any23.mime.MIMEType;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.junit.Assert;
import org.apache.any23.extractor.example.ExampleExtractor;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.writer.CountingTripleHandler;
import org.junit.Test;
import org.eclipse.rdf4j.model.IRI;

import java.util.List;
import java.util.stream.Collectors;
3839/**40 * Tests the <i>extraction</i> scenario.41 */42publicclassExtractionAPITest {
4344privatestaticfinal String exampleDoc = "http://example.com/";
45privatestaticfinal IRI uri = RDFUtils.iri(exampleDoc);
4647 @Test
48publicvoid testDirectInstantiation() throws Exception {
49 CountingTripleHandler out = new CountingTripleHandler();
50ExampleExtractoreExtractor.html#ExampleExtractor">ExampleExtractor extractor = newExampleExtractor();
51 ExtractionContext extractionContext = new ExtractionContext("extractor-name", uri);
52 ExtractionResultImpl writer = new ExtractionResultImpl(extractionContext, extractor, out);
53 extractor.run(ExtractionParameters.newDefault(), extractionContext, uri, writer);
54 writer.close();
55 Assert.assertEquals(1, out.getCount());
56 }
5758privatestaticvoid test(ExtractorFactory<?> factory, RDFFormat... formats) {
59 List<String> mimetypes = factory.getSupportedMIMETypes().stream().map(MIMEType::getFullType)
60 .collect(Collectors.toList());
6162 Assert.assertEquals(formats[0].getDefaultMIMEType(), mimetypes.get(0));
6364for (RDFFormat format : formats) {
65for (String mimeType : format.getMIMETypes()) {
66if (mimeType.endsWith("/xml")) {
67// TODO: xml mimetypes are commented out in RDFXML extractor. Why?68continue;
69 }
70 Assert.assertTrue(mimeType, mimetypes.contains(mimeType));
71 }
72 }
73 }
7475 @Test
76publicvoid testMimetypes() {
77 test(new JSONLDExtractorFactory(), RDFFormat.JSONLD);
78 test(new NTriplesExtractorFactory(), RDFFormat.NTRIPLES);
79 test(new NQuadsExtractorFactory(), RDFFormat.NQUADS);
80 test(new TurtleExtractorFactory(), RDFFormat.TURTLE, RDFFormat.N3);
81 test(new RDFXMLExtractorFactory(), RDFFormat.RDFXML);
82 test(new TriXExtractorFactory(), RDFFormat.TRIX);
83 test(new RDFa11ExtractorFactory(), RDFFormat.RDFA);
84 }
8586 }