1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.apache.any23.extractor.ExtractionException;
22 import org.apache.any23.extractor.ExtractionParameters;
23 import org.apache.any23.extractor.ExtractionResult;
24 import org.apache.any23.extractor.ExtractorDescription;
25 import org.apache.any23.extractor.ExtractorFactory;
26 import org.apache.any23.extractor.SimpleExtractorFactory;
27 import org.apache.any23.rdf.Any23ValueFactoryWrapper;
28 import org.apache.any23.rdf.PopularPrefixes;
29 import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
30 import org.openrdf.model.BNode;
31 import org.openrdf.model.URI;
32 import org.openrdf.model.ValueFactory;
33 import org.openrdf.model.impl.ValueFactoryImpl;
34 import org.w3c.dom.Document;
35
36 import java.io.IOException;
37 import java.util.Arrays;
38
39
40
41
42
43
44
45
46 public class ICBMExtractor implements TagSoupDOMExtractor {
47
48 public final static ExtractorFactory<ICBMExtractor> factory =
49 SimpleExtractorFactory.create(
50 "html-head-icbm",
51 PopularPrefixes.createSubset("geo", "rdf"),
52 Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
53 "example-icbm.html",
54 ICBMExtractor.class
55 );
56
57 public void run(
58 ExtractionParameters extractionParameters,
59 ExtractionContext extractionContext,
60 Document in,
61 ExtractionResult out
62 ) throws IOException, ExtractionException {
63
64
65 String props = DomUtils.find(in, "//META[@name=\"ICBM\" or @name=\"geo.position\"]/@content");
66 if ("".equals(props)) return;
67
68 String[] coords = props.split("[;,]");
69 float lat, lon;
70 try {
71 lat = Float.parseFloat(coords[0]);
72 lon = Float.parseFloat(coords[1]);
73 } catch (NumberFormatException nfe) {
74 return;
75 }
76
77 final ValueFactory factory = new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance(), out);
78 BNode point = factory.createBNode();
79 out.writeTriple(extractionContext.getDocumentURI(), expand("dcterms:related"), point);
80 out.writeTriple(point, expand("rdf:type"), expand("geo:Point"));
81 out.writeTriple(point, expand("geo:lat"), factory.createLiteral(Float.toString(lat)));
82 out.writeTriple(point, expand("geo:long"), factory.createLiteral(Float.toString(lon)));
83 }
84
85 private URI expand(String curie) {
86 return factory.getPrefixes().expand(curie);
87 }
88
89 public ExtractorDescription getDescription() {
90 return factory;
91 }
92
93 }