View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractionResult;
21  import org.apache.any23.extractor.ExtractorDescription;
22  import org.apache.any23.extractor.TagSoupExtractionResult;
23  import org.apache.any23.vocab.VCard;
24  import org.eclipse.rdf4j.model.BNode;
25  import org.eclipse.rdf4j.model.vocabulary.RDF;
26  import org.w3c.dom.Node;
27  
28  /**
29   * Extractor for the <a href="http://microformats.org/wiki/geo">Geo</a>
30   * microformat.
31   *
32   * @author Gabriele Renzi
33   */
34  public class GeoExtractor extends EntityBasedMicroformatExtractor {
35  
36      private static final VCard vVCARD = VCard.getInstance();
37  
38      @Override
39      public ExtractorDescription getDescription() {
40          return GeoExtractorFactory.getDescriptionInstance();
41      }
42  
43      protected String getBaseClassName() {
44          return "geo";
45      }
46  
47      @Override
48      protected void resetExtractor() {
49          // Empty.
50      }
51  
52      protected boolean extractEntity(Node node, ExtractionResult out) {
53          if (null == node)
54              return false;
55          //try lat & lon
56          final HTMLDocumentTMLDocument.html#HTMLDocument">HTMLDocument document = new HTMLDocument(node);
57          HTMLDocument.TextField latNode = document.getSingularTextField("latitude" );
58          HTMLDocument.TextField lonNode = document.getSingularTextField("longitude");
59          String lat = latNode.value();
60          String lon = lonNode.value();
61          if ("".equals(lat) || "".equals(lon)) {
62              String[] both = document.getSingularUrlField("geo").value().split(";");
63              if (both.length != 2)
64                  return false;
65              lat = both[0];
66              lon = both[1];
67          }
68          BNode geo = getBlankNodeFor(node);
69          out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
70          conditionallyAddStringProperty(
71                  latNode.source(),
72                  geo, vVCARD.latitude , lat
73          );
74          conditionallyAddStringProperty(
75                  lonNode.source(),
76                  geo, vVCARD.longitude, lon
77          );
78  
79          final TagSoupExtractionResultorg/apache/any23/extractor/TagSoupExtractionResult.html#TagSoupExtractionResult">TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
80          tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
81  
82          return true;
83      }
84      
85  }