View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html.microformats2;
19  
20  import org.apache.any23.extractor.ExtractionResult;
21  import org.apache.any23.extractor.ExtractorDescription;
22  import org.apache.any23.extractor.TagSoupExtractionResult;
23  import org.apache.any23.vocab.VCard;
24  import org.eclipse.rdf4j.model.BNode;
25  import org.eclipse.rdf4j.model.vocabulary.RDF;
26  import org.w3c.dom.Node;
27  import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
28  import org.apache.any23.extractor.html.HTMLDocument;
29  
30  import java.util.ArrayList;
31  
32  /**
33   * Extractor for the <a href="http://microformats.org/wiki/h-geo">h-geo</a>
34   * microformat.
35   *
36   * @author Nisala Nirmana
37   */
38  public class HGeoExtractor extends EntityBasedMicroformatExtractor {
39  
40      private static final VCard vVCARD = VCard.getInstance();
41  
42      private static final String[] geoFields = {
43              "latitude",
44              "longitude",
45              "altitude"
46      };
47  
48      @Override
49      public ExtractorDescription getDescription() {
50          return HGeoExtractorFactory.getDescriptionInstance();
51      }
52  
53      protected String getBaseClassName() {
54          return Microformats2Prefixes.CLASS_PREFIX+"geo";
55      }
56  
57      @Override
58      protected void resetExtractor() {
59          // Empty.
60      }
61  
62      protected boolean extractEntity(Node node, ExtractionResult out) {
63          if (null == node) return false;
64          final HTMLDocumentl/HTMLDocument.html#HTMLDocument">HTMLDocument document = new HTMLDocument(node);
65          BNode geo = getBlankNodeFor(node);
66          out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
67          final String extractorName = getDescription().getExtractorName();
68          ArrayList<HTMLDocument.TextField> geoNodes = new ArrayList<HTMLDocument.TextField>();
69          for(String field : geoFields){
70              geoNodes.add(document.getSingularTextField(Microformats2Prefixes.PROPERTY_PREFIX+field));
71          }
72          if(geoNodes.get(0).source()==null){
73              String[] composed = document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX +"geo")
74                                          .value().split(";");
75              for(int counter=0;counter<composed.length;counter++){
76                  conditionallyAddStringProperty(
77                          document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX+"geo").source(),
78                          geo, vVCARD.getProperty(geoFields[counter]), composed[counter]
79                  );
80              }
81          }else{
82              for(int counter=0;counter<geoNodes.size();counter++){
83                  conditionallyAddStringProperty(
84                          geoNodes.get(counter).source(),
85                          geo, vVCARD.getProperty(geoFields[counter]) , geoNodes.get(counter).value()
86                  );
87              }
88          }
89          final TagSoupExtractionResult../org/apache/any23/extractor/TagSoupExtractionResult.html#TagSoupExtractionResult">TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
90          tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
91          return true;
92      }
93      
94  }