View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import java.util.List;
21  
22  import org.apache.any23.extractor.ExtractionException;
23  import org.apache.any23.extractor.ExtractionResult;
24  import org.apache.any23.extractor.ExtractorDescription;
25  import org.apache.any23.extractor.TagSoupExtractionResult;
26  import org.apache.any23.extractor.html.HTMLDocument.TextField;
27  import org.apache.any23.vocab.Review;
28  import org.apache.any23.vocab.ReviewAggregate;
29  import org.apache.any23.vocab.VCard;
30  import org.eclipse.rdf4j.model.BNode;
31  import org.eclipse.rdf4j.model.Resource;
32  import org.eclipse.rdf4j.model.vocabulary.RDF;
33  import org.w3c.dom.Node;
34  
35  /**
36   * Extractor for the <a
37   * href="http://microformats.org/wiki/hreview-aggregate">hReview-aggregate</a>
38   * microformat.
39   * 
40   * @author Sebastien Richard
41   */
42  public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {
43      private static final Review vREVIEW = Review.getInstance();
44      private static final ReviewAggregate../org/apache/any23/vocab/ReviewAggregate.html#ReviewAggregate">ReviewAggregate vREVIEWAGG = ReviewAggregate
45              .getInstance();
46      private static final VCard vVCARD = VCard.getInstance();
47  
48      @Override
49      public ExtractorDescription getDescription() {
50          return HReviewAggregateExtractorFactory.getDescriptionInstance();
51      }
52  
53      @Override
54      protected String getBaseClassName() {
55          return "hreview-aggregate";
56      }
57  
58      @Override
59      protected void resetExtractor() {
60          // Empty.
61      }
62  
63      @Override
64      protected boolean extractEntity(Node node, ExtractionResult out)
65              throws ExtractionException {
66          BNode rev = getBlankNodeFor(node);
67          out.writeTriple(rev, RDF.TYPE, vREVIEWAGG.ReviewAggregate);
68          final HTMLDocumentTMLDocument.html#HTMLDocument">HTMLDocument fragment = new HTMLDocument(node);
69          addRating(fragment, rev);
70          addWorst(fragment, rev);
71          addBest(fragment, rev);
72          addAverage(fragment, rev);
73          addSummary(fragment, rev);
74          addType(fragment, rev);
75          addItem(fragment, rev);
76          addCount(fragment, rev);
77          addVotes(fragment, rev);
78  
79          final TagSoupExtractionResultorg/apache/any23/extractor/TagSoupExtractionResult.html#TagSoupExtractionResult">TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
80          tser.addResourceRoot(DomUtils.getXPathListForNode(node), rev,
81                  this.getClass());
82  
83          return true;
84      }
85  
86      private void addType(HTMLDocument doc, Resource rev) {
87          TextField value = doc.getSingularTextField("type");
88          conditionallyAddStringProperty(value.source(), rev, vREVIEW.type,
89                  value.value());
90      }
91  
92      private void addItem(HTMLDocument root, BNode rev)
93              throws ExtractionException {
94          List<Node> nodes = root.findAllByClassName("item");
95          for (Node node : nodes) {
96              Resource item = findDummy(new HTMLDocument(node));
97              addBNodeProperty(node, item, vREVIEW.hasReview, rev);
98          }
99      }
100 
101     private Resource findDummy(HTMLDocument item) throws ExtractionException {
102         Resource blank = getBlankNodeFor(item.getDocument());
103         TextField val = item.getSingularTextField("fn");
104         conditionallyAddStringProperty(val.source(), blank, vVCARD.fn,
105                 val.value());
106         final TextField url = item.getSingularUrlField("url");
107         conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument()
108                 .resolveIRI(url.value()));
109         TextField pics[] = item.getPluralUrlField("photo");
110         for (TextField pic : pics) {
111             addIRIProperty(blank, vVCARD.photo,
112                     getHTMLDocument().resolveIRI(pic.value()));
113         }
114         return blank;
115     }
116 
117     private void addRating(HTMLDocument doc, Resource rev) {
118         HTMLDocument.TextField value = doc.getSingularTextField("rating");
119         conditionallyAddStringProperty(value.source(), rev, vREVIEW.rating,
120                 value.value());
121     }
122 
123     private void addWorst(HTMLDocument doc, Resource rev) {
124         HTMLDocument.TextField value = doc.getSingularTextField("worst");
125         conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.worst,
126                 value.value());
127     }
128 
129     private void addBest(HTMLDocument doc, Resource rev) {
130         HTMLDocument.TextField value = doc.getSingularTextField("best");
131         conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.best,
132                 value.value());
133     }
134 
135     private void addAverage(HTMLDocument doc, Resource rev) {
136         HTMLDocument.TextField value = doc.getSingularTextField("average");
137         conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.average,
138                 value.value());
139     }
140 
141     private void addCount(HTMLDocument doc, Resource rev) {
142         HTMLDocument.TextField value = doc.getSingularTextField("count");
143         conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.count,
144                 value.value());
145     }
146 
147     private void addVotes(HTMLDocument doc, Resource rev) {
148         HTMLDocument.TextField value = doc.getSingularTextField("votes");
149         conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.votes,
150                 value.value());
151     }
152 
153     private void addSummary(HTMLDocument doc, Resource rev) {
154         TextField value = doc.getSingularTextField("summary");
155         conditionallyAddStringProperty(value.source(), rev, vREVIEW.title,
156                 value.value());
157     }
158 }