This project has retired. For details please refer to its Attic page.
HReviewExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.rdf.RDFUtils;
22  import org.apache.any23.vocab.DCTerms;
23  import org.apache.any23.vocab.Review;
24  import org.apache.any23.vocab.SINDICE;
25  import org.apache.any23.vocab.VCard;
26  import org.junit.Test;
27  import org.eclipse.rdf4j.model.Resource;
28  import org.eclipse.rdf4j.model.Statement;
29  import org.eclipse.rdf4j.model.Value;
30  import org.eclipse.rdf4j.model.vocabulary.RDF;
31  import org.eclipse.rdf4j.repository.RepositoryResult;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  
35  /**
36   * Reference Test class for the {@link HReviewExtractor} extractor.
37   *
38   * @author Davide Palmisano (dpalmisano@gmail.com)
39   */
40  public class HReviewExtractorTest extends AbstractExtractorTestCase {
41  
42      private static final DCTerms vDCTERMS = DCTerms.getInstance();
43      private static final Review vREVIEW = Review.getInstance();
44      private static final SINDICE vSINDICE = SINDICE.getInstance();
45      private static final VCard vVCARD = VCard.getInstance();
46  
47      private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
48  
49      protected ExtractorFactory<?> getExtractorFactory() {
50          return new HReviewExtractorFactory();
51      }
52  
53      @Test
54      public void testNoMicroformats() throws Exception {
55          assertExtract("/html/html-without-uf.html");
56          assertModelEmpty();
57      }
58  
59      @Test
60      public void test01Basic() throws Exception {
61          assertExtract("/microformats/hreview/01-spec.html");
62          assertModelNotEmpty();
63  
64          assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
65  
66          // reviewer, item
67          assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
68  
69          // there is one address in the item vcard
70          assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
71  
72          RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
73  
74          try {
75              while (reviews.hasNext()) {
76  
77                  Resource review = reviews.next().getSubject();
78                  logger.debug(review.stringValue());
79  
80                  assertContains(review, vREVIEW.rating, "5");
81                  assertContains(review, vREVIEW.title, "Crepes on Cole is awesome");
82                  assertContains(review, vDCTERMS.date, "20050418T2300-0700");
83  
84                  assertContains(vREVIEW.text,
85                          "Crepes on Cole is one of the best little \n" + "      creperies in San Francisco.\n      "
86                                  + "Excellent food and service. Plenty of tables in a variety of sizes\n"
87                                  + "      for parties large and small.  " + "Window seating makes for excellent\n      "
88                                  + "people watching to/from the N-Judah which stops right outside.\n"
89                                  + "      I've had many fun social gatherings here, as well as gotten\n"
90                                  + "      plenty of work done thanks to neighborhood WiFi.");
91  
92                  assertContains(null, vREVIEW.hasReview, review);
93  
94              }
95          } finally {
96              reviews.close();
97          }
98  
99          assertNotContains(vVCARD.locality, null);
100         assertNotContains(vVCARD.organization_name, null);
101 
102     }
103 
104     @Test
105     public void test02RatedTags() throws Exception {
106 
107         assertExtract("/microformats/hreview/02-spec-2.html");
108         assertModelNotEmpty();
109 
110         assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
111 
112         // reviewer, item
113         assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
114         assertStatementsSize(vREVIEW.hasReview, (Value) null, 1);
115         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
116 
117         // there is one address in the item vcard
118         assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
119 
120         RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
121 
122         try {
123             while (reviews.hasNext()) {
124                 Resource review = reviews.next().getSubject();
125                 assertContains(review, vREVIEW.rating, "18");
126                 assertContains(review, vREVIEW.title, "Cafe Borrone");
127                 assertContains(review, vDCTERMS.date, "20050428T2130-0700");
128 
129                 assertContains(vREVIEW.text,
130                         "This \n    cafe\n    " + "is a welcoming oasis on " + "the Peninsula.\n    "
131                                 + "It even has a fountain outside which nearly eliminates\n    "
132                                 + "the sounds of El Camino traffic.  " + "Next door to a superb indy bookstore,\n    "
133                                 + "Cafe Borrone is an ideal spot to grab a\n    coffee\n    or "
134                                 + "a meal to accompany a newly purchased book or imported periodical.\n"
135                                 + "    Soups and\n    sandwich\n    specials rotate daily.  "
136                                 + "The corn chowder with croutons and big chunks of cheese\n    "
137                                 + "goes especially well with a freshly toasted mini-baguette.  "
138                                 + "Evenings are\n    often crowded and may require sharing a table "
139                                 + "with a perfect stranger.\n    "
140                                 + "Espresso\n    afficionados will appreciate the\n    Illy coffee.\n    "
141                                 + "Noise levels can vary from peaceful in the late mornings to nearly overwhelming on\n"
142                                 + "    jazz band nights.");
143 
144                 assertContains(null, vREVIEW.hasReview, review);
145                 assertContains(vREVIEW.type, "business");
146 
147             }
148 
149         } finally {
150             reviews.close();
151         }
152 
153     }
154 
155     @Test
156     public void test03NoHcardForItem() throws Exception {
157 
158         assertExtract("/microformats/hreview/03-spec-3.html");
159         assertModelNotEmpty();
160 
161         assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
162         assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
163 
164         RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
165 
166         try {
167 
168             while (reviews.hasNext()) {
169 
170                 Resource review = reviews.next().getSubject();
171 
172                 assertContains(review, vREVIEW.rating, "5");
173                 assertNotContains(vREVIEW.title, null);
174                 assertContains(review, vDCTERMS.date, "200502");
175 
176                 assertContains(vREVIEW.text,
177                         "\"The people thought they were just being rewarded for "
178                                 + "treating others\n       as they like to be treated, for "
179                                 + "obeying stop signs and curing diseases,\n       for mailing "
180                                 + "letters with the address of the sender... Don't wake me,\n "
181                                 + "      I plan on sleeping in...\"\n     \n     \"Nothing Better\""
182                                 + " is a great track on this album, too...");
183 
184                 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
185 
186                 try {
187                     while (reviewSubjects.hasNext()) {
188                         Resource reviewSubject = reviewSubjects.next().getSubject();
189                         assertContains(reviewSubject, vVCARD.fn, "The Postal Service: Give Up");
190                         assertContains(reviewSubject, vVCARD.url,
191                                 RDFUtils.iri("http://www.amazon.com/exec/obidos/ASIN/B000089CJI/"));
192                         assertContains(reviewSubject, vVCARD.photo,
193                                 RDFUtils.iri("http://images.amazon.com/images/P/B000089CJI.01._SCTHUMBZZZ_.jpg"));
194                     }
195                 } finally {
196                     reviewSubjects.close();
197                 }
198 
199             }
200 
201         } finally {
202             reviews.close();
203         }
204 
205     }
206 
207     @Test
208     public void test04NoHcardForItem() throws Exception {
209 
210         assertExtract("/microformats/hreview/04-spec-4.html");
211         assertModelNotEmpty();
212 
213         assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
214         // reviewer, no item
215         assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
216 
217         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
218 
219         RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
220 
221         try {
222 
223             while (reviews.hasNext()) {
224 
225                 Resource review = reviews.next().getSubject();
226 
227                 assertContains(review, vREVIEW.rating, "4");
228                 assertNotContains(vREVIEW.title, null);
229                 assertContains(review, vDCTERMS.date, "20050418");
230 
231                 assertContains(vREVIEW.text, "This movie has great music and visuals.");
232 
233                 assertStatementsSize(vREVIEW.hasReview, review, 1);
234 
235                 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
236 
237                 try {
238                     while (reviewSubjects.hasNext()) {
239                         Resource reviewSubject = reviewSubjects.next().getSubject();
240                         assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)");
241                         assertContains(reviewSubject, vVCARD.url, RDFUtils.iri("http://www.imdb.com/title/tt0299977/"));
242                     }
243 
244                 } finally {
245                     reviewSubjects.close();
246                 }
247 
248             }
249 
250         } finally {
251             reviews.close();
252         }
253 
254     }
255 
256     /**
257      * This test is the same defined in {@link HReviewExtractorTest#test04NoHcardForItem} but assess the behavior in
258      * presence of a <i>Microformat</i> name with a different letter capitalization.
259      *
260      * @throws Exception
261      *             if there is an error asserting the test data.
262      */
263     @Test
264     public void testCaseSensitiveness() throws Exception {
265         assertExtract("/microformats/hreview/05-spec.html");
266         assertModelNotEmpty();
267         assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
268         // reviewer, no item
269         assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
270 
271         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
272 
273         RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
274 
275         try {
276 
277             while (reviews.hasNext()) {
278 
279                 Resource review = reviews.next().getSubject();
280 
281                 assertContains(review, vREVIEW.rating, "4");
282                 assertNotContains(vREVIEW.title, null);
283                 assertContains(review, vDCTERMS.date, "20050418");
284 
285                 assertContains(vREVIEW.text, "This movie has great music and visuals.");
286 
287                 assertStatementsSize(vREVIEW.hasReview, review, 1);
288 
289                 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
290 
291                 try {
292                     while (reviewSubjects.hasNext()) {
293                         Resource reviewSubject = reviewSubjects.next().getSubject();
294                         assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)");
295                         assertContains(reviewSubject, vVCARD.url, RDFUtils.iri("http://www.imdb.com/title/tt0299977/"));
296                     }
297 
298                 } finally {
299                     reviewSubjects.close();
300                 }
301 
302             }
303 
304         } finally {
305             reviews.close();
306         }
307     }
308 
309 }