This project has retired. For details please refer to its
Attic page.
HReviewExtractorTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.apache.any23.rdf.RDFUtils;
22 import org.apache.any23.vocab.DCTerms;
23 import org.apache.any23.vocab.Review;
24 import org.apache.any23.vocab.SINDICE;
25 import org.apache.any23.vocab.VCard;
26 import org.junit.Test;
27 import org.eclipse.rdf4j.model.Resource;
28 import org.eclipse.rdf4j.model.Statement;
29 import org.eclipse.rdf4j.model.Value;
30 import org.eclipse.rdf4j.model.vocabulary.RDF;
31 import org.eclipse.rdf4j.repository.RepositoryResult;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34
35
36
37
38
39
40 public class HReviewExtractorTest extends AbstractExtractorTestCase {
41
42 private static final DCTerms vDCTERMS = DCTerms.getInstance();
43 private static final Review vREVIEW = Review.getInstance();
44 private static final SINDICE vSINDICE = SINDICE.getInstance();
45 private static final VCard vVCARD = VCard.getInstance();
46
47 private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
48
49 protected ExtractorFactory<?> getExtractorFactory() {
50 return new HReviewExtractorFactory();
51 }
52
53 @Test
54 public void testNoMicroformats() throws Exception {
55 assertExtract("/html/html-without-uf.html");
56 assertModelEmpty();
57 }
58
59 @Test
60 public void test01Basic() throws Exception {
61 assertExtract("/microformats/hreview/01-spec.html");
62 assertModelNotEmpty();
63
64 assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
65
66
67 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
68
69
70 assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
71
72 RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
73
74 try {
75 while (reviews.hasNext()) {
76
77 Resource review = reviews.next().getSubject();
78 logger.debug(review.stringValue());
79
80 assertContains(review, vREVIEW.rating, "5");
81 assertContains(review, vREVIEW.title, "Crepes on Cole is awesome");
82 assertContains(review, vDCTERMS.date, "20050418T2300-0700");
83
84 assertContains(vREVIEW.text,
85 "Crepes on Cole is one of the best little \n" + " creperies in San Francisco.\n "
86 + "Excellent food and service. Plenty of tables in a variety of sizes\n"
87 + " for parties large and small. " + "Window seating makes for excellent\n "
88 + "people watching to/from the N-Judah which stops right outside.\n"
89 + " I've had many fun social gatherings here, as well as gotten\n"
90 + " plenty of work done thanks to neighborhood WiFi.");
91
92 assertContains(null, vREVIEW.hasReview, review);
93
94 }
95 } finally {
96 reviews.close();
97 }
98
99 assertNotContains(vVCARD.locality, null);
100 assertNotContains(vVCARD.organization_name, null);
101
102 }
103
104 @Test
105 public void test02RatedTags() throws Exception {
106
107 assertExtract("/microformats/hreview/02-spec-2.html");
108 assertModelNotEmpty();
109
110 assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
111
112
113 assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
114 assertStatementsSize(vREVIEW.hasReview, (Value) null, 1);
115 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
116
117
118 assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
119
120 RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
121
122 try {
123 while (reviews.hasNext()) {
124 Resource review = reviews.next().getSubject();
125 assertContains(review, vREVIEW.rating, "18");
126 assertContains(review, vREVIEW.title, "Cafe Borrone");
127 assertContains(review, vDCTERMS.date, "20050428T2130-0700");
128
129 assertContains(vREVIEW.text,
130 "This \n cafe\n " + "is a welcoming oasis on " + "the Peninsula.\n "
131 + "It even has a fountain outside which nearly eliminates\n "
132 + "the sounds of El Camino traffic. " + "Next door to a superb indy bookstore,\n "
133 + "Cafe Borrone is an ideal spot to grab a\n coffee\n or "
134 + "a meal to accompany a newly purchased book or imported periodical.\n"
135 + " Soups and\n sandwich\n specials rotate daily. "
136 + "The corn chowder with croutons and big chunks of cheese\n "
137 + "goes especially well with a freshly toasted mini-baguette. "
138 + "Evenings are\n often crowded and may require sharing a table "
139 + "with a perfect stranger.\n "
140 + "Espresso\n afficionados will appreciate the\n Illy coffee.\n "
141 + "Noise levels can vary from peaceful in the late mornings to nearly overwhelming on\n"
142 + " jazz band nights.");
143
144 assertContains(null, vREVIEW.hasReview, review);
145 assertContains(vREVIEW.type, "business");
146
147 }
148
149 } finally {
150 reviews.close();
151 }
152
153 }
154
155 @Test
156 public void test03NoHcardForItem() throws Exception {
157
158 assertExtract("/microformats/hreview/03-spec-3.html");
159 assertModelNotEmpty();
160
161 assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
162 assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
163
164 RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
165
166 try {
167
168 while (reviews.hasNext()) {
169
170 Resource review = reviews.next().getSubject();
171
172 assertContains(review, vREVIEW.rating, "5");
173 assertNotContains(vREVIEW.title, null);
174 assertContains(review, vDCTERMS.date, "200502");
175
176 assertContains(vREVIEW.text,
177 "\"The people thought they were just being rewarded for "
178 + "treating others\n as they like to be treated, for "
179 + "obeying stop signs and curing diseases,\n for mailing "
180 + "letters with the address of the sender... Don't wake me,\n "
181 + " I plan on sleeping in...\"\n \n \"Nothing Better\""
182 + " is a great track on this album, too...");
183
184 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
185
186 try {
187 while (reviewSubjects.hasNext()) {
188 Resource reviewSubject = reviewSubjects.next().getSubject();
189 assertContains(reviewSubject, vVCARD.fn, "The Postal Service: Give Up");
190 assertContains(reviewSubject, vVCARD.url,
191 RDFUtils.iri("http://www.amazon.com/exec/obidos/ASIN/B000089CJI/"));
192 assertContains(reviewSubject, vVCARD.photo,
193 RDFUtils.iri("http://images.amazon.com/images/P/B000089CJI.01._SCTHUMBZZZ_.jpg"));
194 }
195 } finally {
196 reviewSubjects.close();
197 }
198
199 }
200
201 } finally {
202 reviews.close();
203 }
204
205 }
206
207 @Test
208 public void test04NoHcardForItem() throws Exception {
209
210 assertExtract("/microformats/hreview/04-spec-4.html");
211 assertModelNotEmpty();
212
213 assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
214
215 assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
216
217 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
218
219 RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
220
221 try {
222
223 while (reviews.hasNext()) {
224
225 Resource review = reviews.next().getSubject();
226
227 assertContains(review, vREVIEW.rating, "4");
228 assertNotContains(vREVIEW.title, null);
229 assertContains(review, vDCTERMS.date, "20050418");
230
231 assertContains(vREVIEW.text, "This movie has great music and visuals.");
232
233 assertStatementsSize(vREVIEW.hasReview, review, 1);
234
235 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
236
237 try {
238 while (reviewSubjects.hasNext()) {
239 Resource reviewSubject = reviewSubjects.next().getSubject();
240 assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)");
241 assertContains(reviewSubject, vVCARD.url, RDFUtils.iri("http://www.imdb.com/title/tt0299977/"));
242 }
243
244 } finally {
245 reviewSubjects.close();
246 }
247
248 }
249
250 } finally {
251 reviews.close();
252 }
253
254 }
255
256
257
258
259
260
261
262
263 @Test
264 public void testCaseSensitiveness() throws Exception {
265 assertExtract("/microformats/hreview/05-spec.html");
266 assertModelNotEmpty();
267 assertStatementsSize(RDF.TYPE, vREVIEW.Review, 1);
268
269 assertStatementsSize(vREVIEW.reviewer, (Value) null, 1);
270
271 assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
272
273 RepositoryResult<Statement> reviews = getStatements(null, RDF.TYPE, vREVIEW.Review);
274
275 try {
276
277 while (reviews.hasNext()) {
278
279 Resource review = reviews.next().getSubject();
280
281 assertContains(review, vREVIEW.rating, "4");
282 assertNotContains(vREVIEW.title, null);
283 assertContains(review, vDCTERMS.date, "20050418");
284
285 assertContains(vREVIEW.text, "This movie has great music and visuals.");
286
287 assertStatementsSize(vREVIEW.hasReview, review, 1);
288
289 RepositoryResult<Statement> reviewSubjects = getStatements(null, vREVIEW.hasReview, review);
290
291 try {
292 while (reviewSubjects.hasNext()) {
293 Resource reviewSubject = reviewSubjects.next().getSubject();
294 assertContains(reviewSubject, vVCARD.fn, "Ying Xiong (HERO)");
295 assertContains(reviewSubject, vVCARD.url, RDFUtils.iri("http://www.imdb.com/title/tt0299977/"));
296 }
297
298 } finally {
299 reviewSubjects.close();
300 }
301
302 }
303
304 } finally {
305 reviews.close();
306 }
307 }
308
309 }