This project has retired. For details please refer to its
Attic page.
HListingExtractorTest xref
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.apache.any23.rdf.RDFUtils;
22 import org.apache.any23.vocab.FOAF;
23 import org.apache.any23.vocab.HListing;
24 import org.apache.any23.vocab.SINDICE;
25 import org.junit.Test;
26 import org.eclipse.rdf4j.model.Resource;
27 import org.eclipse.rdf4j.model.vocabulary.RDF;
28 import org.slf4j.Logger;
29 import org.slf4j.LoggerFactory;
30
31
32
33
34
35
36
37
38 public class HListingExtractorTest extends AbstractExtractorTestCase {
39
40 private static final SINDICE vSINDICE = SINDICE.getInstance();
41 private static final HListing vHLISTING = HListing.getInstance();
42 private static final FOAF vFOAF = FOAF.getInstance();
43
44 private static final Logger logger = LoggerFactory.getLogger(HListingExtractorTest.class);
45
46 protected ExtractorFactory<?> getExtractorFactory() {
47 return new HListingExtractorFactory();
48 }
49
50 @Test
51 public void testNoMicroformats() throws Exception {
52 assertExtract("/html/html-without-uf.html");
53 assertModelEmpty();
54 }
55
56 @Test
57 public void testListingWithouthContent() throws Exception {
58 assertExtract("/microformats/hlisting/empty.html");
59 assertModelNotEmpty();
60 assertStatementsSize(null, null, null, 3);
61 }
62
63 @Test
64 public void testSingleAction() throws Exception {
65 assertExtract("/microformats/hlisting/single-action.html");
66 assertModelNotEmpty();
67 assertContains(vHLISTING.action, vHLISTING.offer);
68 }
69
70 @Test
71 public void testMultipleActions() throws Exception {
72 assertExtract("/microformats/hlisting/multiple-actions.html");
73 assertModelNotEmpty();
74 assertContains(vHLISTING.action, vHLISTING.offer);
75 assertContains(vHLISTING.action, vHLISTING.sell);
76 }
77
78 @Test
79 public void testMultipleActionsNested() throws Exception {
80 assertExtract("/microformats/hlisting/multiple-actions-nested.html");
81 assertModelNotEmpty();
82 assertContains(vHLISTING.action, vHLISTING.offer);
83 assertContains(vHLISTING.action, vHLISTING.sell);
84 assertContains(vHLISTING.action, vHLISTING.rent);
85 }
86
87 @Test
88 public void testActionsOutside() throws Exception {
89 assertExtract("/microformats/hlisting/single-action-outside.html");
90 assertModelNotEmpty();
91 assertNotContains(vHLISTING.action, vHLISTING.offer);
92 }
93
94 @Test
95 public void testListerFn() throws Exception {
96 assertExtract("/microformats/hlisting/actions-lister-fn.html");
97 assertModelNotEmpty();
98 assertContains(vHLISTING.action, vHLISTING.offer);
99 assertContains(RDF.TYPE, vHLISTING.Lister);
100 assertContains(vHLISTING.listerName, "mike");
101 }
102
103 @Test
104 public void testListerFnTel() throws Exception {
105 assertExtract("/microformats/hlisting/actions-lister-fn-tel.html");
106 assertModelNotEmpty();
107
108 assertContains(vHLISTING.action, vHLISTING.offer);
109 assertContains(vHLISTING.listerName, "John Broker");
110 assertContains(RDF.TYPE, vHLISTING.Lister);
111 assertContains(vHLISTING.tel, "(110) 555-1212");
112 }
113
114 @Test
115 public void testItemFn() throws Exception {
116 assertExtract("/microformats/hlisting/item-fn.html");
117 assertModelNotEmpty();
118 assertContains(RDF.TYPE, vHLISTING.Item);
119 assertContains(vHLISTING.itemName, "Parking space");
120 }
121
122 @Test
123 public void testItemFnUrl() throws Exception {
124 assertExtract("/microformats/hlisting/item-fn-url.html");
125 assertModelNotEmpty();
126 assertContains(RDF.TYPE, vHLISTING.Item);
127 assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://item.com/"));
128 assertContains(vHLISTING.itemName, "Parking space");
129 }
130
131 @Test
132 public void testItemPhotoImg() throws Exception {
133 assertExtract("/microformats/hlisting/item-fn-url-photo-img.html");
134 assertModelNotEmpty();
135 assertContains(RDF.TYPE, vHLISTING.Item);
136 assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://item.com/"));
137 assertContains(vHLISTING.itemName, "Parking space");
138 assertContains(vHLISTING.itemPhoto, RDFUtils.iri(baseIRI.stringValue() + "photo.jpg"));
139 }
140
141 @Test
142 public void testItemPhotoHref() throws Exception {
143 assertExtract("/microformats/hlisting/item-fn-photo-href.html");
144 assertModelNotEmpty();
145 assertContains(RDF.TYPE, vHLISTING.Item);
146 assertContains(vHLISTING.itemName, "Parking space");
147 assertContains(vHLISTING.itemPhoto, RDFUtils.iri(baseIRI.stringValue() + "pic.jpg"));
148 }
149
150 @Test
151 public void testKelkoo() throws Exception {
152 assertExtract("/microformats/hlisting/kelkoo.html");
153 assertModelNotEmpty();
154
155 assertContains(RDF.TYPE, vHLISTING.Listing);
156 assertContains(RDF.TYPE, vHLISTING.Item);
157 assertContains(vHLISTING.action, vHLISTING.offer);
158 assertContains(vHLISTING.itemName, "Benq MP622 - DLP Projector - 2700 ANSI lumens - XGA...");
159
160 assertContains(vHLISTING.description, (Resource) null);
161
162 assertContains(RDF.TYPE, vHLISTING.Lister);
163
164 assertContains(vHLISTING.listerUrl, RDFUtils.iri(baseIRI.stringValue() + "m-4621623-pc-world-business.html"));
165 assertContains(vHLISTING.listerOrg, "PC World Business");
166
167 assertContains(vHLISTING.listerLogo,
168 RDFUtils.iri(baseIRI.stringValue() + "data/merchantlogos/4621623/pcworld.gif"));
169
170 assertContains(vHLISTING.listerName, "PC World Business");
171
172 assertContains(vHLISTING.itemPhoto,
173 RDFUtils.iri("http://img.kelkoo.com/uk/medium/675/496/00117250662929509422269096808645163496675.jpg"));
174
175 assertContains(vHLISTING.price, "\u00A3480.17");
176 }
177
178 @Test
179 public void testKelkooFull() throws Exception {
180 assertExtract("/microformats/hlisting/kelkoo-full.html");
181 assertModelNotEmpty();
182 assertContains(RDF.TYPE, vHLISTING.Listing);
183 assertContains(RDF.TYPE, vHLISTING.Item);
184 assertContains(vHLISTING.action, vHLISTING.offer);
185 assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://bob.example.com/"));
186 assertContains(RDF.TYPE, vHLISTING.Lister);
187
188 assertContains(vHLISTING.itemName, "Hanro Touch Feeling Shape Bodysuit Underwear");
189 assertContains(vHLISTING.itemName, "Spanx Slim Cognito - Shaping Mid-Thigh Bodysuit");
190 assertContains(vHLISTING.itemName, "Spanx Spanx Slim Cognito High Leg Shaping...");
191
192 assertContains(vHLISTING.itemPhoto,
193 RDFUtils.iri("http://img.kelkoo.com/uk/medium/657/449/00162475823966154731749844283942320449657.jpg"));
194 assertContains(vHLISTING.itemPhoto,
195 RDFUtils.iri("http://img.kelkoo.com/uk/medium/545/091/00154244199719224091151116421737036091545.jpg"));
196 assertContains(vHLISTING.itemPhoto,
197 RDFUtils.iri("http://img.kelkoo.com/uk/medium/018/426/00156227992563192632349212375692442426018.jpg"));
198
199 assertContains(vHLISTING.listerLogo,
200 RDFUtils.iri("http://bob.example.com/data/merchantlogos/6957423/socksfox.gif"));
201 assertContains(vHLISTING.listerLogo,
202 RDFUtils.iri("http://bob.example.com/data/merchantlogos/3590723/mytightsnew.gif"));
203 assertContains(vHLISTING.listerLogo,
204 RDFUtils.iri("http://bob.example.com/data/merchantlogos/2977501/pleaseonlinelogo88x311.gif"));
205
206 assertContains(vHLISTING.listerName, "Socks Fox");
207 assertContains(vHLISTING.listerName, "My Tights");
208 assertContains(vHLISTING.listerName, "Tightsplease");
209
210 assertContains(vHLISTING.listerOrg, "Socks Fox");
211 assertContains(vHLISTING.listerOrg, "My Tights");
212 assertContains(vHLISTING.listerName, "Tightsplease");
213
214 assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-6957423-socks-fox.html"));
215 assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-3590723-my-tights.html"));
216 assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-2977501-tightsplease.html"));
217
218 assertContains(vHLISTING.price, "\u00A380");
219 assertContains(vHLISTING.price, "\u00A347.95");
220 assertContains(vHLISTING.price, "\u00A354.99");
221 }
222
223 @Test
224 public void testListerURL() throws Exception {
225 assertExtract("/microformats/hlisting/actions-lister-url.html");
226 assertModelNotEmpty();
227 assertContains(vHLISTING.action, vHLISTING.offer);
228 assertContains(vHLISTING.listerName, "John Broker");
229 assertContains(RDF.TYPE, vHLISTING.Lister);
230 assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://homepage.com"));
231 }
232
233 @Test
234 public void testListerEmail() throws Exception {
235 assertExtract("/microformats/hlisting/actions-lister-email.html");
236 assertModelNotEmpty();
237 assertContains(vHLISTING.action, vHLISTING.offer);
238 assertContains(vHLISTING.listerName, "John Broker");
239 assertContains(RDF.TYPE, vHLISTING.Lister);
240 assertContains(vFOAF.mbox, RDFUtils.iri("mailto:info@commerce.net"));
241 }
242
243 @Test
244 public void testListerEmailHref() throws Exception {
245 assertExtract("/microformats/hlisting/actions-lister-email-href.html");
246 assertModelNotEmpty();
247 assertContains(vHLISTING.action, vHLISTING.offer);
248 assertContains(RDF.TYPE, vHLISTING.Lister);
249 assertContains(vHLISTING.listerName, "John Broker");
250 assertContains(vFOAF.mbox, RDFUtils.iri("mailto:info@commerce.net"));
251 }
252
253 @Test
254 public void testDtListed() throws Exception {
255 assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
256 assertModelNotEmpty();
257 assertNotContains(vHLISTING.action, vHLISTING.offer);
258 assertContains(vHLISTING.dtlisted, "2006-02-02");
259 }
260
261 @Test
262 public void testDtExpired() throws Exception {
263 assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
264 assertModelNotEmpty();
265 assertNotContains(vHLISTING.action, vHLISTING.offer);
266 assertContains(vHLISTING.dtexpired, "2006-04-01");
267 }
268
269 @Test
270 public void testSummary() throws Exception {
271 assertExtract("/microformats/hlisting/summary.html");
272 assertModelNotEmpty();
273 assertContains(vHLISTING.summary, "summary stuff");
274 }
275
276 @Test
277 public void testDtListedAndExpired() throws Exception {
278 assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
279 assertModelNotEmpty();
280 assertNotContains(vHLISTING.action, vHLISTING.offer);
281 assertContains(vHLISTING.dtlisted, "2006-02-02");
282 assertContains(vHLISTING.dtexpired, "2006-04-01");
283 }
284
285 @Test
286 public void testPrice() throws Exception {
287 assertExtract("/microformats/hlisting/price.html");
288 assertModelNotEmpty();
289 assertContains(vHLISTING.price, "$215/qtr");
290 }
291
292 @Test
293 public void testPriceAndDt() throws Exception {
294 assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
295 assertModelNotEmpty();
296 assertContains(vHLISTING.price, "$215/qtr");
297 assertContains(vHLISTING.dtlisted, "2006-02-02");
298 assertContains(vHLISTING.dtexpired, "2006-04-01");
299 }
300
301 @Test
302 public void testPermalink() throws Exception {
303 assertExtract("/microformats/hlisting/summary-bookmark.html");
304 assertModelNotEmpty();
305 assertContains(vHLISTING.permalink, "http://livre.com/book");
306 assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://livre.com/author"));
307 }
308
309 @Test
310 public void testComplexDescription() throws Exception {
311 assertExtract("/microformats/hlisting/description-complex.html");
312 assertModelNotEmpty();
313 assertContains(vHLISTING.description,
314 "BenQ today introduced two new additions to its renowned bus... + Show details");
315 }
316
317 @Test
318 public void testDescription() throws Exception {
319 assertExtract("/microformats/hlisting/description.html");
320 assertModelNotEmpty();
321 assertContains(vHLISTING.description, "bla bla bla");
322 }
323
324 }