This project has retired. For details please refer to its Attic page.
HListingExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.rdf.RDFUtils;
22  import org.apache.any23.vocab.FOAF;
23  import org.apache.any23.vocab.HListing;
24  import org.apache.any23.vocab.SINDICE;
25  import org.junit.Test;
26  import org.eclipse.rdf4j.model.Resource;
27  import org.eclipse.rdf4j.model.vocabulary.RDF;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  
31  /**
32   *
33   * Reference Test class for the {@link HListingExtractor} extractor.
34   *
35   * @author Davide Palmisano (dpalmisano@gmail.com)
36   *
37   */
38  public class HListingExtractorTest extends AbstractExtractorTestCase {
39  
40      private static final SINDICE vSINDICE = SINDICE.getInstance();
41      private static final HListing vHLISTING = HListing.getInstance();
42      private static final FOAF vFOAF = FOAF.getInstance();
43  
44      private static final Logger logger = LoggerFactory.getLogger(HListingExtractorTest.class);
45  
46      protected ExtractorFactory<?> getExtractorFactory() {
47          return new HListingExtractorFactory();
48      }
49  
50      @Test
51      public void testNoMicroformats() throws Exception {
52          assertExtract("/html/html-without-uf.html");
53          assertModelEmpty();
54      }
55  
56      @Test
57      public void testListingWithouthContent() throws Exception {
58          assertExtract("/microformats/hlisting/empty.html");
59          assertModelNotEmpty();
60          assertStatementsSize(null, null, null, 3);
61      }
62  
63      @Test
64      public void testSingleAction() throws Exception {
65          assertExtract("/microformats/hlisting/single-action.html");
66          assertModelNotEmpty();
67          assertContains(vHLISTING.action, vHLISTING.offer);
68      }
69  
70      @Test
71      public void testMultipleActions() throws Exception {
72          assertExtract("/microformats/hlisting/multiple-actions.html");
73          assertModelNotEmpty();
74          assertContains(vHLISTING.action, vHLISTING.offer);
75          assertContains(vHLISTING.action, vHLISTING.sell);
76      }
77  
78      @Test
79      public void testMultipleActionsNested() throws Exception {
80          assertExtract("/microformats/hlisting/multiple-actions-nested.html");
81          assertModelNotEmpty();
82          assertContains(vHLISTING.action, vHLISTING.offer);
83          assertContains(vHLISTING.action, vHLISTING.sell);
84          assertContains(vHLISTING.action, vHLISTING.rent);
85      }
86  
87      @Test
88      public void testActionsOutside() throws Exception {
89          assertExtract("/microformats/hlisting/single-action-outside.html");
90          assertModelNotEmpty();
91          assertNotContains(vHLISTING.action, vHLISTING.offer);
92      }
93  
94      @Test
95      public void testListerFn() throws Exception {
96          assertExtract("/microformats/hlisting/actions-lister-fn.html");
97          assertModelNotEmpty();
98          assertContains(vHLISTING.action, vHLISTING.offer);
99          assertContains(RDF.TYPE, vHLISTING.Lister);
100         assertContains(vHLISTING.listerName, "mike");
101     }
102 
103     @Test
104     public void testListerFnTel() throws Exception {
105         assertExtract("/microformats/hlisting/actions-lister-fn-tel.html");
106         assertModelNotEmpty();
107 
108         assertContains(vHLISTING.action, vHLISTING.offer);
109         assertContains(vHLISTING.listerName, "John Broker");
110         assertContains(RDF.TYPE, vHLISTING.Lister);
111         assertContains(vHLISTING.tel, "(110) 555-1212");
112     }
113 
114     @Test
115     public void testItemFn() throws Exception {
116         assertExtract("/microformats/hlisting/item-fn.html");
117         assertModelNotEmpty();
118         assertContains(RDF.TYPE, vHLISTING.Item);
119         assertContains(vHLISTING.itemName, "Parking space");
120     }
121 
122     @Test
123     public void testItemFnUrl() throws Exception {
124         assertExtract("/microformats/hlisting/item-fn-url.html");
125         assertModelNotEmpty();
126         assertContains(RDF.TYPE, vHLISTING.Item);
127         assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://item.com/"));
128         assertContains(vHLISTING.itemName, "Parking space");
129     }
130 
131     @Test
132     public void testItemPhotoImg() throws Exception {
133         assertExtract("/microformats/hlisting/item-fn-url-photo-img.html");
134         assertModelNotEmpty();
135         assertContains(RDF.TYPE, vHLISTING.Item);
136         assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://item.com/"));
137         assertContains(vHLISTING.itemName, "Parking space");
138         assertContains(vHLISTING.itemPhoto, RDFUtils.iri(baseIRI.stringValue() + "photo.jpg"));
139     }
140 
141     @Test
142     public void testItemPhotoHref() throws Exception {
143         assertExtract("/microformats/hlisting/item-fn-photo-href.html");
144         assertModelNotEmpty();
145         assertContains(RDF.TYPE, vHLISTING.Item);
146         assertContains(vHLISTING.itemName, "Parking space");
147         assertContains(vHLISTING.itemPhoto, RDFUtils.iri(baseIRI.stringValue() + "pic.jpg"));
148     }
149 
150     @Test
151     public void testKelkoo() throws Exception {
152         assertExtract("/microformats/hlisting/kelkoo.html");
153         assertModelNotEmpty();
154 
155         assertContains(RDF.TYPE, vHLISTING.Listing);
156         assertContains(RDF.TYPE, vHLISTING.Item);
157         assertContains(vHLISTING.action, vHLISTING.offer);
158         assertContains(vHLISTING.itemName, "Benq MP622 - DLP Projector - 2700 ANSI lumens - XGA...");
159 
160         assertContains(vHLISTING.description, (Resource) null);
161 
162         assertContains(RDF.TYPE, vHLISTING.Lister);
163 
164         assertContains(vHLISTING.listerUrl, RDFUtils.iri(baseIRI.stringValue() + "m-4621623-pc-world-business.html"));
165         assertContains(vHLISTING.listerOrg, "PC World Business");
166 
167         assertContains(vHLISTING.listerLogo,
168                 RDFUtils.iri(baseIRI.stringValue() + "data/merchantlogos/4621623/pcworld.gif"));
169 
170         assertContains(vHLISTING.listerName, "PC World Business");
171 
172         assertContains(vHLISTING.itemPhoto,
173                 RDFUtils.iri("http://img.kelkoo.com/uk/medium/675/496/00117250662929509422269096808645163496675.jpg"));
174 
175         assertContains(vHLISTING.price, "\u00A3480.17");
176     }
177 
178     @Test
179     public void testKelkooFull() throws Exception {
180         assertExtract("/microformats/hlisting/kelkoo-full.html");
181         assertModelNotEmpty();
182         assertContains(RDF.TYPE, vHLISTING.Listing);
183         assertContains(RDF.TYPE, vHLISTING.Item);
184         assertContains(vHLISTING.action, vHLISTING.offer);
185         assertContains(vHLISTING.itemUrl, RDFUtils.iri("http://bob.example.com/"));
186         assertContains(RDF.TYPE, vHLISTING.Lister);
187 
188         assertContains(vHLISTING.itemName, "Hanro Touch Feeling Shape Bodysuit Underwear");
189         assertContains(vHLISTING.itemName, "Spanx Slim Cognito - Shaping Mid-Thigh Bodysuit");
190         assertContains(vHLISTING.itemName, "Spanx Spanx Slim Cognito High Leg Shaping...");
191 
192         assertContains(vHLISTING.itemPhoto,
193                 RDFUtils.iri("http://img.kelkoo.com/uk/medium/657/449/00162475823966154731749844283942320449657.jpg"));
194         assertContains(vHLISTING.itemPhoto,
195                 RDFUtils.iri("http://img.kelkoo.com/uk/medium/545/091/00154244199719224091151116421737036091545.jpg"));
196         assertContains(vHLISTING.itemPhoto,
197                 RDFUtils.iri("http://img.kelkoo.com/uk/medium/018/426/00156227992563192632349212375692442426018.jpg"));
198 
199         assertContains(vHLISTING.listerLogo,
200                 RDFUtils.iri("http://bob.example.com/data/merchantlogos/6957423/socksfox.gif"));
201         assertContains(vHLISTING.listerLogo,
202                 RDFUtils.iri("http://bob.example.com/data/merchantlogos/3590723/mytightsnew.gif"));
203         assertContains(vHLISTING.listerLogo,
204                 RDFUtils.iri("http://bob.example.com/data/merchantlogos/2977501/pleaseonlinelogo88x311.gif"));
205 
206         assertContains(vHLISTING.listerName, "Socks Fox");
207         assertContains(vHLISTING.listerName, "My Tights");
208         assertContains(vHLISTING.listerName, "Tightsplease");
209 
210         assertContains(vHLISTING.listerOrg, "Socks Fox");
211         assertContains(vHLISTING.listerOrg, "My Tights");
212         assertContains(vHLISTING.listerName, "Tightsplease");
213 
214         assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-6957423-socks-fox.html"));
215         assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-3590723-my-tights.html"));
216         assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://bob.example.com/m-2977501-tightsplease.html"));
217 
218         assertContains(vHLISTING.price, "\u00A380");
219         assertContains(vHLISTING.price, "\u00A347.95");
220         assertContains(vHLISTING.price, "\u00A354.99");
221     }
222 
223     @Test
224     public void testListerURL() throws Exception {
225         assertExtract("/microformats/hlisting/actions-lister-url.html");
226         assertModelNotEmpty();
227         assertContains(vHLISTING.action, vHLISTING.offer);
228         assertContains(vHLISTING.listerName, "John Broker");
229         assertContains(RDF.TYPE, vHLISTING.Lister);
230         assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://homepage.com"));
231     }
232 
233     @Test
234     public void testListerEmail() throws Exception {
235         assertExtract("/microformats/hlisting/actions-lister-email.html");
236         assertModelNotEmpty();
237         assertContains(vHLISTING.action, vHLISTING.offer);
238         assertContains(vHLISTING.listerName, "John Broker");
239         assertContains(RDF.TYPE, vHLISTING.Lister);
240         assertContains(vFOAF.mbox, RDFUtils.iri("mailto:info@commerce.net"));
241     }
242 
243     @Test
244     public void testListerEmailHref() throws Exception {
245         assertExtract("/microformats/hlisting/actions-lister-email-href.html");
246         assertModelNotEmpty();
247         assertContains(vHLISTING.action, vHLISTING.offer);
248         assertContains(RDF.TYPE, vHLISTING.Lister);
249         assertContains(vHLISTING.listerName, "John Broker");
250         assertContains(vFOAF.mbox, RDFUtils.iri("mailto:info@commerce.net"));
251     }
252 
253     @Test
254     public void testDtListed() throws Exception {
255         assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
256         assertModelNotEmpty();
257         assertNotContains(vHLISTING.action, vHLISTING.offer);
258         assertContains(vHLISTING.dtlisted, "2006-02-02");
259     }
260 
261     @Test
262     public void testDtExpired() throws Exception {
263         assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
264         assertModelNotEmpty();
265         assertNotContains(vHLISTING.action, vHLISTING.offer);
266         assertContains(vHLISTING.dtexpired, "2006-04-01");
267     }
268 
269     @Test
270     public void testSummary() throws Exception {
271         assertExtract("/microformats/hlisting/summary.html");
272         assertModelNotEmpty();
273         assertContains(vHLISTING.summary, "summary stuff");
274     }
275 
276     @Test
277     public void testDtListedAndExpired() throws Exception {
278         assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
279         assertModelNotEmpty();
280         assertNotContains(vHLISTING.action, vHLISTING.offer);
281         assertContains(vHLISTING.dtlisted, "2006-02-02");
282         assertContains(vHLISTING.dtexpired, "2006-04-01");
283     }
284 
285     @Test
286     public void testPrice() throws Exception {
287         assertExtract("/microformats/hlisting/price.html");
288         assertModelNotEmpty();
289         assertContains(vHLISTING.price, "$215/qtr");
290     }
291 
292     @Test
293     public void testPriceAndDt() throws Exception {
294         assertExtract("/microformats/hlisting/dtlisted-dtexpired.html");
295         assertModelNotEmpty();
296         assertContains(vHLISTING.price, "$215/qtr");
297         assertContains(vHLISTING.dtlisted, "2006-02-02");
298         assertContains(vHLISTING.dtexpired, "2006-04-01");
299     }
300 
301     @Test
302     public void testPermalink() throws Exception {
303         assertExtract("/microformats/hlisting/summary-bookmark.html");
304         assertModelNotEmpty();
305         assertContains(vHLISTING.permalink, "http://livre.com/book");
306         assertContains(vHLISTING.listerUrl, RDFUtils.iri("http://livre.com/author"));
307     }
308 
309     @Test
310     public void testComplexDescription() throws Exception {
311         assertExtract("/microformats/hlisting/description-complex.html");
312         assertModelNotEmpty();
313         assertContains(vHLISTING.description,
314                 "BenQ today introduced two new additions to its renowned bus... + Show details");
315     }
316 
317     @Test
318     public void testDescription() throws Exception {
319         assertExtract("/microformats/hlisting/description.html");
320         assertModelNotEmpty();
321         assertContains(vHLISTING.description, "bla bla bla");
322     }
323 
324 }