This project has retired. For details please refer to its Attic page.
HResumeExtractorTest xref
View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.vocab.DOAC;
22  import org.apache.any23.vocab.FOAF;
23  import org.apache.any23.vocab.SINDICE;
24  import org.apache.any23.vocab.VCard;
25  import org.junit.Assert;
26  import org.junit.Test;
27  import org.eclipse.rdf4j.model.Resource;
28  import org.eclipse.rdf4j.model.Statement;
29  import org.eclipse.rdf4j.model.Value;
30  import org.eclipse.rdf4j.model.vocabulary.RDF;
31  import org.eclipse.rdf4j.repository.RepositoryResult;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  
35  import java.util.HashSet;
36  import java.util.Set;
37  
38  /**
39   * Reference Test class for the {@link HResumeExtractor} extractor.
40   *
41   * @author Davide Palmisano (dpalmisano@gmail.com)
42   */
43  public class HResumeExtractorTest extends AbstractExtractorTestCase {
44  
45      private static final SINDICE vSINDICE = SINDICE.getInstance();
46      private static final FOAF vFOAF = FOAF.getInstance();
47      private static final DOAC vDOAC = DOAC.getInstance();
48      private static final VCard vVCARD = VCard.getInstance();
49  
50      private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
51  
52      protected ExtractorFactory<?> getExtractorFactory() {
53          return new HResumeExtractorFactory();
54      }
55  
56      @Test
57      public void testNoMicroformats() throws Exception {
58          assertExtract("/html/html-without-uf.html");
59          assertModelEmpty();
60      }
61  
62      @Test
63      public void testLinkedIn() throws Exception {
64          assertExtract("/microformats/hresume/steveganz.html");
65          assertModelNotEmpty();
66          assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
67  
68          Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
69  
70          assertContains(person, vDOAC.summary, (Resource) null);
71  
72          assertContains(person, vDOAC.summary,
73                  "Steve Ganz is passionate about connecting people,\n"
74                          + "semantic markup, sushi, and disc golf - not necessarily in that order.\n"
75                          + "Currently obsessed with developing the user experience at LinkedIn,\n"
76                          + "Steve is a second generation Silicon Valley geek and a veteran web\n"
77                          + "professional who has been building human-computer interfaces since 1994.");
78  
79          assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null);
80  
81          assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
82  
83          assertStatementsSize(vDOAC.experience, (Value) null, 7);
84          assertStatementsSize(vDOAC.education, (Value) null, 2);
85          assertStatementsSize(vDOAC.affiliation, (Value) null, 8);
86      }
87  
88      @Test
89      public void testLinkedInComplete() throws Exception {
90  
91          assertExtract("/microformats/hresume/steveganz.html");
92          assertModelNotEmpty();
93  
94          assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
95  
96          assertStatementsSize(vDOAC.experience, (Value) null, 7);
97          assertStatementsSize(vDOAC.education, (Value) null, 2);
98          assertStatementsSize(vDOAC.affiliation, (Value) null, 8);
99          assertStatementsSize(vDOAC.skill, (Value) null, 17);
100 
101         RepositoryResult<Statement> statements = getStatements(null, vDOAC.organization, null);
102 
103         Set<String> checkSet = new HashSet<String>();
104 
105         try {
106             while (statements.hasNext()) {
107                 Statement statement = statements.next();
108                 checkSet.add(statement.getObject().stringValue());
109                 logger.debug(statement.getObject().stringValue());
110             }
111 
112         } finally {
113             statements.close();
114         }
115 
116         String[] names = new String[] { "LinkedIn Corporation", "PayPal, an eBay Company", "McAfee, Inc.",
117                 "Printable Technologies", "Collabria, Inc.", "Self-employed", "3G Productions",
118                 "Lee Strasberg Theatre and Film\n" + "\tInstitute", "Leland High School" };
119 
120         for (String name : names)
121             Assert.assertTrue(checkSet.contains(name));
122 
123         Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
124         assertContains(person, vFOAF.isPrimaryTopicOf, (Value) null);
125         findExactlyOneObject(person, vFOAF.isPrimaryTopicOf);
126     }
127 
128     @Test
129     public void testAnt() throws Exception {
130         assertExtract("/microformats/hresume/ant.html");
131         assertModelNotEmpty();
132 
133         assertStatementsSize(RDF.TYPE, vFOAF.Person, 1);
134 
135         Resource person = findExactlyOneBlankSubject(RDF.TYPE, vFOAF.Person);
136         assertContains(person, vDOAC.summary, (Resource) null);
137 
138         assertContains(person, vDOAC.summary,
139                 "Senior Systems\n              Analyst/Developer.\n              "
140                         + "Experienced in the analysis, design and\n              "
141                         + "implementation of distributed, multi-tier\n              "
142                         + "applications using Microsoft\n              technologies.\n"
143                         + "              Specialising in data capture applications on the\n" + "              Web.");
144 
145         assertContains(person, vFOAF.isPrimaryTopicOf, (Resource) null);
146 
147         assertStatementsSize(RDF.TYPE, vVCARD.VCard, 0);
148 
149         assertStatementsSize(vDOAC.experience, (Value) null, 16);
150         assertStatementsSize(vDOAC.education, (Value) null, 2);
151         assertStatementsSize(vDOAC.affiliation, (Value) null, 0);
152         assertStatementsSize(vDOAC.skill, (Value) null, 4);
153     }
154 
155 }