View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.w3c.dom.Node;
21  
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collection;
25  import java.util.Collections;
26  import java.util.HashMap;
27  import java.util.List;
28  import java.util.Map;
29  
30  import static org.apache.any23.extractor.html.HTMLDocument.TextField;
31  
32  /**
33   * An HCard name, consisting of various parts. Handles computation of full names from first and last names, and similar
34   * computations.
35   *
36   * @author Richard Cyganiak (richard@cyganiak.de)
37   */
38  public class HCardName {
39  
40      public static final String GIVEN_NAME = "given-name";
41      public static final String FAMILY_NAME = "family-name";
42      public static final String ADDITIONAL_NAME = "additional-name";
43      public static final String NICKNAME = "nickname";
44      public static final String HONORIFIC_PREFIX = "honorific-prefix";
45      public static final String HONORIFIC_SUFFIX = "honorific-suffix";
46  
47      public static final String[] FIELDS = { GIVEN_NAME, FAMILY_NAME, ADDITIONAL_NAME, NICKNAME, HONORIFIC_PREFIX,
48              HONORIFIC_SUFFIX };
49  
50      private static final String[] NAME_COMPONENTS = { HONORIFIC_PREFIX, GIVEN_NAME, ADDITIONAL_NAME, FAMILY_NAME,
51              HONORIFIC_SUFFIX };
52  
53      private Map<String, FieldValue> fields = new HashMap<String, FieldValue>();
54      private TextField[] fullName = null;
55      private TextField organization = null;
56      private TextField unit = null;
57  
58      private static TextField join(TextField[] sarray, String delimiter) {
59          StringBuilder builder = new StringBuilder();
60          final int sarrayLengthMin2 = sarray.length - 1;
61          for (int i = 0; i < sarray.length; i++) {
62              builder.append(sarray[i].value());
63              if (i < sarrayLengthMin2) {
64                  builder.append(delimiter);
65              }
66          }
67          return new TextField(builder.toString(), sarray[0].source());
68      }
69  
70      /**
71       * Resets the content of the HName fields.
72       */
73      public void reset() {
74          fields.clear();
75          fullName = null;
76          organization = null;
77          unit = null;
78      }
79  
80      public void setField(String fieldName, TextField nd) {
81          final String value = fixWhiteSpace(nd.value());
82          if (value == null)
83              return;
84          FieldValue fieldValue = fields.get(fieldName);
85          if (fieldValue == null) {
86              fieldValue = new FieldValue();
87              fields.put(fieldName, fieldValue);
88          }
89          fieldValue.addValue(new TextField(value, nd.source()));
90      }
91  
92      public void setFullName(TextField nd) {
93          final String value = fixWhiteSpace(nd.value());
94          if (value == null)
95              return;
96          String[] split = value.split("\\s+");
97          // Supporting case: ['King,', 'Ryan'] that is converted to ['Ryan', 'King'] .
98          final String split0 = split[0];
99          final int split0Length = split0.length();
100         if (split.length > 1 && split0.charAt(split0Length - 1) == ',') {
101             String swap = split[1];
102             split[1] = split0.substring(0, split0Length - 1);
103             split[0] = swap;
104         }
105         TextField[] splitFields = new TextField[split.length];
106         for (int i = 0; i < split.length; i++) {
107             splitFields[i] = new TextField(split[i], nd.source());
108         }
109         this.fullName = splitFields;
110     }
111 
112     public void setOrganization(TextField nd) {
113         final String value = fixWhiteSpace(nd.value());
114         if (value == null)
115             return;
116         this.organization = new TextField(value, nd.source());
117     }
118 
119     public boolean isMultiField(String fieldName) {
120         FieldValue fieldValue = fields.get(fieldName);
121         return fieldValue != null && fieldValue.isMultiField();
122     }
123 
124     public boolean containsField(String fieldName) {
125         return GIVEN_NAME.equals(fieldName) || FAMILY_NAME.equals(fieldName) || fields.containsKey(fieldName);
126     }
127 
128     public TextField getField(String fieldName) {
129         if (GIVEN_NAME.equals(fieldName)) {
130             return getFullNamePart(GIVEN_NAME, 0);
131         }
132         if (FAMILY_NAME.equals(fieldName)) {
133             return getFullNamePart(FAMILY_NAME, Integer.MAX_VALUE);
134         }
135         FieldValue v = fields.get(fieldName);
136         return v == null ? null : v.getValue();
137     }
138 
139     public Collection<TextField> getFields(String fieldName) {
140         FieldValue v = fields.get(fieldName);
141         return v == null ? Collections.<TextField> emptyList() : v.getValues();
142     }
143 
144     private TextField getFullNamePart(String fieldName, int index) {
145         if (fields.containsKey(fieldName)) {
146             return fields.get(fieldName).getValue();
147         }
148         if (fullName == null)
149             return null;
150         // If org and fn are the same, the hCard is for an organization, and we do not split the fn
151         if (organization != null && fullName[0].value().equals(organization.value())) {
152             return null;
153         }
154         if (index != Integer.MAX_VALUE && fullName.length <= index)
155             return null;
156         return fullName[index == Integer.MAX_VALUE ? fullName.length - 1 : index];
157     }
158 
159     public boolean hasField(String fieldName) {
160         return getField(fieldName) != null;
161     }
162 
163     public boolean hasAnyField() {
164         for (String fieldName : FIELDS) {
165             if (hasField(fieldName))
166                 return true;
167         }
168         return false;
169     }
170 
171     public TextField getFullName() {
172         if (fullName != null)
173             return join(fullName, " ");
174         StringBuffer s = new StringBuffer();
175         boolean empty = true;
176         Node first = null;
177         TextField current;
178         for (String fieldName : NAME_COMPONENTS) {
179             if (!hasField(fieldName))
180                 continue;
181             if (!empty) {
182                 s.append(' ');
183             }
184             current = getField(fieldName);
185             if (first == null) {
186                 first = current.source();
187             }
188             s.append(current.value());
189             empty = false;
190         }
191         if (empty)
192             return null;
193         return new TextField(s.toString(), first);
194     }
195 
196     public TextField getOrganization() {
197         return organization;
198     }
199 
200     public void setOrganizationUnit(TextField nd) {
201         final String value = fixWhiteSpace(nd.value());
202         if (value == null)
203             return;
204         this.unit = new TextField(value, nd.source());
205     }
206 
207     public TextField getOrganizationUnit() {
208         return unit;
209     }
210 
211     private String fixWhiteSpace(String s) {
212         if (s == null)
213             return null;
214         s = s.trim().replaceAll("\\s+", " ");
215         if ("".equals(s))
216             return null;
217         return s;
218     }
219 
220     /**
221      * Represents a possible field value.
222      */
223     private static class FieldValue {
224 
225         private TextField value;
226         private List<TextField> multiValue = new ArrayList<TextField>();
227 
228         FieldValue() {
229         }
230 
231         void addValue(TextField v) {
232             if (value == null && multiValue == null) {
233                 value = v;
234             } else if (multiValue == null) {
235                 multiValue = new ArrayList<TextField>();
236                 multiValue.add(value);
237                 value = null;
238                 multiValue.add(v);
239             } else {
240                 multiValue.add(v);
241             }
242         }
243 
244         boolean isMultiField() {
245             return value == null;
246         }
247 
248         TextField getValue() {
249             return value != null ? value : multiValue.get(0);
250         }
251 
252         Collection<TextField> getValues() {
253             return value != null ? Arrays.asList(value) : multiValue;
254         }
255     }
256 
257 }