1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.w3c.dom.Node;
21
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collection;
25 import java.util.Collections;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 import static org.apache.any23.extractor.html.HTMLDocument.TextField;
31
32
33
34
35
36
37
38 public class HCardName {
39
40 public static final String GIVEN_NAME = "given-name";
41 public static final String FAMILY_NAME = "family-name";
42 public static final String ADDITIONAL_NAME = "additional-name";
43 public static final String NICKNAME = "nickname";
44 public static final String HONORIFIC_PREFIX = "honorific-prefix";
45 public static final String HONORIFIC_SUFFIX = "honorific-suffix";
46
47 public static final String[] FIELDS = {
48 GIVEN_NAME,
49 FAMILY_NAME,
50 ADDITIONAL_NAME,
51 NICKNAME,
52 HONORIFIC_PREFIX,
53 HONORIFIC_SUFFIX
54 };
55
56 private static final String[] NAME_COMPONENTS = {
57 HONORIFIC_PREFIX,
58 GIVEN_NAME,
59 ADDITIONAL_NAME,
60 FAMILY_NAME,
61 HONORIFIC_SUFFIX
62 };
63
64 private Map<String, FieldValue> fields = new HashMap<String, FieldValue>();
65 private TextField[] fullName = null;
66 private TextField organization = null;
67 private TextField unit = null;
68
69 private static TextField join(TextField[] sarray, String delimiter) {
70 StringBuilder builder = new StringBuilder();
71 final int sarrayLengthMin2 = sarray.length - 1;
72 for(int i = 0; i < sarray.length; i++) {
73 builder.append(sarray[i].value());
74 if( i < sarrayLengthMin2) {
75 builder.append(delimiter);
76 }
77 }
78 return new TextField( builder.toString(), sarray[0].source() ) ;
79 }
80
81
82
83
84 public void reset() {
85 fields.clear();
86 fullName = null;
87 organization = null;
88 unit = null;
89 }
90
91 public void setField(String fieldName, TextField nd) {
92 final String value = fixWhiteSpace( nd.value() );
93 if (value == null) return;
94 FieldValue fieldValue = fields.get(fieldName);
95 if(fieldValue == null) {
96 fieldValue = new FieldValue();
97 fields.put(fieldName, fieldValue);
98 }
99 fieldValue.addValue( new TextField(value, nd.source()) );
100 }
101
102 public void setFullName(TextField nd) {
103 final String value = fixWhiteSpace( nd.value() );
104 if (value == null) return;
105 String[] split = value.split("\\s+");
106
107 final String split0 = split[0];
108 final int split0Length = split0.length();
109 if(split.length > 1 && split0.charAt(split0Length -1) == ',') {
110 String swap = split[1];
111 split[1] = split0.substring(0, split0Length -1);
112 split[0] = swap;
113 }
114 TextField[] splitFields = new TextField[split.length];
115 for(int i = 0; i < split.length; i++) {
116 splitFields[i] = new TextField(split[i], nd.source());
117 }
118 this.fullName = splitFields;
119 }
120
121 public void setOrganization(TextField nd) {
122 final String value = fixWhiteSpace( nd.value() );
123 if (value == null) return;
124 this.organization = new TextField(value, nd.source());
125 }
126
127 public boolean isMultiField(String fieldName) {
128 FieldValue fieldValue = fields.get(fieldName);
129 return fieldValue != null && fieldValue.isMultiField();
130 }
131
132 public boolean containsField(String fieldName) {
133 return GIVEN_NAME.equals(fieldName) || FAMILY_NAME.equals(fieldName) || fields.containsKey(fieldName);
134 }
135
136 public TextField getField(String fieldName) {
137 if (GIVEN_NAME.equals(fieldName)) {
138 return getFullNamePart(GIVEN_NAME, 0);
139 }
140 if (FAMILY_NAME.equals(fieldName)) {
141 return getFullNamePart(FAMILY_NAME, Integer.MAX_VALUE);
142 }
143 FieldValue v = fields.get(fieldName);
144 return v == null ? null : v.getValue();
145 }
146
147 public Collection<TextField> getFields(String fieldName) {
148 FieldValue v = fields.get(fieldName);
149 return v == null ? Collections.<TextField>emptyList() : v.getValues();
150 }
151
152 private TextField getFullNamePart(String fieldName, int index) {
153 if (fields.containsKey(fieldName)) {
154 return fields.get(fieldName).getValue();
155 }
156 if (fullName == null) return null;
157
158 if (organization != null && fullName[0].value().equals(organization.value())) {
159 return null;
160 }
161 if (index != Integer.MAX_VALUE && fullName.length <= index) return null;
162 return fullName[ index == Integer.MAX_VALUE ? fullName.length - 1 : index];
163 }
164
165 public boolean hasField(String fieldName) {
166 return getField(fieldName) != null;
167 }
168
169 public boolean hasAnyField() {
170 for (String fieldName : FIELDS) {
171 if (hasField(fieldName)) return true;
172 }
173 return false;
174 }
175
176 public TextField getFullName() {
177 if (fullName != null) return join(fullName, " ");
178 StringBuffer s = new StringBuffer();
179 boolean empty = true;
180 Node first = null;
181 TextField current;
182 for (String fieldName : NAME_COMPONENTS) {
183 if (!hasField(fieldName)) continue;
184 if (!empty) {
185 s.append(' ');
186 }
187 current = getField(fieldName);
188 if(first == null) { first = current.source(); }
189 s.append( current.value() );
190 empty = false;
191 }
192 if (empty) return null;
193 return new TextField( s.toString(), first);
194 }
195
196 public TextField getOrganization() {
197 return organization;
198 }
199
200 public void setOrganizationUnit(TextField nd) {
201 final String value = fixWhiteSpace( nd.value() );
202 if (value == null) return;
203 this.unit = new TextField(value, nd.source() );
204 }
205
206 public TextField getOrganizationUnit() {
207 return unit;
208 }
209
210 private String fixWhiteSpace(String s) {
211 if (s == null) return null;
212 s = s.trim().replaceAll("\\s+", " ");
213 if ("".equals(s)) return null;
214 return s;
215 }
216
217
218
219
220 private class FieldValue {
221
222 private TextField value;
223 private List<TextField> multiValue = new ArrayList<TextField>();
224
225 FieldValue() {}
226
227 void addValue(TextField v) {
228 if(value == null && multiValue == null) {
229 value = v;
230 } else if(multiValue == null) {
231 multiValue = new ArrayList<TextField>();
232 multiValue.add(value);
233 value = null;
234 multiValue.add(v);
235 } else {
236 multiValue.add(v);
237 }
238 }
239
240 boolean isMultiField() {
241 return value == null;
242 }
243
244 TextField getValue() {
245 return value != null ? value : multiValue.get(0);
246 }
247
248 Collection<TextField> getValues() {
249 return value != null ? Arrays.asList(value) : multiValue;
250 }
251 }
252
253 }