View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.microdata;
19  
20  import org.apache.any23.rdf.RDFUtils;
21  import org.apache.commons.lang.StringUtils;
22  import org.eclipse.rdf4j.common.net.ParsedIRI;
23  import org.eclipse.rdf4j.model.IRI;
24  
25  import java.net.MalformedURLException;
26  import java.net.URL;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.Collection;
30  import java.util.Collections;
31  import java.util.HashMap;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.regex.Pattern;
35  
36  /**
37   * This class describes a <b>Microdata <i>itemscope</i></b>.
38   *
39   * @author Michele Mostarda (mostarda@fbk.eu)
40   * @author Hans Brende (hansbrende@apache.org)
41   */
42  public class ItemScope extends Item {
43  
44      /**
45       * Map of properties and multi values.
46       */
47      private final Map<String, List<ItemProp>> properties;
48  
49      /**
50       * <i>itemscope</i> DOM identifier in container document.
51       */
52      private final String id;
53  
54      /**
55       * <i>itemscope</i> references.
56       */
57      private final String[] refs;
58  
59      /**
60       * <i>itemscope</i> type.
61       */
62      private final List<IRI> type;
63  
64      /**
65       * <i>itemscope</i> external identifier.
66       */
67      private final String itemId;
68  
69      /**
70       * Constructor.
71       *
72       * @param xpath     location of this <i>itemscope</i> within the container document.
73       * @param itemProps list of properties bound to this <i>itemscope</i>.
74       * @param id        DOM identifier for this <i>itemscope</i>. Can be <code>null</code>.
75       * @param refs      list of item prop references connected to this <i>itemscope</i>. Can be <code>null</code>.
76       * @param type      <i>itemscope</i> type. Can be <code>null</code>.
77       * @param itemId    <i>itemscope</i> id. Can be <code>null</code>.
78       */
79      public ItemScope(String xpath, ItemProp[] itemProps, String id, String[] refs, String type, String itemId) {
80          this(xpath, itemProps, id, refs, stringToSingletonIRI(type), itemId);
81      }
82  
83      private static final Pattern looksLikeStartsWithHost = Pattern.compile("[^:/.]+(\\.[^:/.]+)+(:\\d+)?([/#?].*)?");
84  
85      static List<IRI> stringToSingletonIRI(String type) {
86          if (StringUtils.isNotBlank(type)) {
87              ParsedIRI iri = ParsedIRI.create(type.trim());
88              if (StringUtils.isBlank(iri.getScheme())) {
89                  String host = iri.getHost();
90                  if (StringUtils.isNotBlank(host)) {
91                      iri = new ParsedIRI("http", iri.getUserInfo(), host, iri.getPort(), iri.getPath(), iri.getQuery(), iri.getFragment());
92                  } else {
93                      String path = iri.getPath();
94                      if (path != null && looksLikeStartsWithHost.matcher(path).matches()) {
95                          iri = ParsedIRI.create("http://" + iri.toString());
96                      }
97                  }
98              }
99              return Collections.singletonList(RDFUtils.iri(iri.toString()));
100         } else {
101             return Collections.emptyList();
102         }
103     }
104 
105     ItemScope(String xpath, ItemProp[] itemProps, String id, String[] refs, List<IRI> types, String itemId) {
106         super(xpath);
107 
108         if (itemProps == null) {
109             throw new NullPointerException("itemProps list cannot be null.");
110         }
111 
112         this.type = types;
113         this.id = id;
114         this.refs = refs;
115         this.itemId = itemId;
116 
117         final Map<String, List<ItemProp>> tmpProperties = new HashMap<>();
118         for (ItemProp itemProp : itemProps) {
119             final String propName = itemProp.getName();
120             List<ItemProp> propList = tmpProperties.get(propName);
121             if (propList == null) {
122                 propList = new ArrayList<>();
123                 tmpProperties.put(propName, propList);
124             }
125             propList.add(itemProp);
126         }
127         final Map<String, List<ItemProp>> properties = new HashMap<>();
128         for (Map.Entry<String, List<ItemProp>> propertiesEntry : tmpProperties.entrySet()) {
129             properties.put(
130                     propertiesEntry.getKey(),
131                     //Collections.unmodifiableList( propertiesEntry.getValue() )
132                     propertiesEntry.getValue()
133             );
134         }
135         // this.properties = Collections.unmodifiableMap(properties);
136         this.properties = properties;
137     }
138 
139     /**
140      * @return map of declared properties, every property can have more than a value.
141      */
142     public Map<String, List<ItemProp>> getProperties() {
143         return properties;
144     }
145 
146     /**
147      * @return the <i>itemscope</i>
148      */
149     public String getId() {
150         return id;
151     }
152 
153     /**
154      * @return <i>itemscope</i> list of references to <i>itemprop</i>s.
155      */
156     public String[] getRefs() {
157         return refs;
158     }
159 
160     /**
161      * @return <i>itemscope</i> type.
162      */
163     public URL getType() {
164         //No longer using URL.
165         //But for backwards compatibility:
166         try {
167             return type.isEmpty() ? null : new URL(type.get(0).stringValue());
168         } catch (MalformedURLException e) {
169             try {
170                 return new URL(ParsedIRI.create(type.get(0).stringValue()).toASCIIString());
171             } catch (Exception e1) {
172                 return null;
173             }
174         }
175     }
176 
177     List<IRI> getTypes() {
178         return type;
179     }
180 
181     /**
182      * @return <i>itemscope</i> public identifier.
183      */
184     public String getItemId() {
185         return itemId;
186     }
187 
188     @Override
189     public String toJSON() {
190         StringBuilder sb = new StringBuilder();
191         int i;
192         int j;
193         final Collection<List<ItemProp>> itemPropsList = properties.values();
194         j = 0;
195         for (List<ItemProp> itemProps : itemPropsList) {
196             i = 0;
197             for (ItemProp itemProp : itemProps) {
198                 sb.append(itemProp);
199                 if (i < itemProps.size() - 1) {
200                     sb.append(", ");
201                 }
202                 i++;
203             }
204             if (j < itemPropsList.size() - 1) {
205                 sb.append(", ");
206             }
207             j++;
208         }
209         return String.format(
210                 "{ " +
211                         "\"xpath\" : \"%s\", \"id\" : %s, \"refs\" : %s, \"type\" : %s, \"itemid\" : %s, \"properties\" : [ %s ]" +
212                         " }",
213                 getXpath(),
214                 id == null ? null : "\"" + id + "\"",
215                 refs == null ? null : toJSON(refs),
216                 type.isEmpty() ? null : "\"" + type.get(0) + "\"",
217                 itemId == null ? null : "\"" + itemId + "\"",
218                 sb.toString()
219         );
220     }
221 
222     @Override
223     public String toString() {
224         return toJSON();
225     }
226 
227     @Override
228     public int hashCode() {
229         int i = properties == null ? 0 : properties.hashCode();
230         i += id == null         ? 0 : id.hashCode();
231         i += refs == null       ? 0 : Arrays.hashCode(refs);
232         i += type == null       ? 0 : type.hashCode();
233         i += itemId == null     ? 0 : itemId.hashCode();
234         return i;
235     }
236 
237     @Override
238     public boolean equals(Object obj) {
239         if (obj == null) {
240             return false;
241         }
242         if (obj == this) {
243             return true;
244         }
245         if (obj instanceof ItemScope) {
246             final ItemScope/../../../../org/apache/any23/extractor/microdata/ItemScope.html#ItemScope">ItemScope other = (ItemScope) obj;
247                 return
248                         super.getXpath().equals(other.getXpath())
249                             &&
250                         (properties == null ? other.properties == null : properties.equals(other.properties))
251                             &&
252                         (id == null ? other.id == null : id.equals(other.id))
253                             &&
254                         (refs == null ? other.refs == null : Arrays.equals(refs, other.refs))
255                             &&
256                         (type == null ? other.type == null : type.equals(other.type))
257                             &&
258                         (itemId == null ? other.itemId == null : itemId.equals(other.itemId));
259         }
260         return false;
261     }
262 
263     protected void acquireProperty(ItemProp itemProp) {
264         List<ItemProp> itemProps = properties.computeIfAbsent(itemProp.getName(), k -> new ArrayList<>());
265         if (!itemProps.contains(itemProp))
266             itemProps.add(itemProp);
267     }
268 
269     protected void disownProperty(ItemProp itemProp) {
270         List<ItemProp> propList = properties.get(itemProp.getName());
271         if (propList != null)
272             propList.remove(itemProp);
273     }
274 
275     private String toJSON(String[] in) {
276         StringBuilder sb = new StringBuilder();
277         sb.append('[');
278         for (int i = 0; i < in.length; i++) {
279             sb.append("\"");
280             sb.append(in[i]);
281             sb.append("\"");
282             if (i < in.length - 1) {
283                 sb.append(", ");
284             }
285         }
286         sb.append(']');
287         return sb.toString();
288     }
289 
290 }