View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.microdata;
19  
20  import java.net.MalformedURLException;
21  import java.net.URL;
22  import java.text.ParseException;
23  import java.text.SimpleDateFormat;
24  import java.util.Date;
25  import java.util.Locale;
26  import java.util.Objects;
27  
28  import org.apache.any23.util.StringUtils;
29  import org.eclipse.rdf4j.model.Literal;
30  import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
31  
32  /**
33   * Describes a possible value for a <b>Microdata item property</b>.
34   *
35   * @author Michele Mostarda (mostarda@fbk.eu)
36   */
37  public class ItemPropValue {
38  
39      /**
40       * Internal content value.
41       */
42      private final Object content;
43  
44      /**
45       * Content type.
46       */
47      private final Type type;
48  
49      private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<>();
50  
51      /**
52       * Supported types.
53       */
54      public enum Type {
55          Plain(String.class), Link(String.class), Date(Date.class), Nested(ItemScope.class);
56  
57          Type(Class<?> contentClass) {
58              this.contentClass = contentClass;
59          }
60  
61          private final Class<?> contentClass;
62  
63          private Object checkClass(Object content) {
64              Objects.requireNonNull(content, "content cannot be null");
65              if (!contentClass.isInstance(content)) {
66                  throw new IllegalArgumentException(
67                          "content must be a " + contentClass.getName() + " when type is " + this);
68              }
69              return content;
70          }
71      }
72  
73      public static Date parseDateTime(String dateStr) throws ParseException {
74          return getSdf().parse(dateStr);
75      }
76  
77      public static String formatDateTime(Date in) {
78          return getSdf().format(in);
79      }
80  
81      private static SimpleDateFormat getSdf() {
82          SimpleDateFormat simpleDateFormat = sdf.get();
83          if (simpleDateFormat == null) {
84              simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
85              sdf.set(simpleDateFormat);
86          }
87          return simpleDateFormat;
88      }
89  
90      /**
91       * Constructor.
92       *
93       * @param content
94       *            content object.
95       * @param type
96       *            content type.
97       */
98      public ItemPropValue(Object content, Type type) {
99          this.type = Objects.requireNonNull(type, "type cannot be null");
100         this.content = type.checkClass(content);
101         this.literal = null;
102     }
103 
104     ItemPropValue(Literal literal) {
105         this.literal = literal;
106 
107         Type type;
108         Object content;
109 
110         // for backwards compatibility:
111         if (XMLSchema.DATE.equals(literal.getDatatype()) || XMLSchema.DATETIME.equals(literal.getDatatype())) {
112             try {
113                 content = parseDateTime(literal.getLabel());
114                 type = Type.Date;
115             } catch (Exception e) {
116                 content = literal.getLabel();
117                 type = Type.Plain;
118             }
119         } else {
120             content = literal.getLabel();
121             type = Type.Plain;
122         }
123         this.type = type;
124         this.content = content;
125     }
126 
127     final Literal literal;
128 
129     /**
130      * @return the content object.
131      */
132     public Object getContent() {
133         return content;
134     }
135 
136     /**
137      * @return the content type.
138      */
139     public Type getType() {
140         return type;
141     }
142 
143     /**
144      * @return <code>true</code> if type is plain text.
145      */
146     public boolean isPlain() {
147         return type == Type.Plain;
148     }
149 
150     /**
151      * @return <code>true</code> if type is a link.
152      */
153     public boolean isLink() {
154         return type == Type.Link;
155     }
156 
157     /**
158      * @return <code>true</code> if type is a date.
159      */
160     public boolean isDate() {
161         return type == Type.Date;
162     }
163 
164     /**
165      * @return <code>true</code> if type is a nested {@link ItemScope}.
166      */
167     public boolean isNested() {
168         return type == Type.Nested;
169     }
170 
171     /**
172      * @return <code>true</code> if type is an integer.
173      */
174     public boolean isInteger() {
175         if (type != Type.Plain)
176             return false;
177         try {
178             Integer.parseInt((String) content);
179             return true;
180         } catch (Exception e) {
181             return false;
182         }
183     }
184 
185     /**
186      * @return <code>true</code> if type is a float.
187      */
188     public boolean isFloat() {
189         if (type != Type.Plain)
190             return false;
191         try {
192             Float.parseFloat((String) content);
193             return true;
194         } catch (Exception e) {
195             return false;
196         }
197     }
198 
199     /**
200      * @return <code>true</code> if type is a number.
201      */
202     public boolean isNumber() {
203         return isInteger() || isFloat();
204     }
205 
206     /**
207      * @return the content value as integer, or raises an exception.
208      * 
209      * @throws NumberFormatException
210      *             if the content is not an integer.
211      * @throws ClassCastException
212      *             if content is not plain.
213      */
214     public int getAsInteger() {
215         return Integer.parseInt((String) content);
216     }
217 
218     /**
219      * @return the content value as float, or raises an exception.
220      * 
221      * @throws NumberFormatException
222      *             if the content is not an float.
223      * @throws ClassCastException
224      *             if content is not plain.
225      */
226     public float getAsFloat() {
227         return Float.parseFloat((String) content);
228     }
229 
230     /**
231      * @return the content as {@link Date} if <code>type == Type.DateTime</code>,
232      * 
233      * @throws ClassCastException
234      *             if content is not a valid date.
235      */
236     public Date getAsDate() {
237         return (Date) content;
238     }
239 
240     /**
241      * @return the content value as URL, or raises an exception.
242      */
243     public URL getAsLink() {
244         try {
245             return new URL((String) content);
246         } catch (MalformedURLException murle) {
247             throw new IllegalStateException("Error while parsing IRI.", murle);
248         }
249     }
250 
251     /**
252      * @return the content value as {@link ItemScope}.
253      */
254     public ItemScope getAsNested() {
255         return (ItemScope) content;
256     }
257 
258     public String toJSON() {
259         String contentStr;
260         if (content instanceof String) {
261             contentStr = "\"" + StringUtils.escapeAsJSONString((String) content) + "\"";
262         } else if (content instanceof Date) {
263             contentStr = "\"" + getSdf().format((Date) content) + "\"";
264         } else {
265             contentStr = content.toString();
266         }
267 
268         return String.format(Locale.ROOT, "{ \"content\" : %s, \"type\" : \"%s\" }", contentStr, type);
269     }
270 
271     @Override
272     public String toString() {
273         return toJSON();
274     }
275 
276     @Override
277     public int hashCode() {
278         return content.hashCode() * type.hashCode() * 2;
279     }
280 
281     @Override
282     public boolean equals(Object obj) {
283         if (obj == null) {
284             return false;
285         }
286         if (obj == this) {
287             return true;
288         }
289         if (obj instanceof ItemPropValue) {
290             final ItemPropValue../../../org/apache/any23/extractor/microdata/ItemPropValue.html#ItemPropValue">ItemPropValue other = (ItemPropValue) obj;
291             return content.equals(other.content) && type.equals(other.type);
292         }
293         return false;
294     }
295 }