View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.microdata;
19  
20  import java.net.MalformedURLException;
21  import java.net.URL;
22  import java.text.ParseException;
23  import java.text.SimpleDateFormat;
24  import java.util.Date;
25  import java.util.Objects;
26  
27  import org.apache.any23.util.StringUtils;
28  import org.eclipse.rdf4j.model.Literal;
29  import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
30  
31  /**
32   * Describes a possible value for a <b>Microdata item property</b>.
33   *
34   * @author Michele Mostarda (mostarda@fbk.eu)
35   */
36  public class ItemPropValue {
37  
38      /**
39       * Internal content value.
40       */
41      private final Object content;
42  
43      /**
44       * Content type.
45       */
46      private final Type type;
47  
48      private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<>();
49  
50      /**
51       * Supported types.
52       */
53      public enum Type {
54          Plain(String.class),
55          Link(String.class),
56          Date(Date.class),
57          Nested(ItemScope.class);
58  
59          Type(Class<?> contentClass) {
60              this.contentClass = contentClass;
61          }
62  
63          private final Class<?> contentClass;
64  
65          private Object checkClass(Object content) {
66              Objects.requireNonNull(content, "content cannot be null");
67              if (!contentClass.isInstance(content)) {
68                  throw new IllegalArgumentException("content must be a " + contentClass.getName() + " when type is " + this);
69              }
70              return content;
71          }
72      }
73  
74      public static Date parseDateTime(String dateStr) throws ParseException {
75          return getSdf().parse(dateStr);
76      }
77  
78      public static String formatDateTime(Date in) {
79          return getSdf().format(in);
80      }
81      
82      private static SimpleDateFormat getSdf() {
83          SimpleDateFormat simpleDateFormat = sdf.get();
84          if (simpleDateFormat == null) {
85              simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
86              sdf.set(simpleDateFormat);
87          }
88          return simpleDateFormat;
89      }
90  
91      /**
92       * Constructor.
93       *
94       * @param content content object.
95       * @param type content type.
96       */
97      public ItemPropValue(Object content, Type type) {
98          this.type = Objects.requireNonNull(type, "type cannot be null");
99          this.content = type.checkClass(content);
100         this.literal = null;
101     }
102 
103     ItemPropValue(Literal literal) {
104         this.literal = literal;
105 
106         Type type;
107         Object content;
108 
109         //for backwards compatibility:
110         if (XMLSchema.DATE.equals(literal.getDatatype()) || XMLSchema.DATETIME.equals(literal.getDatatype())) {
111             try {
112                 content = parseDateTime(literal.getLabel());
113                 type = Type.Date;
114             } catch (Exception e) {
115                 content = literal.getLabel();
116                 type = Type.Plain;
117             }
118         } else {
119             content = literal.getLabel();
120             type = Type.Plain;
121         }
122         this.type = type;
123         this.content = content;
124     }
125 
126     final Literal literal;
127 
128     /**
129      * @return the content object.
130      */
131     public Object getContent() {
132         return content;
133     }
134 
135     /**
136      * @return the content type.
137      */
138     public Type getType() {
139         return type;
140     }
141 
142    /**
143      * @return <code>true</code> if type is plain text.
144      */
145     public boolean isPlain() {
146         return type == Type.Plain;
147     }
148 
149     /**
150      * @return <code>true</code> if type is a link.
151      */
152     public boolean isLink() {
153         return type == Type.Link;
154     }
155 
156     /**
157      * @return <code>true</code> if type is a date.
158      */
159     public boolean isDate() {
160         return type == Type.Date;
161     }
162 
163     /**
164      * @return <code>true</code> if type is a nested {@link ItemScope}.
165      */
166     public boolean isNested() {
167         return type == Type.Nested;
168     }
169 
170     /**
171      * @return <code>true</code> if type is an integer.
172      */
173     public boolean isInteger() {
174         if(type != Type.Plain)
175             return false;
176          try {
177              Integer.parseInt((String) content);
178              return true;
179          } catch (Exception e) {
180              return false;
181          }
182      }
183 
184     /**
185      * @return <code>true</code> if type is a float.
186      */
187      public boolean isFloat() {
188          if(type != Type.Plain)
189              return false;
190          try {
191              Float.parseFloat((String) content);
192              return true;
193          } catch (Exception e) {
194              return false;
195          }
196      }
197 
198     /**
199      * @return <code>true</code> if type is a number.
200      */
201      public boolean isNumber() {
202          return isInteger() || isFloat();
203      }
204 
205     /**
206      * @return the content value as integer, or raises an exception.
207      * @throws NumberFormatException if the content is not an integer.
208      * @throws ClassCastException if content is not plain.
209      */
210      public int getAsInteger() {
211          return Integer.parseInt((String) content);
212      }
213 
214     /**
215      * @return the content value as float, or raises an exception.
216      * @throws NumberFormatException if the content is not an float.
217      * @throws ClassCastException if content is not plain.
218      */
219      public float getAsFloat() {
220          return Float.parseFloat((String) content);
221      }
222 
223 
224     /**
225      * @return the content as {@link Date}
226      *         if <code>type == Type.DateTime</code>,
227      * @throws ClassCastException if content is not a valid date.
228      */
229     public Date getAsDate() {
230         return (Date) content;
231     }
232 
233     /**
234      * @return the content value as URL, or raises an exception.
235      */
236     public URL getAsLink() {
237         try {
238             return new URL((String) content);
239         } catch (MalformedURLException murle) {
240             throw new IllegalStateException("Error while parsing IRI.", murle);
241         }
242     }
243 
244     /**
245      * @return the content value as {@link ItemScope}.
246      */
247     public ItemScope getAsNested() {
248         return (ItemScope) content;
249     }
250 
251     public String toJSON() {
252         String contentStr;
253         if(content instanceof String) {
254             contentStr = "\"" + StringUtils.escapeAsJSONString((String) content) + "\"";
255         } else if(content instanceof Date) {
256             contentStr = "\"" + getSdf().format((Date) content) + "\"";
257         } else {
258             contentStr = content.toString();
259         }
260 
261         return String.format( "{ \"content\" : %s, \"type\" : \"%s\" }", contentStr, type );
262     }
263 
264     @Override
265     public String toString() {
266         return toJSON();
267     }
268 
269     @Override
270     public int hashCode() {
271         return content.hashCode() * type.hashCode() * 2;
272     }
273 
274     @Override
275     public boolean equals(Object obj) {
276         if(obj == null) {
277             return false;
278         }
279         if(obj == this) {
280             return true;
281         }
282         if(obj instanceof ItemPropValue) {
283             final ItemPropValue../../../org/apache/any23/extractor/microdata/ItemPropValue.html#ItemPropValue">ItemPropValue other = (ItemPropValue) obj;
284             return content.equals(other.content) && type.equals(other.type);
285         }
286         return false;
287     }
288 }