View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.any23.extractor.microdata;
18  
19  import org.apache.any23.extractor.html.DomUtils;
20  import org.apache.any23.rdf.RDFUtils;
21  import org.apache.commons.lang3.StringUtils;
22  import org.eclipse.rdf4j.model.IRI;
23  import org.eclipse.rdf4j.model.Literal;
24  import org.eclipse.rdf4j.model.datatypes.XMLDatatypeUtil;
25  import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
26  import org.jsoup.parser.Tag;
27  import org.w3c.dom.Document;
28  import org.w3c.dom.Element;
29  import org.w3c.dom.NamedNodeMap;
30  import org.w3c.dom.Node;
31  import org.w3c.dom.NodeList;
32  import org.w3c.dom.traversal.DocumentTraversal;
33  import org.w3c.dom.traversal.NodeFilter;
34  import org.w3c.dom.traversal.TreeWalker;
35  
36  import java.io.PrintStream;
37  import java.util.ArrayList;
38  import java.util.Arrays;
39  import java.util.Collections;
40  import java.util.HashMap;
41  import java.util.HashSet;
42  import java.util.LinkedHashSet;
43  import java.util.List;
44  import java.util.Locale;
45  import java.util.Map;
46  import java.util.Set;
47  import java.util.stream.Collectors;
48  
49  /**
50   * This class provides utility methods for handling <b>Microdata</b> nodes contained within a <i>DOM</i> document.
51   *
52   * @author Michele Mostarda (mostarda@fbk.eu)
53   * @author Hans Brende (hansbrende@apache.org)
54   */
55  public class MicrodataParser {
56  
57      enum ErrorMode {
58          /** This mode raises an exception at first encountered error. */
59          STOP_AT_FIRST_ERROR,
60          /** This mode produces a full error report. */
61          FULL_REPORT
62      }
63  
64      private final Document document;
65  
66      /**
67       * This set holds the name of properties being dereferenced. The {@link #deferProperties(String...)} checks first if
68       * the required dereference has been already asked, if so raises a loop detection error. This map works in
69       * coordination with {@link #dereferenceRecursionCounter}, so that at the end of {@link #deferProperties(String...)}
70       * call recursion the loopDetectorSet can be cleaned up.
71       */
72      private final Set<String> loopDetectorSet = new HashSet<>();
73  
74      /**
75       * {@link ItemScope} cache.
76       */
77      private final Map<Node, ItemScope> itemScopes = new HashMap<>();
78  
79      /**
80       * {@link ItemPropValue} cache.
81       */
82      private final Map<Node, ItemPropValue> itemPropValues = new HashMap<>();
83  
84      /**
85       * Counts the recursive call of {@link #deferProperties(String...)}. It helps to cleanup the
86       * {@link #loopDetectorSet} when recursion ends.
87       */
88      private int dereferenceRecursionCounter = 0;
89  
90      /**
91       * Current error mode.
92       */
93      private ErrorMode errorMode = ErrorMode.FULL_REPORT;
94  
95      /**
96       * List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FULL_REPORT}.
97       */
98      private final List<MicrodataParserException> errors = new ArrayList<>();
99  
100     public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
101     public static final String ITEMPROP_ATTRIBUTE = "itemprop";
102     private static final String REVERSE_ITEMPROP_ATTRIBUTE = "itemprop-reverse";
103 
104     /**
105      * List of tags providing the <code>src</code> property.
106      */
107     public static final Set<String> SRC_TAGS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList("audio",
108             "embed", "frame", "iframe", "img", "source", "track", "video", "input", "layer", "script", "textarea")));
109 
110     /**
111      * List of tags providing the <code>href</code> property.
112      */
113     public static final Set<String> HREF_TAGS = Collections
114             .unmodifiableSet(new HashSet<String>(Arrays.asList("a", "area", "link")));
115 
116     public MicrodataParser(Document document) {
117         if (document == null) {
118             throw new NullPointerException("Document cannot be null.");
119         }
120         this.document = document;
121     }
122 
123     /**
124      * Returns all the <i>itemScope</i>s detected within the given root node.
125      *
126      * @param node
127      *            root node to search in.
128      * 
129      * @return list of detected items.
130      */
131     public static List<Node> getItemScopeNodes(Node node) {
132         return DomUtils.findAllByAttributeName(node, ITEMSCOPE_ATTRIBUTE);
133     }
134 
135     /**
136      * Check whether a node is an <i>itemScope</i>.
137      *
138      * @param node
139      *            node to check.
140      * 
141      * @return <code>true</code> if the node is an <i>itemScope</i>., <code>false</code> otherwise.
142      */
143     public static boolean isItemScope(Node node) {
144         return DomUtils.readAttribute(node, ITEMSCOPE_ATTRIBUTE, null) != null;
145     }
146 
147     /**
148      * Returns all the <i>itemProp</i>s detected within the given root node.
149      *
150      * @param node
151      *            root node to search in.
152      * 
153      * @return list of detected items.
154      */
155     public static List<Node> getItemPropNodes(Node node) {
156         return DomUtils.findAllByAttributeName(node, ITEMPROP_ATTRIBUTE);
157     }
158 
159     /**
160      * Check whether a node is an <i>itemProp</i>.
161      *
162      * @param node
163      *            node to check.
164      * 
165      * @return <code>true</code> if the node is an <i>itemProp</i>., <code>false</code> otherwise.
166      */
167     public static boolean isItemProp(Node node) {
168         return DomUtils.readAttribute(node, ITEMPROP_ATTRIBUTE, null) != null;
169     }
170 
171     private static boolean isContainedInItemScope(Node node) {
172         for (Node p = node.getParentNode(); p != null; p = p.getParentNode()) {
173             NamedNodeMap attrs = p.getAttributes();
174             if (attrs != null && attrs.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
175                 return true;
176             }
177         }
178         return false;
179     }
180 
181     private static boolean isContainedInId(Node node, Set<String> ids) {
182         do {
183             String id = DomUtils.readAttribute(node, "id", null);
184             if (id != null && ids.contains(id)) {
185                 return true;
186             }
187             node = node.getParentNode();
188         } while (node != null);
189         return false;
190     }
191 
192     /**
193      * Returns only the <i>itemScope</i>s that are top level items.
194      *
195      * @param node
196      *            root node to search in.
197      * 
198      * @return list of detected top item scopes.
199      */
200     public static List<Node> getTopLevelItemScopeNodes(Node node) {
201         final List<Node> itemScopes = getItemScopeNodes(node);
202         final List<Node> topLevelItemScopes = new ArrayList<>();
203         final List<Node> possibles = new ArrayList<>();
204         for (Node itemScope : itemScopes) {
205             if (!isItemProp(itemScope) && DomUtils.readAttribute(itemScope, REVERSE_ITEMPROP_ATTRIBUTE, null) == null) {
206                 topLevelItemScopes.add(itemScope);
207             } else if (!isContainedInItemScope(itemScope)) {
208                 possibles.add(itemScope);
209             }
210         }
211 
212         if (!possibles.isEmpty()) {
213             Set<String> refIds = itemScopes.stream().flatMap(n -> Arrays.stream(itemrefIds(n)))
214                     .collect(Collectors.toSet());
215 
216             for (Node itemScope : possibles) {
217                 if (!isContainedInId(itemScope, refIds)) {
218                     topLevelItemScopes.add(itemScope);
219                 }
220             }
221         }
222 
223         return topLevelItemScopes;
224     }
225 
226     /**
227      * Returns all the <b>Microdata items</b> detected within the given <code>document</code>.
228      *
229      * @param document
230      *            document to be processed.
231      * @param errorMode
232      *            error management policy.
233      * 
234      * @return list of <b>itemscope</b> items.
235      * 
236      * @throws MicrodataParserException
237      *             if
238      *             <code>errorMode == {@link org.apache.any23.extractor.microdata.MicrodataParser.ErrorMode#STOP_AT_FIRST_ERROR}</code>
239      *             and an error occurs.
240      */
241     public static MicrodataParserReport getMicrodata(Document document, ErrorMode errorMode)
242             throws MicrodataParserException {
243         final List<Node> itemNodes = getTopLevelItemScopeNodes(document);
244         final List<ItemScope> items = new ArrayList<>();
245         final MicrodataParserataParser.html#MicrodataParser">MicrodataParser microdataParser = new MicrodataParser(document);
246         microdataParser.setErrorMode(errorMode);
247         for (Node itemNode : itemNodes) {
248             items.add(microdataParser.getItemScope(itemNode));
249         }
250         return new MicrodataParserReport(items.toArray(new ItemScope[items.size()]), microdataParser.getErrors());
251     }
252 
253     /**
254      * Returns all the <b>Microdata items</b> detected within the given <code>document</code>, works in full report
255      * mode.
256      *
257      * @param document
258      *            document to be processed.
259      * 
260      * @return list of <b>itemscope</b> items.
261      */
262     public static MicrodataParserReport getMicrodata(Document document) {
263         try {
264             return getMicrodata(document, ErrorMode.FULL_REPORT);
265         } catch (MicrodataParserException mpe) {
266             throw new IllegalStateException("Unexpected exception.", mpe);
267         }
268     }
269 
270     /**
271      * Returns a <i>JSON</i> containing the list of all extracted Microdata, as described at
272      * <a href="http://www.w3.org/TR/microdata/#json">Microdata JSON Specification</a>.
273      *
274      * @param document
275      *            document to be processed.
276      * @param ps
277      *            the {@link java.io.PrintStream} to write JSON to
278      */
279     public static void getMicrodataAsJSON(Document document, PrintStream ps) {
280         final MicrodataParserReport report = getMicrodata(document);
281         final ItemScope[] itemScopes = report.getDetectedItemScopes();
282         final MicrodataParserException[] errors = report.getErrors();
283 
284         ps.append("{ ");
285 
286         // Results.
287         ps.append("\"result\" : [");
288         for (int i = 0; i < itemScopes.length; i++) {
289             if (i > 0) {
290                 ps.print(", ");
291             }
292             ps.print(itemScopes[i].toJSON());
293         }
294         ps.append("] ");
295 
296         // Errors.
297         if (errors != null && errors.length > 0) {
298             ps.append(", ");
299             ps.append("\"errors\" : [");
300             for (int i = 0; i < errors.length; i++) {
301                 if (i > 0) {
302                     ps.print(", ");
303                 }
304                 ps.print(errors[i].toJSON());
305             }
306             ps.append("] ");
307         }
308 
309         ps.append("}");
310     }
311 
312     public void setErrorMode(ErrorMode errorMode) {
313         if (errorMode == null)
314             throw new IllegalArgumentException("errorMode must be not null.");
315         this.errorMode = errorMode;
316     }
317 
318     public ErrorMode getErrorMode() {
319         return this.errorMode;
320     }
321 
322     public MicrodataParserException[] getErrors() {
323         return errors == null ? new MicrodataParserException[0]
324                 : errors.toArray(new MicrodataParserException[errors.size()]);
325     }
326 
327     /**
328      * Reads the value of a <b>itemprop</b> node.
329      *
330      * @param node
331      *            itemprop node.
332      * 
333      * @return value detected within the given <code>node</code>.
334      * 
335      * @throws MicrodataParserException
336      *             if an error occurs while extracting a nested item scope.
337      */
338     public ItemPropValue getPropertyValue(Node node) throws MicrodataParserException {
339         final ItemPropValue itemPropValue = itemPropValues.get(node);
340         if (itemPropValue != null)
341             return itemPropValue;
342 
343         if (isItemScope(node)) {
344             return new ItemPropValue(getItemScope(node), ItemPropValue.Type.Nested);
345         }
346 
347         final String nodeName = node.getNodeName().toLowerCase(Locale.ROOT);
348 
349         // see http://w3c.github.io/microdata-rdf/#dfn-property-values
350         if ("data".equals(nodeName) || "meter".equals(nodeName)) {
351             String value = value(node, "value");
352             Literal l;
353             if (XMLDatatypeUtil.isValidInteger(value)) {
354                 l = RDFUtils.literal(value, XMLSchema.INTEGER);
355             } else if (XMLDatatypeUtil.isValidDouble(value)) {
356                 l = RDFUtils.literal(value, XMLSchema.DOUBLE);
357             } else {
358                 l = RDFUtils.literal(value);
359             }
360             return new ItemPropValue(l);
361         }
362         if ("time".equals(nodeName)) {
363             String dateTimeStr = value(node, "datetime");
364             Literal l;
365             if (XMLDatatypeUtil.isValidDate(dateTimeStr)) {
366                 l = RDFUtils.literal(dateTimeStr, XMLSchema.DATE);
367             } else if (XMLDatatypeUtil.isValidTime(dateTimeStr)) {
368                 l = RDFUtils.literal(dateTimeStr, XMLSchema.TIME);
369             } else if (XMLDatatypeUtil.isValidDateTime(dateTimeStr)) {
370                 l = RDFUtils.literal(dateTimeStr, XMLSchema.DATETIME);
371             } else if (XMLDatatypeUtil.isValidGYearMonth(dateTimeStr)) {
372                 l = RDFUtils.literal(dateTimeStr, XMLSchema.GYEARMONTH);
373             } else if (XMLDatatypeUtil.isValidGYear(dateTimeStr)) {
374                 l = RDFUtils.literal(dateTimeStr, XMLSchema.GYEAR);
375             } else if (XMLDatatypeUtil.isValidDuration(dateTimeStr)) {
376                 l = RDFUtils.literal(dateTimeStr, XMLSchema.DURATION);
377             } else {
378                 l = RDFUtils.literal(dateTimeStr, getLanguage(node));
379             }
380             return new ItemPropValue(l);
381         }
382 
383         if (SRC_TAGS.contains(nodeName)) {
384             return link(node, "src");
385         }
386         if (HREF_TAGS.contains(nodeName)) {
387             return link(node, "href");
388         }
389 
390         if ("object".equals(nodeName)) {
391             return link(node, "data");
392         }
393 
394         String val = DomUtils.readAttribute(node, "content", null);
395         if (val != null) {
396             return new ItemPropValue(RDFUtils.literal(val, getLanguage(node)));
397         }
398 
399         Literal l = RDFUtils.literal(textContent(node), getLanguage(node));
400         final ItemPropValueropValue.html#ItemPropValue">ItemPropValue newItemPropValue = new ItemPropValue(l);
401         itemPropValues.put(node, newItemPropValue);
402         return newItemPropValue;
403     }
404 
405     private static String textContent(Node node) {
406         StringBuilder content = new StringBuilder();
407         appendFormatted(node, content, false);
408         return content.toString();
409     }
410 
411     private static boolean shouldSeparateWithNewline(CharSequence s0, CharSequence s1) {
412         for (int i = 0, len = s1.length(); i < len; i++) {
413             char ch = s1.charAt(i);
414             if (ch == '\n' || ch == '\r') {
415                 return false;
416             }
417             if (!Character.isWhitespace(ch)) {
418                 break;
419             }
420         }
421         for (int i = s0.length() - 1; i >= 0; i--) {
422             char ch = s0.charAt(i);
423             if (ch == '\n' || ch == '\r') {
424                 return false;
425             }
426             if (!Character.isWhitespace(ch)) {
427                 return true;
428             }
429         }
430         return false;
431     }
432 
433     private static boolean appendFormatted(Node node, StringBuilder sb, boolean needsNewline) {
434         switch (node.getNodeType()) {
435         case Node.TEXT_NODE:
436             String text = node.getTextContent();
437             if (text.isEmpty()) {
438                 return needsNewline;
439             }
440             if (needsNewline && shouldSeparateWithNewline(sb, text)) {
441                 sb.append('\n');
442             }
443             sb.append(text);
444             return false;
445         case Node.ELEMENT_NODE:
446             final String nodeName = node.getNodeName().toLowerCase(Locale.ENGLISH);
447             final boolean thisNeedsNewline = "br".equals(nodeName) || Tag.valueOf(nodeName).isBlock();
448             final NodeList children = node.getChildNodes();
449             boolean prevChildNeedsNewline = needsNewline || thisNeedsNewline;
450             for (int i = 0, len = children.getLength(); i < len; i++) {
451                 prevChildNeedsNewline = appendFormatted(children.item(i), sb, prevChildNeedsNewline);
452             }
453             return prevChildNeedsNewline || thisNeedsNewline;
454         default:
455             return needsNewline;
456         }
457     }
458 
459     private static String content(Node node, String attrName) {
460         NamedNodeMap attributes = node.getAttributes();
461         if (attributes != null) {
462             Node attr = attributes.getNamedItem("content");
463             if (attr != null) {
464                 return attr.getNodeValue();
465             }
466             attr = attributes.getNamedItem(attrName);
467             if (attr != null) {
468                 return attr.getNodeValue();
469             }
470         }
471         return null;
472     }
473 
474     private static String value(Node node, String attrName) {
475         String content = content(node, attrName);
476         return StringUtils.stripToEmpty(content != null ? content : node.getTextContent());
477     }
478 
479     private static ItemPropValue link(Node node, String attrName) {
480         String content = content(node, attrName);
481         return content == null ? new ItemPropValue(RDFUtils.literal(""))
482                 : new ItemPropValue(content, ItemPropValue.Type.Link);
483     }
484 
485     // see https://www.w3.org/TR/html52/dom.html#the-lang-and-xmllang-attributes
486     private static String getLanguage(Node node) {
487         String lang;
488         do {
489             lang = DomUtils.readAttribute(node, "xml:lang", null);
490             if (StringUtils.isNotBlank(lang)) {
491                 return lang.trim();
492             }
493             lang = DomUtils.readAttribute(node, "lang", null);
494             if (StringUtils.isNotBlank(lang)) {
495                 return lang.trim();
496             }
497             node = node.getParentNode();
498         } while (node != null);
499         return null;
500     }
501 
502     /**
503      * Returns all the <b>itemprop</b>s for the given <b>itemscope</b> node.
504      *
505      * @param scopeNode
506      *            node representing the <b>itemscope</b>
507      * @param skipRoot
508      *            if <code>true</code> the given root <code>node</code> will be not read as a property, even if it
509      *            contains the <b>itemprop</b> attribute.
510      * 
511      * @return the list of <b>itemprop</b>s detected within the given <b>itemscope</b>.
512      * 
513      * @throws MicrodataParserException
514      *             if an error occurs while retrieving an property value.
515      */
516     public List<ItemProp> getItemProps(final Node scopeNode, boolean skipRoot) throws MicrodataParserException {
517         final Set<Node> accepted = new LinkedHashSet<>();
518 
519         boolean skipRootChildren = false;
520         if (!skipRoot) {
521             NamedNodeMap attributes = scopeNode.getAttributes();
522             if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null
523                     || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null) {
524                 accepted.add(scopeNode);
525             }
526             if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
527                 skipRootChildren = true;
528             }
529         }
530 
531         if (!skipRootChildren) {
532             // TreeWalker to walk DOM tree starting with the scopeNode. Nodes maybe visited multiple times.
533             TreeWalker treeWalker = ((DocumentTraversal) scopeNode.getOwnerDocument()).createTreeWalker(scopeNode,
534                     NodeFilter.SHOW_ELEMENT, new NodeFilter() {
535                         @Override
536                         public short acceptNode(Node node) {
537                             if (node.getNodeType() == Node.ELEMENT_NODE) {
538                                 NamedNodeMap attributes = node.getAttributes();
539                                 if ((attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null
540                                         || attributes.getNamedItem(REVERSE_ITEMPROP_ATTRIBUTE) != null)
541                                         && scopeNode != node) {
542                                     accepted.add(node);
543                                 }
544 
545                                 if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
546                                     // Don't visit descendants of nodes that define a new scope
547                                     return FILTER_REJECT;
548                                 }
549                             }
550                             return FILTER_ACCEPT;
551                         }
552                     }, false);
553 
554             // To populate accepted we only need to walk the tree.
555             while (treeWalker.nextNode() != null)
556                 ;
557         }
558 
559         final List<ItemProp> result = new ArrayList<>();
560         for (Node itemPropNode : accepted) {
561             final String itemProp = DomUtils.readAttribute(itemPropNode, ITEMPROP_ATTRIBUTE, null);
562             final String reverseProp = DomUtils.readAttribute(itemPropNode, REVERSE_ITEMPROP_ATTRIBUTE, null);
563 
564             boolean hasItemProp = StringUtils.isNotBlank(itemProp);
565             boolean hasReverseProp = StringUtils.isNotBlank(reverseProp);
566 
567             if (!hasItemProp && !hasReverseProp) {
568                 manageError(new MicrodataParserException("invalid property name '" + itemProp + "'", itemPropNode));
569                 continue;
570             }
571 
572             ItemPropValue itemPropValue;
573             try {
574                 itemPropValue = getPropertyValue(itemPropNode);
575             } catch (MicrodataParserException mpe) {
576                 manageError(mpe);
577                 continue;
578             }
579             if (hasItemProp) {
580                 for (String propertyName : itemProp.trim().split("\\s+")) {
581                     result.add(
582                             new ItemProp(DomUtils.getXPathForNode(itemPropNode), propertyName, itemPropValue, false));
583                 }
584             }
585             if (hasReverseProp) {
586                 if (itemPropValue.literal != null) {
587                     manageError(new MicrodataParserException(REVERSE_ITEMPROP_ATTRIBUTE + " cannot point to a literal",
588                             itemPropNode));
589                     continue;
590                 }
591                 for (String propertyName : reverseProp.trim().split("\\s+")) {
592                     result.add(new ItemProp(DomUtils.getXPathForNode(itemPropNode), propertyName, itemPropValue, true));
593                 }
594             }
595         }
596         return result;
597     }
598 
599     /**
600      * Given a document and a list of <b>itemprop</b> names this method will return such <b>itemprops</b>.
601      * 
602      * @param refs
603      *            list of references.
604      * 
605      * @return list of retrieved <b>itemprop</b>s.
606      * 
607      * @throws MicrodataParserException
608      *             if a loop is detected or a property name is missing.
609      */
610     public ItemProp[] deferProperties(String... refs) throws MicrodataParserException {
611         Document document = this.document;
612         dereferenceRecursionCounter++;
613         final List<ItemProp> result = new ArrayList<>();
614         try {
615             for (String ref : refs) {
616                 if (loopDetectorSet.contains(ref)) {
617                     throw new MicrodataParserException(String.format(Locale.ROOT,
618                             "Loop detected with depth %d while dereferencing itemProp '%s' .",
619                             dereferenceRecursionCounter - 1, ref), null);
620                 }
621                 loopDetectorSet.add(ref);
622                 Element element = document.getElementById(ref);
623                 if (element == null) {
624                     manageError(new MicrodataParserException(
625                             String.format(Locale.ROOT, "Unknown itemProp id '%s'", ref), null));
626                     continue;
627                 }
628                 result.addAll(getItemProps(element, false));
629             }
630         } catch (MicrodataParserException mpe) {
631             if (dereferenceRecursionCounter == 1)
632                 manageError(mpe);
633             else
634                 throw mpe; // Recursion end, this the the top call.
635         } finally {
636             dereferenceRecursionCounter--;
637             if (dereferenceRecursionCounter == 0) { // Recursion end, this the the top call.
638                 loopDetectorSet.clear();
639             }
640         }
641         return result.toArray(new ItemProp[result.size()]);
642     }
643 
644     private static final String[] EMPTY_STRINGS = new String[0];
645 
646     private static String[] itemrefIds(Node node) {
647         String itemref = DomUtils.readAttribute(node, "itemref", null);
648         return StringUtils.isBlank(itemref) ? EMPTY_STRINGS : itemref.trim().split("\\s+");
649     }
650 
651     /**
652      * Returns the {@link ItemScope} instance described within the specified <code>node</code>.
653      *
654      * @param node
655      *            node describing an <i>itemscope</i>.
656      * 
657      * @return instance of ItemScope object.
658      * 
659      * @throws MicrodataParserException
660      *             if an error occurs while dereferencing properties.
661      */
662     public ItemScope getItemScope(Node node) throws MicrodataParserException {
663         final ItemScope itemScope = itemScopes.get(node);
664         if (itemScope != null)
665             return itemScope;
666 
667         final String id = DomUtils.readAttribute(node, "id", null);
668         final String itemType = DomUtils.readAttribute(node, "itemtype", null);
669         final String itemId = DomUtils.readAttribute(node, "itemid", null);
670 
671         final List<ItemProp> itemProps = getItemProps(node, true);
672         final String[] itemrefIDs = itemrefIds(node);
673         final ItemProp[] deferredProperties;
674         try {
675             deferredProperties = deferProperties(itemrefIDs);
676         } catch (MicrodataParserException mpe) {
677             mpe.setErrorNode(node);
678             throw mpe;
679         }
680         for (ItemProp deferredProperty : deferredProperties) {
681             if (itemProps.contains(deferredProperty)) {
682                 manageError(new MicrodataParserException(
683                         String.format(Locale.ROOT, "Duplicated deferred itemProp '%s'.", deferredProperty.getName()),
684                         node));
685                 continue;
686             }
687             itemProps.add(deferredProperty);
688         }
689 
690         List<IRI> types;
691         if (itemType == null) {
692             types = Collections.emptyList();
693         } else {
694             types = new ArrayList<>();
695             boolean canConcatWithPrev = false;
696             for (String s : itemType.trim().split("\\s+")) {
697                 try {
698                     canConcatWithPrev = types.addAll(ItemScope.stringToSingletonIRI(s));
699                 } catch (RuntimeException e) {
700                     if (canConcatWithPrev) {
701                         int lastInd = types.size() - 1;
702                         try {
703                             List<IRI> secondTry = ItemScope
704                                     .stringToSingletonIRI(types.get(lastInd).stringValue() + " " + s);
705                             types.remove(lastInd);
706                             canConcatWithPrev = types.addAll(secondTry);
707                         } catch (RuntimeException e2) {
708                             manageError(new MicrodataParserException(e.getMessage(), node));
709                             canConcatWithPrev = false;
710                         }
711                     } else {
712                         manageError(new MicrodataParserException(e.getMessage(), node));
713                     }
714                 }
715             }
716         }
717 
718         final ItemScopeta/ItemScope.html#ItemScope">ItemScope newItemScope = new ItemScope(DomUtils.getXPathForNode(node),
719                 itemProps.toArray(new ItemProp[itemProps.size()]), id, itemrefIDs, types, itemId);
720         itemScopes.put(node, newItemScope);
721         return newItemScope;
722     }
723 
724     private void manageError(MicrodataParserExceptiondataParserException.html#MicrodataParserException">MicrodataParserException mpe) throws MicrodataParserException {
725         switch (errorMode) {
726         case FULL_REPORT:
727             errors.add(mpe);
728             break;
729         case STOP_AT_FIRST_ERROR:
730             throw mpe;
731         default:
732             throw new IllegalStateException("Unsupported mode " + errorMode);
733         }
734     }
735 
736 }