View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.Configuration;
21  import org.apache.any23.configuration.DefaultConfiguration;
22  
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  /**
27   * This class models the parameters to be used to perform an extraction. See org.apache.any23.Any23 for more details.
28   * 
29   * @author Michele Mostarda (mostarda@fbk.eu)
30   */
31  public class ExtractionParameters {
32  
33      private final Configuration configuration;
34  
35      private final ValidationMode extractionMode;
36  
37      private final Map<String, Boolean> extractionFlags;
38  
39      private final Map<String, String> extractionProperties;
40  
41      public static final String METADATA_DOMAIN_PER_ENTITY_FLAG = "any23.extraction.metadata.domain.per.entity";
42  
43      public static final String METADATA_NESTING_FLAG = "any23.extraction.metadata.nesting";
44  
45      public static final String METADATA_TIMESIZE_FLAG = "any23.extraction.metadata.timesize";
46  
47      public static final String EXTRACTION_CONTEXT_IRI_PROPERTY = "any23.extraction.context.iri";
48  
49      /**
50       * Constructor.
51       *
52       * @param configuration
53       *            underlying configuration.
54       * @param extractionMode
55       *            specifies the required extraction mode.
56       * @param extractionFlags
57       *            map of specific flags used for extraction. If not specified they will be retrieved by the default
58       *            {@link org.apache.any23.configuration.Configuration}.
59       * @param extractionProperties
60       *            map of specific properties used for extraction. If not specified they will ne retrieved by the default
61       *            {@link org.apache.any23.configuration.Configuration}.
62       */
63      public ExtractionParameters(Configuration configuration, ValidationMode extractionMode,
64              Map<String, Boolean> extractionFlags, Map<String, String> extractionProperties) {
65          if (configuration == null) {
66              throw new NullPointerException("Configuration cannot be null.");
67          }
68          if (extractionMode == null) {
69              throw new NullPointerException("Extraction mode cannot be null.");
70          }
71          this.configuration = configuration;
72          this.extractionMode = extractionMode;
73          this.extractionFlags = extractionFlags == null ? new HashMap<>() : new HashMap<>(extractionFlags);
74          this.extractionProperties = extractionProperties == null ? new HashMap<>()
75                  : new HashMap<>(extractionProperties);
76      }
77  
78      /**
79       * Constructor.
80       *
81       * @param configuration
82       *            underlying configuration.
83       * @param extractionMode
84       *            specifies the required extraction mode.
85       */
86      public ExtractionParameters(Configuration configuration, ValidationMode extractionMode) {
87          this(configuration, extractionMode, null, null);
88      }
89  
90      /**
91       * Constructor, allows to set explicitly the value for flag SingleDocumentExtraction#METADATA_NESTING_FLAG.
92       *
93       * @param configuration
94       *            the underlying configuration.
95       * @param extractionMode
96       *            specifies the required extraction mode.
97       * @param nesting
98       *            if <code>true</code> nesting triples will be expressed.
99       */
100     public ExtractionParameters(Configuration configuration, ValidationMode extractionMode, final boolean nesting) {
101         this(configuration, extractionMode, new HashMap<String, Boolean>() {
102             /**
103              * 
104              */
105             private static final long serialVersionUID = 1L;
106             {
107                 put(ExtractionParameters.METADATA_NESTING_FLAG, nesting);
108             }
109         }, null);
110     }
111 
112     /**
113      * @param c
114      *            the underlying configuration.
115      * 
116      * @return the default extraction parameters.
117      */
118     public static final ExtractionParameters newDefault(Configuration c) {
119         return new ExtractionParameters(c, ValidationMode.NONE);
120     }
121 
122     /**
123      * Creates the default extraction parameters with {@link org.apache.any23.configuration.DefaultConfiguration}.
124      *
125      * @return the default extraction parameters.
126      */
127     public static final ExtractionParameters newDefault() {
128         return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.NONE);
129     }
130 
131     /**
132      * Declares the supported validation actions.
133      */
134     public enum ValidationMode {
135         NONE, VALIDATE, VALIDATE_AND_FIX
136     }
137 
138     /**
139      * @return <code>true</code> if validation is active.
140      */
141     public boolean isValidate() {
142         return extractionMode == ValidationMode.VALIDATE || extractionMode == ValidationMode.VALIDATE_AND_FIX;
143     }
144 
145     /**
146      * @return <code>true</code> if fix is active.
147      */
148     public boolean isFix() {
149         return extractionMode == ValidationMode.VALIDATE_AND_FIX;
150     }
151 
152     /**
153      * Returns the value of the specified extraction flag, if the flag is undefined it will be retrieved by the default
154      * {@link org.apache.any23.configuration.Configuration}.
155      *
156      * @param flagName
157      *            name of flag.
158      * 
159      * @return flag value.
160      */
161     public boolean getFlag(String flagName) {
162         final Boolean value = extractionFlags.get(flagName);
163         if (value == null) {
164             return configuration.getFlagProperty(flagName);
165         }
166         return value;
167     }
168 
169     /**
170      * Sets the value for an extraction flag.
171      *
172      * @param flagName
173      *            flag name.
174      * @param value
175      *            new flag value.
176      * 
177      * @return the previous flag value.
178      */
179     public Boolean setFlag(String flagName, boolean value) {
180         checkPropertyExists(flagName);
181         validateValue("flag name", flagName);
182         return extractionFlags.put(flagName, value);
183     }
184 
185     /**
186      * Returns the value of the specified extraction property, if the property is undefined it will be retrieved by the
187      * default {@link org.apache.any23.configuration.Configuration}.
188      *
189      * @param propertyName
190      *            the property name.
191      * 
192      * @return the property value.
193      * 
194      * @throws IllegalArgumentException
195      *             if the property name is not defined in configuration.
196      */
197     public String getProperty(String propertyName) {
198         final String propertyValue = extractionProperties.get(propertyName);
199         if (propertyValue == null) {
200             return configuration.getPropertyOrFail(propertyName);
201         }
202         return propertyValue;
203     }
204 
205     /**
206      * Sets the value for an extraction property.
207      *
208      * @param propertyName
209      *            the property name.
210      * @param propertyValue
211      *            the property value.
212      * 
213      * @return the previous property value.
214      */
215     public String setProperty(String propertyName, String propertyValue) {
216         checkPropertyExists(propertyName);
217         validateValue("property name", propertyName);
218         validateValue("property value", propertyValue);
219         return extractionProperties.put(propertyName, propertyValue);
220     }
221 
222     @Override
223     public boolean equals(Object obj) {
224         if (obj == null) {
225             return false;
226         }
227         if (obj == this) {
228             return true;
229         }
230         if (obj instanceof ExtractionParameters) {
231             ExtractionParametersrg/apache/any23/extractor/ExtractionParameters.html#ExtractionParameters">ExtractionParameters other = (ExtractionParameters) obj;
232             return extractionMode == other.extractionMode && extractionFlags.equals(other.extractionFlags)
233                     && extractionProperties.equals(other.extractionProperties);
234         }
235         return false;
236     }
237 
238     @Override
239     public int hashCode() {
240         return extractionMode.hashCode() * 2 * extractionFlags.hashCode() * 3 * extractionProperties.hashCode() * 5;
241     }
242 
243     private void checkPropertyExists(String propertyName) {
244         if (!configuration.defineProperty(propertyName)) {
245             throw new IllegalArgumentException(
246                     String.format(java.util.Locale.ROOT, "Property '%s' is unknown and cannot be set.", propertyName));
247         }
248     }
249 
250     private void validateValue(String desc, String value) {
251         if (value == null || value.trim().length() == 0)
252             throw new IllegalArgumentException(String.format(java.util.Locale.ROOT, "Invalid %s: '%s'", desc, value));
253     }
254 }