View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.Configuration;
21  import org.apache.any23.configuration.DefaultConfiguration;
22  
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  /**
27   * This class models the parameters to be used to perform an extraction.
28   * See org.apache.any23.Any23 for more details.
29   * @author Michele Mostarda (mostarda@fbk.eu)
30   */
31  public class ExtractionParameters {
32  
33      private final Configuration configuration;
34  
35      private final ValidationMode extractionMode;
36  
37      private final Map<String, Boolean> extractionFlags;
38  
39      private final Map<String,String> extractionProperties;
40  
41      public static final String METADATA_DOMAIN_PER_ENTITY_FLAG  = "any23.extraction.metadata.domain.per.entity";
42  
43      public static final String METADATA_NESTING_FLAG            = "any23.extraction.metadata.nesting";
44  
45      public static final String METADATA_TIMESIZE_FLAG           = "any23.extraction.metadata.timesize";
46  
47      public static final String EXTRACTION_CONTEXT_IRI_PROPERTY = "any23.extraction.context.iri";
48  
49      /**
50       * Constructor.
51       *
52       * @param configuration underlying configuration.
53       * @param extractionMode specifies the required extraction mode.
54       * @param extractionFlags map of specific flags used for extraction. If not specified they will
55       *        be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
56       * @param extractionProperties map of specific properties used for extraction. If not specified
57       *        they will ne retrieved by the default {@link org.apache.any23.configuration.Configuration}.
58       */
59      public ExtractionParameters(
60              Configuration configuration,
61              ValidationMode extractionMode,
62              Map<String, Boolean> extractionFlags,
63              Map<String,String> extractionProperties
64      ) {
65          if(configuration == null) {
66              throw new NullPointerException("Configuration cannot be null.");
67          }
68          if(extractionMode == null) {
69              throw new NullPointerException("Extraction mode cannot be null.");
70          }
71          this.configuration  = configuration;
72          this.extractionMode = extractionMode;
73          this.extractionFlags =
74                  extractionFlags == null
75                          ?
76                  new HashMap<>()
77                          :
78                  new HashMap<>(extractionFlags);
79          this.extractionProperties =
80                  extractionProperties == null
81                          ?
82                  new HashMap<>()
83                          :
84                  new HashMap<>(extractionProperties);
85      }
86  
87      /**
88       * Constructor.
89       *
90       * @param configuration underlying configuration.
91       * @param extractionMode specifies the required extraction mode.
92       */
93      public ExtractionParameters(Configuration configuration, ValidationMode extractionMode) {
94          this(configuration, extractionMode, null, null);
95      }
96  
97      /**
98       * Constructor, allows to set explicitly the value for flag
99       * SingleDocumentExtraction#METADATA_NESTING_FLAG.
100      *
101      * @param configuration the underlying configuration.
102      * @param extractionMode specifies the required extraction mode.
103      * @param nesting if <code>true</code> nesting triples will be expressed.
104      */
105     public ExtractionParameters(Configuration configuration, ValidationMode extractionMode, final boolean nesting) {
106         this(
107                 configuration,
108                 extractionMode,
109                 new HashMap<String, Boolean>(){
110                   /**
111                    * 
112                    */
113                   private static final long serialVersionUID = 1L; {
114                     put(ExtractionParameters.METADATA_NESTING_FLAG, nesting);
115                 }},
116                 null
117         );
118     }
119 
120     /**
121      * @param c the underlying configuration.
122      * @return the default extraction parameters.
123      */
124     public static final ExtractionParameters newDefault(Configuration c) {
125         return new ExtractionParameters(c, ValidationMode.NONE);
126     }
127 
128     /**
129      * Creates the default extraction parameters with {@link org.apache.any23.configuration.DefaultConfiguration}.
130      *
131      * @return the default extraction parameters.
132      */
133     public static final ExtractionParameters newDefault() {
134         return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.NONE);
135     }
136 
137     /**
138      * Declares the supported validation actions.
139      */
140     public enum ValidationMode {
141         NONE,
142         VALIDATE,
143         VALIDATE_AND_FIX
144     }
145 
146     /**
147      * @return <code>true</code> if validation is active.
148      */
149     public boolean isValidate() {
150         return extractionMode == ValidationMode.VALIDATE || extractionMode == ValidationMode.VALIDATE_AND_FIX;
151     }
152 
153     /**
154      * @return <code>true</code> if fix is active.
155      */
156     public boolean isFix() {
157         return extractionMode == ValidationMode.VALIDATE_AND_FIX;
158     }
159 
160     /**
161      * Returns the value of the specified extraction flag, if the flag is undefined
162      * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
163      *
164      * @param flagName name of flag.
165      * @return flag value.
166      */
167     public boolean getFlag(String flagName) {
168         final Boolean value = extractionFlags.get(flagName);
169         if(value == null) {
170             return configuration.getFlagProperty(flagName);
171         }
172         return value;
173     }
174 
175     /**
176      * Sets the value for an extraction flag.
177      *
178      * @param flagName flag name.
179      * @param value new flag value.
180      * @return the previous flag value.
181      */
182     public Boolean setFlag(String flagName, boolean value) {
183         checkPropertyExists(flagName);
184         validateValue("flag name", flagName);
185         return extractionFlags.put(flagName, value);
186     }
187 
188     /**
189      * Returns the value of the specified extraction property, if the property is undefined
190      * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
191      *
192      * @param propertyName the property name.
193      * @return the property value.
194      * @throws IllegalArgumentException if the property name is not defined in configuration.
195      */
196     public String getProperty(String propertyName) {
197         final String propertyValue = extractionProperties.get(propertyName);
198         if(propertyValue == null) {
199             return configuration.getPropertyOrFail(propertyName);
200         }
201         return propertyValue;
202     }
203 
204     /**
205      * Sets the value for an extraction property.
206      *
207      * @param propertyName the property name.
208      * @param propertyValue the property value.
209      * @return the previous property value.
210      */
211     public String setProperty(String propertyName, String propertyValue) {
212         checkPropertyExists(propertyName);
213         validateValue("property name" , propertyName);
214         validateValue("property value", propertyValue);
215         return extractionProperties.put(propertyName, propertyValue);
216     }
217 
218     @Override
219     public boolean equals(Object obj) {
220         if(obj == null) {
221             return false;
222         }
223         if(obj == this) {
224             return true;
225         }
226         if(obj instanceof ExtractionParameters) {
227             ExtractionParametersrg/apache/any23/extractor/ExtractionParameters.html#ExtractionParameters">ExtractionParameters other = (ExtractionParameters) obj;
228             return
229                     extractionMode == other.extractionMode
230                             &&
231                     extractionFlags.equals( other.extractionFlags)
232                             &&
233                     extractionProperties.equals( other.extractionProperties );
234         }
235         return false;
236     }
237 
238     @Override
239     public int hashCode() {
240         return extractionMode.hashCode() * 2 * extractionFlags.hashCode() * 3 * extractionProperties.hashCode() * 5;
241     }
242 
243     private void checkPropertyExists(String propertyName) {
244         if(! configuration.defineProperty(propertyName) ) {
245             throw new IllegalArgumentException(
246                     String.format("Property '%s' is unknown and cannot be set.", propertyName)
247             );
248         }
249     }
250 
251     private void validateValue(String desc, String value) {
252         if(value == null || value.trim().length() == 0)
253             throw new IllegalArgumentException( String.format("Invalid %s: '%s'", desc, value) );
254     }
255 }