View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.configuration.Configuration;
21  import org.apache.any23.configuration.DefaultConfiguration;
22  
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  /**
27   * This class models the parameters to be used to perform an extraction.
28   *
29   * @see org.apache.any23.Any23
30   * @author Michele Mostarda (mostarda@fbk.eu)
31   */
32  public class ExtractionParameters {
33  
34      /**
35       * @param c the underlying configuration.
36       * @return the default extraction parameters.
37       */
38      public static final ExtractionParameters newDefault(Configuration c) {
39          return new ExtractionParameters(c, ValidationMode.None);
40      }
41  
42      /**
43       * Creates the default extraction parameters with {@link org.apache.any23.configuration.DefaultConfiguration}.
44       *
45       * @return the default extraction parameters.
46       */
47      public static final ExtractionParameters newDefault() {
48          return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.None);
49      }
50  
51      /**
52       * Declares the supported validation actions.
53       */
54      public enum ValidationMode {
55          None,
56          Validate,
57          ValidateAndFix
58      }
59  
60      private final Configuration configuration;
61  
62      private final ValidationMode extractionMode;
63  
64      private final Map<String, Boolean> extractionFlags;
65  
66      private final Map<String,String> extractionProperties;
67  
68      public static final String METADATA_DOMAIN_PER_ENTITY_FLAG  = "any23.extraction.metadata.domain.per.entity";
69  
70      public static final String METADATA_NESTING_FLAG            = "any23.extraction.metadata.nesting";
71  
72      public static final String METADATA_TIMESIZE_FLAG           = "any23.extraction.metadata.timesize";
73  
74      public static final String EXTRACTION_CONTEXT_URI_PROPERTY = "any23.extraction.context.uri";
75  
76      /**
77       * Constructor.
78       *
79       * @param configuration underlying configuration.
80       * @param extractionMode specifies the required extraction mode.
81       * @param extractionFlags map of specific flags used for extraction. If not specified they will
82       *        be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
83       * @param extractionProperties map of specific properties used for extraction. If not specified
84       *        they will ne retrieved by the default {@link org.apache.any23.configuration.Configuration}.
85       */
86      public ExtractionParameters(
87              Configuration configuration,
88              ValidationMode extractionMode,
89              Map<String, Boolean> extractionFlags,
90              Map<String,String> extractionProperties
91      ) {
92          if(configuration == null) {
93              throw new NullPointerException("Configuration cannot be null.");
94          }
95          if(extractionMode == null) {
96              throw new NullPointerException("Extraction mode cannot be null.");
97          }
98          this.configuration  = configuration;
99          this.extractionMode = extractionMode;
100         this.extractionFlags =
101                 extractionFlags == null
102                         ?
103                 new HashMap<String,Boolean>()
104                         :
105                 new HashMap<String,Boolean>(extractionFlags);
106         this.extractionProperties =
107                 extractionProperties == null
108                         ?
109                 new HashMap<String,String>()
110                         :
111                 new HashMap<String,String>(extractionProperties);
112     }
113 
114     /**
115      * Constructor.
116      *
117      * @param configuration underlying configuration.
118      * @param extractionMode specifies the required extraction mode.
119      */
120     public ExtractionParameters(Configuration configuration, ValidationMode extractionMode) {
121         this(configuration, extractionMode, null, null);
122     }
123 
124     /**
125      * Constructor, allows to set explicitly the value for flag
126      * {@link SingleDocumentExtraction#METADATA_NESTING_FLAG}.
127      *
128      * @param configuration the underlying configuration.
129      * @param extractionMode specifies the required extraction mode.
130      * @param nesting if <code>true</code> nesting triples will be expressed.
131      */
132     public ExtractionParameters(Configuration configuration, ValidationMode extractionMode, final boolean nesting) {
133         this(
134                 configuration,
135                 extractionMode,
136                 new HashMap<String, Boolean>(){{
137                     put(ExtractionParameters.METADATA_NESTING_FLAG, nesting);
138                 }},
139                 null
140         );
141     }
142 
143     /**
144      * @return <code>true</code> if validation is active.
145      */
146     public boolean isValidate() {
147         return extractionMode == ValidationMode.Validate || extractionMode == ValidationMode.ValidateAndFix;
148     }
149 
150     /**
151      * @return <code>true</code> if fix is active.
152      */
153     public boolean isFix() {
154         return extractionMode == ValidationMode.ValidateAndFix;
155     }
156 
157     /**
158      * Returns the value of the specified extraction flag, if the flag is undefined
159      * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
160      *
161      * @param flagName name of flag.
162      * @return flag value.
163      */
164     public boolean getFlag(String flagName) {
165         final Boolean value = extractionFlags.get(flagName);
166         if(value == null) {
167             return configuration.getFlagProperty(flagName);
168         }
169         return value;
170     }
171 
172     /**
173      * Sets the value for an extraction flag.
174      *
175      * @param flagName flag name.
176      * @param value new flag value.
177      * @return the previous flag value.
178      */
179     public Boolean setFlag(String flagName, boolean value) {
180         checkPropertyExists(flagName);
181         validateValue("flag name", flagName);
182         return extractionFlags.put(flagName, value);
183     }
184 
185     /**
186      * Returns the value of the specified extraction property, if the property is undefined
187      * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
188      *
189      * @param propertyName the property name.
190      * @return the property value.
191      * @throws IllegalArgumentException if the property name is not defined in configuration.
192      */
193     public String getProperty(String propertyName) {
194         final String propertyValue = extractionProperties.get(propertyName);
195         if(propertyValue == null) {
196             return configuration.getPropertyOrFail(propertyName);
197         }
198         return propertyValue;
199     }
200 
201     /**
202      * Sets the value for an extraction property.
203      *
204      * @param propertyName the property name.
205      * @param propertyValue the property value.
206      * @return the previous property value.
207      */
208     public String setProperty(String propertyName, String propertyValue) {
209         checkPropertyExists(propertyName);
210         validateValue("property name" , propertyName);
211         validateValue("property value", propertyValue);
212         return extractionProperties.put(propertyName, propertyValue);
213     }
214 
215     @Override
216     public boolean equals(Object obj) {
217         if(obj == null) {
218             return false;
219         }
220         if(obj == this) {
221             return true;
222         }
223         if(obj instanceof ExtractionParameters) {
224             ExtractionParameters other = (ExtractionParameters) obj;
225             return
226                     extractionMode == other.extractionMode
227                             &&
228                     extractionFlags.equals( other.extractionFlags)
229                             &&
230                     extractionProperties.equals( other.extractionProperties );
231         }
232         return false;
233     }
234 
235     @Override
236     public int hashCode() {
237         return extractionMode.hashCode() * 2 * extractionFlags.hashCode() * 3 * extractionProperties.hashCode() * 5;
238     }
239 
240     private void checkPropertyExists(String propertyName) {
241         if(! configuration.defineProperty(propertyName) ) {
242             throw new IllegalArgumentException(
243                     String.format("Property '%s' is unknown and cannot be set.", propertyName)
244             );
245         }
246     }
247 
248     private void validateValue(String desc, String value) {
249         if(value == null || value.trim().length() == 0)
250             throw new IllegalArgumentException( String.format("Invalid %s: '%s'", desc, value) );
251     }
252 }