1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.search.solr.internal.metadata

File AbstractSolrMetadataExtractor.java

 

Coverage histogram

../../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

50
123
16
1
534
287
58
0.47
7.69
16
3.62

Classes

Class Line # Actions
AbstractSolrMetadataExtractor 71 123 0% 58 10
0.9470899794.7%
 

Contributing tests

This file is covered by 13 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.search.solr.internal.metadata;
21   
22    import java.io.InputStream;
23    import java.util.Arrays;
24    import java.util.Collection;
25    import java.util.HashSet;
26    import java.util.List;
27    import java.util.Locale;
28    import java.util.Map;
29    import java.util.Set;
30   
31    import javax.inject.Inject;
32    import javax.inject.Named;
33    import javax.inject.Provider;
34   
35    import org.apache.solr.common.SolrInputDocument;
36    import org.apache.tika.Tika;
37    import org.apache.tika.metadata.Metadata;
38    import org.apache.tika.metadata.TikaMetadataKeys;
39    import org.slf4j.Logger;
40    import org.xwiki.component.manager.ComponentLookupException;
41    import org.xwiki.component.manager.ComponentManager;
42    import org.xwiki.context.Execution;
43    import org.xwiki.model.EntityType;
44    import org.xwiki.model.reference.DocumentReference;
45    import org.xwiki.model.reference.EntityReference;
46    import org.xwiki.model.reference.EntityReferenceSerializer;
47    import org.xwiki.search.solr.internal.api.FieldUtils;
48    import org.xwiki.search.solr.internal.api.SolrIndexerException;
49    import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
50   
51    import com.xpn.xwiki.XWikiContext;
52    import com.xpn.xwiki.XWikiException;
53    import com.xpn.xwiki.doc.XWikiAttachment;
54    import com.xpn.xwiki.doc.XWikiDocument;
55    import com.xpn.xwiki.objects.BaseObject;
56    import com.xpn.xwiki.objects.BaseProperty;
57    import com.xpn.xwiki.objects.classes.BaseClass;
58    import com.xpn.xwiki.objects.classes.BooleanClass;
59    import com.xpn.xwiki.objects.classes.ListItem;
60    import com.xpn.xwiki.objects.classes.PasswordClass;
61    import com.xpn.xwiki.objects.classes.PropertyClass;
62    import com.xpn.xwiki.objects.classes.StaticListClass;
63    import com.xpn.xwiki.objects.classes.TextAreaClass;
64   
65    /**
66    * Abstract implementation for a metadata extractor.
67    *
68    * @version $Id: 7c13171be982a77744464ebb56c71b2df8b13682 $
69    * @since 4.3M2
70    */
 
71    public abstract class AbstractSolrMetadataExtractor implements SolrMetadataExtractor
72    {
73    /**
74    * The format used when indexing the objcontent field: "<propertyName>:<propertyValue>".
75    */
76    private static final String OBJCONTENT_FORMAT = "%s : %s";
77   
78    /**
79    * The maximum number of characters allowed in a short text. This should be the same as the maximum length of a
80    * StringProperty, as specified by xwiki.hbm.xml. We need this limit to be able to handle differently short strings
81    * and large strings when indexing XObject properties.
82    */
83    protected static final int SHORT_TEXT_LIMIT = 255;
84   
85    /**
86    * Logging framework.
87    */
88    @Inject
89    protected Logger logger;
90   
91    /**
92    * Execution component.
93    */
94    @Inject
95    protected Execution execution;
96   
97    /**
98    * Reference to String serializer. Used for fields such as class and fullname that are relative to their wiki and
99    * are stored without the wiki name.
100    */
101    @Inject
102    @Named("local")
103    protected EntityReferenceSerializer<String> localSerializer;
104   
105    /**
106    * Used to access current {@link XWikiContext}.
107    */
108    @Inject
109    protected Provider<XWikiContext> xcontextProvider;
110   
111    /**
112    * Used to find the resolver.
113    */
114    @Inject
115    protected ComponentManager componentManager;
116   
 
117  4049 toggle @Override
118    public LengthSolrInputDocument getSolrDocument(EntityReference entityReference)
119    throws SolrIndexerException, IllegalArgumentException
120    {
121  4049 try {
122  4049 LengthSolrInputDocument solrDocument = new LengthSolrInputDocument();
123   
124  4049 solrDocument.setField(FieldUtils.ID, getResolver(entityReference).getId(entityReference));
125   
126  4048 if (!setDocumentFields(new DocumentReference(entityReference.extractReference(EntityType.DOCUMENT)),
127    solrDocument)) {
128  1 return null;
129    }
130   
131  4047 solrDocument.setField(FieldUtils.TYPE, entityReference.getType().name());
132   
133  4047 if (!setFieldsInternal(solrDocument, entityReference)) {
134  4 return null;
135    }
136   
137  4043 return solrDocument;
138    } catch (Exception e) {
139  1 String message = String.format("Failed to get input Solr document for entity '%s'", entityReference);
140  1 throw new SolrIndexerException(message, e);
141    }
142    }
143   
144    /**
145    * @param solrDocument the {@link LengthSolrInputDocument} to modify
146    * @param entityReference the reference of the entity
147    * @return false if the entity should not be indexed (generally mean it does not exist), true otherwise
148    * @throws Exception in case of errors
149    */
150    protected abstract boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
151    throws Exception;
152   
153    /**
154    * @param entityReference the reference of the entity
155    * @return the Solr resolver associated to the entity type
156    * @throws SolrIndexerException if any error
157    */
 
158  4049 toggle protected SolrReferenceResolver getResolver(EntityReference entityReference) throws SolrIndexerException
159    {
160  4049 try {
161  4049 return this.componentManager.getInstance(SolrReferenceResolver.class,
162    entityReference.getType().getLowerCase());
163    } catch (ComponentLookupException e) {
164  0 throw new SolrIndexerException(
165    "Faile to find solr reference resolver for type reference [" + entityReference + "]");
166    }
167    }
168   
169    /**
170    * Utility method.
171    *
172    * @param documentReference reference to a document.
173    * @return the {@link XWikiDocument} instance referenced.
174    * @throws XWikiException if problems occur.
175    */
 
176  9648 toggle protected XWikiDocument getDocument(DocumentReference documentReference) throws XWikiException
177    {
178  9648 XWikiContext xcontext = this.xcontextProvider.get();
179   
180  9648 XWikiDocument document = xcontext.getWiki().getDocument(documentReference, xcontext);
181   
182  9647 return document;
183    }
184   
185    /**
186    * Fetch translated document.
187    *
188    * @param documentReference reference to the document to be translated.
189    * @return translated document.
190    * @throws SolrIndexerException if problems occur.
191    */
 
192  876 toggle protected XWikiDocument getTranslatedDocument(DocumentReference documentReference) throws SolrIndexerException
193    {
194  876 try {
195  876 XWikiDocument document = getDocument(documentReference);
196  876 Locale locale = documentReference.getLocale();
197   
198  876 if (locale == null || locale.equals(Locale.ROOT)) {
199  759 return document;
200    }
201   
202  117 XWikiDocument translatedDocument = document.getTranslatedDocument(locale, this.xcontextProvider.get());
203   
204    // XWikiDocument#getTranslatedDocument returns the default document when the locale does not exist
205  117 if (translatedDocument.getRealLocale().equals(locale)) {
206  117 return translatedDocument;
207    }
208    } catch (Exception e) {
209  0 throw new SolrIndexerException(
210    String.format("Failed to get translated document for '%s'", documentReference), e);
211    }
212   
213  0 return null;
214    }
215   
216    /**
217    * Adds to a Solr document the fields that are specific to the XWiki document that contains the entity to be
218    * indexed. These fields required to identify the owning document and to also reflect some properties of the owning
219    * document towards the indexed entity (like locale and hidden flag).
220    *
221    * @param documentReference reference to document.
222    * @param solrDocument the Solr document to which to add the fields.
223    * @return false if the document does not exist, true otherwise
224    * @throws Exception if problems occur.
225    */
 
226  4725 toggle protected boolean setDocumentFields(DocumentReference documentReference, SolrInputDocument solrDocument)
227    throws Exception
228    {
229  4725 XWikiDocument document = getDocument(documentReference);
230  4724 if (document.isNew()) {
231  1 return false;
232    }
233   
234  4723 solrDocument.setField(FieldUtils.HIDDEN, document.isHidden());
235   
236  4723 solrDocument.setField(FieldUtils.WIKI, documentReference.getWikiReference().getName());
237  4723 solrDocument.setField(FieldUtils.NAME, documentReference.getName());
238   
239    // Set the fields that are used to query / filter the document hierarchy.
240  4723 setHierarchyFields(solrDocument, documentReference.getParent());
241   
242  4723 Locale locale = getLocale(documentReference);
243  4723 solrDocument.setField(FieldUtils.LOCALE, locale.toString());
244  4723 solrDocument.setField(FieldUtils.LANGUAGE, locale.getLanguage());
245   
246  4723 return true;
247    }
248   
 
249  3076 toggle protected Set<Locale> getLocales(DocumentReference documentReference, Locale entityLocale)
250    throws XWikiException, SolrIndexerException
251    {
252  3076 XWikiContext xcontext = this.xcontextProvider.get();
253   
254  3076 return getLocales(xcontext.getWiki().getDocument(documentReference, xcontext), entityLocale);
255    }
256   
 
257  4043 toggle protected Set<Locale> getLocales(XWikiDocument xdocument, Locale entityLocale)
258    throws XWikiException, SolrIndexerException
259    {
260  4043 Set<Locale> locales = new HashSet<Locale>();
261   
262  4043 String entityLocaleString = entityLocale != null ? entityLocale.toString() : null;
263   
264    // 1) Add entity locale
265  4043 if (entityLocale != null) {
266  876 locales.add(entityLocale);
267    }
268   
269  4043 XWikiContext xcontext = this.xcontextProvider.get();
270   
271    // 2) Add locales from the document
272   
273  4043 List<Locale> documentLocales = xdocument.getTranslationLocales(this.xcontextProvider.get());
274   
275    // If entityLocale is null it means that it's an entity without the support for translations
276    // (objects/attachments)
277  4043 if (entityLocale == null) {
278  3167 for (Locale locale : documentLocales) {
279  116 locales.add(locale);
280    }
281    }
282   
283    // 3) Add locales from preferences
284   
285  4043 List<Locale> availableLocales = xcontext.getWiki().getAvailableLocales(xcontext);
286   
287  4043 for (Locale locale : availableLocales) {
288    // Add locale only if there is no explicit translation for it
289  4031 if (!documentLocales.contains(locale)) {
290  4031 if (entityLocale == null || locale.toString().startsWith(entityLocaleString)) {
291  3914 locales.add(locale);
292    }
293    }
294    }
295   
296    // 4) Make sure that the original document's locale is there as well.
297  4043 locales.add(getLocale(xdocument.getDocumentReference()));
298   
299  4043 return locales;
300    }
301   
 
302  876 toggle protected void addLocales(XWikiDocument xdocument, Locale entityLocale, SolrInputDocument solrDocument)
303    throws SolrIndexerException, XWikiException
304    {
305  876 Set<Locale> locales = getLocales(xdocument, entityLocale);
306  876 for (Locale childLocale : locales) {
307  1752 solrDocument.addField(FieldUtils.LOCALES, childLocale.toString());
308    }
309    }
310   
311    /**
312    * @param documentReference reference to the document.
313    * @return the locale code of the referenced document.
314    * @throws SolrIndexerException if problems occur.
315    */
 
316  9642 toggle protected Locale getLocale(DocumentReference documentReference) throws SolrIndexerException
317    {
318  9642 Locale locale = null;
319   
320  9642 try {
321  9642 if (documentReference.getLocale() != null && !documentReference.getLocale().equals(Locale.ROOT)) {
322  234 locale = documentReference.getLocale();
323    } else {
324  9408 XWikiContext xcontext = this.xcontextProvider.get();
325  9408 locale = xcontext.getWiki().getDocument(documentReference, xcontext).getRealLocale();
326    }
327    } catch (Exception e) {
328  0 throw new SolrIndexerException(
329    String.format("Exception while fetching the locale of the document '%s'", documentReference), e);
330    }
331   
332  9642 return locale;
333    }
334   
335    /**
336    * Adds the properties of a given object to a Solr document.
337    *
338    * @param solrDocument the document where to add the properties
339    * @param object the object whose properties to add
340    * @param locale the locale of the indexed document; in case of translations, this will obviously be different than
341    * the original document's locale
342    */
 
343  2928 toggle protected void setObjectContent(SolrInputDocument solrDocument, BaseObject object, Locale locale)
344    {
345  2928 if (object == null) {
346    // Yes, the platform can return null objects.
347  23 return;
348    }
349   
350  2905 BaseClass xClass = object.getXClass(this.xcontextProvider.get());
351  2905 for (Object field : object.getFieldList()) {
352  11425 @SuppressWarnings("unchecked")
353    BaseProperty<EntityReference> property = (BaseProperty<EntityReference>) field;
354    // Avoid indexing empty properties.
355  11425 if (property.getValue() != null) {
356  11401 PropertyClass propertyClass = (PropertyClass) xClass.get(property.getName());
357  11401 setPropertyValue(solrDocument, property, propertyClass, locale);
358    }
359    }
360    }
361   
362    /**
363    * Add the value of the given object property to a Solr document.
364    *
365    * @param solrDocument the document to add the object property value to
366    * @param property the object property whose value to add
367    * @param propertyClass the class that describes the given property
368    * @param locale the locale of the indexed document
369    */
 
370  11401 toggle private void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<EntityReference> property,
371    PropertyClass propertyClass, Locale locale)
372    {
373  11401 Object propertyValue = property.getValue();
374  11401 if (propertyClass instanceof StaticListClass) {
375  2376 setStaticListPropertyValue(solrDocument, property, (StaticListClass) propertyClass, locale);
376  9025 } else if (propertyClass instanceof TextAreaClass
377    || (propertyClass != null && "String".equals(propertyClass.getClassType()))
378    || (propertyValue instanceof CharSequence && String.valueOf(propertyValue).length() > SHORT_TEXT_LIMIT)) {
379    // Index TextArea and String properties as text, based on the document locale. We didn't check if the
380    // property class is an instance of StringClass because it has subclasses that don't store free text (like
381    // the EmailClass). Plus we didn't want to include the PasswordClass (which extends StringClass).
382    //
383    // We also index large strings as localized text in order to cover custom XClass properties that may not
384    // extend TextArea but still have large strings as value, and also the case when a TextArea property is
385    // removed from an XClass but there are still objects that have a (large) value set for it (the property
386    // class is null in this case). The 255 limit is defined in xwiki.hbm.xml for string properties.
387   
388    // It's important here to make sure we give strings to Solr, as it can mutate the value we give it,
389    // so we need to make sure we don't endanger the state of the document
390  6287 setPropertyValue(solrDocument, property, new TypedValue(String.valueOf(propertyValue), TypedValue.TEXT),
391    locale);
392   
393  6287 if (!(propertyClass instanceof TextAreaClass)
394    && String.valueOf(propertyValue).length() <= SHORT_TEXT_LIMIT) {
395    // Also index the raw value that is saved in the database. This provide a stable field name and also
396    // allows exact matching
397  4318 setPropertyValue(solrDocument, property, new TypedValue(propertyValue), locale);
398    }
399  2738 } else if (propertyValue instanceof Collection) {
400    // We iterate the collection instead of giving it to Solr because, although it supports passing collections,
401    // it reuses the collection in some cases, when the value of a field is set for the first time for instance,
402    // which can lead to side effects on our side.
403  73 for (Object value : (Collection<?>) propertyValue) {
404  18 if (value != null) {
405    // Avoid indexing null values.
406  18 setPropertyValue(solrDocument, property, new TypedValue(value), locale);
407    }
408    }
409  2665 } else if (propertyValue instanceof Integer && propertyClass instanceof BooleanClass) {
410    // Boolean properties are stored as integers (0 is false and 1 is true).
411  1534 Boolean booleanValue = ((Integer) propertyValue) != 0;
412  1534 setPropertyValue(solrDocument, property, new TypedValue(booleanValue), locale);
413  1131 } else if (!(propertyClass instanceof PasswordClass)) {
414    // Avoid indexing passwords.
415  1124 setPropertyValue(solrDocument, property, new TypedValue(propertyValue), locale);
416    }
417    }
418   
419    /**
420    * Add the values of a static list property to a Solr document. We add both the raw value (what is saved in the
421    * database) and the display value (the label seen by the user, which is specified in the XClass).
422    *
423    * @param solrDocument the document to add the property value to
424    * @param property the static list property whose value to add
425    * @param propertyClass the static list class that should be used to get the list of known values
426    * @param locale the locale of the indexed document
427    * @see "XWIKI-9417: Search does not return any results for Static List values"
428    */
 
429  2376 toggle private void setStaticListPropertyValue(SolrInputDocument solrDocument, BaseProperty<EntityReference> property,
430    StaticListClass propertyClass, Locale locale)
431    {
432    // The list of known values specified in the XClass.
433  2376 Map<String, ListItem> knownValues = propertyClass.getMap(this.xcontextProvider.get());
434  2376 Object propertyValue = property.getValue();
435    // When multiple selection is on the value is a list. Otherwise, for single selection, the value is a string.
436  2376 List<?> rawValues = propertyValue instanceof List ? (List<?>) propertyValue : Arrays.asList(propertyValue);
437  2376 for (Object rawValue : rawValues) {
438    // Avoid indexing null values.
439  1736 if (rawValue != null) {
440    // Index the raw value that is saved in the database. This is most probably a string so we'll be able to
441    // perform exact matches on this value.
442  1736 setPropertyValue(solrDocument, property, new TypedValue(rawValue), locale);
443  1736 ListItem valueInfo = knownValues.get(rawValue);
444  1736 if (valueInfo != null && valueInfo.getValue() != null && !valueInfo.getValue().equals(rawValue)) {
445    // Index the display value as text (based on the given locale). This is the text seen by the user
446    // when he edits the static list property. This text is specified on the XClass (but can be
447    // overwritten by translations!).
448  45 setPropertyValue(solrDocument, property, new TypedValue(valueInfo.getValue(), TypedValue.TEXT),
449    locale);
450    }
451    }
452    }
453    }
454   
455    /**
456    * Add the given value to a Solr document on the field corresponding to the specified object property.
457    *
458    * @param solrDocument the document to add the value to
459    * @param property the object property instance used to get information about the property the given value
460    * corresponds to
461    * @param typedValue the value to add
462    * @param locale the locale of the indexed document
463    */
 
464  15062 toggle protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<EntityReference> property,
465    TypedValue typedValue, Locale locale)
466    {
467    // Collect all the property values from all the objects of a document in a single (localized) field.
468  15062 String fieldName = FieldUtils.getFieldName(FieldUtils.OBJECT_CONTENT, locale);
469  15062 String fieldValue = String.format(OBJCONTENT_FORMAT, property.getName(), typedValue.getValue());
470    // The current method can be called multiple times for the same property value (but with a different type).
471    // Since we don't care about the value type here (all the values are collected in a localized field) we need to
472    // make sure we don't add the same value twice. Derived classes can override this method and use the value type.
473  15062 addFieldValueOnce(solrDocument, fieldName, fieldValue);
474    }
475   
476    /**
477    * Adds a value to a document field, ensuring that the value is not duplicated.
478    *
479    * @param solrDocument the document to add the field value to
480    * @param fieldName the field name
481    * @param fieldValue the field value to add
482    */
 
483  19419 toggle protected void addFieldValueOnce(SolrInputDocument solrDocument, String fieldName, Object fieldValue)
484    {
485  19419 Collection<Object> fieldValues = solrDocument.getFieldValues(fieldName);
486  19419 if (fieldValues == null || !fieldValues.contains(fieldValue)) {
487  12999 solrDocument.addField(fieldName, fieldValue);
488    }
489    }
490   
491    /**
492    * Tries to extract text indexable content from a generic attachment.
493    *
494    * @param attachment the attachment to extract the content from
495    * @return the text representation of the attachment's content
496    * @throws SolrIndexerException if problems occur
497    */
 
498  286 toggle protected String getContentAsText(XWikiAttachment attachment)
499    {
500  286 try {
501  286 Tika tika = new Tika();
502   
503  286 Metadata metadata = new Metadata();
504  286 metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attachment.getFilename());
505   
506  286 InputStream in = attachment.getContentInputStream(this.xcontextProvider.get());
507   
508  286 try {
509  286 return tika.parseToString(in, metadata);
510    } finally {
511  286 in.close();
512    }
513    } catch (Exception e) {
514  0 this.logger.error("Failed to retrieve the content of attachment [{}]", attachment.getReference(), e);
515  0 return null;
516    }
517    }
518   
 
519  4723 toggle private void setHierarchyFields(SolrInputDocument solrDocument, EntityReference path)
520    {
521  4723 solrDocument.setField(FieldUtils.SPACE_EXACT, this.localSerializer.serialize(path));
522  4723 List<EntityReference> ancestors = path.getReversedReferenceChain();
523    // Skip the wiki reference because we want to index the local space references.
524  10002 for (int i = 1; i < ancestors.size(); i++) {
525  5279 solrDocument.addField(FieldUtils.SPACES, ancestors.get(i).getName());
526  5279 String localAncestorReference = this.localSerializer.serialize(ancestors.get(i));
527  5279 solrDocument.addField(FieldUtils.SPACE_PREFIX, localAncestorReference);
528    // We prefix the local ancestor reference with the depth in order to use 'facet.prefix'. We also add a
529    // trailing slash in order to distinguish between space names with the same prefix (e.g. 0/Gallery/ and
530    // 0/GalleryCode/).
531  5279 solrDocument.addField(FieldUtils.SPACE_FACET, (i - 1) + "/" + localAncestorReference + ".");
532    }
533    }
534    }