1. Project Clover database Sat Feb 2 2019 06:45:20 CET
  2. Package org.xwiki.search.solr.internal.metadata

File DocumentSolrMetadataExtractor.java

 

Coverage histogram

../../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

8
71
7
1
295
154
14
0.2
10.14
7
2

Classes

Class Line # Actions
DocumentSolrMetadataExtractor 60 71 0% 14 4
0.95348835 (95.3%)
 

Contributing tests

This file is covered by 12 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.search.solr.internal.metadata;
21   
22    import java.util.List;
23    import java.util.Locale;
24    import java.util.Map;
25   
26    import javax.inject.Inject;
27    import javax.inject.Named;
28    import javax.inject.Singleton;
29   
30    import org.apache.commons.lang3.StringUtils;
31    import org.apache.solr.common.SolrInputDocument;
32    import org.xwiki.component.annotation.Component;
33    import org.xwiki.model.EntityType;
34    import org.xwiki.model.reference.DocumentReference;
35    import org.xwiki.model.reference.EntityReference;
36    import org.xwiki.model.reference.EntityReferenceSerializer;
37    import org.xwiki.rendering.renderer.BlockRenderer;
38    import org.xwiki.rendering.renderer.printer.DefaultWikiPrinter;
39    import org.xwiki.rendering.renderer.printer.WikiPrinter;
40    import org.xwiki.rendering.syntax.Syntax;
41    import org.xwiki.search.solr.internal.api.FieldUtils;
42    import org.xwiki.search.solr.internal.api.SolrFieldNameEncoder;
43   
44    import com.xpn.xwiki.XWikiContext;
45    import com.xpn.xwiki.XWikiException;
46    import com.xpn.xwiki.doc.XWikiAttachment;
47    import com.xpn.xwiki.doc.XWikiDocument;
48    import com.xpn.xwiki.objects.BaseObject;
49    import com.xpn.xwiki.objects.BaseProperty;
50   
51    /**
52    * Extract the metadata to be indexed from document.
53    *
54    * @version $Id: a6cf8e4deab3b10e030a10964e70eb2ccffe24f6 $
55    * @since 4.3M2
56    */
57    @Component
58    @Named("document")
59    @Singleton
 
60    public class DocumentSolrMetadataExtractor extends AbstractSolrMetadataExtractor
61    {
62    /**
63    * BlockRenderer component used to render the wiki content before indexing.
64    */
65    @Inject
66    @Named("plain/1.0")
67    private BlockRenderer renderer;
68   
69    @Inject
70    private EntityReferenceSerializer<String> entityReferenceSerializer;
71   
72    /**
73    * Used to serialize entity reference to be used in dynamic field names.
74    */
75    @Inject
76    @Named("solr")
77    private EntityReferenceSerializer<String> fieldNameSerializer;
78   
79    /**
80    * Used to encode dynamic field names that may contain special characters.
81    */
82    @Inject
83    private SolrFieldNameEncoder fieldNameEncoder;
84   
 
85  7727 toggle @Override
86    public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
87    throws Exception
88    {
89  7727 DocumentReference documentReference = new DocumentReference(entityReference);
90   
91  7727 XWikiContext xcontext = this.xcontextProvider.get();
92   
93  7727 XWikiDocument translatedDocument = getTranslatedDocument(documentReference);
94  7727 if (translatedDocument == null) {
95  0 return false;
96    }
97   
98  7727 Locale locale = getLocale(documentReference);
99   
100  7727 solrDocument.setField(FieldUtils.FULLNAME, localSerializer.serialize(documentReference));
101   
102    // Rendered title.
103  7727 String plainTitle = translatedDocument.getRenderedTitle(Syntax.PLAIN_1_0, xcontext);
104  7727 solrDocument.setField(FieldUtils.getFieldName(FieldUtils.TITLE, locale), plainTitle);
105   
106    // Raw Content
107  7727 solrDocument.setField(FieldUtils.getFieldName(FieldUtils.DOCUMENT_RAW_CONTENT, locale),
108    translatedDocument.getContent());
109   
110    // Rendered content
111  7727 WikiPrinter plainContentPrinter = new DefaultWikiPrinter();
112  7727 this.renderer.render(translatedDocument.getXDOM(), plainContentPrinter);
113  7727 solrDocument.setField(FieldUtils.getFieldName(FieldUtils.DOCUMENT_RENDERED_CONTENT, locale),
114    plainContentPrinter.toString());
115   
116  7727 solrDocument.setField(FieldUtils.VERSION, translatedDocument.getVersion());
117  7727 solrDocument.setField(FieldUtils.COMMENT, translatedDocument.getComment());
118   
119  7727 solrDocument.setField(FieldUtils.DOCUMENT_LOCALE, translatedDocument.getLocale().toString());
120   
121    // Add locale inheritance
122  7727 addLocales(translatedDocument, translatedDocument.getLocale(), solrDocument);
123   
124    // Get both serialized user reference string and pretty user name
125  7727 setAuthors(solrDocument, translatedDocument, entityReference);
126   
127    // Document dates.
128  7727 solrDocument.setField(FieldUtils.CREATIONDATE, translatedDocument.getCreationDate());
129  7727 solrDocument.setField(FieldUtils.DATE, translatedDocument.getContentUpdateDate());
130   
131    // Document translations have their own hidden fields
132  7727 solrDocument.setField(FieldUtils.HIDDEN, translatedDocument.isHidden());
133   
134    // Add any extra fields (about objects, etc.) that can improve the findability of the document.
135  7727 setExtras(documentReference, solrDocument, locale);
136   
137  7727 return true;
138    }
139   
140    /**
141    * @param solrDocument the Solr document
142    * @param translatedDocument the XWiki document
143    * @param entityReference the document reference
144    */
 
145  7727 toggle private void setAuthors(SolrInputDocument solrDocument, XWikiDocument translatedDocument,
146    EntityReference entityReference)
147    {
148  7727 XWikiContext xcontext = this.xcontextProvider.get();
149   
150  7727 String authorString = entityReferenceSerializer.serialize(translatedDocument.getAuthorReference());
151  7727 solrDocument.setField(FieldUtils.AUTHOR, authorString);
152  7727 String authorDisplayString =
153    xcontext.getWiki().getPlainUserName(translatedDocument.getAuthorReference(), xcontext);
154  7727 solrDocument.setField(FieldUtils.AUTHOR_DISPLAY, authorDisplayString);
155   
156  7727 String creatorString = entityReferenceSerializer.serialize(translatedDocument.getCreatorReference());
157  7727 solrDocument.setField(FieldUtils.CREATOR, creatorString);
158  7727 String creatorDisplayString =
159    xcontext.getWiki().getPlainUserName(translatedDocument.getCreatorReference(), xcontext);
160  7727 solrDocument.setField(FieldUtils.CREATOR_DISPLAY, creatorDisplayString);
161    }
162   
163    /**
164    * @param documentReference the document's reference.
165    * @param solrDocument the Solr document where to add the data.
166    * @param locale the locale of which to index the extra data.
167    * @throws XWikiException if problems occur.
168    */
 
169  7727 toggle protected void setExtras(DocumentReference documentReference, SolrInputDocument solrDocument, Locale locale)
170    throws XWikiException
171    {
172    // We need to support the following types of queries:
173    // * search for documents matching specific values in multiple XObject properties
174    // * search for documents matching specific values in attachment meta data
175    // In order to avoid using joins we have to index the XObjects and the attachments both separately and on the
176    // document rows in the Solr index. This means we'll have duplicated information but we believe the increase in
177    // the index size pays off if you take into account the simplified query syntax and the search speed.
178   
179    // Use the original document to get the objects and the attachments because the translated document is just a
180    // lightweight document containing just the translated content and title.
181  7727 XWikiDocument originalDocument = getDocument(documentReference);
182   
183    // NOTE: To be able to still find translated documents, we need to redundantly index the same objects (including
184    // comments) and attachments for each translation. If we don`t do this then only the original document will be
185    // found. That's why we pass the locale of the translated document to the following method calls.
186  7727 setObjects(solrDocument, locale, originalDocument);
187  7727 setAttachments(solrDocument, locale, originalDocument);
188    }
189   
190    /**
191    * @param solrDocument the Solr document where to add the objects.
192    * @param locale the locale for which to index the objects.
193    * @param originalDocument the original document where the objects come from.
194    */
 
195  7727 toggle protected void setObjects(SolrInputDocument solrDocument, Locale locale, XWikiDocument originalDocument)
196    {
197  7727 for (Map.Entry<DocumentReference, List<BaseObject>> objects : originalDocument.getXObjects().entrySet()) {
198  8155 boolean hasObjectsOfThisType = false;
199  8155 for (BaseObject object : objects.getValue()) {
200    // Yes, the old core can return null objects.
201  9982 hasObjectsOfThisType |= object != null;
202  9982 setObjectContent(solrDocument, object, locale);
203    }
204  8155 if (hasObjectsOfThisType) {
205  8155 solrDocument.addField(FieldUtils.CLASS, localSerializer.serialize(objects.getKey()));
206    }
207    }
208    }
209   
 
210  76635 toggle @Override
211    protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<EntityReference> property,
212    TypedValue typedValue, Locale locale)
213    {
214  76635 Object value = typedValue.getValue();
215  76635 String type = typedValue.getType();
216   
217    // We need to be able to query an object property alone.
218  76635 EntityReference classReference = property.getObject().getRelativeXClassReference();
219  76635 EntityReference propertyReference =
220    new EntityReference(property.getName(), EntityType.CLASS_PROPERTY, classReference);
221  76635 String serializedPropertyReference = fieldNameEncoder.encode(fieldNameSerializer.serialize(propertyReference));
222  76635 String prefix = "property." + serializedPropertyReference;
223    // Note that we're using "addField" because we want to collect all the property values, even from multiple
224    // objects of the same type.
225  76635 solrDocument.addField(FieldUtils.getFieldName(prefix, type, locale), value);
226   
227    // We need to be able to sort by a property value and for this we need a dedicated (single valued) field because
228    // the field we just added is multiValued and multiValued fields are not sortable.
229    // We don't need to sort on properties that hold large localized texts or large strings (e.g. TextArea).
230  76635 if ((type != TypedValue.TEXT && type != TypedValue.STRING)
231    || String.valueOf(value).length() <= SHORT_TEXT_LIMIT) {
232    // Short localized texts are indexed as strings because a sort field is either non-tokenized (i.e. has no
233    // Analyzer) or uses an Analyzer that only produces a single Term (i.e. uses the KeywordTokenizer).
234  74749 String sortType = "sort" + StringUtils.capitalize(type == TypedValue.TEXT ? TypedValue.STRING : type);
235    // We're using "setField" because the sort fields must be single valued. The consequence is that for
236    // properties with multiple values the last value we set will be used for sorting (e.g. if a document has
237    // two objects of the same type then the value from the second object will be used for sorting).
238  74749 solrDocument.setField(FieldUtils.getFieldName(prefix, sortType, locale), value);
239    }
240   
241    // We need to be able to query all properties of a specific type of object at once.
242  76635 String serializedClassReference = fieldNameEncoder.encode(fieldNameSerializer.serialize(classReference));
243  76635 String objectOfTypeFieldName = "object." + serializedClassReference;
244    // The current method can be called multiple times for the same property value (but with a different type).
245    // Since we don't care about the value type here (all the values are collected in a localized field) we need to
246    // make sure we don't add the same value twice.
247  76635 addFieldValueOnce(solrDocument, FieldUtils.getFieldName(objectOfTypeFieldName, locale), value);
248   
249    // We need to be able to query all objects from a document at once.
250  76635 super.setPropertyValue(solrDocument, property, typedValue, locale);
251    }
252   
253    /**
254    * @param solrDocument the Solr document where to add the attachments data
255    * @param locale the locale for which to index the attachments
256    * @param originalDocument the original document, that should be used to access the attachments
257    */
 
258  7727 toggle private void setAttachments(SolrInputDocument solrDocument, Locale locale, XWikiDocument originalDocument)
259    {
260  7727 for (XWikiAttachment attachment : originalDocument.getAttachmentList()) {
261  904 setAttachment(solrDocument, locale, attachment);
262    }
263    }
264   
265    /**
266    * Extracts the meta data from the given attachment and adds it to the given Solr document.
267    *
268    * @param solrDocument the Solr document where to add the attachment data
269    * @param locale the locale for which to index the attachments
270    * @param attachment the attachment to index
271    */
 
272  904 toggle private void setAttachment(SolrInputDocument solrDocument, Locale locale, XWikiAttachment attachment)
273    {
274  904 XWikiContext xcontext = xcontextProvider.get();
275   
276  904 solrDocument.addField(FieldUtils.FILENAME, attachment.getFilename());
277  904 solrDocument.addField(FieldUtils.MIME_TYPE, attachment.getMimeType(xcontext));
278  904 solrDocument.addField(FieldUtils.ATTACHMENT_DATE, attachment.getDate());
279  904 solrDocument.addField(FieldUtils.ATTACHMENT_SIZE, attachment.getLongSize());
280   
281  904 String attachmentTextContent = getContentAsText(attachment);
282  904 solrDocument.addField(FieldUtils.getFieldName(FieldUtils.ATTACHMENT_CONTENT, locale), attachmentTextContent);
283   
284    // Index the full author reference for exact matching (faceting).
285  904 String authorStringReference = entityReferenceSerializer.serialize(attachment.getAuthorReference());
286  904 solrDocument.addField(FieldUtils.ATTACHMENT_AUTHOR, authorStringReference);
287  904 try {
288    // Index the author display name for free text search.
289  904 String authorDisplayName = xcontext.getWiki().getPlainUserName(attachment.getAuthorReference(), xcontext);
290  904 solrDocument.addField(FieldUtils.ATTACHMENT_AUTHOR_DISPLAY, authorDisplayName);
291    } catch (Exception e) {
292  0 this.logger.error("Failed to get author display name for attachment [{}]", attachment.getReference(), e);
293    }
294    }
295    }