1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.search.solr.internal.job

File SolrDocumentIterator.java

 

Coverage histogram

../../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

8
30
5
1
174
94
10
0.33
6
5
2

Classes

Class Line # Actions
SolrDocumentIterator 57 30 0% 10 0
1.0100%
 

Contributing tests

This file is covered by 3 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.search.solr.internal.job;
21   
22    import java.util.Collections;
23    import java.util.List;
24   
25    import javax.inject.Inject;
26    import javax.inject.Named;
27    import javax.inject.Provider;
28   
29    import org.apache.commons.lang3.tuple.ImmutablePair;
30    import org.apache.commons.lang3.tuple.Pair;
31    import org.apache.solr.client.solrj.SolrQuery;
32    import org.apache.solr.client.solrj.SolrQuery.ORDER;
33    import org.apache.solr.client.solrj.response.QueryResponse;
34    import org.apache.solr.common.SolrDocument;
35    import org.apache.solr.common.SolrDocumentList;
36    import org.apache.solr.common.params.CursorMarkParams;
37    import org.xwiki.component.annotation.Component;
38    import org.xwiki.component.annotation.InstantiationStrategy;
39    import org.xwiki.component.descriptor.ComponentInstantiationStrategy;
40    import org.xwiki.model.EntityType;
41    import org.xwiki.model.reference.DocumentReference;
42    import org.xwiki.model.reference.DocumentReferenceResolver;
43    import org.xwiki.search.solr.internal.api.FieldUtils;
44    import org.xwiki.search.solr.internal.api.SolrIndexerException;
45    import org.xwiki.search.solr.internal.api.SolrInstance;
46    import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
47   
48    /**
49    * Iterates the documents from the Solr search index.
50    *
51    * @version $Id: 6d7dd1eadb94045896b4fb67b5aa9eaf3761f5a2 $
52    * @since 5.4.5
53    */
54    @Component
55    @InstantiationStrategy(ComponentInstantiationStrategy.PER_LOOKUP)
56    @Named("solr")
 
57    public class SolrDocumentIterator extends AbstractDocumentIterator<String>
58    {
59    /**
60    * The current index in the list of {@link #results}.
61    */
62    private int index;
63   
64    /**
65    * A 'page' of results taken from the Solr index.
66    */
67    private List<SolrDocument> results = Collections.emptyList();
68   
69    /**
70    * The query used to fetch the documents from the Solr index.
71    */
72    private SolrQuery query;
73   
74    /**
75    * Provider for the {@link SolrInstance} that allows communication with the Solr server.
76    */
77    @Inject
78    private Provider<SolrInstance> solrInstanceProvider;
79   
80    /**
81    * Used to obtain the query corresponding to the configured root entity.
82    */
83    @Inject
84    private SolrReferenceResolver solrReferenceResolver;
85   
86    @Inject
87    private DocumentReferenceResolver<SolrDocument> solrDocumentReferenceResolver;
88   
 
89  967 toggle @Override
90    public boolean hasNext()
91    {
92  967 return getResults().size() > index;
93    }
94   
 
95  3 toggle @Override
96    public Pair<DocumentReference, String> next()
97    {
98  3 SolrDocument result = getResults().get(index++);
99  3 DocumentReference documentReference = this.solrDocumentReferenceResolver.resolve(result);
100  3 String version = (String) result.get(FieldUtils.VERSION);
101  3 return new ImmutablePair<DocumentReference, String>(documentReference, version);
102    }
103   
 
104  5 toggle @Override
105    public long size()
106    {
107  5 return getResults() instanceof SolrDocumentList ? ((SolrDocumentList) results).getNumFound() : results.size();
108    }
109   
110    /**
111    * The current 'page' of results. If the current page has been fully iterated then a new page is fetched
112    * automatically.
113    *
114    * @return the current 'page' of results taken from the Solr index
115    */
 
116  975 toggle private List<SolrDocument> getResults()
117    {
118  975 if (index >= results.size()) {
119  971 try {
120    // Cursor-based pagination.
121  971 String cursorMark = getQuery().get(CursorMarkParams.CURSOR_MARK_PARAM);
122  971 QueryResponse response = this.solrInstanceProvider.get().query(query);
123  970 if (cursorMark.equals(response.getNextCursorMark())) {
124  967 results = Collections.emptyList();
125    } else {
126  3 results = response.getResults();
127  3 query.set(CursorMarkParams.CURSOR_MARK_PARAM, response.getNextCursorMark());
128    }
129    } catch (Exception e) {
130  1 results = Collections.emptyList();
131  1 logger.error("Failed to query the Solr index.", e);
132    }
133  971 index = 0;
134    }
135  975 return results;
136    }
137   
138    /**
139    * @return the query used to fetch the documents from the Solr index
140    * @throws SolrIndexerException if we fail to obtain a query for the configured root entity
141    */
 
142  971 toggle private SolrQuery getQuery() throws SolrIndexerException
143    {
144  971 if (query == null) {
145  6 query = new SolrQuery(solrReferenceResolver.getQuery(rootReference));
146  6 query.setFields(FieldUtils.WIKI, FieldUtils.SPACES, FieldUtils.NAME, FieldUtils.DOCUMENT_LOCALE,
147    FieldUtils.VERSION);
148  6 query.addFilterQuery(FieldUtils.TYPE + ':' + EntityType.DOCUMENT.name());
149    // This iterator must have the same order as the database iterator, otherwise the synchronization fails.
150    // Note that we had two options:
151    // (A) Sort the Solr index only by id and enable docValues on the id field to improve the speed. But then we
152    // need to sort the database on a computed field (fullName_locale) which is very slow (due to the missing
153    // database index for the computed column).
154    // (B) Sort the Solr index by multiple fields, each having a corresponding column in the database. This
155    // slows down a bit the Solr query but allows us to sort the database on stored columns that have indexes,
156    // thus improving the speed of the database query.
157    // We chose solution B because it offers a good tradeoff between Solr and database performance.
158  6 query.addSort(FieldUtils.WIKI, ORDER.asc);
159  6 query.addSort(FieldUtils.SPACE_EXACT, ORDER.asc);
160  6 query.addSort(FieldUtils.NAME_EXACT, ORDER.asc);
161  6 query.addSort(FieldUtils.DOCUMENT_LOCALE, ORDER.asc);
162    // Cursor-based deep-paging requires the unique key to be included in the sort fields as a tie-breaker.
163    // See https://issues.apache.org/jira/browse/SOLR-6277 .
164  6 query.addSort(FieldUtils.ID, ORDER.asc);
165    // Paginate using a cursor because it performs better than basic pagination (using absolute offset,
166    // especially when the offset is big) and because the impact of index modifications is much smaller (and we
167    // plan to update the index to match the database during the synchronization process).
168    // See https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results
169  6 query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
170  6 query.setRows(LIMIT);
171    }
172  971 return query;
173    }
174    }