1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.xml

File ExtractHandler.java

 

Coverage histogram

../../../img/srcFileCovDistChart8.png
54% of files have more coverage

Code metrics

24
62
16
2
299
150
29
0.47
3.88
8
1.81

Classes

Class Line # Actions
ExtractHandler 49 58 0% 26 23
0.75789475 (75.8%)
ExtractHandler.XMLTag 54 4 0% 3 2
0.71428573 (71.4%)
 

Contributing tests

This file is covered by 3 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.xml;
21   
22    import java.util.Stack;
23   
24    import org.xml.sax.Attributes;
25    import org.xml.sax.SAXException;
26    import org.xml.sax.helpers.DefaultHandler;
27   
/**
 * Extracts a well-formed XML fragment by listening to SAX events. The result has the following semantics:<br>
 * {@code xmlInput.dropAllTags().substring(start, length).unDropAssociatedTags()}
 * <p>
 * So basically we would create an instance like {@code new ExtractHandler(0, 400)} in order to obtain an XML
 * fragment with its inner text length of at most 400 characters, starting at position (character) 0 in the source
 * (input) XML's inner text. The ExtractHandler is used in feed plug-in to obtain a preview of an XML (HTML, to be more
 * specific). Another use case could be to paginate an XML source (keeping pages well-formed).
 * <p>
 * As an example, the result of applying an {@code ExtractHandler(3, 13)} to:
 * <pre>{@code
 * <p>click <a href="realyLongURL" title="Here">here</a> to view the result</p>
 * }</pre>
 * is
 * <pre>{@code
 * <p>ck <a href="realyLongURL" title="Here">here</a> to</p>
 * }</pre>
 * <p>
 * Positions and lengths are counted on the characters reported by the SAX parser (i.e. after entity resolution).
 * Characters that are special in XML ({@code &}, {@code <}, {@code >} and, inside attribute values, {@code "}) are
 * escaped when written to the result so that the extracted fragment stays well-formed.
 *
 * @version $Id: 0774b1b90cb1b327680fb96fa8098e3356136f38 $
 * @since 1.6M2
 */
public class ExtractHandler extends DefaultHandler
{
    /**
     * A simple utility bean for representing an XML tag.
     */
    private static class XMLTag
    {
        /**
         * Tag's qualified name.
         */
        private final String qName;

        /**
         * A private snapshot of the tag's attributes.
         */
        private final Attributes atts;

        /**
         * Constructs a new XML tag with the given qualified name and attributes.
         *
         * @param qName Tag's qualified name.
         * @param atts Tag's attributes; they are copied, {@code null} is treated as "no attributes".
         */
        XMLTag(String qName, Attributes atts)
        {
            this.qName = qName;
            // A SAX Attributes instance is only guaranteed to be valid during the startElement() call (parsers
            // commonly reuse it), but the tag may be written much later by openTags(); keep our own copy.
            this.atts = atts == null
                ? new org.xml.sax.helpers.AttributesImpl()
                : new org.xml.sax.helpers.AttributesImpl(atts);
        }

        /**
         * @return Tag's qualified name.
         */
        public String getQName()
        {
            return this.qName;
        }

        /**
         * @return Tag's attributes.
         */
        public Attributes getAtts()
        {
            return this.atts;
        }
    }

    /**
     * The number of characters, in text nodes, that have to be read before starting the extraction.
     */
    private int lowerBound;

    /**
     * The maximum number of characters that may be read during the parsing process.
     */
    private int upperBound;

    /**
     * The number of characters read so far.
     */
    private int counter;

    /**
     * The stack of open tags; when the lower bound is reached all the tags in the stack must be opened; when the upper
     * bound is reached all the tags in the stack must be closed.
     */
    private final Stack<XMLTag> openedTags = new Stack<XMLTag>();

    /**
     * The fragment that is extracted during the parsing process.
     */
    private final StringBuilder result = new StringBuilder();

    /**
     * <code>true</code> if the extraction was successful. The parsing process throws an exception when the upper bound
     * is reached; this flag is useful to distinguish between this exception and the others.
     */
    private boolean finished;

    /**
     * Creates a new instance.
     *
     * @param start The character index from where to start the extraction.
     * @param length The number of plain text characters to extract.
     * @throws SAXException if start is less than zero or length is less than or equal to zero.
     */
    public ExtractHandler(int start, int length) throws SAXException
    {
        super();
        if (start < 0) {
            throw new SAXException("Start must be greater than or equal to 0");
        }
        if (length <= 0) {
            throw new SAXException("Length must be greater than 0");
        }
        this.lowerBound = start;
        // Saturate instead of overflowing: a very large length (e.g. Integer.MAX_VALUE, meaning "everything after
        // start") must not wrap around to a negative upper bound.
        this.upperBound = length > Integer.MAX_VALUE - start ? Integer.MAX_VALUE : start + length;
    }

    /**
     * @return The extracted text.
     */
    public String getResult()
    {
        return this.result.toString();
    }

    /**
     * @return true if the extraction process has succeeded; false if an exception occurred during the process.
     */
    public boolean isFinished()
    {
        return this.finished;
    }

    /**
     * Append a range of characters to the result buffer, escaping the characters that are special in XML.
     *
     * @param ch The characters to read from.
     * @param start Index of the first character to append.
     * @param length Number of characters to append.
     * @param inAttribute true if the characters are written inside a double-quoted attribute value, in which case
     *            the double quote is escaped as well.
     */
    private void appendEscaped(char[] ch, int start, int length, boolean inAttribute)
    {
        int end = start + length;
        for (int i = start; i < end; i++) {
            char c = ch[i];
            switch (c) {
                case '&':
                    this.result.append("&amp;");
                    break;
                case '<':
                    this.result.append("&lt;");
                    break;
                case '>':
                    this.result.append("&gt;");
                    break;
                case '"':
                    if (inAttribute) {
                        this.result.append("&quot;");
                    } else {
                        this.result.append(c);
                    }
                    break;
                default:
                    this.result.append(c);
                    break;
            }
        }
    }

    /**
     * Find the last occurrence of a character inside a range of a character array.
     *
     * @param ch The characters to search in.
     * @param start Index of the first character of the range.
     * @param length Number of characters in the range.
     * @param target The character to look for.
     * @return The position of the last occurrence, relative to {@code start}, or -1 if there is none.
     */
    private static int lastIndexOf(char[] ch, int start, int length, char target)
    {
        for (int i = length - 1; i >= 0; i--) {
            if (ch[start + i] == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Append an open tag with the given specification to the result buffer.
     *
     * @param qName Tag's qualified name.
     * @param atts Tag's attributes.
     */
    private void openTag(String qName, Attributes atts)
    {
        this.result.append('<').append(qName);
        for (int i = 0; i < atts.getLength(); i++) {
            this.result.append(' ').append(atts.getQName(i)).append("=\"");
            String value = atts.getValue(i);
            if (value != null) {
                appendEscaped(value.toCharArray(), 0, value.length(), true);
            }
            this.result.append('\"');
        }
        this.result.append('>');
    }

    /**
     * Open all pending tags, outermost first.
     *
     * @see #openTag(String, Attributes)
     */
    private void openTags()
    {
        for (XMLTag tag : this.openedTags) {
            openTag(tag.getQName(), tag.getAtts());
        }
    }

    /**
     * Close all pending tags, innermost first.
     *
     * @see #closeTag(String)
     */
    private void closeTags()
    {
        while (!this.openedTags.isEmpty()) {
            closeTag(this.openedTags.pop().getQName());
        }
    }

    /**
     * Append a closed tag with the given qualified name to the result buffer.
     *
     * @param qName Tag's qualified name.
     */
    private void closeTag(String qName)
    {
        this.result.append("</").append(qName).append('>');
    }

    /**
     * @return true if the start point has been passed but the length limit hasn't been reached.
     */
    private boolean isExtracting()
    {
        return this.lowerBound <= this.counter && this.counter <= this.upperBound;
    }

    /**
     * Resets the handler so that the same instance can be used for a new document.
     *
     * @throws SAXException if the default handler fails.
     */
    @Override
    public void startDocument() throws SAXException
    {
        super.startDocument();
        this.counter = 0;
        this.openedTags.clear();
        this.result.setLength(0);
        this.finished = false;
    }

    /**
     * Records the element as open and, if the extraction has already started, writes its start tag.
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException
    {
        XMLTag tag = new XMLTag(qName, atts);
        this.openedTags.push(tag);
        if (isExtracting()) {
            openTag(tag.getQName(), tag.getAtts());
        }
    }

    /**
     * Consumes a chunk of text: skips it while the lower bound has not been reached, copies it while inside the
     * extraction window and stops the parsing when the upper bound is reached.
     *
     * @throws SAXException with the message "Length limit reached" once the upper bound is reached; in that case
     *             {@link #isFinished()} returns true and {@link #getResult()} holds the complete fragment.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        int offset = this.lowerBound - this.counter;
        if (offset > 0) {
            if (offset > length) {
                // The whole chunk lies before the extraction window.
                this.counter += length;
                return;
            }
            // The window starts inside (or right at the end of) this chunk: emit the pending start tags and
            // process the rest of the chunk as regular extracted text.
            this.counter = this.lowerBound;
            openTags();
            characters(ch, start + offset, length - offset);
            return;
        }

        int remainingLength = this.upperBound - this.counter;
        if (remainingLength > length) {
            // The chunk fits entirely in the window.
            this.counter += length;
            appendEscaped(ch, start, length, false);
            return;
        }

        // The upper bound is reached within this chunk.
        int keep = remainingLength;
        // If we're in the middle of a word, try to cut before it, so that we don't output half-words
        if (length > remainingLength && ch[start + remainingLength] != ' ') {
            keep = lastIndexOf(ch, start, remainingLength, ' ');
        }
        if (keep >= 0) {
            this.counter += keep;
        } else {
            // No space to cut at: output the half-word rather than nothing.
            keep = remainingLength;
            this.counter = this.upperBound;
        }
        appendEscaped(ch, start, keep, false);
        endDocument();
        throw new SAXException("Length limit reached");
    }

    /**
     * Removes the element from the stack of open tags and, if the extraction is in progress, writes its end tag.
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName) throws SAXException
    {
        // We assume the XML fragment is well defined, and thus we shouldn't have a closed tag
        // without its pair open tag. So we don't test for empty stack or tag match.
        this.openedTags.pop();
        if (isExtracting()) {
            closeTag(qName);
        }
    }

    /**
     * Closes the tags that are still open (when extracting) and marks the extraction as finished.
     */
    @Override
    public void endDocument() throws SAXException
    {
        super.endDocument();
        // Close open tags
        if (isExtracting()) {
            closeTags();
        }
        // set finished flag to distinguish between "length limit reached" and other exceptions
        this.finished = true;
    }
}