1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.rendering.wikimodel.xhtml.filter

File XHTMLWhitespaceXMLFilter.java

 

Coverage histogram

../../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

78
167
33
3
612
387
78
0.47
5.06
11
2.36

Classes

Class Line # Actions
XHTMLWhitespaceXMLFilter 56 156 0% 75 14
0.9469697 94.7%
XHTMLWhitespaceXMLFilter.Event 570 11 0% 3 0
1.0 100%
XHTMLWhitespaceXMLFilter.Event.Type 572 0 - 0 0
-1.0 -
 

Contributing tests

This file is covered by 324 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.rendering.wikimodel.xhtml.filter;
21   
22    import java.util.ArrayDeque;
23    import java.util.ArrayList;
24    import java.util.Arrays;
25    import java.util.Deque;
26    import java.util.HashSet;
27    import java.util.List;
28    import java.util.Set;
29    import java.util.regex.Matcher;
30    import java.util.regex.Pattern;
31   
32    import org.xml.sax.Attributes;
33    import org.xml.sax.SAXException;
34    import org.xml.sax.XMLReader;
35    import org.xml.sax.helpers.AttributesImpl;
36   
37    /**
38    * Removes non-semantic whitespaces in XML elements. See http://www.w3.org/TR/html4/struct/text.html#h-9.1 for more
39    * details. Possible use cases:
40    * <ul>
41    * <li><b>UC1</b>: Any white spaces group is removed if it's before a non inline (see NONINLINE_ELEMENTS) element or
42    * at the beginning of the document.</li>
43    * <li><b>UC2</b>: Any white spaces group is removed if it's after a non inline (see NONINLINE_ELEMENTS) element or
44    * at the end of the document.</li>
45    * <li><b>UC3</b>: Inside inline content any white spaces group becomes a single space.</li>
46    * <li><b>UC4</b>: Non visible elements (comments, CDATA and NONVISIBLE_ELEMENTS) are invisible and do not cut a
47    * white space group.
48    * <code>text(sp)<!--comment-->(sp)text</code> becomes <code>text(sp)<!--comment-->text</code></li>
49    * <li><b>UC5</b>: Visible empty elements like img count as text when grouping white spaces</li>
50    * <li><b>UC6</b>: Semantic comments count as text when grouping white spaces</li>
51    * </ul>
52    *
53    * @version $Id: ee5ef567bf9039e9cdced7067b056ab6cb952621 $
54    * @since 4.0M1
55    */
 
56    public class XHTMLWhitespaceXMLFilter extends DefaultXMLFilter
57    {
58    private static final Pattern HTML_WHITESPACE_DUPLICATES_PATTERN = Pattern
59    .compile("\\s{2,}|[\\t\\n\\x0B\\f\\r]+");
60   
61    private static final Pattern HTML_WHITESPACE_HEAD_PATTERN = Pattern
62    .compile("^\\s+");
63   
64    private static final Pattern HTML_WHITESPACE_TAIL_PATTERN = Pattern
65    .compile("\\s+$");
66   
67    private static final Set<String> NONINLINE_ELEMENTS = new HashSet<>(
68    Arrays.asList("address", "blockquote", "div", "dl", "dt", "dd",
69    "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
70    "noscript", "ol", "p", "pre", "script", "table", "ul", "html",
71    "body", "td", "tr", "th", "tbody", "head", "li", "thead", "tfoot",
72    "caption", "col", "colgroup", "legend", "base", "link", "meta",
73    "style", "title"));
74   
75    /**
76    * Non visible elements behave like CDATA and comments: they are part of the white space group.
77    */
78    private static final Set<String> NONVISIBLE_ELEMENTS = new HashSet<>(Arrays.asList("script"));
79   
80    /**
81    * Visible elements like images count in the inline text to clean white spaces.
82    */
83    private static final Set<String> EMPTYVISIBLE_ELEMENTS = new HashSet<>(Arrays.asList("img"));
84   
85    /**
86    * State indicating if the white spaces have to be cleaned. It's an int to support &lt;pre&gt;pre&lt;/pre&gt;
87    * inside &lt;tt class=&quot;wikimodel-verbatim&quot;&gt;pre&lt;/tt&gt;.
88    */
89    private int fNoCleanUpLevel = 0;
90   
91    /**
92    * Content to clean.
93    */
94    private StringBuffer fContent = new StringBuffer();
95   
96    /**
97    * Buffered current inline text. It contains only text (and no inline start/end element, comment or CDATA) to be
98    * able to know if a leading space has to be removed because the previous text ends with it or if there is no previous
99    * text.
100    */
101    private StringBuffer fPreviousInlineText = new StringBuffer();
102   
103    /**
104    * The previous content to send. Buffered while waiting to know if its trailing space has to be removed when it's the
105    * last text of inline content.
106    */
107    private String fPreviousContent = null;
108   
109    /**
110    * Previous inline elements. These are the elements before the previous content. It's buffered to support space
111    * group cleaning between different inline elements.
112    */
113    private List<Event> fPreviousElements = new ArrayList<Event>();
114   
115    private Deque<Attributes> fAttributes = new ArrayDeque<Attributes>();
116   
 
117  0 toggle public XHTMLWhitespaceXMLFilter()
118    {
119    }
120   
 
121  439 toggle public XHTMLWhitespaceXMLFilter(XMLReader reader)
122    {
123  439 super(reader);
124    }
125   
 
126  2758 toggle @Override
127    public void characters(char[] ch, int start, int length)
128    throws SAXException
129    {
130  2758 getContent().append(ch, start, length);
131    }
132   
 
133  4900 toggle @Override
134    public void startElement(String uri, String localName, String qName,
135    Attributes atts) throws SAXException
136    {
137  4900 Attributes clonedAtts = new AttributesImpl(atts);
138  4900 fAttributes.push(clonedAtts);
139   
140  4900 if (NONVISIBLE_ELEMENTS.contains(qName)) {
141  3 startNonVisibleElement();
142   
143    // send start element event
144  3 super.startElement(uri, localName, qName, atts);
145    } else {
146  4897 if (NONINLINE_ELEMENTS.contains(qName)) {
147    // Flush previous content and print current one
148  2631 flushContent();
149   
150    // white spaces inside pre element are not cleaned
151  2631 if ("pre".equalsIgnoreCase(qName)) {
152  16 ++fNoCleanUpLevel;
153    }
154   
155    // send start element event
156  2631 super.startElement(uri, localName, qName, atts);
157  2266 } else if (EMPTYVISIBLE_ELEMENTS.contains(qName)) {
158  46 startEmptyVisibleElement();
159   
160  46 super.startElement(uri, localName, qName, atts);
161    } else {
162  2220 appendInlineEvent(new Event(uri, localName, qName, clonedAtts));
163  2220 if (preservedInlineContent(qName, atts)) {
164  16 ++fNoCleanUpLevel;
165    }
166    }
167    }
168    }
169   
 
170  4894 toggle @Override
171    public void endElement(String uri, String localName, String qName)
172    throws SAXException
173    {
174  4894 if (NONVISIBLE_ELEMENTS.contains(qName)) {
175  3 endNonVisibleElement();
176   
177  3 super.endElement(uri, localName, qName);
178   
179  3 --fNoCleanUpLevel;
180    } else {
181  4891 if (NONINLINE_ELEMENTS.contains(qName)) {
182    // Flush previous content and print current one
183  2625 flushContent();
184   
185    // white spaces inside pre element are not cleaned
186  2625 if ("pre".equalsIgnoreCase(qName)) {
187  16 --fNoCleanUpLevel;
188    }
189   
190  2625 super.endElement(uri, localName, qName);
191  2266 } else if (EMPTYVISIBLE_ELEMENTS.contains(qName)) {
192  46 endEmptyVisibleElement();
193   
194  46 super.endElement(uri, localName, qName);
195    } else {
196  2220 appendInlineEvent(new Event(uri, localName, qName));
197  2220 if (preservedInlineContent(qName, fAttributes.peek())) {
198  16 --fNoCleanUpLevel;
199    }
200    }
201    }
202   
203  4894 fAttributes.pop();
204    }
205   
 
206  4440 toggle private boolean preservedInlineContent(String name, Attributes atts)
207    {
208  4440 boolean preserved = false;
209   
210  4440 if ("tt".equalsIgnoreCase(name)) {
211  44 String value = atts.getValue("class");
212   
213  44 if (value != null) {
214  32 preserved = Arrays.asList(value.split(" ")).contains(
215    "wikimodel-verbatim");
216    }
217    }
218   
219  4440 return preserved;
220    }
221   
 
222  40 toggle @Override
223    public void startCDATA() throws SAXException
224    {
225    // Flush previous content and print current one
226  40 flushContent(true);
227   
228  40 ++fNoCleanUpLevel;
229   
230  40 super.startCDATA();
231    }
232   
 
233  40 toggle @Override
234    public void endCDATA() throws SAXException
235    {
236    // Flush previous content and print current one
237  40 flushContent(false);
238   
239  40 super.endCDATA();
240   
241  40 --fNoCleanUpLevel;
242    }
243   
 
244  270 toggle @Override
245    public void comment(char[] ch, int start, int length) throws SAXException
246    {
247  270 if (shouldRemoveWhiteSpaces()) {
248  270 String comment = new String(ch, start, length);
249   
250  270 if (isSemanticComment(comment)) {
251    // UC6: Semantic comment count as text when grouping white
252    // spaces
253  260 startEmptyVisibleElement();
254   
255  260 super.comment(ch, start, length);
256    } else {
257  10 appendInlineEvent(new Event(comment));
258    }
259    } else {
260  0 super.comment(ch, start, length);
261    }
262    }
263   
 
264  472 toggle @Override
265    public void endDocument() throws SAXException
266    {
267    // Flush previous content and print current one
268  472 flushContent();
269   
270  472 super.endDocument();
271    }
272   
 
273  13731 toggle protected boolean shouldRemoveWhiteSpaces()
274    {
275  13731 return fNoCleanUpLevel == 0;
276    }
277   
 
278  7313 toggle protected void sendPreviousContent(boolean trimTrailing)
279    throws SAXException
280    {
281  7313 if (fPreviousContent != null && fPreviousContent.length() > 0) {
282  295 if (trimTrailing) {
283  23 fPreviousContent = trimTrailingWhiteSpaces(fPreviousContent);
284    }
285   
286  295 sendCharacters(fPreviousContent.toCharArray());
287  295 fPreviousContent = null;
288    }
289   
290  7313 for (Event event : fPreviousElements) {
291  1725 sendInlineEvent(event);
292    }
293  7313 fPreviousElements.clear();
294    }
295   
 
296  4450 toggle protected void sendInlineEvent(Event event) throws SAXException
297    {
298  4450 if (event.type == Event.Type.BEGIN_ELEMENT) {
299  2220 super.startElement(event.uri, event.localName, event.qName,
300    event.atts);
301  2230 } else if (event.type == Event.Type.END_ELEMENT) {
302  2220 super.endElement(event.uri, event.localName, event.qName);
303  10 } else if (event.type == Event.Type.COMMENT) {
304  10 super.comment(event.content.toCharArray(), 0, event.content
305    .length());
306    }
307    }
308   
309    /**
310    * Flush previous content and print current one.
311    */
 
312  5728 toggle protected void flushContent() throws SAXException
313    {
314  5728 flushContent(true);
315    }
316   
317    /**
318    * Flush previous content and print current one.
319    */
 
320  5808 toggle protected void flushContent(boolean trimTrailing) throws SAXException
321    {
322  5808 cleanContentLeadingSpaces();
323  5808 cleanContentExtraWhiteSpaces();
324   
325  5808 if (trimTrailing) {
326    // UC2: Any white spaces group is removed if it's after a non inline
327    // (see NONINLINE_ELEMENTS) element.
328  5768 trimTrailingWhiteSpaces();
329    }
330   
331    // Send previous content
332  5808 sendPreviousContent(getContent().length() == 0);
333   
334    // Send current content
335  5808 if (getContent().length() > 0) {
336  1061 sendCharacters(getContent().toString().toCharArray());
337  1061 getContent().setLength(0);
338    }
339   
340    // Reinit inline text buffer
341  5808 fPreviousInlineText.setLength(0);
342    }
343   
344    /**
345    * Append an inline element. Inline elements ending with a space are stacked
346    * waiting for a non space character or the end of the inline content.
347    */
 
348  4450 toggle protected void appendInlineEvent(Event event) throws SAXException
349    {
350  4450 cleanContentLeadingSpaces();
351  4450 cleanContentExtraWhiteSpaces();
352   
353  4450 if (getContent().length() > 0) {
354  1196 sendPreviousContent(false);
355   
356  1196 fPreviousInlineText.append(getContent());
357   
358  1196 if (getContent().charAt(getContent().length() - 1) == ' ') {
359  293 fPreviousContent = getContent().toString();
360  293 fPreviousElements.add(event);
361    } else {
362  903 sendCharacters(getContent().toString().toCharArray());
363  903 sendInlineEvent(event);
364    }
365   
366  1196 getContent().setLength(0);
367    } else {
368  3254 if (fPreviousInlineText.length() == 0) {
369    // There is no inline text before this inline element
370  1822 sendInlineEvent(event);
371    } else {
372    // The last inline text ends with a space
373  1432 fPreviousElements.add(event);
374    }
375    }
376    }
377   
 
378  306 toggle protected void startEmptyVisibleElement() throws SAXException
379    {
380  306 cleanContentLeadingSpaces();
381  306 cleanContentExtraWhiteSpaces();
382   
383    // Send previous content
384  306 sendPreviousContent(false);
385   
386    // Send content
387  306 sendCharacters(getContent().toString().toCharArray());
388  306 fPreviousInlineText.append(getContent());
389   
390    // Add visible element as part of the inline text
391  306 fPreviousInlineText.append("EmptyVisibleElement");
392   
393  306 getContent().setLength(0);
394    }
395   
 
396  46 toggle protected void endEmptyVisibleElement() throws SAXException
397    {
398    // Send current content
399  46 if (getContent().length() > 0) {
400  0 sendCharacters(getContent().toString().toCharArray());
401  0 getContent().setLength(0);
402    }
403    }
404   
405    /**
406    * Append a non visible element.
407    */
 
408  3 toggle protected void startNonVisibleElement() throws SAXException
409    {
410  3 if (shouldRemoveWhiteSpaces()) {
411  3 cleanContentLeadingSpaces();
412  3 cleanContentExtraWhiteSpaces();
413   
414  3 if (getContent().length() > 0) {
415  3 sendPreviousContent(false);
416   
417  3 fPreviousInlineText.append(getContent());
418   
419  3 if (getContent().charAt(getContent().length() - 1) == ' ') {
420  2 fPreviousContent = getContent().toString();
421    } else {
422  1 sendCharacters(getContent().toString().toCharArray());
423    }
424    }
425   
426    // There is some text ending with a space before the non visible
427    // element. The space will move after the element if it's needed (if
428    // the element is followed by inline text);
429  3 if (fPreviousContent != null) {
430  2 sendCharacters(fPreviousContent.toCharArray(), 0,
431    fPreviousContent.length() - 1);
432  2 fPreviousContent = " ";
433    }
434    } else {
435    // Send current content
436  0 sendCharacters(getContent().toString().toCharArray());
437    }
438   
439  3 getContent().setLength(0);
440   
441    // Do not clean white spaces when in non visible element
442  3 ++fNoCleanUpLevel;
443    }
444   
445    /**
446    * Send the current content, if any, and empty the content buffer.
447    */
 
448  3 toggle protected void endNonVisibleElement() throws SAXException
449    {
450    // Send current content
451  3 if (getContent().length() > 0) {
452  2 sendCharacters(getContent().toString().toCharArray());
453  2 getContent().setLength(0);
454    }
455    }
456   
 
457  2568 toggle protected void sendCharacters(char ch[]) throws SAXException
458    {
459  2568 sendCharacters(ch, 0, ch.length);
460    }
461   
 
462  2570 toggle protected void sendCharacters(char ch[], int start, int length)
463    throws SAXException
464    {
465  2570 if (length > 0) {
466  2289 super.characters(ch, start, length);
467    }
468    }
469   
470    /**
471    * UC1: Any white spaces group is removed if it's before a non inline
472    * element or at the beginning of the document.
473    * <p>
474    * UC3: Remove leading spaces of content if previous inline text already
475    * ends with a space.
476    */
 
477  10567 toggle private void cleanContentLeadingSpaces()
478    {
479  10567 if (getContent().length() > 0) {
480  2749 if (fPreviousInlineText.length() == 0
481    || fPreviousInlineText
482    .charAt(fPreviousInlineText.length() - 1) == ' ')
483    {
484  2216 trimLeadingWhiteSpaces();
485    }
486    }
487    }
488   
489    /**
490    * UC3: Replace group of white spaces by a single space.
491    */
 
492  10567 toggle protected void cleanContentExtraWhiteSpaces()
493    {
494  10567 if (getContent().length() > 0) {
495  2294 if (shouldRemoveWhiteSpaces()) {
496  2217 Matcher matcher = HTML_WHITESPACE_DUPLICATES_PATTERN
497    .matcher(getContent());
498  2217 String result = matcher.replaceAll(" ");
499  2217 getContent().setLength(0);
500  2217 getContent().append(result);
501    }
502    }
503    }
504   
505    // Trim white spaces and new lines since they are ignored in XHTML (except
506    // when in CDATA or PRE elements).
 
507  2216 toggle protected void trimLeadingWhiteSpaces()
508    {
509  2216 if (shouldRemoveWhiteSpaces() && getContent().length() > 0) {
510  2150 String result = trimLeadingWhiteSpaces(getContent());
511  2150 getContent().setLength(0);
512  2150 getContent().append(result);
513    }
514    }
515   
 
516  2150 toggle protected String trimLeadingWhiteSpaces(CharSequence content)
517    {
518  2150 String trimedContent;
519   
520  2150 if (shouldRemoveWhiteSpaces() && content.length() > 0) {
521  2150 Matcher matcher = HTML_WHITESPACE_HEAD_PATTERN.matcher(content);
522  2150 trimedContent = matcher.replaceAll("");
523    } else {
524  0 trimedContent = content.toString();
525    }
526   
527  2150 return trimedContent;
528    }
529   
 
530  5768 toggle protected void trimTrailingWhiteSpaces()
531    {
532  5768 if (shouldRemoveWhiteSpaces() && getContent().length() > 0) {
533  1007 String result = trimTrailingWhiteSpaces(getContent());
534  1007 getContent().setLength(0);
535  1007 getContent().append(result);
536    }
537    }
538   
 
539  1030 toggle protected String trimTrailingWhiteSpaces(CharSequence content)
540    {
541  1030 String trimedContent;
542   
543  1030 if (shouldRemoveWhiteSpaces() && content.length() > 0) {
544  1030 Matcher matcher = HTML_WHITESPACE_TAIL_PATTERN.matcher(content);
545  1030 trimedContent = matcher.replaceAll("");
546    } else {
547  0 trimedContent = content.toString();
548    }
549   
550  1030 return trimedContent;
551    }
552   
 
553  73075 toggle protected StringBuffer getContent()
554    {
555  73075 return fContent;
556    }
557   
558    /**
559    * We remove spaces around non semantic comments.
560    *
561    * @param comment the comment to evaluate
562    * @return true if the comment is a semantic one
563    */
 
564  270 toggle protected boolean isSemanticComment(String comment)
565    {
566  270 return comment.startsWith("startmacro:")
567    || comment.startsWith("stopmacro");
568    }
569   
 
570    private static class Event
571    {
 
572    public enum Type
573    {
574    BEGIN_ELEMENT, END_ELEMENT, COMMENT
575    }
576   
577    public Type type;
578   
579    public String uri;
580   
581    public String localName;
582   
583    public String qName;
584   
585    public Attributes atts;
586   
587    String content;
588   
 
589  2220 toggle public Event(String uri, String localName, String qName, Attributes atts)
590    {
591  2220 this.type = Type.BEGIN_ELEMENT;
592  2220 this.uri = uri;
593  2220 this.localName = localName;
594  2220 this.qName = qName;
595  2220 this.atts = atts;
596    }
597   
 
598  2220 toggle public Event(String uri, String localName, String qName)
599    {
600  2220 this.type = Type.END_ELEMENT;
601  2220 this.uri = uri;
602  2220 this.localName = localName;
603  2220 this.qName = qName;
604    }
605   
 
606  10 toggle public Event(String content)
607    {
608  10 this.type = Type.COMMENT;
609  10 this.content = content;
610    }
611    }
612    }