Clover Coverage Report - XWiki Rendering - Parent POM 4.0-SNAPSHOT (Aggregated)
Coverage timestamp: Mon Mar 12 2012 18:03:13 CET
../../../../../../img/srcFileCovDistChart10.png 0% of files have more coverage
169   627   78   5.12
78   388   0.46   11
33     2.36  
3    
 
  XHTMLWhitespaceXMLFilter       Line # 59 158 0% 75 11 95.9% 0.9586466
  XHTMLWhitespaceXMLFilter.Event       Line # 585 11 0% 3 0 100% 1.0
  XHTMLWhitespaceXMLFilter.Event.Type       Line # 587 0 - 0 0 - -1.0
 
  (219)
 
1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.rendering.wikimodel.xhtml.filter;
21   
22    import java.util.ArrayList;
23    import java.util.Arrays;
24    import java.util.HashSet;
25    import java.util.List;
26    import java.util.Set;
27    import java.util.Stack;
28    import java.util.regex.Matcher;
29    import java.util.regex.Pattern;
30   
31    import org.xml.sax.Attributes;
32    import org.xml.sax.SAXException;
33    import org.xml.sax.XMLReader;
34    import org.xml.sax.helpers.AttributesImpl;
35   
36    /**
37    * Removes non-semantic whitespaces in XML elements. See
38    * http://www.w3.org/TR/html4/struct/text.html#h-9.1 for more details. Possible
39    * use cases:
40    * <p/>
41    * <ul>
42    * <li><b>UC1</b>: Any white spaces group is removed if it's before a non inline
43    * (see INLINE_ELEMENTS) element or at the begining of the document.</li>
44    * <li><b>UC2</b>: Any white spaces group is removed if it's after a non inline
45    * (see INLINE_ELEMENTS) element or at the end of the document.</li>
46    * <li><b>UC3</b>: Inside inline content any white spaces group become a single
47    * space.</li>
48    * <li><b>UC5</b>: Non visible element (comments, CDATA and NONVISIBLE_ELEMENTS)
49    * are invisibles and does not cut a white space group.
50    * <code>text(sp)<!--comment-->(sp)text</code> becomes
51    * <code>text(sp)<!--comment-->text</code></li>
52    * <li><b>UC5</b>: Visible empty element like img count as text when grouping
53    * white spaces</li>
54    * <li><b>UC6</b>: Semantic comment count as text when grouping white spaces</li>
55    *
56    * @version $Id: 939c4bc79e172d79c8a2d7aae0a71b5bfa663316 $
57    * @since 4.0M1
58    */
 
59    public class XHTMLWhitespaceXMLFilter extends DefaultXMLFilter
60    {
61    private static final Pattern HTML_WHITESPACE_DUPLICATES_PATTERN = Pattern
62    .compile("\\s{2,}|[\\t\\n\\x0B\\f\\r]+");
63   
64    private static final Pattern HTML_WHITESPACE_HEAD_PATTERN = Pattern
65    .compile("^\\s+");
66   
67    private static final Pattern HTML_WHITESPACE_TAIL_PATTERN = Pattern
68    .compile("\\s+$");
69   
70    private static final Set<String> NONINLINE_ELEMENTS = new HashSet<String>(
71    Arrays.asList("address", "blockquote", "div", "dl", "dt", "dd",
72    "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
73    "noscript", "ol", "p", "pre", "script", "table", "ul", "html",
74    "body", "td", "tr", "th", "tbody", "head", "li", "thead", "tfoot",
75    "caption", "col", "colgroup", "legend", "base", "link", "meta",
76    "style", "title"));
77   
78    /**
79    * Non visible elements behave like CDATA and comments: it's part of the
80    * white space group.
81    */
82    private static final Set<String> NONVISIBLE_ELEMENTS = new HashSet<String>(
83    Arrays.asList("script"));
84   
85    /**
86    * Visible elements like images count in the inline text to clean white
87    * spaces.
88    */
89    private static final Set<String> EMPTYVISIBLE_ELEMENTS = new HashSet<String>(
90    Arrays.asList("img"));
91   
92    /**
93    * State indicating if the white spaces has to be cleaned. It's an int to
94    * support &lt;pre&gt;pre&lt;/pre&gt; inside &lt;tt
95    * class=&quot;wikimodel-verbatim&quot;&gt;pre&lt;/tt&gt;.
96    */
97    private int fNoCleanUpLevel = 0;
98   
99    /**
100    * Content to clean.
101    */
102    private StringBuffer fContent = new StringBuffer();
103   
104    /**
105    * Bufferized current inline text. It contains only text (and no inline
106    * start/end element, comment or CDATA) to be able know if a leading space
107    * has to be remove because the previous text ends with it or if there is no
108    * previous text.
109    */
110    private StringBuffer fPreviousInlineText = new StringBuffer();
111   
112    /**
113    * The previous content to send. Buffurized waiting to know if its trailing
114    * space has to be removed when it's the last text of inline content.
115    */
116    private String fPreviousContent = null;
117   
118    /**
119    * Previous inline elements. These are the elements before the previous
120    * content. It's buffurized to support space group cleaning betwen different
121    * inline elements.
122    */
123    private List<Event> fPreviousElements = new ArrayList<Event>();
124   
125    private Stack<Attributes> fAttributes = new Stack<Attributes>();
126   
 
127  0 toggle public XHTMLWhitespaceXMLFilter()
128    {
129   
130    }
131   
 
132  311 toggle public XHTMLWhitespaceXMLFilter(XMLReader reader)
133    {
134  311 super(reader);
135    }
136   
 
137  935 toggle @Override
138    public void characters(char[] ch, int start, int length)
139    throws SAXException
140    {
141  935 getContent().append(ch, start, length);
142    }
143   
 
144  1774 toggle @Override
145    public void startElement(String uri, String localName, String qName,
146    Attributes atts) throws SAXException
147    {
148  1774 Attributes clonedAtts = fAttributes.push(new AttributesImpl(atts));
149   
150  1774 if (NONVISIBLE_ELEMENTS.contains(localName)) {
151  3 startNonVisibleElement();
152   
153    // send start element event
154  3 super.startElement(uri, localName, qName, atts);
155    } else {
156  1771 if (NONINLINE_ELEMENTS.contains(localName)) {
157    // Flush previous content and print current one
158  1289 flushContent();
159   
160    // white spaces inside pre element are not cleaned
161  1289 if ("pre".equalsIgnoreCase(localName)) {
162  15 ++fNoCleanUpLevel;
163    }
164   
165    // send start element event
166  1289 super.startElement(uri, localName, qName, atts);
167  482 } else if (EMPTYVISIBLE_ELEMENTS.contains(localName)) {
168  35 startEmptyVisibleElement();
169   
170  35 super.startElement(uri, localName, qName, atts);
171  447 } else if (preservedInlineContent(localName, atts)) {
172    // Flush previous content and print current one
173  14 flushContent(false);
174   
175  14 ++fNoCleanUpLevel;
176   
177    // send start element event
178  14 super.startElement(uri, localName, qName, atts);
179    } else {
180  433 appendInlineEvent(new Event(uri, localName, qName, clonedAtts));
181    }
182    }
183    }
184   
 
185  1774 toggle @Override
186    public void endElement(String uri, String localName, String qName)
187    throws SAXException
188    {
189  1774 if (NONVISIBLE_ELEMENTS.contains(localName)) {
190  3 endNonVisibleElement();
191   
192  3 super.endElement(uri, localName, qName);
193   
194  3 --fNoCleanUpLevel;
195    } else {
196  1771 if (NONINLINE_ELEMENTS.contains(localName)) {
197    // Flush previous content and print current one
198  1289 flushContent();
199   
200    // white spaces inside pre element are not cleaned
201  1289 if ("pre".equalsIgnoreCase(localName)) {
202  15 --fNoCleanUpLevel;
203    }
204   
205  1289 super.endElement(uri, localName, qName);
206  482 } else if (EMPTYVISIBLE_ELEMENTS.contains(localName)) {
207  35 endEmptyVisibleElement();
208   
209  35 super.endElement(uri, localName, qName);
210  447 } else if (preservedInlineContent(localName, fAttributes.peek())) {
211    // Flush previous content and print current one
212  14 flushContent();
213   
214  14 --fNoCleanUpLevel;
215   
216  14 super.endElement(uri, localName, qName);
217    } else {
218  433 appendInlineEvent(new Event(uri, localName, qName));
219    }
220    }
221   
222  1774 fAttributes.pop();
223    }
224   
 
225  894 toggle private boolean preservedInlineContent(String localName, Attributes atts)
226    {
227  894 boolean preserved = false;
228   
229  894 if ("tt".equalsIgnoreCase(localName)) {
230  40 String value = atts.getValue("class");
231   
232  40 if (value != null) {
233  28 preserved = Arrays.asList(value.split(" ")).contains(
234    "wikimodel-verbatim");
235    }
236    }
237   
238  894 return preserved;
239    }
240   
 
241  9 toggle @Override
242    public void startCDATA() throws SAXException
243    {
244  9 startNonVisibleElement();
245   
246  9 super.startCDATA();
247    }
248   
 
249  9 toggle @Override
250    public void endCDATA() throws SAXException
251    {
252  9 endNonVisibleElement();
253   
254  9 super.endCDATA();
255   
256  9 --fNoCleanUpLevel;
257    }
258   
 
259  218 toggle @Override
260    public void comment(char[] ch, int start, int length) throws SAXException
261    {
262  218 if (shouldRemoveWhiteSpaces()) {
263  218 String comment = new String(ch, start, length);
264   
265  218 if (isSemanticComment(comment)) {
266    // UC6: Semantic comment count as text when grouping white
267    // spaces
268  208 startEmptyVisibleElement();
269   
270  208 super.comment(ch, start, length);
271    } else {
272  10 appendInlineEvent(new Event(comment));
273    }
274    } else {
275  0 super.comment(ch, start, length);
276    }
277    }
278   
 
279  345 toggle @Override
280    public void endDocument() throws SAXException
281    {
282    // Flush previous content and print current one
283  345 flushContent();
284   
285  345 super.endDocument();
286    }
287   
 
288  5812 toggle protected boolean shouldRemoveWhiteSpaces()
289    {
290  5812 return fNoCleanUpLevel == 0;
291    }
292   
 
293  3548 toggle protected void sendPreviousContent(boolean trimTrailing)
294    throws SAXException
295    {
296  3548 if (fPreviousContent != null && fPreviousContent.length() > 0) {
297  54 if (trimTrailing) {
298  9 fPreviousContent = trimTrailingWhiteSpaces(fPreviousContent);
299    }
300   
301  54 sendCharacters(fPreviousContent.toCharArray());
302  54 fPreviousContent = null;
303    }
304   
305  3548 for (Event event : fPreviousElements) {
306  418 sendInlineEvent(event);
307    }
308  3548 fPreviousElements.clear();
309    }
310   
 
311  876 toggle protected void sendInlineEvent(Event event) throws SAXException
312    {
313  876 if (event.type == Event.Type.BEGIN_ELEMENT) {
314  433 super.startElement(event.uri, event.localName, event.qName,
315    event.atts);
316  443 } else if (event.type == Event.Type.END_ELEMENT) {
317  433 super.endElement(event.uri, event.localName, event.qName);
318  10 } else if (event.type == Event.Type.COMMENT) {
319  10 super.comment(event.content.toCharArray(), 0, event.content
320    .length());
321    }
322    }
323   
324    /**
325    * Flush previous content and print current one.
326    */
 
327  2937 toggle protected void flushContent() throws SAXException
328    {
329  2937 flushContent(true);
330    }
331   
332    /**
333    * Flush previous content and print current one.
334    */
 
335  2951 toggle protected void flushContent(boolean trimTrailing) throws SAXException
336    {
337  2951 cleanContentLeadingSpaces();
338  2951 cleanContentExtraWhiteSpaces();
339   
340  2951 if (trimTrailing) {
341    // UC2: Any white spaces group is removed if it's after a non inline
342    // (see INLINE_ELEMENTS) element.
343  2937 trimTrailingWhiteSpaces();
344    }
345   
346    // Send previous content
347  2951 sendPreviousContent(getContent().length() == 0);
348   
349    // Send current content
350  2951 if (getContent().length() > 0) {
351  414 sendCharacters(getContent().toString().toCharArray());
352  414 getContent().setLength(0);
353    }
354   
355    // Reinit inline text buffer
356  2951 fPreviousInlineText.setLength(0);
357    }
358   
359    /**
360    * Append an inline element. Inline elements ending with a space are stacked
361    * waiting for a non space character or the end of the inline content.
362    */
 
363  876 toggle protected void appendInlineEvent(Event event) throws SAXException
364    {
365  876 cleanContentLeadingSpaces();
366  876 cleanContentExtraWhiteSpaces();
367   
368  876 if (getContent().length() > 0) {
369  344 sendPreviousContent(false);
370   
371  344 fPreviousInlineText.append(getContent());
372   
373  344 if (getContent().charAt(getContent().length() - 1) == ' ') {
374  47 fPreviousContent = getContent().toString();
375  47 fPreviousElements.add(event);
376    } else {
377  297 sendCharacters(getContent().toString().toCharArray());
378  297 sendInlineEvent(event);
379    }
380   
381  344 getContent().setLength(0);
382    } else {
383  532 if (fPreviousInlineText.length() == 0) {
384    // There is no inline text before this inline element
385  161 sendInlineEvent(event);
386    } else {
387    // The last inline text ends with a space
388  371 fPreviousElements.add(event);
389    }
390    }
391    }
392   
 
393  243 toggle protected void startEmptyVisibleElement() throws SAXException
394    {
395  243 cleanContentLeadingSpaces();
396  243 cleanContentExtraWhiteSpaces();
397   
398    // Send previous content
399  243 sendPreviousContent(false);
400   
401    // Send content
402  243 sendCharacters(getContent().toString().toCharArray());
403  243 fPreviousInlineText.append(getContent());
404   
405    // Add visible element as part of the inline text
406  243 fPreviousInlineText.append("EmptyVisibleElement");
407   
408  243 getContent().setLength(0);
409    }
410   
 
411  35 toggle protected void endEmptyVisibleElement() throws SAXException
412    {
413    // Send current content
414  35 if (getContent().length() > 0) {
415  0 sendCharacters(getContent().toString().toCharArray());
416  0 getContent().setLength(0);
417    }
418    }
419   
420    /**
421    * Append an non visible element.
422    */
 
423  12 toggle protected void startNonVisibleElement() throws SAXException
424    {
425  12 if (shouldRemoveWhiteSpaces()) {
426  11 cleanContentLeadingSpaces();
427  11 cleanContentExtraWhiteSpaces();
428   
429  11 if (getContent().length() > 0) {
430  10 sendPreviousContent(false);
431   
432  10 fPreviousInlineText.append(getContent());
433   
434  10 if (getContent().charAt(getContent().length() - 1) == ' ') {
435  7 fPreviousContent = getContent().toString();
436    } else {
437  3 sendCharacters(getContent().toString().toCharArray());
438    }
439    }
440   
441    // The is some text ending with a space before the non visible
442    // element. The space will move after the element if it's needed (if
443    // the element is followed by inline text);
444  11 if (fPreviousContent != null) {
445  7 sendCharacters(fPreviousContent.toCharArray(), 0,
446    fPreviousContent.length() - 1);
447  7 fPreviousContent = " ";
448    }
449    } else {
450    // Send current content
451  1 sendCharacters(getContent().toString().toCharArray());
452    }
453   
454  12 getContent().setLength(0);
455   
456    // Do not clean white spaces when in non visible element
457  12 ++fNoCleanUpLevel;
458    }
459   
460    /**
461    * Flush previous content and print current one.
462    */
 
463  12 toggle protected void endNonVisibleElement() throws SAXException
464    {
465    // Send current content
466  12 if (getContent().length() > 0) {
467  11 sendCharacters(getContent().toString().toCharArray());
468  11 getContent().setLength(0);
469    }
470    }
471   
 
472  1023 toggle protected void sendCharacters(char ch[]) throws SAXException
473    {
474  1023 sendCharacters(ch, 0, ch.length);
475    }
476   
 
477  1030 toggle protected void sendCharacters(char ch[], int start, int length)
478    throws SAXException
479    {
480  1030 if (length > 0) {
481  812 super.characters(ch, start, length);
482    }
483    }
484   
485    /**
486    * UC1: Any white spaces group is removed if it's before a non inline
487    * element or at the begining of the document.
488    * <p>
489    * UC3: Remove leading spaces of content if previous inline text already
490    * ends with a space.
491    */
 
492  4081 toggle private void cleanContentLeadingSpaces()
493    {
494  4081 if (getContent().length() > 0) {
495  922 if (fPreviousInlineText.length() == 0
496    || fPreviousInlineText
497    .charAt(fPreviousInlineText.length() - 1) == ' ')
498    {
499  740 trimLeadingWhiteSpaces();
500    }
501    }
502    }
503   
504    /**
505    * UC3: Replace group of white spaces by a single space.
506    */
 
507  4081 toggle protected void cleanContentExtraWhiteSpaces()
508    {
509  4081 if (getContent().length() > 0) {
510  801 if (shouldRemoveWhiteSpaces()) {
511  768 Matcher matcher = HTML_WHITESPACE_DUPLICATES_PATTERN
512    .matcher(getContent());
513  768 String result = matcher.replaceAll(" ");
514  768 getContent().setLength(0);
515  768 getContent().append(result);
516    }
517    }
518    }
519   
520    // Trim white spaces and new lines since they are ignored in XHTML (except
521    // when in CDATA or PRE elements).
 
522  740 toggle protected void trimLeadingWhiteSpaces()
523    {
524  740 if (shouldRemoveWhiteSpaces() && getContent().length() > 0) {
525  711 String result = trimLeadingWhiteSpaces(getContent());
526  711 getContent().setLength(0);
527  711 getContent().append(result);
528    }
529    }
530   
 
531  711 toggle protected String trimLeadingWhiteSpaces(CharSequence content)
532    {
533  711 String trimedContent;
534   
535  711 if (shouldRemoveWhiteSpaces() && content.length() > 0) {
536  711 Matcher matcher = HTML_WHITESPACE_HEAD_PATTERN.matcher(content);
537  711 trimedContent = matcher.replaceAll("");
538    } else {
539  0 trimedContent = content.toString();
540    }
541   
542  711 return trimedContent;
543    }
544   
 
545  2937 toggle protected void trimTrailingWhiteSpaces()
546    {
547  2937 if (shouldRemoveWhiteSpaces() && getContent().length() > 0) {
548  384 String result = trimTrailingWhiteSpaces(getContent());
549  384 getContent().setLength(0);
550  384 getContent().append(result);
551    }
552    }
553   
 
554  393 toggle protected String trimTrailingWhiteSpaces(CharSequence content)
555    {
556  393 String trimedContent;
557   
558  393 if (shouldRemoveWhiteSpaces() && content.length() > 0) {
559  393 Matcher matcher = HTML_WHITESPACE_TAIL_PATTERN.matcher(content);
560  393 trimedContent = matcher.replaceAll("");
561    } else {
562  0 trimedContent = content.toString();
563    }
564   
565  393 return trimedContent;
566    }
567   
 
568  28496 toggle protected StringBuffer getContent()
569    {
570  28496 return fContent;
571    }
572   
573    /**
574    * We remove spaces around non semantic comments.
575    *
576    * @param comment the comment to evaluate
577    * @return true if the comment is a semantic one
578    */
 
579  218 toggle protected boolean isSemanticComment(String comment)
580    {
581  218 return comment.startsWith("startmacro:")
582    || comment.startsWith("stopmacro");
583    }
584   
 
585    private static class Event
586    {
 
587    public enum Type
588    {
589    BEGIN_ELEMENT, END_ELEMENT, COMMENT
590    }
591   
592    public Type type;
593   
594    public String uri;
595   
596    public String localName;
597   
598    public String qName;
599   
600    public Attributes atts;
601   
602    String content;
603   
 
604  433 toggle public Event(String uri, String localName, String qName, Attributes atts)
605    {
606  433 this.type = Type.BEGIN_ELEMENT;
607  433 this.uri = uri;
608  433 this.localName = localName;
609  433 this.qName = qName;
610  433 this.atts = atts;
611    }
612   
 
613  433 toggle public Event(String uri, String localName, String qName)
614    {
615  433 this.type = Type.END_ELEMENT;
616  433 this.uri = uri;
617  433 this.localName = localName;
618  433 this.qName = qName;
619    }
620   
 
621  10 toggle public Event(String content)
622    {
623  10 this.type = Type.COMMENT;
624  10 this.content = content;
625    }
626    }
627    }