1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.xml

File XMLUtils.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

30
152
16
1
484
286
51
0.34
9.5
16
3.19

Classes

Class Line # Actions
XMLUtils 61 152 0% 51 11
0.9444444 (94.4%)
 

Contributing tests

This file is covered by 441 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.xml;
21   
22    import java.io.IOException;
23    import java.io.StringReader;
24    import java.io.StringWriter;
25    import java.util.regex.Pattern;
26   
27    import javax.xml.parsers.DocumentBuilderFactory;
28    import javax.xml.parsers.ParserConfigurationException;
29    import javax.xml.transform.OutputKeys;
30    import javax.xml.transform.Result;
31    import javax.xml.transform.Source;
32    import javax.xml.transform.Transformer;
33    import javax.xml.transform.TransformerException;
34    import javax.xml.transform.TransformerFactory;
35    import javax.xml.transform.TransformerFactoryConfigurationError;
36    import javax.xml.transform.dom.DOMSource;
37    import javax.xml.transform.sax.SAXResult;
38    import javax.xml.transform.sax.SAXSource;
39    import javax.xml.transform.stream.StreamResult;
40   
41    import org.slf4j.Logger;
42    import org.slf4j.LoggerFactory;
43    import org.w3c.dom.Document;
44    import org.w3c.dom.Node;
45    import org.w3c.dom.bootstrap.DOMImplementationRegistry;
46    import org.w3c.dom.ls.DOMImplementationLS;
47    import org.w3c.dom.ls.LSInput;
48    import org.w3c.dom.ls.LSOutput;
49    import org.w3c.dom.ls.LSParser;
50    import org.w3c.dom.ls.LSSerializer;
51    import org.xml.sax.InputSource;
52    import org.xml.sax.SAXException;
53    import org.xml.sax.XMLReader;
54   
55    /**
56    * XML Utility methods.
57    *
58    * @version $Id: 945e152a847a57f16964a0d774d5e0653c2950a7 $
59    * @since 1.6M1
60    */
 
61    public final class XMLUtils
62    {
63    /** Logging helper object. */
64    private static final Logger LOGGER = LoggerFactory.getLogger(XMLUtils.class);
65   
66    /** XML encoding of the "ampersand" character. */
67    private static final String AMP = "&#38;";
68   
69    /** Regular expression recognizing XML-escaped "ampersand" characters. */
70    private static final Pattern AMP_PATTERN = Pattern.compile("&(?:amp|#0*+38|#x0*+26);");
71   
72    /** XML encoding of the "single quote" character. */
73    private static final String APOS = "&#39;";
74   
75    /** Regular expression recognizing XML-escaped "single quote" characters. */
76    private static final Pattern APOS_PATTERN = Pattern.compile("&(?:apos|#0*+39|#x0*+27);");
77   
78    /** XML encoding of the "double quote" character. */
79    private static final String QUOT = "&#34;";
80   
81    /** Regular expression recognizing XML-escaped "double quote" characters. */
82    private static final Pattern QUOT_PATTERN = Pattern.compile("&(?:quot|#0*+34|#x0*+22);");
83   
84    /** XML encoding of the "left curly bracket". */
85    private static final String LCURL = "&#123;";
86   
87    /** Regular expression recognizing XML-escaped "left curly bracket" characters. */
88    private static final Pattern LCURL_PATTERN = Pattern.compile("&(?:#0*+123|#x0*+7[bB]);");
89   
90    /** XML encoding of the "less than" character. */
91    private static final String LT = "&#60;";
92   
93    /** Regular expression recognizing XML-escaped "less than" characters. */
94    private static final Pattern LT_PATTERN = Pattern.compile("&(?:lt|#0*+60|#x0*+3[cC]);");
95   
96    /** XML encoding of the "greater than" character. */
97    private static final String GT = "&#62;";
98   
99    /** Regular expression recognizing XML-escaped "greater than" characters. */
100    private static final Pattern GT_PATTERN = Pattern.compile("&(?:gt|#0*+62|#x0*+3[eE]);");
101   
102    /** Helper object for manipulating DOM Level 3 Load and Save APIs. */
103    private static final DOMImplementationLS LS_IMPL;
104   
105    /** Xerces configuration parameter for disabling fetching and checking XMLs against their DTD. */
106    private static final String DISABLE_DTD_PARAM = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
107   
 
108  57 toggle static {
109  57 DOMImplementationLS implementation = null;
110  57 try {
111  57 implementation =
112    (DOMImplementationLS) DOMImplementationRegistry.newInstance().getDOMImplementation("LS 3.0");
113    } catch (Exception ex) {
114  0 LOGGER.warn("Cannot initialize the XML Script Service: [{}]", ex.getMessage());
115    }
116  57 LS_IMPL = implementation;
117    }
118   
119    /**
120    * Private constructor since this is a utility class that shouldn't be instantiated (all methods are static).
121    */
 
122  0 toggle private XMLUtils()
123    {
124    // Nothing to do
125    }
126   
127    /**
128    * Extracts a well-formed XML fragment from the given DOM tree.
129    *
130    * @param node the root of the DOM tree where the extraction takes place
131    * @param start the index of the first character
132    * @param length the maximum number of characters in text nodes to include in the returned fragment
133    * @return a well-formed XML fragment starting at the given character index and having up to the specified length,
134    * summing only the characters in text nodes
135    * @since 1.6M2
136    */
 
137  4 toggle public static String extractXML(Node node, int start, int length)
138    {
139  4 ExtractHandler handler = null;
140  4 try {
141  4 handler = new ExtractHandler(start, length);
142  4 Transformer xformer = TransformerFactory.newInstance().newTransformer();
143  4 xformer.transform(new DOMSource(node), new SAXResult(handler));
144  2 return handler.getResult();
145    } catch (Throwable t) {
146  2 if (handler != null && handler.isFinished()) {
147  2 return handler.getResult();
148    } else {
149  0 throw new RuntimeException("Failed to extract XML", t);
150    }
151    }
152    }
153   
154    /**
155    * XML comment does not support some characters inside its content but there is no official escaping/unescaping for
156    * it so we made our own.
157    * <ul>
158    * <li>1) Escape existing \</li>
159    * <li>2) Escape --</li>
160    * <li>3) Add {@code \} (unescaped as {@code ""}) at the end if the last char is {@code -}</li>
161    * </ul>
162    *
163    * @param content the XML comment content to escape
164    * @return the escaped content.
165    * @since 1.9M2
166    */
 
167  115 toggle public static String escapeXMLComment(String content)
168    {
169  115 StringBuffer str = new StringBuffer(content.length());
170   
171  115 char[] buff = content.toCharArray();
172  115 char lastChar = 0;
173  115 for (char c : buff) {
174  6113 if (c == '\\') {
175  19 str.append('\\');
176  6094 } else if (c == '-' && lastChar == '-') {
177  6 str.append('\\');
178    }
179   
180  6113 str.append(c);
181  6113 lastChar = c;
182    }
183   
184  115 if (lastChar == '-') {
185  5 str.append('\\');
186    }
187   
188  115 return str.toString();
189    }
190   
191    /**
192    * XML comment does not support some characters inside its content but there is no official escaping/unescaping for
193    * it so we made our own.
194    *
195    * @param content the XML comment content to unescape
196    * @return the unescaped content.
197    * @see #escapeXMLComment(String)
198    * @since 1.9M2
199    */
 
200  198 toggle public static String unescapeXMLComment(String content)
201    {
202  198 StringBuffer str = new StringBuffer(content.length());
203   
204  198 char[] buff = content.toCharArray();
205  198 boolean escaped = false;
206  198 for (char c : buff) {
207  6869 if (!escaped && c == '\\') {
208  30 escaped = true;
209  30 continue;
210    }
211   
212  6839 str.append(c);
213  6839 escaped = false;
214    }
215   
216  198 return str.toString();
217    }
218   
219    /**
220    * Escapes all the XML special characters in a <code>String</code> using numerical XML entities. Specifically,
221    * escapes &lt;, &gt;, ", ', &amp; and {. Left curly bracket is included here to protect against {{/html}} in
222    * xwiki 2.x syntax.
223    *
224    * @param content the text to escape, may be {@code null}
225    * @return a new escaped {@code String}, {@code null} if {@code null} input
226    */
 
227  112926 toggle public static String escape(Object content)
228    {
229  112927 return escapeAttributeValue(content);
230    }
231   
232    /**
233    * Escapes all the XML special characters and left curly bracket in a <code>String</code> using numerical XML
234    * entities, so that the resulting string can safely be used as an XML attribute value. Specifically, escapes &lt;,
235    * &gt;, ", ', &amp; and {. Left curly bracket is included here to protect against {{/html}} in xwiki 2.x syntax.
236    *
237    * @param content the text to escape, may be {@code null}
238    * @return a new escaped {@code String}, {@code null} if {@code null} input
239    */
 
240  112920 toggle public static String escapeAttributeValue(Object content)
241    {
242  112920 if (content == null) {
243  1259 return null;
244    }
245  111664 String str = String.valueOf(content);
246  111665 StringBuilder result = new StringBuilder((int) (str.length() * 1.1));
247  111667 int length = str.length();
248  111668 char c;
249  2149954 for (int i = 0; i < length; ++i) {
250  2038299 c = str.charAt(i);
251  2038297 switch (c) {
252  12685 case '&':
253  12685 result.append(AMP);
254  12685 break;
255  153 case '\'':
256  153 result.append(APOS);
257  153 break;
258  319 case '"':
259  319 result.append(QUOT);
260  319 break;
261  42 case '<':
262  42 result.append(LT);
263  42 break;
264  31 case '>':
265  31 result.append(GT);
266  31 break;
267  904 case '{':
268  904 result.append(LCURL);
269  904 break;
270  2024168 default:
271  2024167 result.append(c);
272    }
273    }
274  111657 return result.toString();
275    }
276   
277    /**
278    * Escapes the XML special characters in a <code>String</code> using numerical XML entities, so that the resulting
279    * string can safely be used as an XML text node. Specifically, escapes &lt;, &gt;, and &amp;.
280    *
281    * @param content the text to escape, may be {@code null}
282    * @return a new escaped {@code String}, {@code null} if {@code null} input
283    */
 
284  4 toggle public static String escapeElementContent(Object content)
285    {
286  4 if (content == null) {
287  1 return null;
288    }
289  3 String str = String.valueOf(content);
290  3 StringBuilder result = new StringBuilder((int) (str.length() * 1.1));
291  3 int length = str.length();
292  3 char c;
293  31 for (int i = 0; i < length; ++i) {
294  28 c = str.charAt(i);
295  28 switch (c) {
296  2 case '&':
297  2 result.append(AMP);
298  2 break;
299  3 case '<':
300  3 result.append(LT);
301  3 break;
302  1 case '>':
303  1 result.append(GT);
304  1 break;
305  22 default:
306  22 result.append(c);
307    }
308    }
309  3 return result.toString();
310    }
311   
312    /**
313    * Unescape encoded special XML characters. Only &gt;, &lt;, &amp;, ", ' and { are unescaped, since they are the only
314    * ones that affect the resulting markup.
315    *
316    * @param content the text to decode, may be {@code null}
317    * @return unescaped content, {@code null} if {@code null} input
318    */
 
319  10 toggle public static String unescape(Object content)
320    {
321  10 if (content == null) {
322  1 return null;
323    }
324  9 String str = String.valueOf(content);
325   
326  9 str = APOS_PATTERN.matcher(str).replaceAll("'");
327  9 str = QUOT_PATTERN.matcher(str).replaceAll("\"");
328  9 str = LT_PATTERN.matcher(str).replaceAll("<");
329  9 str = GT_PATTERN.matcher(str).replaceAll(">");
330  9 str = AMP_PATTERN.matcher(str).replaceAll("&");
331  9 str = LCURL_PATTERN.matcher(str).replaceAll("{");
332   
333  9 return str;
334    }
335   
336    /**
337    * Construct a new (empty) DOM Document and return it.
338    *
339    * @return an empty DOM Document
340    */
 
341  6 toggle public static Document createDOMDocument()
342    {
343  6 try {
344  6 return DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
345    } catch (ParserConfigurationException ex) {
346  0 LOGGER.error("Cannot create DOM Documents", ex);
347  0 return null;
348    }
349    }
350   
351    /**
352    * Parse a DOM Document from a source.
353    *
354    * @param source the source input to parse
355    * @return the equivalent DOM Document, or {@code null} if the parsing failed.
356    */
 
357  30 toggle public static Document parse(LSInput source)
358    {
359  30 try {
360  30 LSParser p = LS_IMPL.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
361    // Disable validation, since this takes a lot of time and causes unneeded network traffic
362  30 p.getDomConfig().setParameter("validate", false);
363  30 if (p.getDomConfig().canSetParameter(DISABLE_DTD_PARAM, false)) {
364  30 p.getDomConfig().setParameter(DISABLE_DTD_PARAM, false);
365    }
366  30 return p.parse(source);
367    } catch (Exception ex) {
368  3 LOGGER.warn("Cannot parse XML document: [{}]", ex.getMessage());
369  3 return null;
370    }
371    }
372   
373    /**
374    * Serialize a DOM Node into a string, including the XML declaration at the start.
375    *
376    * @param node the node to export
377    * @return the serialized node, or an empty string if the serialization fails
378    */
 
379  7 toggle public static String serialize(Node node)
380    {
381  7 return serialize(node, true);
382    }
383   
384    /**
385    * Serialize a DOM Node into a string, with an optional XML declaration at the start.
386    *
387    * @param node the node to export
388    * @param withXmlDeclaration whether to output the XML declaration or not
389    * @return the serialized node, or an empty string if the serialization fails or the node is {@code null}
390    */
 
391  19 toggle public static String serialize(Node node, boolean withXmlDeclaration)
392    {
393  19 if (node == null) {
394  1 return "";
395    }
396  18 try {
397  18 LSOutput output = LS_IMPL.createLSOutput();
398  18 StringWriter result = new StringWriter();
399  18 output.setCharacterStream(result);
400  18 LSSerializer serializer = LS_IMPL.createLSSerializer();
401  18 serializer.getDomConfig().setParameter("xml-declaration", withXmlDeclaration);
402  18 serializer.setNewLine("\n");
403  18 String encoding = "UTF-8";
404  18 if (node instanceof Document) {
405  15 encoding = ((Document) node).getXmlEncoding();
406  3 } else if (node.getOwnerDocument() != null) {
407  3 encoding = node.getOwnerDocument().getXmlEncoding();
408    }
409  18 output.setEncoding(encoding);
410  18 serializer.write(node, output);
411  18 return result.toString();
412    } catch (Exception ex) {
413  0 LOGGER.warn("Failed to serialize node to XML String: [{}]", ex.getMessage());
414  0 return "";
415    }
416    }
417   
418    /**
419    * Apply an XSLT transformation to a Document.
420    *
421    * @param xml the document to transform
422    * @param xslt the stylesheet to apply
423    * @return the transformation result, or {@code null} if an error occurs or {@code null} xml or xslt input
424    */
 
425  9 toggle public static String transform(Source xml, Source xslt)
426    {
427  9 if (xml != null && xslt != null) {
428  6 try {
429  6 StringWriter output = new StringWriter();
430  6 Result result = new StreamResult(output);
431  6 javax.xml.transform.TransformerFactory.newInstance().newTransformer(xslt).transform(xml, result);
432  5 return output.toString();
433    } catch (Exception ex) {
434  1 LOGGER.warn("Failed to apply XSLT transformation: [{}]", ex.getMessage());
435    }
436    }
437  4 return null;
438    }
439   
440    /**
441    * Parse and pretty-print XML content.
442    *
443    * @param content the XML content to format
444    * @return the formatted version of the passed XML content
445    * @throws TransformerFactoryConfigurationError when failing to create a
446    * {@link TransformerFactory}
447    * @throws TransformerException when failing to transform the content
448    * @since 5.2M1
449    */
 
450  462 toggle public static String formatXMLContent(String content) throws TransformerFactoryConfigurationError,
451    TransformerException
452    {
453  462 Transformer transformer = TransformerFactory.newInstance().newTransformer();
454  462 transformer.setOutputProperty(OutputKeys.INDENT, "yes");
455  462 transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
456   
457  462 StreamResult result = new StreamResult(new StringWriter());
458   
459    // Use a SAX Source instead of a StreamSource so that we can control the XMLReader used and set up one that
460    // doesn't resolve entities (and thus doesn't go out on the internet to fetch DTDs!).
461  462 SAXSource source = new SAXSource(new InputSource(new StringReader(content)));
462  462 try {
463  462 XMLReader reader = org.xml.sax.helpers.XMLReaderFactory.createXMLReader();
464  462 reader.setEntityResolver(new org.xml.sax.EntityResolver() {
 
465  16 toggle @Override
466    public InputSource resolveEntity(String publicId, String systemId)
467    throws SAXException, IOException
468    {
469    // Return an empty resolved entity. Note that we don't return null since this would tell the reader
470    // to go on the internet to fetch the DTD.
471  16 return new InputSource(new StringReader(""));
472    }
473    });
474  462 source.setXMLReader(reader);
475    } catch (Exception e) {
476  0 throw new TransformerException(String.format(
477    "Failed to create XML Reader while pretty-printing content [%s]", content), e);
478    }
479   
480  462 transformer.transform(source, result);
481   
482  462 return result.getWriter().toString();
483    }
484    }