1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.officeimporter.internal.filter

File ImageFilter.java

 

Coverage histogram

../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

18
45
3
1
190
118
16
0.36
15
3
5.33

Classes

Class Line # Actions
ImageFilter 74 45 0% 16 4
0.93939394 93.9%
 

Contributing tests

This file is covered by 24 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.officeimporter.internal.filter;
21   
22    import java.net.URLDecoder;
23    import java.nio.charset.Charset;
24    import java.util.HashMap;
25    import java.util.List;
26    import java.util.Map;
27   
28    import javax.inject.Inject;
29    import javax.inject.Named;
30    import javax.inject.Singleton;
31   
32    import org.apache.commons.lang3.StringUtils;
33    import org.apache.commons.lang3.exception.ExceptionUtils;
34    import org.apache.tika.mime.MimeTypeException;
35    import org.apache.tika.mime.MimeTypes;
36    import org.slf4j.Logger;
37    import org.w3c.dom.Attr;
38    import org.w3c.dom.Comment;
39    import org.w3c.dom.Document;
40    import org.w3c.dom.Element;
41    import org.xwiki.bridge.DocumentAccessBridge;
42    import org.xwiki.component.annotation.Component;
43    import org.xwiki.model.reference.AttachmentReference;
44    import org.xwiki.model.reference.DocumentReference;
45    import org.xwiki.model.reference.DocumentReferenceResolver;
46    import org.xwiki.rendering.listener.reference.ResourceReference;
47    import org.xwiki.rendering.listener.reference.ResourceType;
48    import org.xwiki.rendering.renderer.reference.ResourceReferenceSerializer;
49    import org.xwiki.xml.XMLUtils;
50    import org.xwiki.xml.html.filter.AbstractHTMLFilter;
51   
52    import com.github.ooxi.jdatauri.DataUri;
53   
54    /**
55    * This filter performs the following transformations on the {@code <img>} tags:
56    * <ul>
57    * <li>Changes the image source to point to the attached file and adds the XHTML markers (comments) required in order to
58    * convert the XHTML to the right wiki syntax. For this you need to specify the "targetDocument" cleaning
59    * parameter.</li>
60    * <li>Collects the images embedded through the Data URI scheme when the "attachEmbeddedImages" cleaning parameter is
61    * set to true. The result can be accessed from the user data associated with the filtered document, under the
62    * "embeddedImages" key.</li>
63    * <li>Removes the "align" attribute as it can cause problems. First, the office server has a problem with center
64    * aligning images (it aligns them to left). Then, the office server uses {@code <br clear="xxx">} to avoid content
65    * wrapping around images which is not valid XHTML.</li>
66    * </ul>
67    *
68    * @version $Id: cddbdba11f4af7911c719bebd1bcec5783c66712 $
69    * @since 1.8M1
70    */
71    @Component
72    @Named("officeimporter/image")
73    @Singleton
 
74    public class ImageFilter extends AbstractHTMLFilter
75    {
      /**
       * Name of the character set used to parse Data URIs and to URL-decode image file names.
       */
76    private static final String UTF_8 = "UTF-8";
77   
      /**
       * Key under which the map of embedded images (file name to image bytes) is stored as user data on the filtered
       * document.
       */
78    private static final String EMBEDDED_IMAGES = "embeddedImages";
79   
      /**
       * Used to report failures to extract the file name of an image.
       */
80    @Inject
81    private Logger logger;
82   
83    /**
84    * The {@link DocumentAccessBridge} component, used to compute the URL of the image attachment.
85    */
86    @Inject
87    private DocumentAccessBridge documentAccessBridge;
88   
89    /**
90    * Used to serialize the image reference as XHTML comment.
91    */
92    @Inject
93    @Named("xhtmlmarker")
94    private ResourceReferenceSerializer xhtmlMarkerSerializer;
95   
96    /**
97    * The component used to parse string document references (the value of the "targetDocument" cleaning parameter).
98    */
99    @Inject
100    @Named("currentmixed")
101    private DocumentReferenceResolver<String> stringDocumentReferenceResolver;
102   
 
      /**
       * Filters all the {@code <img>} elements of the given document: rewrites the image source when a target
       * document is specified and always removes the "align" attribute.
       *
       * @param htmlDocument the document to filter; when "attachEmbeddedImages" is true it receives an
       *            "embeddedImages" user data entry holding the collected images
       * @param cleaningParams the cleaning parameters; "targetDocument" and "attachEmbeddedImages" are read
       */
103  71 toggle @Override
104    public void filter(Document htmlDocument, Map<String, String> cleaningParams)
105    {
      // Without a "targetDocument" parameter there is no reference and image sources are left untouched.
106  71 String targetDocumentName = cleaningParams.get("targetDocument");
107  71 DocumentReference targetDocumentReference =
108  71 targetDocumentName == null ? null : this.stringDocumentReferenceResolver.resolve(targetDocumentName);
109   
      // Boolean.valueOf(null) is FALSE, so embedded images are collected only when the parameter is "true"
      // (ignoring case).
110  71 boolean attachEmbeddedImages = Boolean.valueOf(cleaningParams.get("attachEmbeddedImages"));
111  71 if (attachEmbeddedImages) {
      // Start with an empty map; getFileName() fills it while the images are processed.
112  2 htmlDocument.setUserData(EMBEDDED_IMAGES, new HashMap<String, byte[]>(), null);
113    }
114   
115  71 List<Element> images = filterDescendants(htmlDocument.getDocumentElement(), new String[] {TAG_IMG});
116  71 for (Element image : images) {
117  6 Attr source = image.getAttributeNode(ATTRIBUTE_SRC);
      // The source is rewritten only when the image has a 'src' attribute and a target document is known.
118  6 if (source != null && targetDocumentReference != null) {
119  5 filterImageSource(source, targetDocumentReference);
120    }
121   
122    // The 'align' attribute of images creates a lot of problems. First, the office server has a problem with
123    // center aligning images (it aligns them to left). Next, the office server uses <br clear="xxx"> for
124    // avoiding content wrapping around images which is not valid XHTML. Therefore, to be consistent and simple
125    // we will remove the 'align' attribute of all the images so that they are all left aligned.
126  6 image.removeAttribute(ATTRIBUTE_ALIGN);
127    }
128    }
129   
 
      /**
       * Points the given image source to an attachment of the target document and surrounds the image with the
       * "startimage"/"stopimage" XHTML comments. Does nothing when no file name can be determined for the image.
       *
       * @param source the 'src' attribute of the image element
       * @param targetDocumentReference the document the image file is expected to be attached to
       */
130  5 toggle private void filterImageSource(Attr source, DocumentReference targetDocumentReference)
131    {
132  5 String fileName = null;
133  5 try {
134  5 fileName = getFileName(source);
135    } catch (Exception e) {
      // Best effort: the failure is only logged and fileName stays null, so the image is skipped below.
136  0 this.logger.warn("Failed to extract the image file name. Root cause is [{}]",
137    ExceptionUtils.getRootCauseMessage(e));
138  0 this.logger.debug("Full stacktrace is: ", e);
139    }
      // No usable file name (extraction failed or getFileName() returned null/empty): leave the image as it is.
140  5 if (StringUtils.isEmpty(fileName)) {
141  2 return;
142    }
143   
144    // Set image source attribute relative to the reference document.
145  3 AttachmentReference attachmentReference = new AttachmentReference(fileName, targetDocumentReference);
      // NOTE(review): the 'false' argument presumably requests a relative (not full) URL -- confirm against
      // DocumentAccessBridge#getAttachmentURL.
146  3 source.setValue(this.documentAccessBridge.getAttachmentURL(attachmentReference, false));
147   
      // Build an untyped attachment reference and serialize it into the "startimage:" marker comment.
148  3 ResourceReference imageReference = new ResourceReference(fileName, ResourceType.ATTACHMENT);
149  3 imageReference.setTyped(false);
150  3 Comment beforeComment = source.getOwnerDocument().createComment(
151    XMLUtils.escapeXMLComment("startimage:" + this.xhtmlMarkerSerializer.serialize(imageReference)));
152  3 Comment afterComment = source.getOwnerDocument().createComment("stopimage");
153  3 Element image = source.getOwnerElement();
154  3 image.getParentNode().insertBefore(beforeComment, image);
      // When the image is the last child, getNextSibling() is null and insertBefore() appends the comment.
155  3 image.getParentNode().insertBefore(afterComment, image.getNextSibling());
156    }
157   
 
      /**
       * Determines the name of the file the given image source refers to.
       * <ul>
       * <li>For a "data:" URI, when the "embeddedImages" map is present on the owner document, the name comes from
       * the Data URI or is generated from its hash code and MIME type; the image data is stored in the map under
       * that name.</li>
       * <li>For a value that doesn't contain "://", the name is the URL-decoded part after the last '/'.</li>
       * <li>Otherwise {@code null} is returned.</li>
       * </ul>
       *
       * @param source the 'src' attribute of the image element
       * @return the image file name, or {@code null} if the source value contains "://" and is not handled as a
       *         Data URI
       * @throws MimeTypeException declared because of the MIME type lookup done for unnamed Data URIs
       */
158  5 toggle private String getFileName(Attr source) throws MimeTypeException
159    {
160  5 String value = source.getValue();
161  5 String fileName = null;
      // The map is only present when filter() was called with "attachEmbeddedImages" set to true.
162  5 @SuppressWarnings("unchecked")
163    Map<String, byte[]> embeddedImages =
164    (Map<String, byte[]>) source.getOwnerDocument().getUserData(EMBEDDED_IMAGES);
165  5 if (embeddedImages != null && value.startsWith("data:")) {
166    // An image embedded using the Data URI scheme.
167  2 DataUri dataURI = DataUri.parse(value, Charset.forName(UTF_8));
168  2 fileName = dataURI.getFilename();
169  2 if (StringUtils.isEmpty(fileName)) {
      // The Data URI carries no file name: generate one from the hash code of the parsed Data URI.
      // NOTE(review): Math.abs(Integer.MIN_VALUE) is still negative, so the name can start with '-' for that
      // hash value.
170  1 fileName = String.valueOf(Math.abs(dataURI.hashCode()));
171  1 if (!StringUtils.isEmpty(dataURI.getMime())) {
      // The extension is appended as is; presumably it already includes the leading dot -- confirm against
      // Tika's MimeType#getExtension.
172  1 String extension = MimeTypes.getDefaultMimeTypes().forName(dataURI.getMime()).getExtension();
173  1 fileName += extension;
174    }
175    }
      // Make the image data available to whoever reads the "embeddedImages" user data after filtering.
176  2 embeddedImages.put(fileName, dataURI.getData());
      // NOTE(review): when embedded images are not collected, a "data:" URI without "://" also ends up in the
      // branch below and is handled like a relative path.
177  3 } else if (!value.contains("://")) {
178    // A relative path.
179  1 int separator = value.lastIndexOf('/');
180  1 fileName = separator < 0 ? value : value.substring(separator + 1);
181  1 try {
182    // We have to decode the image file name in case it contains URL special characters.
183  1 fileName = URLDecoder.decode(fileName, UTF_8);
184    } catch (Exception e) {
185    // This shouldn't happen. Use the encoded image file name.
      // NOTE(review): URLDecoder.decode can also throw IllegalArgumentException for malformed escapes; the
      // failure is silently ignored here.
186    }
187    }
188  5 return fileName;
189    }
190    }