1. Project Clover database Tue Dec 20 2016 21:24:09 CET
  2. Package org.xwiki.officeimporter.internal.builder

File DefaultPresentationBuilder.java

 

Coverage histogram

../../../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

2
42
5
1
248
123
9
0.21
8.4
5
1.8

Classes

Class Line # Actions
DefaultPresentationBuilder 65 42 0% 9 3
0.93877554 (93.9%)
 

Contributing tests

This file is covered by 1 test.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.officeimporter.internal.builder;
21   
22    import java.io.InputStream;
23    import java.io.StringReader;
24    import java.io.UnsupportedEncodingException;
25    import java.net.URLEncoder;
26    import java.util.Collections;
27    import java.util.HashMap;
28    import java.util.Map;
29   
30    import javax.inject.Inject;
31    import javax.inject.Named;
32    import javax.inject.Provider;
33    import javax.inject.Singleton;
34   
35    import org.apache.commons.lang3.StringUtils;
36    import org.w3c.dom.Document;
37    import org.xwiki.bridge.DocumentAccessBridge;
38    import org.xwiki.component.annotation.Component;
39    import org.xwiki.component.manager.ComponentManager;
40    import org.xwiki.model.reference.DocumentReference;
41    import org.xwiki.model.reference.EntityReferenceSerializer;
42    import org.xwiki.officeimporter.OfficeImporterException;
43    import org.xwiki.officeimporter.builder.PresentationBuilder;
44    import org.xwiki.officeimporter.converter.OfficeConverterException;
45    import org.xwiki.officeimporter.document.XDOMOfficeDocument;
46    import org.xwiki.officeimporter.server.OfficeServer;
47    import org.xwiki.rendering.block.Block;
48    import org.xwiki.rendering.block.ExpandedMacroBlock;
49    import org.xwiki.rendering.block.XDOM;
50    import org.xwiki.rendering.listener.MetaData;
51    import org.xwiki.rendering.parser.Parser;
52    import org.xwiki.rendering.renderer.BlockRenderer;
53    import org.xwiki.xml.html.HTMLCleaner;
54    import org.xwiki.xml.html.HTMLCleanerConfiguration;
55    import org.xwiki.xml.html.HTMLUtils;
56   
57    /**
58    * Default implementation of {@link PresentationBuilder}.
59    *
60    * @version $Id: df9806d2a7ed07446dab63f6c1ae6b020f8ae713 $
61    * @since 2.1M1
62    */
63    @Component
64    @Singleton
 
65    public class DefaultPresentationBuilder implements PresentationBuilder
66    {
67    /**
68    * Provides the component manager used by {@link XDOMOfficeDocument}.
69    */
70    @Inject
71    @Named("context")
72    private Provider<ComponentManager> contextComponentManagerProvider;
73   
74    /**
75    * Used to obtain document converter.
76    */
77    @Inject
78    private OfficeServer officeServer;
79   
80    /**
81    * Used to access current context document.
82    */
83    @Inject
84    private DocumentAccessBridge documentAccessBridge;
85   
86    /**
87    * Used to serialize the reference document name.
88    */
89    @Inject
90    private EntityReferenceSerializer<String> entityReferenceSerializer;
91   
92    /**
93    * Office HTML cleaner.
94    */
95    @Inject
96    @Named("openoffice")
97    private HTMLCleaner officeHTMLCleaner;
98   
99    /**
100    * The component used to parse the XHTML obtained after cleaning.
101    */
102    @Inject
103    @Named("xhtml/1.0")
104    private Parser xhtmlParser;
105   
 
106  1 toggle @Override
107    public XDOMOfficeDocument build(InputStream officeFileStream, String officeFileName,
108    DocumentReference documentReference) throws OfficeImporterException
109    {
110    // Invoke the office document converter.
111  1 Map<String, byte[]> artifacts = importPresentation(officeFileStream, officeFileName);
112   
113    // Create presentation HTML.
114  1 String html = buildPresentationHTML(artifacts, StringUtils.substringBeforeLast(officeFileName, "."));
115   
116    // Clear and adjust presentation HTML (slide image URLs are updated to point to the corresponding attachments).
117  1 html = cleanPresentationHTML(html, documentReference);
118   
119    // Create the XDOM.
120  1 XDOM xdom = buildPresentationXDOM(html, documentReference);
121   
122  1 return new XDOMOfficeDocument(xdom, artifacts, this.contextComponentManagerProvider.get());
123    }
124   
125    /**
126    * Invokes the Office Server to convert the given input stream. The result is a map of artifacts including slide
127    * images.
128    *
129    * @param officeFileStream the office presentation byte stream
130    * @param officeFileName the name of the office presentation that is being imported
131    * @return the map of artifacts created by the Office Server
132    * @throws OfficeImporterException if converting the office presentation fails
133    */
 
134  1 toggle protected Map<String, byte[]> importPresentation(InputStream officeFileStream, String officeFileName)
135    throws OfficeImporterException
136    {
137  1 Map<String, InputStream> inputStreams = new HashMap<String, InputStream>();
138  1 inputStreams.put(officeFileName, officeFileStream);
139  1 try {
140    // The office converter uses the output file name extension to determine the output format/syntax.
141    // The returned artifacts are of three types: imgX.jpg (slide screen shot), imgX.html (HTML page that
142    // display the corresponding slide screen shot) and textX.html (HTML page that display the text extracted
143    // from the corresponding slide). We use "img0.html" as the output file name because the corresponding
144    // artifact displays a screen shot of the first presentation slide.
145  1 return this.officeServer.getConverter().convert(inputStreams, officeFileName, "img0.html");
146    } catch (OfficeConverterException e) {
147  0 String message = "Error while converting document [%s] into html.";
148  0 throw new OfficeImporterException(String.format(message, officeFileName), e);
149    }
150    }
151   
152    /**
153    * Builds the presentation HTML from the presentation artifacts. There are two types of presentation artifacts:
154    * slide image and slide text. The returned HTML will display all the slide images. Slide text is currently ignored.
155    * All artifacts except slide images are removed from {@code presentationArtifacts}. Slide images names are prefixed
156    * with the given {@code nameSpace} to avoid name conflicts.
157    *
158    * @param presentationArtifacts the map of presentation artifacts; this method removes some of the presentation
159    * artifacts and renames others so be aware of the side effects
160    * @param nameSpace the prefix to add in front of all slide image names to prevent name conflicts
161    * @return the presentation HTML
162    */
 
163  1 toggle protected String buildPresentationHTML(Map<String, byte[]> presentationArtifacts, String nameSpace)
164    {
165  1 StringBuilder presentationHTML = new StringBuilder();
166   
167    // Iterate all the slides.
168  1 int i = 0;
169  1 String slideImageKeyFormat = "img%s.jpg";
170  1 byte[] slideImage = presentationArtifacts.remove(String.format(slideImageKeyFormat, i));
171  3 while (slideImage != null) {
172    // Remove unused artifacts.
173    // imgX.html is an HTML page that displays the corresponding slide image.
174  2 presentationArtifacts.remove(String.format("img%s.html", i));
175    // textX.html is an HTML page that displays the text extracted from the corresponding slide.
176  2 presentationArtifacts.remove(String.format("text%s.html", i));
177   
178    // Rename slide image to prevent name conflicts when it will be attached to the target document.
179  2 String slideImageName = String.format("%s-slide%s.jpg", nameSpace, i);
180  2 presentationArtifacts.put(slideImageName, slideImage);
181   
182    // Append slide image to the presentation HTML.
183  2 String slideImageURL = null;
184  2 try {
185    // We need to encode the slide image name in case it contains special URL characters.
186  2 slideImageURL = URLEncoder.encode(slideImageName, "UTF-8");
187    } catch (UnsupportedEncodingException e) {
188    // This should never happen.
189    }
190  2 presentationHTML.append(String.format("<p><img src=\"%s\"/></p>", slideImageURL));
191   
192    // Move to the next slide.
193  2 slideImage = presentationArtifacts.remove(String.format(slideImageKeyFormat, ++i));
194    }
195   
196  1 return presentationHTML.toString();
197    }
198   
199    /**
200    * Cleans the presentation HTML. This method must be called mainly to ensure that the slide image URLs are updated
201    * to point to the corresponding attachments.
202    *
203    * @param dirtyHTML the HTML to be cleaned
204    * @param targetDocumentReference the document where the slide images will be attached
205    * @return the cleaned HTML
206    */
 
207  1 toggle protected String cleanPresentationHTML(String dirtyHTML, DocumentReference targetDocumentReference)
208    {
209  1 HTMLCleanerConfiguration configuration = this.officeHTMLCleaner.getDefaultConfiguration();
210  1 configuration.setParameters(Collections.singletonMap("targetDocument",
211    this.entityReferenceSerializer.serialize(targetDocumentReference)));
212  1 Document xhtmlDocument = this.officeHTMLCleaner.clean(new StringReader(dirtyHTML), configuration);
213  1 HTMLUtils.stripHTMLEnvelope(xhtmlDocument);
214  1 return HTMLUtils.toString(xhtmlDocument);
215    }
216   
217    /**
218    * Parses the given HTML text into an XDOM tree.
219    *
220    * @param html the HTML text to parse
221    * @param targetDocumentReference specifies the document where the presentation will be imported; we use the target
222    * document reference to get the syntax of the target document and to set the {@code BASE} meta data on
223    * the created XDOM
224    * @return a XDOM tree
225    * @throws OfficeImporterException if parsing the given HTML fails
226    */
 
227  1 toggle protected XDOM buildPresentationXDOM(String html, DocumentReference targetDocumentReference)
228    throws OfficeImporterException
229    {
230  1 try {
231  1 ComponentManager contextComponentManager = this.contextComponentManagerProvider.get();
232  1 String syntaxId = this.documentAccessBridge.getDocument(targetDocumentReference).getSyntax().toIdString();
233  1 BlockRenderer renderer = contextComponentManager.getInstance(BlockRenderer.class, syntaxId);
234   
235  1 Map<String, String> galleryParameters = Collections.emptyMap();
236  1 ExpandedMacroBlock gallery =
237    new ExpandedMacroBlock("gallery", galleryParameters, renderer, false, contextComponentManager);
238  1 gallery.addChild(this.xhtmlParser.parse(new StringReader(html)));
239   
240  1 XDOM xdom = new XDOM(Collections.singletonList((Block) gallery));
241    // Make sure (image) references are resolved relative to the target document reference.
242  1 xdom.getMetaData().addMetaData(MetaData.BASE, entityReferenceSerializer.serialize(targetDocumentReference));
243  1 return xdom;
244    } catch (Exception e) {
245  0 throw new OfficeImporterException("Failed to build presentation XDOM.", e);
246    }
247    }
248    }