1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
package org.xwiki.officeimporter.internal.builder; |
21 |
|
|
22 |
|
import java.io.ByteArrayInputStream; |
23 |
|
import java.io.IOException; |
24 |
|
import java.io.InputStream; |
25 |
|
import java.io.InputStreamReader; |
26 |
|
import java.io.Reader; |
27 |
|
import java.nio.charset.Charset; |
28 |
|
import java.util.HashMap; |
29 |
|
import java.util.Map; |
30 |
|
|
31 |
|
import javax.inject.Inject; |
32 |
|
import javax.inject.Named; |
33 |
|
import javax.inject.Singleton; |
34 |
|
|
35 |
|
import org.apache.commons.lang3.StringUtils; |
36 |
|
import org.apache.tika.parser.html.HtmlEncodingDetector; |
37 |
|
import org.w3c.dom.Document; |
38 |
|
import org.xwiki.component.annotation.Component; |
39 |
|
import org.xwiki.model.reference.DocumentReference; |
40 |
|
import org.xwiki.model.reference.EntityReferenceSerializer; |
41 |
|
import org.xwiki.officeimporter.OfficeImporterException; |
42 |
|
import org.xwiki.officeimporter.builder.XHTMLOfficeDocumentBuilder; |
43 |
|
import org.xwiki.officeimporter.converter.OfficeConverterException; |
44 |
|
import org.xwiki.officeimporter.document.XHTMLOfficeDocument; |
45 |
|
import org.xwiki.officeimporter.server.OfficeServer; |
46 |
|
import org.xwiki.xml.html.HTMLCleaner; |
47 |
|
import org.xwiki.xml.html.HTMLCleanerConfiguration; |
48 |
|
|
49 |
|
|
50 |
|
@link |
51 |
|
|
52 |
|
@version |
53 |
|
@since |
54 |
|
|
55 |
|
@Component |
56 |
|
@Singleton |
|
|
| 86.1% |
Uncovered Elements: 5 (36) |
Complexity: 7 |
Complexity Density: 0.25 |
|
57 |
|
public class DefaultXHTMLOfficeDocumentBuilder implements XHTMLOfficeDocumentBuilder |
58 |
|
{ |
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
@Inject |
63 |
|
private EntityReferenceSerializer<String> entityReferenceSerializer; |
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
@Inject |
69 |
|
private OfficeServer officeServer; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
@Inject |
75 |
|
@Named("openoffice") |
76 |
|
private HTMLCleaner officeHtmlCleaner; |
77 |
|
|
78 |
|
|
79 |
|
|
80 |
|
|
81 |
|
private HtmlEncodingDetector htmlEncodingDetector = new HtmlEncodingDetector(); |
82 |
|
|
|
|
| 84% |
Uncovered Elements: 4 (25) |
Complexity: 4 |
Complexity Density: 0.19 |
|
83 |
2 |
@Override... |
84 |
|
public XHTMLOfficeDocument build(InputStream officeFileStream, String officeFileName, DocumentReference reference, |
85 |
|
boolean filterStyles) throws OfficeImporterException |
86 |
|
{ |
87 |
|
|
88 |
2 |
Map<String, InputStream> inputStreams = new HashMap<String, InputStream>(); |
89 |
2 |
inputStreams.put(officeFileName, officeFileStream); |
90 |
2 |
Map<String, byte[]> artifacts; |
91 |
|
|
92 |
2 |
String outputFileName = StringUtils.substringBeforeLast(officeFileName, ".") + ".html"; |
93 |
2 |
try { |
94 |
2 |
artifacts = this.officeServer.getConverter().convert(inputStreams, officeFileName, outputFileName); |
95 |
|
} catch (OfficeConverterException ex) { |
96 |
0 |
String message = "Error while converting document [%s] into html."; |
97 |
0 |
throw new OfficeImporterException(String.format(message, officeFileName), ex); |
98 |
|
} |
99 |
|
|
100 |
|
|
101 |
2 |
Map<String, String> params = new HashMap<String, String>(); |
102 |
2 |
params.put("targetDocument", this.entityReferenceSerializer.serialize(reference)); |
103 |
|
|
104 |
|
|
105 |
2 |
params.put("attachEmbeddedImages", "true"); |
106 |
2 |
if (filterStyles) { |
107 |
2 |
params.put("filterStyles", "strict"); |
108 |
|
} |
109 |
|
|
110 |
|
|
111 |
2 |
HTMLCleanerConfiguration configuration = this.officeHtmlCleaner.getDefaultConfiguration(); |
112 |
2 |
configuration.setParameters(params); |
113 |
2 |
Reader html = getReader(artifacts.remove(outputFileName)); |
114 |
2 |
Document xhtmlDoc = this.officeHtmlCleaner.clean(html, configuration); |
115 |
|
|
116 |
2 |
@SuppressWarnings("unchecked") |
117 |
|
Map<String, byte[]> embeddedImages = (Map<String, byte[]>) xhtmlDoc.getUserData("embeddedImages"); |
118 |
2 |
if (embeddedImages != null) { |
119 |
2 |
artifacts.putAll(embeddedImages); |
120 |
|
} |
121 |
|
|
122 |
|
|
123 |
2 |
return new XHTMLOfficeDocument(xhtmlDoc, artifacts); |
124 |
|
} |
125 |
|
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
@param |
130 |
|
@return |
131 |
|
|
|
|
| 88.9% |
Uncovered Elements: 1 (9) |
Complexity: 3 |
Complexity Density: 0.43 |
|
132 |
2 |
private Reader getReader(byte[] html)... |
133 |
|
{ |
134 |
2 |
InputStream htmlInputStream = new ByteArrayInputStream(html); |
135 |
2 |
Charset charset = null; |
136 |
2 |
try { |
137 |
2 |
charset = htmlEncodingDetector.detect(htmlInputStream, null); |
138 |
|
} catch (IOException e) { |
139 |
|
|
140 |
|
} |
141 |
2 |
if (charset == null) { |
142 |
2 |
charset = Charset.forName("UTF-8"); |
143 |
|
} |
144 |
2 |
return new InputStreamReader(htmlInputStream, charset); |
145 |
|
} |
146 |
|
} |