1. Project Clover database Sat Feb 2 2019 06:45:20 CET
  2. Package org.xwiki.tika.internal

File TikaUtils.java

 

Coverage histogram

../../../../img/srcFileCovDistChart7.png
66% of files have more coverage

Code metrics

2
28
14
1
228
104
21
0.75
2
14
1.5

Classes

Class Line # Actions
TikaUtils 45 28 0% 21 17
0.6136364 61.4%
 

Contributing tests

This file is covered by 42 tests.

Source view

1    /*
2    * See the NOTICE file distributed with this work for additional
3    * information regarding copyright ownership.
4    *
5    * This is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU Lesser General Public License as
7    * published by the Free Software Foundation; either version 2.1 of
8    * the License, or (at your option) any later version.
9    *
10    * This software is distributed in the hope that it will be useful,
11    * but WITHOUT ANY WARRANTY; without even the implied warranty of
12    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13    * Lesser General Public License for more details.
14    *
15    * You should have received a copy of the GNU Lesser General Public
16    * License along with this software; if not, write to the Free
17    * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18    * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
19    */
20    package org.xwiki.tika.internal;
21   
22    import java.io.BufferedInputStream;
23    import java.io.File;
24    import java.io.IOException;
25    import java.io.InputStream;
26    import java.net.URL;
27    import java.nio.file.Path;
28   
29    import org.apache.commons.io.input.AutoCloseInputStream;
30    import org.apache.commons.lang3.exception.ExceptionUtils;
31    import org.apache.tika.Tika;
32    import org.apache.tika.config.TikaConfig;
33    import org.apache.tika.exception.TikaException;
34    import org.apache.tika.exception.ZeroByteFileException;
35    import org.apache.tika.metadata.Metadata;
36    import org.slf4j.Logger;
37    import org.slf4j.LoggerFactory;
38   
39    /**
40    * Provide a pre-configured {@link Tika} instance.
41    *
42    * @version $Id: 63a535dbb1455a7679e5c7f4b494053c720bef38 $
43    * @since 10.1RC1
44    */
 
45    public final class TikaUtils
46    {
47    protected static final Logger LOGGER = LoggerFactory.getLogger(TikaUtils.class);
48   
49    private static Tika tika;
50   
 
51  44 toggle static {
52  44 try {
53  44 tika = new Tika(new TikaConfig(TikaUtils.class.getResource("/tika-config.xml")));
54    } catch (Exception e) {
55  0 LOGGER.warn("Failed to load tika configuration (default configuration will be used): {}",
56    ExceptionUtils.getRootCauseMessage(e));
57   
58  0 tika = new Tika();
59    }
60    }
61   
 
62  0 toggle private TikaUtils()
63    {
64    // Utility class
65    }
66   
67    /**
68    * @return the shared {@link Tika} instance
69    */
 
70  0 toggle public static Tika getTika()
71    {
72  0 return tika;
73    }
74   
75    // TODO: Remove when https://issues.apache.org/jira/browse/IO-568 is fixed (AutoCloseInputStream does not properly
76    // support mark/reset)
 
77  6924 toggle private static InputStream safeInputStream(InputStream stream)
78    {
79  6927 if (stream instanceof AutoCloseInputStream) {
80  1509 return new BufferedInputStream(stream);
81    }
82   
83  5417 return stream;
84    }
85   
86    /**
87    * @see Tika#detect(File)
88    * @param file the file
89    * @return detected media type
90    * @throws IOException if the file can not be read
91    * @see #detect(Path)
92    */
 
93  8 toggle public static String detect(File file) throws IOException
94    {
95  8 return tika.detect(file);
96    }
97   
98    /**
99    * @see Tika#detect(Path)
100    * @param path the path of the file
101    * @return detected media type
102    * @throws IOException if the file can not be read
103    */
 
104  0 toggle public static String detect(Path path) throws IOException
105    {
106  0 return tika.detect(path);
107    }
108   
109    /**
110    * @see Tika#detect(InputStream, String)
111    * @param stream the document stream
112    * @param name document name
113    * @return detected media type
114    * @throws IOException if the stream can not be read
115    */
 
116  5405 toggle public static String detect(InputStream stream, String name) throws IOException
117    {
118  5404 return tika.detect(safeInputStream(stream), name);
119    }
120   
121    /**
122    * @see Tika#detect(InputStream)
123    * @param stream the document stream
124    * @return detected media type
125    * @throws IOException if the stream can not be read
126    */
 
127  19 toggle public static String detect(InputStream stream) throws IOException
128    {
129  19 return tika.detect(stream);
130    }
131   
132    /**
133    * @see Tika#detect(String)
134    * @param name the file name of the document
135    * @return detected media type
136    */
 
137  941 toggle public static String detect(String name)
138    {
139  941 return tika.detect(name);
140    }
141   
142    /**
143    * @see Tika#parseToString(InputStream, Metadata)
144    * @param stream the document to be parsed
145    * @param metadata document metadata
146    * @return extracted text content
147    * @throws IOException if the document can not be read
148    * @throws TikaException if the document can not be parsed
149    */
 
150  1518 toggle public static String parseToString(InputStream stream, Metadata metadata) throws IOException, TikaException
151    {
152  1518 try {
153  1518 return tika.parseToString(safeInputStream(stream), metadata);
154    } catch (ZeroByteFileException e) {
155    // How is empty file an issue ?
156  12 return "";
157    }
158    }
159   
160    /**
161    * @see Tika#parseToString(InputStream)
162    * @param stream the document to be parsed
163    * @return extracted text content
164    * @throws IOException if the document can not be read
165    * @throws TikaException if the document can not be parsed
166    */
 
167  1 toggle public static String parseToString(InputStream stream) throws IOException, TikaException
168    {
169  1 try {
170  1 return tika.parseToString(safeInputStream(stream));
171    } catch (ZeroByteFileException e) {
172    // How is empty file an issue ?
173  0 return "";
174    }
175    }
176   
177    /**
178    * @see Tika#parseToString(Path)
179    * @param path the path of the file to be parsed
180    * @return extracted text content
181    * @throws IOException if the file can not be read
182    * @throws TikaException if the file can not be parsed
183    */
 
184  0 toggle public static String parseToString(Path path) throws IOException, TikaException
185    {
186  0 try {
187  0 return tika.parseToString(path);
188    } catch (ZeroByteFileException e) {
189    // How is empty file an issue ?
190  0 return "";
191    }
192    }
193   
194    /**
195    * @see Tika#parseToString(File)
196    * @param file the file to be parsed
197    * @return extracted text content
198    * @throws IOException if the file can not be read
199    * @throws TikaException if the file can not be parsed
200    * @see #parseToString(Path)
201    */
 
202  0 toggle public static String parseToString(File file) throws IOException, TikaException
203    {
204  0 try {
205  0 return tika.parseToString(file);
206    } catch (ZeroByteFileException e) {
207    // How is empty file an issue ?
208  0 return "";
209    }
210    }
211   
212    /**
213    * @see Tika#parseToString(URL)
214    * @param url the URL of the resource to be parsed
215    * @return extracted text content
216    * @throws IOException if the resource can not be read
217    * @throws TikaException if the resource can not be parsed
218    */
 
219  1 toggle public static String parseToString(URL url) throws IOException, TikaException
220    {
221  1 try {
222  1 return tika.parseToString(url);
223    } catch (ZeroByteFileException e) {
224    // How is empty file an issue ?
225  0 return "";
226    }
227    }
228    }