@@ -53,25 +53,26 @@ public class TikaDocumentItemProcessor extends TLItemProcessor implements ItemPr
5353
5454 private static final org .slf4j .Logger LOG = LoggerFactory .getLogger (TikaDocumentItemProcessor .class );
5555
56- // whether shall we parse all the document data into XHTML format
57- @ Value ("${tika.keepTags}" )
58- private boolean keepTags ;
59-
6056 // the name of the field in the resulting JSON file that will hold the content of the parsed document
6157 @ Value ("${tika.tikaFieldName}" )
6258 String tikaFieldName ;
6359
64- // configuration of Tika module plus individual parsers
65- private TikaConfig tikaConfig ;
6660
67- @ Value ("${tika.tesseract.timeout:#{null}}" )
61+ // whether to parse all the document data into XHTML format
62+ @ Value ("${tika.keepTags:false}" )
63+ private boolean keepTags ;
64+
65+ @ Value ("${tika.tesseract.timeout:120}" )
6866 Integer tesseractTimeout ;
6967 private TesseractOCRConfig tesseractConfig ;
7068
71- @ Value ("${tika.convert.timeout:#{null} }" )
69+ @ Value ("${tika.convert.timeout:120 }" )
7270 Integer convertTimeout ;
73- private ImageMagickConfig imgConfig ;
7471
72+
73+ // configuration of Tika module plus individual parsers
74+ private TikaConfig tikaConfig ;
75+ private ImageMagickConfig imgConfig ;
7576 private AutoDetectParser parser ;
7677
7778
@@ -88,13 +89,13 @@ public void init() throws IOException, SAXException, TikaException{
8889
8990 // load tesseract ocr configuration
9091 tesseractConfig = new TesseractOCRConfig ();
91- if (tesseractTimeout != null && tesseractTimeout > 0 ) {
92+ if (tesseractTimeout > 0 ) {
9293 tesseractConfig .setTimeout (tesseractTimeout );
9394 }
9495
9596 // load image magick configuration -- used for tiff conversion
9697 imgConfig = new ImageMagickConfig ();
97- if (convertTimeout != null && convertTimeout > 0 ) {
98+ if (convertTimeout > 0 ) {
9899 imgConfig .setTimeout (convertTimeout );
99100 }
100101
0 commit comments