Skip to content
This repository was archived by the owner on Jan 13, 2023. It is now read-only.

Commit 381009f

Browse files
committed
set default properties for tika
1 parent ab56dd6 commit 381009f

File tree

1 file changed

+12
-11
lines changed

1 file changed

+12
-11
lines changed

src/main/java/uk/ac/kcl/itemProcessors/TikaDocumentItemProcessor.java

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,25 +53,26 @@ public class TikaDocumentItemProcessor extends TLItemProcessor implements ItemPr
5353

5454
private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(TikaDocumentItemProcessor.class);
5555

56-
// whether to parse all the document data into XHTML format
57-
@Value("${tika.keepTags}")
58-
private boolean keepTags;
59-
6056
// the name of the field in the resulting JSON file that will hold the content of the parsed document
6157
@Value("${tika.tikaFieldName}")
6258
String tikaFieldName;
6359

64-
// configuration of Tika module plus individual parsers
65-
private TikaConfig tikaConfig;
6660

67-
@Value("${tika.tesseract.timeout:#{null}}")
61+
// whether to parse all the document data into XHTML format
62+
@Value("${tika.keepTags:false}")
63+
private boolean keepTags;
64+
65+
@Value("${tika.tesseract.timeout:120}")
6866
Integer tesseractTimeout;
6967
private TesseractOCRConfig tesseractConfig;
7068

71-
@Value("${tika.convert.timeout:#{null}}")
69+
@Value("${tika.convert.timeout:120}")
7270
Integer convertTimeout;
73-
private ImageMagickConfig imgConfig;
7471

72+
73+
// configuration of Tika module plus individual parsers
74+
private TikaConfig tikaConfig;
75+
private ImageMagickConfig imgConfig;
7576
private AutoDetectParser parser;
7677

7778

@@ -88,13 +89,13 @@ public void init() throws IOException, SAXException, TikaException{
8889

8990
// load tesseract ocr configuration
9091
tesseractConfig = new TesseractOCRConfig();
91-
if (tesseractTimeout != null && tesseractTimeout > 0) {
92+
if (tesseractTimeout > 0) {
9293
tesseractConfig.setTimeout(tesseractTimeout);
9394
}
9495

9596
// load image magick configuration -- used for tiff conversion
9697
imgConfig = new ImageMagickConfig();
97-
if (convertTimeout != null && convertTimeout > 0) {
98+
if (convertTimeout > 0) {
9899
imgConfig.setTimeout(convertTimeout);
99100
}
100101

0 commit comments

Comments
 (0)