Skip to content

Commit 62bbc58

Browse files
committed
v1.2 released: use models' absolute paths; avoid using System.getProperty("user.dir")
1 parent 2879cd1 commit 62bbc58

File tree

11 files changed

+25
-16
lines changed

11 files changed

+25
-16
lines changed

Readme.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ If you are looking for light-weight versions, VnCoreNLP's word segmentation and
2121
## Installation <a name="install"></a>
2222

2323
- `Java 1.8+` (Prerequisite)
24-
- File `VnCoreNLP-1.1.1.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
24+
- File `VnCoreNLP-1.2.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
2525
- `Python 3.6+` if using [a Python wrapper of VnCoreNLP](https://github.com/thelinhbkhn2014/VnCoreNLP_Wrapper). To install this wrapper, users have to run the following command:
2626

2727
`$ pip3 install py_vncorenlp`
@@ -38,7 +38,7 @@ import py_vncorenlp
3838
# and save them in some local working folder
3939
py_vncorenlp.download_model(save_dir='/absolute/path/to/vncorenlp')
4040

41-
# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.1.1.jar` and `models`
41+
# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.2.jar` and `models`
4242
model = py_vncorenlp.VnCoreNLP(save_dir='/absolute/path/to/vncorenlp')
4343
# Equivalent to: model = py_vncorenlp.VnCoreNLP(annotators=["wseg", "pos", "ner", "parse"], save_dir='/absolute/path/to/vncorenlp')
4444

@@ -80,13 +80,13 @@ print(output)
8080
You can run VnCoreNLP to annotate an input raw text corpus (e.g. a collection of news content) by using the following commands:
8181

8282
// To perform word segmentation, POS tagging, NER and then dependency parsing
83-
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt
83+
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt
8484
// To perform word segmentation, POS tagging and then NER
85-
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
85+
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
8686
// To perform word segmentation and then POS tagging
87-
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos
87+
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos
8888
// To perform word segmentation
89-
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg
89+
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg
9090

9191

9292
### Using VnCoreNLP from the API

VnCoreNLP-1.2.jar

26.1 MB
Binary file not shown.

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>VnCoreNLP</groupId>
88
<artifactId>VnCoreNLP</artifactId>
9-
<version>1.1.1</version>
9+
<version>1.2</version>
1010
<build>
1111
<plugins>
1212
<plugin>

src/main/java/vn/corenlp/ner/NerRecognizer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import vn.corenlp.wordsegmenter.Vocabulary;
1212
import vn.pipeline.LexicalInitializer;
1313
import vn.pipeline.Word;
14+
import vn.pipeline.Utils;
1415

1516
import java.io.File;
1617
import java.io.IOException;
@@ -34,7 +35,7 @@ public NerRecognizer() throws IOException{
3435
nlpDecoder = new NLPDecoder();
3536
List<NLPComponent<NLPNode>> components = new ArrayList();
3637

37-
String modelPath = System.getProperty("user.dir") + "/models/ner/vi-ner.xz";
38+
String modelPath = Utils.jarDir + "/models/ner/vi-ner.xz";
3839
if (!new File(modelPath).exists()) throw new IOException("NerRecognizer: " + modelPath + " is not found!");
3940
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
4041
if(lexica != null) {

src/main/java/vn/corenlp/parser/DependencyParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.apache.log4j.Logger;
1111
import vn.pipeline.LexicalInitializer;
1212
import vn.pipeline.Word;
13+
import vn.pipeline.Utils;
1314

1415
import java.io.File;
1516
import java.io.IOException;
@@ -32,7 +33,7 @@ public DependencyParser() throws IOException {
3233
nlpDecoder = new NLPDecoder();
3334
List<NLPComponent<NLPNode>> components = new ArrayList();
3435

35-
String modelPath = System.getProperty("user.dir") + "/models/dep/vi-dep.xz";
36+
String modelPath = Utils.jarDir + "/models/dep/vi-dep.xz";
3637
if (!new File(modelPath).exists()) throw new IOException("DependencyParser: " + modelPath + " is not found!");
3738
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
3839
if(lexica != null) {

src/main/java/vn/corenlp/postagger/PosTagger.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@
1313
import java.util.LinkedList;
1414
import java.util.List;
1515

16+
import vn.pipeline.Utils;
17+
1618
public class PosTagger {
1719
private static PosTagger posTagger = null;
1820
private MorphTagger tagger;
1921
public final static Logger LOGGER = Logger.getLogger(PosTagger.class);
2022
public PosTagger() throws IOException {
2123
LOGGER.info("Loading POS Tagging model");
22-
String modelPath = System.getProperty("user.dir") + "/models/postagger/vi-tagger";
24+
String modelPath = Utils.jarDir + "/models/postagger/vi-tagger";
2325
if (!new File(modelPath).exists()) throw new IOException("PosTagger: " + modelPath + " is not found!");
2426
tagger = FileUtils.loadFromFile(modelPath);
2527

src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
import java.util.HashSet;
88
import java.util.Set;
99

10+
import vn.pipeline.Utils;
11+
1012
@SuppressWarnings("unchecked")
1113
public class Vocabulary {
1214
public static Set<String> VN_DICT;
1315
static {
1416
VN_DICT = new HashSet<String>();
1517
try {
16-
String vocabPath = System.getProperty("user.dir") + "/models/wordsegmenter/vi-vocab";
18+
String vocabPath = Utils.jarDir + "/models/wordsegmenter/vi-vocab";
1719
if (!new File(vocabPath).exists())
1820
throw new IOException("Vocabulary: " + vocabPath + " is not found!");
1921
//Vocabulary.class.getClassLoader().getResource("wordsegmenter/vi-vocab").getPath()

src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public class WordSegmenter {
2222
public WordSegmenter()
2323
throws IOException {
2424
LOGGER.info("Loading Word Segmentation model");
25-
String modelPath = System.getProperty("user.dir") + "/models/wordsegmenter/wordsegmenter.rdr";
25+
String modelPath = vn.pipeline.Utils.jarDir + "/models/wordsegmenter/wordsegmenter.rdr";
2626
if (!new File(modelPath).exists())
2727
throw new IOException("WordSegmenter: " + modelPath + " is not found!");
2828

src/main/java/vn/pipeline/LexicalInitializer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ public LexicalInitializer(boolean initLexica) throws IOException {
2727
this.initLexica = initLexica;
2828
this.lexicalMap = new HashMap<>();
2929

30-
String lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-500brownclusters.xz";
30+
String lexicalPath = Utils.jarDir + "/models/ner/vi-500brownclusters.xz";
3131
if (!new File(lexicalPath).exists())
3232
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
3333
lexicalMap.put("word_clusters", lexicalPath);
3434

35-
lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-pretrainedembeddings.xz";
35+
lexicalPath = Utils.jarDir + "/models/ner/vi-pretrainedembeddings.xz";
3636
if (!new File(lexicalPath).exists())
3737
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
3838
lexicalMap.put("word_embeddings", lexicalPath);

src/main/java/vn/pipeline/Utils.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
import com.optimaize.langdetect.ngram.NgramExtractors;
77
import com.optimaize.langdetect.profiles.LanguageProfileReader;
88

9+
import java.io.File;
910
import java.io.IOException;
1011
import java.util.List;
1112

1213
public class Utils {
14+
private static File jarFile = new File(VnCoreNLP.class.getProtectionDomain().getCodeSource().getLocation().getPath());
15+
public static String jarDir = jarFile.getParentFile().getPath();
16+
1317
private static LanguageDetector languageDetector = null;
1418
public static String detectLanguage(String text) throws IOException{
1519
if(languageDetector == null) {

0 commit comments

Comments
 (0)