Skip to content

Commit 5444926

Browse files
committed
avoid reading training data twice
1 parent 84ac019 commit 5444926

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

ml_datasets/spacy_readers/dbpedia_reader.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,12 @@ def dbpedia_reader(
16 16
assert isinstance(gold_label, str)
17 17
unique_labels.add(gold_label)
18 18
# do this here to avoid reading the data multiple times
19-
data = list(dbpedia(train, path, limit=limit))
19+
if train:
20+
data = all_train_data
21+
if limit >= 1:
22+
data = data[:limit]
23+
else:
24+
data = list(dbpedia(train, path, limit=limit))
20 25

21 26
def read_examples(nlp):
22 27
for text, gold_label in data:

0 commit comments

Comments
 (0)