Skip to content

Commit 4928703

Browse files
Update trec_tot_2025.py
1 parent 8871505 commit 4928703

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

ir_datasets/datasets/trec_tot_2025.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,19 +21,19 @@ class JsonlDocumentOffset(NamedTuple):
2121
offset_end: int
2222

2323

24-
class TrecToT2025Doc():
25-
def __init__(self, json_doc):
26-
parsed_doc = json.loads(json_doc)
27-
self.doc_id = parsed_doc["id"]
28-
self.title = parsed_doc["title"]
29-
self.url = parsed_doc["url"]
30-
self.text = parsed_doc["text"]
24+
class TrecToT2025Doc(NamedTuple):
25+
doc_id: str
26+
title: str
27+
url: str
28+
text: str
29+
30+
@staticmethod
31+
def _from_json(self, json_doc):
32+
return TrecToT2025Doc(json_doc["id"], json_doc["title"], json_doc["url"], json_doc["text"])
3133

3234
def default_text(self):
3335
return self.title + " " + self.text
3436

35-
def _asdict(self):
36-
return {"doc_id": self.doc_id, "text": self.default_text()}
3737

3838
class JsonlWithOffsetsDocsStore(Docstore):
3939
def __init__(self, docs, offsets):
@@ -71,7 +71,7 @@ def get_many_iter(self, doc_ids):
7171
class TrecToT2025DocsStore(JsonlWithOffsetsDocsStore):
7272
def get_many_iter(self, doc_ids):
7373
for i in super().get_many_iter(doc_ids):
74-
yield TrecToT2025Doc(i)
74+
yield TrecToT2025Doc._from_json(i)
7575

7676

7777
class JsonlDocumentsWithOffsets(BaseDocs):
@@ -82,10 +82,10 @@ def __init__(self, docs, offsets):
8282
def docs_iter(self):
8383
with gzip.open(self.__docs.path()) as f:
8484
for l in f:
85-
yield TrecToT2025Doc(l)
85+
yield TrecToT2025Doc._from_json(json.loads(l))
8686

8787
def docs_cls(self):
88-
return self._cls
88+
return TrecToT2025Doc
8989

9090
def docs_store(self, field='doc_id'):
9191
return TrecToT2025DocsStore(self.__docs, self.__offsets)

0 commit comments

Comments
 (0)