Skip to content
This repository was archived by the owner on Aug 16, 2022. It is now read-only.

Commit ea0d978

Browse files
committed
Use Tilde organizer (with Berlinium GUI) as a bibliography manager
1 parent ef67ae2 commit ea0d978

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed

utils/add_pdf_articles.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
"""
2+
This script demonstrates how the Tilde organizer
3+
can be used as a bibliography manager.
4+
See https://github.com/tilde-lab/pycrystal/tree/master/papers
5+
for an example usage for the CRYSTAL17 code online bibliography.
6+
Two files are currently needed:
7+
* bib_els_file (raw bibliography items as presented online)
8+
* bib_data_file (processed bibliography items, e.g. with DOI, PDF, etc.)
9+
"""
10+
import os
11+
import sys
12+
import json
13+
import random
14+
from hashlib import md5
15+
16+
import chk_tilde_install
17+
18+
from tilde.core.api import API
19+
from tilde.core.settings import connect_database, settings
20+
from tilde.parsers import Output
21+
22+
23+
# Paper id -> chemical element symbols, for the papers whose element
# mappings were absent in the CRYSTAL17 online bibliography.
MISSING_MAPPING = {
    'to127': ['O', 'Zr'],
    'knaup2005': ['C', 'O', 'Si'],
    'catti2000': ['O', 'Si'],
    'to307': ['H', 'O', 'Si'],
    'sto52': ['H', 'O', 'Si'],
    'lindsay98': ['Cl', 'Si'],
    'mukhopadhyay2004': ['O', 'Si'],
    'to279': ['O', 'Si'],
    'Gibbs99a': ['O', 'Si'],
    'gibbs1999': ['O', 'Si'],
    'gibbs2003': ['O', 'Si'],
    'gibbs2006': ['Na', 'Mg', 'Al', 'Si', 'B', 'N', 'C', 'S', 'P', 'O'],
    'sto89': ['O', 'Si'],
    'to220': ['C', 'Si'],
    'zwijnenburg2007': ['O', 'Si'],
    'to264': ['O', 'Si'],
    'goumans2007': ['O', 'Si'],
    'to45': ['C', 'Si'],
    'sonnet1999': ['Si'],
    'to253': ['O', 'Si'],
    'gnani2000': ['O', 'Si'],
    'zwijnenburg2006': ['Si', 'Ge', 'Be', 'O', 'F', 'S'],
    'to44': ['Si'],
    'to126': ['Si'],
    'knaup2005b': ['C', 'Si'],
    'gibbs2000': ['O', 'Si'],
    'sto72': ['Si'],
    'sto92': ['H', 'Si'],
}
25+
26+
class PDF_Article(Output):
    """Tilde ``Output`` record that stands for one bibliography article file."""

    def __init__(self, filename):
        """Initialise the base ``Output`` and register *filename* as a related file."""
        Output.__init__(self, filename)
        self.related_files.append(filename)
        # NOTE(review): the meaning of dtype 0x1 is defined by Tilde -- confirm
        self.info['dtype'] = 0x1

    def get_checksum(self):
        """Return an identifier string that always ends with ``'PDF'``.

        When the file behind this record exists, the identifier is the MD5
        hex digest of its content. Otherwise ``related_files`` is reset to
        an empty list and a random 48-character alphanumeric string is used.
        """
        if not os.path.exists(self._filename):
            self.related_files = []
            # for non-ready items, TODO
            alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
            nonce = "".join(random.choice(alphabet) for _ in range(48))
            return nonce + 'PDF'

        digest = md5()
        with open(self._filename, "rb") as pdf:
            # Read in 4096-byte chunks so the whole file is never held in memory.
            while True:
                chunk = pdf.read(4096)
                if not chunk:
                    break
                digest.update(chunk)
        # NB. there are dups, and we need to workaround them
        return digest.hexdigest() + 'PDF'

    def set_meta_and_els(self, els):
        """Store the elements *els* and the fixed placeholder metadata in ``info``.

        ``standard`` and ``formula`` both receive the sorted element symbols
        joined with ``' / '``.
        """
        self.info['elements'] = els
        joined = ' / '.join(sorted(els))
        remaining = (
            ('standard', joined),
            ('formula', joined),
            ('ng', 0),
            ('nelem', 0),
            ('H', 'unknown'),
            ('framework', 0x3),  # CRYSTAL
            ('ansatz', 0x3),  # CRYSTAL
        )
        for name, value in remaining:
            self.info[name] = value
54+
55+
def ascii_only(text):
    """Return *text* with every non-ASCII character dropped.

    The encoded value is decoded back to text, so string methods such as
    ``split(', ')`` keep working on Python 3, where ``encode`` alone
    yields ``bytes``.
    """
    return text.encode('ascii', 'ignore').decode('ascii')


def unique_in_order(items):
    """Return the members of *items* without repeats, in first-seen order."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def load_json(path):
    """Parse the JSON file at *path*; the file is closed even if parsing fails."""
    with open(path) as f:
        return json.load(f)


def invert_element_index(els2paperids, overrides=None):
    """Turn an ``element -> paper ids`` mapping into ``paper id -> elements``.

    Repeated paper ids under one element are counted once. Entries of
    *overrides*, if given, replace the computed entries for the same paper id.
    """
    data2els = {}
    for el in els2paperids:
        for article_item in set(els2paperids[el]):  # FIXME? set, as we have dups els
            data2els.setdefault(article_item, []).append(el)
    if overrides:
        data2els.update(overrides)
    return data2els


def main(argv):
    """Save every paper of the bibliography through the Tilde API.

    *argv* follows the ``sys.argv`` layout: ``argv[1]`` is the bib_els_file
    and ``argv[2]`` is the bib_data_file.

    Raises ``RuntimeError`` if either path is missing, and ``KeyError`` if
    a paper has no element mapping at all.
    """
    try:
        bib_els_file = argv[1]
        bib_data_file = argv[2]
    except IndexError:
        raise RuntimeError("Two arguments are required: bib_els_file and bib_data_file")

    els2bib = load_json(bib_els_file)
    data2meta = load_json(bib_data_file)

    # File names from bib_data_file are resolved against the directory two
    # levels above bib_els_file.
    folder = os.sep.join(bib_els_file.split(os.sep)[:-2])

    data2els = invert_element_index(els2bib['els2paperids'], MISSING_MAPPING)
    paperids2bib = els2bib['paperids2bib']

    session = connect_database(settings)
    try:
        work = API()

        for key in paperids2bib:
            if key not in data2meta:
                # for non-ready items, TODO
                bib = paperids2bib[key]
                filename = 'data/NONCE'
                doi = None
                authors = ascii_only(bib[0].replace(' and ', ', ')).split(', ')
                year = bib[2]
                article_title = ascii_only(bib[1])
                pubdata = ascii_only(bib[3])
                print("Missing: %s, %s, %s (%s)" % (authors, article_title, pubdata, year))
            else:
                meta = data2meta[key]
                filename = meta[0]
                doi = meta[1]
                authors = ascii_only(meta[2]).split(', ')
                year = meta[5]
                article_title = ascii_only(meta[3])
                pubdata = ascii_only(meta[4])

            authors = unique_in_order(authors)

            data_item = PDF_Article(os.path.join(folder, filename))
            data_item.set_meta_and_els(data2els[key])

            data_item.info['authors'] = authors
            data_item.info['year'] = year
            data_item.info['article_title'] = article_title
            if doi:
                data_item.info['doi'] = doi
            data_item.info['pubdata'] = pubdata

            _checksum, error = work.save(data_item, session)
            if error:
                print(error)
    finally:
        # Release the database session even when an item fails half-way.
        session.close()


if __name__ == "__main__":
    main(sys.argv)

0 commit comments

Comments
 (0)