Skip to content

Commit 118fc42

Browse files
committed
project duration added
1 parent 7fb997a commit 118fc42

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

upwork_parser.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ def skills(job):
6666
else:
6767
return np.nan
6868

69+
def duration(job):
    """Return the estimated project duration for a job listing.

    Looks up the ``span`` element carrying the ``js-duration`` class,
    strips surrounding whitespace, removes the ``'Est. Time: '`` label and
    lower-cases what is left.

    Args:
        job: Parsed listing element exposing ``find(name, class_=...)``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        The normalised duration string, or ``np.nan`` when the listing has
        no duration span.
    """
    try:
        span = job.find('span', class_='js-duration')
        # ``find`` yields None when nothing matches, so ``.text`` raises
        # AttributeError; that is the "no duration" case handled below.
        return span.text.strip().replace('Est. Time: ', '').lower()
    except AttributeError:
        # Catch only the missing-element case so that KeyboardInterrupt,
        # SystemExit and genuine bugs are no longer silently swallowed.
        return np.nan
74+
6975
class TqdmLoggingHandler(logging.Handler):
7076

7177
def __init__ (self, level = logging.NOTSET):
@@ -93,7 +99,7 @@ def emit (self, record):
9399
if os.path.isfile('./upwork_df.csv'):
94100
df = pd.read_csv('./upwork_df.csv', index_col=0)
95101
else:
96-
df = pd.DataFrame(columns=['budget', 'desc', 'key', 'level', 'skills', 'time', 'title', 'type'])
102+
df = pd.DataFrame(columns=['budget', 'desc', 'key', 'level', 'skills', 'time', 'title', 'type', 'duration'])
97103

98104
logging.info('Start parsing pages...')
99105

@@ -108,7 +114,7 @@ def emit (self, record):
108114

109115
for job in jobs:
110116
row = {
111-
'key': key(job), 'title': title(job), 'time': time(job), 'type': type_(job),
117+
'key': key(job), 'title': title(job), 'time': time(job), 'type': type_(job), 'duration': duration(job),
112118
'level': level(job), 'budget': budget(job), 'desc': desc(job), 'skills': skills(job)
113119
}
114120
try:

0 commit comments

Comments
 (0)