@@ -66,6 +66,12 @@ def skills(job):
6666 else :
6767 return np .nan
6868
def duration(job):
    """Return the estimated duration of a job posting, or NaN when absent.

    Looks up the ``span`` element with class ``js-duration`` inside *job*,
    strips surrounding whitespace, removes the ``'Est. Time: '`` label and
    lower-cases what is left.

    Args:
        job: Parsed job element exposing ``find(name, class_=...)``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        The cleaned duration text, or ``np.nan`` when the span (or its
        text) is not available.
    """
    try:
        label = job.find('span', class_='js-duration').text
        return label.strip().replace('Est. Time: ', '').lower()
    except AttributeError:
        # Raised when the lookup gives back an object without ``.text`` /
        # ``.strip`` (e.g. None for a missing span). Only this case maps to
        # NaN; a bare ``except:`` would also hide KeyboardInterrupt,
        # SystemExit and unrelated bugs.
        return np.nan
74+
6975class TqdmLoggingHandler (logging .Handler ):
7076
7177 def __init__ (self , level = logging .NOTSET ):
@@ -93,7 +99,7 @@ def emit (self, record):
9399if os .path .isfile ('./upwork_df.csv' ):
94100 df = pd .read_csv ('./upwork_df.csv' , index_col = 0 )
95101else :
96- df = pd .DataFrame (columns = ['budget' , 'desc' , 'key' , 'level' , 'skills' , 'time' , 'title' , 'type' ])
102+ df = pd .DataFrame (columns = ['budget' , 'desc' , 'key' , 'level' , 'skills' , 'time' , 'title' , 'type' , 'duration' ])
97103
98104logging .info ('Start parsing pages...' )
99105
@@ -108,7 +114,7 @@ def emit (self, record):
108114
109115 for job in jobs :
110116 row = {
111- 'key' : key (job ), 'title' : title (job ), 'time' : time (job ), 'type' : type_ (job ),
117+ 'key' : key (job ), 'title' : title (job ), 'time' : time (job ), 'type' : type_ (job ), 'duration' : duration ( job ),
112118 'level' : level (job ), 'budget' : budget (job ), 'desc' : desc (job ), 'skills' : skills (job )
113119 }
114120 try :
0 commit comments