@@ -82,33 +82,34 @@ def extractTotalPages(url):
8282
8383
def scrape_reviews(url):
    """Scrape every review page of a product into one DataFrame.

    ``extractTotalPages(url)`` supplies the page count, page title and review
    count. One URL per page is built by appending ``?pageNumber=<n>`` to
    ``url``; the URLs are handed to ``extractPage`` through ``p_map`` and the
    per-page dictionaries are merged key by key.

    Args:
        url: Base URL of the review listing. It is used verbatim, so it is
            assumed not to carry a query string already (TODO confirm with
            callers).

    Returns:
        pandas.DataFrame with the columns ``Reviewer``, ``Rating``, ``Title``
        and ``Description``. When no page produced any data the frame is
        empty but still has those four columns.

    Raises:
        KeyError: if pages were scraped but one of the expected keys
            (``reviewers``, ``ratings``, ``reviewTitles``,
            ``reviewDescriptions``) is missing from the merged result.
    """
    # NOTE: this body must not be wrapped in `if __name__ == '__main__':`.
    # Inside a function that test looks at the *defining module's* name, so
    # the function would silently do nothing whenever it is imported.
    # NOTE(review): p_map presumably fans out to worker processes; if so,
    # scripts on spawn-based platforms should put the guard around their own
    # entry point instead - confirm against the p_map implementation.
    totalPages, pageTitle, totalReviews = extractTotalPages(url)
    print(f"[scrape-amazon] - {pageTitle} ")
    print(f"[scrape-amazon] Total Pages - {totalPages} ")
    print(f"[scrape-amazon] Total Reviews - {totalReviews} \n ")

    # Pages are numbered from 1. No trailing whitespace in the query string,
    # otherwise the request URL would be malformed.
    page_urls = [f"{url}?pageNumber={page}" for page in range(1, totalPages + 1)]

    page_results = p_map(extractPage, page_urls)

    # Merge the per-page dicts: values under the same key are concatenated in
    # page order. A fresh list is created per key so the first page's own
    # list is not mutated.
    # Assumes each value is a list of per-review entries - TODO confirm
    # against extractPage.
    merged = {}
    for page_result in page_results:
        for key, values in page_result.items():
            merged.setdefault(key, []).extend(values)

    # Output column name -> key used by the per-page dictionaries.
    column_sources = {
        "Reviewer": "reviewers",
        "Rating": "ratings",
        "Title": "reviewTitles",
        "Description": "reviewDescriptions",
    }

    if not merged:
        # Nothing was scraped (e.g. zero pages): return an empty frame with
        # the expected columns rather than failing on a missing key.
        return pd.DataFrame(columns=list(column_sources))

    return pd.DataFrame(
        {column: merged[key] for column, key in column_sources.items()}
    )
0 commit comments