Skip to content

Commit 3adae54

Browse files
Update scrape.py
1 parent eacce03 commit 3adae54

File tree

1 file changed

+31
-30
lines changed

1 file changed

+31
-30
lines changed

scrape_amazon/util/scrape.py

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -82,33 +82,34 @@ def extractTotalPages(url):
82 82

83 83

84 84
def scrape_reviews(url):
85-
totalPages, pageTitle, totalReviews = extractTotalPages(url)
86-
print(f"[scrape-amazon] - {pageTitle}")
87-
print(f"[scrape-amazon] Total Pages - {totalPages}")
88-
print(f"[scrape-amazon] Total Reviews - {totalReviews}\n")
89-
urlsToFetch = []
90-
for page in range(1, totalPages + 1):
91-
urlToFetch = url + f"?pageNumber={page}"
92-
urlsToFetch.append(urlToFetch)
93-
94-
results = p_map(extractPage, urlsToFetch)
95-
res = {}
96-
for k in results:
97-
for list in k:
98-
if list in res:
99-
res[list] += k[list]
100-
else:
101-
res[list] = k[list]
102-
103-
productReviewsData = pd.DataFrame()
104-
105-
# # Adding Information
106-
107-
productReviewsData["Reviewer"] = res["reviewers"]
108-
productReviewsData["Rating"] = res["ratings"]
109-
productReviewsData["Title"] = res["reviewTitles"]
110-
productReviewsData["Description"] = res["reviewDescriptions"]
111-
# productReviewsData["link"] = url
112-
# productReviewsData["Product Title"] = pageTitle
113-
114-
return productReviewsData
85+
if __name__ == '__main__':
86+
totalPages, pageTitle, totalReviews = extractTotalPages(url)
87+
print(f"[scrape-amazon] - {pageTitle}")
88+
print(f"[scrape-amazon] Total Pages - {totalPages}")
89+
print(f"[scrape-amazon] Total Reviews - {totalReviews}\n")
90+
urlsToFetch = []
91+
for page in range(1, totalPages + 1):
92+
urlToFetch = url + f"?pageNumber={page}"
93+
urlsToFetch.append(urlToFetch)
94+
95+
results = p_map(extractPage, urlsToFetch)
96+
res = {}
97+
for k in results:
98+
for list in k:
99+
if list in res:
100+
res[list] += k[list]
101+
else:
102+
res[list] = k[list]
103+
104+
productReviewsData = pd.DataFrame()
105+
106+
# # Adding Information
107+
108+
productReviewsData["Reviewer"] = res["reviewers"]
109+
productReviewsData["Rating"] = res["ratings"]
110+
productReviewsData["Title"] = res["reviewTitles"]
111+
productReviewsData["Description"] = res["reviewDescriptions"]
112+
# productReviewsData["link"] = url
113+
# productReviewsData["Product Title"] = pageTitle
114+
115+
return productReviewsData

0 commit comments

Comments
 (0)