officialpm
diff --git a/‎README.md‎
Lines changed: 6 additions & 6 deletions b/‎README.md‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎scrape_amazon/util/scrape.py‎
Lines changed: 7 additions & 0 deletions b/‎scrape_amazon/util/scrape.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎setup.cfg‎
Lines changed: 1 addition & 1 deletion b/‎setup.cfg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎setup.py‎
Lines changed: 2 additions & 1 deletion b/‎setup.py‎
Lines changed: 2 additions & 1 deletion
@@ -27,7 +27,7 @@
 from scrape_amazon import get_reviews
 
 reviews = get_reviews('in','B078BNQ318') #returns dataframe
-#Reviewer, Rating, Title, Description
+#Reviewer, Rating, Title, Description, Date
 ```
 
 ### CLI
@@ -52,11 +52,11 @@ output_path output_path for saving (B078BNQ318.csv)
 ## Output
 
 ```shell
- Reviewer Rating Title Description
-0 Parth Maniar 4 Great but ... I change ... 
-1 Manpreet Singh 3 Delivers ... Great ph ... 
-2 Aparna Uniyal 1 Battery/H ... I have ... 
-3 Rahul 5 Great but ... On the f ... 
+ Reviewer Rating Title Description Date
+0 Parth Maniar 4 Great but ... I change ...  05/24/2021
+1 Manpreet Singh 3 Delivers ... Great ph ...  05/24/2021
+2 Aparna Uniyal 1 Battery/H ... I have ...  05/24/2021
+3 Rahul 5 Great but ... On the f ...  05/24/2021
 ```
 ## Want to contribute?
 To get more information on contributing, go to the 
 
@@ -1,5 +1,6 @@
 import math
 import re
+import datefinder
 import pandas as pd
 from bs4 import BeautifulSoup
 from p_tqdm import p_map
@@ -34,9 +35,11 @@ def extractPage(url: str) -> str:
  pageNotLoaded = False
  reviewers = []
  ratings = []
+ ratingsDate = []
  reviewDescriptions = []
  reviewTitles = []
  reviewrsSpan = productPage.findAll("span", {"class": "a-profile-name"})
+ reviewDate = productPage.findAll("span", {"class": "review-date"})
  ratingsSpan = productPage.findAll("i", {"class": "review-rating"})
  reviewTitlesSpan = productPage.findAll("a", {"class": "review-title-content"})
  reviewDescriptionSpan = productPage.findAll(
@@ -48,6 +51,8 @@ def extractPage(url: str) -> str:
  for i in range(2, len(reviewrsSpan)):
  reviewers.append(reviewrsSpan[i].get_text())
  ratings.append(int(ratingsSpan[i].get_text()[0]))
+ matches = datefinder.find_dates(reviewDate[i].get_text())
+ ratingsDate.append(list(matches)[0].strftime("%m/%d/%Y"))
 
  for i in range(0, len(reviewTitlesSpan)):
  reviewTitles.append(reviewTitlesSpan[i].get_text())
@@ -63,6 +68,7 @@ def extractPage(url: str) -> str:
  "ratings": ratings,
  "reviewTitles": reviewTitles,
  "reviewDescriptions": reviewDescriptions,
+ "date": ratingsDate,
  }
 
 
@@ -107,6 +113,7 @@ def scrape_reviews(url):
  productReviewsData["Rating"] = res["ratings"]
  productReviewsData["Title"] = res["reviewTitles"]
  productReviewsData["Description"] = res["reviewDescriptions"]
+ productReviewsData["Date"] = res["date"]
  # productReviewsData["link"] = url
  # productReviewsData["Product Title"] = pageTitle
 
 
@@ -1,6 +1,6 @@
 [metadata]
 name = scrape_amazon
-version = 0.1.7
+version = 0.1.8
 description = Scrape Amazon Reviews smoothly.
 license = MIT
 author = Parth Maniar
 
@@ -5,7 +5,7 @@
 
 setuptools.setup(
  name="scrape_amazon",
- version="0.1.7",
+ version="0.1.8",
  description="Scrape Amazon Reviews",
  url="http://github.com/officialpm/scrape-amazon",
  author="Parth Maniar",
@@ -34,6 +34,7 @@
  "p_tqdm",
  "my_fake_useragent",
  "requests",
+ "datefinder"
  ],
  entry_points={
  "console_scripts": ["scrape-amazon=scrape_amazon.cli:get_reviews_cli"],