Last Updated: December 30, 2017
·
3.136K
· hur1can3

Scrape Foursquare API to get venue information

import urllib
import urllib2
import json
import datetime
import pandas as pd
from pandas.io.json import json_normalize
import math
import time
from math import cos
from pandas import DataFrame

### Helper function for converting meters to lat/long

def distcust(p, d, lat_m, long_m):
    """Offset point ``p`` by a grid step of ``d`` meters in each axis.

    p      -- dict with 'lat' and 'long' in decimal degrees
    d      -- step size in meters
    lat_m  -- grid multiplier applied along the latitude axis
    long_m -- grid multiplier applied along the longitude axis

    Returns a new {'lat': ..., 'long': ...} dict; ``p`` is not mutated.
    """
    # ~111,320 meters per degree of latitude (and of longitude at the
    # equator). The original used 11100.0/90*1000 (~123,333), apparently a
    # typo, applied the cosine correction to the wrong axis, and passed
    # degrees straight into cos().
    METERS_PER_DEGREE = 111320.0
    lat = p['lat']
    lng = p['long']
    # Latitude degrees-per-meter is constant; longitude degrees-per-meter
    # shrink by cos(latitude), so divide that term by the cosine.
    lat1 = lat + lat_m * d / METERS_PER_DEGREE
    long1 = lng + long_m * d / (METERS_PER_DEGREE * math.cos(math.radians(lat)))
    return {'lat': lat1, 'long': long1}

# Foursquare API credentials -- fill in your own before running.
client_secret = "YOUR_CLIENT_SECRET"
client_id = "YOUR_CLIENT_ID"

# Other seed points kept for reference:
#   San Francisco (Van Ness & Market): lat 37.7833,    long -122.4167
#   New York (Central Park):           lat 40.783011,  long -73.965368
# Active search center: Grand Rapids, MI (Division & Fulton).
p = {'lat': 42.963601, 'long': -85.66878}

# Search tuning: query radius in meters, max venues per response,
# and the half-extent of the grid sweep.
distance = 100
limit = 50
gridSize = 10

# Accumulator for every venue found across the whole grid sweep.
df = DataFrame()

# Fields kept from each venues/search result, and the target category (bars).
requested_keys = ["categories", "id", "location", "name"]
category = "bar"
category_id = "4d4b7105d754a06376d81259"

for x in [x1 / 10.0 for x1 in range(-3*gridSize, 3*gridSize)]:
 for y in [y1 / 10.0 for y1 in range(-3*gridSize, 3*gridSize)]:
 center = distcust(p,distance,x,y)
 url = "https://api.foursquare.com/v2/venues/search?ll=%s,%s&intent=browse&radius=%s&categoryId=%s&client_id=%s&client_secret=%s&v=%s" % (center["lat"], center["long"], distance, category_id, client_id, client_secret, time.strftime("%Y%m%d"))
 try:
 req = urllib2.Request(url)
 response = urllib2.urlopen(req)
 data = json.loads(response.read())
 response.close()
 #print data["response"]['venues']
 data = DataFrame(data["response"]['venues'])[requested_keys]


 df2 = DataFrame()
 venue_ids = []
 frames = []

 #print data["id"]
 for d in data["id"]: 
 requested_keys2 = ["id", "price.currency","rating", "likes.count"]

 url2 = "https://api.foursquare.com/v2/venues/%s?client_id=%s&client_secret=%s&v=%s" % (d, client_id, client_secret, time.strftime("%Y%m%d"))
 req2 = urllib2.Request(url2)
 response2 = urllib2.urlopen(req2)
 data2 = json.loads(response2.read())
 response.close()
 ddata = data2['response'] 

 nom_data = json_normalize(ddata['venue'])

 if "price.currency" not in nom_data.columns:
 nom_data["price.currency"] = 'NONE'

 if "rating" not in nom_data.columns:
 nom_data["rating"] = 'NONE' 

 venue_ids.append(d)
 frames.append(nom_data[requested_keys2])
 #print "getting attr for %s" % nom_data["name"]
 time.sleep(1)


 df2 = pd.concat(frames, keys=venue_ids)

 mdata = pd.merge(data, df2,how='left',on='id', suffixes=('_x', '_y'))

 #print mdata

 df = df.append(mdata,ignore_index=True)
 #print df

 #df.to_csv("test.csv")

 print center
 time.sleep(1) # stay within API limits
 except Exception, e:
 print e

# Deduplicate venues that appeared in overlapping grid cells, keeping the
# most recently scraped row. NOTE(review): cols=/take_last= are pre-0.17
# pandas spellings; modern pandas renamed them to subset=/keep='last'.
df = df.drop_duplicates(cols='id',take_last=True)
print df

# Flatten nested API structures into scalar columns: first category name,
# and the lat/lng/distance fields from the location dict.
df["categories"] = df["categories"].apply(lambda x: dict(x[0])['name'])
df["lat"] = df["location"].apply(lambda x: dict(x)["lat"])
df["long"] = df["location"].apply(lambda x: dict(x)["lng"])
df["distance"] = df["location"].apply(lambda x: dict(x)["distance"])
# NOTE(review): "stats" is not among requested_keys selected above, so this
# column's presence depends on the merge output -- verify against a live run.
df["checkins"] = df["stats"].apply(lambda x: dict(x)["checkinsCount"])

# NOTE(review): "id_x"/"name_x" presume suffixed columns from the merge, but
# the merge above joins on 'id' alone -- confirm these names actually exist.
ordered_df = df[["id_x","name_x","categories","checkins", "distance","lat","long", "price.currency", "rating", "likes.count"]]
ordered_df.to_csv("foursquare_%s_grand_rapids.csv" % category,encoding='utf-8', index=False)

1 Response
Add your response

It throws the error "HTTP Error 400: Bad Request" — any idea why?

over 1 year ago ·