"""Yelp Scrapper.

Provenance
----------
Recovered from the patch "[PATCH] add yelp scrapper"
(commit c2b39918d277bb6e868066b8c3ad4c3277d43725, author Yaochen,
Wed, 3 May 2023 22:13:07 -0400), which created
Web-Scraping/Yelp-Scrapper/README.md and
Web-Scraping/Yelp-Scrapper/scrapper.py (2 files changed, 42 insertions).

Description
-----------
This Python script gets restaurant data from Yelp through the
``https://api.yelp.com/v3/businesses/search`` endpoint. For every
restaurant it appends one row to ``yelp_data.csv`` containing: id, name,
cuisine, address, latitude, longitude, rating, review count and zip code.

Implementation
--------------
Please replace ``API_Key`` (the value of ``API_KEY`` below) with your own
API key obtained from https://www.yelp.com/developers/v3/manage_app
"""

import csv
import decimal

available_cuisines = ['french', 'italian', 'spanish', 'chinese', 'japanese',
                      'indian', 'korean', 'american', 'mexican']
# please replace location with the location you preferred
location = 'manhattan'

# please replace API_Key with your API_key obtained from
# https://www.yelp.com/developers/v3/manage_app
API_KEY = 'API_Key'

SEARCH_URL = 'https://api.yelp.com/v3/businesses/search'
OUTPUT_PATH = 'yelp_data.csv'
PAGE_SIZE = 50
# Offsets requested per cuisine are 0, 50, ..., 950.
# NOTE(review): presumably chosen to stay within the API's cap on
# offset + limit -- confirm against the Yelp documentation.
OFFSET_STOP = 999
# Seconds to wait for the API before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def to_decimal(value):
    """Return *value* as a ``decimal.Decimal``, or ``None`` if it is missing.

    ``None`` is passed through so that the csv writer emits an empty cell
    rather than the conversion raising ``decimal.InvalidOperation``.
    """
    if value is None:
        return None
    return decimal.Decimal(str(value))


def build_row(restaurant, cuisine):
    """Build the CSV row for one business dict returned by the search API.

    Args:
        restaurant: one element of the response's ``businesses`` list.
        cuisine: the search term that produced this business.

    Returns:
        A list ``[id, name, cuisine, address, latitude, longitude, rating,
        review_count, zip_code]``. Missing optional fields become ``None``
        (or an empty address string), which the csv writer renders as
        empty cells.

    Raises:
        KeyError: if the business has no ``id``; it is required for
            de-duplication.
    """
    place = restaurant.get('location') or {}
    coordinates = restaurant.get('coordinates') or {}
    return [
        restaurant['id'],
        restaurant.get('name'),
        cuisine,
        ", ".join(place.get('display_address') or []),
        to_decimal(coordinates.get('latitude')),
        to_decimal(coordinates.get('longitude')),
        to_decimal(restaurant.get('rating')),
        restaurant.get('review_count'),
        place.get('zip_code'),
    ]


def fetch_businesses(cuisine, offset):
    """Request one page of search results and return its business list.

    Args:
        cuisine: value sent as the ``term`` query parameter.
        offset: value sent as the ``offset`` query parameter.

    Returns:
        The ``businesses`` list of the JSON response (empty if absent).

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines where the third-party ``requests`` package is absent.
    import requests

    params = {
        'term': cuisine,
        'location': location,
        'offset': offset,
        'limit': PAGE_SIZE,
    }
    headers = {'Authorization': 'Bearer ' + API_KEY}
    response = requests.get(url=SEARCH_URL, params=params, headers=headers,
                            timeout=REQUEST_TIMEOUT)
    # Fail with the HTTP error itself rather than a KeyError on the payload.
    response.raise_for_status()
    return response.json().get('businesses', [])


def scrape(output_path=OUTPUT_PATH, fetch_page=None):
    """Append every distinct restaurant found to the CSV at *output_path*.

    Args:
        output_path: CSV file to append to (created if it does not exist).
        fetch_page: callable ``(cuisine, offset) -> list of business dicts``;
            defaults to :func:`fetch_businesses`.

    Returns:
        The number of rows written.
    """
    if fetch_page is None:
        fetch_page = fetch_businesses

    seen_ids = set()
    written = 0
    # newline='' is what the csv module requires to avoid blank rows on
    # platforms that translate line endings.
    with open(output_path, 'a', encoding='utf-8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for cuisine in available_cuisines:
            for offset in range(0, OFFSET_STOP, PAGE_SIZE):
                businesses = fetch_page(cuisine, offset)
                if not businesses:
                    # An empty page means no further results for this
                    # cuisine, so skip the remaining offsets.
                    break
                for restaurant in businesses:
                    business_id = restaurant['id']
                    # The same business can match several cuisines/pages;
                    # write it only the first time it is seen.
                    if business_id in seen_ids:
                        continue
                    seen_ids.add(business_id)
                    writer.writerow(build_row(restaurant, cuisine))
                    written += 1
    return written


if __name__ == '__main__':
    scrape()