Google SERP scraper code in Python
# SERP scraper
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# Set up the WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Load the Google search page
url = 'https://www.google.com/'
driver.get(url)

# Search for the keyword
search_box = driver.find_element(By.NAME, 'q')
search_term = 'scrapingbypass'
search_box.send_keys(search_term)
search_box.send_keys(Keys.RETURN)

num = 1

# Scrape the first 5 pages of results
for page in range(1, 6):
    # Wait for the search results to load
    try:
        element_present = EC.presence_of_element_located((By.CSS_SELECTOR, '.g'))
        WebDriverWait(driver, 10).until(element_present)
    except TimeoutException:
        print("Timed out waiting for page to load")

    # Parse the search results
    search_results = driver.find_elements(By.CSS_SELECTOR, '.g')
    for result in search_results:
        try:
            link = result.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
            title = result.find_element(By.CSS_SELECTOR, 'h3').text
        except NoSuchElementException:
            continue  # skip blocks that are not ordinary organic results
        print(num)
        print(title)
        print(link)
        num = num + 1

    # Move to the next page; stop when there is none
    try:
        next_button = driver.find_element(By.CSS_SELECTOR, '#pnnext')
        next_button.click()
        time.sleep(2)  # give the next page a moment to start loading
    except NoSuchElementException:
        break

# Close the WebDriver
driver.quit()
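If the script needs to run without a visible browser window, the ChromeOptions setup above can be switched to headless mode. Here is a minimal sketch, assuming Chrome 109 or later, where the --headless=new flag is available:

# Optional: headless setup for the Selenium scraper above (assumes Chrome 109+)
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless=new")  # older Chrome versions use plain "--headless"
options.add_argument("--window-size=1920,1080")  # give pages a realistic viewport
driver = webdriver.Chrome(options=options)

Note that headless sessions are easier for Google to flag as automated, so the results can differ from what a normal browser session sees.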
Or, as a lighter-weight alternative, the same results can be fetched with requests and parsed with BeautifulSoup, with no browser required:
import requests
from bs4 import BeautifulSoup

# Search keyword
search_term = "scrapingbypass"

# Request headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

# Number of result pages to fetch
num_pages = 5
no = 1

for page in range(0, num_pages):
    # Build the request URL (Google paginates with the "start" parameter)
    if page == 0:
        url = f"https://www.google.com/search?q={search_term}"
    else:
        url = f"https://www.google.com/search?q={search_term}&start={page * 10}"

    # Send the request
    response = requests.get(url, headers=headers)

    # Parse the HTML
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract the search results
    search_results = soup.select(".yuRUbf")

    # Print each title and link
    for result in search_results:
        title = result.select_one("h3").text
        link = result.select_one("a")["href"]
        print(f"{no}: {title}: {link}")
        no = no + 1
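Google throttles repeated requests and may return a CAPTCHA or consent page instead of results, in which case the .yuRUbf selector matches nothing. One defensive option is to check the status code and pause between pages. The helper below is a sketch only; the fetch_serp name, the retry count, and the delay values are illustrative assumptions, not part of the original script:

# Sketch: defensive fetch with a polite delay (retry/delay values are illustrative)
import time
import requests

def fetch_serp(url, headers, retries=3, delay=2.0):
    """Fetch one results page, backing off between failed attempts."""
    response = None
    for attempt in range(retries):
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response
        time.sleep(delay * (attempt + 1))  # simple linear backoff
    response.raise_for_status()  # surface the final HTTP error to the caller

With this helper, the plain requests.get(url, headers=headers) call in the loop above would become response = fetch_serp(url, headers), ideally followed by a short time.sleep() before requesting the next page.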