| 
 | 1 | +# Author: Mahesh Bharadwaj K (https://github.com/MaheshBharadwaj)  | 
 | 2 | + | 
 | 3 | +import os  | 
 | 4 | +import re  | 
 | 5 | +import sys  | 
 | 6 | + | 
 | 7 | +from bs4 import BeautifulSoup  | 
 | 8 | +from selenium import webdriver  | 
 | 9 | +from textwrap3 import wrap  | 
 | 10 | +from webdriver_manager.chrome import ChromeDriverManager  | 
 | 11 | + | 
# Chrome options passed to webdriver.Chrome when a problem page is loaded.
options = webdriver.ChromeOptions()
options.add_argument("headless")  # run Chrome in headless mode
 | 14 | + | 
 | 15 | +class InvalidCodeException(Exception):  | 
 | 16 | + """  | 
 | 17 | + Invalid problem code   | 
 | 18 | + """  | 
 | 19 | + pass  | 
 | 20 | + | 
 | 21 | + | 
 | 22 | +def parse_problem_statement(problem_code: str):  | 
 | 23 | + """  | 
 | 24 | + This function takes a Leet Code problem code as input and  | 
 | 25 | + scrapes the problem statement from the site and returns  | 
 | 26 | + the parsed problem statement as a text file.  | 
 | 27 | +
  | 
 | 28 | + PARAMETERS:  | 
 | 29 | + -----------  | 
 | 30 | + problem_code: string  | 
 | 31 | + LeetCode problem code  | 
 | 32 | +   | 
 | 33 | + RETURNS:  | 
 | 34 | + --------  | 
 | 35 | + problem_div.text: string  | 
 | 36 | + Extracted problem statement as string after removing HTML tags  | 
 | 37 | + """  | 
 | 38 | + URL = f"https://leetcode.com/problems/{problem_code}"  | 
 | 39 | + browser = webdriver.Chrome(ChromeDriverManager().install(), options=options) # install and open chrome driver  | 
 | 40 | + browser.get(URL)  | 
 | 41 | + print("[SCRAPING] - ", problem_code)  | 
 | 42 | + soup = BeautifulSoup(  | 
 | 43 | + browser.page_source, features="html.parser"  | 
 | 44 | + ) # parse page source  | 
 | 45 | + | 
 | 46 | + # If invalid program code, 404 page is displayed  | 
 | 47 | + if soup.find('div', class_='display-404'):  | 
 | 48 | + raise InvalidCodeException  | 
 | 49 | + | 
 | 50 | + # Problem statement div  | 
 | 51 | + problem_div = soup.find('div', class_=re.compile(r'content\w+ question-content\w+'))  | 
 | 52 | + return problem_div.text  | 
 | 53 | + | 
 | 54 | + | 
 | 55 | +if __name__ == "__main__":  | 
 | 56 | + if len(sys.argv) != 2:  | 
 | 57 | + print('Invalid Usage!\nRun: python3 leet_code_scraper.py [problem code]', file=sys.stderr)  | 
 | 58 | + sys.exit(1)  | 
 | 59 | + try:  | 
 | 60 | + problem_code = sys.argv[1]  | 
 | 61 | + parsed_problem = parse_problem_statement(problem_code)  | 
 | 62 | + | 
 | 63 | + with open(problem_code + '.txt', 'wt') as fout:  | 
 | 64 | + parsed_lines = parsed_problem.split('\n')  | 
 | 65 | + for line in parsed_lines:  | 
 | 66 | + if len(line) < 81:  | 
 | 67 | + print(line, file=fout)  | 
 | 68 | + else:  | 
 | 69 | + wrapped_lines = wrap(line, width=80) # Splitting long line into multiple lines  | 
 | 70 | + for l in wrapped_lines:  | 
 | 71 | + print(l, file=fout)  | 
 | 72 | + | 
 | 73 | + print(f"Successfully scraped {problem_code} and saved as {problem_code}.py!")  | 
 | 74 | + | 
 | 75 | + except InvalidCodeException:  | 
 | 76 | + print("Invalid Problem Code! Please check the problem code provided!")  | 
 | 77 | + | 
 | 78 | + except Exception as e:  | 
 | 79 | + print('Fatal: \n' + str(e))  | 
0 commit comments