From 84059a208ae8892e05c7a7f5f124079ebe1156b9 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Wed, 30 Jul 2025 18:43:04 +0700 Subject: [PATCH] Argument parsing for better cli experience --- driver.py | 56 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/driver.py b/driver.py index 912a5ee..a3efd13 100644 --- a/driver.py +++ b/driver.py @@ -1,21 +1,35 @@ import zipfile import undetected_chromedriver as uc import sys -from dotenv import load_dotenv +import argparse + +from urllib.parse import urlparse from bs4 import BeautifulSoup -if len(sys.argv) < 3: - sys.exit("usage: driver.py ") +parser = argparse.ArgumentParser(description='Scrape websites with a containerized web browser') +parser.add_argument('website', help='Desired website you want to scrape') +parser.add_argument('-b', '--browser-path', help='Path to browser binary', + default='/usr/bin/google-chrome') +parser.add_argument('-a', '--browser-args', nargs='+', help='Additional Args to pass the browser') +parser.add_argument('-p', '--proxy-url', help='Proxy URL (e.g., http://user:pass@host:port)') -load_dotenv() +args = parser.parse_args() -proxy_host = os.getenv('PROXY_HOST') -proxy_port = os.getenv('PROXY_PORT') -username = os.getenv('PROXY_USERNAME') -password = os.getenv('PROXY_PASSWORD') +options = uc.ChromeOptions() +if args.browser_args: + for b_arg in args.browser_args: + options.add_argument(f'--{b_arg}') -manifest_json = """ +if args.proxy_url: + parsed = urlparse(args.proxy_url) + + proxy_host = parsed.hostname + proxy_port = parsed.port + proxy_username = parsed.username + proxy_password = parsed.password + + manifest_json = """ { "version": "1.0.0", "manifest_version": 2, @@ -26,13 +40,13 @@ manifest_json = """ } """ -background_js = f""" + background_js = f""" var config = {{ mode: "fixed_servers", rules: {{ singleProxy: {{ scheme: "http", - host: "{proxy_server}", + host: "{proxy_host}", port: parseInt({proxy_port}) }}, bypassList: ["localhost"] @@ -44,8 +58,8 @@ chrome.proxy.settings.set({{value: config, scope: "regular"}}, function() {{}}); function callbackFn(details) {{ return {{ authCredentials: {{ - username: "{username}", - password: "{password}" + username: "{proxy_username}", + password: "{proxy_password}" }} }}; }} @@ -58,23 +72,19 @@ chrome.webRequest.onAuthRequired.addListener( """ -with zipfile.ZipFile('proxy_auth.zip', 'w') as zip_file: - zip_file.writestr("manifest.json", manifest_json) - zip_file.writestr("background.js", background_js) + with zipfile.ZipFile('proxy_auth.zip', 'w') as zip_file: + zip_file.writestr("manifest.json", manifest_json) + zip_file.writestr("background.js", background_js) -options = uc.ChromeOptions() -options.add_argument('--no-sandbox') -options.add_argument('--disable-dev-shm-usage') -options.add_argument('--disable-gpu') -options.add_extension('proxy_auth.zip') + options.add_extension('proxy_auth.zip') driver = uc.Chrome( - browser_executable_path=sys.argv[1], + browser_executable_path=args.browser_path, headless=True, use_subprocess=False, options=options ) -driver.get(sys.argv[2]) +driver.get(args.website) data = driver.execute_cdp_cmd('DOM.getDocument', {}) if data: