"""Fetch a page through an authenticated HTTP proxy and print its visible text.

Usage:
    driver.py <chrome_path> <url>

Proxy settings are read from the environment (optionally via a ``.env``
file): ``PROXY_HOST``, ``PROXY_PORT``, ``PROXY_USERNAME``, ``PROXY_PASSWORD``.

Chrome has no command-line switch for proxy credentials, so the script
generates a small extension that configures the proxy and answers the
proxy's authentication challenge, then loads it into the browser.
"""
import json
import os
import sys
import zipfile

import undetected_chromedriver as uc
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# The generated extension archive is written to the current working directory.
EXTENSION_PATH = 'proxy_auth.zip'

# "<all_urls>" is required both as a host permission and as the listener
# filter; without it the onAuthRequired listener is never invoked.
# NOTE(review): this is a Manifest V2 extension; recent Chrome releases are
# phasing MV2 out -- confirm the targeted Chrome build still loads it.
MANIFEST_JSON = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"],
        "persistent": true
    },
    "minimum_chrome_version": "76.0.0"
}
"""


def _load_proxy_settings():
    """Return ``(host, port, username, password)`` read from the environment.

    Exits with an explanatory message when a variable is missing or when
    ``PROXY_PORT`` is not an integer, instead of silently embedding the text
    ``None`` in the generated extension.
    """
    load_dotenv()
    names = ('PROXY_HOST', 'PROXY_PORT', 'PROXY_USERNAME', 'PROXY_PASSWORD')
    settings = {name: os.getenv(name) for name in names}

    missing = [name for name, value in settings.items() if not value]
    if missing:
        sys.exit("missing required environment variable(s): " + ", ".join(missing))

    try:
        port = int(settings['PROXY_PORT'])
    except ValueError:
        sys.exit("PROXY_PORT must be an integer, got: " + repr(settings['PROXY_PORT']))

    return (
        settings['PROXY_HOST'],
        port,
        settings['PROXY_USERNAME'],
        settings['PROXY_PASSWORD'],
    )


def _build_background_js(host, port, username, password):
    """Return the extension's background script for the given proxy settings.

    String values are emitted with ``json.dumps`` so that quotes or
    backslashes in a credential cannot break out of the JavaScript literal.
    """
    js_host = json.dumps(host)
    js_username = json.dumps(username)
    js_password = json.dumps(password)
    js_port = int(port)
    return f"""
var config = {{
    mode: "fixed_servers",
    rules: {{
        singleProxy: {{
            scheme: "http",
            host: {js_host},
            port: {js_port}
        }},
        bypassList: ["localhost"]
    }}
}};

chrome.proxy.settings.set({{value: config, scope: "regular"}}, function() {{}});

function callbackFn(details) {{
    return {{
        authCredentials: {{
            username: {js_username},
            password: {js_password}
        }}
    }};
}}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {{urls: ["<all_urls>"]}},
    ['blocking']
);
"""


def _write_proxy_extension(path, background_js):
    """Write the proxy-auth extension (manifest + background script) to *path*."""
    with zipfile.ZipFile(path, 'w') as zip_file:
        zip_file.writestr("manifest.json", MANIFEST_JSON)
        zip_file.writestr("background.js", background_js)


def _print_page_text(driver):
    """Print the text of the document currently loaded in *driver*.

    The DOM is fetched through the DevTools protocol (``DOM.getDocument``
    followed by ``DOM.getOuterHTML`` on the root node) and reduced to text
    with BeautifulSoup.
    """
    data = driver.execute_cdp_cmd('DOM.getDocument', {})
    if not data:
        print("Didn't get any data...")
        return
    if 'root' not in data:
        print("Got data without a root:", data)
        return

    root_node_id = data['root']['nodeId']
    html = driver.execute_cdp_cmd('DOM.getOuterHTML', {"nodeId": root_node_id})
    soup = BeautifulSoup(html['outerHTML'], 'html.parser')
    print(soup.get_text())


def main():
    """Parse arguments, start Chrome behind the proxy, and dump the page text."""
    if len(sys.argv) < 3:
        sys.exit("usage: driver.py <chrome_path> <url>")
    chrome_path, url = sys.argv[1], sys.argv[2]

    host, port, username, password = _load_proxy_settings()
    _write_proxy_extension(
        EXTENSION_PATH,
        _build_background_js(host, port, username, password),
    )

    options = uc.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-gpu')
    options.add_extension(EXTENSION_PATH)

    driver = uc.Chrome(
        browser_executable_path=chrome_path,
        headless=True,
        use_subprocess=False,
        options=options,
    )
    try:
        driver.get(url)
        _print_page_text(driver)
    finally:
        # Always shut the browser down, even if navigation or parsing fails,
        # so no orphaned Chrome process is left behind.
        driver.quit()


if __name__ == "__main__":
    main()