diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d4374ac
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.env
+/proxy_auth.zip
diff --git a/Dockerfile b/Dockerfile
index 870ed99..4e19ed2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,7 +4,7 @@ USER root
 
 RUN apt-get update && apt-get install -y python3 python3-pip && rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install --break-system-packages undetected-chromedriver
+RUN pip3 install --break-system-packages undetected-chromedriver beautifulsoup4 python-dotenv
 
 COPY driver.py /app/
 WORKDIR /app
diff --git a/driver.py b/driver.py
index 26bafb9..912a5ee 100644
--- a/driver.py
+++ b/driver.py
@@ -1,13 +1,77 @@
+import os
+import zipfile
 import undetected_chromedriver as uc
 import sys
+from dotenv import load_dotenv
+from bs4 import BeautifulSoup
 
 if len(sys.argv) < 3:
     sys.exit("usage: driver.py ")
 
+
+# Read proxy settings from the environment (load_dotenv() adds any .env values).
+load_dotenv()
+
+proxy_host = os.getenv('PROXY_HOST')
+proxy_port = os.getenv('PROXY_PORT')
+username = os.getenv('PROXY_USERNAME')
+password = os.getenv('PROXY_PASSWORD')
+
+# Manifest for a small Chrome extension that holds the proxy configuration.
+manifest_json = """
+{
+    "version": "1.0.0",
+    "manifest_version": 2,
+    "name": "Chrome Proxy",
+    "permissions": ["proxy", "tabs", "unlimitedStorage", "storage", "<all_urls>", "webRequest", "webRequestBlocking"],
+    "background": {"scripts": ["background.js"], "persistent": true},
+    "minimum_chrome_version": "76.0.0"
+}
+"""
+
+# Extension script: sets a fixed HTTP proxy and answers its auth challenges.
+background_js = f"""
+var config = {{
+    mode: "fixed_servers",
+    rules: {{
+        singleProxy: {{
+            scheme: "http",
+            host: "{proxy_host}",
+            port: parseInt({proxy_port})
+        }},
+        bypassList: ["localhost"]
+    }}
+}};
+
+chrome.proxy.settings.set({{value: config, scope: "regular"}}, function() {{}});
+
+function callbackFn(details) {{
+    return {{
+        authCredentials: {{
+            username: "{username}",
+            password: "{password}"
+        }}
+    }};
+}}
+
+chrome.webRequest.onAuthRequired.addListener(
+    callbackFn,
+    {{urls: ["<all_urls>"]}},
+    ['blocking']
+);
+"""
+
+
+# Write the extension archive that options.add_extension() loads below.
+with zipfile.ZipFile('proxy_auth.zip', 'w') as zip_file:
+    zip_file.writestr("manifest.json", manifest_json)
+    zip_file.writestr("background.js", background_js)
+
 options = uc.ChromeOptions()
 options.add_argument('--no-sandbox')
 options.add_argument('--disable-dev-shm-usage')
 options.add_argument('--disable-gpu')
+options.add_extension('proxy_auth.zip')
 
 driver = uc.Chrome(
     browser_executable_path=sys.argv[1],
@@ -22,4 +86,10 @@ if data:
     if 'root' in data:
         root_node_id = data['root']['nodeId']
         html = driver.execute_cdp_cmd('DOM.getOuterHTML', {"nodeId": root_node_id})
-        print(html)
+        soup = BeautifulSoup(html['outerHTML'], 'html.parser')
+        print(soup.get_text())
+    else:
+        print("Got data without a root:", data)
+else:
+    print("Didn't get any data...")
+