auto-scraper/driver.py

26 lines
681 B
Python

import undetected_chromedriver as uc
import sys
# Script entry point.
#
# Usage: driver.py <path-to-browser> <site-to-scrape>
#
# Starts a headless Chrome through undetected_chromedriver, loads the given
# URL, and prints the result of the CDP `DOM.getOuterHTML` call for the
# document root to stdout. Nothing is printed when `DOM.getDocument` returns
# an empty response or one without a 'root' entry.
if len(sys.argv) < 3:
    # sys.exit with a string writes it to stderr and exits with status 1.
    sys.exit("usage: driver.py <path-to-browser> <site-to-scrape>")

# Chrome command-line switches forwarded to the launched browser.
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-gpu')

driver = uc.Chrome(
    browser_executable_path=sys.argv[1],
    headless=True,
    use_subprocess=False,
    options=options,
)
try:
    driver.get(sys.argv[2])

    # Ask the browser for the DOM tree; the root node id is needed to
    # request the serialized markup of the whole document.
    data = driver.execute_cdp_cmd('DOM.getDocument', {})
    if data and 'root' in data:
        root_node_id = data['root']['nodeId']
        html = driver.execute_cdp_cmd('DOM.getOuterHTML', {"nodeId": root_node_id})
        # NOTE(review): this prints the raw CDP response object as returned
        # by execute_cdp_cmd, not just the markup string. Kept as-is because
        # a downstream consumer may parse this exact output -- confirm
        # before changing it to print only the HTML.
        print(html)
finally:
    # Always shut the browser down, even if navigation or a CDP call raised;
    # otherwise the headless Chrome / chromedriver processes can be left
    # running after the script exits.
    driver.quit()