26 lines
681 B
Python
26 lines
681 B
Python
import undetected_chromedriver as uc
|
|
import sys
|
|
|
|
# Script body: load a page in headless Chrome and print its serialized DOM.
#
# Command line:
#   argv[1] -- path to the browser executable passed to undetected_chromedriver
#   argv[2] -- URL of the page to load
# Exits with a usage message when fewer than two arguments are supplied.
if len(sys.argv) < 3:
    sys.exit("usage: driver.py <path-to-browser> <site-to-scrape>")

options = uc.ChromeOptions()
# NOTE(review): these flags are presumably here so Chrome can start inside a
# container / CI sandbox -- confirm against the deployment environment.
for flag in ('--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu'):
    options.add_argument(flag)

driver = uc.Chrome(
    browser_executable_path=sys.argv[1],
    headless=True,
    use_subprocess=False,
    options=options,
)
try:
    driver.get(sys.argv[2])

    # Fetch the document tree through the DevTools protocol; the root node id
    # is then handed to DOM.getOuterHTML to serialize the page.
    data = driver.execute_cdp_cmd('DOM.getDocument', {})
    if data and 'root' in data:
        root_node_id = data['root']['nodeId']
        html = driver.execute_cdp_cmd('DOM.getOuterHTML', {"nodeId": root_node_id})
        # NOTE(review): this prints the whole CDP response object as returned
        # by execute_cdp_cmd (presumably a dict wrapping the markup under an
        # "outerHTML" key), not the bare HTML string. Left unchanged so that
        # existing consumers of this output keep working -- confirm intent.
        print(html)
finally:
    # Always shut the browser down, even if navigation or a CDP call raised;
    # otherwise the Chrome/driver processes are left running after exit.
    driver.quit()