Using Selenium with Proxies
Configure Selenium WebDriver with HTTP, SOCKS, and authenticated proxies for anonymous and scalable web scraping.
Proxies are essential for scraping at scale with Selenium. Without them, target websites will quickly detect repeated requests from a single IP address and block you. Selenium supports proxy configuration through Chrome options, Firefox profiles, and add-ons that extend the driver, such as a credential-injecting browser extension or the selenium-wire package (both shown below).
Basic HTTP Proxy with Chrome
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Route all browser traffic through an unauthenticated HTTP proxy.
options = Options()
options.add_argument("--headless")
options.add_argument("--proxy-server=http://proxy-server.com:8080")

driver = webdriver.Chrome(options=options)
try:
    # httpbin.org/ip reports the IP address the request arrived from, so the
    # printed value should be the proxy's address when the proxy is in effect.
    driver.get("https://httpbin.org/ip")
    print(driver.find_element("tag name", "body").text)
finally:
    # Always shut the browser down, even if the page load or lookup raised;
    # otherwise a failed request leaves a Chrome process running.
    driver.quit()
SOCKS5 Proxy
# Same flag as for an HTTP proxy; only the URL scheme (socks5://) differs.
# This snippet stops after creating the driver: call driver.quit() when done.
options = Options()
options.add_argument("--proxy-server=socks5://proxy-server.com:1080")
driver = webdriver.Chrome(options=options)
Authenticated Proxy
Chrome does not natively support proxy authentication via command-line arguments. The workaround is to use a Chrome extension that injects credentials:
import zipfile
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def create_proxy_auth_extension(proxy_host, proxy_port, proxy_user, proxy_pass,
                                ext_path="proxy_auth_extension.zip"):
    """Write a Chrome extension ZIP that configures an authenticated HTTP proxy.

    The archive contains a ``manifest.json`` and a ``background.js`` that
    (1) sets a fixed proxy server via ``chrome.proxy.settings`` and
    (2) answers ``onAuthRequired`` events with the given credentials.

    Args:
        proxy_host: Proxy hostname or IP address.
        proxy_port: Proxy port, as an int or a numeric string.
        proxy_user: Username sent in response to the proxy's auth challenge.
        proxy_pass: Password sent in response to the proxy's auth challenge.
        ext_path: Where to write the ZIP. Defaults to
            ``proxy_auth_extension.zip`` in the current working directory;
            an existing file at that path is overwritten.

    Returns:
        The path the extension was written to (``ext_path``).

    Raises:
        ValueError: If ``proxy_port`` cannot be converted to an integer.

    Note:
        The credentials are stored in plain text inside the archive.
    """
    # Function-scope import so this snippet needs no extra top-level import.
    import json

    # int() validates the port up front and lets us emit a bare JS number.
    port = int(proxy_port)

    # NOTE: this is a Manifest V2 extension. Chrome is phasing out Manifest V2,
    # so confirm that the Chrome build you drive still loads MV2 extensions.
    manifest_json = """{
"version": "1.0.0",
"manifest_version": 2,
"name": "Proxy Auth",
"permissions": ["proxy", "tabs", "unlimitedStorage",
"storage", "webRequest", "webRequestBlocking"],
"background": {"scripts": ["background.js"]}
}"""

    # json.dumps() produces a quoted, fully escaped literal that is also valid
    # JavaScript, so quotes or backslashes in the host or credentials can
    # neither break the script nor inject code into it.
    background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: %s,
port: %d
}
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function(){});
chrome.webRequest.onAuthRequired.addListener(
function(details) {
return {authCredentials: {username: %s, password: %s}};
},
{urls: ["<all_urls>"]},
['blocking']
);
""" % (
        json.dumps(proxy_host),
        port,
        json.dumps(proxy_user),
        json.dumps(proxy_pass),
    )

    with zipfile.ZipFile(ext_path, "w") as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    return ext_path
# Create and use the extension
# The generated ZIP stays on disk and holds the credentials in plain text.
ext = create_proxy_auth_extension(
    "proxy-server.com", "8080", "username", "password"
)

options = Options()
options.add_extension(ext)

driver = webdriver.Chrome(options=options)
try:
    # httpbin.org/ip reports the IP address the request arrived from.
    driver.get("https://httpbin.org/ip")
    print(driver.find_element("tag name", "body").text)
finally:
    # Always shut the browser down, even if the page load or lookup raised.
    driver.quit()
Rotating Proxies
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import random
# Pool of proxy URLs to pick from; each entry is passed to Chrome's
# --proxy-server flag as-is. Replace these placeholders with real proxies.
PROXIES = [
    "http://proxy1.com:8080",
    "http://proxy2.com:8080",
    "http://proxy3.com:8080",
]
def get_driver_with_proxy(proxy):
    """Return a headless Chrome driver configured to use ``proxy``.

    ``proxy`` is inserted verbatim into Chrome's ``--proxy-server`` flag.
    The driver is returned still running; the caller is responsible for
    calling ``quit()`` on it.
    """
    chrome_options = Options()
    for flag in ("--headless", f"--proxy-server={proxy}"):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)
urls = ["https://example.com/1", "https://example.com/2"]

# One short-lived browser per URL, each routed through a randomly picked
# proxy. random.choice may select the same proxy for consecutive URLs.
for url in urls:
    proxy = random.choice(PROXIES)
    browser = get_driver_with_proxy(proxy)
    try:
        browser.get(url)
        print(f"Scraped {url} via {proxy}")
    finally:
        # Runs whether or not the page load raised, so no browser is leaked.
        browser.quit()
Using selenium-wire for Easy Proxy Authentication
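The selenium-wire library makes authenticated proxies simple: you put the username and password directly in the proxy URL. Note that the selenium-wire project has been archived by its maintainer, so check that it still works with your Selenium version before relying on it: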
pip install selenium-wire
from seleniumwire import webdriver
# Proxy settings handed to selenium-wire. Credentials are embedded in the
# proxy URL; presumably any special characters in the username or password
# must be percent-encoded for the URL to parse correctly (verify against the
# selenium-wire documentation).
options = {
    "proxy": {
        "http": "http://user:pass@proxy-server.com:8080",
        "https": "http://user:pass@proxy-server.com:8080",
    }
}

driver = webdriver.Chrome(seleniumwire_options=options)
try:
    # httpbin.org/ip reports the IP address the request arrived from.
    driver.get("https://httpbin.org/ip")
    print(driver.find_element("tag name", "body").text)
finally:
    # Always shut the browser down, even if the page load or lookup raised.
    driver.quit()
Skip the Proxy Hassle
Managing proxy pools, handling authentication, and dealing with dead proxies is tedious. ScraperAPI provides a single API endpoint that automatically rotates through millions of residential and datacenter proxies. ScrapingAnt offers a similar managed proxy solution bundled with their scraping API.
Next Steps
- Learn Puppeteer basics for web scraping
- Explore headless vs headed browser scraping
- Set up parallel browser scraping