Setting Up Rotating Proxies with Python
Step-by-step guide to setting up rotating proxies in Python using free lists, paid providers, and ScraperAPI.
Anti-Detection · #6 · Intermediate · 2 min read
Rotating proxies assign a different IP address to each request, making it much harder for websites to detect and block your scraper.
Method 1: Build Your Own Proxy Rotator
If you have a list of proxies from a provider, you can rotate through them:
import requests
import itertools
from typing import Optional
class ProxyRotator:
    """Round-robin rotator over a fixed list of proxy URLs.

    A proxy that raises ``requests.RequestException`` during :meth:`fetch`
    is recorded in ``failed_proxies`` and skipped by every later rotation.
    """

    def __init__(self, proxies: list[str]):
        """Remember *proxies* and start the rotation at the first entry.

        Args:
            proxies: Proxy URLs; each one is used for both HTTP and HTTPS.
        """
        # Keep our own copy so one full lap of the cycle has a known length.
        self._proxies = list(proxies)
        self.proxy_pool = itertools.cycle(self._proxies)
        self.failed_proxies: set[str] = set()

    def _next_healthy(self) -> Optional[str]:
        """Return the next proxy not marked as failed, or None if none is left."""
        # Looking at len(pool) candidates visits every proxy exactly once;
        # an empty pool never touches the (empty) cycle at all.
        for _ in range(len(self._proxies)):
            candidate = next(self.proxy_pool)
            if candidate not in self.failed_proxies:
                return candidate
        return None

    def get_next_proxy(self) -> dict:
        """Return a ``requests``-style proxies mapping for the next usable proxy.

        Returns:
            ``{"http": proxy, "https": proxy}`` for the selected proxy URL.

        Raises:
            RuntimeError: If the pool is empty or every proxy has failed.
        """
        proxy = self._next_healthy()
        if proxy is None:
            raise RuntimeError("No working proxies left in the pool")
        return {"http": proxy, "https": proxy}

    # The return annotation is quoted so it is not evaluated when the class
    # is defined; it still names requests.Response for type checkers.
    def fetch(self, url: str, max_retries: int = 3) -> "Optional[requests.Response]":
        """GET *url*, switching to the next proxy after each failed attempt.

        Args:
            url: Address to request.
            max_retries: Maximum number of attempts (one proxy per attempt).

        Returns:
            The first response with status code 200, or None when the
            attempts are used up or no usable proxy remains.
        """
        for _ in range(max_retries):
            proxy = self._next_healthy()
            if proxy is None:
                # Every proxy is marked as failed: nothing left to try.
                return None
            try:
                response = requests.get(
                    url,
                    proxies={"http": proxy, "https": proxy},
                    timeout=15,
                )
            except requests.RequestException:
                # Connection-level failure: stop handing this proxy out.
                self.failed_proxies.add(proxy)
                continue
            if response.status_code == 200:
                return response
            # Non-200 answers are retried through the next proxy; the proxy
            # itself is not marked as failed.
        return None
# Usage: fetch the same URL five times through the rotator.
proxies = [
    "http://user:pass@proxy1.example.com:8080",
    "http://user:pass@proxy2.example.com:8080",
    "http://user:pass@proxy3.example.com:8080",
]
rotator = ProxyRotator(proxies)
urls = ["https://httpbin.org/ip" for _ in range(5)]
for url in urls:
    # fetch() gives back None when no attempt succeeded; skip those.
    if resp := rotator.fetch(url):
        print(resp.json())
Method 2: Backconnect Proxy (Recommended)
Most commercial proxy providers offer a single gateway endpoint that automatically rotates IPs. This is much simpler:
import requests
# A single gateway endpoint; the provider rotates the exit IP per request.
PROXY = "http://user:pass@gate.provider.com:7777"
for attempt in range(1, 6):
    response = requests.get(
        "https://httpbin.org/ip",
        # Build a fresh mapping for every call, exactly one per request.
        proxies=dict(http=PROXY, https=PROXY),
        timeout=15,
    )
    origin = response.json()["origin"]
    print(f"Request {attempt}: {origin}")
Method 3: ScraperAPI (Easiest)
ScraperAPI handles proxy rotation, retries, and anti-detection in one API call:
import requests
API_KEY = "YOUR_SCRAPERAPI_KEY"


def scrape(url: str) -> str:
    """Fetch *url* through the ScraperAPI HTTP endpoint and return its body.

    Args:
        url: Target page to scrape; forwarded as the ``url`` query parameter.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the
            60-second timeout expires.
    """
    response = requests.get(
        # HTTPS, so the API key in the query string is not sent in cleartext.
        "https://api.scraperapi.com",
        params={"api_key": API_KEY, "url": url},
        timeout=60,
    )
    response.raise_for_status()
    return response.text
# Or use ScraperAPI as a proxy endpoint
proxy = f"http://scraperapi:{API_KEY}@proxy-server.scraperapi.com:8001"
# NOTE(review): ScraperAPI's proxy-mode docs reportedly require disabling
# certificate verification (verify=False) for HTTPS targets — confirm against
# the current documentation before relying on this call.
response = requests.get(
    "https://example.com",
    proxies={"http": proxy, "https": proxy},
    # Without a timeout requests waits indefinitely; 60 s matches scrape().
    timeout=60,
)
Async Rotation with aiohttp
For high-throughput scraping, use async proxy rotation:
import aiohttp
import asyncio
import random
# Pool of proxy URLs that fetch() draws from at random.
PROXIES = [
    "http://user:pass@proxy1.example.com:8080",
    "http://user:pass@proxy2.example.com:8080",
    "http://user:pass@proxy3.example.com:8080",
]


async def fetch(session, url):
    """GET *url* via a randomly chosen proxy and return the response text.

    Args:
        session: Object whose ``get(...)`` is used as an async context
            manager (main() passes an ``aiohttp.ClientSession``).
        url: Address to request.

    Returns:
        The body of the response as returned by ``resp.text()``.
    """
    chosen = random.choice(PROXIES)
    # Overall limit of 15 seconds for the whole request.
    limit = aiohttp.ClientTimeout(total=15)
    async with session.get(url, proxy=chosen, timeout=limit) as resp:
        body = await resp.text()
        return body
async def main():
    """Request the same URL ten times concurrently and print each outcome.

    Successful bodies are printed truncated to 80 characters; requests that
    raised are reported instead of being silently dropped.
    """
    # Plain string: the original f-string had no placeholders.
    urls = ["https://httpbin.org/ip"] * 10
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        # return_exceptions=True keeps one failing request from cancelling
        # the rest; failures come back as exception objects in the results.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for r in results:
            if isinstance(r, str):
                print(r[:80])
            else:
                print(f"Request failed: {r!r}")


asyncio.run(main())
Best Practices
- Always implement retry logic with proxy rotation
- Remove consistently failing proxies from your pool
- Use sticky sessions when you need to maintain state across requests
- Monitor your success rate and switch proxy types if it drops below 90%