IP Ban Recovery Strategies
Learn how to detect, recover from, and prevent IP bans when web scraping, with practical Python examples.
Anti-Detection · #13 · intermediate · 3 min read
Getting your IP banned is a normal part of web scraping. What matters is how quickly you detect it, recover, and prevent it from happening again.
Detecting an IP Ban
Bans manifest in different ways. Your scraper should detect all of them:
import requests
def check_ban_status(response: requests.Response) -> str:
    """Classify a response as healthy or as one of several ban signals.

    Checks run from the most explicit signal to the weakest heuristic, and
    the first match wins:

    1. Blocking status codes (403, 429, 503).
    2. CAPTCHA markers anywhere in the body (case-insensitive).
    3. Known block-page phrases in the body (case-insensitive).
    4. A body shorter than 500 characters on an HTTP 200.

    Returns:
        One of ``"banned_403"``, ``"rate_limited"``,
        ``"service_unavailable"``, ``"captcha_triggered"``, ``"soft_ban"``,
        ``"possible_soft_ban"`` or ``"ok"``.
    """
    code = response.status_code

    # Status codes that signal a block on their own, without reading the body.
    status_labels = {
        403: "banned_403",
        429: "rate_limited",
        503: "service_unavailable",
    }
    label = status_labels.get(code)
    if label is not None:
        return label

    body = response.text
    page = body.lower()

    # CAPTCHA interstitials.
    if any(marker in page for marker in ("captcha", "recaptcha")):
        return "captcha_triggered"

    # Phrases commonly found on block pages.
    block_phrases = (
        "access denied",
        "you have been blocked",
        "suspicious activity",
        "please verify you are human",
    )
    if any(phrase in page for phrase in block_phrases):
        return "soft_ban"

    # A very small page served with 200 may be a block page rather than
    # the real content.
    if code == 200 and len(body) < 500:
        return "possible_soft_ban"

    return "ok"
Recovery Strategy 1: Automatic Proxy Failover
import requests
import time
from typing import Optional
class BanRecoveryScraper:
    """Fetch URLs through a proxy pool, sidelining proxies that look banned.

    Proxies are tried round-robin. A proxy is added to ``banned_proxies``
    when the request through it fails or when the response is classified as
    anything other than ``"ok"`` or ``"rate_limited"``. Once every proxy is
    sidelined, the scraper sleeps for five minutes and clears the set.
    """

    def __init__(self, proxies: list[str]):
        """Store the proxy URLs to rotate through."""
        self.proxies = proxies
        self.banned_proxies: set[str] = set()
        # Ever-increasing counter; taken modulo the pool size to pick a proxy.
        self.current_index = 0

    def get_working_proxy(self) -> Optional[str]:
        """Return the next non-banned proxy, or ``None`` if none is left."""
        # At most one full lap around the pool.
        for _ in range(len(self.proxies)):
            proxy = self.proxies[self.current_index % len(self.proxies)]
            self.current_index += 1
            if proxy not in self.banned_proxies:
                return proxy
        return None

    @staticmethod
    def _retry_after_seconds(
        value: Optional[str], default: float = 30.0
    ) -> float:
        """Convert a ``Retry-After`` header value into a delay in seconds.

        The header may carry either an integer number of seconds or an
        HTTP-date. A missing or unparseable value yields *default*; the
        result is never negative, so it is always safe for ``time.sleep``.
        """
        # Imported here so the class does not depend on extra file-level
        # imports.
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        if value is None:
            return default
        text = str(value).strip()

        try:
            return max(0.0, float(int(text)))
        except ValueError:
            pass  # Not delay-seconds; try the HTTP-date form below.

        try:
            when = parsedate_to_datetime(text)
        except (TypeError, ValueError):
            return default
        if when.tzinfo is None:
            # A date without usable zone information is treated as UTC.
            when = when.replace(tzinfo=timezone.utc)
        return max(0.0, (when - datetime.now(timezone.utc)).total_seconds())

    def fetch(self, url: str) -> Optional[requests.Response]:
        """Try up to five times to fetch *url* through a working proxy.

        Returns:
            The first response classified as ``"ok"``, or ``None`` when all
            attempts fail or no proxies are configured.
        """
        if not self.proxies:
            # With an empty pool the request would be sent with proxy=None,
            # i.e. directly from the real IP. Refuse instead.
            return None

        for _attempt in range(5):
            proxy = self.get_working_proxy()
            if not proxy:
                print("All proxies banned! Waiting 5 minutes...")
                time.sleep(300)
                self.banned_proxies.clear()
                proxy = self.get_working_proxy()

            try:
                resp = requests.get(
                    url,
                    proxies={"http": proxy, "https": proxy},
                    timeout=15,
                )
            except requests.RequestException:
                # Connection errors and timeouts count against the proxy.
                self.banned_proxies.add(proxy)
                continue

            status = check_ban_status(resp)
            if status == "ok":
                return resp
            if status == "rate_limited":
                # Rate limiting is not held against the proxy; wait as
                # instructed (30 s when the header is absent or invalid).
                time.sleep(
                    self._retry_after_seconds(resp.headers.get("Retry-After"))
                )
            else:
                print(f"Ban detected ({status}), rotating proxy")
                self.banned_proxies.add(proxy)
        return None
Recovery Strategy 2: Exponential Backoff
When you have limited IPs, back off exponentially:
import time
import random
def fetch_with_backoff(url: str, max_retries: int = 5) -> str:
    """Fetch *url*, retrying with exponential backoff plus jitter.

    An attempt succeeds when the server answers with HTTP 200. Any other
    status code, or a network-level failure, triggers a wait of
    ``2 ** attempt`` seconds plus up to one second of random jitter (capped
    at five minutes) before the next attempt.

    Args:
        url: Address to request.
        max_retries: Total number of attempts to make.

    Returns:
        The body text of the first HTTP 200 response.

    Raises:
        RuntimeError: If no attempt returned HTTP 200. The most recent
            network error, if any, is attached as the cause.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=15)
        except requests.RequestException as exc:
            # Timeouts and connection errors are as retryable as a bad
            # status code.
            last_error = exc
        else:
            if response.status_code == 200:
                return response.text

        if attempt == max_retries - 1:
            # No next attempt, so do not make the caller wait before failing.
            break

        # Exponential backoff with jitter, capped at 5 minutes.
        wait_time = min((2 ** attempt) + random.uniform(0, 1), 300)
        print(f"Attempt {attempt+1} failed. Waiting {wait_time:.1f}s...")
        time.sleep(wait_time)

    raise RuntimeError(f"Failed after {max_retries} retries") from last_error
Prevention Checklist
| Practice | Impact |
|---|---|
| Rotate proxies per request | High |
| Add random delays (1-5s) | High |
| Rotate User-Agents | Medium |
| Use residential proxies | High |
| Limit concurrent requests | Medium |
| Scrape during off-peak hours | Low |
| Maintain session cookies | Medium |
Let ScraperAPI Handle It
ScraperAPI manages an enormous proxy pool and automatically retries with different IPs when bans are detected. You never need to handle IP recovery yourself:
import requests
API_KEY = "YOUR_SCRAPERAPI_KEY"
# Pass the key and target as params so requests URL-encodes them; a target
# URL with its own query string would otherwise be cut at its first "&".
# HTTPS keeps the API key out of cleartext traffic.
response = requests.get(
    "https://api.scraperapi.com/",
    params={"api_key": API_KEY, "url": "https://example.com"},
    # requests never times out by default; leave generous headroom because
    # the service retries on its side before answering.
    timeout=70,
)
# ScraperAPI retries with fresh IPs automatically
A managed service like ScraperAPI or ScrapingAnt eliminates IP ban headaches entirely.