Mimicking Human Behavior in Scrapers
Learn techniques to make your web scraper behave like a human user to avoid detection by anti-bot systems.
Anti-Detection · #14 · Intermediate · 3 min read
Advanced anti-bot systems analyze user behavior: mouse movements, scroll patterns, typing speed, and navigation flow. A scraper that instantly teleports between pages without any interaction is easy to detect.
What Anti-Bot Systems Track
- Mouse movements: real humans move the mouse in curves, not straight lines
- Scroll behavior: humans scroll gradually rather than jumping instantly to the bottom
- Click patterns: humans do not click at pixel-perfect coordinates every time
- Timing: humans pause to read content, while bots process pages instantly
- Navigation flow: humans usually visit the homepage first, then navigate to inner pages
Adding Realistic Mouse Movement
from playwright.sync_api import sync_playwright
import random
import time
def human_mouse_move(page, target_x, target_y, steps=10, start_x=0, start_y=0):
    """Move the mouse to the target position along a jittered path.

    The pointer is advanced in ``steps`` increments along the line from the
    start point to the target. Every intermediate point receives a small
    Gaussian offset so the path is not perfectly straight. The final point
    receives no offset, so the pointer always finishes exactly on the target.

    Args:
        page: Object exposing ``mouse.move(x, y)`` (a Playwright page).
        target_x: Destination x coordinate.
        target_y: Destination y coordinate.
        steps: Number of moves to issue; values below 1 are treated as 1.
        start_x: x coordinate the path is interpolated from. Defaults to 0,
            which is where the path always started before.
        start_y: y coordinate the path is interpolated from. Defaults to 0.
    """
    # Always issue at least one move so the pointer reaches the target.
    steps = max(1, int(steps))

    for step in range(1, steps + 1):
        if step == steps:
            # Land exactly on the target: jitter on the last move would leave
            # the pointer a few pixels away from where the caller asked.
            x, y = target_x, target_y
        else:
            progress = step / steps
            # Slight randomness keeps the path from being a straight line.
            x = start_x + (target_x - start_x) * progress + random.gauss(0, 3)
            y = start_y + (target_y - start_y) * progress + random.gauss(0, 3)
        page.mouse.move(x, y)
        # Short, irregular pause between moves.
        time.sleep(random.uniform(0.01, 0.05))
def human_click(page, selector):
    """Click an element with human-like behavior.

    Looks up the first element matching ``selector``, picks a random point
    inside the central 60% of its bounding box, moves the mouse there with
    ``human_mouse_move``, pauses briefly, and clicks at that point.

    Args:
        page: Playwright page to act on.
        selector: Selector passed to ``page.query_selector``.

    Raises:
        ValueError: If no element matches ``selector``, or the matched
            element has no bounding box.
    """
    element = page.query_selector(selector)
    if element is None:
        raise ValueError(f"No element matches selector {selector!r}")

    box = element.bounding_box()
    if box is None:
        raise ValueError(
            f"Element matching {selector!r} has no bounding box (not visible?)"
        )

    # Click at a random position within the element (not dead center).
    x = box["x"] + random.uniform(box["width"] * 0.2, box["width"] * 0.8)
    y = box["y"] + random.uniform(box["height"] * 0.2, box["height"] * 0.8)

    human_mouse_move(page, x, y)
    # Brief hesitation between arriving at the element and pressing.
    time.sleep(random.uniform(0.05, 0.15))
    page.mouse.click(x, y)
Human-Like Scrolling
def human_scroll(page, direction="down", distance=None):
    """Scroll the page like a human would.

    The distance is covered in small random wheel increments with a short
    pause after each one, followed by a longer pause that stands in for
    reading time. The last increment is clamped so the total scrolled equals
    ``distance`` exactly instead of overshooting it.

    Args:
        page: Object exposing ``mouse.wheel(dx, dy)`` (a Playwright page).
        direction: ``"down"`` scrolls down; any other value scrolls up.
        distance: Total pixels to scroll. When ``None``, a random value
            between 200 and 600 is chosen.
    """
    if distance is None:
        distance = random.randint(200, 600)

    sign = 1 if direction == "down" else -1
    remaining = distance

    # Scroll in small increments, never past the requested distance.
    while remaining > 0:
        step = min(random.randint(30, 100), remaining)
        page.mouse.wheel(0, sign * step)
        remaining -= step
        time.sleep(random.uniform(0.02, 0.08))

    # Pause after scrolling (reading time).
    time.sleep(random.uniform(0.5, 2.0))
Complete Human-Like Scraping Session
from playwright.sync_api import sync_playwright
import random
import time
def scrape_like_human(url):
    """Fetch a page's HTML while imitating a human browsing session.

    The session visits the site's root first, moves the mouse, scrolls a
    little, then navigates to ``url``, scrolls through it, and returns the
    rendered HTML. Random pauses separate every action.

    Args:
        url: Absolute URL of the target page (scheme and host required).

    Returns:
        The target page's HTML as returned by ``page.content()``.

    Raises:
        ValueError: If ``url`` has no scheme or no host.
    """
    from urllib.parse import urlsplit

    # Validate before launching a browser so bad input fails fast.
    parts = urlsplit(url)
    if not parts.scheme or not parts.netloc:
        raise ValueError(
            f"Expected an absolute URL with scheme and host, got {url!r}"
        )
    base_url = f"{parts.scheme}://{parts.netloc}"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                # Real Chrome sends the "(KHTML, like Gecko)" token; a UA
                # without it is itself an easy bot signal.
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/124.0.0.0 Safari/537.36"
                ),
            )
            page = context.new_page()

            # 1. Visit homepage first (humans rarely go directly to deep pages)
            page.goto(base_url, wait_until="domcontentloaded")
            time.sleep(random.uniform(1.5, 3.0))

            # 2. Move mouse around
            page.mouse.move(random.randint(100, 800), random.randint(100, 400))
            time.sleep(random.uniform(0.5, 1.5))

            # 3. Scroll down a bit
            for _ in range(random.randint(1, 3)):
                page.mouse.wheel(0, random.randint(100, 400))
                time.sleep(random.uniform(0.5, 2.0))

            # 4. Now navigate to the target page
            page.goto(url, wait_until="networkidle")
            time.sleep(random.uniform(1.0, 2.5))

            # 5. Scroll through the content
            for _ in range(random.randint(2, 5)):
                page.mouse.wheel(0, random.randint(150, 500))
                time.sleep(random.uniform(0.8, 2.0))

            return page.content()
        finally:
            # Close the browser even when a navigation step raises.
            browser.close()
Timing Patterns
| Action | Human Timing | Bot Timing |
|---|---|---|
| Between page loads | 3-15 seconds | < 100ms |
| Reading a page | 5-60 seconds | 0 seconds |
| Scrolling | Gradual over 2-5s | Instant |
| Form filling | 1-3s per field | Instant |
When to Skip Behavioral Simulation
Behavioral simulation is only necessary for browser-based scraping against sophisticated anti-bot systems. For simpler targets, ScraperAPI handles the detection evasion without you needing to simulate human behavior at all.