Hands-On: SERP API in Python, A Complete Walkthrough
Sign up, get a key, run your first query, parse the JSON, persist to a database. A complete production-shaped Python walkthrough.
What you’ll learn
- Make your first SERP-API call in Python.
- Handle pagination, rate limits, and retries.
- Parse a SERP response into normalized records.
- Persist to SQLite (or any DB) for repeatable analysis.
This is the end-to-end walkthrough. Pick any provider; the code below uses a generic shape, so adjust URLs and field names to match yours. By the end you'll have a working SERP scraper that runs daily, persists results, and survives transient failures.
Step 1, get a key
Sign up for your chosen provider's free tier (lesson 3.32 has the list). Take note of:
- The base URL (e.g. https://api.example-serp.com/search).
- Your API key (passed as an api_key parameter or an Authorization header).
- The rate limit (free tiers usually allow ~5 requests/second).
Set it as an environment variable:
export SERP_API_KEY="your_key_here"
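If your provider expects the key in a header instead of a query parameter, the call changes only slightly. A minimal sketch, assuming a Bearer-style Authorization header (header name and format vary by provider, check your docs):

import os, requests

r = requests.get(
    "https://api.example-serp.com/search",
    params={"q": "python web scraping"},
    headers={"Authorization": f"Bearer {os.environ['SERP_API_KEY']}"},  # provider-specific convention
    timeout=30,
)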
Step 2, first call
# serp_basic.py
import os, requests

API_URL = "https://api.example-serp.com/search"

def search(query: str, **params) -> dict:
    r = requests.get(API_URL, params={
        "q": query,
        "api_key": os.environ["SERP_API_KEY"],
        "engine": "google",
        "gl": "us",
        "hl": "en",
        **params,
    }, timeout=30)
    r.raise_for_status()
    return r.json()

data = search("python web scraping")
print("Organic results:", len(data.get("organic_results", [])))
print("First title:", data["organic_results"][0]["title"])
Run it. You should see JSON in seconds.
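Provider payloads differ in their extras (ads, AI overviews, local packs), so before writing any parsing code it pays to inspect what yours actually returns. Appending this to serp_basic.py dumps the structure:

import json

print(sorted(data.keys()))  # top-level blocks: organic_results, ads, ...
print(json.dumps(data["organic_results"][0], indent=2))  # fields of one organic result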
Step 3, wrap in a class
# serp_client.py
import os, requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

class SerpClient:
    API_URL = "https://api.example-serp.com/search"

    def __init__(self, api_key: str | None = None, timeout: float = 30.0):
        self.api_key = api_key or os.environ["SERP_API_KEY"]
        self.timeout = timeout
        self.session = requests.Session()
        retry = Retry(
            total=5,
            backoff_factor=1.0,  # exponential backoff between attempts
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET"],
        )
        self.session.mount("https://", HTTPAdapter(max_retries=retry))

    def search(self, q: str, **params) -> dict:
        all_params = {
            "q": q,
            "api_key": self.api_key,
            "engine": "google",
            "gl": "us",
            "hl": "en",
            **params,
        }
        r = self.session.get(self.API_URL, params=all_params, timeout=self.timeout)
        r.raise_for_status()
        return r.json()
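One thing the intro promised that the client doesn't yet do: pagination. Most providers page results with an offset parameter; the sketch below assumes a SerpAPI-style start parameter with 10 organic results per page (an assumption, some providers use page instead). Drop it in a helper module next to the client:

# paging helper -- the "start" offset parameter is an assumption; adjust to your provider
import time
from serp_client import SerpClient

def search_pages(client: SerpClient, q: str, pages: int = 3, per_page: int = 10, **params) -> list[dict]:
    """Fetch consecutive result pages and return the combined organic results."""
    results = []
    for page in range(pages):
        data = client.search(q, start=page * per_page, **params)
        batch = data.get("organic_results", [])
        if not batch:  # provider ran out of results early
            break
        results.extend(batch)
        time.sleep(0.3)  # stay well under the free tier's ~5 req/s
    return results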
Step 4, normalize the response
Different providers return slightly different shapes. Convert to your internal shape immediately:
# normalize.py
from urllib.parse import urlparse

def normalize_serp(data: dict) -> dict:
    return {
        "organic": [
            {
                "position": r.get("position"),
                "title": r.get("title"),
                "url": r.get("link"),
                "snippet": r.get("snippet"),
                "domain": _domain(r.get("link", "")),
            }
            for r in data.get("organic_results", [])
        ],
        "ads": [
            {
                "position": a.get("position"),
                "title": a.get("title"),
                "url": a.get("link"),
                "block": a.get("block_position"),
            }
            for a in data.get("ads", [])
        ],
        "ai_overview": _normalize_ai(data.get("ai_overview")),
        "knowledge_graph": _normalize_kg(data.get("knowledge_graph")),
        "local_pack": [
            {
                "place_id": p.get("place_id"),
                "name": p.get("title"),
                "rating": p.get("rating"),
                "phone": p.get("phone"),
                "address": p.get("address"),
            }
            for p in (data.get("local_results") or {}).get("places", [])
        ],
    }

def _domain(url: str) -> str:
    return urlparse(url).netloc.lower()

def _normalize_ai(ao):
    if not ao:
        return None
    return {
        "text": ao.get("text"),
        "sources": [
            {"domain": _domain(s.get("link", "")), "rank": i + 1, "url": s.get("link")}
            for i, s in enumerate(ao.get("sources", []))
        ],
    }

def _normalize_kg(kg):
    if not kg:
        return None
    return {
        "title": kg.get("title"),
        "type": kg.get("type"),
        "description": kg.get("description"),
    }
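A quick smoke test of the mapping, chaining the client and normalizer from the previous steps:

from serp_client import SerpClient
from normalize import normalize_serp

norm = normalize_serp(SerpClient().search("python web scraping"))
print("organic:", len(norm["organic"]), "| ads:", len(norm["ads"]))
if norm["organic"]:
    print("top result:", norm["organic"][0]["domain"])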
Step 5, persist to SQLite
# storage.py
import sqlite3, json
from contextlib import contextmanager

DB_PATH = "serp.db"

@contextmanager
def db():
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
        conn.commit()
    finally:
        conn.close()

def init_db():
    with db() as conn:
        conn.executescript("""
        CREATE TABLE IF NOT EXISTS serp (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            query TEXT NOT NULL,
            gl TEXT NOT NULL,
            hl TEXT NOT NULL,
            device TEXT NOT NULL,
            collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            raw_json TEXT NOT NULL,
            normalized_json TEXT NOT NULL
        );
        CREATE INDEX IF NOT EXISTS idx_serp_query ON serp (query, gl, hl, device);
        """)

def save_serp(query, gl, hl, device, raw, normalized):
    with db() as conn:
        conn.execute(
            "INSERT INTO serp (query, gl, hl, device, raw_json, normalized_json) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (query, gl, hl, device, json.dumps(raw), json.dumps(normalized)),
        )
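Before wiring up the real pipeline, a round-trip check with throwaway values confirms the schema and JSON serialization work:

import json
from storage import db, init_db, save_serp

init_db()
save_serp("test query", "us", "en", "desktop", {"raw": True}, {"organic": []})
with db() as conn:
    row = conn.execute("SELECT * FROM serp ORDER BY id DESC LIMIT 1").fetchone()
print(row["query"], row["collected_at"], json.loads(row["normalized_json"]))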
Step 6, the runner
# run_daily.py
from serp_client import SerpClient
from normalize import normalize_serp
from storage import init_db, save_serp
import time

KEYWORDS = [
    "python web scraping",
    "api scraping tutorial",
    "best scraping tools 2026",
    # ... your list
]
LOCALES = [("us", "en", "desktop"), ("us", "en", "mobile")]

def run():
    init_db()
    client = SerpClient()
    for kw in KEYWORDS:
        for gl, hl, device in LOCALES:
            try:
                data = client.search(kw, gl=gl, hl=hl, device=device)
                norm = normalize_serp(data)
                save_serp(kw, gl, hl, device, data, norm)
                print(f"OK: {kw} | {gl}/{hl}/{device}")
                time.sleep(0.3)  # polite throttle
            except Exception as e:
                print(f"ERR: {kw} | {e}")

if __name__ == "__main__":
    run()
Cron it nightly:
0 2 * * * cd /path/to/scraper && python run_daily.py >> serp.log 2>&1
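One gotcha: cron doesn't inherit your shell's environment, so the SERP_API_KEY you exported in .bashrc won't be visible to the job. Most cron implementations let you set variables at the top of the crontab:

SERP_API_KEY=your_key_here
0 2 * * * cd /path/to/scraper && python run_daily.py >> serp.log 2>&1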
Step 7, analysis queries
Once you have a few weeks of data:
# rank_for_domain.py
from storage import db
import json

def my_rank(domain: str, query: str):
    with db() as conn:
        rows = conn.execute(
            "SELECT collected_at, normalized_json FROM serp "
            "WHERE query = ? ORDER BY collected_at DESC LIMIT 30",
            (query,)
        ).fetchall()
    for row in rows:
        org = json.loads(row["normalized_json"])["organic"]
        rank = next((r["position"] for r in org if domain in r["domain"]), None)
        print(f"{row['collected_at']}: {rank}")

my_rank("scrapingcentral.com", "python web scraping")
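If you'd rather push the extraction into the database, SQLite's built-in JSON1 functions (json_each, json_extract) can unpack normalized_json directly. A sketch of the same question answered in one query (the file name is just a suggestion):

# rank_trend_sql.py -- same output as my_rank(), computed in SQL
from storage import db

SQL = """
SELECT s.collected_at, json_extract(o.value, '$.position') AS position
FROM serp AS s, json_each(s.normalized_json, '$.organic') AS o
WHERE s.query = ? AND json_extract(o.value, '$.domain') LIKE '%' || ? || '%'
ORDER BY s.collected_at DESC LIMIT 30
"""

def rank_trend(domain: str, query: str):
    with db() as conn:
        for row in conn.execute(SQL, (query, domain)):
            print(f"{row['collected_at']}: {row['position']}")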
Step 8, alerting
A simple "rank dropped" alert:
# alert_drops.py
from storage import db
import json

def alert_on_drops(domain: str, query: str, threshold: int = 3):
    """Alert if rank dropped by `threshold` positions vs 7 days ago."""
    with db() as conn:
        today, week_ago = conn.execute(
            """SELECT
                MAX(CASE WHEN collected_at > datetime('now', '-1 day') THEN normalized_json END) AS today,
                MAX(CASE WHEN collected_at BETWEEN datetime('now', '-8 days') AND datetime('now', '-6 days') THEN normalized_json END) AS week_ago
            FROM serp WHERE query = ?""",
            (query,)
        ).fetchone()
    if not (today and week_ago):
        return
    # 999 = "not ranked in the tracked results"
    today_rank = next((r["position"] for r in json.loads(today)["organic"] if domain in r["domain"]), 999)
    week_rank = next((r["position"] for r in json.loads(week_ago)["organic"] if domain in r["domain"]), 999)
    if today_rank - week_rank >= threshold:
        # send email, Slack, etc.
        print(f"ALERT: {query} dropped from {week_rank} to {today_rank}")
What you've built
In this lesson you went from "I have an API key" to a daily SEO dashboard pipeline:
- Sign up once, key in an env var.
- Wrap the API in a robust client with retries.
- Normalize responses to your internal shape.
- Persist raw + normalized to SQLite.
- Schedule daily collection.
- Query for rank, trends, drops.
- Alert on regression.
It's a minimum viable SEO platform. Real products add dashboards (Streamlit, Metabase), more sophisticated alerting (PagerDuty), multi-tenant isolation, and more.
Hands-on lab
Pick a SERP-API provider's free tier. Implement the seven core files above. Run it daily for a week. Query the database. See your real ranking trends. You've crossed from "I know what a SERP API is" to "I've shipped one in production."
Quiz, check your understanding
Pass mark is 70%. Pick the best answer; you’ll see the explanation right after.