Building a Price Monitoring Scraper
Build a complete price monitoring scraper that tracks product prices over time, detects price drops, and sends alerts. A real-world scraping project.
Python Scraping · #20 · Intermediate · 4 min read
Price monitoring is one of the most practical applications of web scraping. In this tutorial, you will build a scraper that tracks product prices over time, stores historical data, and detects price drops.
Project Structure
price_monitor/
├── monitor.py
└── prices.db
The Complete Price Monitor
import json
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from datetime import datetime

import requests
from bs4 import BeautifulSoup
@dataclass
class Product:
    """Snapshot of a single product page at the moment it was scraped."""

    url: str  # page the data was scraped from
    name: str  # product title (text of the page's <h1>)
    price: float  # numeric price with the currency symbol removed
    currency: str  # currency symbol shown in front of the price, e.g. "£"
class PriceMonitor:
    """Track product prices in a SQLite database and report price drops.

    Every check scrapes the product page, upserts the latest price into the
    ``products`` table and appends one row to ``price_history``.
    """

    # First run of digits in the price text, allowing "," thousands
    # separators and an optional decimal part (e.g. "51.77", "1,299.00").
    _AMOUNT_RE = re.compile(r"\d[\d,]*(?:\.\d+)?")

    def __init__(self, db_path="prices.db"):
        """Create the HTTP session and make sure the database schema exists.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        )
        self._init_db()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        A sqlite3 connection used directly in ``with`` only commits or rolls
        back; it does not close itself. Wrapping it in ``closing`` releases
        the file handle after every operation.
        """
        return closing(sqlite3.connect(self.db_path))

    def _init_db(self):
        """Create the ``products`` and ``price_history`` tables if missing."""
        with self._connect() as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS products (
                    url TEXT PRIMARY KEY,
                    name TEXT,
                    current_price REAL,
                    lowest_price REAL,
                    currency TEXT,
                    last_checked TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS price_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT,
                    price REAL,
                    checked_at TEXT,
                    FOREIGN KEY (url) REFERENCES products(url)
                )
            """)

    @classmethod
    def _parse_price(cls, price_text):
        """Split a displayed price such as "£51.77" into (currency, amount).

        Everything before the first digit is treated as the currency prefix;
        "," thousands separators inside the number are ignored.

        Raises:
            ValueError: If the text contains no number.
        """
        match = cls._AMOUNT_RE.search(price_text)
        if match is None:
            raise ValueError(f"Could not parse price from {price_text!r}")
        currency = price_text[:match.start()].strip()
        amount = float(match.group().replace(",", ""))
        return currency, amount

    def scrape_product(self, url):
        """Scrape product details from books.toscrape.com.

        Args:
            url: Product page URL.

        Returns:
            A ``Product`` with the name, price and currency found on the page.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the name or the price cannot be found or parsed.
        """
        response = self.session.get(url, timeout=15)
        response.raise_for_status()
        # Hand BeautifulSoup the raw bytes so it detects the encoding itself.
        # ``response.text`` falls back to ISO-8859-1 when the server sends no
        # charset, which turns "£" into "Â£" and corrupts the price text.
        soup = BeautifulSoup(response.content, "html.parser")
        name_tag = soup.select_one("h1")
        price_tag = soup.select_one("p.price_color")
        if name_tag is None or price_tag is None:
            raise ValueError(f"Could not parse product at {url}")
        currency, price = self._parse_price(price_tag.get_text(strip=True))
        return Product(url=url, name=name_tag.get_text(strip=True),
                       price=price, currency=currency)

    def _store_price(self, product, checked_at):
        """Persist one observation and return the previously stored price.

        Args:
            product: Object exposing ``url``, ``name``, ``price`` and
                ``currency`` attributes.
            checked_at: Timestamp string stored with the observation.

        Returns:
            The ``current_price`` stored before this call, or ``None`` when
            the product had not been recorded yet.
        """
        with self._connect() as conn, conn:
            row = conn.execute(
                "SELECT current_price, lowest_price FROM products WHERE url = ?",
                (product.url,),
            ).fetchone()
            previous_price, lowest_price = row if row else (None, None)
            # A missing row or a NULL column means there is no lowest yet.
            if lowest_price is None:
                lowest_price = product.price
            new_lowest = min(lowest_price, product.price)

            # ``excluded`` refers to the row that failed to insert, so the
            # new values only have to be bound once.
            conn.execute("""
                INSERT INTO products (url, name, current_price, lowest_price, currency, last_checked)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(url) DO UPDATE SET
                    current_price = excluded.current_price,
                    lowest_price = excluded.lowest_price,
                    last_checked = excluded.last_checked
            """, (product.url, product.name, product.price, new_lowest,
                  product.currency, checked_at))
            conn.execute(
                "INSERT INTO price_history (url, price, checked_at) VALUES (?, ?, ?)",
                (product.url, product.price, checked_at),
            )
        return previous_price

    def check_price(self, url):
        """Scrape current price and compare with stored data.

        Prints a "PRICE DROP" notice when the new price is lower than the
        previously stored one, otherwise a plain "Checked" line.

        Returns:
            The freshly scraped ``Product``.
        """
        product = self.scrape_product(url)
        previous_price = self._store_price(product, datetime.now().isoformat())

        if previous_price is not None and product.price < previous_price:
            drop = previous_price - product.price
            pct = (drop / previous_price) * 100
            print(f"PRICE DROP: {product.name}")
            print(f" {product.currency}{previous_price:.2f} -> {product.currency}{product.price:.2f} (-{pct:.1f}%)")
        else:
            print(f"Checked: {product.name} - {product.currency}{product.price:.2f}")
        return product

    def get_history(self, url):
        """Get price history for a product.

        Returns:
            A list of ``{"price": ..., "date": ...}`` dicts, oldest first.
        """
        with self._connect() as conn, conn:
            # ``id`` breaks ties between rows sharing the same timestamp.
            rows = conn.execute(
                "SELECT price, checked_at FROM price_history WHERE url = ? "
                "ORDER BY checked_at, id",
                (url,),
            ).fetchall()
        return [{"price": price, "date": checked_at} for price, checked_at in rows]

    def check_all(self, urls):
        """Check prices for multiple products.

        A failure on one URL is printed and skipped so that the remaining
        URLs are still checked.

        Returns:
            The ``Product`` objects of the URLs that were checked successfully.
        """
        results = []
        for url in urls:
            try:
                results.append(self.check_price(url))
            except Exception as e:  # best-effort batch: report and continue
                print(f"Error checking {url}: {e}")
        return results
# Usage
if __name__ == "__main__":
    monitor = PriceMonitor()
    products = [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
    ]
    checked = monitor.check_all(products)

    # get_history() returns only price and date, so take the currency symbol
    # from the products scraped in this run instead of hard-coding one.
    currency_by_url = {product.url: product.currency for product in checked}

    # View price history
    for url in products:
        history = monitor.get_history(url)
        if history:
            # Empty prefix when this run's check failed for the URL.
            currency = currency_by_url.get(url, "")
            print(f"\nHistory for {url.split('/')[-2]}:")
            for entry in history[-5:]:
                print(f" {entry['date'][:10]}: {currency}{entry['price']:.2f}")
Running on a Schedule
Use a cron job or Python scheduler to run checks automatically.
# Check prices every 6 hours (add to crontab with `crontab -e`)
0 */6 * * * cd /path/to/price_monitor && python monitor.py
Tips
- Run price checks at consistent intervals (e.g., every 6 or 12 hours) to build reliable historical data.
- Use ScraperAPI to avoid getting blocked when monitoring prices across many products on the same site.
- Store the raw HTML alongside price data so you can re-parse if your selectors break.
- Add email or Slack notifications when a price drops below a threshold.
Next Steps
- Add concurrent scraping to monitor hundreds of products efficiently
- Extend the scraper to support multiple e-commerce sites