Building a Price Monitoring Scraper - Python Scraping

Build a complete price monitoring scraper that tracks product prices over time, detects price drops, and reports them on the console. A real-world scraping project.

Price monitoring is one of the most practical applications of web scraping. In this tutorial, you will build a scraper that tracks product prices over time, stores historical data, and detects price drops.

Project Structure

price_monitor/
    monitor.py
    prices.db

The Complete Price Monitor

import json
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from datetime import datetime

import requests
from bs4 import BeautifulSoup


@dataclass
class Product:
    """Snapshot of one product page as returned by a single scrape."""

    # Page URL; also used as the primary key of the ``products`` table.
    url: str
    # Product title, taken from the page's <h1> element.
    name: str
    # Numeric price parsed from the page's price element.
    price: float
    # Currency symbol that prefixes the price text (e.g. "£").
    currency: str


class PriceMonitor:
    """Track product prices in a SQLite database and report price drops.

    Every check scrapes the product page, upserts the latest price into the
    ``products`` table and appends one row to ``price_history``.
    """

    # Optional currency prefix followed by a number such as "51.77" or
    # "1,299.99" (comma = thousands separator, dot = decimal point).
    _PRICE_RE = re.compile(
        r"^(?P<currency>[^\d\s.,-]*)\s*(?P<amount>\d[\d,]*(?:\.\d+)?)$"
    )

    def __init__(self, db_path="prices.db", timeout=15):
        """Create the HTTP session and make sure the database schema exists.

        Args:
            db_path: Path of the SQLite database file (created if missing).
            timeout: Seconds to wait for each HTTP request.
        """
        self.db_path = db_path
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        )
        self._init_db()

    def _init_db(self):
        """Create the ``products`` and ``price_history`` tables if absent."""
        # sqlite3's own context manager only commits/rolls back; ``closing``
        # is what actually releases the connection.
        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS products (
                    url TEXT PRIMARY KEY,
                    name TEXT,
                    current_price REAL,
                    lowest_price REAL,
                    currency TEXT,
                    last_checked TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS price_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT,
                    price REAL,
                    checked_at TEXT,
                    FOREIGN KEY (url) REFERENCES products(url)
                )
            """)

    @classmethod
    def _parse_price(cls, price_text):
        """Split price text such as "£51.77" into ``(currency, amount)``.

        Args:
            price_text: Text of the price element; surrounding whitespace
                is ignored and commas are treated as thousands separators.

        Returns:
            A ``(currency, amount)`` tuple; ``currency`` is ``""`` when the
            text carries no prefix.

        Raises:
            ValueError: If the text is not an optional currency prefix
                followed by a number.
        """
        match = cls._PRICE_RE.match(price_text.strip())
        if match is None:
            raise ValueError(f"Unrecognized price format: {price_text!r}")
        return match["currency"], float(match["amount"].replace(",", ""))

    def scrape_product(self, url):
        """Scrape product details from books.toscrape.com.

        Args:
            url: Product page URL.

        Returns:
            A ``Product`` holding the scraped name, price and currency.

        Raises:
            requests.RequestException: On network errors or HTTP error status.
            ValueError: If the name or price cannot be found or parsed.
        """
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        # Parse the raw bytes rather than ``response.text``: when the server
        # sends no charset, requests decodes text/* as ISO-8859-1, which
        # turns a UTF-8 "£" into "Â£". Given bytes, BeautifulSoup honors the
        # encoding declared in the page itself.
        soup = BeautifulSoup(response.content, "html.parser")

        name = soup.select_one("h1")
        price_tag = soup.select_one("p.price_color")

        if not name or not price_tag:
            raise ValueError(f"Could not parse product at {url}")

        currency, price = self._parse_price(price_tag.get_text(strip=True))

        return Product(url=url, name=name.get_text(strip=True),
                       price=price, currency=currency)

    def check_price(self, url):
        """Scrape the current price, store it, and report any drop.

        Args:
            url: Product page URL.

        Returns:
            The freshly scraped ``Product``.

        Raises:
            Whatever ``scrape_product`` raises, plus ``sqlite3.Error`` on
            database failures.
        """
        product = self.scrape_product(url)
        now = datetime.now().isoformat()

        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            # Read the stored state before overwriting it.
            row = conn.execute(
                "SELECT current_price, lowest_price FROM products WHERE url = ?",
                (url,)
            ).fetchone()

            previous_price = row[0] if row else None
            stored_lowest = row[1] if row else None

            # A missing row or NULL column means there is no lowest price yet.
            if stored_lowest is None:
                new_lowest = product.price
            else:
                new_lowest = min(stored_lowest, product.price)

            # ``excluded`` refers to the row that failed to insert, so the
            # values only need to be bound once.
            conn.execute("""
                INSERT INTO products (url, name, current_price, lowest_price, currency, last_checked)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(url) DO UPDATE SET
                    current_price = excluded.current_price,
                    lowest_price = excluded.lowest_price,
                    last_checked = excluded.last_checked
            """, (url, product.name, product.price, new_lowest, product.currency, now))

            # Record price history
            conn.execute(
                "INSERT INTO price_history (url, price, checked_at) VALUES (?, ?, ?)",
                (url, product.price, now)
            )

        # Detect price changes
        if previous_price is not None and product.price < previous_price:
            drop = previous_price - product.price
            pct = (drop / previous_price) * 100
            print(f"PRICE DROP: {product.name}")
            print(f"  {product.currency}{previous_price:.2f} -> {product.currency}{product.price:.2f} (-{pct:.1f}%)")
        else:
            print(f"Checked: {product.name} - {product.currency}{product.price:.2f}")

        return product

    def get_history(self, url):
        """Return the recorded prices for ``url``, oldest first.

        Returns:
            A list of ``{"price": float, "date": str}`` dicts, where ``date``
            is the ISO-format timestamp of the check.
        """
        with closing(sqlite3.connect(self.db_path)) as conn:
            # ``id`` breaks ties between rows that share a timestamp.
            rows = conn.execute(
                "SELECT price, checked_at FROM price_history WHERE url = ? "
                "ORDER BY checked_at, id",
                (url,)
            ).fetchall()
        return [{"price": price, "date": checked_at} for price, checked_at in rows]

    def check_all(self, urls):
        """Check prices for multiple products.

        A failure on one URL is printed and skipped so the remaining URLs
        are still checked.

        Returns:
            The list of successfully scraped ``Product`` objects.
        """
        results = []
        for url in urls:
            try:
                results.append(self.check_price(url))
            except Exception as e:  # best-effort batch: report and continue
                print(f"Error checking {url}: {e}")
        return results


# Usage
if __name__ == "__main__":
    monitor = PriceMonitor()

    products = [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
    ]

    checked = monitor.check_all(products)

    # Remember each product's scraped currency so the history below is not
    # printed with a hard-coded "$" for prices that are actually in "£".
    currency_by_url = {product.url: product.currency for product in checked}

    # View price history
    for url in products:
        history = monitor.get_history(url)
        if history:
            # Falls back to no symbol when this run's scrape of the URL failed.
            currency = currency_by_url.get(url, "")
            print(f"\nHistory for {url.split('/')[-2]}:")
            for entry in history[-5:]:
                print(f"  {entry['date'][:10]}: {currency}{entry['price']:.2f}")

Running on a Schedule

Use a cron job or Python scheduler to run checks automatically.

# Check prices every 6 hours (add to crontab with `crontab -e`)
# Schedule fields: minute 0, every 6th hour, any day of month, any month, any day of week.
0 */6 * * * cd /path/to/price_monitor && python monitor.py

Tips

Run price checks at consistent intervals (e.g., every 6 or 12 hours) to build reliable historical data.
Use ScraperAPI to avoid getting blocked when monitoring prices across many products on the same site.
Store the raw HTML alongside price data so you can re-parse if your selectors break.
Add email or Slack notifications when a price drops below a threshold.

Next Steps

Add concurrent scraping to monitor hundreds of products efficiently
Extend the scraper to support multiple e-commerce sites