Scraping Central is reader-supported. When you buy through links on our site, we may earn an affiliate commission.

Building a Price Monitoring Scraper

Build a complete price monitoring scraper that tracks product prices over time, detects price drops, and prints an alert whenever a price falls — a real-world scraping project.

Python Scraping · #20 · Intermediate · 4 min read
Share: WhatsApp · LinkedIn

Price monitoring is one of the most practical applications of web scraping. In this tutorial, you will build a scraper that tracks product prices over time, stores historical data, and detects price drops.

Project Structure

price_monitor/
    monitor.py
    prices.db

The Complete Price Monitor

import json
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from datetime import datetime

import requests
from bs4 import BeautifulSoup


@dataclass
class Product:
    """Snapshot of one product page at the moment it was scraped."""

    # Address of the product page the values below were read from.
    url: str
    # Product title as displayed on the page.
    name: str
    # Numeric price, without the currency symbol.
    price: float
    # Currency symbol or code that preceded the price, e.g. "£".
    currency: str


class PriceMonitor:
    """Track product prices in a SQLite database and report price drops.

    Each check scrapes a product page, upserts the latest price into the
    ``products`` table, appends a row to ``price_history`` and prints either
    a "PRICE DROP" notice or a plain "Checked" line.
    """

    # Optional non-digit currency prefix followed by an amount such as
    # "51.77" or "1,234.50" (comma = thousands separator, dot = decimal).
    _PRICE_PATTERN = re.compile(r"^\s*(\D*?)\s*(\d[\d,]*(?:\.\d+)?)\s*$")

    def __init__(self, db_path="prices.db"):
        """Create the HTTP session and make sure the database schema exists.

        Args:
            db_path: Path of the SQLite database file (created if missing).
        """
        self.db_path = db_path
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        )
        self._init_db()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``sqlite3.Connection`` used directly as a context manager only commits
        or rolls back the transaction; it never closes the connection. Wrapping
        it in ``closing`` releases the file handle after every operation.
        """
        return closing(sqlite3.connect(self.db_path))

    def _init_db(self):
        """Create the ``products`` and ``price_history`` tables if absent."""
        # Outer manager closes the connection; inner `conn` commits/rolls back.
        with self._connect() as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS products (
                    url TEXT PRIMARY KEY,
                    name TEXT,
                    current_price REAL,
                    lowest_price REAL,
                    currency TEXT,
                    last_checked TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS price_history (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    url TEXT,
                    price REAL,
                    checked_at TEXT,
                    FOREIGN KEY (url) REFERENCES products(url)
                )
            """)

    @staticmethod
    def _parse_price(price_text):
        """Split a displayed price such as "£1,234.50" into (currency, amount).

        Args:
            price_text: Price exactly as shown on the page.

        Returns:
            Tuple ``(currency, price)`` where ``currency`` is the (possibly
            empty) text before the number and ``price`` is a float.

        Raises:
            ValueError: If no number in the expected format is found.
        """
        match = PriceMonitor._PRICE_PATTERN.match(price_text)
        if match is None:
            raise ValueError(f"Unrecognised price format: {price_text!r}")
        currency, amount = match.groups()
        # Commas are treated as thousands separators and dropped.
        return currency.strip(), float(amount.replace(",", ""))

    def scrape_product(self, url: str) -> "Product":
        """Scrape product details from books.toscrape.com.

        Args:
            url: Product page URL.

        Returns:
            A ``Product`` holding the page title, price and currency.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the title or price cannot be found or parsed.
        """
        response = self.session.get(url, timeout=15)
        response.raise_for_status()
        # Hand BeautifulSoup the raw bytes so it detects the encoding from the
        # document itself. `response.text` falls back to ISO-8859-1 when the
        # server sends no charset, which turns "£" into "Â£" and breaks the
        # price parsing below.
        soup = BeautifulSoup(response.content, "html.parser")

        name = soup.select_one("h1")
        price_tag = soup.select_one("p.price_color")

        if name is None or price_tag is None:
            raise ValueError(f"Could not parse product at {url}")

        currency, price = self._parse_price(price_tag.get_text(strip=True))

        return Product(url=url, name=name.get_text(strip=True),
                       price=price, currency=currency)

    def check_price(self, url: str) -> "Product":
        """Scrape current price and compare with stored data.

        Stores the new price, appends it to the history and prints a
        "PRICE DROP" notice when it is lower than the previously stored one.

        Args:
            url: Product page URL.

        Returns:
            The freshly scraped ``Product``.
        """
        product = self.scrape_product(url)
        # Naive local time in ISO-8601 form; history is ordered by this string.
        now = datetime.now().isoformat()

        with self._connect() as conn, conn:
            # Get previous price
            row = conn.execute(
                "SELECT current_price, lowest_price FROM products WHERE url = ?",
                (url,)
            ).fetchone()

            previous_price = row[0] if row else None
            # A missing row or a NULL column both mean "no lowest price yet".
            stored_lowest = row[1] if row else None
            if stored_lowest is None:
                new_lowest = product.price
            else:
                new_lowest = min(stored_lowest, product.price)

            # Update product record; `excluded` refers to the row that the
            # INSERT attempted, so the values are bound only once.
            conn.execute("""
                INSERT INTO products (url, name, current_price, lowest_price, currency, last_checked)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(url) DO UPDATE SET
                    current_price = excluded.current_price,
                    lowest_price = excluded.lowest_price,
                    last_checked = excluded.last_checked
            """, (url, product.name, product.price, new_lowest, product.currency, now))

            # Record price history
            conn.execute(
                "INSERT INTO price_history (url, price, checked_at) VALUES (?, ?, ?)",
                (url, product.price, now)
            )

        # Detect price changes
        if previous_price is not None and product.price < previous_price:
            drop = previous_price - product.price
            pct = (drop / previous_price) * 100
            print(f"PRICE DROP: {product.name}")
            print(f"  {product.currency}{previous_price:.2f} -> {product.currency}{product.price:.2f} (-{pct:.1f}%)")
        else:
            print(f"Checked: {product.name} - {product.currency}{product.price:.2f}")

        return product

    def get_history(self, url: str) -> "list[dict]":
        """Get price history for a product.

        Args:
            url: Product page URL.

        Returns:
            A list of ``{"price": ..., "date": ...}`` dicts ordered by check
            time, oldest first; empty if the URL was never checked.
        """
        with self._connect() as conn, conn:
            rows = conn.execute(
                "SELECT price, checked_at FROM price_history WHERE url = ? ORDER BY checked_at",
                (url,)
            ).fetchall()
        return [{"price": price, "date": checked_at} for price, checked_at in rows]

    def check_all(self, urls) -> "list[Product]":
        """Check prices for multiple products.

        A failure on one URL is printed and skipped so the remaining URLs
        are still checked.

        Args:
            urls: Iterable of product page URLs.

        Returns:
            The ``Product`` objects for the URLs that were checked successfully.
        """
        results = []
        for url in urls:
            try:
                product = self.check_price(url)
            except Exception as e:
                # Deliberate best-effort boundary: report and move on.
                print(f"Error checking {url}: {e}")
            else:
                results.append(product)
        return results


# Usage
if __name__ == "__main__":
    monitor = PriceMonitor()

    products = [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
    ]

    # Keep the scraped results: the history rows carry no currency, so the
    # symbol shown below is taken from the product that was just checked
    # instead of a hard-coded "$" (the demo site prices are in pounds).
    checked = monitor.check_all(products)
    currency_by_url = {product.url: product.currency for product in checked}

    # View price history
    for url in products:
        history = monitor.get_history(url)
        if not history:
            continue
        # Empty symbol when this run's check failed for the URL.
        symbol = currency_by_url.get(url, "")
        print(f"\nHistory for {url.split('/')[-2]}:")
        # Show only the five most recent checks.
        for entry in history[-5:]:
            print(f"  {entry['date'][:10]}: {symbol}{entry['price']:.2f}")

Running on a Schedule

Use a cron job or Python scheduler to run checks automatically.

# Check prices every 6 hours (add to crontab with `crontab -e`)
0 */6 * * * cd /path/to/price_monitor && python monitor.py

Tips

  • Run price checks at consistent intervals (e.g., every 6 or 12 hours) to build reliable historical data.
  • Use ScraperAPI to avoid getting blocked when monitoring prices across many products on the same site.
  • Store the raw HTML alongside price data so you can re-parse if your selectors break.
  • Add email or Slack notifications when a price drops below a threshold.

Next Steps

  • Add concurrent scraping to monitor hundreds of products efficiently
  • Extend the scraper to support multiple e-commerce sites