Scraping Central is reader-supported. When you buy through links on our site, we may earn an affiliate commission.

Converting Scraped Data to Different Formats (CSV, JSON, Excel, SQL)

Export scraped data to CSV, JSON, Excel, SQLite, and other formats using Python. Learn best practices for each format and when to use them.

Data Parsing · #15beginner4 min read
Share:WhatsAppLinkedIn

After scraping and cleaning data, you need to store it in the right format. Each format has trade-offs in readability, size, and downstream compatibility.

Format Comparison

Format Best For Human Readable Supports Nesting File Size
CSV Spreadsheets, simple data Yes No Small
JSON APIs, nested data Yes Yes Medium
Excel Business stakeholders Yes (with formatting) Limited Medium
SQLite Querying, large datasets No Via relations Compact
Parquet Data science, big data No Yes Smallest

Export to CSV

import csv

data = [
    {"name": "ScraperAPI", "price": 49.99, "rating": 4.5, "url": "https://scraperapi.com"},
    {"name": "ScrapingAnt", "price": 29.00, "rating": 4.2, "url": "https://scrapingant.com"},
    {"name": "Bright Data", "price": 99.00, "rating": 4.7, "url": "https://brightdata.com"},
]

# With the csv module
with open("products.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=data[0].keys())
    writer.writeheader()
    writer.writerows(data)

# With pandas (simpler for DataFrames)
import pandas as pd
df = pd.DataFrame(data)
df.to_csv("products_pandas.csv", index=False, encoding="utf-8")

Export to JSON

import json

# Pretty-printed JSON (human-readable)
with open("products.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# JSON Lines format (one JSON object per line, better for streaming)
with open("products.jsonl", "w", encoding="utf-8") as f:
    for item in data:
        f.write(json.dumps(item, ensure_ascii=False) + "\n")

# With pandas
df.to_json("products_pd.json", orient="records", indent=2, force_ascii=False)

Export to Excel

pip install openpyxl
import pandas as pd

df = pd.DataFrame(data)

# Basic Excel export
df.to_excel("products.xlsx", index=False, sheet_name="Products")

# Multiple sheets in one file
with pd.ExcelWriter("scraping_report.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Products", index=False)

    # Summary sheet
    summary = pd.DataFrame([{
        "total_products": len(df),
        "avg_price": df["price"].mean(),
        "avg_rating": df["rating"].mean(),
    }])
    summary.to_excel(writer, sheet_name="Summary", index=False)

print("Excel report saved with 2 sheets")

Export to SQLite

import sqlite3
import pandas as pd

df = pd.DataFrame(data)

# pandas makes this trivial
conn = sqlite3.connect("scraping.db")
df.to_sql("products", conn, if_exists="replace", index=False)

# Verify the data
result = pd.read_sql("SELECT * FROM products WHERE price < 50", conn)
print(result)
conn.close()

For more control over the schema:

import sqlite3

conn = sqlite3.connect("scraping.db")
cursor = conn.cursor()

cursor.execute("""
    CREATE TABLE IF NOT EXISTS products (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL,
        price REAL,
        rating REAL,
        url TEXT UNIQUE,
        scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
""")

for item in data:
    cursor.execute(
        "INSERT OR REPLACE INTO products (name, price, rating, url) VALUES (?, ?, ?, ?)",
        (item["name"], item["price"], item["rating"], item["url"]),
    )

conn.commit()

# Query it
cursor.execute("SELECT name, price FROM products ORDER BY price ASC")
for row in cursor.fetchall():
    print(f"{row[0]}: ${row[1]}")

conn.close()

Export to Parquet (for Data Science)

pip install pyarrow
import pandas as pd

df = pd.DataFrame(data)
df.to_parquet("products.parquet", index=False)

# Read it back
df_loaded = pd.read_parquet("products.parquet")
print(f"Loaded {len(df_loaded)} rows from Parquet")

Universal Export Function

import pandas as pd
import json
from pathlib import Path

def export_data(data, filename, fmt="csv"):
    """Export scraped data to any supported format."""
    df = pd.DataFrame(data)
    path = Path(filename)

    exporters = {
        "csv": lambda: df.to_csv(path.with_suffix(".csv"), index=False),
        "json": lambda: path.with_suffix(".json").write_text(
            json.dumps(data, indent=2, ensure_ascii=False)
        ),
        "jsonl": lambda: path.with_suffix(".jsonl").write_text(
            "\n".join(json.dumps(r, ensure_ascii=False) for r in data)
        ),
        "excel": lambda: df.to_excel(path.with_suffix(".xlsx"), index=False),
        "parquet": lambda: df.to_parquet(path.with_suffix(".parquet"), index=False),
        "sql": lambda: _export_sql(df, path.stem),
    }

    if fmt not in exporters:
        raise ValueError(f"Unsupported format: {fmt}. Use: {list(exporters.keys())}")

    exporters[fmt]()
    print(f"Exported {len(data)} records as {fmt}")

def _export_sql(df, table_name):
    import sqlite3
    conn = sqlite3.connect(f"{table_name}.db")
    df.to_sql(table_name, conn, if_exists="replace", index=False)
    conn.close()

# Usage
export_data(data, "products", "csv")
export_data(data, "products", "json")
export_data(data, "products", "excel")
export_data(data, "products", "sql")

Choosing the Right Format

Scenario Best Format
Share with non-technical users Excel
Feed into another application JSON or CSV
Store incrementally (append) JSON Lines or SQLite
Large-scale data analysis Parquet
Import into a database CSV or direct SQL insert
Version control friendly JSON Lines (one record per line)

Whether you are scraping with ScraperAPI or ScrapingAnt, these export methods work the same, get your data clean, then save it in whatever format your downstream workflow needs.

Summary

Start with CSV for simplicity, use JSON when you have nested data, Excel for business reports, SQLite when you need queries, and Parquet for large analytical datasets. The universal export function above covers all common cases.