Tutorial
How to Scrape Pinterest Images and Pins
Learn how to scrape Pinterest images, pin data, and board information using Python. Covers API approaches, browser scraping, and image downloading.
Pinterest is a valuable source of visual content data for market research, design inspiration analysis, and trend tracking. Here is how to scrape it effectively.
What You Can Extract from Pinterest
- Pin images (multiple resolutions)
- Pin descriptions and titles
- Board names and pin counts
- User profiles and follower data
- Related pins and recommendations
Method 1: ScraperAPI (Recommended)
Pinterest uses heavy JavaScript rendering and bot detection. ScraperAPI handles both.
import requests
from bs4 import BeautifulSoup
import json
API_KEY = "YOUR_SCRAPERAPI_KEY"

# Fetch the Pinterest search page through ScraperAPI with rendering enabled.
# An explicit timeout keeps a stalled request from hanging the script forever.
response = requests.get(
    "http://api.scraperapi.com",
    params={
        "api_key": API_KEY,
        "url": "https://www.pinterest.com/search/pins/?q=web+design",
        "render": "true",
    },
    timeout=70,
)
soup = BeautifulSoup(response.text, "html.parser")

# Pinterest embeds data in script tags
scripts = soup.find_all("script", {"type": "application/json"})
for script in scripts:
    # `script.string` is None (or empty) when the tag has no single text
    # child; there is nothing to parse in that case.
    if not script.string:
        continue
    try:
        data = json.loads(script.string)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError: the tag did not hold
        # valid JSON, so skip it and keep going.
        continue
    # Print only a short preview of each embedded JSON document.
    print(json.dumps(data, indent=2)[:500])
Method 2: Pinterest's Internal API
Pinterest's web app uses an internal API that returns JSON.
from curl_cffi import requests
# Session that impersonates a Chrome browser (curl_cffi feature).
session = requests.Session(impersonate="chrome136")

# Search for pins
search_url = "https://www.pinterest.com/resource/BaseSearchResource/get/"
params = {
    "source_url": "/search/pins/?q=web%20design",
    "data": '{"options":{"query":"web design","scope":"pins","page_size":25}}',
}
response = session.get(
    search_url,
    params=params,
    headers={
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://www.pinterest.com/",
    },
)

if response.status_code == 200:
    data = response.json()
    # Use `or {}` / `or []` instead of `.get(key, {})` so that a key which is
    # present but null does not break the chained lookups.
    resource_response = data.get("resource_response") or {}
    payload = resource_response.get("data") or {}
    results = payload.get("results") or []
    # Show only the first five pins as a sample.
    for pin in results[:5]:
        original_image = (pin.get("images") or {}).get("orig") or {}
        print(f"Title: {pin.get('title', 'N/A')}")
        print(f"Image: {original_image.get('url', 'N/A')}")
        print("---")
else:
    # Surface the failure instead of silently printing nothing.
    print(f"Request failed with status {response.status_code}")
Method 3: Playwright for Dynamic Content
from playwright.sync_api import sync_playwright
import json
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    try:
        page = browser.new_page()
        images = []

        def capture_response(response):
            """Collect original-size image URLs from search API responses."""
            if "resource/BaseSearchResource" not in response.url:
                return
            try:
                data = response.json()
                results = data["resource_response"]["data"]["results"]
                for pin in results:
                    img = pin.get("images", {}).get("orig", {}).get("url")
                    if img:
                        images.append(img)
            except Exception:
                # Deliberately best-effort: a response with an unreadable
                # body or an unexpected JSON layout is skipped. Catching
                # Exception (not a bare except) lets Ctrl+C still interrupt.
                pass

        # Register the listener before navigating so the first batch of
        # search responses is captured as well.
        page.on("response", capture_response)
        page.goto("https://www.pinterest.com/search/pins/?q=web+design")

        # Scroll to load more pins
        for _ in range(3):
            page.mouse.wheel(0, 3000)
            # Pause so the requests triggered by the scroll can complete.
            page.wait_for_timeout(2000)

        print(f"Collected {len(images)} image URLs")
    finally:
        # Close the browser even if navigation or scrolling raised.
        browser.close()
Downloading Images
import os
import requests
def download_images(image_urls, output_dir="pinterest_images", timeout=30):
    """Download each URL in *image_urls* into *output_dir*.

    Files are named ``pin_<index>.<ext>``, where ``<index>`` is the position
    of the URL in *image_urls* and ``<ext>`` is taken from the URL path.
    URLs whose response status is not 200 are skipped.

    Args:
        image_urls: Iterable of image URL strings.
        output_dir: Directory to write into; created if it does not exist.
        timeout: Seconds to wait on each request before ``requests`` raises.

    Returns:
        None. Each saved file path is printed as it is written.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlparse

    os.makedirs(output_dir, exist_ok=True)
    for i, url in enumerate(image_urls):
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            continue
        # Read the extension from the URL *path* only, so dots in the host
        # name or the query string cannot end up in the file name. Fall back
        # to "jpg" when the path has no extension.
        ext = os.path.splitext(urlparse(url).path)[1].lstrip(".") or "jpg"
        filepath = os.path.join(output_dir, f"pin_{i}.{ext}")
        with open(filepath, "wb") as f:
            f.write(response.content)
        print(f"Downloaded: {filepath}")
Key Challenges
- Pinterest relies heavily on infinite scroll, so collecting large datasets requires browser-based scraping
- Rate limiting is aggressive on unauthenticated requests
- Image URLs contain expiring tokens on some endpoints
- The internal API structure changes periodically
For production Pinterest scraping, ScraperAPI with rendering is the most maintainable approach.