How to Scrape TripAdvisor Reviews and Ratings

A guide to scraping TripAdvisor hotel and restaurant reviews, ratings, and pricing data using Python and web scraping APIs.

TripAdvisor is a goldmine of travel data with millions of reviews for hotels, restaurants, and attractions. Scraping this data enables sentiment analysis, price comparison, and travel market research.

Challenges

TripAdvisor's protections include:

Cloudflare-based bot detection
Rate limiting and IP blocking
Lazy-loaded review content
Pagination requiring interaction

Scraping TripAdvisor with ScraperAPI

import requests
from bs4 import BeautifulSoup
import time

# ScraperAPI credential, sent as the "api_key" query parameter by
# scrape_tripadvisor_hotel. The value below is a placeholder: replace it
# with a real key before running the examples.
API_KEY = "YOUR_SCRAPERAPI_KEY"

def _fetch_scraperapi_soup(url):
    """Fetch ``url`` through ScraperAPI with rendering enabled and parse it.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        "https://api.scraperapi.com",
        params={"api_key": API_KEY, "url": url, "render": "true"},
        # Rendered requests can be slow, but never hang forever on a
        # stalled connection (requests has no default timeout).
        timeout=70,
    )
    # Fail loudly instead of parsing an error page into "N/A" fields.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _bubble_score_class(bubble):
    """Return the ``bubble_<digits>`` class token of ``bubble``, or "N/A".

    The token is searched for by pattern rather than by position: indexing
    a fixed slot of the class list raises IndexError when the element
    carries a single class.
    """
    if bubble is None:
        return "N/A"
    prefix = "bubble_"
    for token in bubble.get("class") or []:
        if token.startswith(prefix) and token[len(prefix):].isdigit():
            return token
    return "N/A"


def scrape_tripadvisor_hotel(hotel_url, review_pages=3):
    """Scrape a hotel's name, overall rating, and reviews via ScraperAPI.

    Args:
        hotel_url: Hotel page URL. Review pagination is derived by
            inserting ``or<offset>-`` after the ``-Reviews-`` marker; when
            the marker is absent only the first page is scraped.
        review_pages: Maximum number of review pages to read, with an
            offset step of 10 per page.

    Returns:
        A dict with keys ``"reviews"`` (list of dicts with ``"title"``,
        ``"text"``, ``"rating"``), ``"name"``, and ``"rating"``. Any element
        that is not found on the page is reported as ``"N/A"``.

    Raises:
        requests.HTTPError: If ScraperAPI returns an error status.
        requests.RequestException: On connection problems or timeout.
    """
    soup = _fetch_scraperapi_soup(hotel_url)

    name = soup.select_one('[data-test-target="top-info-header"]')
    rating = soup.select_one('[class*="overallRating"]')

    hotel_data = {
        "reviews": [],
        "name": name.text.strip() if name else "N/A",
        "rating": rating.text.strip() if rating else "N/A",
    }

    # Without the marker no offset can be inserted, so every "page" would
    # be the same URL and its reviews would be collected repeatedly.
    can_paginate = "-Reviews-" in hotel_url

    for page in range(review_pages):
        if page > 0:
            if not can_paginate:
                break
            # Pause only between requests, not after the final one.
            time.sleep(3)
            offset = page * 10
            review_url = hotel_url.replace(
                "-Reviews-", f"-Reviews-or{offset}-", 1
            )
            soup = _fetch_scraperapi_soup(review_url)
        # Page 0 reuses the overview document fetched above instead of
        # paying for a second rendered request.
        # NOTE(review): assumes the plain hotel URL serves the same reviews
        # as its "-or0-" variant - confirm against the live site.

        reviews = soup.select(
            '[data-test-target="reviews-tab"] [class*="review-container"]'
        )
        for review in reviews:
            title_el = review.select_one('[data-test-target="review-title"]')
            text_el = review.select_one('[class*="reviewText"]')
            bubble = review.select_one('[class*="bubble_rating"]')

            hotel_data["reviews"].append({
                "title": title_el.text.strip() if title_el else "N/A",
                "text": text_el.text.strip() if text_el else "N/A",
                "rating": _bubble_score_class(bubble),
            })

    return hotel_data

# Example usage. Guarded so that importing this module does not trigger
# network requests; the URL below is a placeholder hotel page.
if __name__ == "__main__":
    hotel = scrape_tripadvisor_hotel(
        "https://www.tripadvisor.com/Hotel_Review-g60763-d93437-Reviews-Hotel_Example-New_York.html"
    )
    print(f"{hotel['name']} - Rating: {hotel['rating']}")
    print(f"Scraped {len(hotel['reviews'])} reviews")

Scraping Restaurant Data with ScrapingAnt

def scrape_tripadvisor_restaurants(location_url, api_key="YOUR_SCRAPINGANT_KEY"):
    """Scrape restaurant cards from a TripAdvisor listing via ScrapingAnt.

    Args:
        location_url: URL of the restaurant listing page to fetch.
        api_key: ScrapingAnt API key. The default is a placeholder that
            must be replaced with a real key.

    Returns:
        A list of dicts with keys ``"name"``, ``"rating"``, and
        ``"cuisine"``. Cards without a name link are skipped; a missing
        rating or cuisine is reported as ``"N/A"``.

    Raises:
        requests.HTTPError: If ScrapingAnt returns an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        "https://api.scrapingant.com/v2/general",
        params={
            "x-api-key": api_key,
            "url": location_url,
            "browser": "true",
            "proxy_type": "residential",
        },
        # Browser-rendered requests can be slow, but never hang forever
        # (requests has no default timeout).
        timeout=90,
    )
    # Fail loudly instead of parsing an error body as a listing page.
    response.raise_for_status()

    # Accept either a JSON envelope carrying the page under "content" or a
    # raw HTML body.
    # NOTE(review): the v2 "general" endpoint appears to return the page
    # body directly rather than JSON - confirm against the API docs.
    try:
        payload = response.json()
    except ValueError:
        payload = None
    if isinstance(payload, dict) and "content" in payload:
        html = payload["content"]
    else:
        html = response.text

    soup = BeautifulSoup(html, "html.parser")

    restaurants = []
    for card in soup.select('[data-test*="restaurant"]'):
        name = card.select_one("a[class*='name']")
        if not name:
            # A card without a name link is not a usable result.
            continue

        rating = card.select_one('[class*="rating"]')
        cuisine = card.select_one('[class*="cuisine"]')
        restaurants.append({
            "name": name.text.strip(),
            "rating": rating.text.strip() if rating else "N/A",
            "cuisine": cuisine.text.strip() if cuisine else "N/A",
        })

    return restaurants

Data You Can Extract

Hotel/restaurant name, address, and contact info
Overall rating and individual review scores
Review text, title, date, and reviewer info
Price range and amenities
Photos and room types
Ranking within destination

Best Practices

Use residential proxies, because TripAdvisor blocks datacenter IPs aggressively
Enable JavaScript rendering, because reviews are loaded dynamically
Add 3-5 second delays between requests
Handle Cloudflare challenges via ScraperAPI or ScrapingAnt
Monitor for HTML changes, because TripAdvisor updates its structure frequently

Verdict

TripAdvisor scraping requires robust anti-bot handling. ScraperAPI with residential proxies and JavaScript rendering delivers consistent results, and ScrapingAnt's headless Chrome approach is equally effective. Both are strongly recommended over attempting raw proxy rotation yourself.