Python Integration Guide

Everything you need to scrape anti-bot-protected sites with Python, from single synchronous requests to async batches, webhooks, and BeautifulSoup parsing.

Prerequisites: Python 3.7+ and pip install requests beautifulsoup4 (the Webhooks example additionally needs pip install flask)

Sync Scraping

The simplest way to scrape a single URL. The API waits for the page to load and returns the HTML directly.

import requests

response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{"url": "https://example.com"}],
        "mode": "sync"
    }
)

data = response.json()
print(data["html"])
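
A note on robustness: requests does not raise on 4xx/5xx responses by itself, so it is worth failing fast before reading the body (retries and the specific status codes are covered under Error Handling below):

# Raise requests.HTTPError on 4xx/5xx instead of parsing an error body
response.raise_for_status()
html = response.json()["html"]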

Async Scraping

For multiple URLs, use async mode. Submit all URLs at once, then poll for results.

import requests
import time

# 1. Submit scrape job
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [
            {"url": "https://site-a.com/page1"},
            {"url": "https://site-b.com/page2"},
            {"url": "https://site-c.com/page3"},
        ],
        "mode": "async"
    }
)
subscription_id = response.json()["subscriptionId"]

# 2. Poll for completion
while True:
    status = requests.get(
        f"https://api.ultrawebscrapingapi.com/v1/subscription/{subscription_id}",
        headers={"X-API-Key": "your_api_key"},
    ).json()

    print(f"Completed: {status['completed']}/{status['total']}")

    if status["processing"] == 0 and status["queued"] == 0:
        break
    time.sleep(5)

# 3. Fetch results
for job in status["jobs"]:
    if job["status"] == "completed":
        result = requests.get(
            f"https://api.ultrawebscrapingapi.com/v1/result/{subscription_id}/{job['index']}",
            headers={"X-API-Key": "your_api_key"},
        ).json()
        print(f"URL: {result['url']}")
        print(f"Title: {result['title']}")
        print(f"HTML length: {len(result['html'])} chars")
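
For downstream processing it often helps to key completed results by URL. A sketch with a hypothetical collect_results helper, reusing only the fields shown in the loop above:

def collect_results(status, subscription_id, api_key):
    """Fetch every completed job and return {url: result}."""
    results = {}
    for job in status["jobs"]:
        if job["status"] != "completed":
            continue
        result = requests.get(
            f"https://api.ultrawebscrapingapi.com/v1/result/"
            f"{subscription_id}/{job['index']}",
            headers={"X-API-Key": api_key},
        ).json()
        results[result["url"]] = result
    return results

pages = collect_results(status, subscription_id, "your_api_key")
print(f"Fetched {len(pages)} pages")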

Wait for Elements

Some pages load content dynamically after the initial page load. Use waitForSelector to wait until a specific CSS selector matches, and waitFor to add a fixed delay in milliseconds.

# Wait for a specific element to appear before capturing HTML
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{
            "url": "https://example.com/products",
            "waitForSelector": ".product-list",
            "waitFor": 3000  # additional 3s wait after selector found
        }],
        "mode": "sync"
    }
)
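
Both options live on the individual URL object, so in async mode each URL can carry its own wait settings. This is an inference from the request shape above, not separately documented here; a sketch:

response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [
            # Selector-based wait for a dynamically rendered list
            {"url": "https://site-a.com/catalog",
             "waitForSelector": ".product-list"},
            # Fixed 2s delay for a page without a stable selector
            {"url": "https://site-b.com/news", "waitFor": 2000},
        ],
        "mode": "async"
    }
)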

Batch Scraping

Scrape up to 100 URLs per batch and fetch results in parallel with ThreadPoolExecutor.

import requests
import time
from concurrent.futures import ThreadPoolExecutor

API_KEY = "your_api_key"
BASE = "https://api.ultrawebscrapingapi.com/v1"

urls = [
    "https://site-a.com/page1",
    "https://site-a.com/page2",
    "https://site-b.com/page1",
    # ... up to 100 URLs per batch
]

# 1. Submit batch
response = requests.post(
    f"{BASE}/scrape",
    headers={"X-API-Key": API_KEY},
    json={
        "urls": [{"url": u} for u in urls],
        "mode": "async"
    }
)
sub_id = response.json()["subscriptionId"]

# 2. Wait for completion
while True:
    status = requests.get(
        f"{BASE}/subscription/{sub_id}",
        headers={"X-API-Key": API_KEY},
    ).json()
    if status["processing"] == 0 and status["queued"] == 0:
        break
    time.sleep(5)

# 3. Fetch all results in parallel
def fetch_result(job):
    if job["status"] != "completed":
        return None
    return requests.get(
        f"{BASE}/result/{sub_id}/{job['index']}",
        headers={"X-API-Key": API_KEY},
    ).json()

with ThreadPoolExecutor(max_workers=10) as pool:
    results = list(pool.map(fetch_result, status["jobs"]))

for r in results:
    if r:
        print(f"{r['url']} — {len(r['html'])} chars")
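
If you want to persist the batch output, a minimal sketch that writes completed results to a JSON file, keeping only the fields shown above:

import json

completed = [
    {"url": r["url"], "title": r["title"], "html": r["html"]}
    for r in results
    if r  # fetch_result returned None for jobs that didn't complete
]

with open("results.json", "w", encoding="utf-8") as f:
    json.dump(completed, f, ensure_ascii=False, indent=2)

print(f"Saved {len(completed)} results to results.json")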

Parse with BeautifulSoup

Combine UltraWebScrapingAPI with BeautifulSoup for complete scrape-and-parse workflows.

import requests
from bs4 import BeautifulSoup

# Scrape and parse in one flow
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{"url": "https://example.com/products"}],
        "mode": "sync"
    }
)

html = response.json()["html"]

# Parse with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Extract product data
products = []
for item in soup.select(".product-card"):
    products.append({
        "name": item.select_one(".product-name").text.strip(),
        "price": item.select_one(".product-price").text.strip(),
        "url": item.select_one("a")["href"],
    })

print(f"Found {len(products)} products")
for p in products:
    print(f"  {p['name']} — {p['price']}")
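
The selectors above assume every .product-card contains a name, a price, and a link; real pages rarely guarantee that, so a defensive variant skips incomplete cards instead of raising AttributeError:

# Defensive variant: skip cards that are missing any field
products = []
for item in soup.select(".product-card"):
    name = item.select_one(".product-name")
    price = item.select_one(".product-price")
    link = item.select_one("a")
    if not (name and price and link and link.get("href")):
        continue
    products.append({
        "name": name.get_text(strip=True),
        "price": price.get_text(strip=True),
        "url": link["href"],
    })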

Webhooks

Instead of polling, register a webhook to get notified when scraping completes. This example uses Flask as the webhook receiver.

import requests
from flask import Flask, request
import hmac, hashlib

app = Flask(__name__)
API_KEY = "your_api_key"
BASE = "https://api.ultrawebscrapingapi.com/v1"
SIGNING_KEY = "your_signing_key"  # the signingKey returned by register_webhook()

# 1. Register webhook (run once)
def register_webhook():
    resp = requests.post(
        f"{BASE}/endpoint",
        headers={"X-API-Key": API_KEY},
        json={"url": "https://your-server.com/webhook"}
    )
    data = resp.json()
    # Save these:
    # data["endpointId"]  — use when submitting jobs
    # data["signingKey"]  — use for signature verification
    return data

# 2. Submit job with webhook
def submit_job(urls, endpoint_id):
    resp = requests.post(
        f"{BASE}/scrape",
        headers={"X-API-Key": API_KEY},
        json={
            "urls": [{"url": u} for u in urls],
            "mode": "async",
            "endpointId": endpoint_id
        }
    )
    return resp.json()["subscriptionId"]

# 3. Receive webhook notifications
@app.route("/webhook", methods=["POST"])
def webhook():
    # Verify signature
    signature = request.headers.get("X-Signature")
    timestamp = request.headers.get("X-Timestamp")
    if not signature or not timestamp:
        return "Missing signature headers", 401

    payload = f"{timestamp}.{request.get_data(as_text=True)}"
    expected = hmac.new(
        SIGNING_KEY.encode(), payload.encode(), hashlib.sha256
    ).hexdigest()

    if not hmac.compare_digest(signature, expected):
        return "Invalid signature", 401

    data = request.json
    print(f"Job {data['subscriptionId']} done: "
          f"{data['completed']}/{data['total']}")
    return "OK"
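
One way to wire the pieces together for a quick end-to-end test, assuming the URL you registered is publicly routed to this Flask app:

if __name__ == "__main__":
    # One-time setup: register the endpoint, then persist
    # endpointId and signingKey (set SIGNING_KEY above)
    endpoint = register_webhook()
    submit_job(["https://example.com"], endpoint["endpointId"])
    # Serve the /webhook route so notifications can arrive
    app.run(host="0.0.0.0", port=8000)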

Error Handling

Production scraping needs automatic retries, rate-limit handling, and timeout management. The helper below fails fast on 402 (insufficient credits), backs off exponentially on 429, and waits out 503 capacity errors.

import requests
import time

def scrape_with_retry(url, api_key, max_retries=3):
    """Scrape a URL with automatic retry on failure."""
    for attempt in range(max_retries):
        try:
            response = requests.post(
                "https://api.ultrawebscrapingapi.com/v1/scrape",
                headers={"X-API-Key": api_key},
                json={
                    "urls": [{"url": url}],
                    "mode": "sync"
                },
                timeout=120
            )

            if response.status_code == 200:
                return response.json()

            if response.status_code == 402:
                raise Exception("Insufficient credits")

            if response.status_code == 429:
                wait = 2 ** attempt * 5
                print(f"Rate limited, waiting {wait}s...")
                time.sleep(wait)
                continue

            if response.status_code == 503:
                print(f"No capacity, retrying in 30s...")
                time.sleep(30)
                continue

            print(f"Error {response.status_code}: {response.text}")

        except requests.Timeout:
            print(f"Timeout on attempt {attempt + 1}")

    raise Exception(f"Failed after {max_retries} attempts")

# Usage
result = scrape_with_retry("https://example.com", "your_api_key")
print(result["html"])
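
If you need sync-mode scraping of several URLs with retries, the helper composes with the thread pool from the batch section. A minimal sketch; note that scrape_with_retry raises once retries are exhausted, and pool.map re-raises that when you iterate the results:

from concurrent.futures import ThreadPoolExecutor

urls = ["https://site-a.com/page1", "https://site-b.com/page2"]

with ThreadPoolExecutor(max_workers=5) as pool:
    # Each worker runs the full retry loop for one URL
    pages = list(pool.map(
        lambda u: scrape_with_retry(u, "your_api_key"), urls
    ))

for url, page in zip(urls, pages):
    print(f"{url} — {len(page['html'])} chars")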