Python Integration Guide
Everything you need to scrape anti-bot-protected sites with Python, from basic synchronous requests to batch scraping and BeautifulSoup parsing.
pip install requests beautifulsoup4

Sync Scraping
Sync mode is the simplest way to scrape a single URL: the API waits for the page to load and returns the HTML directly.
import requests

response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{"url": "https://example.com"}],
        "mode": "sync"
    }
)

data = response.json()
print(data["html"])
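The example assumes a 200 response; if the request fails, indexing into the body raises a confusing KeyError. requests' built-in raise_for_status() is a quick guard (see Error Handling below for full retry logic):

# Surface 4xx/5xx responses as requests.HTTPError before reading the body
response.raise_for_status()
data = response.json()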

Async Scraping
For multiple URLs, use async mode: submit all the URLs at once, then poll for results.
import requests
import time

# 1. Submit scrape job
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [
            {"url": "https://site-a.com/page1"},
            {"url": "https://site-b.com/page2"},
            {"url": "https://site-c.com/page3"},
        ],
        "mode": "async"
    }
)
subscription_id = response.json()["subscriptionId"]

# 2. Poll for completion
while True:
    status = requests.get(
        f"https://api.ultrawebscrapingapi.com/v1/subscription/{subscription_id}",
        headers={"X-API-Key": "your_api_key"},
    ).json()
    print(f"Completed: {status['completed']}/{status['total']}")
    if status["processing"] == 0 and status["queued"] == 0:
        break
    time.sleep(5)

# 3. Fetch results
for job in status["jobs"]:
    if job["status"] == "completed":
        result = requests.get(
            f"https://api.ultrawebscrapingapi.com/v1/result/{subscription_id}/{job['index']}",
            headers={"X-API-Key": "your_api_key"},
        ).json()
        print(f"URL: {result['url']}")
        print(f"Title: {result['title']}")
        print(f"HTML length: {len(result['html'])} chars")

Wait for Elements
Some pages render content dynamically after the initial load. Use waitForSelector to wait until a specific CSS selector appears, and waitFor to add a fixed extra delay in milliseconds.
# Wait for a specific element to appear before capturing HTML
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{
            "url": "https://example.com/products",
            "waitForSelector": ".product-list",
            "waitFor": 3000  # additional 3s wait after the selector is found
        }],
        "mode": "sync"
    }
)
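If a page has no stable selector to wait on, a fixed delay may be enough on its own; whether waitFor is honored without waitForSelector is an assumption here, so verify it against the API reference:

response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{
            "url": "https://example.com/feed",
            "waitFor": 5000  # flat 5s delay before capturing HTML
        }],
        "mode": "sync"
    }
)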

Batch Scraping
Scrape up to 100 URLs per batch and fetch the results in parallel with ThreadPoolExecutor.
import requests
import time
from concurrent.futures import ThreadPoolExecutor

API_KEY = "your_api_key"
BASE = "https://api.ultrawebscrapingapi.com/v1"

urls = [
    "https://site-a.com/page1",
    "https://site-a.com/page2",
    "https://site-b.com/page1",
    # ... up to 100 URLs per batch
]

# 1. Submit batch
response = requests.post(
    f"{BASE}/scrape",
    headers={"X-API-Key": API_KEY},
    json={
        "urls": [{"url": u} for u in urls],
        "mode": "async"
    }
)
sub_id = response.json()["subscriptionId"]

# 2. Wait for completion
while True:
    status = requests.get(
        f"{BASE}/subscription/{sub_id}",
        headers={"X-API-Key": API_KEY},
    ).json()
    if status["processing"] == 0 and status["queued"] == 0:
        break
    time.sleep(5)

# 3. Fetch all results in parallel
def fetch_result(job):
    if job["status"] != "completed":
        return None
    return requests.get(
        f"{BASE}/result/{sub_id}/{job['index']}",
        headers={"X-API-Key": API_KEY},
    ).json()

with ThreadPoolExecutor(max_workers=10) as pool:
    results = list(pool.map(fetch_result, status["jobs"]))

for r in results:
    if r:
        print(f"{r['url']} — {len(r['html'])} chars")

Parse with BeautifulSoup
Combine UltraWebScrapingAPI with BeautifulSoup for a complete scrape-and-parse workflow.
import requests
from bs4 import BeautifulSoup

# Scrape and parse in one flow
response = requests.post(
    "https://api.ultrawebscrapingapi.com/v1/scrape",
    headers={"X-API-Key": "your_api_key"},
    json={
        "urls": [{"url": "https://example.com/products"}],
        "mode": "sync"
    }
)
html = response.json()["html"]

# Parse with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Extract product data
products = []
for item in soup.select(".product-card"):
    products.append({
        "name": item.select_one(".product-name").text.strip(),
        "price": item.select_one(".product-price").text.strip(),
        "url": item.select_one("a")["href"],
    })

print(f"Found {len(products)} products")
for p in products:
    print(f"  {p['name']} — {p['price']}")

Webhooks
Instead of polling, register a webhook to be notified when scraping completes. This example uses Flask as the webhook receiver.
import requests
from flask import Flask, request
import hmac, hashlib

app = Flask(__name__)

API_KEY = "your_api_key"
BASE = "https://api.ultrawebscrapingapi.com/v1"
SIGNING_KEY = "your_signing_key"  # returned by register_webhook() below

# 1. Register webhook (run once)
def register_webhook():
    resp = requests.post(
        f"{BASE}/endpoint",
        headers={"X-API-Key": API_KEY},
        json={"url": "https://your-server.com/webhook"}
    )
    data = resp.json()
    # Save these:
    # data["endpointId"] — use when submitting jobs
    # data["signingKey"] — use for signature verification
    return data

# 2. Submit job with webhook
def submit_job(urls, endpoint_id):
    resp = requests.post(
        f"{BASE}/scrape",
        headers={"X-API-Key": API_KEY},
        json={
            "urls": [{"url": u} for u in urls],
            "mode": "async",
            "endpointId": endpoint_id
        }
    )
    return resp.json()["subscriptionId"]

# 3. Receive webhook notifications
@app.route("/webhook", methods=["POST"])
def webhook():
    # Verify the HMAC-SHA256 signature before trusting the payload
    signature = request.headers.get("X-Signature")
    timestamp = request.headers.get("X-Timestamp")
    if not signature or not timestamp:
        return "Missing signature headers", 401
    payload = f"{timestamp}.{request.get_data(as_text=True)}"
    expected = hmac.new(
        SIGNING_KEY.encode(), payload.encode(), hashlib.sha256
    ).hexdigest()
    if not hmac.compare_digest(signature, expected):
        return "Invalid signature", 401

    data = request.json
    print(f"Job {data['subscriptionId']} done: "
          f"{data['completed']}/{data['total']}")
    return "OK"

Error Handling
Production-ready scraping with automatic retries, rate-limit handling, and timeout management.
import requests
import time

def scrape_with_retry(url, api_key, max_retries=3):
    """Scrape a URL with automatic retry on failure."""
    for attempt in range(max_retries):
        try:
            response = requests.post(
                "https://api.ultrawebscrapingapi.com/v1/scrape",
                headers={"X-API-Key": api_key},
                json={
                    "urls": [{"url": url}],
                    "mode": "sync"
                },
                timeout=120
            )
            if response.status_code == 200:
                return response.json()
            if response.status_code == 402:
                raise Exception("Insufficient credits")
            if response.status_code == 429:
                wait = 2 ** attempt * 5  # exponential backoff: 5s, 10s, 20s
                print(f"Rate limited, waiting {wait}s...")
                time.sleep(wait)
                continue
            if response.status_code == 503:
                print("No capacity, retrying in 30s...")
                time.sleep(30)
                continue
            print(f"Error {response.status_code}: {response.text}")
        except requests.Timeout:
            print(f"Timeout on attempt {attempt + 1}")
    raise Exception(f"Failed after {max_retries} attempts")

# Usage
result = scrape_with_retry("https://example.com", "your_api_key")
print(result["html"])
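When running this helper over a list of URLs, you may want failures isolated per URL instead of one exception aborting the whole run; a sketch (scrape_many is a hypothetical wrapper, not part of the API):

def scrape_many(urls, api_key):
    """Scrape several URLs, recording None for any that exhaust retries."""
    results = {}
    for url in urls:
        try:
            results[url] = scrape_with_retry(url, api_key)
        except Exception as exc:
            print(f"Giving up on {url}: {exc}")
            results[url] = None
    return results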