Python Proxy Rotation — Deep Dive
System-level framing
A production proxy rotation system is a middleware layer between your application and target websites. It must manage proxy lifecycle (discovery, health checking, scoring, retirement), enforce per-domain rate limits regardless of proxy count, handle authentication with proxy providers, and gracefully degrade when proxy availability drops. The system sits at the intersection of networking, concurrency, and operational monitoring.
Proxy pool architecture
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
class ProxyProtocol(Enum):
    """Protocols a proxy endpoint may use; each value is the lowercase name."""

    HTTP = "http"
    HTTPS = "https"
    SOCKS5 = "socks5"
@dataclass
class Proxy:
    """One proxy endpoint plus the counters used to score and retire it.

    The counters and timestamps start empty and are meant to be updated by
    whoever records request outcomes for this proxy. Timestamps are naive
    datetimes because ``is_available`` compares them with ``datetime.utcnow()``.
    """

    # Deliberately unannotated: @dataclass ignores unannotated class attributes,
    # so these are tunable class constants, not constructor parameters.
    MIN_ATTEMPTS_BEFORE_RETIREMENT = 5
    RETIREMENT_SUCCESS_RATE = 0.1

    url: str
    protocol: ProxyProtocol = ProxyProtocol.HTTP
    country: str | None = None
    success_count: int = 0
    failure_count: int = 0
    total_latency: float = 0.0  # summed latency of successful requests
    last_used: datetime | None = None
    last_failed: datetime | None = None
    cooldown_until: datetime | None = None

    @property
    def success_rate(self) -> float:
        """Fraction of recorded attempts that succeeded (0.5 when untested)."""
        total = self.success_count + self.failure_count
        return self.success_count / total if total > 0 else 0.5

    @property
    def avg_latency(self) -> float:
        """Mean latency per success, or ``inf`` when there are no successes."""
        if self.success_count > 0:
            return self.total_latency / self.success_count
        return float("inf")

    @property
    def score(self) -> float:
        """Selection score: higher is better.

        Success rate contributes up to 100 points, latency subtracts at most
        50 (so an infinite average latency is capped), and a proxy that has
        never failed gets a flat 10-point bonus.
        """
        rate_weight = self.success_rate * 100
        latency_penalty = min(self.avg_latency * 10, 50)
        clean_record_bonus = 10 if self.failure_count == 0 else 0
        return rate_weight - latency_penalty + clean_record_bonus

    def is_available(self) -> bool:
        """Return whether this proxy may be handed out right now.

        A proxy is unavailable while its cooldown is running, or once it has
        been retired for a success rate at or below
        ``RETIREMENT_SUCCESS_RATE``. Retirement is only considered after
        ``MIN_ATTEMPTS_BEFORE_RETIREMENT`` recorded attempts; otherwise a
        single early failure (0/1 = 0%) would retire the proxy for good.
        """
        # NOTE: datetime.utcnow() is deprecated since Python 3.12; it is kept
        # so the comparison stays naive-vs-naive with cooldown_until.
        if self.cooldown_until is not None and datetime.utcnow() < self.cooldown_until:
            return False
        attempts = self.success_count + self.failure_count
        if attempts < self.MIN_ATTEMPTS_BEFORE_RETIREMENT:
            return True
        return self.success_rate > self.RETIREMENT_SUCCESS_RATE
Pool manager with weighted selection
import random
import asyncio
from datetime import timedelta
class ProxyPool:
def __init__(self, proxies: list[Proxy], cooldown_minutes: int = 5):
self._proxies = proxies
self._cooldown = timedelta(minutes=cooldown_minutes)
self._lock = asyncio.Lock()
async def get_proxy(self, country: str | None = None) -> Proxy | None:
async with self._lock:
available = [p for p in self._proxies if p.is_available()]
if country:
available = [p for p in available if p.country == country]
if not available:
return None
return self._weighted_select(available)
def _weighted_select(self, proxies: list[Proxy]) -> Proxy:
scores = [max(p.score, 0.1) for p in proxies]
total = sum(scores)
weights = [s / total for s in scores]
return random.choices(proxies, weights=weights, k=1)[0]
async def report_success(self, proxy: Proxy, latency: float):
async with self._lock:
proxy.success_count += 1
proxy.total_latency += latency
proxy.last_used = datetime.utcnow()
async def report_failure(self, proxy: Proxy, status_code: int | None = None):
async with self._lock:
proxy.failure_count += 1
proxy.last_failed = datetime.utcnow()
if status_code == 429 or (proxy.failure_count % 3 == 0):
proxy.cooldown_until = datetime.utcnow() + self._cooldown
async def health_check(self, test_url: str = "https://httpbin.org/ip"):
import httpx
for proxy in self._proxies:
try:
async with httpx.AsyncClient(
proxies={"all://": proxy.url}, timeout=10
) as client:
start = asyncio.get_event_loop().time()
resp = await client.get(test_url)
latency = asyncio.get_event_loop().time() - start
if resp.status_code == 200:
await self.report_success(proxy, latency)
else:
await self.report_failure(proxy, resp.status_code)
except Exception:
await self.report_failure(proxy)
def get_stats(self) -> dict:
available = sum(1 for p in self._proxies if p.is_available())
avg_success = sum(p.success_rate for p in self._proxies) / len(self._proxies)
return {
"total": len(self._proxies),
"available": available,
"avg_success_rate": round(avg_success, 2),
"cooling_down": len(self._proxies) - available,
}
Integration with httpx
import httpx
import time
class RotatingClient:
    """HTTP GET client that sends each attempt through a proxy from a pool."""

    # Header value pools, hoisted so they are not rebuilt on every request.
    _USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/125.0.0.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/18.0",
        "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
    )
    _ACCEPT_LANGUAGES = ("en-US,en;q=0.9", "en-GB,en;q=0.8")
    # Statuses treated as "this proxy was blocked/throttled": retry elsewhere.
    _RETRY_STATUSES = frozenset({403, 429, 503})

    def __init__(self, pool: ProxyPool):
        self.pool = pool

    async def get(
        self, url: str, country: str | None = None, max_retries: int = 3
    ) -> httpx.Response:
        """GET ``url`` through a pooled proxy, retrying on proxy-level failures.

        Each attempt draws a fresh proxy (optionally limited to ``country``).
        Timeouts, proxy errors, connect errors and 403/429/503 responses are
        reported to the pool as failures and retried; any other response is
        reported as a success and returned as-is.

        Raises:
            RuntimeError: if the pool has no available proxy, or if all
                ``max_retries`` attempts failed.
        """
        for _ in range(max_retries):
            proxy = await self.pool.get_proxy(country=country)
            if proxy is None:
                raise RuntimeError("No available proxies")
            try:
                start = time.monotonic()
                # `proxy=` replaces the `proxies=` mapping, which httpx
                # deprecated in 0.26 and removed in 0.28.
                async with httpx.AsyncClient(
                    proxy=proxy.url,
                    timeout=15,
                    headers=self._random_headers(),
                ) as client:
                    response = await client.get(url)
                    latency = time.monotonic() - start
            except (httpx.TimeoutException, httpx.ProxyError, httpx.ConnectError):
                await self.pool.report_failure(proxy)
                continue
            if response.status_code in self._RETRY_STATUSES:
                await self.pool.report_failure(proxy, response.status_code)
                continue
            # Any other status (200, 404, ...) means the proxy did its job.
            await self.pool.report_success(proxy, latency)
            return response
        raise RuntimeError(f"All {max_retries} attempts failed for {url}")

    def _random_headers(self) -> dict:
        """Build request headers with a randomly chosen User-Agent and language."""
        return {
            "User-Agent": random.choice(self._USER_AGENTS),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": random.choice(self._ACCEPT_LANGUAGES),
            # NOTE(review): advertising "br" presumably requires a Brotli
            # decoder to be installed next to httpx — confirm.
            "Accept-Encoding": "gzip, deflate, br",
        }
Per-domain rate limiting
Proxy rotation solves IP-based blocking but must not be used to circumvent ethical rate limits:
from collections import defaultdict
class DomainThrottle:
    """Space out requests to each domain to at most N per second.

    Each domain has its own schedule, so waiting for one domain never delays
    requests to another.
    """

    def __init__(self, requests_per_second: float = 2.0):
        """Create a throttle allowing ``requests_per_second`` per domain.

        Raises:
            ValueError: if ``requests_per_second`` is not positive.
        """
        if requests_per_second <= 0:
            raise ValueError("requests_per_second must be positive")
        self._interval = 1.0 / requests_per_second
        # Earliest monotonic time at which the next request per domain may go.
        self._next_slot: dict[str, float] = {}

    async def wait(self, domain: str):
        """Sleep until the next request to ``domain`` is allowed.

        The first request to a domain goes out immediately; concurrent callers
        for the same domain are handed successive slots one interval apart.
        """
        now = time.monotonic()
        # The slot is read and reserved without an intervening await, so no
        # other coroutine on this event loop can claim it in between. Sleeping
        # happens afterwards and therefore blocks only this caller, not
        # callers waiting on other domains.
        slot = max(now, self._next_slot.get(domain, now))
        self._next_slot[domain] = slot + self._interval
        delay = slot - now
        if delay > 0:
            await asyncio.sleep(delay)
This enforces a global rate limit per domain regardless of how many proxies you have. Sending 100 requests per second through 100 different proxies still puts the same load on the target server.
SOCKS5 proxy support
Some providers offer SOCKS5 proxies, which work at a lower network level and support more protocols:
# httpx with SOCKS5 requires httpx[socks]
# pip install httpx[socks]
async with httpx.AsyncClient(
proxies={"all://": "socks5://user:pass@proxy.example.com:1080"},
timeout=15,
) as client:
response = await client.get("https://target.com")
SOCKS5 proxies are useful for non-HTTP protocols. They can also provide better anonymity: because they relay traffic below the HTTP layer, they do not add headers such as X-Forwarded-For to your requests.
Proxy provider integration
Major proxy providers (Bright Data, Oxylabs, Smartproxy) offer rotating proxy gateways — a single endpoint that automatically rotates IPs:
# Provider gateway approach (simpler but less control)
PROVIDER_PROXY = "http://user-session123:pass@gate.provider.com:7777"
async with httpx.AsyncClient(proxies={"all://": PROVIDER_PROXY}) as client:
response = await client.get("https://target.com")
# Self-managed approach (more control)
proxies = [
Proxy(url="http://user:pass@1.2.3.4:8080", country="US"),
Proxy(url="http://user:pass@5.6.7.8:8080", country="DE"),
# ... hundreds more
]
pool = ProxyPool(proxies)
The self-managed approach gives you control over selection logic, health checking, and cost optimization. The provider gateway is simpler but treats the proxy layer as a black box.
Anti-detection beyond IP rotation
IP rotation alone is insufficient against sophisticated anti-bot systems (Cloudflare, PerimeterX, DataDome):
| Detection signal | Mitigation |
|---|---|
| IP reputation | Use residential/mobile proxies |
| TLS fingerprint | Use curl_cffi or tls-client to mimic browser TLS |
| HTTP/2 fingerprint | Match browser h2 settings (header order, pseudo-headers) |
| Header consistency | Rotate User-Agent and keep other headers consistent with it |
| Cookie behavior | Accept and return cookies like a browser |
| Request timing | Add random delays, avoid perfectly regular intervals |
| JavaScript challenges | Use headless browsers (Playwright) for JS-dependent sites |
Monitoring and alerting
import logging
logger = logging.getLogger("proxy_monitor")


async def monitor_pool(
    pool: ProxyPool,
    interval: int = 300,
    min_available_ratio: float = 0.2,
    min_success_rate: float = 0.5,
):
    """Log pool statistics forever, warning when health drops too low.

    Args:
        pool: Object whose ``get_stats()`` returns a dict with the keys
            ``total``, ``available`` and ``avg_success_rate``.
        interval: Seconds to sleep between two reports.
        min_available_ratio: Warn when fewer than this fraction of the pool is
            available.
        min_success_rate: Warn when the average success rate falls below this.

    The coroutine never returns on its own; cancel its task to stop it.
    """
    while True:
        try:
            stats = pool.get_stats()
        except Exception:
            # A failing stats call must not silently end monitoring; log it
            # (with traceback) and try again on the next tick.
            logger.exception("Failed to collect proxy pool stats")
        else:
            logger.info("Proxy pool: %s", stats)
            if stats["available"] < stats["total"] * min_available_ratio:
                logger.warning(
                    "Proxy pool critically low: %s/%s available",
                    stats["available"],
                    stats["total"],
                )
            if stats["avg_success_rate"] < min_success_rate:
                logger.warning(
                    "Average success rate dropped to %s",
                    stats["avg_success_rate"],
                )
        await asyncio.sleep(interval)
Track these metrics:
- Pool availability — percentage of proxies not in cooldown.
- Success rate — overall and per proxy.
- Average latency — indicates proxy quality.
- Cost per request — for budget-constrained operations.
One thing to remember: A proxy rotation system is a scored pool with lifecycle management. Weight selection by success rate and latency, cool down failing proxies, enforce per-domain rate limits regardless of pool size, and layer IP rotation with header and TLS fingerprint variation for effective anti-detection.
See Also
- Python API Rate Limit Handling — Why APIs tell your Python program to slow down, and how to handle it gracefully — explained so anyone can follow along.
- Python SSE Client Consumption — How Python programs listen to live data streams from servers — like a radio that never stops playing — explained for complete beginners.
- Python Web Scraping Ethics — When is it okay to collect data from websites with Python, and when does it cross the line? The rules explained for everyone.
- Python Webhook Handlers — How Python programs receive instant notifications from other services when something happens — explained without technical jargon.
- CI/CD — Why big apps can ship updates every day without turning your phone into a glitchy mess — CI/CD is the behind-the-scenes quality gate and delivery truck.