Password Policies in Python — Deep Dive
Building a complete password validator
A production password validator combines multiple checks: length, breach status, pattern strength, and context relevance. Here’s a composable design:
from dataclasses import dataclass, field
@dataclass
class PolicyResult:
    """Outcome of validating one password against a policy."""

    # Overall verdict for the password.
    is_valid: bool
    # Messages describing why the password was rejected.
    errors: list[str] = field(default_factory=list)
    # Non-blocking suggestions for improving the password.
    warnings: list[str] = field(default_factory=list)
    # Strength score on a 0-4 scale.
    score: int = 0
    # Display string for the estimated time needed to crack the password.
    crack_time: str = ""
class PasswordPolicy:
def __init__(
self,
min_length: int = 12,
max_length: int = 128,
min_score: int = 3,
check_breaches: bool = True,
):
self.min_length = min_length
self.max_length = max_length
self.min_score = min_score
self.check_breaches = check_breaches
def validate(self, password: str, context: dict = None) -> PolicyResult:
"""Validate password against all policy rules."""
errors = []
warnings = []
context = context or {}
# Length checks
if len(password) < self.min_length:
errors.append(f"Must be at least {self.min_length} characters")
if len(password) > self.max_length:
errors.append(f"Must be at most {self.max_length} characters")
# Context checks
context_words = self._extract_context_words(context)
for word in context_words:
if len(word) > 3 and word.lower() in password.lower():
errors.append(f"Must not contain your {word}")
# Strength estimation
from zxcvbn import zxcvbn
user_inputs = list(context_words)
result = zxcvbn(password, user_inputs=user_inputs)
score = result["score"]
crack_time = result["crack_times_display"][
"offline_slow_hashing_1e4_per_second"
]
if score < self.min_score:
feedback = result.get("feedback", {})
if feedback.get("warning"):
errors.append(feedback["warning"])
for suggestion in feedback.get("suggestions", []):
warnings.append(suggestion)
# Breach check (skip if already failed basic checks)
if not errors and self.check_breaches:
breach_count = self._check_breach(password)
if breach_count > 0:
errors.append(
f"This password appeared in {breach_count:,} data breaches"
)
return PolicyResult(
is_valid=len(errors) == 0,
errors=errors,
warnings=warnings,
score=score,
crack_time=crack_time,
)
def _extract_context_words(self, context: dict) -> set[str]:
words = set()
for key in ("username", "email", "first_name", "last_name", "site_name"):
value = context.get(key, "")
if value:
words.add(value)
# Also add email local part
if "@" in value:
words.add(value.split("@")[0])
return words
def _check_breach(self, password: str) -> int:
import hashlib
import requests
sha1 = hashlib.sha1(password.encode()).hexdigest().upper()
prefix, suffix = sha1[:5], sha1[5:]
try:
resp = requests.get(
f"https://api.pwnedpasswords.com/range/{prefix}",
timeout=3,
)
for line in resp.text.splitlines():
hash_suffix, count = line.split(":")
if hash_suffix == suffix:
return int(count)
except requests.RequestException:
pass # fail open — don't block registration if API is down
return 0
Offline breach list checking
For high-traffic applications, calling the HIBP API on every password change adds latency. An alternative: download the breach list and query it locally.
import hashlib
import struct
from pathlib import Path
class LocalBreachChecker:
    """Check passwords against a local file of sorted ``HASH:COUNT`` lines.

    The file must hold one SHA-1 hex digest per line, followed by ``:`` and
    a count, sorted ascending by digest. Lines may differ in length (the
    count has a variable number of digits) and may end in ``\\n`` or
    ``\\r\\n``.
    """

    def __init__(self, hash_file: Path):
        self.hash_file = hash_file
        self.file_size = hash_file.stat().st_size
        # Nominal record size, kept as an attribute for existing callers.
        # Lookups do not rely on it because real lines vary in length.
        self.line_length = 63

    def is_breached(self, password: str) -> bool:
        """Return True if the SHA-1 of ``password`` is listed in the file."""
        target = hashlib.sha1(password.encode()).hexdigest().upper().encode("ascii")
        # Binary mode: byte offsets are exact and no decoding is needed.
        with open(self.hash_file, "rb") as f:
            # Invariant: a matching line, if any, starts at a byte offset in
            # [low, high). ``low`` is always the start of a line.
            low, high = 0, self.file_size
            while low < high:
                mid = (low + high) // 2
                # Position at the first line that starts at or after ``mid``.
                if mid == 0:
                    f.seek(0)
                else:
                    f.seek(mid - 1)
                    f.readline()  # discard the rest of the line covering mid-1
                start = f.tell()
                line = f.readline()
                if start >= high or not line:
                    # No line starts in [mid, high).
                    high = mid
                    continue
                file_hash = line.split(b":", 1)[0].strip().upper()
                if file_hash == target:
                    return True
                if file_hash < target:
                    low = start + len(line)
                else:
                    high = mid
        return False
The HIBP password list (~35 GB uncompressed) can be downloaded from the HIBP website. For most applications, the API approach with caching is simpler.
Password hashing with argon2
Argon2id (the recommended variant) is memory-hard, making GPU-based attacks expensive:
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError
# Hasher configured with explicit cost parameters.
ph = PasswordHasher(
    time_cost=3,        # number of iterations
    memory_cost=65536,  # 64 MB of memory
    parallelism=4,      # number of threads
    hash_len=32,        # output hash length
    salt_len=16,        # salt length
)

# Hash a password. The result is an encoded string of the form
# $argon2id$v=19$m=65536,t=3,p=4$randomsalt$hashoutput
hashed = ph.hash("user_password_here")

# Verify a password
try:
    ph.verify(hashed, "user_password_here")
except VerifyMismatchError:
    print("Wrong password")
else:
    # Verification succeeded: re-hash if the stored hash was produced with
    # different parameters than the hasher currently uses.
    if ph.check_needs_rehash(hashed):
        new_hash = ph.hash("user_password_here")
        # Update the stored hash
The check_needs_rehash method is crucial for parameter upgrades. When you increase time_cost or memory_cost, existing hashes still verify correctly, but the method flags them for re-hashing at next login.
Handling bcrypt’s 72-byte limit
bcrypt silently truncates input at 72 bytes. For long passphrases, this means everything after the 72nd byte is ignored (and multi-byte UTF-8 characters reach that limit in fewer than 72 characters):
import bcrypt
import hashlib
def bcrypt_with_prehash(password: str) -> bytes:
    """Hash ``password`` with bcrypt after a SHA-256 pre-hash.

    The pre-hash keeps the bcrypt input at a fixed 64 hex characters, so
    long passwords are not cut off by bcrypt's 72-byte input limit.
    """
    digest_hex = hashlib.sha256(password.encode()).hexdigest()
    salt = bcrypt.gensalt(rounds=12)
    return bcrypt.hashpw(digest_hex.encode(), salt)
def bcrypt_verify_prehashed(password: str, hashed: bytes) -> bool:
    """Check ``password`` against a bcrypt hash made from its SHA-256 hex digest."""
    candidate = hashlib.sha256(password.encode()).hexdigest().encode()
    return bcrypt.checkpw(candidate, hashed)
If you’re starting a new project, prefer argon2id over bcrypt — it doesn’t have this limitation and has stronger resistance to GPU attacks.
Django password validation
Django has a built-in password validation framework. Add custom validators alongside the defaults:
# settings.py
AUTH_PASSWORD_VALIDATORS = [
    # Built-in validators
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
        "OPTIONS": {"min_length": 12},
    },
    {
        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
    },
    # Custom validators from myapp/validators.py
    {
        "NAME": "myapp.validators.BreachCheckValidator",
    },
    {
        "NAME": "myapp.validators.ZxcvbnValidator",
    },
]
# myapp/validators.py
import hashlib
import requests
from django.core.exceptions import ValidationError
class BreachCheckValidator:
    """Django password validator backed by the Pwned Passwords range API."""

    def validate(self, password, user=None):
        """Raise ``ValidationError`` if the password's hash suffix is listed.

        Only the first five hex characters of the SHA-1 digest are sent.
        Request failures are ignored, so validation passes when the API is
        unreachable.
        """
        digest = hashlib.sha1(password.encode()).hexdigest().upper()
        prefix = digest[:5]
        suffix = digest[5:]
        url = f"https://api.pwnedpasswords.com/range/{prefix}"

        try:
            body = requests.get(url, timeout=3).text
        except requests.RequestException:
            return

        if suffix not in body:
            return
        raise ValidationError(
            "This password has appeared in a data breach.",
            code="breached_password",
        )

    def get_help_text(self):
        """Return the requirement text shown to users."""
        return "Your password must not appear in known data breaches."
class ZxcvbnValidator:
    """Django password validator that enforces a minimum zxcvbn score.

    Args:
        min_score: Lowest acceptable zxcvbn score (0-4).
    """

    # User attributes handed to zxcvbn so passwords built from them score lower.
    _USER_ATTRIBUTES = ("username", "email", "first_name", "last_name")

    def __init__(self, min_score=3):
        self.min_score = min_score

    def validate(self, password, user=None):
        """Raise ``ValidationError`` when the password scores below ``min_score``."""
        from zxcvbn import zxcvbn

        result = zxcvbn(password, user_inputs=self._user_inputs(user))
        if result["score"] < self.min_score:
            feedback = result.get("feedback") or {}
            # The warning can be present but empty; ``or`` (unlike a
            # ``.get`` default) still yields a non-empty message then.
            raise ValidationError(
                feedback.get("warning") or "Password is too weak.",
                code="weak_password",
            )

    def get_help_text(self):
        """Return the requirement text shown to users."""
        return "Your password must be strong enough to resist common attacks."

    @classmethod
    def _user_inputs(cls, user):
        """Return the non-empty identifying attributes of ``user`` as a list."""
        if not user:
            return []
        # getattr with a default: custom user models may lack some fields.
        values = (getattr(user, name, None) for name in cls._USER_ATTRIBUTES)
        return [value for value in values if value]
Password change and reset flows
Secure password changes require attention to several details:
from datetime import datetime, timedelta, timezone
import secrets
class PasswordResetManager:
TOKEN_EXPIRY = timedelta(hours=1)
def create_reset_token(self, user_id: str) -> str:
"""Generate a cryptographically secure reset token."""
token = secrets.token_urlsafe(32)
# Store hash of token (not plaintext) with expiry
token_hash = hashlib.sha256(token.encode()).hexdigest()
store_reset_token(user_id, token_hash, datetime.now(timezone.utc) + self.TOKEN_EXPIRY)
return token
def verify_and_consume(self, token: str) -> str | None:
"""Verify reset token and return user_id. Token is single-use."""
token_hash = hashlib.sha256(token.encode()).hexdigest()
record = get_reset_token(token_hash)
if not record:
return None
if record.expires_at < datetime.now(timezone.utc):
delete_reset_token(token_hash)
return None
# Single-use: delete immediately
delete_reset_token(token_hash)
# Invalidate all other reset tokens for this user
delete_all_reset_tokens(record.user_id)
return record.user_id
Key security details:
- Store hashed reset tokens — if the database leaks, raw tokens shouldn’t be exposed
- Make tokens single-use — delete on consumption
- Short expiry (1 hour max)
- Invalidate all sessions after password change
- Don’t reveal whether an email exists (“If an account exists, we’ve sent a reset link”)
Rate limiting password attempts
import time
from collections import defaultdict
class LoginRateLimiter:
    """In-memory login throttle with escalating lockouts.

    Every ``ATTEMPTS_PER_TIER`` attempts made within ``WINDOW_SECONDS``
    trigger a lockout, and each successive lockout for the same identifier
    is longer: 30 s, then 5 min, then 30 min (the last tier repeats).

    Args:
        clock: Zero-argument callable returning the current time in seconds.
            Defaults to ``time.time``.
    """

    WINDOW_SECONDS = 300
    ATTEMPTS_PER_TIER = 5
    LOCKOUT_TIERS = (30, 300, 1800)
    # Quiet time after a lockout ends before escalation starts over.
    ESCALATION_RESET_SECONDS = 1800

    def __init__(self, clock=time.time):
        self.attempts = defaultdict(list)  # identifier -> recent attempt times
        self.lockouts = {}  # identifier -> end time of the latest lockout
        self._strikes = defaultdict(int)  # identifier -> lockouts imposed so far
        self._clock = clock

    def allow_attempt(self, identifier: str) -> tuple[bool, int]:
        """Return (allowed, seconds_until_retry).

        An allowed call is recorded as an attempt. A denied call is not
        recorded; it either reports the time left on the current lockout or
        starts a new one.
        """
        now = self._clock()

        # Check lockout
        lockout_until = self.lockouts.get(identifier)
        if lockout_until is not None:
            if now < lockout_until:
                # Never report 0 seconds while the lockout is still active.
                return False, max(1, int(lockout_until - now))
            if now - lockout_until >= self.ESCALATION_RESET_SECONDS:
                # Long enough since the last lockout: forget the history.
                del self.lockouts[identifier]
                self._strikes.pop(identifier, None)

        # Clean old attempts (5-minute window)
        recent = [
            t for t in self.attempts[identifier] if now - t < self.WINDOW_SECONDS
        ]

        if len(recent) >= self.ATTEMPTS_PER_TIER:
            # Progressive delays: each further batch of attempts moves up one
            # tier, so 5 attempts → 30s, 10 → 5min, 15 → 30min.
            tier = min(self._strikes[identifier], len(self.LOCKOUT_TIERS) - 1)
            duration = self.LOCKOUT_TIERS[tier]
            self._strikes[identifier] += 1
            self.lockouts[identifier] = now + duration
            # Start the next batch from zero; otherwise the same attempts
            # would re-trigger a lockout the moment this one expires.
            self.attempts[identifier] = []
            return False, duration

        recent.append(now)
        self.attempts[identifier] = recent
        return True, 0

    def reset(self, identifier: str) -> None:
        """Forget all state for ``identifier`` (e.g. after a successful login)."""
        self.attempts.pop(identifier, None)
        self.lockouts.pop(identifier, None)
        self._strikes.pop(identifier, None)
Rate limit by both IP address and username to prevent distributed attacks on a single account and credential-stuffing across accounts from one IP.
The one thing to remember: A robust password policy implementation combines breach checking, pattern-strength estimation, context awareness, proper hashing (argon2id), rate limiting, and secure reset flows — each addressing a different attack vector that simple complexity rules miss entirely.
See Also
- Python API Key Management: Why apps use special passwords called API keys, and how to keep them safe — explained with a library card analogy
- Python Attribute-Based Access Control: How apps make fine-grained permission decisions based on who you are, what you're accessing, and the circumstances — explained with an airport analogy
- Python Audit Logging: Learn audit logging with a clear mental model so your Python code is easier to trust and maintain.
- Python Bandit Security Scanning: Why Bandit security scanning helps Python teams catch painful mistakes early without slowing daily development.
- Python Clickjacking Prevention: How invisible website layers trick you into clicking the wrong thing, and how Python apps stop it