Password Policies in Python — Deep Dive

Building a complete password validator

A production password validator combines multiple checks: length, breach status, pattern strength, and context relevance. Here’s a composable design:

from dataclasses import dataclass, field

@dataclass
class PolicyResult:
    """Outcome of validating a single password against a policy."""

    # True when no errors were recorded for the password.
    is_valid: bool
    # Blocking problems; each entry is a user-facing message.
    errors: list[str] = field(default_factory=list)
    # Non-blocking advice, such as suggestions for a stronger password.
    warnings: list[str] = field(default_factory=list)
    score: int = 0  # 0-4 strength score
    # Human-readable crack-time estimate; empty when none was computed.
    crack_time: str = ""

class PasswordPolicy:
    def __init__(
        self,
        min_length: int = 12,
        max_length: int = 128,
        min_score: int = 3,
        check_breaches: bool = True,
    ):
        self.min_length = min_length
        self.max_length = max_length
        self.min_score = min_score
        self.check_breaches = check_breaches

    def validate(self, password: str, context: dict = None) -> PolicyResult:
        """Validate password against all policy rules."""
        errors = []
        warnings = []
        context = context or {}

        # Length checks
        if len(password) < self.min_length:
            errors.append(f"Must be at least {self.min_length} characters")
        if len(password) > self.max_length:
            errors.append(f"Must be at most {self.max_length} characters")

        # Context checks
        context_words = self._extract_context_words(context)
        for word in context_words:
            if len(word) > 3 and word.lower() in password.lower():
                errors.append(f"Must not contain your {word}")

        # Strength estimation
        from zxcvbn import zxcvbn
        user_inputs = list(context_words)
        result = zxcvbn(password, user_inputs=user_inputs)
        score = result["score"]
        crack_time = result["crack_times_display"][
            "offline_slow_hashing_1e4_per_second"
        ]

        if score < self.min_score:
            feedback = result.get("feedback", {})
            if feedback.get("warning"):
                errors.append(feedback["warning"])
            for suggestion in feedback.get("suggestions", []):
                warnings.append(suggestion)

        # Breach check (skip if already failed basic checks)
        if not errors and self.check_breaches:
            breach_count = self._check_breach(password)
            if breach_count > 0:
                errors.append(
                    f"This password appeared in {breach_count:,} data breaches"
                )

        return PolicyResult(
            is_valid=len(errors) == 0,
            errors=errors,
            warnings=warnings,
            score=score,
            crack_time=crack_time,
        )

    def _extract_context_words(self, context: dict) -> set[str]:
        words = set()
        for key in ("username", "email", "first_name", "last_name", "site_name"):
            value = context.get(key, "")
            if value:
                words.add(value)
                # Also add email local part
                if "@" in value:
                    words.add(value.split("@")[0])
        return words

    def _check_breach(self, password: str) -> int:
        import hashlib
        import requests

        sha1 = hashlib.sha1(password.encode()).hexdigest().upper()
        prefix, suffix = sha1[:5], sha1[5:]

        try:
            resp = requests.get(
                f"https://api.pwnedpasswords.com/range/{prefix}",
                timeout=3,
            )
            for line in resp.text.splitlines():
                hash_suffix, count = line.split(":")
                if hash_suffix == suffix:
                    return int(count)
        except requests.RequestException:
            pass  # fail open — don't block registration if API is down

        return 0

Offline breach list checking

For high-traffic applications, calling the Have I Been Pwned (HIBP) API on every password change adds latency. An alternative is to download the breach list and query it locally.

import hashlib
import struct
from pathlib import Path

class LocalBreachChecker:
    """Check passwords against a local hash file sorted by hash.

    The file is expected to hold one ``SHA1HEX:count`` record per line, with
    uppercase hex digests in ascending order. Records may differ in length
    (the count has a variable number of digits), so the lookup bisects over
    byte offsets and re-aligns to the next line start after every seek.
    """

    def __init__(self, hash_file: Path):
        self.hash_file = hash_file
        self.file_size = hash_file.stat().st_size
        # Retained for backward compatibility only: records are not fixed
        # width, so this value is no longer used to compute seek offsets.
        self.line_length = 63

    def is_breached(self, password: str) -> bool:
        """Return True if the SHA-1 of ``password`` is listed in the file."""
        digest = hashlib.sha1(password.encode("utf-8")).hexdigest().upper()
        target = digest.encode("ascii")

        # Binary mode: arbitrary byte offsets are only valid seek targets
        # for binary files.
        with open(self.hash_file, "rb") as f:
            # Invariant: a matching record, if present, starts at a byte
            # offset in [low, high).
            low, high = 0, self.file_size

            while low < high:
                mid = (low + high) // 2

                if mid > 0:
                    # Read from mid - 1 to the end of that line so the file
                    # position lands on the first line starting at or
                    # after ``mid``.
                    f.seek(mid - 1)
                    f.readline()
                else:
                    f.seek(0)

                line = f.readline()
                if not line:
                    # No record starts at or after ``mid``.
                    high = mid
                    continue

                next_start = f.tell()
                file_hash = line.split(b":", 1)[0].strip()

                if file_hash == target:
                    return True
                if file_hash < target:
                    low = next_start
                else:
                    high = mid

        return False

The HIBP password list (~35 GB uncompressed) can be downloaded from the HIBP website. For most applications, the API approach with caching is simpler.

Password hashing with argon2

Argon2id (the recommended variant) is memory-hard, making GPU-based attacks expensive:

from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError

# Shared hasher; these parameters are encoded into every hash it produces
# (see the sample hash string below).
ph = PasswordHasher(
    time_cost=3,        # number of iterations
    memory_cost=65536,  # memory in KiB (65536 KiB = 64 MiB)
    parallelism=4,      # number of threads
    hash_len=32,        # output hash length, in bytes
    salt_len=16,        # salt length, in bytes
)

# Hash a password
hashed = ph.hash("user_password_here")
# $argon2id$v=19$m=65536,t=3,p=4$randomsalt$hashoutput

# Verify a password
try:
    ph.verify(hashed, "user_password_here")
    # Check if rehashing is needed (parameters changed)
    if ph.check_needs_rehash(hashed):
        # Re-hash only after a successful verify, while the plaintext is
        # still available.
        new_hash = ph.hash("user_password_here")
        # Update the stored hash
except VerifyMismatchError:
    # Only the "password does not match" case is handled here.
    print("Wrong password")

The check_needs_rehash method is crucial for parameter upgrades. When you increase time_cost or memory_cost, existing hashes still verify correctly, but the method flags them for re-hashing at next login.

Handling bcrypt’s 72-byte limit

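bcrypt silently truncates input at 72 bytes. For long passphrases, this means everything after the 72nd byte is ignored — and because non-ASCII characters occupy several bytes in UTF-8, the cutoff can fall well before the 72nd character: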

import bcrypt
import hashlib

def bcrypt_with_prehash(password: str) -> bytes:
    """Hash ``password`` with bcrypt after a SHA-256 pre-hash.

    The pre-hash is a 64-character hex digest, so the bytes handed to bcrypt
    always fit inside its 72-byte input limit regardless of password length.
    """
    digest_hex = hashlib.sha256(password.encode()).hexdigest()
    salt = bcrypt.gensalt(rounds=12)
    return bcrypt.hashpw(digest_hex.encode(), salt)

def bcrypt_verify_prehashed(password: str, hashed: bytes) -> bool:
    """Check ``password`` against a bcrypt hash of its SHA-256 hex digest.

    The candidate is pre-hashed to a SHA-256 hex digest before the bcrypt
    comparison, so ``hashed`` must have been created from the same kind of
    digest.
    """
    candidate = hashlib.sha256(password.encode()).hexdigest().encode()
    return bcrypt.checkpw(candidate, hashed)

If you’re starting a new project, prefer argon2id over bcrypt — it doesn’t have this limitation and has stronger resistance to GPU attacks.

Django password validation

Django has a built-in password validation framework. Add custom validators alongside the defaults:

# settings.py
# Validators are listed built-ins first, then the project's own.
AUTH_PASSWORD_VALIDATORS = [
    # Built-in: password vs. user attributes.
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
    },
    # Built-in: minimum length, raised to 12 here.
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
        "OPTIONS": {
            "min_length": 12,
        },
    },
    # Built-in: common-password list.
    {
        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
    },
    # Custom: breach lookup (myapp/validators.py).
    {
        "NAME": "myapp.validators.BreachCheckValidator",
    },
    # Custom: zxcvbn strength estimate (myapp/validators.py).
    {
        "NAME": "myapp.validators.ZxcvbnValidator",
    },
]

# myapp/validators.py
import hashlib
import requests
from django.core.exceptions import ValidationError

class BreachCheckValidator:
    """Django password validator that rejects passwords found in breaches.

    Only the first five hex characters of the password's SHA-1 digest are
    sent to the range API; the remainder is matched locally.
    """

    def validate(self, password, user=None):
        """Raise ValidationError if ``password`` is listed in the range response.

        Fails open: when the API cannot be reached or answers with an HTTP
        error, the password is accepted.
        """
        sha1 = hashlib.sha1(password.encode("utf-8")).hexdigest().upper()
        prefix, suffix = sha1[:5], sha1[5:]

        try:
            resp = requests.get(
                f"https://api.pwnedpasswords.com/range/{prefix}", timeout=3
            )
            # Do not scan an error page (rate limit, outage) as hash data.
            resp.raise_for_status()
        except requests.RequestException:
            return

        if self._suffix_listed(resp.text, suffix):
            raise ValidationError(
                "This password has appeared in a data breach.",
                code="breached_password",
            )

    @staticmethod
    def _suffix_listed(body, suffix):
        """Return True if a ``SUFFIX:COUNT`` record matches with COUNT > 0.

        Records are compared whole rather than by substring; malformed lines
        and zero-count (padding) records are ignored.
        """
        for line in body.splitlines():
            hash_suffix, separator, count = line.partition(":")
            if not separator or hash_suffix.strip().upper() != suffix:
                continue
            try:
                return int(count) > 0
            except ValueError:
                return False
        return False

    def get_help_text(self):
        """Return the help text shown alongside the password field."""
        return "Your password must not appear in known data breaches."

class ZxcvbnValidator:
    """Django password validator that enforces a minimum zxcvbn score."""

    # User attributes passed to zxcvbn so passwords built from them score low.
    _USER_ATTRIBUTES = ("username", "email", "first_name", "last_name")
    _DEFAULT_MESSAGE = "Password is too weak."

    def __init__(self, min_score=3):
        self.min_score = min_score

    def validate(self, password, user=None):
        """Raise ValidationError if the password scores below ``min_score``."""
        from zxcvbn import zxcvbn

        result = zxcvbn(password, user_inputs=self._collect_user_inputs(user))
        if result["score"] < self.min_score:
            raise ValidationError(
                self._failure_message(result.get("feedback")),
                code="weak_password",
            )

    @classmethod
    def _collect_user_inputs(cls, user):
        """Return the non-empty identifying attributes present on ``user``.

        Attributes missing from the user object are skipped, so user models
        without ``first_name``/``last_name`` do not raise AttributeError.
        """
        if not user:
            return []
        values = (getattr(user, name, None) for name in cls._USER_ATTRIBUTES)
        return [value for value in values if value]

    @classmethod
    def _failure_message(cls, feedback):
        """Return the feedback warning, or a generic message if it is empty."""
        # ``or`` rather than a .get() default: the key can be present with an
        # empty string, which would otherwise yield a blank error message.
        return (feedback or {}).get("warning") or cls._DEFAULT_MESSAGE

    def get_help_text(self):
        """Return the help text shown alongside the password field."""
        return "Your password must be strong enough to resist common attacks."

Password change and reset flows

Secure password changes require attention to several details:

from datetime import datetime, timedelta, timezone
import secrets

class PasswordResetManager:
    TOKEN_EXPIRY = timedelta(hours=1)

    def create_reset_token(self, user_id: str) -> str:
        """Generate a cryptographically secure reset token."""
        token = secrets.token_urlsafe(32)
        # Store hash of token (not plaintext) with expiry
        token_hash = hashlib.sha256(token.encode()).hexdigest()
        store_reset_token(user_id, token_hash, datetime.now(timezone.utc) + self.TOKEN_EXPIRY)
        return token

    def verify_and_consume(self, token: str) -> str | None:
        """Verify reset token and return user_id. Token is single-use."""
        token_hash = hashlib.sha256(token.encode()).hexdigest()
        record = get_reset_token(token_hash)

        if not record:
            return None
        if record.expires_at < datetime.now(timezone.utc):
            delete_reset_token(token_hash)
            return None

        # Single-use: delete immediately
        delete_reset_token(token_hash)

        # Invalidate all other reset tokens for this user
        delete_all_reset_tokens(record.user_id)

        return record.user_id

Key security details:

  • Store hashed reset tokens — if the database leaks, raw tokens shouldn’t be exposed
  • Make tokens single-use — delete on consumption
  • Short expiry (1 hour max)
  • Invalidate all sessions after password change
  • Don’t reveal whether an email exists (“If an account exists, we’ve sent a reset link”)

Rate limiting password attempts

import time
from collections import defaultdict

class LoginRateLimiter:
    """In-memory progressive throttle for login attempts.

    Every permitted attempt is recorded. Once the number of recorded attempts
    reaches a tier threshold, that attempt is followed by a lockout:
    5 attempts → 30s, 10 → 5min, 15 → 30min. Attempts age out after
    ``WINDOW_SECONDS``; time spent locked out does not count toward aging,
    so the history survives a lockout and the higher tiers are reachable.
    """

    WINDOW_SECONDS = 300
    # (attempt count, lockout seconds), highest tier first.
    LOCKOUT_TIERS = ((15, 1800), (10, 300), (5, 30))

    def __init__(self, clock=time.time):
        """Create an empty limiter.

        Args:
            clock: Zero-argument callable returning the current time in
                seconds. Defaults to ``time.time``; injectable for tests.
        """
        self.attempts = defaultdict(list)
        self.lockouts = {}
        self._clock = clock

    def allow_attempt(self, identifier: str) -> tuple[bool, int]:
        """Return (allowed, seconds_until_retry).

        When the identifier is locked out, returns ``(False, remaining)``
        with ``remaining`` at least 1. Otherwise the attempt is recorded,
        a lockout is armed if a tier threshold has been reached, and
        ``(True, 0)`` is returned.
        """
        now = self._clock()

        # Check lockout
        lockout_until = self.lockouts.get(identifier, 0)
        if now < lockout_until:
            return False, max(1, int(lockout_until - now))

        # Clean old attempts (5-minute window). Attempts made before the last
        # lockout are aged from the end of that lockout, not from when they
        # happened; otherwise a lockout as long as the window would erase the
        # history and reset the escalation.
        recent = [
            t
            for t in self.attempts[identifier]
            if now - max(t, lockout_until) < self.WINDOW_SECONDS
        ]
        if not recent:
            # History fully aged out: forget the stale lockout as well.
            self.lockouts.pop(identifier, None)

        recent.append(now)
        self.attempts[identifier] = recent

        # Progressive delays: 5 attempts → 30s, 10 → 5min, 15 → 30min
        for threshold, duration in self.LOCKOUT_TIERS:
            if len(recent) >= threshold:
                self.lockouts[identifier] = now + duration
                break

        return True, 0

Rate limit by both username and IP address: per-username limits stop distributed attacks against a single account, while per-IP limits stop credential stuffing across many accounts from one IP.

The one thing to remember: A robust password policy implementation combines breach checking, pattern-strength estimation, context awareness, proper hashing (argon2id), rate limiting, and secure reset flows — each addressing a different attack vector that simple complexity rules miss entirely.

Tags: python · security · authentication · web

See Also