Python Configuration Hierarchy — Deep Dive

Approach 1: Pydantic Settings (Modern Standard)

Pydantic Settings provides typed, validated configuration with built-in environment variable support:

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

class DatabaseSettings(BaseSettings):
    """Database connection and pool settings, each with a development default."""

    # Keep the field name ``password`` usable for constructor and nested input
    # even though the environment variable is bound through an alias below.
    model_config = SettingsConfigDict(populate_by_name=True)

    host: str = "localhost"
    port: int = 5432
    name: str = "myapp"
    # pydantic-settings v2 takes the environment variable name from the
    # validation alias. An "env" entry in json_schema_extra is only schema
    # metadata and is never consulted when values are loaded.
    password: str = Field(default="", validation_alias="DB_PASSWORD")
    pool_min: int = 5
    pool_max: int = 20

class RedisSettings(BaseSettings):
    """Redis settings: a connection URL and a TTL, both with defaults."""

    # Default targets database 0 on a local Redis at port 6379.
    url: str = "redis://localhost:6379/0"
    # NOTE(review): the unit is not stated here — presumably seconds; confirm.
    ttl: int = 300

class AppSettings(BaseSettings):
    """Top-level application settings.

    Values are read from ``APP_``-prefixed environment variables and from
    ``.env``. Nested fields are addressed with a double underscore, e.g.
    ``APP_DATABASE__HOST``.
    """

    model_config = SettingsConfigDict(
        env_prefix="APP_",
        env_nested_delimiter="__",
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
    )

    debug: bool = False
    port: int = 8000
    log_level: str = "INFO"
    # Build nested defaults per AppSettings instance. Calling the nested
    # settings classes in the class body would read the environment once, at
    # import time, and reuse that snapshot for every later instance.
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    redis: RedisSettings = Field(default_factory=RedisSettings)
    allowed_origins: list[str] = ["http://localhost:3000"]

Usage:

# Reads from defaults → .env file → environment variables
# (listed from lowest to highest priority: an environment variable overrides
# the same key in .env, which overrides the field default).
settings = AppSettings()

# Environment variable mapping:
# APP_DEBUG=true → settings.debug = True
# APP_DATABASE__HOST=prod.db.com → settings.database.host = "prod.db.com"
# APP_ALLOWED_ORIGINS='["https://example.com"]' → parsed as list

Custom Settings Sources

Pydantic Settings supports pluggable sources. Add YAML file support:

import yaml
from pydantic_settings import (
    BaseSettings, 
    PydanticBaseSettingsSource,
)

class YamlSettingsSource(PydanticBaseSettingsSource):
    """Settings source that supplies top-level field values from a YAML file.

    The file is parsed once, on first use. A missing file or an empty
    document behaves like an empty source. Keys whose value is ``None`` are
    treated as "not provided" so that lower-priority sources can fill them.
    """

    def __init__(self, settings_cls, yaml_path: str):
        super().__init__(settings_cls)
        self.yaml_path = yaml_path
        self._data = None  # parsed lazily by _load()

    def _load(self) -> dict:
        """Parse the YAML file on first call and cache the resulting mapping.

        Raises:
            ValueError: if the document's top level is not a mapping.
        """
        if self._data is None:
            try:
                with open(self.yaml_path, encoding="utf-8") as f:
                    loaded = yaml.safe_load(f)
            except FileNotFoundError:
                loaded = None
            if loaded is None:
                # Missing file or empty document: nothing to contribute.
                loaded = {}
            elif not isinstance(loaded, dict):
                # A top-level list or scalar has no field names to look up;
                # fail with a clear message instead of an AttributeError later.
                raise ValueError(
                    f"{self.yaml_path}: expected a mapping at the top level, "
                    f"got {type(loaded).__name__}"
                )
            self._data = loaded
        return self._data

    def get_field_value(self, field, field_name):
        """Return ``(value, key, found)`` for one settings field."""
        value = self._load().get(field_name)
        return value, field_name, value is not None

    def __call__(self):
        """Return the values this source provides, keyed by field name."""
        values = {}
        for field_name, field in self.settings_cls.model_fields.items():
            # Resolve each field once, not once for the filter and again for
            # the value.
            value, key, found = self.get_field_value(field, field_name)
            if found:
                values[key] = value
        return values

class AppSettings(BaseSettings):
    # ... fields ...

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings,
        env_settings,
        dotenv_settings,
        file_secret_settings,
    ):
        """Return the settings sources in priority order (first wins).

        pydantic-settings passes exactly these four built-in sources as
        keyword arguments. There is no ``cli_settings_source`` argument, so
        looking one up would raise ``KeyError``. Command-line parsing is
        switched on with ``cli_parse_args=True`` in ``model_config``, and the
        library then places the CLI source ahead of everything returned here.
        """
        return (
            init_settings,         # Constructor args: explicit overrides win
            env_settings,          # Environment variables
            dotenv_settings,       # .env file
            file_secret_settings,  # Secrets directory (no-op unless configured)
            YamlSettingsSource(settings_cls, "config.yaml"),  # YAML file
        )

The tuple order defines precedence — first source wins on conflicts.

Approach 2: Dynaconf (Feature-Rich)

Dynaconf is purpose-built for layered configuration:

from dynaconf import Dynaconf

settings = Dynaconf(
    settings_files=["config/default.toml", "config/.secrets.toml"],
    environments=True,               # Enable [development]/[production] sections
    # The option is ``envvar_prefix``. An unrecognised ``env_prefix`` keyword
    # would leave the prefix at Dynaconf's default (DYNACONF_).
    envvar_prefix="APP",             # APP_* environment variables
    load_dotenv=True,                # Read .env files
    merge_enabled=True,              # Deep merge instead of replace
)

Config file (config/default.toml):

# Baseline values; the environment selected at runtime is merged over these.
[default]
debug = false
port = 8000

[default.database]
host = "localhost"
port = 5432
pool_min = 5

# Used when ENV_FOR_DYNACONF=production.
[production]
debug = false

# Only the keys that differ from [default.database] are listed here; with
# merging enabled the remaining keys (e.g. port) come from the default table.
[production.database]
host = "prod-db.internal"
pool_min = 10
pool_max = 50

# Used when ENV_FOR_DYNACONF=development.
[development]
debug = true

Dynaconf selects the environment from ENV_FOR_DYNACONF:

ENV_FOR_DYNACONF=production python app.py
# Gets: production settings merged over default

Dynaconf Vault Integration

import os  # required for the VAULT_TOKEN lookup below

settings = Dynaconf(
    settings_files=["config/default.toml"],
    vault_enabled=True,
    vault_url="https://vault.internal:8200",
    vault_path="secret/data/myapp",
    # The token is taken from the environment; .get() yields None when
    # VAULT_TOKEN is unset rather than raising KeyError.
    vault_token=os.environ.get("VAULT_TOKEN"),
)

# Secrets from Vault are merged with file/env config
db_password = settings.DATABASE.PASSWORD

Approach 3: Custom Layered Loader

When you need full control over the merge process:

import os
import json
import yaml
from pathlib import Path
from typing import Any
from copy import deepcopy

class ConfigLayer:
    """A named source of configuration values (defaults, a file, the environment...)."""

    def __init__(self, name: str, data: dict):
        self.name = name
        self.data = data


class LayeredConfig:
    """Stack of configuration layers; later layers override earlier ones.

    Nested dicts are merged key by key. Any other value, including a list,
    replaces the earlier value wholesale.
    """

    # Marks "path absent" so that an explicit None value is still a real value.
    _MISSING = object()

    def __init__(self):
        self._layers: list[ConfigLayer] = []
        self._merged: dict = {}

    def add_layer(self, name: str, data: dict):
        """Append a layer with the highest priority so far and re-merge.

        ``None`` is accepted as an empty layer, because that is what an empty
        YAML or JSON document parses to.
        """
        self._layers.append(ConfigLayer(name, {} if data is None else data))
        self._rebuild()

    def _rebuild(self):
        """Recompute the merged view from all layers, lowest priority first."""
        # Merge into a fresh dict and swap it in at the end, so a failure
        # part-way through leaves the previous merged view in place.
        merged: dict = {}
        for layer in self._layers:
            self._deep_merge(merged, layer.data)
        self._merged = merged

    def _deep_merge(self, base: dict, override: dict):
        """Merge *override* into *base* in place, recursing into nested dicts."""
        for key, value in override.items():
            if (
                key in base
                and isinstance(base[key], dict)
                and isinstance(value, dict)
            ):
                self._deep_merge(base[key], value)
            else:
                # Copy so later merges never mutate a layer's own data.
                base[key] = deepcopy(value)

    @classmethod
    def _walk(cls, tree: Any, dotted_path: str) -> Any:
        """Follow ``a.b.c`` through nested dicts; return ``_MISSING`` if absent."""
        current = tree
        for key in dotted_path.split("."):
            if isinstance(current, dict) and key in current:
                current = current[key]
            else:
                return cls._MISSING
        return current

    def get(self, dotted_path: str, default: Any = None) -> Any:
        """Return the merged value at *dotted_path*, or *default* if absent."""
        found = self._walk(self._merged, dotted_path)
        return default if found is self._MISSING else found

    def explain(self, dotted_path: str) -> list[tuple[str, Any]]:
        """Show which layers contributed to a value.

        Returns ``(layer name, value)`` pairs in the order the layers were
        added. The last pair is the value that wins. A layer that explicitly
        sets the path to ``None`` is included, since it still overrides the
        layers before it.
        """
        contributions = []
        for layer in self._layers:
            found = self._walk(layer.data, dotted_path)
            if found is not self._MISSING:
                contributions.append((layer.name, found))
        return contributions

    def as_dict(self) -> dict:
        """Return an independent deep copy of the merged configuration."""
        return deepcopy(self._merged)

The explain method is invaluable for debugging — it shows exactly which layer provided (or overrode) a value:

# Layers are added lowest priority first; each later layer overrides earlier ones.
config = LayeredConfig()
config.add_layer("defaults", {"database": {"port": 5432, "host": "localhost"}})
config.add_layer("production", {"database": {"host": "prod.db.com"}})
config.add_layer("env", {"database": {"host": "override.db.com"}})

config.get("database.host")  # "override.db.com"
# explain() lists every layer that defines the path, in the order added.
config.explain("database.host")
# [("defaults", "localhost"), ("production", "prod.db.com"), ("env", "override.db.com")]

Environment Variable Parsing

Environment variables are always strings. Parsing them correctly matters:

import os
import json
from typing import Any

def parse_env_value(value: str) -> Any:
    """Parse environment variable string to Python type.

    Tried in order: boolean words, integer, finite float, JSON list or
    object. Anything else is returned unchanged as a string.

    Note that ``"1"`` and ``"0"`` are matched by the boolean step, so they
    come back as ``True`` and ``False`` rather than as integers.
    """
    import math

    # Boolean
    lowered = value.lower()
    if lowered in ("true", "1", "yes"):
        return True
    if lowered in ("false", "0", "no"):
        return False

    # Integer
    try:
        return int(value)
    except ValueError:
        pass

    # Float. float() also accepts "nan", "inf", "infinity" and overflows
    # huge literals to inf; those are kept as plain strings rather than
    # turned into non-finite numbers.
    try:
        number = float(value)
    except ValueError:
        pass
    else:
        if math.isfinite(number):
            return number

    # JSON (for lists, dicts)
    if value.startswith(("[", "{")):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            pass

    return value

def env_to_nested(prefix: str = "APP", separator: str = "__") -> dict:
    """Convert APP__DATABASE__HOST=x to {"database": {"host": "x"}}.

    Only variables starting with ``prefix`` followed by ``separator`` are
    used. The rest of the name is lower-cased and split on ``separator`` to
    form the nesting path, and each value goes through ``parse_env_value``.
    """
    result: dict = {}
    prefix_upper = prefix.upper() + separator

    # Iterate in sorted order so the result does not depend on the ordering
    # of os.environ. A bare APP__DATABASE sorts before APP__DATABASE__HOST.
    for key in sorted(os.environ):
        if not key.startswith(prefix_upper):
            continue

        *parents, leaf = key[len(prefix_upper):].lower().split(separator)
        current = result
        for part in parents:
            child = current.get(part)
            if not isinstance(child, dict):
                # A scalar set at this path by another variable is replaced
                # by a dict; indexing into the scalar would raise TypeError.
                child = current[part] = {}
            current = child
        current[leaf] = parse_env_value(os.environ[key])

    return result

Configuration Validation

Loading config is only half the battle — validating it catches errors at startup:

from dataclasses import dataclass

@dataclass
class ValidationError:
    """One configuration problem: the dotted path of the setting and a message."""

    path: str
    message: str


def validate_config(config: dict) -> list[ValidationError]:
    """Check a resolved configuration dict and return every problem found.

    All checks are run and their errors collected, so one call reports the
    full set of problems. An empty list means the configuration passed.
    """
    errors = []

    # A key with an empty body ("database:" in YAML) loads as None, and a
    # mis-typed file may put a scalar here. Treat both as an empty section so
    # the required-field check reports the problem instead of crashing.
    database = config.get("database")
    if not isinstance(database, dict):
        database = {}

    # Required fields
    if not database.get("host"):
        errors.append(ValidationError(
            "database.host", "Database host is required"
        ))

    # Range checks
    pool_max = database.get("pool_max", 20)
    pool_min = database.get("pool_min", 5)
    try:
        inverted = pool_min > pool_max
    except TypeError:
        # e.g. a quoted number compared with an int
        errors.append(ValidationError(
            "database.pool_min",
            f"pool_min ({pool_min!r}) and pool_max ({pool_max!r}) "
            "must be comparable numbers"
        ))
    else:
        if inverted:
            errors.append(ValidationError(
                "database.pool_min",
                f"pool_min ({pool_min}) cannot exceed pool_max ({pool_max})"
            ))

    # Consistency checks
    debug = config.get("debug", False)
    log_level = config.get("log_level", "INFO")
    # Compare case-insensitively so that "debug" is caught as well as "DEBUG".
    if not debug and str(log_level).upper() == "DEBUG":
        errors.append(ValidationError(
            "log_level",
            "DEBUG logging in non-debug mode — intentional?"
        ))

    return errors

# At startup: refuse to boot with an invalid configuration.
import sys

errors = validate_config(config.as_dict())
if errors:
    for e in errors:
        # Diagnostics go to stderr so they are not mixed into regular output.
        print(f"Config error at {e.path}: {e.message}", file=sys.stderr)
    sys.exit(1)

Hot Reloading

Some settings (log levels, feature flags, rate limits) should be changeable without restarting:

import asyncio
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileModifiedEvent, FileSystemEventHandler

class ConfigReloader(FileSystemEventHandler):
    """Watchdog handler that reloads a YAML file into a LayeredConfig.

    Watchdog calls ``on_modified`` from its observer thread, while the reload
    itself runs as a coroutine on the application's event loop. The loop is
    therefore captured when the handler is constructed: either passed as
    ``loop`` or taken from the running loop of the constructing code.
    """

    def __init__(
        self,
        config: LayeredConfig,
        config_path: str,
        loop: "asyncio.AbstractEventLoop | None" = None,
    ):
        self.config = config
        self.config_path = config_path
        # Watchdog may report the path in a different form (absolute versus
        # relative), so compare resolved paths rather than raw strings.
        self._resolved_path = Path(config_path).resolve()
        self._reload_lock = asyncio.Lock()
        if loop is None:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # Constructed outside a running loop; on_modified reports it.
                loop = None
        self._loop = loop

    def on_modified(self, event):
        """Schedule a reload on the event loop when the watched file changes."""
        if event.is_directory:
            return
        if Path(os.fsdecode(event.src_path)).resolve() != self._resolved_path:
            return
        if self._loop is None:
            # asyncio.get_event_loop() cannot find the application's loop
            # from the observer thread, so fail with an actionable message.
            raise RuntimeError(
                "ConfigReloader has no event loop: construct it inside a "
                "running loop or pass loop=..."
            )
        asyncio.run_coroutine_threadsafe(self._reload(), self._loop)

    async def _reload(self):
        """Re-read the file and add its contents as a new "file" layer."""
        async with self._reload_lock:
            try:
                with open(self.config_path, encoding="utf-8") as f:
                    # An empty file, for instance one caught mid-save, parses
                    # to None; treat that as an empty layer.
                    new_data = yaml.safe_load(f) or {}
            except (OSError, yaml.YAMLError) as exc:
                # Nobody awaits the scheduled future, so report the failure
                # here and keep serving the previous configuration.
                print(f"Configuration reload from {self.config_path} failed: {exc}")
                return
            # NOTE(review): each reload appends another "file" layer, so the
            # layer list grows with every change — consider replacing the
            # previous one if LayeredConfig gains an API for it.
            self.config.add_layer("file", new_data)
            print(f"Configuration reloaded from {self.config_path}")

Important caveat: not all settings can be hot-reloaded. Database pool sizes, listening ports, and worker counts typically require a restart. Document which settings support hot-reload and which don’t.

Configuration Dump for Debugging

In production, you need to see the resolved configuration without exposing secrets:

import re

SENSITIVE_PATTERNS = re.compile(
    r"(password|secret|token|key|credential)", re.IGNORECASE
)


def _redact(value):
    """Return *value* with sensitive dict entries masked, descending into lists."""
    if isinstance(value, dict):
        return {
            k: "****" if SENSITIVE_PATTERNS.search(str(k)) else _redact(v)
            for k, v in value.items()
        }
    if isinstance(value, list):
        return [_redact(item) for item in value]
    if isinstance(value, tuple):
        return tuple(_redact(item) for item in value)
    return value


def safe_dump(config: dict, indent: int = 0) -> str:
    """Render *config* as indented ``key: value`` lines with secrets masked.

    Keys are sorted. Nested dicts are indented by two spaces per level. Any
    key matching ``SENSITIVE_PATTERNS`` has its whole value, including a
    nested dict, replaced by ``****``.
    """
    lines = []
    prefix = "  " * indent
    # Sort on str(key) so non-string keys (e.g. ints from YAML) cannot raise.
    for key, value in sorted(config.items(), key=lambda item: str(item[0])):
        if SENSITIVE_PATTERNS.search(str(key)):
            # Check this before recursing: the children of a dict stored
            # under a sensitive key may have harmless-looking names.
            lines.append(f"{prefix}{key}: ****")
        elif isinstance(value, dict):
            lines.append(f"{prefix}{key}:")
            lines.append(safe_dump(value, indent + 1))
        else:
            # Lists can carry dicts holding secrets of their own.
            lines.append(f"{prefix}{key}: {_redact(value)}")
    return "\n".join(lines)

Log this at startup (at INFO level) so every deployment shows its resolved config with secrets masked.

One thing to remember: A production config system needs typed validation (Pydantic Settings), layered precedence (defaults → files → env → CLI), an explain mechanism for debugging which layer set a value, and careful separation of secrets from regular configuration.

Tags: python, configuration, production

See Also