Python importlib Custom Loaders — Deep Dive

The import system in detail

Python’s import system (defined in PEP 302, refined in PEP 451) has several phases that custom loaders can hook into:

  1. sys.modules check — cached modules are returned immediately. Your loader is never called for already-imported modules unless you manipulate sys.modules directly.
  2. Meta path traversal — sys.meta_path finders are tried in order. The first to return a non-None ModuleSpec wins.
  3. Module creation — the loader’s create_module() is called. Return None for default behavior or a custom module object.
  4. Module execution — the loader’s exec_module() populates the module’s namespace.
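  5. Caching — the module is stored in sys.modules after create_module() but before exec_module() runs (so circular imports can see the partially initialized module); in execution order this happens between steps 3 and 4.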

Building a database-backed module loader

A production pattern for loading plugin code from a database:

import sys
import types
import hashlib
from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec

class DatabaseModuleStore:
    """In-memory stand-in for a database table of module sources.

    Source text and version numbers are kept in two dicts, both keyed by
    module name.
    """

    def __init__(self):
        # name -> source text, and name -> version number.
        self._modules, self._versions = {}, {}

    def store(self, name, source, version=1):
        """Save *source* under *name* and record *version* (default 1)."""
        self._modules[name], self._versions[name] = source, version

    def get(self, name):
        """Return the source stored for *name*, or None when absent."""
        try:
            return self._modules[name]
        except KeyError:
            return None

    def get_version(self, name):
        """Return the version recorded for *name*, or 0 when absent."""
        try:
            return self._versions[name]
        except KeyError:
            return 0

class DatabaseModuleFinder(MetaPathFinder):
    """Meta path finder that serves ``plugins.*`` modules from a source store.

    The store must provide ``get(key)`` (source text or None) and
    ``get_version(key)``; the key is the module name with ``PREFIX`` removed.

    The import machinery always imports the parent package before a
    submodule, so ``import plugins.my_plugin`` first asks for ``plugins``.
    When nothing else on ``sys.meta_path`` can supply that package, this
    finder synthesises an empty namespace-style package for it; otherwise
    the import would fail before the plugin lookup is ever reached.
    """

    PREFIX = "plugins."

    def __init__(self, store):
        self.store = store
        # Emptied by invalidate_caches(); find_spec() does not populate it.
        self._cache = {}

    def find_spec(self, name, path, target=None):
        """Return a spec for the prefix package or a stored plugin module.

        Returns None for names outside ``PREFIX`` and for plugin names the
        store has no source for, so other finders get a chance.
        """
        if name == self.PREFIX.rstrip("."):
            return self._find_package_spec(name, path, target)

        if not name.startswith(self.PREFIX):
            return None

        module_key = name[len(self.PREFIX):]
        source = self.store.get(module_key)
        if source is None:
            return None

        loader = DatabaseModuleLoader(
            source=source,
            origin=f"db://{module_key}",
            version=self.store.get_version(module_key),
        )
        return ModuleSpec(
            name=name,
            loader=loader,
            origin=f"db://{module_key}",
        )

    def _find_package_spec(self, name, path, target):
        """Return a synthetic package spec unless a real package exists."""
        for finder in list(sys.meta_path):
            # Skip this finder and its siblings: two database finders
            # probing each other would recurse forever.
            if isinstance(finder, DatabaseModuleFinder):
                continue
            probe = getattr(finder, "find_spec", None)
            if probe is not None and probe(name, path, target) is not None:
                # A real package is importable; defer to it so its
                # __init__ and on-disk submodules keep working.
                return None
        # loader=None together with is_package=True is how the import
        # system represents a namespace package (empty __path__).
        return ModuleSpec(name, None, is_package=True)

    def invalidate_caches(self):
        """Called by importlib.invalidate_caches()."""
        self._cache.clear()

class DatabaseModuleLoader(Loader):
    """Loader that executes source text handed to it by the finder.

    Holds the source string, the origin label used as the compile filename,
    and the version number to stamp on the module.
    """

    def __init__(self, source, origin, version):
        self.source, self.origin, self.version = source, origin, version

    def create_module(self, spec):
        """Return None so the import machinery creates a default module."""
        return None

    def exec_module(self, module):
        """Stamp version and source hash on *module*, then run the source.

        The metadata is written before the source runs, so the source may
        overwrite ``__version__`` or ``__source_hash__`` itself.
        """
        setattr(module, "__version__", self.version)
        digest = hashlib.sha256(self.source.encode())
        setattr(module, "__source_hash__", digest.hexdigest())

        namespace = module.__dict__
        compiled = compile(self.source, self.origin, "exec")
        exec(compiled, namespace)

    def get_source(self, fullname):
        """Return the stored source text regardless of *fullname*."""
        return self.source

# Usage
store = DatabaseModuleStore()
store.store("my_plugin", '''
def process(data):
    return [x * 2 for x in data]

PLUGIN_VERSION = "1.0"
''')

finder = DatabaseModuleFinder(store)
sys.meta_path.insert(0, finder)

import plugins.my_plugin
print(plugins.my_plugin.process([1, 2, 3]))  # [2, 4, 6]

Implementing package support

To support packages (directories with __init__), set submodule_search_locations on the spec:

class PackageAwareFinder(MetaPathFinder):
    """Finder that resolves dotted names against a nested registry dict.

    A dict node is treated as a package (its source is the ``"__init__"``
    entry, or empty text when that entry is missing); any other node is
    used directly as the module's source.
    """

    def __init__(self, registry):
        # Shape: {"pkg": {"__init__": src, "sub": src}}
        self.registry = registry

    def find_spec(self, name, path, target=None):
        """Walk the registry along *name*; return a spec or None."""
        node = self.registry
        for segment in name.split("."):
            # Bail out as soon as the path leaves the registry tree.
            if not isinstance(node, dict) or segment not in node:
                return None
            node = node[segment]

        if isinstance(node, dict):
            code_text = node.get("__init__", "")
            search_locations = [f"registry://{name}"]
        else:
            code_text = node
            search_locations = None

        spec = ModuleSpec(
            name, SimpleLoader(code_text), origin=f"registry://{name}"
        )
        # Only packages get a search path; that is what marks them as
        # packages for the import system.
        if search_locations is not None:
            spec.submodule_search_locations = search_locations
        return spec

Hot reloading with custom loaders

A loader that watches for changes and reloads modules:

import sys
import importlib
import hashlib
from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec

class HotReloadFinder(MetaPathFinder):
    """Finder that serves modules from a source provider and can reload them.

    The provider must expose ``get_source(name)`` returning the source text
    or None. A SHA-256 digest of the last source handed out is remembered
    per module so ``check_for_updates()`` can detect changes.
    """

    def __init__(self, source_provider):
        self.provider = source_provider
        self._hashes = {}  # module name -> hex digest of last served source

    @staticmethod
    def _digest(source):
        """Return the SHA-256 hex digest of *source*."""
        return hashlib.sha256(source.encode()).hexdigest()

    def find_spec(self, name, path, target=None):
        """Return a spec for *name*, or None if the provider has no source.

        A spec is returned even when the module is already loaded and
        unchanged. Finders are not consulted for plain imports of modules
        already in ``sys.modules``, so that case is only reached through
        ``importlib.reload()``, which raises if no finder returns a spec.
        """
        source = self.provider.get_source(name)
        if source is None:
            return None

        self._hashes[name] = self._digest(source)
        loader = HotReloadLoader(source)
        return ModuleSpec(name, loader, origin=f"hot://{name}")

    def check_for_updates(self):
        """Call periodically to detect changed modules.

        Each tracked module whose source digest changed is dropped from
        ``sys.modules`` and imported again. If that import raises, the
        previously loaded module is put back before the exception
        propagates, so the process keeps a working version. The failing
        revision's digest stays recorded, so the same broken source is not
        retried on every call.
        """
        for name in list(self._hashes):
            source = self.provider.get_source(name)
            if source is None:
                continue

            new_hash = self._digest(source)
            if new_hash == self._hashes[name]:
                continue

            print(f"Reloading changed module: {name}")
            self._hashes[name] = new_hash

            previous = sys.modules.pop(name, None)
            try:
                importlib.import_module(name)
            except BaseException:
                # Re-raised below; only restore the last good module first.
                if previous is not None:
                    sys.modules[name] = previous
                raise

class HotReloadLoader(Loader):
    """Loader that runs a fixed source string in the module's namespace."""

    def __init__(self, source):
        self.source = source

    def create_module(self, spec):
        """Return None so the import machinery creates a default module."""
        return None

    def exec_module(self, module):
        """Compile the source and execute it inside *module*.

        The spec's origin is used as the filename of the compiled code.
        """
        filename = module.__spec__.origin
        compiled = compile(self.source, filename, "exec")
        exec(compiled, vars(module))

Import hooks for sandboxing

Restrict which modules can be imported:

import sys
from importlib.abc import MetaPathFinder

class ImportGuard(MetaPathFinder):
    """Meta path finder that vetoes imports outside an allow-list.

    The decision is made on the top-level package of the requested name,
    so allowing ``json`` also allows ``json.decoder``. The guard never
    loads anything itself: it either raises or returns None so that the
    remaining finders handle the import.
    """

    def __init__(self, allowed_modules):
        """Store the allow-list.

        *allowed_modules* is an iterable of top-level module names. A single
        string is treated as one module name rather than being split into
        characters by ``set()``.
        """
        if isinstance(allowed_modules, str):
            allowed_modules = [allowed_modules]
        self.allowed = set(allowed_modules)
        self.blocked_attempts = []  # full dotted names of rejected imports

    def find_spec(self, name, path, target=None):
        """Return None for allowed names; raise ImportError otherwise.

        Rejected names are appended to ``blocked_attempts``. The raised
        ImportError carries the module name in its ``name`` attribute so
        callers can inspect what was blocked without parsing the message.
        """
        top_level = name.partition(".")[0]

        if top_level in self.allowed:
            return None  # Allow default import machinery to handle it

        self.blocked_attempts.append(name)
        raise ImportError(
            f"Import of '{name}' is not allowed in this sandbox. "
            f"Allowed modules: {', '.join(sorted(self.allowed))}",
            name=name,
        )

# Install the guard FIRST in meta_path so it runs before default finders
# NOTE: finders are only consulted for modules not already in sys.modules,
# so anything imported before this point stays importable.
guard = ImportGuard({"math", "json", "datetime", "collections"})
sys.meta_path.insert(0, guard)

The Loader protocol in detail

The full loader protocol includes optional methods beyond create_module and exec_module:

from importlib.abc import Loader

class FullLoader(Loader):
    """Skeleton loader showing the optional methods of the loader protocol.

    Source text is looked up by full module name in an in-memory mapping
    supplied at construction time.
    """

    def __init__(self, sources=None):
        """Store a copy of *sources* (module name -> source text).

        With no argument the loader starts with no sources, so every lookup
        returns None instead of failing on a missing attribute.
        """
        self._sources = dict(sources) if sources is not None else {}

    def create_module(self, spec):
        """Return a module object or None for default."""
        return None

    def exec_module(self, module):
        """Execute the module code in the module's namespace.

        Does nothing when no source is registered for the module's name.
        """
        code = self.get_code(module.__name__)
        if code is not None:
            exec(code, module.__dict__)

    def get_source(self, fullname):
        """Return source code as string (optional, for inspect)."""
        return self._sources.get(fullname)

    def get_code(self, fullname):
        """Return compiled code object (optional, for caching).

        Returns None only when no source is registered; an empty source
        string is a valid (empty) module and still yields a code object.
        """
        source = self.get_source(fullname)
        if source is None:
            return None
        return compile(source, fullname, "exec")

    def is_package(self, fullname):
        """Return True if the module is a package."""
        return False

    def get_data(self, path):
        """Return bytes for a data file (for pkgutil).

        This skeleton holds no data files and returns None.
        """
        # NOTE(review): importlib's get_data contract expects OSError for an
        # unknown path; a real implementation should raise it.
        return None

Implementing get_source is important because tools like inspect.getsource() and debuggers rely on it to show source code.

Performance considerations

Custom loaders add overhead to every import statement that reaches them. Minimize this:

  1. Check prefixes early — if your loader only handles plugins.* modules, return None immediately for anything else.
  2. Cache find_spec results — avoid hitting databases or network on repeated imports. sys.modules handles runtime caching, but the finder is still called for new imports.
  3. Implement invalidate_caches() — importlib.invalidate_caches() calls this on all finders. Use it to clear stale data.
  4. Compile source to code objects — cache compiled code objects when possible. Compilation is the expensive part, not execution.

Thread safety

The import system has a per-module lock (since Python 3.3) to prevent deadlocks during concurrent imports. Your custom loader’s exec_module runs under this lock. Avoid blocking operations (network calls, long computations) inside exec_module — do them in find_spec instead, or cache aggressively.

import threading
import importlib

class ThreadSafeLoader(Loader):
    """Loader that executes source text previously placed in a shared cache.

    The cache is a plain dict guarded by a lock. Sources are registered with
    ``cache_source()`` (for example from a finder's ``find_spec``) so that
    ``exec_module`` itself never has to fetch anything.
    """

    def __init__(self):
        self._source_cache = {}  # module name -> source text
        self._lock = threading.Lock()  # guards _source_cache

    def cache_source(self, name, source):
        """Register *source* as the code to run for module *name*."""
        with self._lock:
            self._source_cache[name] = source

    def create_module(self, spec):
        """Return None so the import machinery creates a default module."""
        return None

    def exec_module(self, module):
        """Run the cached source for *module* in its namespace.

        If no (or empty) source is cached for the module's name, the module
        is left untouched.
        """
        # Source should already be cached by find_spec
        # Hold the lock only for the lookup: the executed code may import
        # other modules through this loader, and the lock is not reentrant.
        with self._lock:
            source = self._source_cache.get(module.__name__)
        if source:
            exec(compile(source, module.__name__, "exec"),
                 module.__dict__)

Debugging the import system

import sys

# sys.flags.verbose reflects the interpreter's -v option.
# NOTE: this bare expression only evaluates the flag and discards the value;
# wrap it in print() to see it, or start Python with -v for import tracing.
sys.flags.verbose  # Check if -v was passed

# Ask the finders on sys.meta_path to drop their cached state.
# (This does not switch on verbose output.)
import importlib
importlib.invalidate_caches()

# Inspect what finders are registered
for i, finder in enumerate(sys.meta_path):
    print(f"  meta_path[{i}]: {type(finder).__name__}")

# Check path hooks
for hook in sys.path_hooks:
    print(f"  path_hook: {hook}")

The one thing to remember: Production custom loaders need to handle the full lifecycle — from prefix-based filtering in find_spec for performance, through proper package support with submodule_search_locations, to implementing get_source for debugger compatibility — and must be thread-safe since exec_module runs under Python’s per-module import lock.

Tags: python, import-system, metaprogramming

See Also

  • Python Ast Module Code Analysis How Python's ast module reads your code like a grammar teacher diagrams sentences — turning source text into a tree you can inspect and change.
  • Python Dis Module Bytecode How Python's dis module lets you peek at the secret instructions your computer actually runs when it executes your Python code.
  • Python Gc Module Internals How Python's garbage collector automatically cleans up memory you are no longer using — like a tidy roommate for your program.
  • Python Site Customization How Python's site module sets up your environment before your code even starts running — the invisible first step of every Python program.
  • Python Startup Optimization Why Python takes a moment to start and what you can do to make your scripts and tools launch faster.