Python importlib Custom Loaders — Deep Dive
The import system in detail
Python’s import system (defined in PEP 302, refined in PEP 451) has several phases that custom loaders can hook into:
- sys.modules check — cached modules are returned immediately. Your loader is never called for already-imported modules unless you manipulate sys.modules directly.
- Meta path traversal — sys.meta_path finders are tried in order. The first to return a non-None ModuleSpec wins.
- Module creation — the loader’s create_module() is called. Return None for default behavior or a custom module object.
- Module execution — the loader’s exec_module() populates the module’s namespace.
- Caching — the module is stored in sys.modules before exec_module() runs (to handle circular imports).
Building a database-backed module loader
A production pattern for loading plugin code from a database:
import sys
import types
import hashlib
from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec
class DatabaseModuleStore:
    """In-memory stand-in for a database table of module source code.

    Source text and version numbers are kept in two dictionaries that are
    both keyed by module name.
    """

    def __init__(self):
        # name -> source text
        self._modules = {}
        # name -> version number
        self._versions = {}

    def store(self, name, source, version=1):
        """Record *source* and *version* under *name*, replacing any old entry."""
        self._versions[name] = version
        self._modules[name] = source

    def get(self, name):
        """Return the source stored for *name*, or None when nothing is stored."""
        try:
            return self._modules[name]
        except KeyError:
            return None

    def get_version(self, name):
        """Return the version stored for *name*, or 0 when nothing is stored."""
        if name in self._versions:
            return self._versions[name]
        return 0
class DatabaseModuleFinder(MetaPathFinder):
    """Meta path finder that resolves ``plugins.*`` imports from a module store.

    The store must provide ``get(name)`` (source text or None) and
    ``get_version(name)``. Names that do not belong to the ``PREFIX``
    namespace are declined by returning None so other finders can run.
    """

    PREFIX = "plugins."

    def __init__(self, store):
        self.store = store
        # Nothing in this class populates the cache; it exists so that
        # invalidate_caches() has state to reset.
        self._cache = {}

    def find_spec(self, name, path, target=None):
        """Return a ModuleSpec for *name*, or None if this finder declines it.

        The bare package name (``PREFIX`` without its trailing dot) is answered
        with a loader-less package spec. The import system imports the parent
        package before any submodule, so without this ``import plugins.x``
        fails before the finder is ever asked about ``plugins.x``.
        """
        package_name = self.PREFIX.rstrip(".")
        if name == package_name:
            # loader=None together with is_package=True yields a
            # namespace-style package whose __path__ is an empty list;
            # submodules are then resolved by this finder via sys.meta_path.
            return ModuleSpec(name=name, loader=None, is_package=True)

        if not name.startswith(self.PREFIX):
            return None

        module_key = name[len(self.PREFIX):]
        source = self.store.get(module_key)
        if source is None:
            return None

        origin = f"db://{module_key}"
        loader = DatabaseModuleLoader(
            source=source,
            origin=origin,
            version=self.store.get_version(module_key),
        )
        return ModuleSpec(
            name=name,
            loader=loader,
            origin=origin,
        )

    def invalidate_caches(self):
        """Called by importlib.invalidate_caches()."""
        self._cache.clear()
class DatabaseModuleLoader(Loader):
    """Loader that executes source text it was handed at construction time."""

    def __init__(self, source, origin, version):
        self.source, self.origin, self.version = source, origin, version

    def create_module(self, spec):
        """Return None so the import machinery creates the module itself."""
        return None

    def exec_module(self, module):
        """Stamp *module* with version and source hash, then run the source in it.

        The metadata attributes are assigned before the source runs, so the
        executed code may overwrite them.
        """
        module.__version__ = self.version
        encoded = self.source.encode()
        module.__source_hash__ = hashlib.sha256(encoded).hexdigest()
        # self.origin is used as the filename of the compiled code object.
        compiled = compile(self.source, self.origin, "exec")
        exec(compiled, module.__dict__)

    def get_source(self, fullname):
        """Return the stored source text; *fullname* is not consulted."""
        return self.source
# Usage
store = DatabaseModuleStore()
# The stored text is compiled as a module body, so the function body must be
# indented inside the string literal or compilation raises IndentationError.
store.store("my_plugin", '''
def process(data):
    return [x * 2 for x in data]
PLUGIN_VERSION = "1.0"
''')
finder = DatabaseModuleFinder(store)
# Index 0 puts this finder ahead of the finders already on sys.meta_path.
sys.meta_path.insert(0, finder)
import plugins.my_plugin
print(plugins.my_plugin.process([1, 2, 3]))  # [2, 4, 6]
Implementing package support
To support packages (directories with __init__), set submodule_search_locations on the spec:
class PackageAwareFinder(MetaPathFinder):
    """Finder backed by a nested dict registry of packages and modules.

    A dict value represents a package (its own code lives under the
    "__init__" key); any other value is used as a module's source.
    NOTE(review): SimpleLoader is not defined in the visible source; it is
    presumably a Loader taking the source text. Confirm before use.
    """

    def __init__(self, registry):
        self.registry = registry  # {"pkg": {"__init__": src, "sub": src}}

    def find_spec(self, name, path, target=None):
        """Walk the registry along the dotted *name*; return a spec or None."""
        node = self.registry
        for segment in name.split("."):
            # Stop as soon as we hit a non-package or an unknown segment.
            if not isinstance(node, dict) or segment not in node:
                return None
            node = node[segment]

        origin = f"registry://{name}"
        if not isinstance(node, dict):
            return ModuleSpec(name, SimpleLoader(node), origin=origin)

        # Packages run their "__init__" source (empty if absent) and expose
        # a search location so submodule imports are attempted.
        spec = ModuleSpec(name, SimpleLoader(node.get("__init__", "")), origin=origin)
        spec.submodule_search_locations = [f"registry://{name}"]
        return spec
Hot reloading with custom loaders
A loader that watches for changes and reloads modules:
import sys
import importlib
import hashlib
from importlib.abc import MetaPathFinder, Loader
from importlib.machinery import ModuleSpec
class HotReloadFinder(MetaPathFinder):
    """Finder that tracks a SHA-256 hash per module to detect changed source.

    The provider must expose ``get_source(name)`` returning source text or
    None.
    """

    def __init__(self, source_provider):
        self.provider = source_provider
        # module name -> hex digest of the source last handed to a loader
        self._hashes = {}

    def find_spec(self, name, path, target=None):
        """Return a spec for *name*, or None when there is nothing to load.

        None is returned when the provider has no source, and also when the
        module is already in sys.modules with an unchanged hash.
        """
        text = self.provider.get_source(name)
        if text is None:
            return None

        digest = hashlib.sha256(text.encode()).hexdigest()
        unchanged = digest == self._hashes.get(name)
        if unchanged and name in sys.modules:
            return None  # No change, use cached

        self._hashes[name] = digest
        return ModuleSpec(name, HotReloadLoader(text), origin=f"hot://{name}")

    def check_for_updates(self):
        """Call periodically to detect changed modules."""
        # Iterate over a snapshot: re-importing below goes back through
        # find_spec, which writes to self._hashes.
        for name in tuple(self._hashes):
            text = self.provider.get_source(name)
            if text is None:
                continue

            digest = hashlib.sha256(text.encode()).hexdigest()
            # Compare against the live entry, not a stale snapshot value.
            if digest == self._hashes[name]:
                continue

            print(f"Reloading changed module: {name}")
            self._hashes[name] = digest
            # Drop the cached module so the import below executes it afresh.
            sys.modules.pop(name, None)
            importlib.import_module(name)
class HotReloadLoader(Loader):
    """Loader that runs a fixed source string inside the module it is given."""

    def __init__(self, source):
        self.source = source

    def create_module(self, spec):
        """Return None so the default module object is created."""
        return None

    def exec_module(self, module):
        """Compile the stored source and execute it in *module*'s namespace."""
        # The spec's origin is used as the filename of the compiled code.
        filename = module.__spec__.origin
        compiled = compile(self.source, filename, "exec")
        exec(compiled, module.__dict__)
Import hooks for sandboxing
Restrict which modules can be imported:
import sys
from importlib.abc import MetaPathFinder
class ImportGuard(MetaPathFinder):
    """Finder that rejects imports whose top-level package is not allowed.

    Every rejected name is appended to ``blocked_attempts`` before the
    ImportError is raised.
    """

    def __init__(self, allowed_modules):
        self.allowed = set(allowed_modules)
        self.blocked_attempts = []

    def find_spec(self, name, path, target=None):
        """Return None for allowed names; raise ImportError for all others."""
        root = name.partition(".")[0]
        if root in self.allowed:
            return None  # Allow default import machinery to handle it

        self.blocked_attempts.append(name)
        raise ImportError(
            f"Import of '{name}' is not allowed in this sandbox. "
            f"Allowed modules: {', '.join(sorted(self.allowed))}"
        )
# Index 0 places the guard at the front of sys.meta_path, ahead of the
# finders that are already registered.
guard = ImportGuard({"collections", "datetime", "json", "math"})
sys.meta_path.insert(0, guard)
The Loader protocol in detail
The full loader protocol includes optional methods beyond create_module and exec_module:
from importlib.abc import Loader
class FullLoader(Loader):
    """Skeleton loader showing the optional methods of the loader protocol."""

    def __init__(self, sources=None):
        """Remember *sources*, a mapping of module name to source text.

        Without this, get_source() would fail with AttributeError because
        ``_sources`` was never assigned. The mapping is kept by reference;
        an empty dict is used when none is given.
        """
        self._sources = {} if sources is None else sources

    def create_module(self, spec):
        """Return a module object or None for default."""
        return None

    def exec_module(self, module):
        """Execute the module code in the module's namespace."""
        pass

    def get_source(self, fullname):
        """Return source code as string (optional, for inspect)."""
        return self._sources.get(fullname)

    def get_code(self, fullname):
        """Return compiled code object (optional, for caching).

        Returns None only when no source is registered for *fullname*; an
        empty source string still compiles to a valid (empty) code object.
        """
        source = self.get_source(fullname)
        if source is not None:
            return compile(source, fullname, "exec")
        return None

    def is_package(self, fullname):
        """Return True if the module is a package."""
        return False

    def get_data(self, path):
        """Return bytes for a data file (for pkgutil)."""
        # Placeholder: this skeleton has no data files and returns None.
        pass
Implementing get_source is important because tools like inspect.getsource() and debuggers rely on it to show source code.
Performance considerations
Custom loaders add overhead to every import statement that reaches them. Minimize this:
- Check prefixes early — if your loader only handles plugins.* modules, return None immediately for anything else.
- Cache find_spec results — avoid hitting databases or network on repeated imports. sys.modules handles runtime caching, but the finder is still called for new imports.
- Implement invalidate_caches() — importlib.invalidate_caches() calls this on all finders. Use it to clear stale data.
- Compile source to code objects — cache compiled code objects when possible. Compilation is the expensive part, not execution.
Thread safety
The import system has a per-module lock (since Python 3.3) to prevent deadlocks during concurrent imports. Your custom loader’s exec_module runs under this lock. Avoid blocking operations (network calls, long computations) inside exec_module — do them in find_spec instead, or cache aggressively.
import threading
import importlib
class ThreadSafeLoader(Loader):
    """Loader that executes source text looked up in a per-instance cache.

    A threading.Lock is created alongside the cache, but none of the methods
    shown here acquire it.
    """

    def __init__(self):
        self._source_cache = {}
        self._lock = threading.Lock()

    def create_module(self, spec):
        """Return None to request default module creation."""
        return None

    def exec_module(self, module):
        """Run the cached source for *module*; do nothing if none is cached."""
        # Source should already be cached by find_spec
        cached = self._source_cache.get(module.__name__)
        if not cached:
            return
        compiled = compile(cached, module.__name__, "exec")
        exec(compiled, module.__dict__)
Debugging the import system
import sys
# sys.flags.verbose tells whether the interpreter was started with -v.
# The bare expression below only reads the flag; it does not enable anything.
sys.flags.verbose
# Ask the registered finders to drop their cached state
import importlib
importlib.invalidate_caches()
# List the meta path finders in the order they are stored
for i, finder in enumerate(sys.meta_path):
    print(f" meta_path[{i}]: {type(finder).__name__}")
# List the registered path hooks
for hook in sys.path_hooks:
    print(f" path_hook: {hook}")
The one thing to remember: Production custom loaders need to handle the full lifecycle — from prefix-based filtering in find_spec for performance, through proper package support with submodule_search_locations, to implementing get_source for debugger compatibility — and must be thread-safe since exec_module runs under Python’s per-module import lock.
See Also
- Python Ast Module Code Analysis: How Python's ast module reads your code like a grammar teacher diagrams sentences — turning source text into a tree you can inspect and change.
- Python Dis Module Bytecode: How Python's dis module lets you peek at the secret instructions your computer actually runs when it executes your Python code.
- Python Gc Module Internals: How Python's garbage collector automatically cleans up memory you are no longer using — like a tidy roommate for your program.
- Python Site Customization: How Python's site module sets up your environment before your code even starts running — the invisible first step of every Python program.
- Python Startup Optimization: Why Python takes a moment to start and what you can do to make your scripts and tools launch faster.