Macro Systems — Deep Dive
Anatomy of a Macro System
A complete Python macro system needs four components:
- Macro detection — identify which code should be transformed
- AST transformation — rewrite the syntax tree
- Source registration — preserve debuggability
- Integration — hook into Python’s compilation/import pipeline
Let us build each component.
Building an AST Macro Framework
Step 1: Macro Registry
import ast
from typing import Callable, Dict
# Global registry: macro name -> transformer callable.
_macros: Dict[str, Callable[[ast.AST], ast.AST]] = {}


def macro(name):
    """Return a decorator that files the decorated function under *name*.

    The decorated function is stored in the module-level ``_macros``
    registry and handed back unchanged, so it stays directly callable.
    """
    def register(transformer):
        _macros.update({name: transformer})
        return transformer

    return register
@macro('debug_print')
def debug_print_macro(node: ast.Call) -> "ast.AST | list[ast.stmt]":
    """Expand ``debug_print(a, b)`` into one ``print(f"<src> = {<expr>!r}")`` per argument.

    Args:
        node: The ``debug_print(...)`` call node.

    Returns:
        A list of ``ast.Expr`` statements, one per positional argument, or
        ``node`` itself when the call has no positional arguments.
    """
    if not node.args:
        return node

    statements = []
    for arg in node.args:
        # Label each value with the argument's source text. ast.unparse gives
        # "x + 1" where ast.dump would give "BinOp(left=Name(...), ...)".
        label = ast.unparse(arg)

        # Build: print(f"{label} = {arg!r}")
        format_str = ast.JoinedStr(
            values=[
                ast.Constant(value=f"{label} = "),
                ast.FormattedValue(
                    value=arg,
                    conversion=ord('r'),  # 114 selects the !r conversion
                    format_spec=None,
                ),
            ]
        )
        print_call = ast.Call(
            func=ast.Name(id='print', ctx=ast.Load()),
            args=[format_str],
            keywords=[],
        )
        statements.append(ast.Expr(value=print_call))
    return statements
Step 2: AST Transformer
class MacroExpander(ast.NodeTransformer):
    """Replace calls to registered macros with their expansions.

    Only two positions are handled: a macro call used as a whole expression
    statement, and a macro call used as the right-hand side of an assignment.

    Attributes:
        expanded_count: Number of expansions that actually changed the tree.
    """

    def __init__(self, macros=None):
        """Create an expander.

        Args:
            macros: Optional mapping of macro name to transformer. Defaults
                to the module-level ``_macros`` registry.
        """
        self.expanded_count = 0
        self._registry = _macros if macros is None else macros

    def _transformer_for(self, value):
        """Return the transformer for ``name(...)`` calls to a known macro, else None."""
        if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
            return self._registry.get(value.func.id)
        return None

    def visit_Expr(self, node):
        """Expand macro calls that appear as statements."""
        transform = self._transformer_for(node.value)
        if transform is None:
            return self.generic_visit(node)

        result = transform(node.value)
        if result is node.value:
            # The macro declined (returned the call untouched): nothing to count.
            return self.generic_visit(node)

        self.expanded_count += 1
        # Anchor generated statements at the call site; otherwise
        # fix_missing_locations would place them on line 1.
        if isinstance(result, list):
            return [ast.copy_location(stmt, node) for stmt in result]
        if not isinstance(result, ast.stmt):
            result = ast.Expr(value=result)
        return ast.copy_location(result, node)

    def visit_Assign(self, node):
        """Expand macros in assignment expressions."""
        transform = self._transformer_for(node.value)
        if transform is not None:
            expanded = transform(node.value)
            # Only an expression can become the assigned value. Statement
            # lists and declined calls are left alone and not counted.
            if isinstance(expanded, ast.expr) and expanded is not node.value:
                node.value = ast.copy_location(expanded, node.value)
                self.expanded_count += 1
        return self.generic_visit(node)
def expand_macros(source: str, filename: str = '<macro>') -> "tuple[ast.Module, int]":
    """Parse *source* and expand all registered macros.

    Args:
        source: Python source text.
        filename: Name passed to ``ast.parse`` for error messages.

    Returns:
        ``(tree, count)``: the transformed module and the expander's
        ``expanded_count``.

    Raises:
        SyntaxError: If *source* cannot be parsed.
    """
    tree = ast.parse(source, filename)
    expander = MacroExpander()
    tree = expander.visit(tree)
    # Nodes created by macros have no line/column info; compile() needs it.
    ast.fix_missing_locations(tree)
    return tree, expander.expanded_count
Step 3: Import Hook Integration
import sys
import importlib.abc
import importlib.util
import importlib.machinery
import linecache
from pathlib import Path
class MacroFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that routes selected packages through ``MacroLoader``."""

    def __init__(self, macro_packages):
        """Remember the top-level package names that get macro expansion.

        Args:
            macro_packages: Iterable of top-level package/module names.
        """
        self.macro_packages = set(macro_packages)
        self._processing = set()

    def find_spec(self, fullname, path, target=None):
        """Locate *fullname* on ``sys.path`` and return a macro-expanding spec.

        Packages (``<name>/__init__.py``) are tried before plain modules
        (``<name>.py``) in each ``sys.path`` entry.

        Returns:
            A module spec using ``MacroLoader``, or ``None`` when the module
            is not macro-enabled or no source file is found, so the next
            finder on ``sys.meta_path`` gets a chance.
        """
        if fullname in self._processing:
            return None  # prevent recursion

        top_level = fullname.split('.')[0]
        if top_level not in self.macro_packages:
            return None

        # Same for every sys.path entry, so compute it once.
        parts = fullname.replace('.', '/')

        self._processing.add(fullname)
        try:
            for entry in sys.path:
                package_dir = Path(entry) / parts
                package_init = package_dir / '__init__.py'
                if package_init.is_file():
                    return importlib.util.spec_from_file_location(
                        fullname,
                        package_init,
                        loader=MacroLoader(str(package_init)),
                        # Marks the spec as a package so submodules resolve.
                        submodule_search_locations=[str(package_dir)],
                    )

                source_path = Path(entry) / f'{parts}.py'
                if source_path.is_file():
                    return importlib.util.spec_from_file_location(
                        fullname,
                        source_path,
                        loader=MacroLoader(str(source_path)),
                    )
        finally:
            self._processing.discard(fullname)
        return None
class MacroLoader(importlib.abc.Loader):
    """Loader that macro-expands a source file before executing it."""

    def __init__(self, source_path):
        """Store the path of the source file to load.

        Args:
            source_path: Path of the ``.py`` file, as a string.
        """
        self.source_path = source_path

    def create_module(self, spec):
        """Return ``None`` so the import system creates the module object."""
        return None

    def exec_module(self, module):
        """Read, macro-expand, compile and execute the source in *module*."""
        # decode_source honours a PEP 263 coding cookie and defaults to UTF-8,
        # unlike read_text(), which uses the locale encoding.
        raw = Path(self.source_path).read_bytes()
        source = importlib.util.decode_source(raw)

        # Expand macros
        tree, count = expand_macros(source, self.source_path)

        if count > 0:
            # Register expanded source for debugging
            expanded_source = ast.unparse(tree) + '\n'
            linecache.cache[self.source_path] = (
                len(expanded_source),
                None,  # no mtime, so linecache.checkcache() keeps the entry
                expanded_source.splitlines(True),
                self.source_path,
            )
            # Compile the text that was registered, so line numbers in
            # tracebacks match the lines linecache will show.
            code = compile(expanded_source, self.source_path, 'exec')
        else:
            code = compile(tree, self.source_path, 'exec')

        # Executes the module's own source; the file is trusted as any import is.
        exec(code, module.__dict__)
def enable_macros(*packages):
    """Turn on macro expansion for the given top-level packages.

    A ``MacroFinder`` for *packages* is placed at the front of
    ``sys.meta_path`` so it is consulted before the regular finders.
    """
    finder = MacroFinder(packages)
    sys.meta_path.insert(0, finder)
Advanced Macro Patterns
Quasiquotation
In Lisp, quasiquotation lets you write code templates with “holes” for dynamic values. We can approximate this in Python:
import ast
import copy
class Quote:
    """Represent a code template with substitution points.

    The template is a single Python expression. Any ``Name`` in it can act
    as a hole that ``substitute`` fills in.
    """

    def __init__(self, source):
        """Parse *source* (one expression) into the template AST.

        Raises:
            SyntaxError: If *source* is not a valid expression.
        """
        self.template = ast.parse(source, mode='eval').body

    def substitute(self, **bindings):
        """Replace Name nodes matching binding keys with their values.

        Args:
            **bindings: Maps a template name to an ``ast.AST`` node (spliced
                in as a copy) or to any other value (wrapped in
                ``ast.Constant``).

        Returns:
            A new expression AST. The template and the nodes passed in
            *bindings* are left untouched.
        """
        tree = copy.deepcopy(self.template)

        class Substituter(ast.NodeTransformer):
            def visit_Name(self, node):
                if node.id not in bindings:
                    return node
                replacement = bindings[node.id]
                if isinstance(replacement, ast.AST):
                    # Copy per occurrence so the result never shares nodes
                    # with the caller or between repeated holes.
                    return copy.deepcopy(replacement)
                return ast.Constant(value=replacement)

        result = Substituter().visit(tree)
        ast.fix_missing_locations(result)
        return result
# Usage in macro definitions
# Expression for the elapsed time; reads the `_start_time` variable that the
# expansion below assigns.
timing_template = Quote('__import__("time").time() - _start_time')


@macro('timed')
def timed_macro(node: ast.Call) -> "ast.AST | list[ast.stmt]":
    """Transform timed(expr) into timing code.

    The expansion stores the start time in ``_start_time``, evaluates the
    first argument into ``_result``, then prints ``Elapsed: <seconds>s``
    with four decimals. Both names are fixed, so they overwrite same-named
    variables in the surrounding code. Arguments after the first are ignored.

    Returns:
        The list of generated statements, or ``node`` unchanged when the
        call has no positional arguments.
    """
    if not node.args:
        return node
    expr = node.args[0]
    return [
        ast.parse('_start_time = __import__("time").time()', mode='exec').body[0],
        ast.parse('_result = None', mode='exec').body[0],
        ast.Assign(
            targets=[ast.Name(id='_result', ctx=ast.Store())],
            value=expr,
        ),
        ast.Expr(value=ast.Call(
            func=ast.Name(id='print', ctx=ast.Load()),
            args=[ast.JoinedStr(values=[
                ast.Constant(value='Elapsed: '),
                ast.FormattedValue(
                    value=timing_template.substitute(),
                    conversion=-1,  # -1 means no !s/!r/!a conversion
                    format_spec=ast.JoinedStr(values=[ast.Constant(value='.4f')]),
                ),
                ast.Constant(value='s'),
            ])],
            keywords=[],
        )),
    ]
Hygiene: Avoiding Name Collisions
In Lisp macro systems, “hygiene” means macros do not accidentally capture or shadow user variables. Python macros need manual care:
import uuid
def hygienic_name(prefix='_macro'):
    """Build a fresh identifier of the form ``<prefix>_<8 hex digits>``.

    The suffix is the first eight hex characters of a random UUID4, which
    makes a clash with user-chosen names unlikely.
    """
    suffix = uuid.uuid4().hex[:8]
    return '{}_{}'.format(prefix, suffix)
@macro('with_lock')
def with_lock_macro(node: ast.Call):
    """Transform with_lock(lock, body) into properly locked execution.

    Sketch only: the transformation itself is elided (``...``), so as
    written this function generates a unique name and then returns ``None``.
    """
    # Fresh per-expansion name for the lock variable; not used yet because
    # the rest of the body is elided.
    lock_var = hygienic_name('lock')
    # Use unique names internally to avoid collision
    # with user code variable names
    ...
pytest’s Assert Rewriting: A Real Macro System
pytest contains one of the most sophisticated macro-like systems in the Python ecosystem. It rewrites assert statements to provide detailed failure messages:
# What you write:
assert x == y + 1
# What pytest transforms it into (conceptually):
# (Simplified illustration; `x` and `y` come from the surrounding test.)
_result = x == y + 1
if not _result:
    # Only on failure: snapshot each sub-expression for the message.
    _x = x  # capture intermediate values
    _y = y
    _sum = y + 1
    raise AssertionError(
        f"assert {_x} == {_y} + 1\n"
        f" where {_x} = x\n"
        f" and {_sum} = {_y} + 1"
    )
pytest implements this via an import hook (_pytest.assertion.rewrite) that:
- Intercepts test file imports
- Parses the source into an AST
- Walks the tree to find assert statements
- Rewrites each assert to capture all intermediate expression values
- Compiles and caches the modified bytecode
The implementation handles nested expressions, comparison chains, boolean operators, and generates source maps for accurate tracebacks.
Source-to-Source Transformation
For external DSLs that compile to Python, source-to-source transformation is cleaner than AST manipulation:
import re
from typing import List, Tuple
class SourceTransformer:
    """Ordered collection of regex rewrite rules applied to source text."""

    def __init__(self):
        # (compiled pattern, replacement callable) pairs, in registration order.
        self.transforms: List[Tuple[re.Pattern, Callable]] = []

    def rule(self, pattern):
        """Return a decorator registering a handler for *pattern*.

        *pattern* is compiled with ``re.MULTILINE``. The handler receives
        each match object and returns the replacement text. The decorator
        hands the handler back unchanged.
        """
        regex = re.compile(pattern, flags=re.MULTILINE)

        def register(handler):
            self.transforms.append((regex, handler))
            return handler

        return register

    def apply(self, source: str) -> str:
        """Run every rule over *source* in order and return the result."""
        rewritten = source
        for regex, handler in self.transforms:
            rewritten = regex.sub(handler, rewritten)
        return rewritten
transformer = SourceTransformer()


def _header_pattern(keyword):
    """Build the regex for a ``<keyword> <condition>:`` statement header.

    Group 1 is the indentation. The condition lands in group 2 when the
    colon ends the line (optionally followed by a comment), which keeps
    colons inside slices, dicts and strings intact. Otherwise group 3 holds
    the text up to the first colon, for one-liners like ``unless x: pass``.
    """
    return (
        rf'^(\s*){keyword}\s+'
        r'(?:(.+):(?=[ \t]*(?:#.*)?$)'  # block header: colon closes the line
        r'|(.+?):)'                     # fallback: first colon on the line
    )


def _header_parts(match):
    """Return ``(indent, condition)`` from a ``_header_pattern`` match."""
    return match.group(1), match.group(2) or match.group(3)


@transformer.rule(_header_pattern('unless'))
def unless_to_if_not(match):
    """Transform 'unless condition:' to 'if not (condition):'."""
    indent, condition = _header_parts(match)
    return f'{indent}if not ({condition}):'


@transformer.rule(_header_pattern('until'))
def until_to_while_not(match):
    """Transform 'until condition:' to 'while not (condition):'."""
    indent, condition = _header_parts(match)
    return f'{indent}while not ({condition}):'
This approach is simpler but less powerful than AST transformation — it cannot understand code structure, only text patterns.
Debugging Macro-Expanded Code
The biggest challenge with macros is debugging. Strategies:
1. Expansion Preview
def show_expansion(source):
    """Print *source* as it reads after macro expansion.

    The first printed line is a comment with the expansion count, followed
    by the unparsed, expanded module.
    """
    expanded_tree, macro_count = expand_macros(source)
    print(f"# {macro_count} macros expanded")
    print(ast.unparse(expanded_tree))
2. Source Maps
Map generated code locations back to original source:
class SourceMap:
    """Lookup table from generated line numbers to original locations."""

    def __init__(self):
        # generated_line -> (original_file, original_line)
        self.mappings = {}

    def add(self, gen_line, orig_file, orig_line):
        """Record that *gen_line* came from *orig_line* of *orig_file*."""
        self.mappings[gen_line] = (orig_file, orig_line)

    def translate_traceback(self, tb):
        """Rewrite mapped frames in *tb* to point at the original source.

        Each frame whose ``lineno`` has a mapping gets its ``filename`` and
        ``lineno`` overwritten in place. Other frames are left as they are.
        Nothing is returned.
        """
        for frame in tb:
            try:
                origin = self.mappings[frame.lineno]
            except KeyError:
                continue
            frame.filename, frame.lineno = origin
3. Conditional Expansion
import os
# Set MACRO_DEBUG=1 (or "true") in the environment to dump expansions to disk.
MACRO_DEBUG = os.environ.get('MACRO_DEBUG', '').lower() in ('1', 'true')


class MacroLoader(importlib.abc.Loader):
    """Loader that macro-expands a source file, optionally dumping the result."""

    def __init__(self, source_path):
        """Store the path of the source file to load.

        Args:
            source_path: Path of the ``.py`` file, as a string.
        """
        self.source_path = source_path

    def create_module(self, spec):
        """Return ``None`` so the import system creates the module object."""
        return None

    def exec_module(self, module):
        """Expand, compile and execute the source in *module*.

        When ``MACRO_DEBUG`` is set and at least one macro was expanded, the
        expanded source is also written next to the original file as
        ``<name>.macro_expanded.py``.
        """
        # decode_source honours a PEP 263 coding cookie and defaults to UTF-8,
        # unlike read_text(), which uses the locale encoding.
        raw = Path(self.source_path).read_bytes()
        source = importlib.util.decode_source(raw)
        tree, count = expand_macros(source, self.source_path)

        if MACRO_DEBUG and count > 0:
            expanded = ast.unparse(tree)
            debug_path = Path(self.source_path).with_suffix('.macro_expanded.py')
            debug_path.write_text(
                f"# Auto-generated macro expansion\n{expanded}",
                encoding='utf-8',
            )
            print(f"Macro expansion written to {debug_path}")

        code = compile(tree, self.source_path, 'exec')
        # Executes the module's own source; the file is trusted as any import is.
        exec(code, module.__dict__)
Existing Macro Libraries
| Library | Approach | Status | Use Case |
|---|---|---|---|
| MacroPy | Import hooks + AST | Unmaintained | Historical reference |
| mcpy | Import hooks + AST | Active | Lightweight macro system |
| Cython | Source-to-source | Active | C extension compilation |
| mypyc | AST analysis | Active | Compiled Python |
| Coconut | Source-to-source | Active | Functional Python superset |
Tradeoffs
Power vs Readability: Macros can make code extremely concise but impossible to understand without knowing the macro definitions.
Compile-time vs Runtime: AST macros run at import/compile time, adding startup cost but zero runtime overhead. Decorator macros run at definition time with minimal overhead.
Tooling compatibility: IDEs, linters, and type checkers cannot understand macro-expanded code. This is the single biggest practical obstacle to Python macro adoption.
One thing to remember: Python macro systems combine import hooks for interception, AST transformers for rewriting, and source maps for debuggability — pytest’s assert rewriting proves this architecture works at scale, but the tradeoff is always IDE/tooling compatibility, making decorators and __init_subclass__ the pragmatic choice for most projects.
See Also
- Python Custom Import Hooks How Python's import system can be customized to load code from anywhere — databases, URLs, or even entirely new file formats.
- Python Dsl Design Patterns How to create mini-languages inside Python that let people express complex ideas in simple, natural words.
- Python Runtime Code Generation How Python can write and run its own code while your program is already running — like a chef inventing new recipes mid-dinner.
- Ci Cd Why big apps can ship updates every day without turning your phone into a glitchy mess — CI/CD is the behind-the-scenes quality gate and delivery truck.
- Containerization Why does software that works on your computer break on everyone else's? Containers fix that — and they're why Netflix can deploy 100 updates a day without the site going down.