Macro Systems — Deep Dive

Build Python macro systems from scratch — from AST rewriting pipelines and source-to-source compilers to production macro frameworks with debugging support.

Anatomy of a Macro System

A complete Python macro system needs four components:

1. Macro detection — identify which code should be transformed
2. AST transformation — rewrite the syntax tree
3. Source registration — preserve debuggability
4. Integration — hook into Python’s compilation/import pipeline

Let us build each component.

Building an AST Macro Framework

Step 1: Macro Registry

import ast
from typing import Callable, Dict

# Global registry: macro name -> transformer invoked on that macro's call node.
_macros: Dict[str, Callable[[ast.AST], ast.AST]] = {}


def macro(name):
    """Return a decorator that registers a transformer under ``name``.

    The decorated function is stored in the module-level ``_macros`` table
    (replacing any earlier entry with the same name) and handed back
    unchanged, so it can still be called directly.
    """
    def register(transformer):
        _macros.update({name: transformer})
        return transformer

    return register

@macro('debug_print')
def debug_print_macro(node: ast.Call) -> "ast.Call | list[ast.stmt]":
    """Expand ``debug_print(a, b, ...)`` into one ``print`` per argument.

    Each positional argument ``expr`` becomes the statement
    ``print(f"<source of expr> = {expr!r}")``. Keyword arguments are ignored.

    Args:
        node: The ``debug_print(...)`` call being expanded.

    Returns:
        A list of ``ast.Expr`` statements, one per positional argument, or
        ``node`` itself, unchanged, when the call has no positional arguments.
    """
    if not node.args:
        return node

    statements = []
    for arg in node.args:
        # Label with the argument's source text ("x + 1"); for a plain name
        # this is just the identifier. ast.dump() would print the node
        # structure ("BinOp(left=Name(...") instead of something readable.
        label = ast.unparse(arg)

        # Build: print(f"{label} = {arg!r}")
        format_str = ast.JoinedStr(
            values=[
                ast.Constant(value=f"{label} = "),
                ast.FormattedValue(
                    value=arg,
                    conversion=ord('r'),  # the !r conversion
                    format_spec=None,
                ),
            ]
        )
        print_call = ast.Call(
            func=ast.Name(id='print', ctx=ast.Load()),
            args=[format_str],
            keywords=[],
        )
        statements.append(ast.Expr(value=print_call))

    return statements

Step 2: AST Transformer

class MacroExpander(ast.NodeTransformer):
    """Replace calls to registered macros with what their transformers return.

    Two positions are examined: a call that forms a whole statement
    (``name(...)``) and a call that is the entire right-hand side of an
    assignment (``x = name(...)``). ``expanded_count`` is the number of macro
    calls whose result was put into the tree.
    """

    def __init__(self):
        self.expanded_count = 0

    @staticmethod
    def _lookup(value):
        """Return the transformer for a plain ``name(...)`` macro call, else ``None``."""
        if isinstance(value, ast.Call) and isinstance(value.func, ast.Name):
            return _macros.get(value.func.id)
        return None

    def visit_Expr(self, node):
        """Expand macro calls that appear as statements.

        The transformer may return a list of statements, a single statement,
        or an expression (which is wrapped in ``ast.Expr``). Every replacement
        statement takes over the location of the statement it replaces, so
        line numbers in tracebacks refer to the macro call site.
        """
        transform = self._lookup(node.value)
        if transform is None:
            return self.generic_visit(node)

        result = transform(node.value)
        self.expanded_count += 1
        if isinstance(result, list):
            return [ast.copy_location(stmt, node) for stmt in result]
        if not isinstance(result, ast.stmt):
            result = ast.Expr(value=result)
        return ast.copy_location(result, node)

    def visit_Assign(self, node):
        """Expand a macro call used as the value of an assignment.

        Only a result that is a single AST node can replace the value. A list
        of statements cannot stand in for an expression, so in that case the
        call is left untouched and is not counted as expanded.
        """
        transform = self._lookup(node.value)
        if transform is not None:
            expanded = transform(node.value)
            if isinstance(expanded, ast.AST):
                node.value = ast.copy_location(expanded, node.value)
                self.expanded_count += 1
        return self.generic_visit(node)

def expand_macros(source: str, filename: str = '<macro>') -> "tuple[ast.Module, int]":
    """Parse ``source`` and expand every registered macro call found in it.

    Args:
        source: Python source text to parse.
        filename: File name handed to ``ast.parse`` for the parsed source.

    Returns:
        ``(tree, count)``: the rewritten module AST, with locations filled in
        for generated nodes, and the expander's ``expanded_count``.
    """
    expander = MacroExpander()
    tree = expander.visit(ast.parse(source, filename))
    ast.fix_missing_locations(tree)
    return tree, expander.expanded_count

Step 3: Import Hook Integration

import sys
import importlib.abc
import importlib.util
import importlib.machinery
import linecache
from pathlib import Path

class MacroFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that routes selected packages through ``MacroLoader``.

    A module is claimed when the first component of its dotted name is in
    ``macro_packages`` and a matching source file exists under an entry of
    ``sys.path``. Everything else is left to the remaining finders.
    """

    def __init__(self, macro_packages):
        self.macro_packages = set(macro_packages)
        self._processing = set()  # names whose lookup is currently in progress

    def find_spec(self, fullname, path, target=None):
        """Return a ``MacroLoader``-backed spec for ``fullname``, or ``None``.

        For each ``sys.path`` entry the module file ``a/b.py`` is tried first,
        then the package file ``a/b/__init__.py``. A package spec carries its
        directory as ``submodule_search_locations`` so its submodules can be
        imported. ``path`` and ``target`` are accepted for the finder protocol
        but are not used.
        """
        if fullname in self._processing:
            return None  # prevent recursion

        if fullname.partition('.')[0] not in self.macro_packages:
            return None

        # Dotted name -> relative path, computed once for all entries.
        relative = Path(*fullname.split('.'))

        self._processing.add(fullname)
        try:
            for entry in sys.path:
                base = Path(entry) / relative

                module_file = base.with_name(f'{base.name}.py')
                if module_file.is_file():
                    return importlib.util.spec_from_file_location(
                        fullname,
                        module_file,
                        loader=MacroLoader(str(module_file)),
                    )

                init_file = base / '__init__.py'
                if init_file.is_file():
                    return importlib.util.spec_from_file_location(
                        fullname,
                        init_file,
                        loader=MacroLoader(str(init_file)),
                        submodule_search_locations=[str(base)],
                    )
        finally:
            self._processing.discard(fullname)
        return None

class MacroLoader(importlib.abc.Loader):
    """Loader that macro-expands a source file before executing it."""

    def __init__(self, source_path):
        self.source_path = source_path

    def create_module(self, spec):
        """Return ``None`` so the import system creates the module object."""
        return None

    def exec_module(self, module):
        """Read, expand, compile and execute the source in ``module``'s namespace.

        When at least one macro was expanded, the expanded source text is
        registered in ``linecache`` under the file's path and the code is
        compiled from that same text, so the line numbers in tracebacks line
        up with the lines ``linecache`` returns.
        """
        # Decode the way Python decodes source files (encoding cookie or
        # UTF-8) rather than with the locale's default encoding.
        source = importlib.util.decode_source(
            Path(self.source_path).read_bytes()
        )

        # Expand macros
        tree, count = expand_macros(source, self.source_path)

        if count > 0:
            # Register expanded source for debugging
            expanded_source = ast.unparse(tree)
            linecache.cache[self.source_path] = (
                len(expanded_source),
                None,  # no mtime: linecache.checkcache() leaves the entry alone
                expanded_source.splitlines(True),
                self.source_path,
            )
            # The tree still carries the original file's line numbers, which
            # no longer match the registered text; re-parse the expanded
            # source so code locations and linecache agree.
            tree = ast.parse(expanded_source, self.source_path)

        code = compile(tree, self.source_path, 'exec')
        exec(code, module.__dict__)

def enable_macros(*packages):
    """Install a ``MacroFinder`` for ``packages`` at the front of ``sys.meta_path``."""
    finder = MacroFinder(packages)
    sys.meta_path[:0] = [finder]

Advanced Macro Patterns

Quasiquotation

In Lisp, quasiquotation lets you write code templates with “holes” for dynamic values. We can approximate this in Python:

import ast
import copy

class Quote:
    """A reusable expression template whose names can be replaced by values.

    The template source is parsed once as a single expression. Each call to
    ``substitute`` works on a fresh copy, so the template is never modified.
    """

    def __init__(self, source):
        self.template = ast.parse(source, mode='eval').body

    def substitute(self, **bindings):
        """Return a copy of the template with bound names replaced.

        Args:
            **bindings: Maps a name used in the template to its replacement.
                An ``ast.AST`` value is spliced in as a subtree; any other
                value is inserted as an ``ast.Constant``.

        Returns:
            A new expression node with missing locations filled in. Names
            without a binding are left as they are.
        """
        class Substituter(ast.NodeTransformer):
            def visit_Name(self, node):
                if node.id not in bindings:
                    return node
                replacement = bindings[node.id]
                if isinstance(replacement, ast.AST):
                    # Give every use site its own copy: sharing one node
                    # object between several places (or with the caller's
                    # tree) means a later in-place edit changes all of them.
                    return copy.deepcopy(replacement)
                return ast.copy_location(ast.Constant(value=replacement), node)

        result = Substituter().visit(copy.deepcopy(self.template))
        ast.fix_missing_locations(result)
        return result

# Usage in macro definitions
# Expression template for "time elapsed so far"; it refers to a variable named
# `_start_time`, which must exist wherever the substituted expression runs.
timing_template = Quote('__import__("time").time() - _start_time')

@macro('timed')
def timed_macro(node: ast.Call) -> "ast.Call | list[ast.stmt]":
    """Expand ``timed(expr)`` into code that runs ``expr`` and prints the elapsed time.

    The generated statements are equivalent to::

        _start_time = __import__("time").time()
        _result = <expr>
        print(f"Elapsed: {__import__('time').time() - _start_time:.4f}s")

    The names ``_start_time`` and ``_result`` are fixed, so they overwrite
    variables of the same name in the surrounding code. Only the first
    positional argument is used.

    Args:
        node: The ``timed(...)`` call being expanded.

    Returns:
        The list of generated statements, or ``node`` itself, unchanged, when
        the call has no positional arguments.
    """
    if not node.args:
        return node

    expr = node.args[0]

    start_stmt = ast.parse(
        '_start_time = __import__("time").time()', mode='exec'
    ).body[0]

    # No separate `_result = None` pre-assignment: it was overwritten by this
    # statement before anything could read it.
    run_stmt = ast.Assign(
        targets=[ast.Name(id='_result', ctx=ast.Store())],
        value=expr,
    )

    elapsed = ast.FormattedValue(
        value=timing_template.substitute(),
        conversion=-1,  # no !r/!s/!a conversion
        format_spec=ast.JoinedStr(values=[ast.Constant(value='.4f')]),
    )
    report_stmt = ast.Expr(value=ast.Call(
        func=ast.Name(id='print', ctx=ast.Load()),
        args=[ast.JoinedStr(values=[
            ast.Constant(value='Elapsed: '),
            elapsed,
            ast.Constant(value='s'),
        ])],
        keywords=[],
    ))

    return [start_stmt, run_stmt, report_stmt]

Hygiene: Avoiding Name Collisions

In Lisp macro systems, “hygiene” means macros do not accidentally capture or shadow user variables. Python macros need manual care:

import uuid

def hygienic_name(prefix='_macro'):
    """Return ``prefix`` joined by ``_`` to eight random hex digits.

    The digits are the first eight characters of a fresh ``uuid4`` hex
    string, so repeated calls give different names with high probability.
    """
    suffix = uuid.uuid4().hex[:8]
    return '{}_{}'.format(prefix, suffix)

@macro('with_lock')
def with_lock_macro(node: ast.Call):
    """Transform with_lock(lock, body) into properly locked execution.

    NOTE(review): the transformation itself is elided here (the body ends in
    ``...``), so as written this function builds nothing and returns ``None``.
    """
    # Unique name with the 'lock' prefix; not used further in this stub.
    lock_var = hygienic_name('lock')
    # Use unique names internally to avoid collision
    # with user code variable names
    ...

pytest’s Assert Rewriting: A Real Macro System

pytest contains one of the most sophisticated macro-like systems in the Python ecosystem. It rewrites assert statements to provide detailed failure messages:

# What you write:
assert x == y + 1

# What pytest transforms it into (conceptually):
# evaluate the comparison once, and only build the detailed message on failure
_result = x == y + 1
if not _result:
    _x = x          # capture intermediate values
    _y = y
    _sum = y + 1
    # the message reports each captured value next to the expression it came from
    raise AssertionError(
        f"assert {_x} == {_y} + 1\n"
        f"  where {_x} = x\n"
        f"  and   {_sum} = {_y} + 1"
    )

pytest implements this via an import hook (_pytest.assertion.rewrite) that:

1. Intercepts test file imports
2. Parses the source into an AST
3. Walks the tree to find assert statements
4. Rewrites each assert to capture all intermediate expression values
5. Compiles and caches the modified bytecode

The implementation handles nested expressions, comparison chains, and boolean operators, and it copies the original line numbers onto the rewritten nodes so that tracebacks still point at the right source lines.

Source-to-Source Transformation

For external DSLs that compile to Python, source-to-source transformation is cleaner than AST manipulation:

import re
from typing import List, Tuple

class SourceTransformer:
    """Apply an ordered list of regex rewrite rules to source text.

    Rules run in registration order, each one over the output of the rule
    before it.
    """

    def __init__(self):
        self.transforms: List[Tuple[re.Pattern, Callable]] = []

    def rule(self, pattern):
        """Return a decorator that registers a handler for ``pattern``.

        ``pattern`` is compiled with ``re.MULTILINE``. The handler is later
        used as the replacement callback of ``re.Pattern.sub`` and is
        returned unchanged by the decorator.
        """
        regex = re.compile(pattern, re.MULTILINE)

        def register(handler):
            self.transforms.append((regex, handler))
            return handler

        return register

    def apply(self, source: str) -> str:
        """Run every registered rule over ``source`` and return the result."""
        rewritten = source
        for regex, handler in self.transforms:
            rewritten = regex.sub(handler, rewritten)
        return rewritten

# Shared SourceTransformer instance; the rules below register themselves on it.
transformer = SourceTransformer()

@transformer.rule(r'^(\s*)unless\s+(.+?):')
def unless_to_if_not(match):
    """Rewrite an ``unless <condition>:`` header as ``if not (<condition>):``.

    The leading whitespace is kept. The condition is matched lazily and
    stops at the earliest colon it can, so a condition that itself contains
    a colon is cut short.
    """
    indent, condition = match.group(1, 2)
    return f'{indent}if not ({condition}):'

@transformer.rule(r'^(\s*)until\s+(.+?):')
def until_to_while_not(match):
    """Rewrite an ``until <condition>:`` header as ``while not (<condition>):``.

    The leading whitespace is kept. The condition is matched lazily and
    stops at the earliest colon it can, so a condition that itself contains
    a colon is cut short.
    """
    indent, condition = match.group(1, 2)
    return f'{indent}while not ({condition}):'

This approach is simpler but less powerful than AST transformation — it cannot understand code structure, only text patterns.

Debugging Macro-Expanded Code

The biggest challenge with macros is debugging. Strategies:

1. Expansion Preview

def show_expansion(source):
    """Print the macro-expanded form of ``source``.

    Output is a header comment with the number of expansions, followed by
    the unparsed text of the expanded tree.
    """
    expanded_tree, count = expand_macros(source)
    header = f"# {count} macros expanded"
    print(header)
    body = ast.unparse(expanded_tree)
    print(body)

2. Source Maps

Map generated code locations back to original source:

class SourceMap:
    """Line-number table from generated code back to original source.

    Lookups are keyed on the generated line number alone; the generated
    file's name is not part of the key.
    """

    def __init__(self):
        # generated_line -> (original_file, original_line)
        self.mappings = {}

    def add(self, gen_line, orig_file, orig_line):
        """Record that ``gen_line`` corresponds to ``orig_line`` of ``orig_file``."""
        self.mappings.update({gen_line: (orig_file, orig_line)})

    def translate_traceback(self, tb):
        """Point mapped frames of ``tb`` at their original source, in place.

        ``tb`` is iterated, and each frame whose ``lineno`` has a mapping gets
        its ``filename`` and ``lineno`` attributes overwritten. Frames without
        a mapping are left alone. Nothing is returned.
        """
        for frame in tb:
            origin = self.mappings.get(frame.lineno)
            if origin is None:
                continue
            frame.filename, frame.lineno = origin

3. Conditional Expansion

import os

# Debug switch, read once when this line runs: true when the MACRO_DEBUG
# environment variable is "1" or "true" (compared case-insensitively).
MACRO_DEBUG = os.environ.get('MACRO_DEBUG', '').lower() in ('1', 'true')

class MacroLoader(importlib.abc.Loader):
    """Macro-expanding loader that can dump each expansion next to its source.

    When the module-level ``MACRO_DEBUG`` flag is true and at least one macro
    was expanded, the expanded source is written to a sibling file whose
    suffix is ``.macro_expanded.py``.
    """

    def __init__(self, source_path):
        # exec_module reads this attribute, so the class must set it itself.
        self.source_path = source_path

    def exec_module(self, module):
        """Read, expand, compile and execute the source in ``module``'s namespace."""
        # Decode the way Python decodes source files (encoding cookie or
        # UTF-8) rather than with the locale's default encoding.
        source = importlib.util.decode_source(
            Path(self.source_path).read_bytes()
        )
        tree, count = expand_macros(source, self.source_path)

        if MACRO_DEBUG and count > 0:
            expanded = ast.unparse(tree)
            debug_path = Path(self.source_path).with_suffix('.macro_expanded.py')
            # Explicit UTF-8 so non-ASCII source does not depend on the locale.
            debug_path.write_text(
                f"# Auto-generated macro expansion\n{expanded}",
                encoding='utf-8',
            )
            print(f"Macro expansion written to {debug_path}")

        code = compile(tree, self.source_path, 'exec')
        exec(code, module.__dict__)

Existing Macro Libraries

Library	Approach	Status	Use Case
MacroPy	Import hooks + AST	Unmaintained	Historical reference
mcpy	Import hooks + AST	Active	Lightweight macro system
Cython	Source-to-source	Active	C extension compilation
mypyc	AST analysis	Active	Compiled Python
Coconut	Source-to-source	Active	Functional Python superset

Tradeoffs

Power vs Readability: Macros can make code extremely concise but impossible to understand without knowing the macro definitions.

Compile-time vs Runtime: AST macros run at import/compile time, adding startup cost but zero runtime overhead. Decorator macros run at definition time with minimal overhead.

Tooling compatibility: IDEs, linters, and type checkers cannot understand macro-expanded code. This is the single biggest practical obstacle to Python macro adoption.

One thing to remember: Python macro systems combine import hooks for interception, AST transformers for rewriting, and source maps for debuggability — pytest’s assert rewriting proves this architecture works at scale, but the tradeoff is always IDE/tooling compatibility, making decorators and __init_subclass__ the pragmatic choice for most projects.

Tags: python, metaprogramming, language-design