Test Data Factories — Deep Dive
Factory architecture for large codebases
In serious projects, factories need organization. A common layout:
tests/
├── factories/
│ ├── __init__.py # Re-exports all factories
│ ├── base.py # Base factory classes
│ ├── users.py # User-related factories
│ ├── orders.py # Order-related factories
│ ├── products.py # Product-related factories
│ └── payments.py # Payment-related factories
├── conftest.py # Registers factories as fixtures
└── ...
The base module establishes conventions:
# tests/factories/base.py
import factory
from myapp.database import Session
class BaseFactory(factory.alchemy.SQLAlchemyModelFactory):
    """Abstract parent shared by every SQLAlchemy-backed test factory.

    Concrete factories inherit the session binding and persistence mode
    configured here, so they only need to declare their own ``Meta.model``.
    """

    class Meta:
        # Session the factories use to persist the objects they create.
        sqlalchemy_session = Session
        # Persist each created object with a commit.
        sqlalchemy_session_persistence = "commit"
        # This class carries configuration only; it has no model of its own.
        abstract = True
All concrete factories inherit from this, ensuring consistent database session handling.
SQLAlchemy integration patterns
factory_boy’s SQLAlchemy integration handles the ORM lifecycle:
# tests/factories/users.py
import factory
from myapp.models import User, Address, Subscription
from .base import BaseFactory
class AddressFactory(BaseFactory):
    """Factory for ``Address`` rows with Faker-generated address fields."""

    class Meta:
        model = Address

    # Randomised per instance by Faker.
    street = factory.Faker("street_address")
    city = factory.Faker("city")
    state = factory.Faker("state_abbr")
    zip_code = factory.Faker("zipcode")

    # Fixed value: every generated address uses the same country code.
    country = "US"
class SubscriptionFactory(BaseFactory):
    """Factory for ``Subscription`` rows.

    Defaults describe a free plan with no expiry and no auto-renewal.
    Pass ``premium=True`` to get an auto-renewing premium plan instead.
    """

    class Meta:
        model = Subscription

    plan = "free"
    started_at = factory.Faker("date_time_this_year")
    expires_at = None
    auto_renew = False

    class Params:
        # Overrides applied together when ``premium=True`` is passed.
        premium = factory.Trait(
            plan="premium",
            auto_renew=True,
            # Expiry is drawn from the window between +30d and +365d.
            expires_at=factory.Faker(
                "date_time_between",
                start_date="+30d",
                end_date="+365d",
            ),
        )
def _email_local_part(name):
    """Turn a display name into a dotted, lower-case e-mail local part.

    Periods are treated as separators and runs of whitespace are collapsed,
    so "Dr. Jane Doe" becomes "dr.jane.doe" rather than "dr..jane.doe".
    """
    return ".".join(name.lower().replace(".", " ").split())


class UserFactory(BaseFactory):
    """Factory for ``User`` rows with an address and a subscription.

    Traits:
        premium: attach a premium subscription instead of the default one.
        with_orders: create three orders whose ``customer`` is this user.
    """

    class Meta:
        model = User

    name = factory.Faker("name")
    # The sequence number keeps e-mails unique even when Faker happens to
    # generate the same name twice; a name-only address would collide.
    email = factory.LazyAttributeSequence(
        lambda o, n: f"{_email_local_part(o.name)}.{n}@test.com"
    )
    # Constant hash so tests never pay for real password hashing.
    password_hash = "$2b$12$fixed_hash_for_testpass123"
    is_active = True
    created_at = factory.Faker("date_time_this_year")
    address = factory.SubFactory(AddressFactory)
    subscription = factory.SubFactory(SubscriptionFactory)

    class Params:
        premium = factory.Trait(
            subscription=factory.SubFactory(
                SubscriptionFactory, premium=True
            ),
        )
        with_orders = factory.Trait(
            # Referenced by dotted path to avoid a circular import with
            # the orders module, which itself refers back to UserFactory.
            orders=factory.RelatedFactoryList(
                "tests.factories.orders.OrderFactory",
                factory_related_name="customer",
                size=3,
            ),
        )
The premium trait and with_orders trait compose: UserFactory(premium=True, with_orders=True) creates a premium user with three orders in one call.
Complex relationship graphs
Real applications have deep object graphs. Factories manage this through layered SubFactories:
# tests/factories/orders.py
from .base import BaseFactory
from .products import ProductFactory
from myapp.models import Order, OrderItem, Payment
import factory
class OrderItemFactory(BaseFactory):
    """Factory for ``OrderItem`` rows priced from their product."""

    class Meta:
        model = OrderItem

    product = factory.SubFactory(ProductFactory)
    quantity = factory.Faker("random_int", min=1, max=5)

    @factory.lazy_attribute
    def unit_price(self):
        # Copy the price from the product generated for this item.
        return self.product.price

    @factory.lazy_attribute
    def total(self):
        # Line total: unit price times quantity.
        return self.unit_price * self.quantity
class PaymentFactory(BaseFactory):
    """Factory for ``Payment`` rows; defaults to a completed card payment.

    An ``order`` must be supplied (directly or by a related-factory
    declaration) because the amount is read from ``order.total``.
    Pass ``failed=True`` for a declined payment.
    """

    class Meta:
        model = Payment

    method = "credit_card"
    status = "completed"

    @factory.lazy_attribute
    def amount(self):
        # Charge exactly what the associated order says it costs.
        return self.order.total

    processed_at = factory.Faker("date_time_this_month")
    transaction_id = factory.Faker("uuid4")

    class Params:
        # Declined payment: failure status, error code, never processed.
        failed = factory.Trait(
            status="failed",
            error_code="card_declined",
            processed_at=None,
        )
class OrderFactory(BaseFactory):
    """Factory for ``Order`` rows with a customer, two items and a payment.

    Traits:
        unpaid: mark the order as pending and attach a failed payment.
    """

    class Meta:
        model = Order

    # Dotted path avoids a circular import with the users module.
    customer = factory.SubFactory(
        "tests.factories.users.UserFactory"
    )
    status = "completed"
    created_at = factory.Faker("date_time_this_month")
    items = factory.RelatedFactoryList(
        OrderItemFactory,
        factory_related_name="order",
        size=2,
    )
    payment = factory.RelatedFactory(
        PaymentFactory,
        factory_related_name="order",
    )

    # Fixed placeholder total. The items are produced by a related-factory
    # declaration, which runs only after the order itself has been built,
    # so they cannot be summed while the order's own attributes are being
    # resolved. The earlier `hasattr(self, '_items')` guard could never be
    # true and always fell back to this same value.
    # NOTE(review): tests that need a total derived from the items should
    # pass `total=` explicitly or recompute it in a post-generation hook.
    total = 99.99

    class Params:
        unpaid = factory.Trait(
            status="pending",
            payment=factory.RelatedFactory(
                PaymentFactory,
                factory_related_name="order",
                failed=True,
            ),
        )
Creating OrderFactory() generates a full object graph: Customer (with its Address and Subscription) → Order → 2 OrderItems → 2 Products → Payment.
Custom strategies for edge cases
Build specialized factories for testing specific scenarios:
class EdgeCaseUserFactory(UserFactory):
    """User factory whose traits produce data that commonly causes bugs.

    Each trait is enabled by passing ``<trait>=True``.
    """

    class Params:
        # Accented characters and a hyphenated surname.
        unicode_name = factory.Trait(
            name="José María García-López",
            email="jose.garcia@ejemplo.com",
        )
        # 200-character name and a 200-character e-mail local part.
        extremely_long = factory.Trait(
            name="A" * 200,
            email=f"{'a' * 200}@example.com",
        )
        # Apostrophe, hyphen and markup in the name; plus-tag and
        # multi-level subdomain in the e-mail.
        special_chars = factory.Trait(
            name="O'Brien-Smith <script>alert('xss')</script>",
            email="user+tag@sub.domain.example.com",
        )
        # Optional fields left empty or missing.
        empty_optional = factory.Trait(
            phone=None,
            address=None,
            bio="",
        )
        # Creation timestamp in the future (between +1y and +5y).
        future_dates = factory.Trait(
            created_at=factory.Faker(
                "date_time_between",
                start_date="+1y",
                end_date="+5y",
            ),
        )
Use these in parameterized tests:
@pytest.mark.parametrize(
    "trait",
    [
        "unicode_name",
        "extremely_long",
        "special_chars",
        "empty_optional",
        "future_dates",
    ],
)
def test_user_profile_handles_edge_cases(db_session, trait):
    """The profile endpoint returns 200 for each edge-case user."""
    # Enable exactly one trait per parametrized run.
    enabled_trait = {trait: True}
    user = EdgeCaseUserFactory(**enabled_trait)

    # NOTE(review): `client` is not defined in this snippet; presumably a
    # module-level test client or a fixture — confirm where it comes from.
    profile_url = f"/api/users/{user.id}"
    response = client.get(profile_url)

    assert response.status_code == 200
Performance optimization
Factories that hit the database on every call slow down large test suites. Strategies to speed things up:
Build instead of create
Factory.build() creates in-memory objects without database inserts:
# Much faster than create() for unit tests: nothing is written to the database
user = UserFactory.build()
assert user.email.endswith("@test.com")
Bulk creation
For tests that need large datasets:
def test_pagination_with_many_users(db_session):
    """Page 5 of 20-per-page results is full and reports the overall count."""
    total_users = 500
    UserFactory.create_batch(total_users)
    db_session.commit()

    payload = client.get("/api/users?page=5&per_page=20").json()

    assert len(payload["items"]) == 20
    assert payload["total"] == total_users
Shared reference objects
Avoid creating duplicate parent objects:
def test_orders_for_same_customer(db_session):
    """Ten orders created for one customer all point at that customer."""
    shared_customer = UserFactory.create()

    # Passing the customer explicitly stops each order from creating its own.
    batch = OrderFactory.create_batch(10, customer=shared_customer)

    for placed_order in batch:
        assert placed_order.customer_id == shared_customer.id
Factory fixtures with scope
# conftest.py
@pytest.fixture(scope="module")
def reference_product(db_session):
    """Create one "Standard Widget" product shared across the module's tests.

    NOTE(review): a module-scoped fixture can only request fixtures of
    module scope or broader — confirm that ``db_session`` qualifies.
    """
    widget_attrs = {"name": "Standard Widget", "price": 29.99}
    return ProductFactory.create(**widget_attrs)
@pytest.fixture
def order(db_session, reference_product):
    """Order whose items all use the shared reference product."""
    # Extra `items__<field>` keyword arguments are forwarded unchanged to
    # every item the related-factory list creates. There is no per-index
    # form, so `items__0__product` would reach the item factory as an
    # unknown field instead of setting the first item's product.
    return OrderFactory.create(
        items__product=reference_product,
    )
Faker providers for domain-specific data
Create custom Faker providers for your domain:
from faker.providers import BaseProvider
class EcommerceProvider(BaseProvider):
    """Faker provider producing e-commerce flavoured test values."""

    PRODUCT_CATEGORIES = [
        "Electronics", "Clothing", "Books",
        "Home & Garden", "Sports", "Toys",
    ]
    COUPON_PREFIXES = ["SAVE", "DEAL", "PROMO", "WELCOME"]

    def product_category(self) -> str:
        """Return one of ``PRODUCT_CATEGORIES`` at random."""
        category = self.random_element(self.PRODUCT_CATEGORIES)
        return category

    def coupon_code(self) -> str:
        """Return a coupon prefix followed by a two-digit number."""
        prefix = self.random_element(self.COUPON_PREFIXES)
        number = self.random_int(min=10, max=99)
        return prefix + str(number)

    def tracking_number(self) -> str:
        """Return a carrier prefix followed by 18 random digits."""
        carrier = self.random_element(["1Z", "94", "JD"])
        digit_chars = [str(self.random_int(0, 9)) for _ in range(18)]
        return carrier + "".join(digit_chars)
# Register the provider
import factory
from faker import Faker

# Stand-alone generator for ad-hoc use outside of factories.
fake = Faker()
fake.add_provider(EcommerceProvider)

# factory.Faker(...) declarations draw from factory_boy's own Faker
# generators, not from `fake` above, so the provider has to be registered
# there as well. Without this, factory.Faker("product_category") cannot
# find the formatter.
factory.Faker.add_provider(EcommerceProvider)
# Use in factories
class ProductFactory(BaseFactory):
    """Product factory excerpt showing the custom category provider in use.

    NOTE(review): this excerpt shows no ``Meta.model`` — presumably elided
    for brevity; confirm against the full factory.
    """

    # Resolved through the custom provider's ``product_category`` method.
    category = factory.Faker("product_category")
class ShipmentFactory(BaseFactory):
    """Shipment factory excerpt showing the custom tracking provider in use.

    NOTE(review): this excerpt shows no ``Meta.model`` — presumably elided
    for brevity; confirm against the full factory.
    """

    # Resolved through the custom provider's ``tracking_number`` method.
    tracking_number = factory.Faker("tracking_number")
Testing the factories themselves
Factories are code — they can have bugs too:
class TestFactoryIntegrity:
    """Sanity checks on the factories themselves."""

    def test_user_factory_creates_valid_user(self, db_session):
        """A default user is persisted, has an e-mail and is active."""
        created = UserFactory.create()
        assert created.id is not None
        assert "@" in created.email
        assert created.is_active is True

    def test_user_factory_creates_unique_emails(self, db_session):
        """A batch of 100 users yields 100 distinct e-mail addresses."""
        batch = UserFactory.create_batch(100)
        distinct_emails = {member.email for member in batch}
        assert len(distinct_emails) == 100

    def test_order_factory_creates_full_graph(self, db_session):
        """A default order has a customer, two items and a completed payment."""
        created = OrderFactory.create()
        assert created.customer is not None
        assert len(created.items) == 2
        payment = created.payment
        assert payment is not None
        assert payment.status == "completed"

    def test_premium_trait_sets_correct_fields(self, db_session):
        """The premium trait yields an auto-renewing premium subscription."""
        subscription = UserFactory.create(premium=True).subscription
        assert subscription.plan == "premium"
        assert subscription.auto_renew is True
Run factory integrity tests as part of CI to catch issues early — a broken factory can cascade failures across hundreds of tests.
The one thing to remember: Well-architected test data factories centralize object creation logic, manage complex relationship graphs through SubFactories and traits, and use build mode for speed and create mode for integration tests — becoming the foundation that keeps your entire test suite maintainable.
See Also
- Python Acceptance Testing Patterns — How Python teams verify software does what real users actually asked for.
- Python Approval Testing — How approval testing lets you verify complex Python output by comparing it to a saved 'golden' copy you already checked.
- Python Behavior Driven Development — Get an intuitive feel for Behavior Driven Development so Python behavior stops feeling unpredictable.
- Python Browser Automation Testing — How Python can control a web browser like a robot to test websites automatically.
- Python Chaos Testing Applications — Why breaking your own Python systems on purpose makes them stronger.