Image Inpainting in Python — Deep Dive
Production inpainting systems combine automated mask generation, multi-model selection, quality validation, and batch processing. This guide covers the full pipeline from automatic object detection through selective inpainting to seamless compositing.
Automatic mask generation
Object-based masking with SAM
Segment Anything Model (SAM) generates pixel-perfect masks from point prompts or bounding boxes:
from segment_anything import sam_model_registry, SamPredictor
import numpy as np
# Build the ViT-H SAM variant from its checkpoint file and move it to the GPU.
build_sam = sam_model_registry["vit_h"]
sam = build_sam(checkpoint="sam_vit_h.pth").to("cuda")

# Hand the source image (as an array) to the predictor before prompting it.
predictor = SamPredictor(sam)
predictor.set_image(np.array(source_image))

# Point prompt: click on the object to remove
click_xy = np.array([[400, 300]])  # x, y of target
click_label = np.array([1])  # 1 = foreground
masks, scores, _ = predictor.predict(
    point_coords=click_xy,
    point_labels=click_label,
    multimask_output=True,
)

# Several candidates come back with one score each; keep the top-scoring one.
best_mask = masks[np.argmax(scores)]
Semantic segmentation masking
For class-based removal (e.g., “remove all people”):
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
import torch
# The preprocessor and the model are loaded from the same checkpoint id.
checkpoint_id = "nvidia/segformer-b5-finetuned-ade-640-640"
processor = SegformerImageProcessor.from_pretrained(checkpoint_id)
model = SegformerForSemanticSegmentation.from_pretrained(checkpoint_id)
model = model.to("cuda")

# Preprocess on the same device as the model, then run a gradient-free forward pass.
inputs = processor(images=source_image, return_tensors="pt").to("cuda")
with torch.no_grad():
    outputs = model(**inputs)

# Resample the logits to the source image's size. `size[::-1]` reverses the
# tuple; assuming `source_image` is a PIL image, that turns (width, height)
# into the (height, width) order used for `size=` — TODO confirm.
logits = outputs.logits
target_hw = source_image.size[::-1]
upsampled = torch.nn.functional.interpolate(
    logits,
    size=target_hw,
    mode="bilinear",
    align_corners=False,
)

# Per-pixel winning class index, moved to host memory as a NumPy array.
predicted = upsampled.argmax(dim=1).squeeze().cpu().numpy()

# ADE20K class 12 = person
person_mask = np.where(predicted == 12, 255, 0).astype(np.uint8)
Mask refinement pipeline
Raw masks need refinement for clean inpainting:
import cv2
from PIL import Image
def refine_mask(mask: np.ndarray, dilate_px: int = 15, blur_px: int = 7):
    """Dilate to cover edges, then blur for smooth transitions.

    Args:
        mask: Mask array handed straight to OpenCV. Assumed to be a
            single-channel uint8 mask where non-zero marks the region to
            inpaint — TODO confirm against callers.
        dilate_px: Side length of the elliptical structuring element used for
            both the dilation and the hole-closing step. Values <= 0 skip
            both morphological steps instead of building an invalid kernel.
        blur_px: Feathering amount; the Gaussian kernel side is
            ``2 * blur_px + 1`` (always odd). Values <= 0 disable feathering.

    Returns:
        A new array holding the refined mask; the input is never returned
        as-is, so callers may modify the result freely.
    """
    refined = mask

    if dilate_px > 0:
        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (dilate_px, dilate_px)
        )
        # Dilate to ensure full coverage of target object
        refined = cv2.dilate(refined, kernel, iterations=1)
        # Remove small holes
        refined = cv2.morphologyEx(
            refined, cv2.MORPH_CLOSE, kernel, iterations=2
        )

    if blur_px > 0:
        # Feather edges for smooth blending
        side = blur_px * 2 + 1
        refined = cv2.GaussianBlur(refined, (side, side), 0)

    if refined is mask:
        # Every step was disabled: still hand back an independent array.
        refined = mask.copy()
    return refined
SDXL inpainting pipeline
SDXL inpainting produces higher quality results at 1024×1024:
from diffusers import AutoPipelineForInpainting
import torch
# Load the SDXL inpainting checkpoint, requesting the "fp16" weight variant
# and float16 as the torch dtype.
pipe = AutoPipelineForInpainting.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    torch_dtype=torch.float16,
    variant="fp16",
)
# Move the loaded pipeline to the GPU.
pipe = pipe.to("cuda")
def inpaint_sdxl(
    image, mask, prompt,
    negative_prompt="blurry, low quality, artifacts, seam visible",
    strength=0.85,
    steps=30,
    guidance_scale=8.0,
    seed=None,
):
    """Inpaint ``image`` under ``mask`` with the module-level ``pipe``.

    Both inputs are resized to 1024x1024 for the pipeline call and the
    output is resized back to ``image.size``.

    Args:
        image: Source image; must provide ``.resize`` and ``.size``.
        mask: Mask image; must provide ``.resize``.
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        strength: Forwarded as ``strength``.
        steps: Forwarded as ``num_inference_steps``.
        guidance_scale: Forwarded as ``guidance_scale``.
        seed: When not ``None``, seeds a CUDA ``torch.Generator`` that is
            passed to the pipeline; otherwise no generator is passed.

    Returns:
        The first image produced by the pipeline, resized to ``image.size``.
    """
    # SDXL native resolution used for the pipeline call.
    sdxl_size = (1024, 1024)

    generator = (
        torch.Generator("cuda").manual_seed(seed) if seed is not None else None
    )

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=image.resize(sdxl_size),
        mask_image=mask.resize(sdxl_size),
        strength=strength,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        generator=generator,
    )

    # Resize back to original dimensions
    return output.images[0].resize(image.size)
Multi-pass inpainting for complex scenes
Large regions or complex objects benefit from multiple passes:
class MultiPassInpainter:
    """Run an inpainting pipeline several times with a changing strength."""

    def __init__(self, pipe):
        # Callable pipeline; invoked with prompt/image/mask_image/strength/
        # num_inference_steps keywords and expected to expose `.images`.
        self.pipe = pipe

    def inpaint_progressive(
        self,
        image,
        mask,
        prompt: str,
        passes: int = 3,
        initial_strength: float = 1.0,
        final_strength: float = 0.5,
        num_inference_steps: int = 30,
    ):
        """Start with high strength for structure, reduce for blending.

        Strength is interpolated linearly from ``initial_strength`` (first
        pass) to ``final_strength`` (last pass). From the second pass on, the
        original mask is eroded ``i`` times (10x10 elliptical kernel) so later
        passes cover a progressively smaller region.

        Args:
            image: Starting image; must provide ``.copy()``.
            mask: Mask convertible with ``np.array``.
            prompt: Text prompt forwarded to the pipeline on every pass.
            passes: Number of pipeline invocations. With ``passes <= 0`` a
                copy of ``image`` is returned untouched.
            initial_strength: Strength used on the first pass.
            final_strength: Strength used on the last pass.
            num_inference_steps: Steps forwarded to the pipeline on every
                pass (previously fixed at 30).

        Returns:
            The image produced by the last executed pass.
        """
        current_image = image.copy()
        if passes <= 0:
            return current_image

        # Loop-invariant work: convert the mask once and build the erosion
        # kernel once (only needed when there is a second pass).
        base_mask = np.array(mask)
        kernel = None
        if passes > 1:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        span = max(passes - 1, 1)

        for i in range(passes):
            progress = i / span
            strength = initial_strength + (
                final_strength - initial_strength
            ) * progress

            # Erode mask each pass to focus on remaining artifacts
            mask_array = base_mask
            if i > 0:
                mask_array = cv2.erode(base_mask, kernel, iterations=i)
                if not mask_array.any():
                    # Erosion consumed the whole mask; later passes would be
                    # eroded even further, so there is nothing left to refine.
                    break

            current_mask = Image.fromarray(mask_array)
            current_image = self.pipe(
                prompt=prompt,
                image=current_image,
                mask_image=current_mask,
                strength=strength,
                num_inference_steps=num_inference_steps,
            ).images[0]
        return current_image
Seamless compositing
After inpainting, blend the result back into the original for artifact-free output:
def composite_with_poisson_blending(
    original: np.ndarray,
    inpainted: np.ndarray,
    mask: np.ndarray,
):
    """Use Poisson blending for seamless edge transitions.

    Args:
        original: Destination image the inpainted region is cloned into.
        inpainted: Source image providing the masked content.
        mask: Mask selecting the region of ``inpainted`` to clone; non-zero
            means "take from inpainted". Assumed single-channel uint8 and the
            same height/width as the images — TODO confirm against callers.

    Returns:
        The blended image, or ``inpainted`` unchanged when the mask selects
        nothing.
    """
    # Nothing selected: there is no region to clone.
    if not np.any(mask):
        return inpainted

    # seamlessClone places the centre of the mask's *bounding rectangle* at
    # the given point. Passing the intensity centroid instead shifts the
    # patch whenever the mask is not symmetric, so anchor on the rectangle.
    x, y, w, h = cv2.boundingRect(mask)
    center = (x + w // 2, y + h // 2)

    return cv2.seamlessClone(
        inpainted, original, mask, center, cv2.NORMAL_CLONE
    )
def composite_with_alpha_blend(
    original: Image.Image,
    inpainted: Image.Image,
    mask: Image.Image,
    feather_radius: int = 5,
):
    """Alpha blend with feathered mask for smooth transitions.

    Args:
        original: Image shown where the mask is dark.
        inpainted: Image shown where the mask is bright.
        mask: Blend mask. "L" and "RGBA" masks are used as given; any other
            mode (for example 1-bit or RGB) is converted to 8-bit grayscale
            first so the feathered edge can hold intermediate alpha values.
        feather_radius: Radius of the Gaussian blur applied to the mask.

    Returns:
        The composite of ``inpainted`` over ``original`` through the
        feathered mask.
    """
    from PIL import ImageFilter

    # A 1-bit mask has no room for fractional alpha and an RGB mask is not
    # accepted as a composite mask, so normalise those to grayscale.
    if mask.mode not in ("L", "RGBA"):
        mask = mask.convert("L")

    # Feather the mask edges
    feathered = mask.filter(
        ImageFilter.GaussianBlur(radius=feather_radius)
    )
    # Use feathered mask as alpha channel for blending
    return Image.composite(inpainted, original, feathered)
Batch processing architecture
Processing pipeline for e-commerce product photos
from dataclasses import dataclass
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import json
@dataclass
class InpaintJob:
    """Description of one image to process in a batch run."""

    # Path of the image to read.
    image_path: str
    # Path the inpainted result is written to.
    output_path: str
    # Label to remove, e.g., "background", "person", "watermark"
    remove_class: str
    # Prompt for the fill; empty means the consumer picks its own prompt.
    replace_prompt: str = ""
class BatchInpainter:
    """Segment, mask, and inpaint a list of jobs, one job at a time."""

    def __init__(self, max_batch_size: int = 4):
        # Stored for callers; the methods below do not read it.
        self.max_batch_size = max_batch_size
        self.segmenter = self._load_segmenter()
        self.inpaint_pipe = self._load_inpainter()
        # Kept as an instance attribute; process_batch returns its own list.
        self.results = []

    def _load_segmenter(self):
        """Build the image-segmentation pipeline on device 0."""
        from transformers import pipeline

        return pipeline(
            "image-segmentation",
            model="facebook/maskformer-swin-base-ade",
            device=0,
        )

    def _load_inpainter(self):
        """Load the SDXL inpainting pipeline in float16 on the GPU."""
        from diffusers import AutoPipelineForInpainting

        # variant="fp16" matches the stand-alone SDXL pipeline load in this
        # file, so both loads request the same weight files.
        return AutoPipelineForInpainting.from_pretrained(
            "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
            torch_dtype=torch.float16,
            variant="fp16",
        ).to("cuda")

    def process_job(self, job: "InpaintJob") -> dict:
        """Remove ``job.remove_class`` from one image and save the result.

        Args:
            job: Job with ``image_path``, ``output_path``, ``remove_class``
                and ``replace_prompt`` attributes.

        Returns:
            ``{"status": "no_match", "path": <image_path>}`` when no segment
            label contains ``remove_class`` (case-insensitive), otherwise
            ``{"status": "success", "path": <output_path>}``.

        Raises:
            Whatever the image loader, the models, or the file write raise;
            ``process_batch`` converts those into error entries.
        """
        # Close the file handle promptly and normalise the mode so palette,
        # grayscale, or alpha inputs reach the models as plain RGB.
        with Image.open(job.image_path) as source:
            image = source.convert("RGB")

        # Generate mask from segmentation
        segments = self.segmenter(image)
        wanted = job.remove_class.lower()
        target_masks = [
            np.array(s["mask"]) for s in segments
            if wanted in s["label"].lower()
        ]
        if not target_masks:
            return {"status": "no_match", "path": job.image_path}

        # Combine all matching masks (element-wise maximum acts as a union).
        combined = np.maximum.reduce(target_masks)
        mask = Image.fromarray(refine_mask(combined))

        # Inpaint
        prompt = job.replace_prompt or f"clean {job.remove_class} area"
        working_size = (1024, 1024)
        result = self.inpaint_pipe(
            prompt=prompt,
            image=image.resize(working_size),
            mask_image=mask.resize(working_size),
            num_inference_steps=25,
        ).images[0]
        result = result.resize(image.size)

        # Make sure the destination directory exists before writing.
        Path(job.output_path).parent.mkdir(parents=True, exist_ok=True)
        result.save(job.output_path)
        return {"status": "success", "path": job.output_path}

    def process_batch(self, jobs: "list[InpaintJob]") -> list[dict]:
        """Process jobs in order, isolating failures per job.

        Returns:
            One dict per job, in input order. A job that raises yields
            ``{"status": "error", "path": <image_path>, "error": <message>}``
            and does not stop the remaining jobs.
        """
        results = []
        for job in jobs:
            try:
                outcome = self.process_job(job)
            except Exception as exc:
                # Batch boundary: record the failure and keep going.
                outcome = {
                    "status": "error",
                    "path": job.image_path,
                    "error": str(exc),
                }
            results.append(outcome)
        return results
Quality validation
Automated checks catch common inpainting failures:
class InpaintQualityChecker:
    """Heuristic checks for visible seams and texture mismatch after inpainting."""

    def __init__(self, edge_threshold: float = 30.0, texture_threshold: float = 0.5):
        # Upper bound (exclusive) on the seam score for a result to pass.
        self.edge_threshold = edge_threshold
        # Upper bound (exclusive) on the texture score for a result to pass.
        self.texture_threshold = texture_threshold

    def check_seam_visibility(
        self, original: np.ndarray, result: np.ndarray, mask: np.ndarray
    ) -> float:
        """Detect visible seams at mask boundary.

        The score is the mean absolute difference between ``original`` and
        ``result`` over the pixels where a 3x3 dilation and erosion of the
        mask disagree. A mask with no boundary (empty or constant) scores 0.0.

        Returns:
            The boundary difference as a plain float. Lower is better.
        """
        # A constant mask has no inside/outside transition, hence no seam;
        # returning early also avoids averaging over an empty selection.
        if mask.size == 0 or mask.min() == mask.max():
            return 0.0

        # Find mask boundary pixels
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        boundary = cv2.dilate(mask, kernel) - cv2.erode(mask, kernel)
        boundary_pixels = boundary > 0
        if not boundary_pixels.any():
            return 0.0

        # Mean absolute pixel difference along the boundary band
        diff = cv2.absdiff(original, result).astype(float)
        return float(diff[boundary_pixels].mean())  # Lower is better

    def check_texture_consistency(
        self, result: np.ndarray, mask: np.ndarray
    ) -> float:
        """Compare texture statistics inside and outside mask.

        Pixels with ``mask > 127`` count as inside. When either side is empty
        there is nothing to compare and the score is 0.0.

        Returns:
            ``abs(1 - std_inside / std_outside)`` as a plain float. Closer to
            0 is better.
        """
        inside_sel = mask > 127
        if not inside_sel.any() or inside_sel.all():
            return 0.0

        # NOTE(review): validate() builds arrays with np.array(), which for
        # PIL images is RGB; BGR2GRAY then swaps the red/blue weights. The
        # effect on a std ratio is small — confirm the intended channel order.
        if result.ndim == 2:
            gray = result  # already single-channel
        else:
            gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)

        inside = gray[inside_sel]
        outside = gray[~inside_sel]
        # Compare standard deviations (texture complexity)
        std_ratio = np.std(inside) / (np.std(outside) + 1e-6)
        return float(abs(1.0 - std_ratio))  # Closer to 0 is better

    def validate(self, original, result, mask) -> dict:
        """Run both checks and report scores plus an overall verdict.

        Args:
            original: Image before inpainting (anything ``np.array`` accepts).
            result: Image after inpainting.
            mask: Mask that was inpainted.

        Returns:
            Dict with ``seam_visibility`` and ``texture_consistency`` floats
            and a built-in ``bool`` under ``passed``, so the report can be
            serialised with ``json`` directly.
        """
        original_arr = np.array(original)
        result_arr = np.array(result)
        mask_arr = np.array(mask)

        seam_score = self.check_seam_visibility(original_arr, result_arr, mask_arr)
        texture_score = self.check_texture_consistency(result_arr, mask_arr)
        return {
            "seam_visibility": seam_score,
            "texture_consistency": texture_score,
            "passed": bool(
                seam_score < self.edge_threshold
                and texture_score < self.texture_threshold
            ),
        }
One thing to remember: Production inpainting pipelines chain three stages — automated mask generation (SAM or segmentation), context-aware fill (classical or diffusion), and quality-validated compositing (Poisson or alpha blending) — and the mask quality determines 80% of the final result.
See Also
- Diffusion Models: Stable Diffusion and DALL-E don't 'draw' your images — they unscramble a noisy mess until a picture emerges. Here's the surprisingly simple idea behind it.
- Python ControlNet Image Control: Find out how ControlNet lets you boss around an AI artist by giving it sketches, poses, and outlines to follow.
- Python GAN Training Patterns: Learn how two neural networks compete like an art forger and a detective to create incredibly realistic fake images.
- Python Image Generation Pipelines: Discover how Python chains together multiple steps to turn your ideas into polished AI-generated images, like a factory assembly line for pictures.
- Python LoRA Fine-Tuning: Learn how LoRA lets you teach an AI new tricks without replacing its entire brain, using tiny add-on lessons instead.