Image Inpainting in Python — Deep Dive

Production inpainting systems combine automated mask generation, multi-model selection, quality validation, and batch processing. This guide covers the full pipeline from automatic object detection through selective inpainting to seamless compositing.

Automatic mask generation

Object-based masking with SAM

Segment Anything Model (SAM) generates pixel-perfect masks from point prompts or bounding boxes:

from segment_anything import sam_model_registry, SamPredictor
import numpy as np

# Build the ViT-H variant of SAM from a local checkpoint and move it to the GPU.
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h.pth")
sam = sam.to("cuda")
predictor = SamPredictor(sam)

# SamPredictor.set_image expects an HxWx3 uint8 array, so normalise the image
# mode first: RGBA, palette or grayscale inputs would otherwise yield an array
# with the wrong number of channels.
# NOTE(review): assumes source_image is a PIL image — confirm against caller.
predictor.set_image(np.array(source_image.convert("RGB")))

# Point prompt: click on the object to remove
masks, scores, _ = predictor.predict(
    point_coords=np.array([[400, 300]]),  # x, y of target
    point_labels=np.array([1]),           # 1 = foreground
    multimask_output=True,
)

# multimask_output=True yields several candidate masks with one score each;
# keep the candidate with the highest score.
best_mask = masks[scores.argmax()]

Semantic segmentation masking

For class-based removal (e.g., “remove all people”):

from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
import torch

# Checkpoint shared by the image processor and the segmentation model.
_SEGFORMER_CHECKPOINT = "nvidia/segformer-b5-finetuned-ade-640-640"
# ADE20K class 12 = person
_PERSON_CLASS_ID = 12

processor = SegformerImageProcessor.from_pretrained(_SEGFORMER_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(_SEGFORMER_CHECKPOINT)
model = model.to("cuda")

inputs = processor(images=source_image, return_tensors="pt")
inputs = inputs.to("cuda")

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits

# Reverse .size so the target is (height, width) for interpolate.
# NOTE(review): assumes source_image.size is a PIL-style (width, height) pair.
target_hw = source_image.size[::-1]
upsampled = torch.nn.functional.interpolate(
    logits,
    size=target_hw,
    mode="bilinear",
    align_corners=False,
)

# Per-pixel class index, then a 0/255 uint8 mask of the person class.
predicted = upsampled.argmax(dim=1).squeeze().cpu().numpy()
person_mask = np.where(predicted == _PERSON_CLASS_ID, 255, 0).astype(np.uint8)

Mask refinement pipeline

Raw masks need refinement for clean inpainting:

import cv2
from PIL import Image

def refine_mask(mask: np.ndarray, dilate_px: int = 15, blur_px: int = 7):
    """Dilate a mask to cover object edges, close small holes, then feather it.

    Args:
        mask: Mask array. Boolean masks are accepted and promoted to uint8
            (False -> 0, True -> 255), since the OpenCV morphology calls
            below do not operate on bool arrays.
        dilate_px: Size in pixels of the elliptical structuring element used
            for both the dilation and the closing. A value below 1 skips
            both morphological steps.
        blur_px: Feathering amount; the Gaussian kernel side is
            ``2 * blur_px + 1`` so it is always odd. A value below 1 skips
            the blur.

    Returns:
        The refined mask as an ndarray.
    """
    if mask.dtype == np.bool_:
        mask = mask.astype(np.uint8) * 255

    refined = mask

    if dilate_px > 0:
        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (dilate_px, dilate_px)
        )
        # Dilate to ensure full coverage of the target object.
        refined = cv2.dilate(refined, kernel, iterations=1)
        # Closing (dilate then erode) removes small holes inside the mask.
        refined = cv2.morphologyEx(
            refined, cv2.MORPH_CLOSE, kernel, iterations=2
        )

    if blur_px > 0:
        # Feather edges for smooth blending.
        side = blur_px * 2 + 1
        refined = cv2.GaussianBlur(refined, (side, side), 0)

    return refined

SDXL inpainting pipeline

SDXL inpainting produces higher-quality results at its native 1024×1024 resolution:

from diffusers import AutoPipelineForInpainting
import torch

# Hugging Face repository id of the SDXL inpainting checkpoint.
_SDXL_INPAINT_REPO = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"

# Load the "fp16" weight variant in half precision, then move it to the GPU.
pipe = AutoPipelineForInpainting.from_pretrained(
    _SDXL_INPAINT_REPO,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = pipe.to("cuda")

def inpaint_sdxl(
    image, mask, prompt,
    negative_prompt="blurry, low quality, artifacts, seam visible",
    strength=0.85,
    steps=30,
    guidance_scale=8.0,
    seed=None,
):
    """Inpaint ``image`` under ``mask`` with the module-level SDXL ``pipe``.

    Both inputs are resized to 1024x1024 before the pipeline call, and the
    generated image is resized back to ``image.size`` before it is returned.

    Args:
        image: Source image; must provide ``resize`` and ``size``.
        mask: Mask image; must provide ``resize``.
        prompt: Text prompt for the filled region.
        negative_prompt: Text the generation should steer away from.
        strength: Forwarded to the pipeline as ``strength``.
        steps: Forwarded as ``num_inference_steps``.
        guidance_scale: Forwarded as ``guidance_scale``.
        seed: When not None, seeds a CUDA generator for the call.

    Returns:
        The first generated image, resized to the input's original size.
    """
    sdxl_size = (1024, 1024)
    original_size = image.size

    # Seed a CUDA generator only when a seed is supplied (0 counts as a seed).
    rng = (
        torch.Generator("cuda").manual_seed(seed)
        if seed is not None
        else None
    )

    call_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=image.resize(sdxl_size),
        mask_image=mask.resize(sdxl_size),
        strength=strength,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    generated = pipe(**call_kwargs).images[0]

    return generated.resize(original_size)

Multi-pass inpainting for complex scenes

Large regions or complex objects benefit from multiple passes:

class MultiPassInpainter:
    """Runs an inpainting pipeline repeatedly with a shrinking mask."""

    def __init__(self, pipe):
        # Callable pipeline; invoked with prompt/image/mask_image/strength/
        # num_inference_steps and expected to return an object with `.images`.
        self.pipe = pipe

    def inpaint_progressive(
        self,
        image,
        mask,
        prompt: str,
        passes: int = 3,
        initial_strength: float = 1.0,
        final_strength: float = 0.5,
    ):
        """Start with high strength for structure, reduce for blending.

        Strength is interpolated linearly from ``initial_strength`` on the
        first pass to ``final_strength`` on the last. Pass ``i`` (0-based)
        uses the original mask eroded ``i`` times with a 10x10 ellipse.

        Args:
            image: Source image; must provide ``copy()``.
            mask: Mask convertible with ``np.array``.
            prompt: Text prompt forwarded to the pipeline.
            passes: Maximum number of pipeline invocations.
            initial_strength: Strength of the first pass.
            final_strength: Strength of the last pass.

        Returns:
            The image after the last executed pass. When ``passes`` is not
            positive or the mask has no non-zero pixel, a copy of ``image``
            is returned without calling the pipeline.
        """
        current_image = image.copy()

        # The mask never changes between passes, so convert it once.
        base_mask = np.array(mask)
        if passes <= 0 or not base_mask.any():
            return current_image

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        last_index = max(passes - 1, 1)  # avoids division by zero for passes == 1

        for i in range(passes):
            progress = i / last_index
            strength = initial_strength + (
                final_strength - initial_strength
            ) * progress

            # Erode mask each pass to focus on remaining artifacts
            if i > 0:
                pass_mask = cv2.erode(base_mask, kernel, iterations=i)
            else:
                pass_mask = base_mask

            # Erosion only ever shrinks the mask: once it is empty every
            # later pass would be empty too, so stop instead of running the
            # pipeline with nothing to fill.
            if not pass_mask.any():
                break

            current_image = self.pipe(
                prompt=prompt,
                image=current_image,
                mask_image=Image.fromarray(pass_mask),
                strength=strength,
                num_inference_steps=30,
            ).images[0]

        return current_image

Seamless compositing

After inpainting, blend the result back into the original for artifact-free output:

def composite_with_poisson_blending(
    original: np.ndarray,
    inpainted: np.ndarray,
    mask: np.ndarray,
):
    """Use Poisson blending for seamless edge transitions.

    Args:
        original: Destination image the inpainted region is cloned into.
        inpainted: Source image providing the masked region.
        mask: Mask whose non-zero pixels select the region to clone.

    Returns:
        The blended image from ``cv2.seamlessClone``, or ``inpainted``
        unchanged when the mask has no non-zero pixel.
    """
    nonzero = np.nonzero(mask)
    rows, cols = nonzero[0], nonzero[1]
    if rows.size == 0:
        return inpainted

    # seamlessClone centres the bounding rectangle of the mask on the given
    # point. Anchoring at the bounding-box centre (not the intensity centroid)
    # keeps the cloned region where it was; for asymmetric masks the centroid
    # differs from the box centre and would shift the patch.
    left, top = int(cols.min()), int(rows.min())
    width = int(cols.max()) - left + 1
    height = int(rows.max()) - top + 1
    center = (left + width // 2, top + height // 2)

    return cv2.seamlessClone(
        inpainted, original, mask,
        center, cv2.NORMAL_CLONE
    )

def composite_with_alpha_blend(
    original: Image.Image,
    inpainted: Image.Image,
    mask: Image.Image,
    feather_radius: int = 5,
):
    """Blend ``inpainted`` over ``original`` through a Gaussian-softened mask.

    Args:
        original: Image used where the mask is dark.
        inpainted: Image used where the mask is bright.
        mask: Mask image; blurred before it is used as the blend weight.
        feather_radius: Radius of the Gaussian blur applied to the mask.

    Returns:
        The composited image.
    """
    from PIL import ImageFilter

    # Soften the mask edge so the two images cross-fade instead of switching
    # abruptly at the boundary.
    blur = ImageFilter.GaussianBlur(radius=feather_radius)
    soft_mask = mask.filter(blur)

    blended = Image.composite(inpainted, original, soft_mask)
    return blended

Batch processing architecture

Processing pipeline for e-commerce product photos

from dataclasses import dataclass
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import json

@dataclass
class InpaintJob:
    """One unit of batch work: which image to read, what to remove, where to save."""

    # Path of the input image to open.
    image_path: str
    # Path the inpainted result is written to.
    output_path: str
    # Matched case-insensitively as a substring of segment labels.
    remove_class: str  # e.g., "background", "person", "watermark"
    # Prompt for the filled region; when empty, a default prompt is built
    # from remove_class.
    replace_prompt: str = ""

class BatchInpainter:
    """Segments each job's image, masks the requested class and inpaints it.

    Both models are loaded once in the constructor and reused for every job.
    ``max_batch_size`` and ``results`` are stored on the instance but are not
    consulted by the methods of this class.
    """

    def __init__(self, max_batch_size: int = 4):
        self.max_batch_size = max_batch_size
        self.segmenter = self._load_segmenter()
        self.inpaint_pipe = self._load_inpainter()
        self.results = []

    def _load_segmenter(self):
        """Return a transformers image-segmentation pipeline on device 0."""
        from transformers import pipeline
        return pipeline(
            "image-segmentation",
            model="facebook/maskformer-swin-base-ade",
            device=0,
        )

    def _load_inpainter(self):
        """Return the SDXL inpainting pipeline in half precision on CUDA."""
        from diffusers import AutoPipelineForInpainting
        return AutoPipelineForInpainting.from_pretrained(
            "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
            torch_dtype=torch.float16,
            # Load the half-precision weight files directly, matching the
            # standalone SDXL pipeline setup.
            variant="fp16",
        ).to("cuda")

    def process_job(self, job: "InpaintJob") -> dict:
        """Run segmentation, mask refinement and inpainting for one job.

        Args:
            job: Job describing the input path, output path, class to
                remove and optional replacement prompt.

        Returns:
            ``{"status": "no_match", "path": <input path>}`` when no segment
            label contains ``job.remove_class``; otherwise
            ``{"status": "success", "path": <output path>}`` after the result
            has been saved.
        """
        # Close the file handle promptly and hand RGB data to both models
        # regardless of the mode of the file on disk.
        with Image.open(job.image_path) as handle:
            image = handle.convert("RGB")

        # Generate mask from segmentation
        wanted = job.remove_class.lower()
        target_masks = [
            np.array(segment["mask"])
            for segment in self.segmenter(image)
            if wanted in segment["label"].lower()
        ]

        if not target_masks:
            return {"status": "no_match", "path": job.image_path}

        # Union of all matching masks (element-wise maximum).
        combined = np.maximum.reduce(target_masks)
        mask = Image.fromarray(refine_mask(combined))

        # Inpaint at 1024x1024, then scale back to the input size.
        prompt = job.replace_prompt or f"clean {job.remove_class} area"
        result = self.inpaint_pipe(
            prompt=prompt,
            image=image.resize((1024, 1024)),
            mask_image=mask.resize((1024, 1024)),
            num_inference_steps=25,
        ).images[0]
        result = result.resize(image.size)

        # Make sure the destination directory exists before saving.
        Path(job.output_path).parent.mkdir(parents=True, exist_ok=True)
        result.save(job.output_path)

        return {"status": "success", "path": job.output_path}

    def process_batch(self, jobs: "list[InpaintJob]") -> list[dict]:
        """Process jobs sequentially, recording failures instead of raising.

        Returns:
            One result dict per job, in input order. A job that raises is
            reported as ``{"status": "error", "path": <input path>,
            "error": <message>}`` so one bad image does not abort the batch.
        """
        results = []
        for job in jobs:
            try:
                outcome = self.process_job(job)
            except Exception as e:  # batch boundary: report and continue
                outcome = {
                    "status": "error",
                    "path": job.image_path,
                    "error": str(e),
                }
            results.append(outcome)
        return results

Quality validation

Automated checks catch common inpainting failures:

class InpaintQualityChecker:
    """Heuristic checks for visible seams and texture mismatch after inpainting."""

    def __init__(
        self,
        edge_threshold: float = 30.0,
        texture_threshold: float = 0.5,
    ):
        # Upper bounds (exclusive) that the two scores must stay under for
        # validate() to report a pass.
        self.edge_threshold = edge_threshold
        self.texture_threshold = texture_threshold

    def check_seam_visibility(
        self, original: np.ndarray, result: np.ndarray, mask: np.ndarray
    ) -> float:
        """Return the mean absolute original/result difference on the mask boundary.

        The boundary is the set of pixels where a 3x3 dilation and a 3x3
        erosion of the mask disagree. Lower is better. A mask with no
        boundary (empty, or the same value everywhere) scores 0.0.
        """
        # A uniform mask has no boundary; averaging over zero pixels would
        # produce NaN and a runtime warning.
        if mask.size == 0 or mask.min() == mask.max():
            return 0.0

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        boundary = cv2.dilate(mask, kernel) - cv2.erode(mask, kernel)
        on_boundary = boundary > 0

        diff = cv2.absdiff(original, result).astype(float)
        return float(diff[on_boundary].mean())

    def check_texture_consistency(
        self, result: np.ndarray, mask: np.ndarray
    ) -> float:
        """Compare intensity spread inside and outside the mask.

        Returns ``abs(1 - std_inside / std_outside)``; closer to 0 is
        better. When either region is empty there is nothing to compare and
        0.0 is returned.
        """
        if result.ndim == 2:
            # Already single-channel; no colour conversion needed.
            gray = result
        else:
            # NOTE(review): validate() feeds np.array(result); if that is a
            # PIL image the channels are RGB, not BGR, so the red/blue
            # luminance weights are swapped here — confirm the intended order.
            gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)

        inside = gray[mask > 127]
        outside = gray[mask <= 127]
        if inside.size == 0 or outside.size == 0:
            return 0.0

        # Compare standard deviations (texture complexity); the epsilon
        # guards against a perfectly flat outside region.
        std_ratio = np.std(inside) / (np.std(outside) + 1e-6)
        return float(abs(1.0 - std_ratio))

    def validate(self, original, result, mask) -> dict:
        """Run both checks and return their scores plus a pass/fail flag.

        Inputs are converted with ``np.array``. The result has the keys
        ``seam_visibility``, ``texture_consistency`` and ``passed``; the
        scores are plain floats and ``passed`` is a plain bool.
        """
        original_arr = np.array(original)
        result_arr = np.array(result)
        mask_arr = np.array(mask)

        seam_score = self.check_seam_visibility(
            original_arr, result_arr, mask_arr
        )
        texture_score = self.check_texture_consistency(result_arr, mask_arr)

        return {
            "seam_visibility": seam_score,
            "texture_consistency": texture_score,
            "passed": bool(
                seam_score < self.edge_threshold
                and texture_score < self.texture_threshold
            ),
        }

One thing to remember: Production inpainting pipelines chain three stages — automated mask generation (SAM or segmentation), context-aware fill (classical or diffusion), and quality-validated compositing (Poisson or alpha blending) — and the mask quality determines 80% of the final result.

Tags: python · image-inpainting · computer-vision · generative-ai

See Also

  • Diffusion Models — Stable Diffusion and DALL-E don't 'draw' your images — they unspoil a scrambled mess until a picture emerges. Here's the surprisingly simple idea behind it.
  • Python ControlNet Image Control — Find out how ControlNet lets you boss around an AI artist by giving it sketches, poses, and outlines to follow.
  • Python GAN Training Patterns — Learn how two neural networks compete like an art forger and a detective to create incredibly realistic fake images.
  • Python Image Generation Pipelines — Discover how Python chains together multiple steps to turn your ideas into polished AI-generated images, like a factory assembly line for pictures.
  • Python LoRA Fine-Tuning — Learn how LoRA lets you teach an AI new tricks without replacing its entire brain, using tiny add-on lessons instead.