Python Building Energy Simulation — Deep Dive

Build automated building energy analysis pipelines in Python using EnergyPlus, parametric optimization, and machine learning surrogate models.

Technical foundation

Building energy simulation in Python combines thermodynamic modeling, HVAC engineering, optimization, and data analysis. Production workflows automate model generation, run large parametric studies, and apply optimization algorithms to find energy-efficient designs. This deep dive covers the code patterns, EnergyPlus integration techniques, and advanced analysis methods used by building performance consultants and researchers.

EnergyPlus model creation with eppy

eppy provides a Pythonic interface to EnergyPlus Input Data Files (IDF):

from eppy.modeleditor import IDF

# Set EnergyPlus IDD (Input Data Dictionary) path; this must happen before
# any IDF file is loaded.
IDF.setiddname("/usr/local/EnergyPlus-24-1-0/Energy+.idd")

# Load a base model together with its weather file
idf = IDF("base_model.idf", "weather/chicago.epw")

# Inspect the current wall construction
# NOTE(review): index 0 is simply the first CONSTRUCTION object in the file;
# it is assumed to be the wall construction — confirm against the model.
wall_construction = idf.idfobjects["CONSTRUCTION"][0]
print(f"Current wall: {wall_construction.Name}")

# Add insulation layer
idf.newidfobject(
    "MATERIAL",
    Name="XPS_Insulation_100mm",
    Roughness="MediumSmooth",
    Thickness=0.1,  # meters
    Conductivity=0.034,  # W/m·K
    Density=35,
    Specific_Heat=1400,
)

# Create new construction with insulation
# NOTE(review): "Brick_200mm" and "Gypsum_Board_13mm" are not created in this
# snippet; presumably they already exist in base_model.idf — verify.
idf.newidfobject(
    "CONSTRUCTION",
    Name="InsulatedWall",
    Outside_Layer="Brick_200mm",
    Layer_2="XPS_Insulation_100mm",
    Layer_3="Gypsum_Board_13mm",
)

# Apply to all exterior walls.
# IDF files do not use a consistent letter case for choice fields ("Wall",
# "WALL", "Outdoors", "outdoors", ...), so compare case-insensitively to avoid
# silently skipping surfaces.
for surface in idf.idfobjects["BUILDINGSURFACE:DETAILED"]:
    is_wall = str(surface.Surface_Type).lower() == "wall"
    is_exterior = str(surface.Outside_Boundary_Condition).lower() == "outdoors"
    if is_wall and is_exterior:
        surface.Construction_Name = "InsulatedWall"

Window modifications

from geomeppy import IDF as GeoIDF

# geomeppy layers geometry-aware operations on top of the eppy IDF class
geo_idf = GeoIDF("base_model.idf", "weather/chicago.epw")

# Glazing construction shared by every call below
glazing = "DblGlz_LowE_Argon"

# Set window-to-wall ratio for all orientations
geo_idf.set_wwr(wwr=0.4, construction=glazing)

# Or set different ratios per orientation (south first, then north)
for facing, ratio in (("south", 0.6), ("north", 0.2)):
    geo_idf.set_wwr(wwr=ratio, construction=glazing, orientation=facing)

Running simulations programmatically

import subprocess
from pathlib import Path
import pandas as pd

def run_energyplus(
    idf_path: str,
    epw_path: str,
    output_dir: str,
    energyplus_path: str = "/usr/local/EnergyPlus-24-1-0/energyplus",
) -> dict:
    """Run one EnergyPlus simulation and return its parsed results.

    Args:
        idf_path: Path of the model (IDF) file to simulate.
        epw_path: Path of the weather (EPW) file.
        output_dir: Directory for the simulation outputs; created when missing.
        energyplus_path: Path of the EnergyPlus executable.

    Returns:
        The dictionary produced by ``parse_results`` for ``output_dir``.

    Raises:
        RuntimeError: If the process exits with a non-zero status. The message
            carries the tail of stderr and, when the file exists, the tail of
            ``eplusout.err`` from the output directory.
        subprocess.TimeoutExpired: If the run takes longer than 600 seconds.
    """
    output = Path(output_dir)
    output.mkdir(parents=True, exist_ok=True)

    result = subprocess.run(
        [
            energyplus_path,
            "--output-directory", str(output),
            "--weather", epw_path,
            "--readvars",
            idf_path,
        ],
        capture_output=True,
        text=True,
        # Never let an undecodable byte in the tool's output hide the real error.
        errors="replace",
        timeout=600,
    )

    if result.returncode != 0:
        message = f"EnergyPlus failed: {result.stderr[-500:]}"
        # stderr is frequently empty on failure; the detailed diagnostics are
        # written to the error file next to the other outputs.
        err_file = output / "eplusout.err"
        if err_file.is_file():
            err_tail = err_file.read_text(errors="replace")[-500:]
            message = f"{message}\neplusout.err: {err_tail}"
        raise RuntimeError(message)

    # Parse summary results
    return parse_results(output)

def parse_results(output_dir: Path) -> dict:
    """Extract key metrics from EnergyPlus output.

    Reads the tabular summary (``eplustbl.htm``) and, when present, the
    time-series CSV (``eplusout.csv``) from ``output_dir``.

    Args:
        output_dir: Directory holding the EnergyPlus output files.

    Returns:
        A dict with the metric keys listed below plus ``"hourly_data"`` (the
        CSV as a DataFrame, or ``None`` when the CSV is absent). The numeric
        metrics are placeholders and are returned as ``None``; only a
        ``"parsing_note"`` entry is added when the end-use table is located.

    Raises:
        FileNotFoundError: If ``eplustbl.htm`` does not exist in ``output_dir``.
    """
    # Read the annual summary table. Check for the file explicitly: handing a
    # non-existent path to read_html produces a parser-dependent error that
    # does not mention the missing file.
    html_path = output_dir / "eplustbl.htm"
    if not html_path.is_file():
        raise FileNotFoundError(f"EnergyPlus summary table not found: {html_path}")
    tables = pd.read_html(str(html_path))

    # Read hourly time-series data
    csv_path = output_dir / "eplusout.csv"
    if csv_path.exists():
        hourly = pd.read_csv(csv_path)
    else:
        hourly = None

    # Extract key annual metrics from the summary tables
    # Table indices vary by model; this is a common pattern
    results = {
        "total_site_energy_kwh": None,
        "heating_energy_kwh": None,
        "cooling_energy_kwh": None,
        "lighting_energy_kwh": None,
        "eui_kwh_per_m2": None,
        "peak_cooling_kw": None,
        "peak_heating_kw": None,
        "hourly_data": hourly,
    }

    # Parse from End Use table (typical at index ~4-6)
    for table in tables:
        if "Total End Uses" in table.values:
            # Extract values from the standard EnergyPlus end-use table
            results["parsing_note"] = "Found end-use summary table"
            break

    return results

Parametric study framework

from itertools import product
from concurrent.futures import ProcessPoolExecutor
import json

class ParametricStudy:
    """Run EnergyPlus parametric studies with combinatorial parameter sweeps.

    Parameters are registered with ``add_parameter``; ``run_all`` then
    simulates every combination of the registered values, one working
    sub-directory per case.
    """

    def __init__(
        self,
        base_idf: str,
        epw_path: str,
        work_dir: str,
        idd_path: str = "/usr/local/EnergyPlus-24-1-0/Energy+.idd",
    ):
        """Store the study configuration.

        Args:
            base_idf: Path of the base model every case starts from.
            epw_path: Path of the weather file used for every case.
            work_dir: Root directory under which case directories are created.
            idd_path: Path of the EnergyPlus IDD file handed to eppy.
        """
        self.base_idf = base_idf
        self.epw_path = epw_path
        self.work_dir = Path(work_dir)
        self.idd_path = idd_path
        self.parameters = {}

    def add_parameter(self, name: str, values: list, apply_fn: callable):
        """Register a parameter with its possible values and application function.

        Args:
            name: Parameter name; used as a key in each case and in the case id.
            values: Values to sweep.
            apply_fn: Called as ``apply_fn(idf, value)`` to modify the model.

        Raises:
            ValueError: If ``name`` is ``"case_id"``, which is reserved for the
                identifier that ``generate_cases`` stores in every case.
        """
        if name == "case_id":
            raise ValueError('"case_id" is reserved and cannot be a parameter name')
        self.parameters[name] = {"values": values, "apply_fn": apply_fn}

    def generate_cases(self) -> list[dict]:
        """Generate all parameter combinations.

        Returns:
            One dict per combination, mapping each parameter name to its value
            plus a ``"case_id"`` built from ``name=value`` pairs joined by "_".
        """
        param_names = list(self.parameters.keys())
        param_values = [self.parameters[p]["values"] for p in param_names]
        cases = []
        for combo in product(*param_values):
            case = dict(zip(param_names, combo))
            case["case_id"] = "_".join(f"{k}={v}" for k, v in case.items())
            cases.append(case)
        return cases

    def run_case(self, case: dict) -> dict:
        """Create variant model, run simulation, return results.

        Args:
            case: A dict as produced by ``generate_cases``.

        Returns:
            The results dict from ``run_energyplus`` extended with
            ``"parameters"`` (the case inputs) and ``"case_id"``.
        """
        IDF.setiddname(self.idd_path)
        idf = IDF(self.base_idf, self.epw_path)

        # Apply each parameter modification
        for param_name, value in case.items():
            if param_name == "case_id":
                continue
            apply_fn = self.parameters[param_name]["apply_fn"]
            apply_fn(idf, value)

        # Save variant and run. The case directory has to exist before the
        # model is written into it; nothing earlier creates it.
        case_dir = self.work_dir / case["case_id"]
        case_dir.mkdir(parents=True, exist_ok=True)
        idf_path = str(case_dir / "model.idf")
        idf.saveas(idf_path)

        results = run_energyplus(idf_path, self.epw_path, str(case_dir))
        results["parameters"] = {k: v for k, v in case.items() if k != "case_id"}
        # Keep the identifier with the results so rows stay traceable to a case.
        results["case_id"] = case["case_id"]
        return results

    def run_all(self, max_workers: int = 4) -> pd.DataFrame:
        """Run all cases in parallel.

        Each case is submitted to a process pool, so this object and the
        registered ``apply_fn`` callables must be picklable. Failed cases are
        reported on stdout and left out of the result.

        Args:
            max_workers: Number of worker processes.

        Returns:
            A DataFrame with one row per successful case.
        """
        cases = self.generate_cases()
        print(f"Running {len(cases)} parametric cases with {max_workers} workers")

        results = []
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(self.run_case, case): case for case in cases}
            for future in futures:
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    print(f"Case failed: {futures[future]['case_id']}: {e}")

        return pd.DataFrame(results)

# Usage example: a study based on the office model and Chicago weather
study = ParametricStudy(
    base_idf="base_office.idf",
    epw_path="weather/chicago.epw",
    work_dir="parametric_runs/",
)

# Define parameter: wall insulation thickness
def set_insulation(idf, thickness_mm):
    """Set the thickness of every insulation MATERIAL object.

    A material is treated as insulation when its name contains "insulation"
    (case-insensitive). The stored thickness is ``thickness_mm / 1000``.
    """
    insulation_layers = filter(
        lambda material: "insulation" in material.Name.lower(),
        idf.idfobjects["MATERIAL"],
    )
    for layer in insulation_layers:
        layer.Thickness = thickness_mm / 1000

study.add_parameter(
    name="insulation_mm",
    values=[50, 100, 150, 200],
    apply_fn=set_insulation,
)

# Define parameter: window-to-wall ratio
def set_wwr(idf, wwr):
    """Placeholder for applying a window-to-wall ratio to the model.

    The function walks every FENESTRATIONSURFACE:DETAILED object but does not
    modify anything yet, so the model is left unchanged whatever ``wwr`` is.
    """
    # Simplified: adjust window area
    windows = idf.idfobjects["FENESTRATIONSURFACE:DETAILED"]
    for _window in windows:
        # Scale window vertices to achieve target WWR
        continue  # Implementation depends on geometry

study.add_parameter("wwr", [0.2, 0.3, 0.4, 0.5], set_wwr)

# Run all 16 combinations.
# run_all() starts a process pool. Worker processes may re-import this module
# (the "spawn" start method does), so the launch is guarded to run only when
# the file is executed as the main script and never again inside a worker.
if __name__ == "__main__":
    results_df = study.run_all(max_workers=4)

Sensitivity analysis with SALib

Identify which parameters most influence energy performance:

from SALib.sample import sobol as sobol_sample
from SALib.analyze import sobol as sobol_analyze

import numpy as np  # needed for np.array below

# Define parameter space
problem = {
    "num_vars": 5,
    "names": [
        "insulation_thickness",
        "window_u_value",
        "infiltration_rate",
        "lighting_power_density",
        "cooling_setpoint",
    ],
    "bounds": [
        [0.05, 0.25],    # meters
        [1.0, 5.0],       # W/m²·K
        [0.1, 1.0],       # ACH (air changes per hour)
        [5.0, 15.0],      # W/m²
        [22.0, 26.0],     # °C
    ],
}

# Generate Sobol samples
param_values = sobol_sample.sample(problem, 256)
# With second-order indices enabled (the SALib default) this generates
# N × (2D + 2) = 256 × 12 = 3072 samples for D=5 parameters

# Run EnergyPlus for each sample. This loop is serial: one simulation at a
# time, in sample order, so the results line up with param_values.
# NOTE(review): run_single_simulation is not defined in this snippet; it is
# expected to map one parameter vector to a results dict — confirm.
eui_results = np.array([
    run_single_simulation(params)["eui_kwh_per_m2"]
    for params in param_values
])

# Analyze sensitivity
Si = sobol_analyze.analyze(problem, eui_results)
print("First-order sensitivity indices:")
for name, s1 in zip(problem["names"], Si["S1"]):
    print(f"  {name}: {s1:.3f}")
print("Total-order sensitivity indices:")
for name, st in zip(problem["names"], Si["ST"]):
    print(f"  {name}: {st:.3f}")

First-order indices show the direct effect of each parameter. Total-order indices include interactions. If total-order is much larger than first-order, that parameter has strong interactions with others.

Machine learning surrogate models

When parametric studies require thousands of simulations, ML surrogates can accelerate exploration:

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

def build_surrogate(
    results_df: pd.DataFrame,
    target: str = "eui_kwh_per_m2",
    feature_cols: "list[str] | None" = None,
):
    """Train a fast surrogate model from simulation results.

    Args:
        results_df: One row per simulation.
        target: Name of the column to predict.
        feature_cols: Columns of ``results_df`` to use as model inputs. When
            omitted, the inputs are taken from the ``"parameters"`` column
            (one dict of case inputs per row, as stored by
            ``ParametricStudy.run_case``) if that column exists; otherwise
            every column except ``target``, ``"case_id"`` and
            ``"hourly_data"`` is used.

    Returns:
        The fitted ``GradientBoostingRegressor``.
    """
    if feature_cols is not None:
        X = results_df[list(feature_cols)]
    elif "parameters" in results_df.columns:
        # Expand the per-row input dicts into one numeric column per
        # parameter. Using the remaining columns instead would feed other
        # simulation outputs and non-numeric cells to the regressor.
        X = pd.DataFrame(results_df["parameters"].tolist(), index=results_df.index)
    else:
        fallback_cols = [c for c in results_df.columns
                         if c not in [target, "case_id", "hourly_data"]]
        X = results_df[fallback_cols]
    y = results_df[target]

    model = GradientBoostingRegressor(
        n_estimators=200, max_depth=4, learning_rate=0.1,
    )

    # Cross-validate
    scores = cross_val_score(model, X, y, cv=5, scoring="r2")
    print(f"Surrogate R² = {scores.mean():.4f} ± {scores.std():.4f}")

    # Final fit on all rows; the cross-validation above only reports accuracy.
    model.fit(X, y)
    return model

# Use surrogate for rapid optimization
from scipy.optimize import differential_evolution

surrogate = build_surrogate(results_df)

# Input columns in the order the surrogate was fitted with. scikit-learn
# records them on the fitted estimator when it is trained on a DataFrame with
# string column names; the list used inside build_surrogate is local to that
# function and not visible here.
feature_cols = list(surrogate.feature_names_in_)

def surrogate_objective(params):
    """Return the surrogate's prediction for one parameter vector."""
    X = pd.DataFrame([params], columns=feature_cols)
    return surrogate.predict(X)[0]

# NOTE(review): param_mins / param_maxs are not defined in this snippet. They
# must hold one lower / upper bound per entry of feature_cols, in that order.
optimal = differential_evolution(
    surrogate_objective,
    bounds=list(zip(param_mins, param_maxs)),
)
print(f"Optimal parameters: {optimal.x}")
print(f"Predicted EUI: {optimal.fun:.1f} kWh/m²")

A surrogate trained on 500 EnergyPlus runs can evaluate 100,000 parameter combinations in seconds, enabling exhaustive optimization that would take weeks with the full simulation engine.

Comfort analysis

Beyond energy, modern building simulation evaluates occupant comfort:

def pmv_ppd(
    air_temp: float,     # °C
    mean_radiant: float, # °C
    air_speed: float,    # m/s
    humidity: float,     # % RH
    metabolic_rate: float = 1.2,  # met (typical office work)
    clothing: float = 0.7,        # clo (typical office summer)
) -> tuple[float, float]:
    """Calculate the PMV and PPD thermal comfort indices (Fanger model).

    Implements the standard Fanger heat balance with the iterative solution
    for the clothing surface temperature. External mechanical work is taken
    as zero. Inputs are scalars.

    Args:
        air_temp: Air temperature in °C.
        mean_radiant: Mean radiant temperature in °C.
        air_speed: Air speed in m/s; must not be negative.
        humidity: Relative humidity in percent.
        metabolic_rate: Metabolic rate in met (1 met = 58.15 W/m²).
        clothing: Clothing insulation in clo (1 clo = 0.155 m²·K/W).

    Returns:
        ``(pmv, ppd)``: the Predicted Mean Vote and the Predicted Percentage
        Dissatisfied (in percent).

    Raises:
        ValueError: If ``air_speed`` is negative.
        RuntimeError: If the clothing-temperature iteration does not converge.
    """
    import math  # local import: math is not imported at file level

    if air_speed < 0:
        raise ValueError("air_speed must be non-negative")

    # Water vapour partial pressure [Pa] from relative humidity and the
    # saturation pressure at air temperature.
    vapour_pressure = humidity * 10 * math.exp(16.6536 - 4030.183 / (air_temp + 235))

    clo_resistance = 0.155 * clothing   # m²·K/W
    met_w = metabolic_rate * 58.15      # W/m²; equals internal heat production (no external work)

    # Clothing area factor
    if clo_resistance <= 0.078:
        f_cl = 1.0 + 1.29 * clo_resistance
    else:
        f_cl = 1.05 + 0.645 * clo_resistance

    h_forced = 12.1 * math.sqrt(air_speed)  # forced-convection coefficient
    air_k = air_temp + 273
    rad_k = mean_radiant + 273

    # Coefficients of the clothing surface heat balance, with temperatures
    # scaled by 1/100 so the fourth powers stay well-conditioned.
    p1 = clo_resistance * f_cl
    p2 = p1 * 3.96
    p3 = p1 * 100
    p4 = p1 * air_k
    p5 = 308.7 - 0.028 * met_w + p2 * (rad_k / 100) ** 4

    # Solve for the clothing surface temperature by damped fixed-point
    # iteration; xn and xf are successive estimates of T_cl[K] / 100.
    first_guess = air_k + (35.5 - air_temp) / (3.5 * clo_resistance + 0.1)
    xn = first_guess / 100
    xf = first_guess / 50
    h_conv = h_forced
    for _ in range(150):
        if abs(xn - xf) <= 0.00015:
            break
        xf = (xf + xn) / 2
        # abs() keeps the base non-negative; the natural-convection term
        # depends only on the magnitude of the temperature difference.
        h_natural = 2.38 * abs(100.0 * xf - air_k) ** 0.25
        h_conv = max(h_forced, h_natural)
        xn = (p5 + p4 * h_conv - p2 * xf ** 4) / (100 + p3 * h_conv)
    else:
        raise RuntimeError("PMV clothing-temperature iteration did not converge")

    clothing_temp = 100 * xn - 273  # °C

    # Heat loss components [W/m²]
    loss_skin_diffusion = 3.05e-3 * (5733 - 6.99 * met_w - vapour_pressure)
    loss_sweating = 0.42 * (met_w - 58.15) if met_w > 58.15 else 0.0
    loss_latent_resp = 1.7e-5 * met_w * (5867 - vapour_pressure)
    loss_dry_resp = 0.0014 * met_w * (34 - air_temp)
    loss_radiation = 3.96 * f_cl * (xn ** 4 - (rad_k / 100) ** 4)
    loss_convection = f_cl * h_conv * (clothing_temp - air_temp)

    sensation_coeff = 0.303 * math.exp(-0.036 * met_w) + 0.028
    pmv = sensation_coeff * (
        met_w
        - loss_skin_diffusion
        - loss_sweating
        - loss_latent_resp
        - loss_dry_resp
        - loss_radiation
        - loss_convection
    )

    # PPD: Predicted Percentage Dissatisfied
    ppd = 100 - 95 * math.exp(-0.03353 * pmv**4 - 0.2179 * pmv**2)
    return float(pmv), float(ppd)

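ASHRAE Standard 55 requires PMV between -0.5 and +0.5, which corresponds to a PPD of about 10% or less. The standard's 80% acceptability target additionally allows for roughly 10% dissatisfaction from local discomfort (draft, radiant asymmetry, and similar effects).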

Tradeoffs

Decision	Option A	Option B
Simulation engine	EnergyPlus (industry standard, free)	IES VE / TRNSYS (commercial, GUI-driven, some features unavailable in E+)
Model detail	Shoebox (single zone, fast screening)	Full geometry (all zones, accurate but slow)
Weather data	TMY (typical year, standard practice)	Future weather morphed files (captures climate change impact)
Optimization	Full parametric sweep (thorough, expensive)	Surrogate-assisted (faster, approximate)
Time resolution	Hourly (standard)	Sub-hourly (needed for detailed HVAC control analysis)

One thing to remember: Building energy simulation is where physics meets design optimization — Python’s role is to automate the enormous parameter space exploration that transforms simulation from a compliance exercise into a genuine design tool.

Tags: python, building-energy, simulation, sustainability