NetworkX for Graph Analysis — Deep Dive

Internal data structure

NetworkX represents graphs using nested dictionaries. For an undirected Graph, the adjacency structure looks like:

# Internal representation of G with edges (A,B) and (A,C)
{
    "A": {"B": {}, "C": {}},
    "B": {"A": {}},
    "C": {"A": {}},
}

Each outer key is a node, each inner key is a neighbor, and the innermost dict stores edge attributes. This design prioritizes flexibility — any hashable object can be a node, and attribute access is O(1) — at the cost of memory overhead compared to compressed sparse representations.

Building graphs

From scratch

import networkx as nx

G = nx.Graph()

# Add nodes with attributes
G.add_node("Alice", role="engineer")
G.add_node("Bob", role="designer")
G.add_node("Carol", role="manager")

# Add weighted edges
G.add_edge("Alice", "Bob", weight=0.9, project="frontend")
G.add_edge("Alice", "Carol", weight=0.7, project="backend")
G.add_edge("Bob", "Carol", weight=0.5, project="design-review")

print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")
print(f"Alice's neighbors: {list(G.neighbors('Alice'))}")

From data sources

import networkx as nx
import pandas as pd

# From an edge list DataFrame
edges = pd.DataFrame({
    "source": ["A", "A", "B", "C", "D"],
    "target": ["B", "C", "D", "D", "E"],
    "weight": [1.0, 2.0, 1.5, 0.5, 3.0],
})
G = nx.from_pandas_edgelist(edges, "source", "target", edge_attr="weight")

# From an adjacency matrix (NumPy)
import numpy as np

adj_matrix = np.array([
    [0, 1, 1, 0],
    [1, 0, 0, 1],
    [1, 0, 0, 1],
    [0, 1, 1, 0],
])
G = nx.from_numpy_array(adj_matrix)

# From a file
# G = nx.read_graphml("network.graphml")
# G = nx.read_edgelist("edges.txt", data=[("weight", float)])

Directed graphs

import networkx as nx

DG = nx.DiGraph()
DG.add_edges_from([
    ("user_A", "user_B"),  # A follows B
    ("user_A", "user_C"),
    ("user_B", "user_C"),
    ("user_C", "user_A"),
])

print(f"A's out-degree (following): {DG.out_degree('user_A')}")
print(f"C's in-degree (followers): {DG.in_degree('user_C')}")

Path algorithms

Shortest paths

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([
    ("NYC", "Chicago", 790),
    ("NYC", "DC", 225),
    ("DC", "Atlanta", 640),
    ("Chicago", "Denver", 1000),
    ("Atlanta", "Denver", 1400),
    ("Denver", "LA", 1020),
    ("Atlanta", "LA", 2200),
])

# Unweighted shortest path (fewest hops)
path = nx.shortest_path(G, "NYC", "LA")
print(f"Fewest stops: {' → '.join(path)}")

# Weighted shortest path (minimum total distance)
path_w = nx.shortest_path(G, "NYC", "LA", weight="weight")
distance = nx.shortest_path_length(G, "NYC", "LA", weight="weight")
print(f"Shortest distance: {' → '.join(path_w)} ({distance} miles)")

# All pairs shortest paths (returns a generator)
all_pairs = dict(nx.all_pairs_dijkstra_path_length(G, weight="weight"))
print(f"NYC to Denver: {all_pairs['NYC']['Denver']} miles")

Path existence and connectivity

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (4, 5)])  # Two components

print(f"Connected: {nx.is_connected(G)}")
components = list(nx.connected_components(G))
print(f"Components: {components}")
# [{1, 2, 3}, {4, 5}]

# Check if path exists before computing it
print(f"Path 1→3 exists: {nx.has_path(G, 1, 3)}")
print(f"Path 1→5 exists: {nx.has_path(G, 1, 5)}")

Centrality algorithms

import networkx as nx

# Karate club — a classic social network dataset
G = nx.karate_club_graph()

# Degree centrality
deg_central = nx.degree_centrality(G)
top_degree = sorted(deg_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by degree centrality:")
for node, score in top_degree:
    print(f"  Node {node}: {score:.3f}")

# Betweenness centrality
betw_central = nx.betweenness_centrality(G)
top_betw = sorted(betw_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by betweenness centrality:")
for node, score in top_betw:
    print(f"  Node {node}: {score:.3f}")

# PageRank
pagerank = nx.pagerank(G, alpha=0.85)
top_pr = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by PageRank:")
for node, score in top_pr:
    print(f"  Node {node}: {score:.3f}")

# Eigenvector centrality
eigen_central = nx.eigenvector_centrality(G, max_iter=1000)

Interpreting centrality results

Different centrality measures highlight different roles:

MeasureIdentifiesReal-world analog
DegreeMost connectedCelebrity with many followers
BetweennessBridge between groupsDiplomat connecting rival factions
ClosenessCan reach everyone quicklyCentral distribution warehouse
PageRankEndorsed by important nodesPaper cited by landmark papers
EigenvectorConnected to high-scoring nodesPerson who knows all the influencers

Community detection

import networkx as nx
from networkx.algorithms.community import louvain_communities, modularity

G = nx.karate_club_graph()

# Louvain method
communities = louvain_communities(G, seed=42)
print(f"Found {len(communities)} communities")
for i, comm in enumerate(communities):
    print(f"  Community {i}: {sorted(comm)}")

# Modularity score (higher = better partition)
mod = modularity(G, communities)
print(f"Modularity: {mod:.3f}")

# Girvan-Newman (edge betweenness-based, slower but interpretable)
from networkx.algorithms.community import girvan_newman

gn_communities = girvan_newman(G)
first_partition = next(gn_communities)  # First split
print(f"Girvan-Newman first split: {len(first_partition)} groups")

Graph metrics and characterization

import networkx as nx

G = nx.karate_club_graph()

# Global metrics
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.3f}")
print(f"Average clustering: {nx.average_clustering(G):.3f}")
print(f"Transitivity: {nx.transitivity(G):.3f}")
print(f"Diameter: {nx.diameter(G)}")
print(f"Average shortest path: {nx.average_shortest_path_length(G):.2f}")

# Degree distribution
import collections

degree_seq = [d for n, d in G.degree()]
degree_count = collections.Counter(degree_seq)
print(f"Degree distribution: {dict(sorted(degree_count.items()))}")

# Check for small-world property
# Small-world: high clustering + short average path (relative to random graph)

Graph generators

NetworkX includes generators for common graph models:

import networkx as nx

# Erdős–Rényi random graph
er = nx.erdos_renyi_graph(n=100, p=0.05, seed=42)

# Barabási–Albert preferential attachment (scale-free)
ba = nx.barabasi_albert_graph(n=100, m=2, seed=42)

# Watts–Strogatz small-world
ws = nx.watts_strogatz_graph(n=100, k=4, p=0.3, seed=42)

# Complete graph, cycle, grid
complete = nx.complete_graph(10)
cycle = nx.cycle_graph(20)
grid = nx.grid_2d_graph(5, 5)

These are useful for benchmarking algorithms, testing hypotheses, and understanding how network structure affects dynamics.

Visualization

import networkx as nx
import matplotlib.pyplot as plt

G = nx.karate_club_graph()

# Basic drawing
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Spring layout
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, ax=axes[0], node_size=300, with_labels=True,
        node_color="lightblue", edge_color="gray", font_size=8)
axes[0].set_title("Spring Layout")

# Kamada-Kawai layout
pos_kk = nx.kamada_kawai_layout(G)
# Color by community
communities = list(nx.community.louvain_communities(G, seed=42))
color_map = {}
for i, comm in enumerate(communities):
    for node in comm:
        color_map[node] = i
colors = [color_map[n] for n in G.nodes()]

nx.draw(G, pos_kk, ax=axes[1], node_size=300, with_labels=True,
        node_color=colors, cmap=plt.cm.Set3, edge_color="gray", font_size=8)
axes[1].set_title("Communities (Louvain)")

plt.tight_layout()
plt.savefig("graph_analysis.png", dpi=150)

For interactive or large-scale visualization, export to Gephi (.gexf), Cytoscape, or use libraries like pyvis for web-based graphs.

Real-world application: fraud detection

import networkx as nx

# Build a transaction graph
G = nx.DiGraph()

transactions = [
    ("account_A", "account_B", {"amount": 10000, "date": "2026-01-15"}),
    ("account_B", "account_C", {"amount": 9800, "date": "2026-01-15"}),
    ("account_C", "account_D", {"amount": 9600, "date": "2026-01-16"}),
    ("account_D", "account_A", {"amount": 9400, "date": "2026-01-16"}),  # Cycle!
    ("account_E", "account_F", {"amount": 500, "date": "2026-01-17"}),
]
G.add_edges_from(transactions)

# Detect cycles (potential money laundering)
cycles = list(nx.simple_cycles(G))
print(f"Suspicious cycles found: {len(cycles)}")
for cycle in cycles:
    print(f"  Cycle: {' → '.join(cycle)}{cycle[0]}")

# Find accounts that bridge communities (potential mules)
betw = nx.betweenness_centrality(G)
suspicious = {k: v for k, v in betw.items() if v > 0.1}
print(f"High-betweenness accounts: {suspicious}")

Performance optimization

Algorithmic complexity

AlgorithmTime complexityPractical limit
BFS/DFSO(V + E)Millions of edges
Dijkstra shortest pathO((V + E) log V)Hundreds of thousands
Betweenness centralityO(V × E)~10k nodes
All-pairs shortest pathsO(V³) or O(V × E log V)~5k nodes
Community detection (Louvain)O(E) per passHundreds of thousands

Speeding up NetworkX

  1. Use approximate algorithms. nx.betweenness_centrality(G, k=100) samples 100 pivot nodes instead of using all nodes.
  2. Convert to sparse matrices. nx.to_scipy_sparse_array(G) gives you a scipy sparse matrix for linear algebra operations.
  3. Parallelize. Some algorithms accept a backend parameter in NetworkX 3.x+ for parallel execution.
  4. Switch libraries for scale. For graphs exceeding 100k nodes, consider igraph (C core), graph-tool (C++ core), or networkit.
import networkx as nx
import numpy as np
from scipy.sparse.linalg import eigsh

# Spectral analysis using sparse matrices
G = nx.karate_club_graph()
A = nx.to_scipy_sparse_array(G, dtype=float)

# Compute top eigenvalues
eigenvalues, eigenvectors = eigsh(A, k=5)
print(f"Top 5 eigenvalues: {eigenvalues}")

Export and interoperability

import networkx as nx
import json

G = nx.karate_club_graph()

# GraphML (standard graph interchange format)
nx.write_graphml(G, "karate.graphml")

# GEXF (for Gephi)
nx.write_gexf(G, "karate.gexf")

# JSON (for D3.js visualization)
data = nx.node_link_data(G)
with open("karate.json", "w") as f:
    json.dump(data, f, indent=2)

# Adjacency list (simple text format)
nx.write_adjlist(G, "karate.adjlist")

The one thing to remember: NetworkX makes graph analysis in Python effortless for prototyping and small-to-medium datasets — its dictionary-based design trades raw performance for extraordinary flexibility in modeling any type of relationship.

pythondata-sciencegraph-theory

See Also

  • Python Community Detection How Python finds hidden groups in networks — friend circles, customer segments, and research clusters — just by looking at who connects to whom.
  • Python Graph Embeddings How Python turns tangled webs of connections into neat lists of numbers that computers can actually work with.
  • Python Graph Neural Networks How Python teaches computers to learn from connections — not just data points — by combining neural networks with graph structures.
  • Python Link Prediction How Python guesses which connections are missing from a network — predicting future friendships, recommendations, and undiscovered relationships.
  • Python Arima Forecasting How ARIMA models use patterns in past numbers to predict the future, explained like a bedtime story.