NetworkX for Graph Analysis — Deep Dive
Internal data structure
NetworkX represents graphs using nested dictionaries. For an undirected Graph, the adjacency structure looks like:
# Internal representation of G with edges (A,B) and (A,C)
{
"A": {"B": {}, "C": {}},
"B": {"A": {}},
"C": {"A": {}},
}
Each outer key is a node, each inner key is a neighbor, and the innermost dict stores edge attributes. This design prioritizes flexibility — any hashable object can be a node, and attribute access is O(1) — at the cost of memory overhead compared to compressed sparse representations.
Building graphs
From scratch
import networkx as nx
G = nx.Graph()
# Add nodes with attributes
G.add_node("Alice", role="engineer")
G.add_node("Bob", role="designer")
G.add_node("Carol", role="manager")
# Add weighted edges
G.add_edge("Alice", "Bob", weight=0.9, project="frontend")
G.add_edge("Alice", "Carol", weight=0.7, project="backend")
G.add_edge("Bob", "Carol", weight=0.5, project="design-review")
print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")
print(f"Alice's neighbors: {list(G.neighbors('Alice'))}")
From data sources
import networkx as nx
import pandas as pd
# From an edge list DataFrame
edges = pd.DataFrame({
"source": ["A", "A", "B", "C", "D"],
"target": ["B", "C", "D", "D", "E"],
"weight": [1.0, 2.0, 1.5, 0.5, 3.0],
})
G = nx.from_pandas_edgelist(edges, "source", "target", edge_attr="weight")
# From an adjacency matrix (NumPy)
import numpy as np
adj_matrix = np.array([
[0, 1, 1, 0],
[1, 0, 0, 1],
[1, 0, 0, 1],
[0, 1, 1, 0],
])
G = nx.from_numpy_array(adj_matrix)
# From a file
# G = nx.read_graphml("network.graphml")
# G = nx.read_edgelist("edges.txt", data=[("weight", float)])
Directed graphs
import networkx as nx
DG = nx.DiGraph()
DG.add_edges_from([
("user_A", "user_B"), # A follows B
("user_A", "user_C"),
("user_B", "user_C"),
("user_C", "user_A"),
])
print(f"A's out-degree (following): {DG.out_degree('user_A')}")
print(f"C's in-degree (followers): {DG.in_degree('user_C')}")
Path algorithms
Shortest paths
import networkx as nx
G = nx.Graph()
G.add_weighted_edges_from([
("NYC", "Chicago", 790),
("NYC", "DC", 225),
("DC", "Atlanta", 640),
("Chicago", "Denver", 1000),
("Atlanta", "Denver", 1400),
("Denver", "LA", 1020),
("Atlanta", "LA", 2200),
])
# Unweighted shortest path (fewest hops)
path = nx.shortest_path(G, "NYC", "LA")
print(f"Fewest stops: {' → '.join(path)}")
# Weighted shortest path (minimum total distance)
path_w = nx.shortest_path(G, "NYC", "LA", weight="weight")
distance = nx.shortest_path_length(G, "NYC", "LA", weight="weight")
print(f"Shortest distance: {' → '.join(path_w)} ({distance} miles)")
# All pairs shortest paths (returns a generator)
all_pairs = dict(nx.all_pairs_dijkstra_path_length(G, weight="weight"))
print(f"NYC to Denver: {all_pairs['NYC']['Denver']} miles")
Path existence and connectivity
import networkx as nx
G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (4, 5)]) # Two components
print(f"Connected: {nx.is_connected(G)}")
components = list(nx.connected_components(G))
print(f"Components: {components}")
# [{1, 2, 3}, {4, 5}]
# Check if path exists before computing it
print(f"Path 1→3 exists: {nx.has_path(G, 1, 3)}")
print(f"Path 1→5 exists: {nx.has_path(G, 1, 5)}")
Centrality algorithms
import networkx as nx
# Karate club — a classic social network dataset
G = nx.karate_club_graph()
# Degree centrality
deg_central = nx.degree_centrality(G)
top_degree = sorted(deg_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by degree centrality:")
for node, score in top_degree:
print(f" Node {node}: {score:.3f}")
# Betweenness centrality
betw_central = nx.betweenness_centrality(G)
top_betw = sorted(betw_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by betweenness centrality:")
for node, score in top_betw:
print(f" Node {node}: {score:.3f}")
# PageRank
pagerank = nx.pagerank(G, alpha=0.85)
top_pr = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by PageRank:")
for node, score in top_pr:
print(f" Node {node}: {score:.3f}")
# Eigenvector centrality
eigen_central = nx.eigenvector_centrality(G, max_iter=1000)
Interpreting centrality results
Different centrality measures highlight different roles:
| Measure | Identifies | Real-world analog |
|---|---|---|
| Degree | Most connected | Celebrity with many followers |
| Betweenness | Bridge between groups | Diplomat connecting rival factions |
| Closeness | Can reach everyone quickly | Central distribution warehouse |
| PageRank | Endorsed by important nodes | Paper cited by landmark papers |
| Eigenvector | Connected to high-scoring nodes | Person who knows all the influencers |
Community detection
import networkx as nx
from networkx.algorithms.community import louvain_communities, modularity
G = nx.karate_club_graph()
# Louvain method
communities = louvain_communities(G, seed=42)
print(f"Found {len(communities)} communities")
for i, comm in enumerate(communities):
print(f" Community {i}: {sorted(comm)}")
# Modularity score (higher = better partition)
mod = modularity(G, communities)
print(f"Modularity: {mod:.3f}")
# Girvan-Newman (edge betweenness-based, slower but interpretable)
from networkx.algorithms.community import girvan_newman
gn_communities = girvan_newman(G)
first_partition = next(gn_communities) # First split
print(f"Girvan-Newman first split: {len(first_partition)} groups")
Graph metrics and characterization
import networkx as nx
G = nx.karate_club_graph()
# Global metrics
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.3f}")
print(f"Average clustering: {nx.average_clustering(G):.3f}")
print(f"Transitivity: {nx.transitivity(G):.3f}")
print(f"Diameter: {nx.diameter(G)}")
print(f"Average shortest path: {nx.average_shortest_path_length(G):.2f}")
# Degree distribution
import collections
degree_seq = [d for n, d in G.degree()]
degree_count = collections.Counter(degree_seq)
print(f"Degree distribution: {dict(sorted(degree_count.items()))}")
# Check for small-world property
# Small-world: high clustering + short average path (relative to random graph)
Graph generators
NetworkX includes generators for common graph models:
import networkx as nx
# Erdős–Rényi random graph
er = nx.erdos_renyi_graph(n=100, p=0.05, seed=42)
# Barabási–Albert preferential attachment (scale-free)
ba = nx.barabasi_albert_graph(n=100, m=2, seed=42)
# Watts–Strogatz small-world
ws = nx.watts_strogatz_graph(n=100, k=4, p=0.3, seed=42)
# Complete graph, cycle, grid
complete = nx.complete_graph(10)
cycle = nx.cycle_graph(20)
grid = nx.grid_2d_graph(5, 5)
These are useful for benchmarking algorithms, testing hypotheses, and understanding how network structure affects dynamics.
Visualization
import networkx as nx
import matplotlib.pyplot as plt
G = nx.karate_club_graph()
# Basic drawing
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# Spring layout
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, ax=axes[0], node_size=300, with_labels=True,
node_color="lightblue", edge_color="gray", font_size=8)
axes[0].set_title("Spring Layout")
# Kamada-Kawai layout
pos_kk = nx.kamada_kawai_layout(G)
# Color by community
communities = list(nx.community.louvain_communities(G, seed=42))
color_map = {}
for i, comm in enumerate(communities):
for node in comm:
color_map[node] = i
colors = [color_map[n] for n in G.nodes()]
nx.draw(G, pos_kk, ax=axes[1], node_size=300, with_labels=True,
node_color=colors, cmap=plt.cm.Set3, edge_color="gray", font_size=8)
axes[1].set_title("Communities (Louvain)")
plt.tight_layout()
plt.savefig("graph_analysis.png", dpi=150)
For interactive or large-scale visualization, export to Gephi (.gexf), Cytoscape, or use libraries like pyvis for web-based graphs.
Real-world application: fraud detection
import networkx as nx
# Build a transaction graph
G = nx.DiGraph()
transactions = [
("account_A", "account_B", {"amount": 10000, "date": "2026-01-15"}),
("account_B", "account_C", {"amount": 9800, "date": "2026-01-15"}),
("account_C", "account_D", {"amount": 9600, "date": "2026-01-16"}),
("account_D", "account_A", {"amount": 9400, "date": "2026-01-16"}), # Cycle!
("account_E", "account_F", {"amount": 500, "date": "2026-01-17"}),
]
G.add_edges_from(transactions)
# Detect cycles (potential money laundering)
cycles = list(nx.simple_cycles(G))
print(f"Suspicious cycles found: {len(cycles)}")
for cycle in cycles:
print(f" Cycle: {' → '.join(cycle)} → {cycle[0]}")
# Find accounts that bridge communities (potential mules)
betw = nx.betweenness_centrality(G)
suspicious = {k: v for k, v in betw.items() if v > 0.1}
print(f"High-betweenness accounts: {suspicious}")
Performance optimization
Algorithmic complexity
| Algorithm | Time complexity | Practical limit |
|---|---|---|
| BFS/DFS | O(V + E) | Millions of edges |
| Dijkstra shortest path | O((V + E) log V) | Hundreds of thousands |
| Betweenness centrality | O(V × E) | ~10k nodes |
| All-pairs shortest paths | O(V³) or O(V × E log V) | ~5k nodes |
| Community detection (Louvain) | O(E) per pass | Hundreds of thousands |
Speeding up NetworkX
- Use approximate algorithms.
nx.betweenness_centrality(G, k=100)samples 100 pivot nodes instead of using all nodes. - Convert to sparse matrices.
nx.to_scipy_sparse_array(G)gives you a scipy sparse matrix for linear algebra operations. - Parallelize. Some algorithms accept a
backendparameter in NetworkX 3.x+ for parallel execution. - Switch libraries for scale. For graphs exceeding 100k nodes, consider
igraph(C core),graph-tool(C++ core), ornetworkit.
import networkx as nx
import numpy as np
from scipy.sparse.linalg import eigsh
# Spectral analysis using sparse matrices
G = nx.karate_club_graph()
A = nx.to_scipy_sparse_array(G, dtype=float)
# Compute top eigenvalues
eigenvalues, eigenvectors = eigsh(A, k=5)
print(f"Top 5 eigenvalues: {eigenvalues}")
Export and interoperability
import networkx as nx
import json
G = nx.karate_club_graph()
# GraphML (standard graph interchange format)
nx.write_graphml(G, "karate.graphml")
# GEXF (for Gephi)
nx.write_gexf(G, "karate.gexf")
# JSON (for D3.js visualization)
data = nx.node_link_data(G)
with open("karate.json", "w") as f:
json.dump(data, f, indent=2)
# Adjacency list (simple text format)
nx.write_adjlist(G, "karate.adjlist")
The one thing to remember: NetworkX makes graph analysis in Python effortless for prototyping and small-to-medium datasets — its dictionary-based design trades raw performance for extraordinary flexibility in modeling any type of relationship.
See Also
- Python Community Detection How Python finds hidden groups in networks — friend circles, customer segments, and research clusters — just by looking at who connects to whom.
- Python Graph Embeddings How Python turns tangled webs of connections into neat lists of numbers that computers can actually work with.
- Python Graph Neural Networks How Python teaches computers to learn from connections — not just data points — by combining neural networks with graph structures.
- Python Link Prediction How Python guesses which connections are missing from a network — predicting future friendships, recommendations, and undiscovered relationships.
- Python Arima Forecasting How ARIMA models use patterns in past numbers to predict the future, explained like a bedtime story.