NetworkX for Graph Analysis — Deep Dive

Master NetworkX graph construction, algorithm implementation, centrality computation, community detection, and performance optimization patterns.

Internal data structure

NetworkX represents graphs using nested dictionaries. For an undirected Graph, the adjacency structure looks like:

# Internal representation of G with edges (A,B) and (A,C)
{
    "A": {"B": {}, "C": {}},
    "B": {"A": {}},
    "C": {"A": {}},
}

Each outer key is a node, each inner key is a neighbor, and the innermost dict stores edge attributes. This design prioritizes flexibility — any hashable object can be a node, and attribute access is O(1) — at the cost of memory overhead compared to compressed sparse representations.

Building graphs

From scratch

import networkx as nx

G = nx.Graph()

# Add nodes with attributes
G.add_node("Alice", role="engineer")
G.add_node("Bob", role="designer")
G.add_node("Carol", role="manager")

# Add weighted edges
G.add_edge("Alice", "Bob", weight=0.9, project="frontend")
G.add_edge("Alice", "Carol", weight=0.7, project="backend")
G.add_edge("Bob", "Carol", weight=0.5, project="design-review")

print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")
print(f"Alice's neighbors: {list(G.neighbors('Alice'))}")

From data sources

import networkx as nx
import pandas as pd

# From an edge list DataFrame
edges = pd.DataFrame({
    "source": ["A", "A", "B", "C", "D"],
    "target": ["B", "C", "D", "D", "E"],
    "weight": [1.0, 2.0, 1.5, 0.5, 3.0],
})
G = nx.from_pandas_edgelist(edges, "source", "target", edge_attr="weight")

# From an adjacency matrix (NumPy)
import numpy as np

adj_matrix = np.array([
    [0, 1, 1, 0],
    [1, 0, 0, 1],
    [1, 0, 0, 1],
    [0, 1, 1, 0],
])
G = nx.from_numpy_array(adj_matrix)

# From a file
# G = nx.read_graphml("network.graphml")
# G = nx.read_edgelist("edges.txt", data=[("weight", float)])

Directed graphs

import networkx as nx

DG = nx.DiGraph()
DG.add_edges_from([
    ("user_A", "user_B"),  # A follows B
    ("user_A", "user_C"),
    ("user_B", "user_C"),
    ("user_C", "user_A"),
])

print(f"A's out-degree (following): {DG.out_degree('user_A')}")
print(f"C's in-degree (followers): {DG.in_degree('user_C')}")

Path algorithms

Shortest paths

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([
    ("NYC", "Chicago", 790),
    ("NYC", "DC", 225),
    ("DC", "Atlanta", 640),
    ("Chicago", "Denver", 1000),
    ("Atlanta", "Denver", 1400),
    ("Denver", "LA", 1020),
    ("Atlanta", "LA", 2200),
])

# Unweighted shortest path (fewest hops)
path = nx.shortest_path(G, "NYC", "LA")
print(f"Fewest stops: {' → '.join(path)}")

# Weighted shortest path (minimum total distance)
path_w = nx.shortest_path(G, "NYC", "LA", weight="weight")
distance = nx.shortest_path_length(G, "NYC", "LA", weight="weight")
print(f"Shortest distance: {' → '.join(path_w)} ({distance} miles)")

# All pairs shortest paths (returns a generator)
all_pairs = dict(nx.all_pairs_dijkstra_path_length(G, weight="weight"))
print(f"NYC to Denver: {all_pairs['NYC']['Denver']} miles")

Path existence and connectivity

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (4, 5)])  # Two components

print(f"Connected: {nx.is_connected(G)}")
components = list(nx.connected_components(G))
print(f"Components: {components}")
# [{1, 2, 3}, {4, 5}]

# Check if path exists before computing it
print(f"Path 1→3 exists: {nx.has_path(G, 1, 3)}")
print(f"Path 1→5 exists: {nx.has_path(G, 1, 5)}")

Centrality algorithms

import networkx as nx

# Karate club — a classic social network dataset
G = nx.karate_club_graph()

# Degree centrality
deg_central = nx.degree_centrality(G)
top_degree = sorted(deg_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by degree centrality:")
for node, score in top_degree:
    print(f"  Node {node}: {score:.3f}")

# Betweenness centrality
betw_central = nx.betweenness_centrality(G)
top_betw = sorted(betw_central.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by betweenness centrality:")
for node, score in top_betw:
    print(f"  Node {node}: {score:.3f}")

# PageRank
pagerank = nx.pagerank(G, alpha=0.85)
top_pr = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:3]
print("Top 3 by PageRank:")
for node, score in top_pr:
    print(f"  Node {node}: {score:.3f}")

# Eigenvector centrality
eigen_central = nx.eigenvector_centrality(G, max_iter=1000)

Interpreting centrality results

Different centrality measures highlight different roles:

Measure	Identifies	Real-world analog
Degree	Most connected	Celebrity with many followers
Betweenness	Bridge between groups	Diplomat connecting rival factions
Closeness	Can reach everyone quickly	Central distribution warehouse
PageRank	Endorsed by important nodes	Paper cited by landmark papers
Eigenvector	Connected to high-scoring nodes	Person who knows all the influencers

Community detection

import networkx as nx
from networkx.algorithms.community import louvain_communities, modularity

G = nx.karate_club_graph()

# Louvain method
communities = louvain_communities(G, seed=42)
print(f"Found {len(communities)} communities")
for i, comm in enumerate(communities):
    print(f"  Community {i}: {sorted(comm)}")

# Modularity score (higher = better partition)
mod = modularity(G, communities)
print(f"Modularity: {mod:.3f}")

# Girvan-Newman (edge betweenness-based, slower but interpretable)
from networkx.algorithms.community import girvan_newman

gn_communities = girvan_newman(G)
first_partition = next(gn_communities)  # First split
print(f"Girvan-Newman first split: {len(first_partition)} groups")

Graph metrics and characterization

import networkx as nx

G = nx.karate_club_graph()

# Global metrics
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.3f}")
print(f"Average clustering: {nx.average_clustering(G):.3f}")
print(f"Transitivity: {nx.transitivity(G):.3f}")
print(f"Diameter: {nx.diameter(G)}")
print(f"Average shortest path: {nx.average_shortest_path_length(G):.2f}")

# Degree distribution
import collections

degree_seq = [d for n, d in G.degree()]
degree_count = collections.Counter(degree_seq)
print(f"Degree distribution: {dict(sorted(degree_count.items()))}")

# Check for small-world property
# Small-world: high clustering + short average path (relative to random graph)

Graph generators

NetworkX includes generators for common graph models:

import networkx as nx

# Erdős–Rényi random graph
er = nx.erdos_renyi_graph(n=100, p=0.05, seed=42)

# Barabási–Albert preferential attachment (scale-free)
ba = nx.barabasi_albert_graph(n=100, m=2, seed=42)

# Watts–Strogatz small-world
ws = nx.watts_strogatz_graph(n=100, k=4, p=0.3, seed=42)

# Complete graph, cycle, grid
complete = nx.complete_graph(10)
cycle = nx.cycle_graph(20)
grid = nx.grid_2d_graph(5, 5)

These are useful for benchmarking algorithms, testing hypotheses, and understanding how network structure affects dynamics.

Visualization

import networkx as nx
import matplotlib.pyplot as plt

G = nx.karate_club_graph()

# Basic drawing
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Spring layout
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, ax=axes[0], node_size=300, with_labels=True,
        node_color="lightblue", edge_color="gray", font_size=8)
axes[0].set_title("Spring Layout")

# Kamada-Kawai layout
pos_kk = nx.kamada_kawai_layout(G)
# Color by community
communities = list(nx.community.louvain_communities(G, seed=42))
color_map = {}
for i, comm in enumerate(communities):
    for node in comm:
        color_map[node] = i
colors = [color_map[n] for n in G.nodes()]

nx.draw(G, pos_kk, ax=axes[1], node_size=300, with_labels=True,
        node_color=colors, cmap=plt.cm.Set3, edge_color="gray", font_size=8)
axes[1].set_title("Communities (Louvain)")

plt.tight_layout()
plt.savefig("graph_analysis.png", dpi=150)

For interactive or large-scale visualization, export to Gephi (.gexf), Cytoscape, or use libraries like pyvis for web-based graphs.

Real-world application: fraud detection

import networkx as nx

# Build a transaction graph
G = nx.DiGraph()

transactions = [
    ("account_A", "account_B", {"amount": 10000, "date": "2026-01-15"}),
    ("account_B", "account_C", {"amount": 9800, "date": "2026-01-15"}),
    ("account_C", "account_D", {"amount": 9600, "date": "2026-01-16"}),
    ("account_D", "account_A", {"amount": 9400, "date": "2026-01-16"}),  # Cycle!
    ("account_E", "account_F", {"amount": 500, "date": "2026-01-17"}),
]
G.add_edges_from(transactions)

# Detect cycles (potential money laundering)
cycles = list(nx.simple_cycles(G))
print(f"Suspicious cycles found: {len(cycles)}")
for cycle in cycles:
    print(f"  Cycle: {' → '.join(cycle)} → {cycle[0]}")

# Find accounts that bridge communities (potential mules)
betw = nx.betweenness_centrality(G)
suspicious = {k: v for k, v in betw.items() if v > 0.1}
print(f"High-betweenness accounts: {suspicious}")

Performance optimization

Algorithmic complexity

Algorithm	Time complexity	Practical limit
BFS/DFS	O(V + E)	Millions of edges
Dijkstra shortest path	O((V + E) log V)	Hundreds of thousands
Betweenness centrality	O(V × E)	~10k nodes
All-pairs shortest paths	O(V³) or O(V × E log V)	~5k nodes
Community detection (Louvain)	O(E) per pass	Hundreds of thousands

Speeding up NetworkX

Use approximate algorithms. nx.betweenness_centrality(G, k=100) samples 100 pivot nodes instead of using all nodes.
Convert to sparse matrices. nx.to_scipy_sparse_array(G) gives you a scipy sparse matrix for linear algebra operations.
Parallelize. Some algorithms accept a backend parameter in NetworkX 3.x+ for parallel execution.
Switch libraries for scale. For graphs exceeding 100k nodes, consider igraph (C core), graph-tool (C++ core), or networkit.

import networkx as nx
import numpy as np
from scipy.sparse.linalg import eigsh

# Spectral analysis using sparse matrices
G = nx.karate_club_graph()
A = nx.to_scipy_sparse_array(G, dtype=float)

# Compute top eigenvalues
eigenvalues, eigenvectors = eigsh(A, k=5)
print(f"Top 5 eigenvalues: {eigenvalues}")

Export and interoperability

import networkx as nx
import json

G = nx.karate_club_graph()

# GraphML (standard graph interchange format)
nx.write_graphml(G, "karate.graphml")

# GEXF (for Gephi)
nx.write_gexf(G, "karate.gexf")

# JSON (for D3.js visualization)
data = nx.node_link_data(G)
with open("karate.json", "w") as f:
    json.dump(data, f, indent=2)

# Adjacency list (simple text format)
nx.write_adjlist(G, "karate.adjlist")

The one thing to remember: NetworkX makes graph analysis in Python effortless for prototyping and small-to-medium datasets — its dictionary-based design trades raw performance for extraordinary flexibility in modeling any type of relationship.

pythondata-sciencegraph-theory