Signaling pathways can be studied computationally in two complementary ways: mechanistic models (ODEs, Boolean networks) that simulate the dynamics of molecular interactions, and data-driven methods that infer pathway activity from transcriptomic or proteomic measurements. Both approaches are used in practice, often together.
In this chapter we'll build a simple ODE model of the MAPK cascade, implement a Boolean network for the cell cycle G1/S transition, and compute pathway activity scores from expression data.
Setup
pip install numpy scipy matplotlib pandas requests
import numpy as np
from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt
import pandas as pd
ODE Modeling: The MAPK Cascade
The MAPK/ERK cascade (RAS → RAF → MEK → ERK) can be modeled as a series of activation/deactivation reactions. We'll use a simplified Michaelis-Menten kinetics model.
Model Design
For each kinase, we track the fraction that is in the active (phosphorylated) state. Each activation follows Michaelis-Menten kinetics; deactivation is linear (phosphatase activity).
dRAF*/dt = k1 * RAS * (1 - RAF*) / (Km1 + (1 - RAF*)) - k2 * RAF*
dMEK*/dt = k3 * RAF* * (1 - MEK*) / (Km3 + (1 - MEK*)) - k4 * MEK*
dERK*/dt = k5 * MEK* * (1 - ERK*) / (Km5 + (1 - ERK*)) - k6 * ERK*
Where:
X*= fraction of X in active state (0 to 1)k_odd= rate constants for activation (phosphorylation by upstream kinase)k_even= rate constants for deactivation (phosphatase)Km= Michaelis constant for phosphorylationRAS= input signal (fixed or time-varying)
def mapk_cascade(t, y, params, ras_signal):
raf_star, mek_star, erk_star = y
k1, k2, km1, k3, k4, km3, k5, k6, km5 = params
ras = ras_signal(t) # input signal as function of time
# RAF activation/deactivation
draf = k1 * ras * (1 - raf_star) / (km1 + (1 - raf_star)) - k2 * raf_star
# MEK activation/deactivation
dmek = k3 * raf_star * (1 - mek_star) / (km3 + (1 - mek_star)) - k4 * mek_star
# ERK activation/deactivation
derk = k5 * mek_star * (1 - erk_star) / (km5 + (1 - erk_star)) - k6 * erk_star
return [draf, dmek, derk]
# Parameters: [k1, k2, km1, k3, k4, km3, k5, k6, km5]
params = [
0.5, 0.1, 0.1, # RAF activation/deactivation/Km
0.5, 0.1, 0.1, # MEK activation/deactivation/Km
0.5, 0.1, 0.1, # ERK activation/deactivation/Km
]
# Initial conditions: all inactive
y0 = [0.0, 0.0, 0.0] # RAF*, MEK*, ERK*
# Time span
t_span = (0, 100)
t_eval = np.linspace(0, 100, 500)
Scenario 1: Step Input (Sustained Growth Factor)
# Constant RAS signal: growth factor applied at t=5
def ras_step(t):
return 1.0 if t >= 5 else 0.0
sol_step = solve_ivp(
mapk_cascade,
t_span,
y0,
t_eval=t_eval,
args=(params, ras_step),
method="RK45",
rtol=1e-6
)
plt.figure(figsize=(10, 5))
plt.plot(sol_step.t, sol_step.y[0], label="RAF* (active)", linewidth=2)
plt.plot(sol_step.t, sol_step.y[1], label="MEK* (active)", linewidth=2)
plt.plot(sol_step.t, sol_step.y[2], label="ERK* (active)", linewidth=2)
plt.axvline(x=5, color="gray", linestyle="--", alpha=0.5, label="Growth factor ON")
plt.xlabel("Time (min)")
plt.ylabel("Fraction active")
plt.title("MAPK Cascade — Step Input (Sustained GF)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig("mapk_step.png", dpi=150)
plt.show()
Scenario 2: Pulse Input (Transient Signal)
def ras_pulse(t):
return 1.0 if 5 <= t <= 15 else 0.0
sol_pulse = solve_ivp(
mapk_cascade, t_span, y0, t_eval=t_eval,
args=(params, ras_pulse), method="RK45", rtol=1e-6
)
plt.figure(figsize=(10, 5))
plt.plot(sol_pulse.t, sol_pulse.y[2], label="ERK* (pulse input)", linewidth=2, color="red")
plt.plot(sol_step.t, sol_step.y[2], label="ERK* (step input)", linewidth=2, color="blue", linestyle="--")
plt.xlabel("Time (min)")
plt.ylabel("ERK* fraction active")
plt.title("ERK Response: Pulse vs. Step Input")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig("erk_comparison.png", dpi=150)
plt.show()
Scenario 3: Simulating an Inhibitor
Simulate adding a MEK inhibitor (like trametinib) by reducing MEK activation rate:
def simulate_with_inhibitor(inhibitor_strength: float, params: list, label: str):
"""inhibitor_strength: 0=no effect, 1=complete inhibition"""
params_mod = params.copy()
params_mod[3] *= (1 - inhibitor_strength) # reduce k3 (MEK activation)
sol = solve_ivp(
mapk_cascade, t_span, y0, t_eval=t_eval,
args=(params_mod, ras_step), method="RK45", rtol=1e-6
)
return sol
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
for strength, label in [(0.0, "No inhibitor"), (0.5, "50% MEK inhibition"), (0.9, "90% MEK inhibition")]:
sol = simulate_with_inhibitor(strength, params, label)
axes[0].plot(sol.t, sol.y[1], label=f"MEK* ({label})")
axes[1].plot(sol.t, sol.y[2], label=f"ERK* ({label})")
for ax, title in zip(axes, ["MEK Activity", "ERK Activity"]):
ax.axvline(x=5, color="gray", linestyle="--", alpha=0.5)
ax.set_xlabel("Time (min)")
ax.set_ylabel("Fraction active")
ax.set_title(title)
ax.legend(fontsize=8)
ax.grid(alpha=0.3)
plt.suptitle("MEK Inhibitor Dose Response")
plt.tight_layout()
plt.savefig("mek_inhibitor.png", dpi=150)
plt.show()
Boolean Network: G1/S Cell Cycle Transition
For larger networks where kinetic parameters are unknown, Boolean models capture the logical structure:
class BooleanNetwork:
def __init__(self, update_rules: dict, initial_state: dict):
self.rules = update_rules
self.state = initial_state.copy()
self.history = [initial_state.copy()]
def step(self):
new_state = {}
for node, rule in self.rules.items():
new_state[node] = rule(self.state)
self.state = new_state
self.history.append(new_state.copy())
def run(self, n_steps: int):
for _ in range(n_steps):
self.step()
def get_attractor(self) -> tuple:
"""Check if the network has reached a steady state."""
if len(self.history) < 2:
return None
current = frozenset(self.state.items())
for i, past_state in enumerate(self.history[:-1]):
if frozenset(past_state.items()) == current:
return i # cycle detected, period = current_idx - i
return None
# G1/S transition Boolean model
# Nodes: GF (growth factor), CycD, Rb, E2F, CycE, S_phase
# Based on simplified Fauré et al. 2006
def make_g1s_network(gf_active: bool = True):
initial_state = {
"GF": gf_active, # growth factor signal
"CycD": False, # Cyclin D
"Rb": True, # RB protein (active = growth suppression)
"E2F": False, # E2F transcription factor (active = S-phase genes)
"CycE": False, # Cyclin E
"S_phase": False, # S-phase entry
}
update_rules = {
"GF": lambda s: s["GF"], # externally set
"CycD": lambda s: s["GF"], # CycD is driven by GF
"Rb": lambda s: not s["CycD"] and not s["CycE"], # inhibited by CycD and CycE
"E2F": lambda s: not s["Rb"], # activated when Rb is inactivated
"CycE": lambda s: s["E2F"], # activated by E2F
"S_phase": lambda s: s["E2F"] and s["CycE"], # requires both
}
return BooleanNetwork(update_rules, initial_state)
# Run with GF present
net_gf = make_g1s_network(gf_active=True)
net_gf.run(10)
# Run without GF
net_no_gf = make_g1s_network(gf_active=False)
net_no_gf.run(10)
# Visualize
def plot_boolean_trajectory(history: list, title: str, ax):
nodes = list(history[0].keys())
times = range(len(history))
for i, node in enumerate(nodes):
values = [h[node] for h in history]
ax.step(times, [v + i * 1.5 for v in values], where="post", linewidth=2)
ax.text(-0.5, i * 1.5 + 0.5, node, ha="right", fontsize=9)
ax.set_xlim(-1, len(history))
ax.set_xlabel("Time steps")
ax.set_yticks([])
ax.set_title(title)
ax.grid(axis="x", alpha=0.3)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
plot_boolean_trajectory(net_gf.history, "With Growth Factor", axes[0])
plot_boolean_trajectory(net_no_gf.history, "Without Growth Factor", axes[1])
plt.tight_layout()
plt.savefig("boolean_cell_cycle.png", dpi=150)
plt.show()
Pathway Activity Scoring from Expression Data
Given a gene expression matrix, compute pathway activity scores by summing the expression of pathway member genes.
Method 1: Simple Z-score Aggregation
import numpy as np
def pathway_score_zscore(
expression_matrix: np.ndarray, # genes × samples
gene_names: list,
pathway_genes: list
) -> np.ndarray:
"""Score pathway activity by mean z-score of member genes."""
# Find pathway gene indices
gene_index = {g: i for i, g in enumerate(gene_names)}
indices = [gene_index[g] for g in pathway_genes if g in gene_index]
if not indices:
return np.zeros(expression_matrix.shape[1])
# Z-score normalize per gene
mean = expression_matrix.mean(axis=1, keepdims=True)
std = expression_matrix.std(axis=1, keepdims=True) + 1e-8
z_scores = (expression_matrix - mean) / std
# Mean z-score of pathway genes per sample
return z_scores[indices, :].mean(axis=0)
# Example with simulated data
np.random.seed(42)
n_genes, n_samples = 1000, 50
gene_names = [f"GENE_{i}" for i in range(n_genes)]
# Simulate: MAPK pathway genes upregulated in first 25 samples
expression = np.random.randn(n_genes, n_samples)
mapk_pathway_indices = [10, 25, 50, 100, 200] # "MAPK pathway genes"
mapk_genes = [gene_names[i] for i in mapk_pathway_indices]
# Add signal to first 25 samples
expression[mapk_pathway_indices, :25] += 2.0
scores = pathway_score_zscore(expression, gene_names, mapk_genes)
print(f"Samples 1-25 (MAPK active): mean score = {scores[:25].mean():.2f}")
print(f"Samples 26-50 (MAPK inactive): mean score = {scores[25:].mean():.2f}")
Method 2: GSEA-style Enrichment Score
def gsea_enrichment_score(
gene_scores: dict, # gene → score (e.g., fold change)
gene_set: set, # genes in the pathway
) -> float:
"""
Compute a simplified GSEA enrichment score.
Ranks genes by score, then computes running sum.
"""
# Sort genes by score (descending)
sorted_genes = sorted(gene_scores.keys(), key=lambda g: -gene_scores[g])
n_total = len(sorted_genes)
n_set = sum(1 for g in sorted_genes if g in gene_set)
if n_set == 0:
return 0.0
# Compute running enrichment score
p_hit = 1.0 / n_set
p_miss = 1.0 / (n_total - n_set)
running_sum = 0.0
max_deviation = 0.0
for gene in sorted_genes:
if gene in gene_set:
running_sum += p_hit
else:
running_sum -= p_miss
if abs(running_sum) > abs(max_deviation):
max_deviation = running_sum
return max_deviation
# Example: test MAPK gene set enrichment
gene_fold_changes = {
f"GENE_{i}": np.random.randn() for i in range(1000)
}
# Artificially upregulate MAPK genes
for i in mapk_pathway_indices:
gene_fold_changes[f"GENE_{i}"] = 3.0 + np.random.randn() * 0.5
es = gsea_enrichment_score(gene_fold_changes, set(mapk_genes))
print(f"MAPK pathway enrichment score: {es:.3f}")
Using KEGG Pathway Data
import requests
def get_kegg_pathway_genes(pathway_id: str = "hsa04010") -> dict:
"""Fetch gene list for a KEGG pathway. hsa04010 = MAPK pathway (human)."""
url = f"https://rest.kegg.jp/get/{pathway_id}"
response = requests.get(url)
response.raise_for_status()
genes = {}
in_gene_section = False
for line in response.text.split("\n"):
if line.startswith("GENE"):
in_gene_section = True
line = line[12:] # skip "GENE "
elif line.startswith("COMPOUND") or line.startswith("REFERENCE"):
in_gene_section = False
if in_gene_section and line.strip():
parts = line.strip().split(";")
if len(parts) >= 1:
gene_part = parts[0].strip().split()
if len(gene_part) >= 2:
gene_id = gene_part[0]
gene_symbol = gene_part[1]
genes[gene_id] = gene_symbol
return genes
# Fetch MAPK pathway
try:
mapk_genes_kegg = get_kegg_pathway_genes("hsa04010")
print(f"MAPK pathway has {len(mapk_genes_kegg)} genes")
print("Sample genes:", list(mapk_genes_kegg.values())[:10])
except Exception as e:
print(f"KEGG fetch failed: {e}")
# Fallback: hardcoded core MAPK genes
mapk_genes_kegg = {
"5894": "RAF1", "673": "BRAF", "5604": "MAP2K1",
"5605": "MAP2K2", "5594": "MAPK1", "5595": "MAPK3",
"3265": "HRAS", "4893": "NRAS", "3845": "KRAS"
}
Complete Pipeline: From Expression to Pathway Activity
def compute_pathway_activities(
expression_df: pd.DataFrame, # rows=genes, cols=samples
pathways: dict, # pathway_name → [gene_list]
) -> pd.DataFrame:
"""
Compute pathway activity scores for all samples across multiple pathways.
Returns DataFrame with rows=samples, cols=pathways.
"""
activities = {}
gene_names = list(expression_df.index)
for pathway_name, pathway_genes in pathways.items():
scores = pathway_score_zscore(
expression_df.values,
gene_names,
pathway_genes
)
activities[pathway_name] = scores
return pd.DataFrame(activities, index=expression_df.columns)
# Simulated example
np.random.seed(42)
n_genes, n_samples = 2000, 60
sample_names = [f"tumor_{i}" for i in range(30)] + [f"normal_{i}" for i in range(30)]
gene_symbols = [f"GENE_{i}" for i in range(n_genes)]
expr_data = np.abs(np.random.randn(n_genes, n_samples))
# Simulate pathway activation in tumor samples
pathway_definitions = {
"MAPK_pathway": [f"GENE_{i}" for i in range(50, 60)],
"PI3K_pathway": [f"GENE_{i}" for i in range(100, 110)],
"Apoptosis": [f"GENE_{i}" for i in range(200, 210)],
}
# Activate MAPK in tumors, suppress apoptosis
for gene in pathway_definitions["MAPK_pathway"]:
idx = gene_symbols.index(gene)
expr_data[idx, :30] *= 3.0
for gene in pathway_definitions["Apoptosis"]:
idx = gene_symbols.index(gene)
expr_data[idx, :30] *= 0.3
expr_df = pd.DataFrame(expr_data, index=gene_symbols, columns=sample_names)
activity_df = compute_pathway_activities(expr_df, pathway_definitions)
print("Pathway activity scores (first 5 samples of each group):")
print("\nTumor samples:")
print(activity_df.loc[[f"tumor_{i}" for i in range(5)]].round(2))
print("\nNormal samples:")
print(activity_df.loc[[f"normal_{i}" for i in range(5)]].round(2))
# Visualize with heatmap
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.imshow(activity_df.T.values, aspect="auto", cmap="RdBu_r")
ax.set_xticks(range(0, n_samples, 5))
ax.set_xticklabels([sample_names[i] for i in range(0, n_samples, 5)], rotation=45, ha="right", fontsize=7)
ax.set_yticks(range(len(pathway_definitions)))
ax.set_yticklabels(list(pathway_definitions.keys()))
ax.set_title("Pathway Activity Scores: Tumor vs. Normal")
plt.colorbar(im, ax=ax, label="Activity z-score")
plt.tight_layout()
plt.savefig("pathway_activity_heatmap.png", dpi=150)
plt.show()
Summary
| Approach | When to use | Tools/Libraries |
|---|---|---|
| ODE models | When kinetic parameters are available; studying dynamics | scipy.integrate |
| Boolean networks | Large networks without quantitative params; logic structure | Custom (networkx for visualization) |
| Pathway scoring | Analyzing existing expression datasets | numpy, pandas |
| GSEA-style enrichment | Testing whether a gene set is enriched in differential data | fgsea (R), GSEA Java app |
| KEGG/Reactome API | Fetching curated pathway gene lists | requests + KEGG REST API |
Mechanistic modeling is most useful for generating testable hypotheses about pathway dynamics. Data-driven pathway scoring is most useful for interpreting existing datasets. In practice, you use both: models guide your biological interpretation of the data, and the data validates or challenges the models.