Seasonality and correlations¶
Two related questions show up the moment you look past a single metric in isolation: Does this metric move with the calendar? and How are these two metrics related?
plotsim has a direct knob for each: a global seasonality modulator that lifts (or dampens) values during named months, and a connections block that wires correlation pairs into the engine. Per-metric and per-segment sensitivity dials let you exempt individual metrics or cohorts from seasonality, and correlation strength is tuned with plain words.
A baseline without seasonality¶
A retail-style config: 24 monthly periods, two segments, one revenue metric. We'll plot the average revenue per period as the reference line.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotsim import create, generate_tables

def build(seasonality=None, sens=1.0):
    return create(
        about="Seasonality demo",
        unit="store",
        window=("2023-01", "2024-12", "monthly"),
        metrics=[
            {"name": "revenue", "type": "amount", "polarity": "positive",
             "range": [1000, 50000], "seasonal_sensitivity": sens},
        ],
        segments=[
            {"name": "flagship", "count": 8, "archetype": "growth",
             "attributes": {"banner": "flagship"}},
            {"name": "small_format", "count": 8, "archetype": "growth",
             "attributes": {"banner": "small_format"}},
        ],
        seasonality=seasonality or [],
    )

def avg_per_period(tables):
    fct = (tables["fct_store"]
           .merge(tables["dim_date"][["date_key", "period_label", "period_index"]],
                  on="date_key"))
    return (fct.groupby(["period_index", "period_label"])["revenue"]
            .mean().reset_index().sort_values("period_index"))

baseline = build()
tab_b = generate_tables(baseline, np.random.default_rng(baseline.seed))
avg_b = avg_per_period(tab_b)
print(f"Baseline mean revenue: {avg_b['revenue'].mean():,.0f}")
Layering on global seasonality¶
seasonality is a list of named effects. Each effect names a set of calendar months and a strength — positive lifts, negative dampens. Effects can overlap; strengths sum at each period.
Below we add a +0.4 boost in November and December. With the seed and segments held fixed, the November/December averages should rise while the other periods stay essentially unchanged.
seasonal = build(seasonality=[
    {"months": [11, 12], "strength": 0.4},
])
tab_s = generate_tables(seasonal, np.random.default_rng(seasonal.seed))
avg_s = avg_per_period(tab_s)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(avg_b["period_label"], avg_b["revenue"], marker="o",
        label="no seasonality")
ax.plot(avg_s["period_label"], avg_s["revenue"], marker="s",
        label="+0.4 in Nov/Dec")
ax.set_title("Mean revenue per period — global seasonality lifts Q4")
ax.tick_params(axis="x", rotation=45)
ax.legend(); plt.tight_layout(); plt.show()
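The summing rule for overlapping effects can be sketched in a few lines of plain Python. This is an illustration, not plotsim's internals: the rule that overlapping strengths sum per month comes from the text above, while the exact multiplier form (`1 + sensitivity * total_strength`) is an assumption made for the sketch.

```python
# Hypothetical sketch of how overlapping named effects could combine into a
# per-month multiplier. Summing strengths is per the docs; the multiplier
# formula (1 + sensitivity * total_strength) is an assumption.
def month_multiplier(month, effects, sensitivity=1.0):
    total = sum(e["strength"] for e in effects if month in e["months"])
    return 1.0 + sensitivity * total

effects = [
    {"months": [11, 12], "strength": 0.4},
    {"months": [12, 1], "strength": 0.2},  # overlaps the first in December
]

print(round(month_multiplier(11, effects), 2))  # 1.4 — only the first effect
print(round(month_multiplier(12, effects), 2))  # 1.6 — strengths sum: 0.4 + 0.2
print(round(month_multiplier(6, effects), 2))   # 1.0 — no effect active
```

Note how a `sensitivity` of 0.0 would collapse every multiplier back to 1.0, which is the behavior the next section demonstrates through plotsim's own dial.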
Per-metric and per-segment sensitivity¶
Each metric has a seasonal_sensitivity (default 1.0). Set it to 0.0 to make the metric immune to global seasonality, or to a negative number to invert the modulation. Segments have the same dial — useful when one cohort is seasonal but another isn't.
immune = create(
    about="Sensitivity dial",
    unit="store",
    window=("2023-01", "2024-12", "monthly"),
    metrics=[
        {"name": "revenue", "type": "amount", "polarity": "positive",
         "range": [1000, 50000], "seasonal_sensitivity": 1.0},
        {"name": "headcount", "type": "amount", "polarity": "positive",
         "range": [5, 50], "seasonal_sensitivity": 0.0},  # immune to seasonality
    ],
    segments=[
        {"name": "flagship", "count": 8, "archetype": "growth"},
        {"name": "small_format", "count": 8, "archetype": "growth"},
    ],
    seasonality=[{"months": [11, 12], "strength": 0.5}],
)
tab_i = generate_tables(immune, np.random.default_rng(immune.seed))

fct = (tab_i["fct_store"]
       .merge(tab_i["dim_date"][["date_key", "period_label", "period_index"]],
              on="date_key"))
agg = (fct.groupby(["period_index", "period_label"])
       [["revenue", "headcount"]].mean().reset_index().sort_values("period_index"))

fig, axes = plt.subplots(1, 2, figsize=(11, 3.5))
for ax, col, title in zip(axes, ["revenue", "headcount"],
                          ["sensitivity=1.0 — moves with seasons",
                           "sensitivity=0.0 — immune"]):
    ax.plot(agg["period_label"], agg[col], marker="o")
    ax.set_title(title); ax.tick_params(axis="x", rotation=45)
plt.tight_layout(); plt.show()
Connections — correlated metric pairs¶
A connections entry is a three-token sentence: metric_a relationship metric_b. The relationship word maps to a target Pearson coefficient:
| Word | Coefficient |
|---|---|
| mirrors | +0.75 |
| driven_by | +0.55 |
| related | +0.40 |
| hints_at | +0.20 |
| independent | 0.00 |
| hints_against | −0.20 |
| resists | −0.40 |
| opposes | −0.55 |
| inverts | −0.75 |
The engine uses a Gaussian copula to instantiate these — within-archetype Pearson should land close to the configured target, with some sampling slack at small entity counts.
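The copula mechanism itself is easy to sketch independently of plotsim. The snippet below is a minimal, self-contained illustration, not the engine's actual code: draw standard normals correlated at the target Pearson via a Cholesky factor, then push each margin through the normal CDF to get correlated uniforms that could feed any marginal distribution.

```python
import numpy as np
from math import erf, sqrt

# Minimal Gaussian-copula sketch (illustrative, not plotsim's implementation):
# correlate standard normals via Cholesky, then map margins through the
# normal CDF to uniforms suitable for arbitrary marginals.
rng = np.random.default_rng(0)
target = 0.75  # the "mirrors" coefficient

chol = np.linalg.cholesky(np.array([[1.0, target], [target, 1.0]]))
z = rng.standard_normal((5000, 2)) @ chol.T

# Normal CDF: Phi(x) = (1 + erf(x / sqrt(2))) / 2
u = 0.5 * (1.0 + np.vectorize(erf)(z / sqrt(2)))

print(round(np.corrcoef(z[:, 0], z[:, 1])[0, 1], 2))  # close to 0.75
print(round(np.corrcoef(u[:, 0], u[:, 1])[0, 1], 2))  # slightly attenuated
```

The mild attenuation on the uniform scale is inherent to the CDF transform; it is one reason realized Pearson lands *near* the configured target rather than exactly on it, on top of the sampling slack the text mentions.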
correlated = create(
    about="Connections demo",
    unit="account",
    window=("2023-01", "2024-12", "monthly"),
    metrics=[
        {"name": "engagement", "type": "score", "polarity": "positive"},
        {"name": "feature_use", "type": "score", "polarity": "positive"},
        {"name": "churn_risk", "type": "score", "polarity": "negative"},
        {"name": "support_load", "type": "score", "polarity": "negative"},
    ],
    connections=[
        "engagement mirrors feature_use",   # +0.75
        "engagement opposes churn_risk",    # -0.55
        "support_load related churn_risk",  # +0.40
    ],
    segments=[
        {"name": "core", "count": 100, "archetype": "growth"},
    ],
)
tab_c = generate_tables(correlated, np.random.default_rng(correlated.seed))

metrics = ["engagement", "feature_use", "churn_risk", "support_load"]
realized = tab_c["fct_account"][metrics].corr().round(2)
print("Realized Pearson correlations:")
realized
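Because each connection is a three-token sentence, it is straightforward to write a generic tolerance check that compares a realized correlation matrix against the vocabulary table. This is a standalone sketch with a toy matrix standing in for the output of `.corr()` above; the `check_connections` helper and its tolerance are illustrative, not part of plotsim.

```python
# Hypothetical helper: parse each three-token connection sentence and check
# the realized Pearson value against the coefficient vocabulary (copied from
# the table above). `corr` can be a dict-of-dicts or a pandas DataFrame.
TARGETS = {"mirrors": 0.75, "driven_by": 0.55, "related": 0.40,
           "hints_at": 0.20, "independent": 0.00, "hints_against": -0.20,
           "resists": -0.40, "opposes": -0.55, "inverts": -0.75}

def check_connections(corr, connections, tol=0.15):
    failures = []
    for sentence in connections:
        a, word, b = sentence.split()
        if abs(corr[a][b] - TARGETS[word]) > tol:
            failures.append(sentence)
    return failures

# Toy realized matrix standing in for tab_c["fct_account"][metrics].corr():
corr = {"engagement": {"feature_use": 0.72, "churn_risk": -0.51},
        "support_load": {"churn_risk": 0.44}}
print(check_connections(corr, ["engagement mirrors feature_use",
                               "engagement opposes churn_risk",
                               "support_load related churn_risk"]))  # []
```

An empty list means every pair landed within tolerance of its target; this pattern is what the pipeline-testing notebook linked below builds on.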
Causal lag — follows + delay¶
When one metric responds to another with a built-in delay, declare it on the metric itself. follows: <other_metric> plus delay: <periods> says "this metric tracks the other, but lagged by N periods."
lagged = create(
    about="Causal lag",
    unit="account",
    window=("2023-01", "2024-12", "monthly"),
    metrics=[
        {"name": "engagement", "type": "score", "polarity": "positive"},
        {"name": "support_tickets", "type": "score", "polarity": "negative",
         "follows": "engagement", "delay": 3},
    ],
    segments=[
        {"name": "core", "count": 30, "archetype": "spike_then_crash"},
    ],
)
tab_l = generate_tables(lagged, np.random.default_rng(lagged.seed))

fct = (tab_l["fct_account"]
       .merge(tab_l["dim_date"][["date_key", "period_label", "period_index"]],
              on="date_key"))
avg = (fct.groupby("period_index")[["engagement", "support_tickets"]]
       .mean().reset_index())

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(avg["period_index"], avg["engagement"], marker="o", label="engagement")
ax.plot(avg["period_index"], avg["support_tickets"], marker="s",
        label="support_tickets (follows engagement, delay=3)")
ax.set_title("Causal lag — support_tickets tracks engagement 3 periods behind")
ax.set_xlabel("Period index")
ax.legend(); plt.tight_layout(); plt.show()
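Beyond eyeballing the plot, you can recover the configured delay numerically by scanning candidate lags and picking the shift that maximizes Pearson correlation. The sketch below uses synthetic sine-wave series with a known 3-period lag so it runs standalone; the same `best_lag` scan applies to the `avg` columns above.

```python
import numpy as np

# Sanity-check sketch for a configured delay: shift the follower back by k
# periods, correlate against the leader, and keep the best-scoring k.
def best_lag(leader, follower, max_lag=6):
    best, best_r = 0, -2.0
    for k in range(max_lag + 1):
        a = leader[: len(leader) - k] if k else leader
        b = follower[k:]
        r = np.corrcoef(a, b)[0, 1]
        if r > best_r:
            best, best_r = k, r
    return best

# Synthetic stand-ins: the follower is the leader's signal shifted 3 periods.
rng = np.random.default_rng(0)
t = np.arange(24)
engagement = np.sin(t / 3.0) + 0.05 * rng.standard_normal(24)
support = np.sin((t - 3) / 3.0) + 0.05 * rng.standard_normal(24)

print(best_lag(engagement, support))  # → 3
```

A recovered lag matching the configured `delay` is a cheap regression test when you rely on this mechanism as a known-truth label downstream.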
Where to next¶
- Pipeline testing — pipeline_testing.ipynb shows how to use these correlation knobs as known-truth labels for downstream feature-engineering tests.
- ML readiness — ml_readiness.ipynb builds entity-feature matrices on top of correlated metrics.
- Metrics & connections — docs/site/user-guide/metrics-and-connections.md covers the connection vocabulary; seasonality.md covers global modulation.