Getting started with plotsim
This notebook goes from pip install plotsim to a complete retail dataset in nine cells: describe the data, generate it, plot a customer trajectory, query the star schema with pandas, and export to CSV.
1. Install
pip install plotsim
The core install pulls numpy, pandas, scipy, pyyaml, pydantic, and faker. plotsim makes no network calls at generation time, so the same config plus the same seed produces byte-identical output anywhere.
In [ ]:
Copied!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotsim import create, generate_tables, write_tables
# Describe the dataset: 50 retail customers (two segments of 25) observed
# over 24 monthly periods, with three metrics. Distributions and the star
# schema (dim_date, dim_customer, fct_customer) are filled in automatically.
config = create(
    about="Retail customers \u2014 loyalty and sessions",
    unit="customer",
    window=("2023-01", "2024-12", "monthly"),
    metrics=[
        {"name": "sessions", "type": "count", "polarity": "positive"},
        {
            "name": "cart_value",
            "type": "amount",
            "polarity": "positive",
            "range": [10, 500],
        },
        {"name": "loyalty_score", "type": "score", "polarity": "positive"},
    ],
    segments=[
        {
            "name": "loyal_climbers",
            "count": 25,
            "archetype": "growth",
            "attributes": {"segment": "loyal_climbers"},
        },
        {
            "name": "holiday_shoppers",
            "count": 25,
            "archetype": "seasonal",
            "attributes": {"segment": "holiday_shoppers"},
        },
    ],
)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotsim import create, generate_tables, write_tables
# Describe the dataset: 50 retail customers (two segments of 25) observed
# over 24 monthly periods, with three metrics. Distributions and the star
# schema (dim_date, dim_customer, fct_customer) are filled in automatically.
config = create(
    about="Retail customers \u2014 loyalty and sessions",
    unit="customer",
    window=("2023-01", "2024-12", "monthly"),
    metrics=[
        {"name": "sessions", "type": "count", "polarity": "positive"},
        {
            "name": "cart_value",
            "type": "amount",
            "polarity": "positive",
            "range": [10, 500],
        },
        {"name": "loyalty_score", "type": "score", "polarity": "positive"},
    ],
    segments=[
        {
            "name": "loyal_climbers",
            "count": 25,
            "archetype": "growth",
            "attributes": {"segment": "loyal_climbers"},
        },
        {
            "name": "holiday_shoppers",
            "count": 25,
            "archetype": "seasonal",
            "attributes": {"segment": "holiday_shoppers"},
        },
    ],
)
In [ ]:
Copied!
# Generate every table from the config, seeding NumPy's generator with
# config.seed, then print one summary line per table: its name, row count,
# and column count.
tables = generate_tables(config, np.random.default_rng(config.seed))
for name, df in tables.items():
    # The loop body must be indented; without it the cell fails to parse.
    print(f" {name:<14} {len(df):>4} rows \u00d7 {df.shape[1]} cols")
# Generate every table from the config, seeding NumPy's generator with
# config.seed, then print one summary line per table: its name, row count,
# and column count.
tables = generate_tables(config, np.random.default_rng(config.seed))
for name, df in tables.items():
    # The loop body must be indented; without it the cell fails to parse.
    print(f" {name:<14} {len(df):>4} rows \u00d7 {df.shape[1]} cols")
In [ ]:
Copied!
# Preview the first five rows of the customer fact table.
tables["fct_customer"].head(5)
# Preview the first five rows of the customer fact table.
tables["fct_customer"].head(5)
In [ ]:
Copied!
# Bring the period label and segment onto each fact row, then plot
# one customer from each segment.
fct = (
    tables["fct_customer"]
    .merge(tables["dim_date"][["date_key", "period_label"]], on="date_key")
    .merge(tables["dim_customer"][["customer_id", "segment"]], on="customer_id")
)

# One representative per segment: the first customer_id in each group.
samples = fct.groupby("segment")["customer_id"].first()

fig, ax = plt.subplots(figsize=(10, 4))
for seg, cid in samples.items():
    # Sort this customer's rows by date_key before plotting
    # (presumably chronological order; confirm against dim_date).
    rows = fct[fct["customer_id"] == cid].sort_values("date_key")
    ax.plot(rows["period_label"], rows["loyalty_score"],
            marker="o", label=f"{seg} \u2014 {cid}")

# Axis decoration happens once, after every trajectory has been drawn.
ax.set_title("Loyalty trajectory \u2014 one customer from each segment")
ax.set_xlabel("Period")
ax.set_ylabel("Loyalty score (0\u20131)")
ax.tick_params(axis="x", rotation=45)
ax.legend()
plt.tight_layout()
plt.show()
# Bring the period label and segment onto each fact row, then plot
# one customer from each segment.
fct = (
    tables["fct_customer"]
    .merge(tables["dim_date"][["date_key", "period_label"]], on="date_key")
    .merge(tables["dim_customer"][["customer_id", "segment"]], on="customer_id")
)

# One representative per segment: the first customer_id in each group.
samples = fct.groupby("segment")["customer_id"].first()

fig, ax = plt.subplots(figsize=(10, 4))
for seg, cid in samples.items():
    # Sort this customer's rows by date_key before plotting
    # (presumably chronological order; confirm against dim_date).
    rows = fct[fct["customer_id"] == cid].sort_values("date_key")
    ax.plot(rows["period_label"], rows["loyalty_score"],
            marker="o", label=f"{seg} \u2014 {cid}")

# Axis decoration happens once, after every trajectory has been drawn.
ax.set_title("Loyalty trajectory \u2014 one customer from each segment")
ax.set_xlabel("Period")
ax.set_ylabel("Loyalty score (0\u20131)")
ax.tick_params(axis="x", rotation=45)
ax.legend()
plt.tight_layout()
plt.show()
In [ ]:
Copied!
# Star-schema rollup, step 1: attach year/quarter from the date dimension
# and the segment from the customer dimension to every fact row.
joined = tables["fct_customer"].merge(
    tables["dim_date"][["date_key", "year", "quarter"]],
    on="date_key",
)
joined = joined.merge(
    tables["dim_customer"][["customer_id", "segment"]],
    on="customer_id",
)

# Step 2: total sessions per (year, quarter), one column per segment.
quarterly_sessions = (
    joined.groupby(["year", "quarter", "segment"])["sessions"].sum().unstack("segment")
)
quarterly_sessions
# Star-schema rollup, step 1: attach year/quarter from the date dimension
# and the segment from the customer dimension to every fact row.
joined = tables["fct_customer"].merge(
    tables["dim_date"][["date_key", "year", "quarter"]],
    on="date_key",
)
joined = joined.merge(
    tables["dim_customer"][["customer_id", "segment"]],
    on="customer_id",
)

# Step 2: total sessions per (year, quarter), one column per segment.
quarterly_sessions = (
    joined.groupby(["year", "quarter", "segment"])["sessions"].sum().unstack("segment")
)
quarterly_sessions
In [ ]:
Copied!
from pathlib import Path

# Write the generated tables under ./output_getting_started, then list
# the name of every entry found in that directory, sorted by path.
output_dir = Path("./output_getting_started")
write_tables(tables, config, output_dir=output_dir)
print(f"Wrote to {output_dir}/")
for f in sorted(output_dir.glob("*")):
    # The loop body must be indented; without it the cell fails to parse.
    print(f" {f.name}")
from pathlib import Path

# Write the generated tables under ./output_getting_started, then list
# the name of every entry found in that directory, sorted by path.
output_dir = Path("./output_getting_started")
write_tables(tables, config, output_dir=output_dir)
print(f"Wrote to {output_dir}/")
for f in sorted(output_dir.glob("*")):
    # The loop body must be indented; without it the cell fails to parse.
    print(f" {f.name}")
Where to next
- Bundled templates — `from plotsim import create_from_yaml; create_from_yaml("plotsim/configs/templates/saas_template.yaml")` for a richer starting point. Five domain flavors ship in `plotsim/configs/templates/`: saas, retail, education, marketing, hr.
- Config fields — `docs/site/config-reference.md` catalogs every config field; companion docs cover column types and the archetype DSL.
- CLI — the same generate-then-validate flow runs from the command line: `plotsim run config.yaml -o ./output --validate`.