Coverage for oc_botwatch / visualize.py: 98%
107 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 16:49 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 16:49 +0000
1# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import logging
6from pathlib import Path
8import matplotlib.dates as mdates
9import matplotlib.pyplot as plt
10import matplotlib.ticker as mticker
11import polars as pl
12from matplotlib.axes import Axes
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Directory two levels above this file, and the "output" folder inside it that
# main() uses as its default location.
BASE_DIR = Path(__file__).resolve().parent.parent
OUTPUT_DIR = BASE_DIR.joinpath("output")

# Services that get one subplot each in the per-service charts.
_SERVICES: tuple[str, ...] = (
    "web",
    "api",
    "sparql",
)
# Legend labels and the matching data columns; the two tuples are index-aligned.
_CATEGORY_LABELS: tuple[str, ...] = (
    "Human",
    "Generic bot",
    "LLM bot",
)
_CATEGORY_COLUMNS: tuple[str, ...] = (
    "human",
    "generic_bot",
    "llm_bot",
)
def _prepare(df: pl.DataFrame) -> pl.DataFrame:
    """Parse the ``date`` column and return the rows sorted by date.

    The column is parsed from ``%Y-%m-%d`` strings in non-strict mode; rows
    whose ``date`` is null after parsing are dropped before sorting.
    """
    parsed_date = pl.col("date").str.to_date("%Y-%m-%d", strict=False)
    with_dates = df.with_columns(parsed_date)
    dated_rows = with_dates.drop_nulls("date")
    return dated_rows.sort("date")
def _fmt_axis(x: float, _pos: int) -> str:
    """Format a tick value as a compact count (``2M``, ``5K``, ``500``).

    Args:
        x: The tick value to format.
        _pos: Tick position supplied by ``FuncFormatter``; unused.

    Returns:
        The value in millions with an ``M`` suffix, in thousands with a ``K``
        suffix, or as a plain whole number when its magnitude is below 1,000.
    """
    magnitude = abs(x)
    if magnitude >= 1e6:  # noqa: PLR2004
        return f"{x / 1e6:.0f}M"
    if magnitude >= 1e3:  # noqa: PLR2004
        return f"{x / 1e3:.0f}K"
    # Small values used to be squeezed into the "K" branch, so 0 rendered as
    # "0K" and 500 as "0K"; show them as they are instead.
    return f"{x:.0f}"
def _setup_xaxis(ax: Axes) -> None:
    """Configure the x-axis of *ax* with a month locator and ``%b %Y`` tick labels."""
    month_locator = mdates.MonthLocator()
    month_formatter = mdates.DateFormatter("%b %Y")
    xaxis = ax.xaxis
    xaxis.set_major_locator(month_locator)
    xaxis.set_major_formatter(month_formatter)
def _pivot_by_service(long_df: pl.DataFrame, service: str) -> pl.DataFrame:
    """Reshape the long-format rows of one service into one row per date.

    Rows whose ``service`` equals *service* are pivoted so that each value of
    ``category`` becomes a column holding ``count``, with nulls replaced by 0.
    Any column listed in ``_CATEGORY_COLUMNS`` that the pivot did not produce
    is added as a constant 0. The result keeps ``date`` followed by the
    category columns and is passed through ``_prepare``.
    """
    service_rows = long_df.filter(pl.col("service") == service)
    wide = service_rows.pivot(on="category", index="date", values="count").fill_null(0)

    absent = [name for name in _CATEGORY_COLUMNS if name not in wide.columns]
    if absent:
        wide = wide.with_columns([pl.lit(0).alias(name) for name in absent])

    return _prepare(wide.select("date", *_CATEGORY_COLUMNS))
def plot_daily_traffic(df: pl.DataFrame, output: Path) -> None:
    """Save a stacked area chart of daily request counts per category.

    Args:
        df: Frame with ``date``, ``human``, ``generic_bot`` and ``llm_bot``
            columns; it is normalised with ``_prepare`` before plotting.
        output: Path the figure is written to at 150 dpi.
    """
    frame = _prepare(df)
    days = frame["date"].to_list()
    layers = [frame[column].to_list() for column in ("human", "generic_bot", "llm_bot")]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.stackplot(days, *layers, labels=list(_CATEGORY_LABELS), alpha=0.8)
    ax.legend(loc="upper left")
    ax.set_ylabel("Requests")
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(_fmt_axis))
    _setup_xaxis(ax)

    fig.autofmt_xdate()
    fig.tight_layout()
    fig.savefig(output, dpi=150)
    plt.close(fig)

    logger.info("Output: %s", output)
def plot_daily_traffic_pct(df: pl.DataFrame, output: Path) -> None:
    """Save a stacked area chart of each category's share of daily requests.

    Args:
        df: Frame with ``date``, ``human``, ``generic_bot`` and ``llm_bot``
            columns; it is normalised with ``_prepare`` before plotting.
        output: Path the figure is written to at 150 dpi.
    """
    df = _prepare(df)
    total = df["human"] + df["generic_bot"] + df["llm_bot"]
    # A day with no requests at all gives 0 / 0 = NaN; plot it as 0 % for every
    # category, the same way plot_daily_traffic_by_service_pct does.
    human_pct = (df["human"] / total * 100).fill_nan(0).to_list()
    generic_pct = (df["generic_bot"] / total * 100).fill_nan(0).to_list()
    llm_pct = (df["llm_bot"] / total * 100).fill_nan(0).to_list()
    dates = df["date"].to_list()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.stackplot(dates, human_pct, generic_pct, llm_pct, labels=list(_CATEGORY_LABELS), alpha=0.8)
    ax.legend(loc="upper left")
    ax.set_ylabel("Share of daily requests (%)")
    ax.set_ylim(0, 100)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter())
    _setup_xaxis(ax)
    fig.autofmt_xdate()
    fig.tight_layout()
    fig.savefig(output, dpi=150)
    plt.close(fig)
    logger.info("Output: %s", output)
def plot_daily_traffic_by_service(long_df: pl.DataFrame, output: Path) -> None:
    """Save one stacked area chart of daily request counts per service.

    The figure has one subplot per entry of ``_SERVICES``, stacked vertically
    and sharing the x-axis.

    Args:
        long_df: Long-format frame, reshaped per service by ``_pivot_by_service``.
        output: Path the figure is written to at 150 dpi.
    """
    fig, axes = plt.subplots(len(_SERVICES), 1, figsize=(12, 12), sharex=True)
    legend_labels = list(_CATEGORY_LABELS)

    for panel, service in zip(axes, _SERVICES, strict=True):
        wide = _pivot_by_service(long_df, service)
        days = wide["date"].to_list()
        stacks = [wide[column].to_list() for column in _CATEGORY_COLUMNS]

        panel.stackplot(days, *stacks, labels=legend_labels, alpha=0.8)
        panel.legend(loc="upper left")
        panel.set_title(service)
        panel.set_ylabel("Requests")
        panel.yaxis.set_major_formatter(mticker.FuncFormatter(_fmt_axis))

    # Date ticks are configured on the last subplot.
    _setup_xaxis(axes[-1])

    fig.autofmt_xdate()
    fig.tight_layout()
    fig.savefig(output, dpi=150)
    plt.close(fig)

    logger.info("Output: %s", output)
def plot_daily_traffic_by_service_pct(long_df: pl.DataFrame, output: Path) -> None:
    """Save one stacked area chart of category shares per service.

    The figure has one subplot per entry of ``_SERVICES``, stacked vertically
    and sharing the x-axis. NaN shares are replaced with 0 before plotting.

    Args:
        long_df: Long-format frame, reshaped per service by ``_pivot_by_service``.
        output: Path the figure is written to at 150 dpi.
    """
    fig, axes = plt.subplots(len(_SERVICES), 1, figsize=(12, 12), sharex=True)
    legend_labels = list(_CATEGORY_LABELS)

    for panel, service in zip(axes, _SERVICES, strict=True):
        wide = _pivot_by_service(long_df, service)
        day_total = wide["human"] + wide["generic_bot"] + wide["llm_bot"]
        shares = []
        for column in _CATEGORY_COLUMNS:
            percent = wide[column] / day_total * 100
            shares.append(percent.fill_nan(0).to_list())
        days = wide["date"].to_list()

        panel.stackplot(days, *shares, labels=legend_labels, alpha=0.8)
        panel.legend(loc="upper left")
        panel.set_title(service)
        panel.set_ylabel("Share (%)")
        panel.set_ylim(0, 100)
        panel.yaxis.set_major_formatter(mticker.PercentFormatter())

    # Date ticks are configured on the last subplot.
    _setup_xaxis(axes[-1])

    fig.autofmt_xdate()
    fig.tight_layout()
    fig.savefig(output, dpi=150)
    plt.close(fig)

    logger.info("Output: %s", output)
def main(output_dir: Path = OUTPUT_DIR) -> None:
    """Read the two traffic CSV files in *output_dir* and write the four charts there.

    ``daily_traffic.csv`` feeds the overall charts and
    ``daily_traffic_by_service.csv`` feeds the per-service charts. Logging is
    configured at INFO level first.
    """
    logging.basicConfig(level=logging.INFO)

    daily = pl.read_csv(output_dir / "daily_traffic.csv")
    overall_charts = (
        (plot_daily_traffic, "daily_traffic.png"),
        (plot_daily_traffic_pct, "daily_traffic_pct.png"),
    )
    for draw, filename in overall_charts:
        draw(daily, output_dir / filename)

    by_service = pl.read_csv(output_dir / "daily_traffic_by_service.csv")
    service_charts = (
        (plot_daily_traffic_by_service, "daily_traffic_by_service.png"),
        (plot_daily_traffic_by_service_pct, "daily_traffic_by_service_pct.png"),
    )
    for draw, filename in service_charts:
        draw(by_service, output_dir / filename)
# When the module is executed as a script, build all charts using main()'s
# default output directory.
if __name__ == "__main__":
    main()  # pragma: no cover