Add new charts

This commit is contained in:
Aarni Koskela
2026-03-12 15:39:39 +02:00
parent 663cd3d349
commit ac79c18b09
3 changed files with 54 additions and 2 deletions

View File

@@ -1,9 +1,9 @@
from bokeh import models as bm
from bokeh import plotting as bp
from bokeh.transform import factor_cmap
from pandas import DataFrame
from pandas import DataFrame, Series
from pulkka.data_utils import get_categorical_stats
from pulkka.data_utils import explode_multiselect, get_categorical_stats
# NOTE(review): not referenced anywhere in the visible excerpt; the name suggests
# a glyph radius used by the categorical stats plot — confirm before changing.
CAT_Q_RADIUS = 0.1
@@ -88,3 +88,27 @@ def get_categorical_stats_plot(df, *, category, value, na_as_category=None, line
color="#B53471",
)
return plot
def get_multiselect_frequency_plot(
    series: Series,
    *,
    title: str,
    top_n: int = 20,
) -> bp.figure:
    """Build a horizontal bar chart of the most frequent multiselect answers.

    The comma-separated multiselect column is tallied by
    ``explode_multiselect``, which is asked for at most ``top_n`` entries.

    Args:
        series: Column holding the multiselect answers.
        title: Title shown above the chart.
        top_n: Number of entries requested from ``explode_multiselect``.

    Returns:
        A Bokeh figure with one horizontal bar per tallied value.
    """
    frequencies = explode_multiselect(series, top_n=top_n)

    # Flip the order so the highest count ends up at the top of the chart
    # (assumes explode_multiselect returns counts in descending order — confirm).
    bar_labels = [*frequencies.index[::-1]]
    bar_lengths = [*frequencies.values[::-1]]  # noqa: PD011

    # Grow the figure with the number of bars, but never below 300 px.
    figure_height = max(300, 22 * len(bar_labels))

    chart = bp.figure(
        title=title,
        y_range=bar_labels,
        height=figure_height,
        width=700,
    )
    chart.hbar(y=bar_labels, right=bar_lengths, height=0.7, color="#2a6180")
    chart.x_range.start = 0
    chart.xaxis.axis_label = "Vastauksia"
    return chart

View File

@@ -7,6 +7,7 @@ from pulkka.chart_utils import (
gender_colormap,
get_categorical_stats_plot,
get_df_hover_tool,
get_multiselect_frequency_plot,
set_yaxis_cash,
)
from pulkka.column_maps import (
@@ -19,6 +20,17 @@ from pulkka.column_maps import (
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data
# Multiselect survey columns to chart, mapped to the title of each chart.
# main() skips keys that are not columns of the loaded DataFrame, and it
# inspects the title text to choose the chart's top-N cutoff — so the
# "(top N)" wording in a title is not purely cosmetic.
MULTISELECT_CHARTS = {
"Data & ML": "Data & ML (top 15)",
"DevOps & pilvi": "DevOps & pilvi (top 20)",
"Edut (ei luontoisedut)": "Edut (top 15)",
"Käyttöjärjestelmä": "Käyttöjärjestelmä",
"Luontoisedut": "Luontoisedut (top 15)",
"Ohjelmointikieli": "Ohjelmointikielet (top 20)",
"Tietokannat": "Tietokannat (top 15)",
"Web-kehykset": "Web-kehykset (top 20)",
}
# Callables that main() invokes with the survey DataFrame, sorted by __name__,
# to build the plots for charts.html. How entries get added is outside this excerpt.
plot_funcs = set()
@@ -76,9 +88,24 @@ def plot_kaupunki_vuositulot(df: DataFrame):
def main():
    """Render all survey charts into two HTML files under ``OUT_DIR``.

    ``charts.html`` receives one plot per callable registered in
    ``plot_funcs`` (ordered by function name). ``charts2.html`` receives one
    frequency bar chart per ``MULTISELECT_CHARTS`` entry whose column is
    present in the loaded data.
    """
    import re  # local import so this function does not rely on a file-level one

    df = read_data()

    plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
    bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
    bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))

    # Read the cutoff from an explicit "(top N)" suffix instead of testing for
    # the bare substring "20", which would misfire on any title that merely
    # contains those digits and could not express other cutoffs.
    top_n_pattern = re.compile(r"\(top (\d+)\)")
    default_top_n = 15  # used when the title carries no "(top N)" suffix

    multiselect_plots = []
    for col, title in MULTISELECT_CHARTS.items():
        if col not in df.columns:
            # Columns missing from the data are skipped rather than failing.
            continue
        match = top_n_pattern.search(title)
        top_n = int(match.group(1)) if match else default_top_n
        multiselect_plots.append(
            get_multiselect_frequency_plot(df[col], title=title, top_n=top_n),
        )

    bp.output_file(
        OUT_DIR / "charts2.html",
        title="Koodiklinikan Palkkakysely Monivalinnat",
    )
    bp.save(bl.grid(multiselect_plots, ncols=2, sizing_mode="stretch_both"))
# Generate the charts only when this module is executed as a script.
if __name__ == "__main__":
    main()