diff --git a/pulkka/chart_utils.py b/pulkka/chart_utils.py
index e42c3ed..6550edc 100644
--- a/pulkka/chart_utils.py
+++ b/pulkka/chart_utils.py
@@ -1,9 +1,9 @@
 from bokeh import models as bm
 from bokeh import plotting as bp
 from bokeh.transform import factor_cmap
-from pandas import DataFrame
+from pandas import DataFrame, Series
 
-from pulkka.data_utils import get_categorical_stats
+from pulkka.data_utils import explode_multiselect, get_categorical_stats
 
 CAT_Q_RADIUS = 0.1
 
@@ -88,3 +88,34 @@ def get_categorical_stats_plot(df, *, category, value, na_as_category=None, line
         color="#B53471",
     )
     return plot
+
+
+def get_multiselect_frequency_plot(
+    series: Series,
+    *,
+    title: str,
+    top_n: int = 20,
+) -> bp.figure:
+    """
+    Horizontal bar chart of the top N values from a comma-separated multiselect column.
+
+    The splitting and counting is delegated to `explode_multiselect`; its
+    result is expected to be ordered from most to least frequent.
+    """
+    counts = explode_multiselect(series, top_n=top_n)
+    # Bokeh lays categorical factors out bottom-up, so reverse the order
+    # to get the highest count on the top row.
+    labels = list(counts.index[::-1])
+    values = list(counts.values[::-1])  # noqa: PD011
+
+    plot = bp.figure(
+        title=title,
+        y_range=labels,
+        # Grow with the number of bars so the labels stay legible.
+        height=max(300, 22 * len(labels)),
+        width=700,
+    )
+    plot.hbar(y=labels, right=values, height=0.7, color="#2a6180")
+    plot.xaxis.axis_label = "Vastauksia"
+    plot.x_range.start = 0
+    return plot
diff --git a/pulkka/generate_charts.py b/pulkka/generate_charts.py
index ff8dcb5..867e90f 100644
--- a/pulkka/generate_charts.py
+++ b/pulkka/generate_charts.py
@@ -7,6 +7,7 @@ from pulkka.chart_utils import (
     gender_colormap,
     get_categorical_stats_plot,
     get_df_hover_tool,
+    get_multiselect_frequency_plot,
     set_yaxis_cash,
 )
 from pulkka.column_maps import (
@@ -19,6 +20,17 @@ from pulkka.column_maps import (
 from pulkka.config import OUT_DIR
 from pulkka.data_ingest import read_data
 
+MULTISELECT_CHARTS = {
+    "Data & ML": "Data & ML (top 15)",
+    "DevOps & pilvi": "DevOps & pilvi (top 20)",
+    "Edut (ei luontoisedut)": "Edut (top 15)",
+    "Käyttöjärjestelmä": "Käyttöjärjestelmä",
+    "Luontoisedut": "Luontoisedut (top 15)",
+    "Ohjelmointikieli": "Ohjelmointikielet (top 20)",
+    "Tietokannat": "Tietokannat (top 15)",
+    "Web-kehykset": "Web-kehykset (top 20)",
+}
+
 plot_funcs = set()
 
 
@@ -76,9 +88,42 @@ def plot_kaupunki_vuositulot(df: DataFrame):
+def _top_n_from_title(title: str, default: int = 15) -> int:
+    """
+    Read the cut-off from a chart title that ends in "(top N)".
+
+    Titles without such a suffix get `default`.
+    """
+    _, marker, tail = title.rpartition("(top ")
+    digits = tail.rstrip(")")
+    if marker and digits.isdecimal():
+        return int(digits)
+    return default
+
+
 def main():
+    """Write charts.html and the multiselect page charts2.html into OUT_DIR."""
     df = read_data()
     plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
+
     bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
     bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))
 
+    # Columns absent from the data are skipped rather than raising KeyError.
+    multiselect_plots = [
+        get_multiselect_frequency_plot(
+            df[col],
+            title=title,
+            top_n=_top_n_from_title(title),
+        )
+        for col, title in MULTISELECT_CHARTS.items()
+        if col in df.columns
+    ]
+
+    # Calling output_file again retargets the following save to a second page.
+    bp.output_file(
+        OUT_DIR / "charts2.html",
+        title="Koodiklinikan Palkkakysely – Monivalinnat",
+    )
+    bp.save(bl.grid(multiselect_plots, ncols=2, sizing_mode="stretch_both"))
+
 
 if __name__ == "__main__":
     main()
diff --git a/template/index.html b/template/index.html
index 12a4847..30f77c5 100644
--- a/template/index.html
+++ b/template/index.html
@@ -42,6 +42,7 @@