Files
palkkakysely/pulkka/generate_charts.py
Aarni Koskela ac79c18b09 Add new charts
2026-03-12 15:55:11 +02:00

112 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import bokeh.layouts as bl
import bokeh.models as bm
import bokeh.plotting as bp
from pandas import DataFrame
from pulkka.chart_utils import (
gender_colormap,
get_categorical_stats_plot,
get_df_hover_tool,
get_multiselect_frequency_plot,
set_yaxis_cash,
)
from pulkka.column_maps import (
IKA_COL,
KAUPUNKI_COL,
SUKUPUOLI_COL,
TYOKOKEMUS_COL,
VUOSITULOT_COL,
)
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data
# Multi-select survey columns to chart on the second report page.
# Keys are DataFrame column names (main() skips any that are absent from the
# data); values are the chart titles. main() also inspects the title text:
# a title containing "20" is plotted with top_n=20, any other with top_n=15.
MULTISELECT_CHARTS: dict[str, str] = {
    "Data & ML": "Data & ML (top 15)",
    "DevOps & pilvi": "DevOps & pilvi (top 20)",
    "Edut (ei luontoisedut)": "Edut (top 15)",
    "Käyttöjärjestelmä": "Käyttöjärjestelmä",
    "Luontoisedut": "Luontoisedut (top 15)",
    "Ohjelmointikieli": "Ohjelmointikielet (top 20)",
    "Tietokannat": "Tietokannat (top 15)",
    "Web-kehykset": "Web-kehykset (top 20)",
}
# Registry of plot generator functions. Filled in by @plot_this while the
# module is being imported and read back by main().
plot_funcs = set()


def plot_this(fn):
    """
    Decorator for marking a function as a plot generator.

    Adds ``fn`` to ``plot_funcs`` and returns it unchanged. Returning the
    function matters: a decorator's return value replaces the decorated
    name, so without it every ``@plot_this`` function would be rebound to
    ``None`` and could no longer be called or imported directly.
    """
    plot_funcs.add(fn)
    return fn
@plot_this
def plot_kokemus_tulot(df: DataFrame):
    """
    Build the "Kokemus/Vuositulot" scatter chart.

    Each row of ``df`` becomes one marker positioned at
    (``TYOKOKEMUS_COL``, ``VUOSITULOT_COL``) and coloured through
    ``gender_colormap``. A hover tool derived from ``df`` is attached.
    Returns the Bokeh figure.
    """
    data_source = bm.ColumnDataSource(df)
    fig = bp.figure(title="Kokemus/Vuositulot")
    fig.add_tools(get_df_hover_tool(df))
    fig.xaxis.axis_label = "Työkokemus (v)"
    # Presumably applies currency formatting to the y axis; see chart_utils.
    set_yaxis_cash(fig)
    marker_options = {
        "x": TYOKOKEMUS_COL,
        "y": VUOSITULOT_COL,
        "source": data_source,
        "color": gender_colormap,
        "size": 10,
    }
    fig.scatter(**marker_options)
    return fig
@plot_this
def plot_ika_vuositulot(df: DataFrame):
    """
    Build the categorical statistics chart of ``VUOSITULOT_COL`` per
    ``IKA_COL`` category.

    Delegates entirely to ``get_categorical_stats_plot`` and returns its
    result.
    """
    stats_options = {"category": IKA_COL, "value": VUOSITULOT_COL}
    return get_categorical_stats_plot(df, **stats_options)
@plot_this
def plot_sukupuoli_vuositulot(df: DataFrame):
    """
    Build the categorical statistics chart of ``VUOSITULOT_COL`` per
    ``SUKUPUOLI_COL`` category.

    ``na_as_category="EOS"`` is forwarded to ``get_categorical_stats_plot``;
    presumably rows with a missing category are grouped under that label
    (confirm in chart_utils).
    """
    stats_options = {
        "category": SUKUPUOLI_COL,
        "value": VUOSITULOT_COL,
        "na_as_category": "EOS",
    }
    return get_categorical_stats_plot(df, **stats_options)
@plot_this
def plot_kaupunki_vuositulot(df: DataFrame):
    """
    Build the categorical statistics chart of ``VUOSITULOT_COL`` per
    ``KAUPUNKI_COL`` category.

    The chart is requested with ``line=False`` and its x-axis tick labels
    are rotated to vertical before it is returned.
    """
    stats_options = {
        "category": KAUPUNKI_COL,
        "value": VUOSITULOT_COL,
        "line": False,
    }
    chart = get_categorical_stats_plot(df, **stats_options)
    chart.xaxis.major_label_orientation = "vertical"
    return chart
def main():
    """
    Read the survey data and write the two HTML chart pages.

    ``charts.html`` gets the output of every function registered in
    ``plot_funcs``, called in function-name order. ``charts2.html`` gets one
    frequency plot for each ``MULTISELECT_CHARTS`` column that is present in
    the data. Both pages are laid out as two-column grids under ``OUT_DIR``.
    """
    df = read_data()

    # plot_funcs is a set, so order by name to keep the layout reproducible.
    generators = list(plot_funcs)
    generators.sort(key=lambda generator: generator.__name__)
    charts = [generator(df) for generator in generators]
    bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
    bp.save(bl.grid(charts, ncols=2, sizing_mode="stretch_both"))

    # top_n is read off the chart title: titles containing "20" use 20,
    # every other title uses 15.
    frequency_charts = [
        get_multiselect_frequency_plot(
            df[column],
            title=chart_title,
            top_n=20 if "20" in chart_title else 15,
        )
        for column, chart_title in MULTISELECT_CHARTS.items()
        if column in df.columns
    ]
    bp.output_file(
        OUT_DIR / "charts2.html",
        title="Koodiklinikan Palkkakysely Monivalinnat",
    )
    bp.save(bl.grid(frequency_charts, ncols=2, sizing_mode="stretch_both"))


if __name__ == "__main__":
    main()