mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-20 03:06:12 +00:00
Add new charts
This commit is contained in:
@@ -1,9 +1,9 @@
|
|||||||
from bokeh import models as bm
|
from bokeh import models as bm
|
||||||
from bokeh import plotting as bp
|
from bokeh import plotting as bp
|
||||||
from bokeh.transform import factor_cmap
|
from bokeh.transform import factor_cmap
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame, Series
|
||||||
|
|
||||||
from pulkka.data_utils import get_categorical_stats
|
from pulkka.data_utils import explode_multiselect, get_categorical_stats
|
||||||
|
|
||||||
CAT_Q_RADIUS = 0.1
|
CAT_Q_RADIUS = 0.1
|
||||||
|
|
||||||
@@ -88,3 +88,27 @@ def get_categorical_stats_plot(df, *, category, value, na_as_category=None, line
|
|||||||
color="#B53471",
|
color="#B53471",
|
||||||
)
|
)
|
||||||
return plot
|
return plot
|
||||||
|
|
||||||
|
|
||||||
|
def get_multiselect_frequency_plot(
    series: Series,
    *,
    title: str,
    top_n: int = 20,
) -> bp.figure:
    """
    Build a horizontal bar chart of the most common answers in a multiselect column.

    Counting is delegated to ``explode_multiselect``, which is given ``series``
    and ``top_n``; the ``.index`` of its result supplies the bar labels and the
    ``.values`` supply the bar lengths.

    :param series: Comma-separated multiselect column to chart.
    :param title: Title displayed on the figure.
    :param top_n: Forwarded to ``explode_multiselect`` to limit the number of bars.
    :return: The configured Bokeh figure.
    """
    frequencies = explode_multiselect(series, top_n=top_n)

    # Flip the order so the first entry of `frequencies` ends up as the topmost
    # bar (presumably the highest count -- verify against explode_multiselect).
    bar_labels = list(frequencies.index)[::-1]
    bar_lengths = list(frequencies.values)[::-1]  # noqa: PD011

    # Grow the figure with the number of bars, but never below 300 px.
    figure_height = max(300, 22 * len(bar_labels))

    plot = bp.figure(title=title, y_range=bar_labels, height=figure_height, width=700)
    plot.hbar(y=bar_labels, right=bar_lengths, height=0.7, color="#2a6180")
    plot.x_range.start = 0
    plot.xaxis.axis_label = "Vastauksia"
    return plot
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from pulkka.chart_utils import (
|
|||||||
gender_colormap,
|
gender_colormap,
|
||||||
get_categorical_stats_plot,
|
get_categorical_stats_plot,
|
||||||
get_df_hover_tool,
|
get_df_hover_tool,
|
||||||
|
get_multiselect_frequency_plot,
|
||||||
set_yaxis_cash,
|
set_yaxis_cash,
|
||||||
)
|
)
|
||||||
from pulkka.column_maps import (
|
from pulkka.column_maps import (
|
||||||
@@ -19,6 +20,17 @@ from pulkka.column_maps import (
|
|||||||
from pulkka.config import OUT_DIR
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data
|
from pulkka.data_ingest import read_data
|
||||||
|
|
||||||
|
# Multiselect columns to chart: maps a column name in the survey data to the
# title of its chart. main() skips columns that are missing from the data and
# also derives the number of bars to show from the title text.
MULTISELECT_CHARTS = {
    "Data & ML": "Data & ML (top 15)",
    "DevOps & pilvi": "DevOps & pilvi (top 20)",
    "Edut (ei luontoisedut)": "Edut (top 15)",
    "Käyttöjärjestelmä": "Käyttöjärjestelmä",
    "Luontoisedut": "Luontoisedut (top 15)",
    "Ohjelmointikieli": "Ohjelmointikielet (top 20)",
    "Tietokannat": "Tietokannat (top 15)",
    "Web-kehykset": "Web-kehykset (top 20)",
}
|
||||||
|
|
||||||
plot_funcs = set()
|
plot_funcs = set()
|
||||||
|
|
||||||
|
|
||||||
@@ -76,9 +88,24 @@ def plot_kaupunki_vuositulot(df: DataFrame):
|
|||||||
def _top_n_from_title(title, default=15):
    """Return N from a "(top N)" marker in *title*, or *default* when there is none."""
    import re  # local import so this block does not depend on the file's import list

    marker = re.search(r"\(top (\d+)\)", title)
    return int(marker.group(1)) if marker else default


def main():
    """
    Render all charts into two HTML files under ``OUT_DIR``.

    ``charts.html`` receives one plot per function registered in ``plot_funcs``
    (called in order of function name); ``charts2.html`` receives one frequency
    plot per ``MULTISELECT_CHARTS`` entry whose column exists in the data.
    """
    df = read_data()
    plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
    bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
    bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))

    # The bar count comes from the explicit "(top N)" marker in the title, not
    # from a bare "20" substring, so a title that merely contains "20" (for
    # example a year) is not mistaken for a top-20 chart.
    multiselect_plots = [
        get_multiselect_frequency_plot(
            df[col],
            title=title,
            top_n=_top_n_from_title(title),
        )
        for col, title in MULTISELECT_CHARTS.items()
        if col in df.columns
    ]

    bp.output_file(
        OUT_DIR / "charts2.html",
        title="Koodiklinikan Palkkakysely – Monivalinnat",
    )
    bp.save(bl.grid(multiselect_plots, ncols=2, sizing_mode="stretch_both"))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -42,6 +42,7 @@
|
|||||||
<h2>Työkalut</h2>
|
<h2>Työkalut</h2>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a href="charts.html">Kaaviot</a></li>
|
<li><a href="charts.html">Kaaviot</a></li>
|
||||||
|
<li><a href="charts2.html">Kaaviot – monivalinnat</a></li>
|
||||||
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
|
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
|
||||||
<li><a href="/palkkakysely/analysaattori/?url=/palkkakysely/{{ year }}/data.json">Pivot-työkalu</a></li>
|
<li><a href="/palkkakysely/analysaattori/?url=/palkkakysely/{{ year }}/data.json">Pivot-työkalu</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
Reference in New Issue
Block a user