mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-12 22:03:25 +00:00
Add new charts
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
from bokeh import models as bm
|
||||
from bokeh import plotting as bp
|
||||
from bokeh.transform import factor_cmap
|
||||
from pandas import DataFrame
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
from pulkka.data_utils import get_categorical_stats
|
||||
from pulkka.data_utils import explode_multiselect, get_categorical_stats
|
||||
|
||||
CAT_Q_RADIUS = 0.1
|
||||
|
||||
@@ -88,3 +88,27 @@ def get_categorical_stats_plot(df, *, category, value, na_as_category=None, line
|
||||
color="#B53471",
|
||||
)
|
||||
return plot
|
||||
|
||||
|
||||
def get_multiselect_frequency_plot(
    series: Series,
    *,
    title: str,
    top_n: int = 20,
) -> bp.figure:
    """
    Draw the most frequent values of a comma-separated multiselect column.

    Splitting and counting is delegated to ``explode_multiselect``, which
    also receives ``top_n``; the result is rendered as one horizontal bar
    per value under the given ``title``.
    """
    frequencies = explode_multiselect(series, top_n=top_n)

    # Flip the order so the highest count ends up as the topmost bar.
    bar_labels = list(frequencies.index[::-1])
    bar_lengths = list(frequencies.values[::-1])  # noqa: PD011

    chart = bp.figure(
        title=title,
        y_range=bar_labels,
        width=700,
        # Height grows with the number of bars but never drops below 300.
        height=max(300, len(bar_labels) * 22),
    )
    chart.hbar(y=bar_labels, right=bar_lengths, height=0.7, color="#2a6180")
    chart.x_range.start = 0
    chart.xaxis.axis_label = "Vastauksia"
    return chart
|
||||
|
||||
@@ -7,6 +7,7 @@ from pulkka.chart_utils import (
|
||||
gender_colormap,
|
||||
get_categorical_stats_plot,
|
||||
get_df_hover_tool,
|
||||
get_multiselect_frequency_plot,
|
||||
set_yaxis_cash,
|
||||
)
|
||||
from pulkka.column_maps import (
|
||||
@@ -19,6 +20,17 @@ from pulkka.column_maps import (
|
||||
from pulkka.config import OUT_DIR
|
||||
from pulkka.data_ingest import read_data
|
||||
|
||||
# Maps a data column name to the title of its frequency chart.
# main() skips columns that are absent from the data and derives each
# chart's top-N cutoff from the title text.
MULTISELECT_CHARTS = {
|
||||
"Data & ML": "Data & ML (top 15)",
|
||||
"DevOps & pilvi": "DevOps & pilvi (top 20)",
|
||||
"Edut (ei luontoisedut)": "Edut (top 15)",
|
||||
"Käyttöjärjestelmä": "Käyttöjärjestelmä",
|
||||
"Luontoisedut": "Luontoisedut (top 15)",
|
||||
"Ohjelmointikieli": "Ohjelmointikielet (top 20)",
|
||||
"Tietokannat": "Tietokannat (top 15)",
|
||||
"Web-kehykset": "Web-kehykset (top 20)",
|
||||
}
|
||||
|
||||
# Collection of plot functions; main() calls each one with the DataFrame,
# ordered by function name.
plot_funcs = set()
|
||||
|
||||
|
||||
@@ -76,9 +88,24 @@ def plot_kaupunki_vuositulot(df: DataFrame):
|
||||
def _top_n_from_title(title: str, default: int = 15) -> int:
    """
    Return the N announced by a trailing "(top N)" suffix of *title*.

    Falls back to *default* when the title has no such suffix, so the number
    of bars requested always agrees with what the title promises.
    """
    _, marker, tail = title.rpartition("(top ")
    if marker and tail.endswith(")") and tail[:-1].isdecimal():
        return int(tail[:-1])
    return default


def main():
    """
    Render the chart pages into OUT_DIR.

    ``charts.html`` holds the output of every function in ``plot_funcs``
    (called in name order); ``charts2.html`` holds one frequency chart per
    ``MULTISELECT_CHARTS`` column that is present in the data.
    """
    df = read_data()
    plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]

    bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
    bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))

    # Columns absent from the data are skipped instead of raising KeyError.
    # The cutoff is parsed from the "(top N)" suffix rather than guessed from
    # a substring match, so title and chart cannot drift apart.
    multiselect_plots = [
        get_multiselect_frequency_plot(
            df[col],
            title=title,
            top_n=_top_n_from_title(title),
        )
        for col, title in MULTISELECT_CHARTS.items()
        if col in df.columns
    ]

    bp.output_file(
        OUT_DIR / "charts2.html",
        title="Koodiklinikan Palkkakysely – Monivalinnat",
    )
    bp.save(bl.grid(multiselect_plots, ncols=2, sizing_mode="stretch_both"))


if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
<h2>Työkalut</h2>
|
||||
<ul>
|
||||
<li><a href="charts.html">Kaaviot</a></li>
|
||||
<li><a href="charts2.html">Kaaviot – monivalinnat</a></li>
|
||||
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
|
||||
<li><a href="/palkkakysely/analysaattori/?url=/palkkakysely/{{ year }}/data.json">Pivot-työkalu</a></li>
|
||||
</ul>
|
||||
|
||||
Reference in New Issue
Block a user