mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-11 01:02:45 +00:00
Initial charts
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
*.py[cod]
|
*.py[cod]
|
||||||
*.html
|
*.html
|
||||||
|
out
|
||||||
|
|||||||
34
chart_utils.py
Normal file
34
chart_utils.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
from bokeh import models as bm, plotting as bp
|
||||||
|
from bokeh.transform import factor_cmap
|
||||||
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
from data_utils import get_categorical_stats
|
||||||
|
|
||||||
|
# Colour mapping for the "Sukupuoli" column: "mies" -> #4834d4, "nainen" -> #eb4d4b.
gender_colormap = factor_cmap(
    field_name="Sukupuoli",
    palette=["#4834d4", "#eb4d4b"],
    factors=["mies", "nainen"],
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_df_hover_tool(df: DataFrame):
    """Return a bokeh ``HoverTool`` with one tooltip row per column of ``df``.

    Each row is labelled with the column name and displays the
    ``@{column}`` field reference for that same column.
    """
    tooltip_rows = []
    for c in df.columns:
        tooltip_rows.append((c, f"@{{{c}}}"))
    return bm.HoverTool(tooltips=tooltip_rows)
|
||||||
|
|
||||||
|
|
||||||
|
def set_yaxis_cash(plot):
    """Configure the y axis of ``plot`` for displaying income.

    Labels the axis "Vuositulot" and installs a ``NumeralTickFormatter``
    with the "€0" format on the first y axis. ``plot`` is modified in place;
    nothing is returned.
    """
    euro_formatter = bm.NumeralTickFormatter(format="€0")
    plot.yaxis.axis_label = "Vuositulot"
    plot.yaxis[0].formatter = euro_formatter
|
||||||
|
|
||||||
|
|
||||||
|
def get_categorical_stats_plot(df, *, category):
    """Build a figure summarising "Vuositulot" for each value of ``category``.

    The statistics come from ``get_categorical_stats``. For every category
    value the figure shows a bar reaching from the minimum to the maximum,
    with the median and mean drawn as lines on top.

    Args:
        df: Data containing the ``category`` column and a "Vuositulot" column.
        category: Name of the column to group by (keyword-only).

    Returns:
        The bokeh figure.
    """
    stats = get_categorical_stats(df, category, "Vuositulot").reset_index()
    stats[category] = stats[category].astype("category")

    # The categorical x range is built from the category values themselves.
    factors = list(stats[category].cat.categories)
    figure = bp.figure(title=f"{category}/tulot", x_range=factors)
    set_yaxis_cash(figure)

    xs = stats[category]
    figure.vbar(
        x=xs, width=0.4, top=stats["max"], bottom=stats["min"], color="#a4b0be"
    )
    for stat_name, line_color in (("median", "#1289A7"), ("mean", "#B53471")):
        figure.line(
            xs,
            stats[stat_name],
            legend_label=stat_name,
            color=line_color,
            line_width=4,
        )
    return figure
|
||||||
13
data_utils.py
Normal file
13
data_utils.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def get_categorical_stats(
    df: pd.DataFrame, category_col: str, value_col: str
) -> pd.DataFrame:
    """Compute mean/min/max/median of ``value_col`` per ``category_col`` value.

    Rows whose value is not numeric, is missing, or is not strictly positive
    are discarded before grouping. The input frame is not modified.

    Args:
        df: Source data.
        category_col: Name of the column to group by.
        value_col: Name of the column to aggregate.

    Returns:
        A frame indexed by category with the columns
        "mean", "min", "max" and "median".
    """
    # Work on a copy so the numeric coercion below does not leak to the caller.
    df = df.copy()
    # Non-numeric entries become NaN and are filtered out with the rest.
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    # The parentheses are required: `&` binds tighter than `>`, so without
    # them the expression is evaluated as `(notna & values) > 0`.
    df = df[df[value_col].notna() & (df[value_col] > 0)]
    group = df[[category_col, value_col]].groupby(category_col)
    return group[value_col].agg(["mean", "min", "max", "median"])
|
||||||
50
generate_charts.py
Normal file
50
generate_charts.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import bokeh.plotting as bp
|
||||||
|
import bokeh.models as bm
|
||||||
|
import bokeh.layouts as bl
|
||||||
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
from chart_utils import (
|
||||||
|
gender_colormap,
|
||||||
|
get_df_hover_tool,
|
||||||
|
set_yaxis_cash,
|
||||||
|
get_categorical_stats_plot,
|
||||||
|
)
|
||||||
|
from data_ingest import read_data
|
||||||
|
|
||||||
|
# Registry of plot generator functions, filled in by the ``plot_this`` decorator.
plot_funcs = set()


def plot_this(fn):
    """
    Decorator for marking a function as a plot generator.

    The function is added to ``plot_funcs`` and returned unchanged, so the
    decorated name still refers to the function instead of ``None``.
    """
    plot_funcs.add(fn)
    return fn
|
||||||
|
|
||||||
|
|
||||||
|
@plot_this
def plot_kokemus_tulot(df: DataFrame):
    """Scatter plot of "Vuositulot" against "Työkokemus".

    Each row of ``df`` is drawn as a circle coloured by ``gender_colormap``,
    and a hover tool lists every column of ``df``.

    Returns:
        The bokeh figure.
    """
    figure = bp.figure(title="Kokemus/Tulot")
    figure.add_tools(get_df_hover_tool(df))
    figure.xaxis.axis_label = "Työkokemus (v)"
    set_yaxis_cash(figure)

    data_source = bm.ColumnDataSource(df)
    figure.circle(
        x="Työkokemus",
        y="Vuositulot",
        source=data_source,
        color=gender_colormap,
        size=10,
    )
    return figure
|
||||||
|
|
||||||
|
|
||||||
|
@plot_this
def plot_ika_tulot(df: DataFrame):
    """Return the categorical statistics plot for the "Ikä" column."""
    age_plot = get_categorical_stats_plot(df, category="Ikä")
    return age_plot
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Render every registered plot into ``out/charts.html``.

    Reads the data with ``read_data``, calls each function in ``plot_funcs``
    in order of function name, and saves the resulting plots stacked in a
    single column.
    """
    survey = read_data()

    # plot_funcs is a set, so sort by name for a stable ordering.
    ordered_funcs = sorted(plot_funcs, key=lambda f: f.__name__)
    plots = []
    for make_plot in ordered_funcs:
        plots.append(make_plot(survey))

    bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
    layout = bl.column(plots)
    bp.save(layout)


if __name__ == "__main__":
    main()
|
||||||
0
out/.gitkeep
Normal file
0
out/.gitkeep
Normal file
Reference in New Issue
Block a user