mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-02-05 01:48:31 +00:00
Initial charts
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
*.py[cod]
|
||||
*.html
|
||||
*.html
|
||||
out
|
||||
|
||||
34
chart_utils.py
Normal file
34
chart_utils.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from bokeh import models as bm, plotting as bp
|
||||
from bokeh.transform import factor_cmap
|
||||
from pandas import DataFrame
|
||||
|
||||
from data_utils import get_categorical_stats
|
||||
|
||||
# Categorical color mapping keyed on the "Sukupuoli" column: factors are
# paired with palette entries by position ("mies" -> first, "nainen" -> second).
gender_colormap = factor_cmap(
    field_name="Sukupuoli",
    palette=["#4834d4", "#eb4d4b"],
    factors=["mies", "nainen"],
)
|
||||
|
||||
|
||||
def get_df_hover_tool(df: DataFrame):
    """Build a Bokeh hover tool with one tooltip row per column of *df*.

    Each row is labelled with the column name and displays the field
    referenced as ``@{column name}``.
    """
    tooltip_rows = [(name, f"@{{{name}}}") for name in df.columns]
    return bm.HoverTool(tooltips=tooltip_rows)
|
||||
|
||||
|
||||
def set_yaxis_cash(plot):
    """Configure the y-axis of *plot* for income values.

    Sets the axis label to "Vuositulot" and installs a numeral tick
    formatter using the "€0" format on the first y-axis. The plot is
    modified in place; nothing is returned.
    """
    euro_ticks = bm.NumeralTickFormatter(format="€0")
    plot.yaxis.axis_label = "Vuositulot"
    plot.yaxis[0].formatter = euro_ticks
|
||||
|
||||
|
||||
def get_categorical_stats_plot(df, *, category):
    """Plot per-category statistics of the "Vuositulot" column.

    The statistics come from ``get_categorical_stats``. For every value of
    *category* the figure shows a grey bar spanning min..max, with the
    median and mean drawn as lines on top.

    Args:
        df: Source data containing *category* and "Vuositulot" columns.
        category: Name of the column to group by (keyword-only).

    Returns:
        The configured Bokeh figure.
    """
    stats = get_categorical_stats(df, category, "Vuositulot").reset_index()
    stats[category] = stats[category].astype("category")
    labels = stats[category]

    plot = bp.figure(
        title=f"{category}/tulot",
        x_range=list(labels.cat.categories),
    )
    set_yaxis_cash(plot)

    # Range bar: from the group's minimum up to its maximum.
    plot.vbar(
        x=labels,
        width=0.4,
        top=stats["max"],
        bottom=stats["min"],
        color="#a4b0be",
    )

    # Median first, then mean, so legend/draw order matches the original.
    for stat_name, line_color in (("median", "#1289A7"), ("mean", "#B53471")):
        plot.line(
            labels,
            stats[stat_name],
            legend_label=stat_name,
            color=line_color,
            line_width=4,
        )
    return plot
|
||||
13
data_utils.py
Normal file
13
data_utils.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_categorical_stats(
    df: pd.DataFrame, category_col: str, value_col: str
) -> pd.DataFrame:
    """Return mean, min, max and median of *value_col* per *category_col*.

    Rows whose value is not numeric, is missing, or is not strictly
    positive are dropped before grouping. The input frame is not modified.

    Args:
        df: Source data.
        category_col: Name of the column to group by.
        value_col: Name of the column to aggregate.

    Returns:
        A DataFrame indexed by category with the columns
        ``mean``, ``min``, ``max`` and ``median`` (in that order).
    """
    # Work on a copy so the caller's frame keeps its original values.
    df = df.copy()
    # Non-numeric entries become NaN and are filtered out below.
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    values = df[value_col]
    # The comparison must be parenthesized: `&` binds tighter than `>`, so
    # `a & b > 0` is evaluated as `(a & b) > 0` and never tests the sign.
    df = df[values.notna() & (values > 0)]
    group = df[[category_col, value_col]].groupby(category_col)
    return group[value_col].agg(["mean", "min", "max", "median"])
|
||||
50
generate_charts.py
Normal file
50
generate_charts.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import bokeh.plotting as bp
|
||||
import bokeh.models as bm
|
||||
import bokeh.layouts as bl
|
||||
from pandas import DataFrame
|
||||
|
||||
from chart_utils import (
|
||||
gender_colormap,
|
||||
get_df_hover_tool,
|
||||
set_yaxis_cash,
|
||||
get_categorical_stats_plot,
|
||||
)
|
||||
from data_ingest import read_data
|
||||
|
||||
# Registry of plot generator functions, filled by the @plot_this decorator.
plot_funcs = set()


def plot_this(fn):
    """
    Decorator for marking a function as a plot generator.

    Registers *fn* in ``plot_funcs`` and returns it unchanged, so the
    decorated name stays bound to the function instead of ``None``.
    """
    plot_funcs.add(fn)
    return fn
|
||||
|
||||
|
||||
@plot_this
def plot_kokemus_tulot(df: DataFrame):
    """Scatter plot of "Vuositulot" against "Työkokemus".

    Every row of *df* becomes one circle colored through
    ``gender_colormap``; a hover tool lists all columns of the row.
    """
    plot = bp.figure(title="Kokemus/Tulot")
    plot.add_tools(get_df_hover_tool(df))
    plot.xaxis.axis_label = "Työkokemus (v)"
    set_yaxis_cash(plot)
    plot.circle(
        x="Työkokemus",
        y="Vuositulot",
        source=bm.ColumnDataSource(df),
        color=gender_colormap,
        size=10,
    )
    return plot
|
||||
|
||||
|
||||
@plot_this
def plot_ika_tulot(df: DataFrame):
    """Return the categorical statistics plot grouped by the "Ikä" column."""
    age_plot = get_categorical_stats_plot(df, category="Ikä")
    return age_plot
|
||||
|
||||
|
||||
def main():
    """Read the data, render every registered plot and save them as HTML.

    Plot functions are called in order of their function name because
    ``plot_funcs`` is a set and has no defined iteration order. The plots
    are stacked in a single column and written to ``out/charts.html``.
    """
    survey = read_data()
    charts = []
    for make_plot in sorted(plot_funcs, key=lambda fn: fn.__name__):
        charts.append(make_plot(survey))
    bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
    bp.save(bl.column(charts))


if __name__ == "__main__":
    main()
|
||||
0
out/.gitkeep
Normal file
0
out/.gitkeep
Normal file
Reference in New Issue
Block a user