Initial charts

This commit is contained in:
Aarni Koskela
2021-02-19 13:00:58 +02:00
parent 060a6f8858
commit e769c6d0ce
5 changed files with 99 additions and 1 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,3 @@
*.py[cod]
*.html
*.html
out

34
chart_utils.py Normal file
View File

@@ -0,0 +1,34 @@
from bokeh import models as bm, plotting as bp
from bokeh.transform import factor_cmap
from pandas import DataFrame
from data_utils import get_categorical_stats
# Colour mapping keyed on the "Sukupuoli" column; the two factors are listed
# in the same order as the two palette entries.
gender_colormap = factor_cmap(
    field_name="Sukupuoli",
    palette=["#4834d4", "#eb4d4b"],
    factors=["mies", "nainen"],
)
def get_df_hover_tool(df: DataFrame):
    """Build a hover tool with one tooltip row per column of ``df``.

    Each tooltip is a ``(label, value)`` pair: the label is the column itself
    and the value is the string ``@{<column>}``.

    Args:
        df: Frame whose ``columns`` define the tooltip rows.

    Returns:
        A ``bm.HoverTool`` configured with those tooltips.
    """
    tooltip_rows = []
    for column in df.columns:
        # Doubled braces emit literal "{" / "}" around the column name.
        field_ref = f"@{{{column}}}"
        tooltip_rows.append((column, field_ref))
    return bm.HoverTool(tooltips=tooltip_rows)
def set_yaxis_cash(plot):
    """Label the y axis "Vuositulot" and give its first axis a "€0" tick format.

    Args:
        plot: Figure to modify in place; nothing is returned.
    """
    euro_ticks = bm.NumeralTickFormatter(format="€0")
    plot.yaxis.axis_label = "Vuositulot"
    plot.yaxis[0].formatter = euro_ticks
def get_categorical_stats_plot(df, *, category):
    """Chart "Vuositulot" statistics for each value of ``category``.

    The statistics come from ``get_categorical_stats``. Their "max"/"min"
    columns are drawn as grey vertical bars, and the "median" and "mean"
    columns are drawn as two lines on top.

    Args:
        df: Source data passed straight to ``get_categorical_stats``.
        category: Name of the column to group by (keyword-only).

    Returns:
        The figure that was built.
    """
    stats = get_categorical_stats(df, category, "Vuositulot").reset_index()
    # Categorical dtype gives us the distinct values to use as the x range.
    stats[category] = stats[category].astype("category")
    x_values = stats[category]
    x_factors = list(x_values.cat.categories)

    plot = bp.figure(title=f"{category}/tulot", x_range=x_factors)
    set_yaxis_cash(plot)

    plot.vbar(x_values, 0.4, stats["max"], stats["min"], color="#a4b0be")
    for stat_name, line_colour in (("median", "#1289A7"), ("mean", "#B53471")):
        plot.line(
            x_values,
            stats[stat_name],
            legend_label=stat_name,
            color=line_colour,
            line_width=4,
        )
    return plot

13
data_utils.py Normal file
View File

@@ -0,0 +1,13 @@
import pandas as pd
def get_categorical_stats(
    df: pd.DataFrame, category_col: str, value_col: str
) -> pd.DataFrame:
    """Summarise ``value_col`` for every distinct value of ``category_col``.

    Values are coerced to numbers first; rows whose value is not numeric or
    is not strictly positive are left out of the statistics.

    Args:
        df: Input data. It is copied, so the caller's frame is not modified.
        category_col: Name of the column to group by.
        value_col: Name of the column to aggregate.

    Returns:
        A frame indexed by ``category_col`` with the columns
        "mean", "min", "max" and "median".
    """
    # Drop records where value is not numeric before grouping...
    df = df.copy()
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    # The comparison must be parenthesised: `&` binds tighter than `>`, so
    # without the parentheses the mask would be AND-ed with the raw values
    # and only then compared, which keeps negatives (and, for integer data,
    # discards even values).
    df = df[df[value_col].notna() & (df[value_col] > 0)]
    # ... then carry on.
    group = df[[category_col, value_col]].groupby(category_col)
    return group[value_col].agg(["mean", "min", "max", "median"])

50
generate_charts.py Normal file
View File

@@ -0,0 +1,50 @@
import bokeh.plotting as bp
import bokeh.models as bm
import bokeh.layouts as bl
from pandas import DataFrame
from chart_utils import (
gender_colormap,
get_df_hover_tool,
set_yaxis_cash,
get_categorical_stats_plot,
)
from data_ingest import read_data
# Registry of plot generator functions, filled in by the @plot_this decorator.
plot_funcs = set()


def plot_this(fn):
    """
    Decorator for marking a function as a plot generator.

    Registers ``fn`` in ``plot_funcs`` and returns it unchanged, so the
    decorated name keeps referring to the function instead of ``None``.
    """
    plot_funcs.add(fn)
    return fn
@plot_this
def plot_kokemus_tulot(df: DataFrame):
    """Scatter "Vuositulot" against "Työkokemus", coloured by ``gender_colormap``.

    Args:
        df: Data providing the "Työkokemus" and "Vuositulot" columns; all of
            its columns are also exposed through the hover tool.

    Returns:
        The figure that was built.
    """
    data_source = bm.ColumnDataSource(df)

    figure = bp.figure(title="Kokemus/Tulot")
    hover = get_df_hover_tool(df)
    figure.add_tools(hover)

    figure.xaxis.axis_label = "Työkokemus (v)"
    set_yaxis_cash(figure)

    figure.circle(
        x="Työkokemus",
        y="Vuositulot",
        source=data_source,
        color=gender_colormap,
        size=10,
    )
    return figure
@plot_this
def plot_ika_tulot(df: DataFrame):
    """Return the categorical statistics plot grouped by the "Ikä" column."""
    age_plot = get_categorical_stats_plot(df, category="Ikä")
    return age_plot
def main():
    """Read the data, build every registered plot and save them as one page.

    Plot functions are run in order of their ``__name__`` so the output order
    does not depend on set iteration order. The plots are stacked in a single
    column and written to "out/charts.html".
    """
    df = read_data()

    generators = sorted(plot_funcs, key=lambda f: f.__name__)
    plots = []
    for make_plot in generators:
        plots.append(make_plot(df))

    bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
    page = bl.column(plots)
    bp.save(page)
# Generate the charts only when this file is executed as a script.
if __name__ == "__main__":
    main()

0
out/.gitkeep Normal file
View File