mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-02-22 21:55:56 +00:00
Allow remapping nans to a category
This commit is contained in:
@@ -16,8 +16,10 @@ def set_yaxis_cash(plot):
|
|||||||
plot.yaxis[0].formatter = bm.NumeralTickFormatter(format="€0")
|
plot.yaxis[0].formatter = bm.NumeralTickFormatter(format="€0")
|
||||||
|
|
||||||
|
|
||||||
def get_categorical_stats_plot(df, *, category):
|
def get_categorical_stats_plot(df, *, category, na_as_category=None):
|
||||||
df = get_categorical_stats(df, category, "Vuositulot")
|
df = get_categorical_stats(
|
||||||
|
df, category, "Vuositulot", na_as_category=na_as_category
|
||||||
|
)
|
||||||
df.reset_index(inplace=True)
|
df.reset_index(inplace=True)
|
||||||
df[category] = df[category].astype("category")
|
df[category] = df[category].astype("category")
|
||||||
plot = bp.figure(
|
plot = bp.figure(
|
||||||
|
|||||||
@@ -1,13 +1,23 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def get_categorical_stats(
    df: pd.DataFrame,
    category_col: str,
    value_col: str,
    *,
    na_as_category: Optional[str] = None,
) -> pd.DataFrame:
    """Aggregate ``value_col`` per distinct value of ``category_col``.

    Rows whose ``value_col`` cannot be parsed as a number, or is not
    strictly positive, are discarded before grouping.  The input frame is
    never modified; all work happens on a copy.

    Args:
        df: Source data containing both ``category_col`` and ``value_col``.
        category_col: Name of the column to group by.
        value_col: Name of the column to aggregate.
        na_as_category: When truthy, rows with a missing ``category_col``
            are grouped under this label.  When ``None`` (or empty), the
            grouping is done with pandas' defaults.

    Returns:
        A frame indexed by category with the columns ``mean``, ``min``,
        ``max``, ``median`` and ``count`` computed over ``value_col``.
    """
    # Drop records where value is not numeric before grouping...
    df = df.copy()
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    # The comparison must be parenthesised: `&` binds tighter than `>`, so
    # `a.notna() & a > 0` would evaluate `(a.notna() & a) > 0` instead.
    is_positive_number = df[value_col].notna() & (df[value_col] > 0)
    # Take an explicit copy so the column assignments below operate on an
    # independent frame rather than on a slice of the unfiltered one.
    df = df[is_positive_number].copy()
    if na_as_category:
        # Go through the nullable string dtype so the replacement label can
        # be assigned regardless of the column's original dtype.
        df[category_col] = df[category_col].astype("string")
        df.loc[df[category_col].isna(), category_col] = na_as_category
        df[category_col] = df[category_col].astype("category")
    # ... then carry on.
    group = df[[category_col, value_col]].groupby(category_col)
    return group[value_col].agg(["mean", "min", "max", "median", "count"])
|
||||||
|
|||||||
Reference in New Issue
Block a user