mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-13 12:03:36 +00:00
Add 2025 survey data support
The 2025 survey uses a single English-only xlsx (instead of separate fi/en files) with a restructured schema: compensation is split into base salary, commission, lomaraha, bonus, and equity components; working time is h/week instead of percentage; and competitive salary is categorical instead of boolean. Vuositulot is now synthesized from the component fields.

Drop COLUMN_MAP_2024, COLUMN_MAP_2024_EN_TO_FI, VALUE_MAP_2024_EN_TO_FI, read_initial_dfs_2024, read_data_2024, map_sukupuoli, map_vuositulot, split_boolean_column_to_other, apply_fixups, and the associated gender value lists and boolean text maps. All of this exists in version history.

- KKPALKKA now includes base salary + commission (median 5500 → 5800)
- Apply map_numberlike to tuntilaskutus and vuosilaskutus columns to handle string values like "60 000" and "100 000"
- Filter out zeros when computing tunnusluvut on the index page so stats reflect actual reported values

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,3 +43,28 @@ def rename_na(df: pd.DataFrame, col: str, na_name: str) -> None:
|
||||
df[col] = df[col].astype("string")
|
||||
df.loc[df[col].isna(), col] = na_name
|
||||
df[col] = df[col].astype("category")
|
||||
|
||||
|
||||
def explode_multiselect(
|
||||
series: pd.Series,
|
||||
*,
|
||||
sep: str = ", ",
|
||||
top_n: int | None = None,
|
||||
) -> pd.Series:
|
||||
"""
|
||||
Explode a comma-separated multiselect column into value counts.
|
||||
|
||||
Returns a Series of counts indexed by individual values,
|
||||
sorted descending. Optionally limited to top_n entries.
|
||||
"""
|
||||
counts = (
|
||||
series.dropna()
|
||||
.str.split(sep)
|
||||
.explode()
|
||||
.str.strip()
|
||||
.loc[lambda s: s != ""]
|
||||
.value_counts()
|
||||
)
|
||||
if top_n is not None:
|
||||
counts = counts.head(top_n)
|
||||
return counts
|
||||
|
||||
Reference in New Issue
Block a user