diff --git a/data/2023/results-en.xlsx b/data/2023/results-en.xlsx index ee5037d..8a1f39f 100644 Binary files a/data/2023/results-en.xlsx and b/data/2023/results-en.xlsx differ diff --git a/data/2023/results-fi.xlsx b/data/2023/results-fi.xlsx index e84fba0..06c7c4a 100644 Binary files a/data/2023/results-fi.xlsx and b/data/2023/results-fi.xlsx differ diff --git a/pulkka/data_ingest.py b/pulkka/data_ingest.py index d9647c9..0743458 100644 --- a/pulkka/data_ingest.py +++ b/pulkka/data_ingest.py @@ -147,6 +147,11 @@ def read_data() -> pd.DataFrame: df[SUKUPUOLI_COL] = df[SUKUPUOLI_COL].apply(map_sukupuoli).astype("category") df[IKA_COL] = df[IKA_COL].astype("category") + # Assume that people entering 37.5 (hours) as their työaika means 100% + df.loc[df[TYOAIKA_COL] == 37.5, TYOAIKA_COL] = 100 + # Assume there is no actual 10x koodari among us + df.loc[df[TYOAIKA_COL] == 1000, TYOAIKA_COL] = 100 + df[TYOAIKA_COL] = to_percentage(df[TYOAIKA_COL], 100) df[LAHITYO_COL] = to_percentage(df[LAHITYO_COL], 100)