From 65e3480a78d0e7b66e06c5d3650c2a17ec14cd1c Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Fri, 26 Feb 2021 12:19:19 +0200 Subject: [PATCH] Clean up oy/oyj --- data_ingest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data_ingest.py b/data_ingest.py index 55f7e2b..8a0c327 100644 --- a/data_ingest.py +++ b/data_ingest.py @@ -82,6 +82,9 @@ def read_data() -> pd.DataFrame: df["Kuukausipalkka"] = df["Kuukausipalkka"].apply(map_numberlike) df["Vuositulot"] = df["Vuositulot"].apply(map_numberlike) + # Remove Oy, Oyj, etc. from work places + df["Työpaikka"] = df["Työpaikka"].replace(re.compile(r"\s+oy|oyj$", flags=re.I), "") + # Fill in Vuositulot as 12.5 * Kk-tulot if empty df["Vuositulot"] = df.apply(map_vuositulot, axis=1)