Add new data bits

This commit is contained in:
Aarni Koskela
2021-02-19 18:54:09 +02:00
parent 3874f48bb3
commit 2a1a817499
2 changed files with 15 additions and 1 deletions

View File

@@ -84,6 +84,9 @@ def read_data() -> pd.DataFrame:
# Fill in Vuositulot as 12.5 * Kk-tulot if empty
df["Vuositulot"] = df.apply(map_vuositulot, axis=1)
# Synthesize kk-tulot from Vuositulot
df["Kk-tulot"] = pd.to_numeric(df["Vuositulot"], errors="coerce") / 12
return df
@@ -93,6 +96,16 @@ def force_tulot_numeric(df):
return df
def force_age_numeric(df):
    """Replace age-range labels in the "Ikä" column with their numeric midpoint.

    Every category of ``df["Ikä"]`` whose label starts with ``"<lo>-<hi> v"``
    (for example ``"21-25 v"``) is mapped to ``int(round((lo + hi) / 2))``.
    Labels that do not match the pattern are left unchanged.

    The column is read through the ``.cat`` accessor, so it must have a
    categorical dtype. The frame is modified in place and also returned.
    """
    age_range = re.compile(r"^(\d+)-(\d+) v")
    age_map = {}
    for cat in df["Ikä"].cat.categories:
        # Non-string categories (e.g. already-converted ages) cannot be
        # range labels; skipping them avoids a TypeError from the regex.
        if not isinstance(cat, str):
            continue
        m = age_range.match(cat)
        if m:
            lo, hi = float(m.group(1)), float(m.group(2))
            # Parenthesise the sum: `lo + hi / 2` would only halve `hi`
            # and yield e.g. 34 for "21-25 v" instead of 23.
            age_map[cat] = int(round((lo + hi) / 2))
    df["Ikä"] = df["Ikä"].apply(lambda r: age_map.get(r, r))
    return df
def main():
pd.set_option("display.max_column", None)
pd.set_option("display.max_rows", None)

View File

@@ -1,10 +1,11 @@
from data_ingest import read_data, force_tulot_numeric
from data_ingest import read_data, force_tulot_numeric, force_age_numeric
from pandas_profiling import ProfileReport
def main():
    """Generate the profiling report for the ingested data.

    Reads the data, applies the numeric coercions for income
    (``force_tulot_numeric``) and age (``force_age_numeric``), and writes
    the resulting ``ProfileReport`` to ``out/profiling_report.html``.
    """
    frame = force_age_numeric(force_tulot_numeric(read_data()))
    ProfileReport(frame).to_file("out/profiling_report.html")