mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-06 16:01:12 +00:00
Add new data bits
This commit is contained in:
@@ -84,6 +84,9 @@ def read_data() -> pd.DataFrame:
|
|||||||
|
|
||||||
# Fill in Vuositulot as 12.5 * Kk-tulot if empty
|
# Fill in Vuositulot as 12.5 * Kk-tulot if empty
|
||||||
df["Vuositulot"] = df.apply(map_vuositulot, axis=1)
|
df["Vuositulot"] = df.apply(map_vuositulot, axis=1)
|
||||||
|
|
||||||
|
# Synthesize kk-tulot from Vuositulot
|
||||||
|
df["Kk-tulot"] = pd.to_numeric(df["Vuositulot"], errors="coerce") / 12
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
@@ -93,6 +96,16 @@ def force_tulot_numeric(df):
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def force_age_numeric(df):
    """Replace age-range categories in the "Ikä" column with their midpoint.

    Categories of the form "<low>-<high> v" (for example "26-30 v") are
    mapped to the integer-rounded midpoint of the range. Values whose
    category does not match that pattern are left unchanged.

    Args:
        df: DataFrame whose "Ikä" column is categorical (the ``.cat``
            accessor is used to enumerate its categories).

    Returns:
        The same DataFrame object, with the "Ikä" column reassigned.
    """
    age_map = {}
    for cat in df["Ikä"].cat.categories:
        # Non-string categories cannot be age-range labels; skip them
        # instead of letting re.match raise TypeError.
        if not isinstance(cat, str):
            continue
        m = re.match(r"^(\d+)-(\d+) v", cat)
        if m:
            low = float(m.group(1))
            high = float(m.group(2))
            # Parenthesize the sum: `low + high / 2` would only halve the
            # upper bound and yield e.g. 41 for "26-30 v" instead of 28.
            age_map[cat] = int(round((low + high) / 2))
    # Values without a mapping (non-range labels) pass through untouched.
    df["Ikä"] = df["Ikä"].apply(lambda r: age_map.get(r, r))
    return df
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
pd.set_option("display.max_column", None)
|
pd.set_option("display.max_column", None)
|
||||||
pd.set_option("display.max_rows", None)
|
pd.set_option("display.max_rows", None)
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
from data_ingest import read_data, force_tulot_numeric
|
from data_ingest import read_data, force_tulot_numeric, force_age_numeric
|
||||||
from pandas_profiling import ProfileReport
|
from pandas_profiling import ProfileReport
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Generate the profiling report and write it to out/profiling_report.html.

    Reads the data via read_data(), passes it through force_tulot_numeric()
    and force_age_numeric(), then builds a ProfileReport from the result.
    """
    prepared = force_age_numeric(force_tulot_numeric(read_data()))
    ProfileReport(prepared).to_file("out/profiling_report.html")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user