mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-12 19:03:26 +00:00
Allow parametrizing paths
This commit is contained in:
5
pulkka/config.py
Normal file
5
pulkka/config.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
|
||||||
|
OUT_DIR = Path(os.environ.get("OUT_DIR", "out"))
|
||||||
@@ -1,13 +1,14 @@
|
|||||||
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data
|
from pulkka.data_ingest import read_data
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
df = read_data()
|
df = read_data()
|
||||||
df.to_html("out/data.html", index=False)
|
df.to_html(OUT_DIR / "data.html", index=False)
|
||||||
df.to_csv("out/data.csv", index=False)
|
df.to_csv(OUT_DIR / "data.csv", index=False)
|
||||||
df.to_excel("out/data.xlsx", index=False)
|
df.to_excel(OUT_DIR / "data.xlsx", index=False)
|
||||||
df.to_json(
|
df.to_json(
|
||||||
"out/data.json",
|
OUT_DIR / "data.json",
|
||||||
orient="records",
|
orient="records",
|
||||||
date_format="iso",
|
date_format="iso",
|
||||||
force_ascii=False,
|
force_ascii=False,
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ import re
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
from pulkka.config import DATA_DIR
|
||||||
|
|
||||||
COLUMN_MAP = {
|
COLUMN_MAP = {
|
||||||
"Missä kaupungissa työpaikkasi pääasiallinen toimisto sijaitsee?": "Kaupunki",
|
"Missä kaupungissa työpaikkasi pääasiallinen toimisto sijaitsee?": "Kaupunki",
|
||||||
"Työaika (jos työsuhteessa)": "Työaika",
|
"Työaika (jos työsuhteessa)": "Työaika",
|
||||||
@@ -61,7 +63,7 @@ def map_ika(d):
|
|||||||
|
|
||||||
def read_data() -> pd.DataFrame:
|
def read_data() -> pd.DataFrame:
|
||||||
df: pd.DataFrame = pd.read_excel(
|
df: pd.DataFrame = pd.read_excel(
|
||||||
"data/results.xlsx",
|
DATA_DIR / "results.xlsx",
|
||||||
skiprows=[1], # Google Sheets exports one empty row
|
skiprows=[1], # Google Sheets exports one empty row
|
||||||
)
|
)
|
||||||
df.rename(columns=COLUMN_MAP, inplace=True)
|
df.rename(columns=COLUMN_MAP, inplace=True)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from pulkka.chart_utils import (
|
|||||||
set_yaxis_cash,
|
set_yaxis_cash,
|
||||||
get_categorical_stats_plot,
|
get_categorical_stats_plot,
|
||||||
)
|
)
|
||||||
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data
|
from pulkka.data_ingest import read_data
|
||||||
|
|
||||||
plot_funcs = set()
|
plot_funcs = set()
|
||||||
@@ -56,7 +57,7 @@ def plot_kaupunki_vuositulot(df: DataFrame):
|
|||||||
def main():
|
def main():
|
||||||
df = read_data()
|
df = read_data()
|
||||||
plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
|
plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
|
||||||
bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
|
bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
|
||||||
bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))
|
bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data, force_tulot_numeric, force_age_numeric
|
from pulkka.data_ingest import read_data, force_tulot_numeric, force_age_numeric
|
||||||
from pandas_profiling import ProfileReport
|
from pandas_profiling import ProfileReport
|
||||||
|
|
||||||
@@ -7,7 +8,7 @@ def main():
|
|||||||
df = force_tulot_numeric(df)
|
df = force_tulot_numeric(df)
|
||||||
df = force_age_numeric(df)
|
df = force_age_numeric(df)
|
||||||
profile = ProfileReport(df)
|
profile = ProfileReport(df)
|
||||||
profile.to_file("out/profiling_report.html")
|
profile.to_file(OUT_DIR / "profiling_report.html")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import jinja2
|
|||||||
import numpy
|
import numpy
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data
|
from pulkka.data_ingest import read_data
|
||||||
|
|
||||||
|
|
||||||
@@ -20,7 +21,7 @@ def main():
|
|||||||
"df": read_data(),
|
"df": read_data(),
|
||||||
}
|
}
|
||||||
for filename in glob.glob("template/*"):
|
for filename in glob.glob("template/*"):
|
||||||
out_filename = os.path.join("out", os.path.relpath(filename, "template"))
|
out_filename = OUT_DIR / os.path.relpath(filename, "template")
|
||||||
with open(filename, "r") as inf:
|
with open(filename, "r") as inf:
|
||||||
tpl: jinja2.Template = env.from_string(inf.read())
|
tpl: jinja2.Template = env.from_string(inf.read())
|
||||||
content = tpl.render(data)
|
content = tpl.render(data)
|
||||||
|
|||||||
Reference in New Issue
Block a user