mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-01-26 11:23:59 +00:00
Allow parametrizing paths
This commit is contained in:
5
pulkka/config.py
Normal file
5
pulkka/config.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
|
||||
OUT_DIR = Path(os.environ.get("OUT_DIR", "out"))
|
||||
@@ -1,13 +1,14 @@
|
||||
from pulkka.config import OUT_DIR
|
||||
from pulkka.data_ingest import read_data
|
||||
|
||||
|
||||
def main():
|
||||
df = read_data()
|
||||
df.to_html("out/data.html", index=False)
|
||||
df.to_csv("out/data.csv", index=False)
|
||||
df.to_excel("out/data.xlsx", index=False)
|
||||
df.to_html(OUT_DIR / "data.html", index=False)
|
||||
df.to_csv(OUT_DIR / "data.csv", index=False)
|
||||
df.to_excel(OUT_DIR / "data.xlsx", index=False)
|
||||
df.to_json(
|
||||
"out/data.json",
|
||||
OUT_DIR / "data.json",
|
||||
orient="records",
|
||||
date_format="iso",
|
||||
force_ascii=False,
|
||||
|
||||
@@ -3,6 +3,8 @@ import re
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pulkka.config import DATA_DIR
|
||||
|
||||
COLUMN_MAP = {
|
||||
"Missä kaupungissa työpaikkasi pääasiallinen toimisto sijaitsee?": "Kaupunki",
|
||||
"Työaika (jos työsuhteessa)": "Työaika",
|
||||
@@ -61,7 +63,7 @@ def map_ika(d):
|
||||
|
||||
def read_data() -> pd.DataFrame:
|
||||
df: pd.DataFrame = pd.read_excel(
|
||||
"data/results.xlsx",
|
||||
DATA_DIR / "results.xlsx",
|
||||
skiprows=[1], # Google Sheets exports one empty row
|
||||
)
|
||||
df.rename(columns=COLUMN_MAP, inplace=True)
|
||||
|
||||
@@ -9,6 +9,7 @@ from pulkka.chart_utils import (
|
||||
set_yaxis_cash,
|
||||
get_categorical_stats_plot,
|
||||
)
|
||||
from pulkka.config import OUT_DIR
|
||||
from pulkka.data_ingest import read_data
|
||||
|
||||
plot_funcs = set()
|
||||
@@ -56,7 +57,7 @@ def plot_kaupunki_vuositulot(df: DataFrame):
|
||||
def main():
|
||||
df = read_data()
|
||||
plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
|
||||
bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
|
||||
bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
|
||||
bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from pulkka.config import OUT_DIR
|
||||
from pulkka.data_ingest import read_data, force_tulot_numeric, force_age_numeric
|
||||
from pandas_profiling import ProfileReport
|
||||
|
||||
@@ -7,7 +8,7 @@ def main():
|
||||
df = force_tulot_numeric(df)
|
||||
df = force_age_numeric(df)
|
||||
profile = ProfileReport(df)
|
||||
profile.to_file("out/profiling_report.html")
|
||||
profile.to_file(OUT_DIR / "profiling_report.html")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -6,6 +6,7 @@ import jinja2
|
||||
import numpy
|
||||
import pandas
|
||||
|
||||
from pulkka.config import OUT_DIR
|
||||
from pulkka.data_ingest import read_data
|
||||
|
||||
|
||||
@@ -20,7 +21,7 @@ def main():
|
||||
"df": read_data(),
|
||||
}
|
||||
for filename in glob.glob("template/*"):
|
||||
out_filename = os.path.join("out", os.path.relpath(filename, "template"))
|
||||
out_filename = OUT_DIR / os.path.relpath(filename, "template")
|
||||
with open(filename, "r") as inf:
|
||||
tpl: jinja2.Template = env.from_string(inf.read())
|
||||
content = tpl.render(data)
|
||||
|
||||
Reference in New Issue
Block a user