Allow parametrizing paths

This commit is contained in:
Aarni Koskela
2022-08-31 15:12:46 +03:00
parent cdc6d9cc89
commit 538bc6083a
6 changed files with 19 additions and 8 deletions

5
pulkka/config.py Normal file
View File

@@ -0,0 +1,5 @@
import os
from pathlib import Path
# Base directory for input data. Taken from the DATA_DIR environment variable
# when it is set; otherwise falls back to the relative path "data".
DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
# Base directory for generated output files. Taken from the OUT_DIR environment
# variable when it is set; otherwise falls back to the relative path "out".
OUT_DIR = Path(os.environ.get("OUT_DIR", "out"))

View File

@@ -1,13 +1,14 @@
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data
def main():
df = read_data()
df.to_html("out/data.html", index=False)
df.to_csv("out/data.csv", index=False)
df.to_excel("out/data.xlsx", index=False)
df.to_html(OUT_DIR / "data.html", index=False)
df.to_csv(OUT_DIR / "data.csv", index=False)
df.to_excel(OUT_DIR / "data.xlsx", index=False)
df.to_json(
"out/data.json",
OUT_DIR / "data.json",
orient="records",
date_format="iso",
force_ascii=False,

View File

@@ -3,6 +3,8 @@ import re
import numpy as np
import pandas as pd
from pulkka.config import DATA_DIR
COLUMN_MAP = {
"Missä kaupungissa työpaikkasi pääasiallinen toimisto sijaitsee?": "Kaupunki",
"Työaika (jos työsuhteessa)": "Työaika",
@@ -61,7 +63,7 @@ def map_ika(d):
def read_data() -> pd.DataFrame:
df: pd.DataFrame = pd.read_excel(
"data/results.xlsx",
DATA_DIR / "results.xlsx",
skiprows=[1], # Google Sheets exports one empty row
)
df.rename(columns=COLUMN_MAP, inplace=True)

View File

@@ -9,6 +9,7 @@ from pulkka.chart_utils import (
set_yaxis_cash,
get_categorical_stats_plot,
)
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data
plot_funcs = set()
@@ -56,7 +57,7 @@ def plot_kaupunki_vuositulot(df: DataFrame):
def main():
df = read_data()
plots = [func(df) for func in sorted(plot_funcs, key=lambda f: f.__name__)]
bp.output_file("out/charts.html", title="Koodiklinikan Palkkakysely")
bp.output_file(OUT_DIR / "charts.html", title="Koodiklinikan Palkkakysely")
bp.save(bl.grid(plots, ncols=2, sizing_mode="stretch_both"))

View File

@@ -1,3 +1,4 @@
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data, force_tulot_numeric, force_age_numeric
from pandas_profiling import ProfileReport
@@ -7,7 +8,7 @@ def main():
df = force_tulot_numeric(df)
df = force_age_numeric(df)
profile = ProfileReport(df)
profile.to_file("out/profiling_report.html")
profile.to_file(OUT_DIR / "profiling_report.html")
if __name__ == "__main__":

View File

@@ -6,6 +6,7 @@ import jinja2
import numpy
import pandas
from pulkka.config import OUT_DIR
from pulkka.data_ingest import read_data
@@ -20,7 +21,7 @@ def main():
"df": read_data(),
}
for filename in glob.glob("template/*"):
out_filename = os.path.join("out", os.path.relpath(filename, "template"))
out_filename = OUT_DIR / os.path.relpath(filename, "template")
with open(filename, "r") as inf:
tpl: jinja2.Template = env.from_string(inf.read())
content = tpl.render(data)