mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-02-21 08:55:22 +00:00
29
.github/workflows/build.yml
vendored
29
.github/workflows/build.yml
vendored
@@ -14,26 +14,27 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-python@v4
|
- uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
cache: pip
|
cache: pip
|
||||||
- uses: pre-commit/action@v3.0.0
|
- uses: pre-commit/action@v3.0.1
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.11
|
- uses: actions/setup-python@v5
|
||||||
uses: actions/setup-python@v4
|
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
cache: pip
|
- uses: astral-sh/setup-uv@v3
|
||||||
- name: Install dependencies
|
with:
|
||||||
run: python -m pip install -r requirements.txt
|
version: "0.4.x"
|
||||||
- name: Build
|
enable-cache: true
|
||||||
run: make -j3
|
cache-dependency-glob: "**/requirements*.txt"
|
||||||
- uses: actions/setup-node@v3
|
- run: uv pip install --system -r requirements.txt
|
||||||
|
- run: make -j3
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: "20"
|
node-version: "20"
|
||||||
cache: yarn
|
cache: yarn
|
||||||
|
|||||||
@@ -3,16 +3,13 @@ ci:
|
|||||||
autofix_prs: false
|
autofix_prs: false
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.0.291
|
rev: v0.7.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff
|
- id: ruff
|
||||||
args:
|
args:
|
||||||
- --fix
|
- --fix
|
||||||
- repo: https://github.com/psf/black
|
- id: ruff-format
|
||||||
rev: 23.9.1
|
|
||||||
hooks:
|
|
||||||
- id: black
|
|
||||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||||
rev: v3.0.3
|
rev: v3.1.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: prettier
|
- id: prettier
|
||||||
|
|||||||
6
Makefile
6
Makefile
@@ -1,8 +1,8 @@
|
|||||||
YEAR := 2023
|
YEAR := 2024
|
||||||
DATA_DIR := data/${YEAR}
|
DATA_DIR := data/${YEAR}
|
||||||
OUT_DIR := out/${YEAR}
|
OUT_DIR := out/${YEAR}
|
||||||
DOCUMENT_ID_FI := 1sycmd6DGqHj9-0k6D8HclzlRghxqoVaBZNSZye1Jdbg
|
DOCUMENT_ID_FI := 1dvyVEJkn3_osBeKGIlhKmid671jjH7zYgcyH1BjiGF8
|
||||||
DOCUMENT_ID_EN := 1pmrQWsja3wRVF02PyEGO2F_CgttobTbxGUGjQ5K4H4Y
|
DOCUMENT_ID_EN := 1o1uakk1pkoUCtx2OGJhLclxt_uraYA-uK3DH8yCYHN4
|
||||||
XLSX_URL_FI := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_FI)/export?format=xlsx
|
XLSX_URL_FI := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_FI)/export?format=xlsx
|
||||||
TSV_URL_FI := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_FI)/export?format=tsv
|
TSV_URL_FI := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_FI)/export?format=tsv
|
||||||
XLSX_URL_EN := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_EN)/export?format=xlsx
|
XLSX_URL_EN := https://docs.google.com/spreadsheets/d/$(DOCUMENT_ID_EN)/export?format=xlsx
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
<li><a href="2021/">2021</a></li>
|
<li><a href="2021/">2021</a></li>
|
||||||
<li><a href="2022/">2022</a></li>
|
<li><a href="2022/">2022</a></li>
|
||||||
<li><a href="2023/">2023</a></li>
|
<li><a href="2023/">2023</a></li>
|
||||||
|
<li><a href="2024/">2024</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
from bokeh import models as bm, plotting as bp
|
from bokeh import models as bm
|
||||||
|
from bokeh import plotting as bp
|
||||||
from bokeh.transform import factor_cmap
|
from bokeh.transform import factor_cmap
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ PALVELUT_COL = "Palvelut"
|
|||||||
ROOLI_COL = "Rooli"
|
ROOLI_COL = "Rooli"
|
||||||
ROOLI_NORM_COL = "Rooli (normalisoitu)"
|
ROOLI_NORM_COL = "Rooli (normalisoitu)"
|
||||||
SIIRTYNYT_COL = (
|
SIIRTYNYT_COL = (
|
||||||
"Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2022 jälkeen?"
|
"Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2023 jälkeen?"
|
||||||
)
|
)
|
||||||
SUKUPUOLI_COL = "Sukupuoli"
|
SUKUPUOLI_COL = "Sukupuoli"
|
||||||
TUNTILASKUTUS_ALV0_COL = "Tuntilaskutus (ALV 0%, euroina)"
|
TUNTILASKUTUS_ALV0_COL = "Tuntilaskutus (ALV 0%, euroina)"
|
||||||
@@ -30,10 +30,10 @@ VUOSILASKUTUS_ALV0_COL = "Vuosilaskutus (ALV 0%, euroina)"
|
|||||||
VUOSITULOT_COL = "Vuositulot"
|
VUOSITULOT_COL = "Vuositulot"
|
||||||
ID_COL = "Vastaustunniste"
|
ID_COL = "Vastaustunniste"
|
||||||
|
|
||||||
COLUMN_MAP_2023 = {
|
COLUMN_MAP_2024 = {
|
||||||
"Timestamp": "Timestamp",
|
"Timestamp": "Timestamp",
|
||||||
"Oletko palkansaaja vai laskuttaja?": PALKANSAAJA_VAI_LASKUTTAJA_COL,
|
"Oletko palkansaaja vai laskuttaja?": PALKANSAAJA_VAI_LASKUTTAJA_COL,
|
||||||
"Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2022 jälkeen?": SIIRTYNYT_COL,
|
"Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2023 jälkeen?": SIIRTYNYT_COL,
|
||||||
"Ikä": "Ikä",
|
"Ikä": "Ikä",
|
||||||
"Sukupuoli": "Sukupuoli",
|
"Sukupuoli": "Sukupuoli",
|
||||||
"Työkokemus alalta (vuosina)": TYOKOKEMUS_COL,
|
"Työkokemus alalta (vuosina)": TYOKOKEMUS_COL,
|
||||||
@@ -59,10 +59,10 @@ COLUMN_MAP_2023 = {
|
|||||||
"Palautetta kyselystä ja ideoita ensi vuoden kyselyyn": PALAUTE_COL,
|
"Palautetta kyselystä ja ideoita ensi vuoden kyselyyn": PALAUTE_COL,
|
||||||
}
|
}
|
||||||
|
|
||||||
COLUMN_MAP_2023_EN_TO_FI = {
|
COLUMN_MAP_2024_EN_TO_FI = {
|
||||||
"Timestamp": "Timestamp",
|
"Timestamp": "Timestamp",
|
||||||
"Employee or entrepreneur": "Oletko palkansaaja vai laskuttaja?",
|
"Employee or entrepreneur": "Oletko palkansaaja vai laskuttaja?",
|
||||||
"Have you switched from employment to entrepreneurship or vice versa after 1.10.2022?": "Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2022 jälkeen?",
|
"Have you switched from employment to entrepreneurship or vice versa after 1.10.2023?": "Oletko siirtynyt palkansaajasta laskuttajaksi tai päinvastoin 1.10.2023 jälkeen?",
|
||||||
"Age": "Ikä",
|
"Age": "Ikä",
|
||||||
"Gender": "Sukupuoli",
|
"Gender": "Sukupuoli",
|
||||||
"Relevant work experience from the industry (in years)": "Työkokemus alalta (vuosina)",
|
"Relevant work experience from the industry (in years)": "Työkokemus alalta (vuosina)",
|
||||||
@@ -89,9 +89,9 @@ COLUMN_MAP_2023_EN_TO_FI = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# ensure all columns have translations
|
# ensure all columns have translations
|
||||||
assert set(COLUMN_MAP_2023.keys()) == set(COLUMN_MAP_2023_EN_TO_FI.values())
|
assert set(COLUMN_MAP_2024.keys()) == set(COLUMN_MAP_2024_EN_TO_FI.values())
|
||||||
|
|
||||||
VALUE_MAP_2023_EN_TO_FI = {
|
VALUE_MAP_2024_EN_TO_FI = {
|
||||||
PALKANSAAJA_VAI_LASKUTTAJA_COL: {
|
PALKANSAAJA_VAI_LASKUTTAJA_COL: {
|
||||||
"Employee": "Palkansaaja",
|
"Employee": "Palkansaaja",
|
||||||
"Entrepreneur": "Laskuttaja",
|
"Entrepreneur": "Laskuttaja",
|
||||||
@@ -112,8 +112,16 @@ VALUE_MAP_2023_EN_TO_FI = {
|
|||||||
"Finland": "Suomesta",
|
"Finland": "Suomesta",
|
||||||
},
|
},
|
||||||
KAUPUNKI_COL: {
|
KAUPUNKI_COL: {
|
||||||
"PK-Seutu (Helsinki, Espoo, Vantaa)": "PK-seutu",
|
"Asun Porissa, toimisto Helsingissä, sijainnilla ei vaikutusta palkkaan": "Pori",
|
||||||
"Capital region (Helsinki, Espoo, Vantaa)": "PK-seutu",
|
"Capital region (Helsinki, Espoo, Vantaa)": "PK-seutu",
|
||||||
|
"Firmalla ei ole toimistoa": "Etätyöfirma",
|
||||||
|
"Hajautettu": "Etätyöfirma",
|
||||||
|
"New York City": "New York",
|
||||||
|
"New York, NY, USA": "New York",
|
||||||
|
"PK-Seutu (Helsinki, Espoo, Vantaa)": "PK-seutu",
|
||||||
|
"Tampere (etänä Berliiniin)": "Tampere",
|
||||||
|
"Turku/remote (HQ Austin, TX)": "Turku",
|
||||||
|
"Ulkomailla": "Ulkomaat",
|
||||||
},
|
},
|
||||||
MILLAISESSA_COL: {
|
MILLAISESSA_COL: {
|
||||||
"Product company with softaware as their core business": "Tuotetalossa, jonka core-bisnes on softa",
|
"Product company with softaware as their core business": "Tuotetalossa, jonka core-bisnes on softa",
|
||||||
@@ -166,23 +174,44 @@ ROLE_MAP = {
|
|||||||
"Full-stack cloud developer": FULL_STACK_ROLE,
|
"Full-stack cloud developer": FULL_STACK_ROLE,
|
||||||
"Fullstack developer, web apps": FULL_STACK_ROLE,
|
"Fullstack developer, web apps": FULL_STACK_ROLE,
|
||||||
}
|
}
|
||||||
|
|
||||||
NO_GENDER_VALUES = {
|
NO_GENDER_VALUES = {
|
||||||
"-",
|
"-",
|
||||||
"ei liity asiaan",
|
"on",
|
||||||
"epärelevantti",
|
|
||||||
"jänis",
|
|
||||||
"kyllä, kiitos",
|
|
||||||
"leppäkerttu",
|
|
||||||
"tihkutympönen",
|
|
||||||
"yes",
|
"yes",
|
||||||
}
|
}
|
||||||
|
|
||||||
OTHER_GENDER_VALUES = {
|
OTHER_GENDER_VALUES = {
|
||||||
"muu",
|
"muu",
|
||||||
"muu/ei",
|
"muunsukupuolinen",
|
||||||
"non-binary, afab",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FEMALE_GENDER_VALUES = (
|
||||||
|
"f",
|
||||||
|
"n",
|
||||||
|
"women",
|
||||||
|
)
|
||||||
|
|
||||||
|
MALE_GENDER_VALUES = (
|
||||||
|
"he / him / male",
|
||||||
|
"ihminen. kikkelillä.",
|
||||||
|
"m i ä s",
|
||||||
|
"m",
|
||||||
|
"mail", # probably a typo
|
||||||
|
"male presenting",
|
||||||
|
"male",
|
||||||
|
"man",
|
||||||
|
"meis",
|
||||||
|
"mie", # probably mies?
|
||||||
|
"miekkonen",
|
||||||
|
"mies",
|
||||||
|
"miesoletettu",
|
||||||
|
"miäs",
|
||||||
|
"ukko",
|
||||||
|
"äiä",
|
||||||
|
)
|
||||||
|
|
||||||
IDS_TO_DROP = {
|
IDS_TO_DROP = {
|
||||||
"6cab61607da9c2b6", # hupsu taisteluhelikopteri
|
"0bf579f8b0a771b9", # 2 euron palkka, rooli "2"
|
||||||
"aefdb9e69b1621d5", # See "SUBMITTED TWICE, SORRY!!" in English data
|
"9a3b73d810f6e983", # apache hyökkäyshelikopteri
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,35 +8,37 @@ import numpy as np
|
|||||||
import pandas
|
import pandas
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from pulkka.config import DATA_DIR, YEAR
|
|
||||||
from pulkka.column_maps import (
|
from pulkka.column_maps import (
|
||||||
COLUMN_MAP_2023_EN_TO_FI,
|
|
||||||
KIKY_COL,
|
|
||||||
KKPALKKA_COL,
|
|
||||||
PALVELUT_COL,
|
|
||||||
TYOAIKA_COL,
|
|
||||||
VUOSITULOT_COL,
|
|
||||||
TYOPAIKKA_COL,
|
|
||||||
ROOLI_COL,
|
|
||||||
KIKY_OTHER_COL,
|
|
||||||
BOOLEAN_TEXT_TO_BOOLEAN_MAP,
|
BOOLEAN_TEXT_TO_BOOLEAN_MAP,
|
||||||
|
COLUMN_MAP_2024,
|
||||||
|
COLUMN_MAP_2024_EN_TO_FI,
|
||||||
COMPANY_MAP,
|
COMPANY_MAP,
|
||||||
SUKUPUOLI_COL,
|
FEMALE_GENDER_VALUES,
|
||||||
ROLE_MAP,
|
|
||||||
COLUMN_MAP_2023,
|
|
||||||
VALUE_MAP_2023_EN_TO_FI,
|
|
||||||
LAHITYO_COL,
|
|
||||||
IKA_COL,
|
|
||||||
LANG_COL,
|
|
||||||
KK_TULOT_COL,
|
|
||||||
KK_TULOT_NORM_COL,
|
|
||||||
NO_GENDER_VALUES,
|
|
||||||
OTHER_GENDER_VALUES,
|
|
||||||
TYOKOKEMUS_COL,
|
|
||||||
ROOLI_NORM_COL,
|
|
||||||
ID_COL,
|
ID_COL,
|
||||||
IDS_TO_DROP,
|
IDS_TO_DROP,
|
||||||
|
IKA_COL,
|
||||||
|
KIKY_COL,
|
||||||
|
KIKY_OTHER_COL,
|
||||||
|
KK_TULOT_COL,
|
||||||
|
KK_TULOT_NORM_COL,
|
||||||
|
KKPALKKA_COL,
|
||||||
|
LAHITYO_COL,
|
||||||
|
LANG_COL,
|
||||||
|
MALE_GENDER_VALUES,
|
||||||
|
NO_GENDER_VALUES,
|
||||||
|
OTHER_GENDER_VALUES,
|
||||||
|
PALVELUT_COL,
|
||||||
|
ROLE_MAP,
|
||||||
|
ROOLI_COL,
|
||||||
|
ROOLI_NORM_COL,
|
||||||
|
SUKUPUOLI_COL,
|
||||||
|
TYOAIKA_COL,
|
||||||
|
TYOKOKEMUS_COL,
|
||||||
|
TYOPAIKKA_COL,
|
||||||
|
VALUE_MAP_2024_EN_TO_FI,
|
||||||
|
VUOSITULOT_COL,
|
||||||
)
|
)
|
||||||
|
from pulkka.config import DATA_DIR, YEAR
|
||||||
|
|
||||||
|
|
||||||
def map_sukupuoli(r: pd.Series) -> str | None:
|
def map_sukupuoli(r: pd.Series) -> str | None:
|
||||||
@@ -49,19 +51,11 @@ def map_sukupuoli(r: pd.Series) -> str | None:
|
|||||||
"nainen" in value
|
"nainen" in value
|
||||||
or "female" in value
|
or "female" in value
|
||||||
or "woman" in value
|
or "woman" in value
|
||||||
or value == "f"
|
or value in FEMALE_GENDER_VALUES
|
||||||
or value == "women"
|
|
||||||
):
|
):
|
||||||
return "nainen"
|
return "nainen"
|
||||||
|
|
||||||
if (
|
if value.strip() in MALE_GENDER_VALUES:
|
||||||
"mies" in value
|
|
||||||
or "uros" in value
|
|
||||||
or "miäs" in value
|
|
||||||
or "äiä" in value
|
|
||||||
or "male" in value
|
|
||||||
or value in ("m", "man", "m i ä s", "ukko")
|
|
||||||
):
|
|
||||||
return "mies"
|
return "mies"
|
||||||
|
|
||||||
if value in NO_GENDER_VALUES:
|
if value in NO_GENDER_VALUES:
|
||||||
@@ -70,7 +64,7 @@ def map_sukupuoli(r: pd.Series) -> str | None:
|
|||||||
if value in OTHER_GENDER_VALUES:
|
if value in OTHER_GENDER_VALUES:
|
||||||
return "muu"
|
return "muu"
|
||||||
|
|
||||||
raise NotImplementedError(f"Unknown sukupuoli: {value} (row ID {r[ID_COL]})")
|
raise NotImplementedError(f"Unknown sukupuoli: {value!r} (row ID {r[ID_COL]})")
|
||||||
|
|
||||||
|
|
||||||
def map_vuositulot(r):
|
def map_vuositulot(r):
|
||||||
@@ -110,7 +104,7 @@ def read_initial_dfs() -> pd.DataFrame:
|
|||||||
skiprows=[1], # Google Sheets exports one empty row
|
skiprows=[1], # Google Sheets exports one empty row
|
||||||
)
|
)
|
||||||
df_en[LANG_COL] = "en"
|
df_en[LANG_COL] = "en"
|
||||||
df_en = df_en.rename(columns=COLUMN_MAP_2023_EN_TO_FI)
|
df_en = df_en.rename(columns=COLUMN_MAP_2024_EN_TO_FI)
|
||||||
df = pd.concat([df_fi, df_en], ignore_index=True)
|
df = pd.concat([df_fi, df_en], ignore_index=True)
|
||||||
df = df[df["Timestamp"].notna()] # Remove rows with no timestamp
|
df = df[df["Timestamp"].notna()] # Remove rows with no timestamp
|
||||||
df[LANG_COL] = df[LANG_COL].astype("category")
|
df[LANG_COL] = df[LANG_COL].astype("category")
|
||||||
@@ -130,23 +124,24 @@ def map_case_insensitive(series: pd.Series, mapping: dict[str, str]) -> pd.Serie
|
|||||||
def map_value(v):
|
def map_value(v):
|
||||||
if v is np.nan:
|
if v is np.nan:
|
||||||
return ""
|
return ""
|
||||||
assert isinstance(v, str)
|
if not isinstance(v, str):
|
||||||
|
raise TypeError(f"Unexpected value {v!r} of type {type(v)}")
|
||||||
return lower_mapping.get(v.lower().strip(), v)
|
return lower_mapping.get(v.lower().strip(), v)
|
||||||
|
|
||||||
return series.apply(map_value).fillna(series)
|
return series.apply(map_value).fillna(series)
|
||||||
|
|
||||||
|
|
||||||
def read_data() -> pd.DataFrame:
|
def read_data() -> pd.DataFrame:
|
||||||
if YEAR != "2023":
|
if YEAR != "2024":
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"This code only works for 2023. "
|
"This code only works for 2024. "
|
||||||
"Please use an older revision for older data.",
|
"Please use an older revision for older data.",
|
||||||
)
|
)
|
||||||
df = read_initial_dfs()
|
df = read_initial_dfs()
|
||||||
|
|
||||||
df = df.rename(columns=COLUMN_MAP_2023)
|
df = df.rename(columns=COLUMN_MAP_2024)
|
||||||
|
|
||||||
for col, val_map in VALUE_MAP_2023_EN_TO_FI.items():
|
for col, val_map in VALUE_MAP_2024_EN_TO_FI.items():
|
||||||
df[col] = df[col].map(val_map).fillna(df[col]).astype("category")
|
df[col] = df[col].map(val_map).fillna(df[col]).astype("category")
|
||||||
|
|
||||||
# Drop known bogus data
|
# Drop known bogus data
|
||||||
@@ -192,14 +187,7 @@ def read_data() -> pd.DataFrame:
|
|||||||
df = apply_fixups(
|
df = apply_fixups(
|
||||||
df,
|
df,
|
||||||
[
|
[
|
||||||
(
|
# ({ID_COL: "..."}, {VUOSITULOT_COL: 62000}),
|
||||||
{ID_COL: "a01216a11026d749", VUOSITULOT_COL: 620000},
|
|
||||||
{VUOSITULOT_COL: 62000},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{ID_COL: "79a200f529f6919b", VUOSITULOT_COL: 1500},
|
|
||||||
{VUOSITULOT_COL: 150_000},
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
# Fill in Vuositulot as 12.5 * Kk-tulot if empty
|
# Fill in Vuositulot as 12.5 * Kk-tulot if empty
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ def get_categorical_stats(
|
|||||||
if na_as_category:
|
if na_as_category:
|
||||||
rename_na(df, category_col, na_as_category)
|
rename_na(df, category_col, na_as_category)
|
||||||
# ... then carry on.
|
# ... then carry on.
|
||||||
group = df[[category_col, value_col]].groupby(category_col)
|
group = df[[category_col, value_col]].groupby(category_col, observed=False)
|
||||||
return group[value_col].agg(
|
return group[value_col].agg(
|
||||||
["mean", "min", "max", "median", "count", q25, q50, q75, q90],
|
["mean", "min", "max", "median", "count", q25, q50, q75, q90],
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,20 +1,20 @@
|
|||||||
import bokeh.plotting as bp
|
|
||||||
import bokeh.models as bm
|
|
||||||
import bokeh.layouts as bl
|
import bokeh.layouts as bl
|
||||||
|
import bokeh.models as bm
|
||||||
|
import bokeh.plotting as bp
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
from pulkka.chart_utils import (
|
from pulkka.chart_utils import (
|
||||||
gender_colormap,
|
gender_colormap,
|
||||||
|
get_categorical_stats_plot,
|
||||||
get_df_hover_tool,
|
get_df_hover_tool,
|
||||||
set_yaxis_cash,
|
set_yaxis_cash,
|
||||||
get_categorical_stats_plot,
|
|
||||||
)
|
)
|
||||||
from pulkka.column_maps import (
|
from pulkka.column_maps import (
|
||||||
|
IKA_COL,
|
||||||
|
KAUPUNKI_COL,
|
||||||
|
SUKUPUOLI_COL,
|
||||||
TYOKOKEMUS_COL,
|
TYOKOKEMUS_COL,
|
||||||
VUOSITULOT_COL,
|
VUOSITULOT_COL,
|
||||||
KAUPUNKI_COL,
|
|
||||||
IKA_COL,
|
|
||||||
SUKUPUOLI_COL,
|
|
||||||
)
|
)
|
||||||
from pulkka.config import OUT_DIR
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data
|
from pulkka.data_ingest import read_data
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from ydata_profiling import ProfileReport
|
||||||
|
|
||||||
from pulkka.column_maps import KKPALKKA_COL, VUOSITULOT_COL
|
from pulkka.column_maps import KKPALKKA_COL, VUOSITULOT_COL
|
||||||
from pulkka.config import OUT_DIR
|
from pulkka.config import OUT_DIR
|
||||||
from pulkka.data_ingest import read_data, force_age_numeric
|
from pulkka.data_ingest import force_age_numeric, read_data
|
||||||
from ydata_profiling import ProfileReport
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ def main():
|
|||||||
)
|
)
|
||||||
env.globals.update(
|
env.globals.update(
|
||||||
{
|
{
|
||||||
"date": datetime.datetime.utcnow(),
|
"date": datetime.datetime.now(datetime.UTC),
|
||||||
"cm": column_maps,
|
"cm": column_maps,
|
||||||
"pd": pandas,
|
"pd": pandas,
|
||||||
"np": numpy,
|
"np": numpy,
|
||||||
|
|||||||
@@ -3,4 +3,5 @@ jinja2
|
|||||||
matplotlib<3.8.0 # ydata-profiling compat
|
matplotlib<3.8.0 # ydata-profiling compat
|
||||||
openpyxl
|
openpyxl
|
||||||
pandas
|
pandas
|
||||||
|
setuptools # implicitly required by ydata-profiling
|
||||||
ydata-profiling
|
ydata-profiling
|
||||||
|
|||||||
103
requirements.txt
103
requirements.txt
@@ -1,65 +1,68 @@
|
|||||||
#
|
# This file was autogenerated by uv via the following command:
|
||||||
# This file is autogenerated by pip-compile with Python 3.11
|
# uv pip compile requirements.in -o requirements.txt
|
||||||
# by the following command:
|
annotated-types==0.7.0
|
||||||
#
|
# via pydantic
|
||||||
# pip-compile requirements.in
|
attrs==24.2.0
|
||||||
#
|
|
||||||
attrs==23.1.0
|
|
||||||
# via visions
|
# via visions
|
||||||
bokeh==2.4.3
|
bokeh==2.4.3
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
certifi==2023.7.22
|
certifi==2024.8.30
|
||||||
# via requests
|
# via requests
|
||||||
charset-normalizer==3.2.0
|
charset-normalizer==3.4.0
|
||||||
# via requests
|
# via requests
|
||||||
contourpy==1.1.1
|
contourpy==1.3.0
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
cycler==0.11.0
|
cycler==0.12.1
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
dacite==1.8.1
|
dacite==1.8.1
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
et-xmlfile==1.1.0
|
et-xmlfile==1.1.0
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
fonttools==4.42.1
|
fonttools==4.54.1
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
htmlmin==0.1.12
|
htmlmin==0.1.12
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
idna==3.4
|
idna==3.10
|
||||||
# via requests
|
# via requests
|
||||||
imagehash==4.3.1
|
imagehash==4.3.1
|
||||||
# via
|
# via
|
||||||
# visions
|
# visions
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
jinja2==3.1.2
|
jinja2==3.1.4
|
||||||
# via
|
# via
|
||||||
# -r requirements.in
|
# -r requirements.in
|
||||||
# bokeh
|
# bokeh
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
joblib==1.3.2
|
joblib==1.4.2
|
||||||
# via phik
|
# via phik
|
||||||
kiwisolver==1.4.5
|
kiwisolver==1.4.7
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
markupsafe==2.1.3
|
llvmlite==0.43.0
|
||||||
|
# via numba
|
||||||
|
markupsafe==3.0.1
|
||||||
# via jinja2
|
# via jinja2
|
||||||
matplotlib==3.7.3
|
matplotlib==3.7.5
|
||||||
# via
|
# via
|
||||||
# -r requirements.in
|
# -r requirements.in
|
||||||
# phik
|
# phik
|
||||||
# seaborn
|
# seaborn
|
||||||
# wordcloud
|
# wordcloud
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
multimethod==1.10
|
multimethod==1.12
|
||||||
# via
|
# via
|
||||||
# visions
|
# visions
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
networkx==3.1
|
networkx==3.4.1
|
||||||
# via visions
|
# via visions
|
||||||
numpy==1.23.5
|
numba==0.60.0
|
||||||
|
# via ydata-profiling
|
||||||
|
numpy==1.26.4
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
# contourpy
|
# contourpy
|
||||||
# imagehash
|
# imagehash
|
||||||
# matplotlib
|
# matplotlib
|
||||||
|
# numba
|
||||||
# pandas
|
# pandas
|
||||||
# patsy
|
# patsy
|
||||||
# phik
|
# phik
|
||||||
@@ -70,14 +73,14 @@ numpy==1.23.5
|
|||||||
# visions
|
# visions
|
||||||
# wordcloud
|
# wordcloud
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
openpyxl==3.1.2
|
openpyxl==3.1.5
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
packaging==23.1
|
packaging==24.1
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
# matplotlib
|
# matplotlib
|
||||||
# statsmodels
|
# statsmodels
|
||||||
pandas==2.0.3
|
pandas==2.2.3
|
||||||
# via
|
# via
|
||||||
# -r requirements.in
|
# -r requirements.in
|
||||||
# phik
|
# phik
|
||||||
@@ -85,68 +88,72 @@ pandas==2.0.3
|
|||||||
# statsmodels
|
# statsmodels
|
||||||
# visions
|
# visions
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
patsy==0.5.3
|
patsy==0.5.6
|
||||||
# via statsmodels
|
# via statsmodels
|
||||||
phik==0.12.3
|
phik==0.12.4
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
pillow==10.0.1
|
pillow==11.0.0
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
# imagehash
|
# imagehash
|
||||||
# matplotlib
|
# matplotlib
|
||||||
# visions
|
# visions
|
||||||
# wordcloud
|
# wordcloud
|
||||||
pydantic==1.10.12
|
pydantic==2.9.2
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
pyparsing==3.1.1
|
pydantic-core==2.23.4
|
||||||
|
# via pydantic
|
||||||
|
pyparsing==3.2.0
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.9.0.post0
|
||||||
# via
|
# via
|
||||||
# matplotlib
|
# matplotlib
|
||||||
# pandas
|
# pandas
|
||||||
pytz==2023.3.post1
|
pytz==2024.2
|
||||||
# via pandas
|
# via pandas
|
||||||
pywavelets==1.4.1
|
pywavelets==1.7.0
|
||||||
# via imagehash
|
# via imagehash
|
||||||
pyyaml==6.0.1
|
pyyaml==6.0.2
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
requests==2.31.0
|
requests==2.32.3
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
scipy==1.11.2
|
scipy==1.13.1
|
||||||
# via
|
# via
|
||||||
# imagehash
|
# imagehash
|
||||||
# phik
|
# phik
|
||||||
# statsmodels
|
# statsmodels
|
||||||
# ydata-profiling
|
# ydata-profiling
|
||||||
seaborn==0.12.2
|
seaborn==0.13.2
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
|
setuptools==75.2.0
|
||||||
|
# via -r requirements.in
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
# via
|
# via
|
||||||
# patsy
|
# patsy
|
||||||
# python-dateutil
|
# python-dateutil
|
||||||
statsmodels==0.14.0
|
statsmodels==0.14.4
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
tangled-up-in-unicode==0.2.0
|
tornado==6.4.1
|
||||||
# via visions
|
|
||||||
tornado==6.3.3
|
|
||||||
# via bokeh
|
# via bokeh
|
||||||
tqdm==4.66.1
|
tqdm==4.66.5
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
typeguard==2.13.3
|
typeguard==4.3.0
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
typing-extensions==4.8.0
|
typing-extensions==4.12.2
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
# pydantic
|
# pydantic
|
||||||
tzdata==2023.3
|
# pydantic-core
|
||||||
|
# typeguard
|
||||||
|
tzdata==2024.2
|
||||||
# via pandas
|
# via pandas
|
||||||
urllib3==2.0.5
|
urllib3==2.2.3
|
||||||
# via requests
|
# via requests
|
||||||
visions[type_image_path]==0.7.5
|
visions==0.7.6
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
wordcloud==1.9.2
|
wordcloud==1.9.3
|
||||||
# via ydata-profiling
|
# via ydata-profiling
|
||||||
ydata-profiling==4.5.1
|
ydata-profiling==4.11.0
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
|
|||||||
Reference in New Issue
Block a user