mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-01-26 03:14:03 +00:00
Add pandas-profiling report
This commit is contained in:
1
Makefile
1
Makefile
@@ -4,6 +4,7 @@ out: all-data
|
||||
python massage_templates.py
|
||||
python copy_massaged_data.py
|
||||
python generate_charts.py
|
||||
python generate_profiling.py
|
||||
cp data/results.xlsx out/raw.xlsx
|
||||
cp data/results.tsv out/raw.tsv
|
||||
|
||||
|
||||
12
generate_profiling.py
Normal file
12
generate_profiling.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from data_ingest import read_data
|
||||
from pandas_profiling import ProfileReport
|
||||
|
||||
|
||||
def main():
|
||||
df = read_data()
|
||||
profile = ProfileReport(df)
|
||||
profile.to_file("out/profiling_report.html")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -2,3 +2,4 @@ bokeh
|
||||
jinja2
|
||||
openpyxl
|
||||
pandas
|
||||
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling
|
||||
|
||||
@@ -4,43 +4,127 @@
|
||||
#
|
||||
# pip-compile requirements.in
|
||||
#
|
||||
attrs==20.3.0
|
||||
# via
|
||||
# pandas-profiling
|
||||
# visions
|
||||
bokeh==2.2.3
|
||||
# via -r requirements.in
|
||||
certifi==2020.12.5
|
||||
# via requests
|
||||
chardet==4.0.0
|
||||
# via requests
|
||||
confuse==1.4.0
|
||||
# via pandas-profiling
|
||||
cycler==0.10.0
|
||||
# via matplotlib
|
||||
decorator==4.4.2
|
||||
# via networkx
|
||||
et-xmlfile==1.0.1
|
||||
# via openpyxl
|
||||
htmlmin==0.1.12
|
||||
# via pandas-profiling
|
||||
idna==2.10
|
||||
# via requests
|
||||
imagehash==4.2.0
|
||||
# via visions
|
||||
jdcal==1.4.1
|
||||
# via openpyxl
|
||||
jinja2==2.11.3
|
||||
# via
|
||||
# -r requirements.in
|
||||
# bokeh
|
||||
# pandas-profiling
|
||||
joblib==1.0.1
|
||||
# via pandas-profiling
|
||||
kiwisolver==1.3.1
|
||||
# via matplotlib
|
||||
markupsafe==1.1.1
|
||||
# via jinja2
|
||||
matplotlib==3.3.4
|
||||
# via
|
||||
# missingno
|
||||
# pandas-profiling
|
||||
# seaborn
|
||||
missingno==0.4.2
|
||||
# via pandas-profiling
|
||||
networkx==2.5
|
||||
# via visions
|
||||
numpy==1.20.1
|
||||
# via
|
||||
# bokeh
|
||||
# imagehash
|
||||
# matplotlib
|
||||
# missingno
|
||||
# pandas
|
||||
# pandas-profiling
|
||||
# pywavelets
|
||||
# scipy
|
||||
# seaborn
|
||||
# visions
|
||||
openpyxl==3.0.6
|
||||
# via -r requirements.in
|
||||
packaging==20.9
|
||||
# via bokeh
|
||||
pandas==1.2.2
|
||||
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling
|
||||
# via -r requirements.in
|
||||
pandas==1.2.2
|
||||
# via
|
||||
# -r requirements.in
|
||||
# pandas-profiling
|
||||
# seaborn
|
||||
# visions
|
||||
pillow==8.1.0
|
||||
# via bokeh
|
||||
# via
|
||||
# bokeh
|
||||
# imagehash
|
||||
# matplotlib
|
||||
# visions
|
||||
pyparsing==2.4.7
|
||||
# via packaging
|
||||
# via
|
||||
# matplotlib
|
||||
# packaging
|
||||
python-dateutil==2.8.1
|
||||
# via
|
||||
# bokeh
|
||||
# matplotlib
|
||||
# pandas
|
||||
pytz==2021.1
|
||||
# via pandas
|
||||
pywavelets==1.1.1
|
||||
# via imagehash
|
||||
pyyaml==5.4.1
|
||||
# via bokeh
|
||||
# via
|
||||
# bokeh
|
||||
# confuse
|
||||
requests==2.25.1
|
||||
# via pandas-profiling
|
||||
scipy==1.6.1
|
||||
# via
|
||||
# imagehash
|
||||
# missingno
|
||||
# pandas-profiling
|
||||
# seaborn
|
||||
seaborn==0.11.1
|
||||
# via
|
||||
# missingno
|
||||
# pandas-profiling
|
||||
six==1.15.0
|
||||
# via python-dateutil
|
||||
# via
|
||||
# cycler
|
||||
# imagehash
|
||||
# python-dateutil
|
||||
tangled-up-in-unicode==0.0.6
|
||||
# via
|
||||
# pandas-profiling
|
||||
# visions
|
||||
tornado==6.1
|
||||
# via bokeh
|
||||
tqdm==4.57.0
|
||||
# via pandas-profiling
|
||||
typing-extensions==3.7.4.3
|
||||
# via bokeh
|
||||
urllib3==1.26.3
|
||||
# via requests
|
||||
visions[type_image_path]==0.6.0
|
||||
# via pandas-profiling
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
<li><a href="data.html">Lähdedata (HTML)</a></li>
|
||||
<li><a href="data.json">Lähdedata (JSON)</a></li>
|
||||
<li><a href="data.xlsx">Lähdedata (XLSX)</a></li>
|
||||
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
|
||||
<li><a href="raw.tsv">Raakadata (TSV)</a></li>
|
||||
<li><a href="raw.xlsx">Raakadata (XLSX)</a></li>
|
||||
</ul>
|
||||
|
||||
Reference in New Issue
Block a user