mirror of
https://github.com/koodiklinikka/palkkakysely.git
synced 2026-03-06 20:01:15 +00:00
Add pandas-profiling report
This commit is contained in:
1
Makefile
1
Makefile
@@ -4,6 +4,7 @@ out: all-data
|
|||||||
python massage_templates.py
|
python massage_templates.py
|
||||||
python copy_massaged_data.py
|
python copy_massaged_data.py
|
||||||
python generate_charts.py
|
python generate_charts.py
|
||||||
|
python generate_profiling.py
|
||||||
cp data/results.xlsx out/raw.xlsx
|
cp data/results.xlsx out/raw.xlsx
|
||||||
cp data/results.tsv out/raw.tsv
|
cp data/results.tsv out/raw.tsv
|
||||||
|
|
||||||
|
|||||||
12
generate_profiling.py
Normal file
12
generate_profiling.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from data_ingest import read_data
|
||||||
|
from pandas_profiling import ProfileReport
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build a profiling report for the ingested data and save it.

    Loads the dataset through ``read_data``, wraps it in a
    ``ProfileReport`` and writes the result to
    ``out/profiling_report.html``.
    """
    # The intermediate frame and report objects are not needed afterwards,
    # so the three steps are expressed as one pipeline.
    ProfileReport(read_data()).to_file("out/profiling_report.html")
|
||||||
|
|
||||||
|
|
||||||
|
# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
@@ -2,3 +2,4 @@ bokeh
|
|||||||
jinja2
|
jinja2
|
||||||
openpyxl
|
openpyxl
|
||||||
pandas
|
pandas
|
||||||
|
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling
|
||||||
|
|||||||
@@ -4,43 +4,127 @@
|
|||||||
#
|
#
|
||||||
# pip-compile requirements.in
|
# pip-compile requirements.in
|
||||||
#
|
#
|
||||||
|
attrs==20.3.0
|
||||||
|
# via
|
||||||
|
# pandas-profiling
|
||||||
|
# visions
|
||||||
bokeh==2.2.3
|
bokeh==2.2.3
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
|
certifi==2020.12.5
|
||||||
|
# via requests
|
||||||
|
chardet==4.0.0
|
||||||
|
# via requests
|
||||||
|
confuse==1.4.0
|
||||||
|
# via pandas-profiling
|
||||||
|
cycler==0.10.0
|
||||||
|
# via matplotlib
|
||||||
|
decorator==4.4.2
|
||||||
|
# via networkx
|
||||||
et-xmlfile==1.0.1
|
et-xmlfile==1.0.1
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
|
htmlmin==0.1.12
|
||||||
|
# via pandas-profiling
|
||||||
|
idna==2.10
|
||||||
|
# via requests
|
||||||
|
imagehash==4.2.0
|
||||||
|
# via visions
|
||||||
jdcal==1.4.1
|
jdcal==1.4.1
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
jinja2==2.11.3
|
jinja2==2.11.3
|
||||||
# via
|
# via
|
||||||
# -r requirements.in
|
# -r requirements.in
|
||||||
# bokeh
|
# bokeh
|
||||||
|
# pandas-profiling
|
||||||
|
joblib==1.0.1
|
||||||
|
# via pandas-profiling
|
||||||
|
kiwisolver==1.3.1
|
||||||
|
# via matplotlib
|
||||||
markupsafe==1.1.1
|
markupsafe==1.1.1
|
||||||
# via jinja2
|
# via jinja2
|
||||||
|
matplotlib==3.3.4
|
||||||
|
# via
|
||||||
|
# missingno
|
||||||
|
# pandas-profiling
|
||||||
|
# seaborn
|
||||||
|
missingno==0.4.2
|
||||||
|
# via pandas-profiling
|
||||||
|
networkx==2.5
|
||||||
|
# via visions
|
||||||
numpy==1.20.1
|
numpy==1.20.1
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
|
# imagehash
|
||||||
|
# matplotlib
|
||||||
|
# missingno
|
||||||
# pandas
|
# pandas
|
||||||
|
# pandas-profiling
|
||||||
|
# pywavelets
|
||||||
|
# scipy
|
||||||
|
# seaborn
|
||||||
|
# visions
|
||||||
openpyxl==3.0.6
|
openpyxl==3.0.6
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
packaging==20.9
|
packaging==20.9
|
||||||
# via bokeh
|
# via bokeh
|
||||||
pandas==1.2.2
|
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling
|
||||||
# via -r requirements.in
|
# via -r requirements.in
|
||||||
|
pandas==1.2.2
|
||||||
|
# via
|
||||||
|
# -r requirements.in
|
||||||
|
# pandas-profiling
|
||||||
|
# seaborn
|
||||||
|
# visions
|
||||||
pillow==8.1.0
|
pillow==8.1.0
|
||||||
# via bokeh
|
# via
|
||||||
|
# bokeh
|
||||||
|
# imagehash
|
||||||
|
# matplotlib
|
||||||
|
# visions
|
||||||
pyparsing==2.4.7
|
pyparsing==2.4.7
|
||||||
# via packaging
|
# via
|
||||||
|
# matplotlib
|
||||||
|
# packaging
|
||||||
python-dateutil==2.8.1
|
python-dateutil==2.8.1
|
||||||
# via
|
# via
|
||||||
# bokeh
|
# bokeh
|
||||||
|
# matplotlib
|
||||||
# pandas
|
# pandas
|
||||||
pytz==2021.1
|
pytz==2021.1
|
||||||
# via pandas
|
# via pandas
|
||||||
|
pywavelets==1.1.1
|
||||||
|
# via imagehash
|
||||||
pyyaml==5.4.1
|
pyyaml==5.4.1
|
||||||
# via bokeh
|
# via
|
||||||
|
# bokeh
|
||||||
|
# confuse
|
||||||
|
requests==2.25.1
|
||||||
|
# via pandas-profiling
|
||||||
|
scipy==1.6.1
|
||||||
|
# via
|
||||||
|
# imagehash
|
||||||
|
# missingno
|
||||||
|
# pandas-profiling
|
||||||
|
# seaborn
|
||||||
|
seaborn==0.11.1
|
||||||
|
# via
|
||||||
|
# missingno
|
||||||
|
# pandas-profiling
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
# via python-dateutil
|
# via
|
||||||
|
# cycler
|
||||||
|
# imagehash
|
||||||
|
# python-dateutil
|
||||||
|
tangled-up-in-unicode==0.0.6
|
||||||
|
# via
|
||||||
|
# pandas-profiling
|
||||||
|
# visions
|
||||||
tornado==6.1
|
tornado==6.1
|
||||||
# via bokeh
|
# via bokeh
|
||||||
|
tqdm==4.57.0
|
||||||
|
# via pandas-profiling
|
||||||
typing-extensions==3.7.4.3
|
typing-extensions==3.7.4.3
|
||||||
# via bokeh
|
# via bokeh
|
||||||
|
urllib3==1.26.3
|
||||||
|
# via requests
|
||||||
|
visions[type_image_path]==0.6.0
|
||||||
|
# via pandas-profiling
|
||||||
|
|||||||
@@ -29,6 +29,7 @@
|
|||||||
<li><a href="data.html">Lähdedata (HTML)</a></li>
|
<li><a href="data.html">Lähdedata (HTML)</a></li>
|
||||||
<li><a href="data.json">Lähdedata (JSON)</a></li>
|
<li><a href="data.json">Lähdedata (JSON)</a></li>
|
||||||
<li><a href="data.xlsx">Lähdedata (XLSX)</a></li>
|
<li><a href="data.xlsx">Lähdedata (XLSX)</a></li>
|
||||||
|
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
|
||||||
<li><a href="raw.tsv">Raakadata (TSV)</a></li>
|
<li><a href="raw.tsv">Raakadata (TSV)</a></li>
|
||||||
<li><a href="raw.xlsx">Raakadata (XLSX)</a></li>
|
<li><a href="raw.xlsx">Raakadata (XLSX)</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
Reference in New Issue
Block a user