Add pandas-profiling report

This commit is contained in:
Aarni Koskela
2021-02-19 16:11:08 +02:00
parent ef70a3ae1b
commit e58804d4b5
5 changed files with 104 additions and 5 deletions

View File

@@ -4,6 +4,7 @@ out: all-data
python massage_templates.py
python copy_massaged_data.py
python generate_charts.py
python generate_profiling.py
cp data/results.xlsx out/raw.xlsx
cp data/results.tsv out/raw.tsv

12
generate_profiling.py Normal file
View File

@@ -0,0 +1,12 @@
from data_ingest import read_data
from pandas_profiling import ProfileReport
def main():
    """Build a pandas-profiling report for the ingested data.

    Loads the data frame through ``read_data()``, wraps it in a
    ``ProfileReport`` and writes the result to
    ``out/profiling_report.html``.
    """
    report = ProfileReport(read_data())
    report.to_file("out/profiling_report.html")


# Only generate the report when run as a script, not on import.
if __name__ == "__main__":
    main()

View File

@@ -2,3 +2,4 @@ bokeh
jinja2
openpyxl
pandas
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling

View File

@@ -4,43 +4,127 @@
#
# pip-compile requirements.in
#
attrs==20.3.0
# via
# pandas-profiling
# visions
bokeh==2.2.3
# via -r requirements.in
certifi==2020.12.5
# via requests
chardet==4.0.0
# via requests
confuse==1.4.0
# via pandas-profiling
cycler==0.10.0
# via matplotlib
decorator==4.4.2
# via networkx
et-xmlfile==1.0.1
# via openpyxl
htmlmin==0.1.12
# via pandas-profiling
idna==2.10
# via requests
imagehash==4.2.0
# via visions
jdcal==1.4.1
# via openpyxl
jinja2==2.11.3
# via
# -r requirements.in
# bokeh
# pandas-profiling
joblib==1.0.1
# via pandas-profiling
kiwisolver==1.3.1
# via matplotlib
markupsafe==1.1.1
# via jinja2
matplotlib==3.3.4
# via
# missingno
# pandas-profiling
# seaborn
missingno==0.4.2
# via pandas-profiling
networkx==2.5
# via visions
numpy==1.20.1
# via
# bokeh
# imagehash
# matplotlib
# missingno
# pandas
# pandas-profiling
# pywavelets
# scipy
# seaborn
# visions
openpyxl==3.0.6
# via -r requirements.in
packaging==20.9
# via bokeh
pandas==1.2.2
https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling
# via -r requirements.in
pandas==1.2.2
# via
# -r requirements.in
# pandas-profiling
# seaborn
# visions
pillow==8.1.0
# via bokeh
# via
# bokeh
# imagehash
# matplotlib
# visions
pyparsing==2.4.7
# via packaging
# via
# matplotlib
# packaging
python-dateutil==2.8.1
# via
# bokeh
# matplotlib
# pandas
pytz==2021.1
# via pandas
pywavelets==1.1.1
# via imagehash
pyyaml==5.4.1
# via bokeh
# via
# bokeh
# confuse
requests==2.25.1
# via pandas-profiling
scipy==1.6.1
# via
# imagehash
# missingno
# pandas-profiling
# seaborn
seaborn==0.11.1
# via
# missingno
# pandas-profiling
six==1.15.0
# via python-dateutil
# via
# cycler
# imagehash
# python-dateutil
tangled-up-in-unicode==0.0.6
# via
# pandas-profiling
# visions
tornado==6.1
# via bokeh
tqdm==4.57.0
# via pandas-profiling
typing-extensions==3.7.4.3
# via bokeh
urllib3==1.26.3
# via requests
visions[type_image_path]==0.6.0
# via pandas-profiling

View File

@@ -29,6 +29,7 @@
<li><a href="data.html">Lähdedata (HTML)</a></li>
<li><a href="data.json">Lähdedata (JSON)</a></li>
<li><a href="data.xlsx">Lähdedata (XLSX)</a></li>
<li><a href="profiling_report.html">Lähdedatan analyysi</a></li>
<li><a href="raw.tsv">Raakadata (TSV)</a></li>
<li><a href="raw.xlsx">Raakadata (XLSX)</a></li>
</ul>