diff --git a/Makefile b/Makefile index 792409d..e5dfbe2 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ out: all-data python massage_templates.py python copy_massaged_data.py python generate_charts.py + python generate_profiling.py cp data/results.xlsx out/raw.xlsx cp data/results.tsv out/raw.tsv diff --git a/generate_profiling.py b/generate_profiling.py new file mode 100644 index 0000000..406dcaa --- /dev/null +++ b/generate_profiling.py @@ -0,0 +1,12 @@ +from data_ingest import read_data +from pandas_profiling import ProfileReport + + +def main(): + df = read_data() + profile = ProfileReport(df) + profile.to_file("out/profiling_report.html") + + +if __name__ == "__main__": + main() diff --git a/requirements.in b/requirements.in index 32288aa..c423ddb 100644 --- a/requirements.in +++ b/requirements.in @@ -2,3 +2,4 @@ bokeh jinja2 openpyxl pandas +https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling diff --git a/requirements.txt b/requirements.txt index 7c25a3f..90f6615 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,43 +4,127 @@ # # pip-compile requirements.in # +attrs==20.3.0 + # via + # pandas-profiling + # visions bokeh==2.2.3 # via -r requirements.in +certifi==2020.12.5 + # via requests +chardet==4.0.0 + # via requests +confuse==1.4.0 + # via pandas-profiling +cycler==0.10.0 + # via matplotlib +decorator==4.4.2 + # via networkx et-xmlfile==1.0.1 # via openpyxl +htmlmin==0.1.12 + # via pandas-profiling +idna==2.10 + # via requests +imagehash==4.2.0 + # via visions jdcal==1.4.1 # via openpyxl jinja2==2.11.3 # via # -r requirements.in # bokeh + # pandas-profiling +joblib==1.0.1 + # via pandas-profiling +kiwisolver==1.3.1 + # via matplotlib markupsafe==1.1.1 # via jinja2 +matplotlib==3.3.4 + # via + # missingno + # pandas-profiling + # seaborn +missingno==0.4.2 + # via pandas-profiling +networkx==2.5 + # via visions numpy==1.20.1 # via # bokeh + # imagehash + # matplotlib + # missingno # pandas + # pandas-profiling + # pywavelets + # scipy + # seaborn + # visions openpyxl==3.0.6 # via -r requirements.in packaging==20.9 # via bokeh -pandas==1.2.2 +https://github.com/akx/pandas-profiling/archive/no-hard-ipywidgets.zip#egg=pandas-profiling # via -r requirements.in +pandas==1.2.2 + # via + # -r requirements.in + # pandas-profiling + # seaborn + # visions pillow==8.1.0 - # via bokeh + # via + # bokeh + # imagehash + # matplotlib + # visions pyparsing==2.4.7 - # via packaging + # via + # matplotlib + # packaging python-dateutil==2.8.1 # via # bokeh + # matplotlib # pandas pytz==2021.1 # via pandas +pywavelets==1.1.1 + # via imagehash pyyaml==5.4.1 - # via bokeh + # via + # bokeh + # confuse +requests==2.25.1 + # via pandas-profiling +scipy==1.6.1 + # via + # imagehash + # missingno + # pandas-profiling + # seaborn +seaborn==0.11.1 + # via + # missingno + # pandas-profiling six==1.15.0 - # via python-dateutil + # via + # cycler + # imagehash + # python-dateutil +tangled-up-in-unicode==0.0.6 + # via + # pandas-profiling + # visions tornado==6.1 # via bokeh +tqdm==4.57.0 + # via pandas-profiling typing-extensions==3.7.4.3 # via bokeh +urllib3==1.26.3 + # via requests +visions[type_image_path]==0.6.0 + # via pandas-profiling diff --git a/template/index.html b/template/index.html index cfed1b4..dc30cd5 100644 --- a/template/index.html +++ b/template/index.html @@ -29,6 +29,7 @@
  • Lähdedata (HTML)
  • Lähdedata (JSON)
  • Lähdedata (XLSX)
  • +
  • Lähdedatan analyysi
  • Raakadata (TSV)
  • Raakadata (XLSX)