- [ ] president_speeches.db
- [ ] parquet
- [ ] Dockerfile
```bash
$ du -h data
1.5M data/datagokr
3.1M data/db
1.1M data/parquet
```
```bash
$ tree
.
├── Dockerfile
├── README.md
├── __pycache__
├── data
│   ├── datagokr
│   │   ├── president_archive_ministry_of_public_safety_president_speech_record_speech_20220817.csv
│   │   ├── president_archive_ministry_of_public_safety_president_speech_record_speech_roh_20220817.csv
│   │   └── president_speeches_sample100.csv
│   ├── db
│   │   └── president_speeches.db
│   └── parquet
│       ├── president_speeches_batch_data_1.parquet
│       ├── president_speeches_batch_data_2.parquet
│       ├── president_speeches_batch_data_3.parquet
│       ├── president_speeches_batch_data_4.parquet
│       ├── president_speeches_batch_data_5.parquet
│       ├── president_speeches_batch_data_6.parquet
│       ├── president_speeches_batch_data_7.parquet
│       └── president_speeches_batch_data_8.parquet
├── dist
│   ├── roh_moo_hyun-0.2.1-py3-none-any.whl
│   └── roh_moo_hyun-0.2.1.tar.gz
├── note
│   ├── datagokr.http
│   └── datagokr.ipynb
├── pdm.lock
├── pyproject.toml
├── requirements.txt
├── src
│   └── roh-moo-hyun
│       ├── __init__.py
│       ├── __pycache__
│       └── extract_speech_from_pdf.py
└── tests
    ├── __init__.py
    └── __pycache__
```