Extract Tables from PDFs or Websites

PHOTO EMBED

Sun May 08 2022 13:36:33 GMT+0000 (Coordinated Universal Time)

Saved by @QuinnFox12 #folder

# Extract tables from a PDF with camelot.
#
# Install camelot first (run ONE of these in a shell, not in Python):
#   pip:   pip install camelot-py
#   conda: conda install -c conda-forge camelot-py
import camelot

# Parse page 1 of the PDF using the 'lattice' flavor.
tables = camelot.read_pdf('foo.pdf', pages='1', flavor='lattice')
print(tables)

# Export every extracted table as CSV; compress=True requests a compressed
# archive (presumably a .zip next to the given path -- confirm in camelot docs).
tables.export('foo.csv', f='csv', compress=True)

tables[0].to_csv('foo.csv')  # write only the first table to a csv file
print(tables[0].df)  # the first table as a DataFrame


# Extract tables from a website with pandas.
import pandas as pd

# read_html returns a list with one DataFrame per table parsed from the page.
simpsons = pd.read_html('https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)')

# Show the first 5 rows of the second table ("Season 1" per the original note).
# Printed explicitly: a bare `simpsons[1].head()` expression is only displayed
# in a notebook/REPL and is silently discarded when run as a script.
print(simpsons[1].head())
content_copyCOPY

https://medium.com/geekculture/automate-4-boring-tasks-in-python-with-5-lines-of-code-55901b3cd5dc