import pyarrow as pya
from pyarrow import orc
from glob import glob
import duckdb
# Load every ORC file under orc_file_path/ into one Arrow table, expose it to
# DuckDB as a view, and query it.

# Connect to the DuckDB database file 'python_db.duckdb'.
conn = duckdb.connect(database='python_db.duckdb')

# Read multiple ORC files using pyarrow. glob() returns matches in arbitrary
# order, so sort them to keep the row order of the combined table reproducible.
orc_files = sorted(glob("orc_file_path/*.orc"))
if not orc_files:
    # concat_tables() fails on an empty list with an opaque Arrow error;
    # fail early with a message that names the actual problem.
    raise FileNotFoundError("No ORC files found matching 'orc_file_path/*.orc'")

data_list = []
for orc_file in orc_files:
    # Read the table while the file handle is still open.
    with open(orc_file, "rb") as orcfile:
        data = orc.ORCFile(orcfile).read()
        data_list.append(data)

# Combine all ORC tables into a single Arrow table.
final_table = pya.concat_tables(data_list)

# Register the pyarrow table in DuckDB as a view named 'orc_table'.
conn.register('orc_table', final_table)

# Query the view and fetch the result via .df(). As a bare expression the
# result is only shown in a notebook/REPL; assign it to keep it in a script.
conn.execute("SELECT * FROM orc_table;").df()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter