import pyarrow as pya
from pyarrow import orc
from glob import glob
import duckdb

# Connect to the DuckDB database file that the Arrow data is exposed through.
conn = duckdb.connect(database='python_db.duckdb')

# Read multiple ORC files using pyarrow.
# glob() returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the row order of the combined table reproducible between runs.
orc_pattern = "orc_file_path/*.orc"
orc_files = sorted(glob(orc_pattern))
if not orc_files:
    # pya.concat_tables() raises an Arrow error on an empty list that does not
    # mention the missing files, so fail early with a message that does.
    raise FileNotFoundError(f"No ORC files matched {orc_pattern!r}")

data_list = []
for orc_file in orc_files:
    with open(orc_file, "rb") as orcfile:
        # The table is read in full while the file handle is still open.
        data_list.append(orc.ORCFile(orcfile).read())

# Combine all ORC tables into a single Arrow table.
# NOTE(review): this expects every file to share one schema - confirm.
final_table = pya.concat_tables(data_list)

# Register the pyarrow table in DuckDB as a view named 'orc_table'.
conn.register('orc_table', final_table)

# Query the view and keep the resulting DataFrame; as a bare expression the
# result would be discarded when this file runs as a script.
result_df = conn.execute("SELECT * FROM orc_table;").df()