Read and write a pandas DataFrame as an ORC file
Wed May 28 2025 06:48:09 GMT+0000 (Coordinated Universal Time)
Saved by
@Saravana_Kumar
#python
import pandas as pd
import pyarrow as pa
import pyarrow.orc as orc
from glob import glob
# Round-trip a CSV through ORC: load it with pandas, convert to a PyArrow
# Table, write that table to an ORC file, then read the file back and preview it.

# Location of the source CSV. The original snippet called pd.read_csv() with no
# argument, which raises TypeError because the path is a required parameter.
csv_file_path = "input.csv"  # TODO: replace with the actual CSV path

# Defined once so the write and the read below always target the same file.
orc_file_path = "/home/saravana/Saravana/s3_maintain/s3-maintenance/Download_S3_Files/Datavisiooh/month_03.orc"

# Load the CSV into a pandas DataFrame
dataset = pd.read_csv(csv_file_path)

# Convert the pandas DataFrame into a PyArrow Table
pa_table = pa.Table.from_pandas(dataset)

# Write the PyArrow Table to an ORC file. The nested context managers close
# the ORC writer first and then the underlying OS file, even on error.
with pa.OSFile(orc_file_path, "wb") as sink:
    with orc.ORCWriter(sink) as writer:
        writer.write(pa_table)

# Read the ORC file back into a pandas DataFrame
df = orc.read_table(orc_file_path).to_pandas()

# Display the first rows of the DataFrame
print(df.head())
content_copyCOPY
Comments