thiscodeWorks | thiscodeWorks

import pandas as pd
import pyarrow as pa
import pyarrow.orc as orc
from glob import glob

dataset = pd.read_csv()

# Read the Pandas dataset as a PyArrow Table
pa_table = pa.Table.from_pandas(dataset)

# Write the PyArrow Table to an ORC file
with pa.OSFile("/home/saravana/Saravana/s3_maintain/s3-maintenance/Download_S3_Files/Datavisiooh/month_03.orc", "wb") as sink:
    with orc.ORCWriter(sink) as writer:
        writer.write(pa_table)

# Read the ORC file back into a Pandas DataFrame
orc_file_path = "/home/saravana/Saravana/s3_maintain/s3-maintenance/Download_S3_Files/Datavisiooh/month_03.orc"
df = orc.read_table(orc_file_path).to_pandas()
# Display the DataFrame
print(df.head())