"""Round-trip a CSV file through ORC.

Reads a CSV into a pandas DataFrame, converts it to a PyArrow Table, writes
that table to an ORC file, reads the ORC file back into a DataFrame and prints
its first rows.

Usage:
    python <this_script> <input.csv>
"""

import sys
from glob import glob  # not used below; kept in case other code relies on it

import pandas as pd
import pyarrow as pa
import pyarrow.orc as orc

# Defined once so the write step and the read-back step cannot drift apart.
ORC_FILE_PATH = "/home/saravana/Saravana/s3_maintain/s3-maintenance/Download_S3_Files/Datavisiooh/month_03.orc"


def write_orc(dataset: pd.DataFrame, orc_file_path: str = ORC_FILE_PATH) -> None:
    """Write *dataset* to *orc_file_path* as a single ORC file.

    Args:
        dataset: DataFrame to persist.
        orc_file_path: Destination file; it is opened in ``"wb"`` mode.
    """
    # Convert the pandas DataFrame to a PyArrow Table.
    pa_table = pa.Table.from_pandas(dataset)

    # Both the OS-level sink and the ORC writer are closed on exit, including
    # when write() raises.
    with pa.OSFile(orc_file_path, "wb") as sink, orc.ORCWriter(sink) as writer:
        writer.write(pa_table)


def read_orc(orc_file_path: str = ORC_FILE_PATH) -> pd.DataFrame:
    """Read the ORC file at *orc_file_path* back into a pandas DataFrame."""
    return orc.read_table(orc_file_path).to_pandas()


def main(argv=None) -> int:
    """Run the CSV -> ORC -> DataFrame round trip.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. Exactly one argument is expected: the CSV path.

    Returns:
        ``0`` on success, ``2`` when the CSV path argument is missing or extra
        arguments are given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        # pd.read_csv() needs a source; calling it with no argument raises
        # TypeError, so fail early with a usable message instead.
        print(f"usage: {sys.argv[0]} <input.csv>", file=sys.stderr)
        return 2

    dataset = pd.read_csv(args[0])
    write_orc(dataset)

    # Read the ORC file back and display the first rows of the result.
    df = read_orc()
    print(df.head())
    return 0


if __name__ == "__main__":
    sys.exit(main())
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter