Access an S3 file in PySpark using AWS credentials saved in the Linux environment (read via the boto3 "default" profile and passed to the Hadoop S3A connector)

PHOTO EMBED

Wed Apr 01 2026 06:16:23 GMT+0000 (Coordinated Universal Time)

Saved by @Saravana_Kumar #python

# Resolve AWS credentials from the local "default" profile (boto3 reads them
# from the environment / ~/.aws). Freezing returns an immutable snapshot of the
# key pair so it cannot change underneath us mid-session.
boto_session = boto3.Session(profile_name="default")
aws_creds = boto_session.get_credentials().get_frozen_credentials()
access_key = aws_creds.access_key
secret_key = aws_creds.secret_key

# Build the SparkSession directly — do NOT instantiate SparkContext() first.
# If a SparkContext already exists, getOrCreate() reuses it and the
# spark.hadoop.fs.s3a.* settings below are silently ignored, leaving S3
# access unauthenticated.
spark = (
    SparkSession.builder
    # Tolerate files that vanish or are corrupt instead of failing the job.
    .config("spark.sql.files.ignoreMissingFiles", "true")
    .config("spark.sql.files.ignoreCorruptFiles", "true")
    # Hand the frozen credentials to the Hadoop S3A connector so s3a:// paths
    # can be read.
    .config("spark.hadoop.fs.s3a.access.key", access_key)
    .config("spark.hadoop.fs.s3a.secret.key", secret_key)
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .appName("Daily Uniques Reports")
    .getOrCreate()
    )
# Expose the session's context under the original `sc` name for any
# downstream code that used it.
sc = spark.sparkContext
content_copyCOPY