# When writing data to a directory in the Avro file format, one extra step is needed:
# the spark-avro jar has to be downloaded from Maven.
# For example, if Spark is version 3.0.3, search for "spark avro 3.0.3" and download the jar file from the browser.
# Spark configuration, including the jar setting, goes into our conf:
# Imports are placed here because the script has no import section of its own.
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Application settings: app name, local master using all cores, and the
# downloaded spark-avro jar that the "avro" format used further down needs.
my_conf = SparkConf()
my_conf.set("spark.app.name", "write API")
my_conf.set("spark.master", "local[*]")
my_conf.set("spark.jars", "/Downloads/spark-avro_2.12-3.0.3.jar")

# Build the session from my_conf. Without this step the settings above are
# never applied and the `spark` name used by the reader below is undefined.
spark = SparkSession.builder.config(conf=my_conf).getOrCreate()
# Load the orders CSV into a DataFrame: the first row is treated as the header
# and column types are inferred from the data.
order_df = (
    spark.read.format("csv")
    .option("inferSchema", True)
    .option("header", True)
    .option("path", "/Downloads/orders.csv")
    .load()
)
# Write the DataFrame in Avro format. This needs the spark-avro jar to be
# downloaded and configured in SparkConf. The output files land in the
# newfolder_data directory below, and "overwrite" replaces existing content.
# save() returns None, so its result is deliberately not bound to a name.
(
    order_df.write.format("avro")
    .mode("overwrite")
    .option("path", "/Users/Desktop/newfolder_data")
    .save()
)
#===============================================================================
# How to submit this script together with the jar from the terminal:
# run 'spark-submit' with the '--jars' option, followed by the full path of the jar file
# (including the jar file name) and then the Python file name.
# Example (a shell command to type in the terminal, not Python code):
#   spark-submit --jars C:\Downloads\spark-avro_2.12-3.0.3.jar  Writer_api.py
# done!
                                 
                             
                        
# Comments