77796
New Contributor II

We can reproduce the above error on runtime 10.x and 11.x by running the code below in a notebook:

import org.apache.hadoop.io.IntWritable

import org.apache.hadoop.io.Text

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat

import org.apache.spark.rdd.PairRDDFunctions

// Minimal reproduction: build a tiny (Int, String) pair RDD, wrap each key and
// value in a Hadoop Writable, and write it out through the new-API Hadoop
// output format to an S3A path.

// Sample (key, value) records.
val l = List(10 -> "a", 20 -> "b", 30 -> "c", 40 -> "d")

// Distribute the records using the notebook's SparkContext.
val rdd = sc.parallelize(l)

// Wrap each key in an IntWritable and each value in a Text.
val rddWritable = rdd.map { case (key, value) =>
  (new IntWritable(key), new Text(value))
}

// Construct the pair-RDD operations wrapper explicitly.
val pairRDD = new PairRDDFunctions(rddWritable)

// Destination path and Hadoop configuration for the write.
val outputPath = "s3a://bucket/testout.dat"
val hadoopConf = spark.sparkContext.hadoopConfiguration

// Write the records as text; this is the call that exercises the save path.
pairRDD.saveAsNewAPIHadoopFile(
  outputPath,
  classOf[IntWritable],                        // key class
  classOf[Text],                               // value class
  classOf[TextOutputFormat[IntWritable, Text]], // output format class
  hadoopConf
)