@Mahesh_rathi__,

Here is some sample code that might help you:

from pyspark import SparkFiles

# DBFS directory that holds the XML files to distribute.
xml_dir = "dbfs:/FileStore/Mrathi/pattern-pool"

# Keep only the directory entries whose name ends in ".xml".
files = []
for entry in dbutils.fs.ls(xml_dir):
    if entry.name.endswith(".xml"):
        files.append(entry)

# Register every matching file with the SparkContext via addFile.
for file in files:
    sc.addFile(file.path)

# Broadcast the bare file names; consumers read them from files_bc.value.
file_names = [entry.name for entry in files]
files_bc = sc.broadcast(file_names)

def read_local_files(_):
    """Yield every line of every file named in ``files_bc.value``.

    Meant to run on executors (it is used as a ``flatMap`` function); the
    single positional argument is ignored. Each name is resolved to a local
    path with ``SparkFiles.get``, the path is printed, and the file is opened
    in text mode and streamed line by line.
    """
    # Imported inside the function so the name is available where it runs.
    from pyspark import SparkFiles

    for file_name in files_bc.value:
        resolved = SparkFiles.get(file_name)
        print(resolved)
        with open(resolved, "r") as handle:
            yield from handle

# Seed RDD with a single placeholder element; read_local_files ignores its argument.
seed = sc.parallelize([0], sc.defaultParallelism)
rdd = seed.flatMap(read_local_files)

# Show the first five lines that were read.
first_lines = rdd.take(5)
print(first_lines)

 

Let me know if it works.

 

Anudeep