@Govardhana Reddy :
Method 1: Empty DataFrame with an explicit schema
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("MyApp").getOrCreate()

# Create an empty DataFrame with an explicit schema; column names alone are not
# enough here, because Spark cannot infer column types from an empty dataset
empty_df = spark.createDataFrame([], schema="column1 STRING, column2 INT, column3 DOUBLE")
empty_df.show()
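If you prefer building the schema programmatically rather than as a DDL string, the same empty DataFrame can be created with StructType/StructField; a minimal sketch, where the column types are just illustrative assumptions:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

# Equivalent schema built programmatically; the types are placeholders
schema = StructType([
    StructField("column1", StringType(), True),
    StructField("column2", IntegerType(), True),
    StructField("column3", DoubleType(), True),
])
empty_df = spark.createDataFrame([], schema=schema)
empty_df.show()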
Method 2: From a list of dictionaries
data = [
    {"name": "Alice", "age": 25},
    {"name": "Bob", "age": 30},
    {"name": "Charlie", "age": 35},
]
df = spark.createDataFrame(data)
df.show()
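On some Spark versions, inferring the schema from plain dicts emits a deprecation warning that suggests using Row objects instead; a minimal sketch of that variant, reusing the data list above:
from pyspark.sql import Row

# Convert each dict to a Row so the schema is inferred from the Row fields
rows = [Row(**d) for d in data]
df = spark.createDataFrame(rows)
df.show()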
Method 3: From a list of tuples
data = [("Alice", 25), ("Bob", 30), ("Charlie", 35)]
df = spark.createDataFrame(data, schema=["name", "age"])
df.show()
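When only column names are passed, Spark infers the column types from the data (plain Python ints come out as LongType); printSchema shows what was inferred:
# Inspect the schema Spark inferred from the tuples
df.printSchema()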
Method 4: From a pandas DataFrame
import pandas as pd
pdf = pd.DataFrame({
    "name": ["Alice", "Bob", "Charlie"],
    "age": [25, 30, 35],
})
df = spark.createDataFrame(pdf)
df.show()
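On Spark 3.x, the pandas-to-Spark conversion can be sped up by enabling Apache Arrow; a minimal, optional sketch using the standard config key:
# Optional: use Arrow for faster pandas <-> Spark conversion (Spark 3.x config key)
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
df = spark.createDataFrame(pdf)
df.show()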
Method 5: From a CSV file
df = spark.read.csv("path/to/file.csv", header=True, inferSchema=True)
df.show()
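inferSchema=True makes Spark scan the file an extra time to work out the types; if the layout is known, passing the schema up front avoids that pass. A minimal sketch where the path and column names are placeholders:
# Explicit schema avoids the extra scan that inferSchema performs
df = (spark.read
      .option("header", True)
      .schema("name STRING, age INT")
      .csv("path/to/file.csv"))
df.show()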
Method 6: From a Parquet file
df = spark.read.parquet("path/to/file.parquet")
df.show()
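Parquet files carry their own schema, so no header or schema options are needed; because the format is columnar, selecting only the columns you need lets Spark prune the rest at read time. A minimal sketch with placeholder column names:
# Select a subset of columns; Parquet's columnar layout prunes the rest
df = spark.read.parquet("path/to/file.parquet").select("name", "age")
df.show()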