Singleton Design Pattern for a PySpark Database Connector
A singleton is a design pattern that ensures a class has only one instance and provides a global access point to that instance. Here is an example of how you could implement the singleton pattern for a PySpark database connector in Python:
# Define the singleton decorator
def singleton(cls):
    """Turn *cls* into a singleton: every call returns one shared instance.

    The class is instantiated on the first call, using that call's
    arguments. Every later call returns the same object and ignores
    whatever arguments it was given.

    Note that the decorated name is bound to a function, not to the class
    itself, so ``isinstance(obj, DecoratedName)`` will not work; use
    ``DecoratedName.__wrapped__`` to reach the original class.

    Args:
        cls: The class to wrap.

    Returns:
        A function that stands in for ``cls`` and returns its single
        instance.
    """
    import functools

    # One slot per decorated class; filled lazily on the first call.
    instances = {}

    # updated=() copies only the naming metadata (__name__, __doc__, ...);
    # the default would also merge the class namespace into the function.
    @functools.wraps(cls, updated=())
    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance
# Define the MongoDBConnector class
@singleton
class MongoDBConnector:
    """Load MongoDB collections as Spark DataFrames and cache them.

    The class is wrapped by ``@singleton``, so only the first
    ``MongoDBConnector(...)`` call builds an instance; later calls return
    that same instance and the ``connection_string`` they pass is ignored.
    """

    def __init__(self, connection_string: str):
        """Get (or create) the Spark session and start with an empty cache.

        Args:
            connection_string: MongoDB URI handed to the Spark reader as
                its ``uri`` option.
        """
        # NOTE(review): SparkSession is not imported anywhere in this snippet;
        # presumably `from pyspark.sql import SparkSession` ran earlier - confirm.
        self.spark = SparkSession.builder.getOrCreate()
        self.uri = connection_string
        # Maps (database_name, collection_name) -> the DataFrame loaded for it.
        self.dataframes = {}

    def connect(self, database_name: str, collection_name: str):
        """Return the DataFrame for a collection, loading it on first use.

        The first request for a given (database, collection) pair goes
        through the Spark reader and the result is stored in
        ``self.dataframes``; repeated requests return the stored object.

        Args:
            database_name: Value for the reader's ``database`` option.
            collection_name: Value for the reader's ``collection`` option.

        Returns:
            The cached DataFrame object for that pair.
        """
        key = (database_name, collection_name)
        if key not in self.dataframes:
            self.dataframes[key] = (
                self.spark.read.format("com.mongodb.spark.sql.DefaultSource")
                .option("uri", self.uri)
                .option("database", database_name)
                .option("collection", collection_name)
                .load()
            )
        return self.dataframes[key]

    def get_dataframe(self, database_name: str, collection_name: str):
        """Return the already-loaded DataFrame for a collection, if any.

        Unlike ``connect``, this never triggers a load.

        Args:
            database_name: Database part of the cache key.
            collection_name: Collection part of the cache key.

        Returns:
            The cached DataFrame, or ``None`` when ``connect`` has not been
            called for that pair.
        """
        return self.dataframes.get((database_name, collection_name))
# Build the connector; the class is decorated with @singleton, so any later
# MongoDBConnector(...) call would hand back this same object.
mongo_connector = MongoDBConnector(connectionString)

# Load both collections. Each result is cached on the connector under its
# (database, collection) pair.
df1 = mongo_connector.connect("sample_supplies", "sales")
df2 = mongo_connector.connect("sample_airbnb", "listingsAndReviews")

# Show the identity of each loaded DataFrame object, one id per line.
print(id(df1), id(df2), sep="\n")

# Fetch "sample_supplies" / "sales" again via the cache-only lookup; this
# returns the object stored by connect(), so the id should match df1's.
df3 = mongo_connector.get_dataframe("sample_supplies", "sales")
print(id(df3))

# Same check for "sample_airbnb" / "listingsAndReviews"; the id should
# match df2's.
df4 = mongo_connector.get_dataframe("sample_airbnb", "listingsAndReviews")
print(id(df4))
Source: ChatGPT