<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Singleton Design Principle for pyspark database connector A singleton is a design pattern that ensures that a class has only one instance, and provide... in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/singleton-design-principle-for-pyspark-database-connector-a/m-p/13877#M8461</link>
    <description>&lt;P&gt;Singleton Design Principle for pyspark database connector&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;A singleton is a design pattern that ensures that a class has only one instance, and provides a global access point to that instance. Here is an example of how you could implement a singleton design for a PySpark database connector in Python:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# Define the singleton decorator
def singleton(cls):
    """Class decorator that makes ``cls`` yield one shared instance.

    The first call constructs ``cls`` with the arguments given; every later
    call returns that same object and ignores its arguments.

    Args:
        cls: The class to decorate.

    Returns:
        A factory function that stands in for ``cls``. It carries the
        class's ``__name__``, ``__qualname__``, ``__module__`` and
        ``__doc__``, and exposes the undecorated class as ``__wrapped__``.
    """
    import functools  # local import keeps the snippet self-contained

    instances = {}

    # updated=() copies the naming metadata only; the default would also
    # merge the class namespace (its methods) into the function's __dict__.
    @functools.wraps(cls, updated=())
    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance
&amp;nbsp;
# Define the MongoDBConnector class
@singleton
class MongoDBConnector:
    """Caches Spark DataFrames per (database, collection) pair.

    The class is wrapped by ``singleton``, so constructing it again returns
    the instance built by the first call.
    """

    def __init__(self, connection_string):
        """Obtain a Spark session and start with an empty DataFrame cache.

        Args:
            connection_string: Value later passed as the reader's "uri" option.
        """
        self.spark = SparkSession.builder.getOrCreate()
        self.dataframes = {}
        self.uri = connection_string

    def connect(self, database_name, collection_name):
        """Return the DataFrame for a collection, loading it on first request.

        Args:
            database_name: Passed as the reader's "database" option.
            collection_name: Passed as the reader's "collection" option.

        Returns:
            The cached DataFrame for this (database, collection) pair.
        """
        cache_key = (database_name, collection_name)

        # Already loaded once: hand back the cached object.
        if cache_key in self.dataframes:
            return self.dataframes[cache_key]

        # Build the reader step by step, then load and remember the result.
        reader = self.spark.read.format("com.mongodb.spark.sql.DefaultSource")
        reader = reader.option("uri", self.uri)
        reader = reader.option("database", database_name)
        reader = reader.option("collection", collection_name)
        self.dataframes[cache_key] = reader.load()
        return self.dataframes[cache_key]

    def get_dataframe(self, database_name, collection_name):
        """Look up a previously loaded DataFrame without loading anything.

        Returns:
            The cached DataFrame, or None when ``connect`` has not stored
            one for this (database, collection) pair.
        """
        return self.dataframes.get((database_name, collection_name))
&amp;nbsp;
# Create an instance of the MongoDBConnector class
mongo_connector = MongoDBConnector(connectionString)
&amp;nbsp;
# Connect to the "sample_supplies" database and "sales" collection
df1 = mongo_connector.connect("sample_supplies", "sales")
&amp;nbsp;
# Connect to the "sample_airbnb" database and "listingsAndReviews" collection
df2 = mongo_connector.connect("sample_airbnb", "listingsAndReviews")
&amp;nbsp;
# Print the id values of the DataFrame objects
print(id(df1))
print(id(df2))
&amp;nbsp;
# Try to get the DataFrame object for the "sample_supplies" database and "sales" collection
df3 = mongo_connector.get_dataframe("sample_supplies", "sales")
&amp;nbsp;
# Print the id value of the DataFrame object
print(id(df3))
&amp;nbsp;
# Try to get the DataFrame object for the "sample_airbnb" database and "listingsAndReviews" collection
df4 = mongo_connector.get_dataframe("sample_airbnb", "listingsAndReviews")
&amp;nbsp;
# Print the id value of the DataFrame object
print(id(df4))&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;source:-chatgpt&lt;/P&gt;</description>
    <pubDate>Tue, 03 Jan 2023 12:06:06 GMT</pubDate>
    <dc:creator>Prototype998</dc:creator>
    <dc:date>2023-01-03T12:06:06Z</dc:date>
    <item>
      <title>Singleton Design Principle for pyspark database connector A singleton is a design pattern that ensures that a class has only one instance, and provide...</title>
      <link>https://community.databricks.com/t5/data-engineering/singleton-design-principle-for-pyspark-database-connector-a/m-p/13877#M8461</link>
      <description>&lt;P&gt;Singleton Design Principle for pyspark database connector&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;A singleton is a design pattern that ensures that a class has only one instance, and provides a global access point to that instance. Here is an example of how you could implement a singleton design for a PySpark database connector in Python:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# Define the singleton decorator
def singleton(cls):
    """Class decorator that makes ``cls`` yield one shared instance.

    The first call constructs ``cls`` with the arguments given; every later
    call returns that same object and ignores its arguments.

    Args:
        cls: The class to decorate.

    Returns:
        A factory function that stands in for ``cls``. It carries the
        class's ``__name__``, ``__qualname__``, ``__module__`` and
        ``__doc__``, and exposes the undecorated class as ``__wrapped__``.
    """
    import functools  # local import keeps the snippet self-contained

    instances = {}

    # updated=() copies the naming metadata only; the default would also
    # merge the class namespace (its methods) into the function's __dict__.
    @functools.wraps(cls, updated=())
    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance
&amp;nbsp;
# Define the MongoDBConnector class
@singleton
class MongoDBConnector:
    """Caches Spark DataFrames per (database, collection) pair.

    The class is wrapped by ``singleton``, so constructing it again returns
    the instance built by the first call.
    """

    def __init__(self, connection_string):
        """Obtain a Spark session and start with an empty DataFrame cache.

        Args:
            connection_string: Value later passed as the reader's "uri" option.
        """
        self.spark = SparkSession.builder.getOrCreate()
        self.dataframes = {}
        self.uri = connection_string

    def connect(self, database_name, collection_name):
        """Return the DataFrame for a collection, loading it on first request.

        Args:
            database_name: Passed as the reader's "database" option.
            collection_name: Passed as the reader's "collection" option.

        Returns:
            The cached DataFrame for this (database, collection) pair.
        """
        cache_key = (database_name, collection_name)

        # Already loaded once: hand back the cached object.
        if cache_key in self.dataframes:
            return self.dataframes[cache_key]

        # Build the reader step by step, then load and remember the result.
        reader = self.spark.read.format("com.mongodb.spark.sql.DefaultSource")
        reader = reader.option("uri", self.uri)
        reader = reader.option("database", database_name)
        reader = reader.option("collection", collection_name)
        self.dataframes[cache_key] = reader.load()
        return self.dataframes[cache_key]

    def get_dataframe(self, database_name, collection_name):
        """Look up a previously loaded DataFrame without loading anything.

        Returns:
            The cached DataFrame, or None when ``connect`` has not stored
            one for this (database, collection) pair.
        """
        return self.dataframes.get((database_name, collection_name))
&amp;nbsp;
# Create an instance of the MongoDBConnector class
mongo_connector = MongoDBConnector(connectionString)
&amp;nbsp;
# Connect to the "sample_supplies" database and "sales" collection
df1 = mongo_connector.connect("sample_supplies", "sales")
&amp;nbsp;
# Connect to the "sample_airbnb" database and "listingsAndReviews" collection
df2 = mongo_connector.connect("sample_airbnb", "listingsAndReviews")
&amp;nbsp;
# Print the id values of the DataFrame objects
print(id(df1))
print(id(df2))
&amp;nbsp;
# Try to get the DataFrame object for the "sample_supplies" database and "sales" collection
df3 = mongo_connector.get_dataframe("sample_supplies", "sales")
&amp;nbsp;
# Print the id value of the DataFrame object
print(id(df3))
&amp;nbsp;
# Try to get the DataFrame object for the "sample_airbnb" database and "listingsAndReviews" collection
df4 = mongo_connector.get_dataframe("sample_airbnb", "listingsAndReviews")
&amp;nbsp;
# Print the id value of the DataFrame object
print(id(df4))&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;source:-chatgpt&lt;/P&gt;</description>
      <pubDate>Tue, 03 Jan 2023 12:06:06 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/singleton-design-principle-for-pyspark-database-connector-a/m-p/13877#M8461</guid>
      <dc:creator>Prototype998</dc:creator>
      <dc:date>2023-01-03T12:06:06Z</dc:date>
    </item>
  </channel>
</rss>

