<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34736#M25462</link>
    <description>&lt;P&gt;This looks like an error from redshift. Please check this aws document.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.aws.amazon.com/redshift/latest/dg/r_STL_LOADERROR_DETAIL.html" target="_blank"&gt;https://docs.aws.amazon.com/redshift/latest/dg/r_STL_LOADERROR_DETAIL.html&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Sun, 04 Sep 2022 19:32:29 GMT</pubDate>
    <dc:creator>Prabakar</dc:creator>
    <dc:date>2022-09-04T19:32:29Z</dc:date>
    <item>
      <title>Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34721#M25447</link>
      <description>&lt;P&gt;I am trying to connect my Spark cluster to a Postgresql RDS instance. The Python notebook code that was used is seen below:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = ( spark.read \
  .format("jdbc") \
  .option("url", "jdbc:postgresql://&amp;lt;connection-string&amp;gt;:5432/database”)\
  .option("dbtable", “&amp;lt;schema.table&amp;gt;“)\
  .option("user", "postgres")\
  .option("password", “Pass*****”)
  .load()
)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;The following error message was received&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
&amp;lt;command-1164003892694289&amp;gt; in &amp;lt;module&amp;gt;
----&amp;gt; 1 df = ( spark.read \
      2   .format("jdbc") \
      3   .option("url", "jdbc:postgresql://&amp;lt;connection-string&amp;gt;:5432/database") \
      4   .option("dbtable", "&amp;lt;schema.table&amp;gt;") \
      5   .option("user", "postgres") \
&amp;nbsp;
/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)
    162             return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
    163         else:
--&amp;gt; 164             return self._df(self._jreader.load())
    165 
    166     def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1302 
   1303         answer = self.gateway_client.send_command(command)
-&amp;gt; 1304         return_value = get_return_value(
   1305             answer, self.gateway_client, self.target_id, self.name)
   1306 
&amp;nbsp;
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
    115     def deco(*a, **kw):
    116         try:
--&amp;gt; 117             return f(*a, **kw)
    118         except py4j.protocol.Py4JJavaError as e:
    119             converted = convert_exception(e.java_exception)
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--&amp;gt; 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)
&amp;nbsp;
Py4JJavaError: An error occurred while calling o1170.load.
: org.postgresql.util.PSQLException: The connection attempt failed.
	at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:315)
	at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:51)
	at org.postgresql.jdbc.PgConnection.&amp;lt;init&amp;gt;(PgConnection.java:223)
	at org.postgresql.Driver.makeConnection(Driver.java:465)
	at org.postgresql.Driver.connect(Driver.java:264)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider.getConnection(BasicConnectionProvider.scala:49)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProviderBase.create(ConnectionProvider.scala:102)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$createConnectionFactory$1(JdbcUtils.scala:69)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.getQueryOutputSchema(JDBCRDD.scala:63)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:58)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:241)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:36)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:385)
	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:356)
	at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:323)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:323)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:222)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:295)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:251)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.SocketTimeoutException: connect timed out
	at java.net.PlainSocketImpl.socketConnect(Native Method)
	at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
	at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
	at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
	at java.net.Socket.connect(Socket.java:607)
	at org.postgresql.core.PGStream.createSocket(PGStream.java:231)
	at org.postgresql.core.PGStream.&amp;lt;init&amp;gt;(PGStream.java:95)
	at org.postgresql.core.v3.ConnectionFactoryImpl.tryConnect(ConnectionFactoryImpl.java:98)
	at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:213)
	... 28 more&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Any help or direction will be greatly appreciated, Thank you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please note that I changed the connection URL and other details for security reasons and that's not how they were entered.&lt;/P&gt;</description>
      <pubDate>Fri, 12 Aug 2022 16:37:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34721#M25447</guid>
      <dc:creator>Cano</dc:creator>
      <dc:date>2022-08-12T16:37:32Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34722#M25448</link>
      <description>&lt;P&gt;"Caused by: java.net.SocketTimeoutException: connect timed out" indicate the network connection between Databricks cluster and the postgress database on 5432 port was not established and eventually timed out.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;As a first step, please ensure the connection between DB cluster and postgres works fine &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;To test the connectivity, you can run below on a notebook or web terminal&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%sh nc -v &amp;lt;postgres host&amp;gt; 5432&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 12 Aug 2022 16:56:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34722#M25448</guid>
      <dc:creator>User16873043099</dc:creator>
      <dc:date>2022-08-12T16:56:15Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34723#M25449</link>
      <description>&lt;P&gt;Thank you Rohit for your response. I realized that %sh nc -v &amp;lt;postgres host&amp;gt; 5432 does not return anything (times out), meaning there's no connectivity to Postgres. Can you please direct me to where and how I can establish connectivity to Postgres? Where in DBricks can I allow port 5432? Will it be in the IAM profile? Any advice will help.&lt;/P&gt;</description>
      <pubDate>Mon, 15 Aug 2022 14:56:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34723#M25449</guid>
      <dc:creator>Cano</dc:creator>
      <dc:date>2022-08-15T14:56:35Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34724#M25450</link>
      <description>&lt;P&gt;Okay so I resolved this problem by creating a VPC peering between the Databricks VPC and my local VPC that hosts my Postgresql instance. &lt;A href="https://docs.databricks.com/administration-guide/cloud-configurations/aws/vpc-peering.html?_ga=2.42341152.449431182.1660574664-805235188.1647362174" target="_blank"&gt;https://docs.databricks.com/administration-guide/cloud-configurations/aws/vpc-peering.html?_ga=2.42341152.449431182.1660574664-805235188.1647362174&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have now been able to create a dataframe linking to my Postgresql RDS with the same notebook python code as seen above.&lt;/P&gt;</description>
      <pubDate>Mon, 15 Aug 2022 19:14:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34724#M25450</guid>
      <dc:creator>Cano</dc:creator>
      <dc:date>2022-08-15T19:14:55Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34725#M25451</link>
      <description>&lt;P&gt;Glad to know the issue got resolved. &lt;/P&gt;&lt;P&gt;Yes, the connectivity needs to be established from the  AWS account VPC where DB clusters are hosted to the postgres. &lt;/P&gt;</description>
      <pubDate>Mon, 15 Aug 2022 19:22:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34725#M25451</guid>
      <dc:creator>User16873043099</dc:creator>
      <dc:date>2022-08-15T19:22:21Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34726#M25452</link>
      <description>&lt;P&gt;Hi @Rohit Rajendran​&amp;nbsp;and @charles okoh​&amp;nbsp;I was able to set up VPC peering for my customer's Databricks and Redshift account.&lt;/P&gt;&lt;UL&gt;&lt;LI&gt; I tested the connection via %sh host -t a &amp;lt;redshift hostname&amp;gt; and confirmed the IP address. &lt;/LI&gt;&lt;LI&gt;I also ran a display(spark.sql("SELECT * FROM csv.`s3://&amp;lt;customer/s3bucketpath&amp;gt;`")) to confirm I can access the S3 bucket. &lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;However when I attempt to run:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;df = ( spark.read \&lt;/LI&gt;&lt;LI&gt; .format("jdbc") \&lt;/LI&gt;&lt;LI&gt; .option("url", "jdbc:redshift://&amp;lt;connection-string&amp;gt;:5439/database”)\&lt;/LI&gt;&lt;LI&gt; .option("dbtable", “&amp;lt;schema.table&amp;gt;“)\&lt;/LI&gt;&lt;LI&gt; .option("user", "customeruser")\&lt;/LI&gt;&lt;LI&gt; .option("password", “Pass*****”)&lt;/LI&gt;&lt;LI&gt; .load()&lt;/LI&gt;&lt;LI&gt;)&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;I get the error below:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Py4JJavaError: An error occurred while calling o486.load.&lt;/P&gt;&lt;P&gt;: java.sql.SQLException: The connection attempt failed.&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.util.RedshiftException.getSQLException(RedshiftException.java:56)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.Driver.connect(Driver.java:339)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.connect(DriverWrapper.scala:46)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.getConnector(RedshiftJDBCWrapper.scala:355)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.getConnector(RedshiftJDBCWrapper.scala:376)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftRelation.$anonfun$schema$1(RedshiftRelation.scala:76)&lt;/P&gt;&lt;P&gt;	at scala.Option.getOrElse(Option.scala:189)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftRelation.schema$lzycompute(RedshiftRelation.scala:73)&lt;/P&gt;&lt;P&gt;	at 
com.databricks.spark.redshift.RedshiftRelation.schema(RedshiftRelation.scala:72)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:496)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:356)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:323)&lt;/P&gt;&lt;P&gt;	at scala.Option.getOrElse(Option.scala:189)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:323)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:222)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)&lt;/P&gt;&lt;P&gt;	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)&lt;/P&gt;&lt;P&gt;	at java.lang.reflect.Method.invoke(Method.java:498)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)&lt;/P&gt;&lt;P&gt;	at py4j.Gateway.invoke(Gateway.java:295)&lt;/P&gt;&lt;P&gt;	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)&lt;/P&gt;&lt;P&gt;	at py4j.commands.CallCommand.execute(CallCommand.java:79)&lt;/P&gt;&lt;P&gt;	at py4j.GatewayConnection.run(GatewayConnection.java:251)&lt;/P&gt;&lt;P&gt;	at java.lang.Thread.run(Thread.java:748)&lt;/P&gt;&lt;P&gt;Caused by: java.net.SocketTimeoutException: connect timed out&lt;/P&gt;&lt;P&gt;	at java.net.PlainSocketImpl.socketConnect(Native Method)&lt;/P&gt;&lt;P&gt;	at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)&lt;/P&gt;&lt;P&gt;	at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)&lt;/P&gt;&lt;P&gt;	at 
java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)&lt;/P&gt;&lt;P&gt;	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)&lt;/P&gt;&lt;P&gt;	at java.net.Socket.connect(Socket.java:607)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.RedshiftStream.&amp;lt;init&amp;gt;(RedshiftStream.java:86)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.ConnectionFactoryImpl.tryConnect(ConnectionFactoryImpl.java:111)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:224)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.ConnectionFactory.openConnection(ConnectionFactory.java:51)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.jdbc.RedshiftConnectionImpl.&amp;lt;init&amp;gt;(RedshiftConnectionImpl.java:322)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.Driver.makeConnection(Driver.java:502)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.Driver.connect(Driver.java:315)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 18:09:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34726#M25452</guid>
      <dc:creator>kennyg</dc:creator>
      <dc:date>2022-09-01T18:09:26Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34727#M25453</link>
      <description>&lt;P&gt;maybe @Prabakar Ammeappin​&amp;nbsp;or @Debayan Mukherjee​&amp;nbsp;have some insight here??&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 18:39:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34727#M25453</guid>
      <dc:creator>BradSheridan</dc:creator>
      <dc:date>2022-09-01T18:39:33Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34728#M25454</link>
      <description>&lt;P&gt;@kennyg To test connectivity to redshift, please use the nc command. From the exception it looks like the spark nodes are unable to establish connectivity to redshift on port 5439&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%sh nc -v &amp;lt;redshift host&amp;gt; 5439&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 19:05:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34728#M25454</guid>
      <dc:creator>User16873043099</dc:creator>
      <dc:date>2022-09-01T19:05:49Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34729#M25455</link>
      <description>&lt;P&gt;@Rohit Rajendran​&amp;nbsp;I received this message: nc: connect to &amp;lt;redshift hostname&amp;gt; port 5439 (tcp) failed: Connection timed out. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please advise&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 19:12:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34729#M25455</guid>
      <dc:creator>kennyg</dc:creator>
      <dc:date>2022-09-01T19:12:04Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34730#M25456</link>
      <description>&lt;P&gt;Please make sure the connection from Databricks VPC to the redshift works. You can start by looking at the security groups &amp;gt; inbound/outbound rules for DB default SG and the redshift SG.&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 19:16:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34730#M25456</guid>
      <dc:creator>User16873043099</dc:creator>
      <dc:date>2022-09-01T19:16:14Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34731#M25457</link>
      <description>&lt;P&gt;@Rohit Rajendran​&amp;nbsp;I have confirmed that the Redshift SG has the inbound rule with the source as the security group ID of the&amp;nbsp;Unmanaged&amp;nbsp;Databricks&amp;nbsp;(step 8 here: &lt;A href="https://docs.databricks.com/administration-guide/cloud-configurations/aws/vpc-peering.html?_ga=2.108892290.1076300490.1661891181-1346035256.1661891180" alt="https://docs.databricks.com/administration-guide/cloud-configurations/aws/vpc-peering.html?_ga=2.108892290.1076300490.1661891181-1346035256.1661891180" target="_blank"&gt;https://docs.databricks.com/administration-guide/cloud-configurations/aws/vpc-peering.html?_ga=2.108892290.1076300490.1661891181-1346035256.1661891180&lt;/A&gt;) &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please advise&lt;/P&gt;</description>
      <pubDate>Thu, 01 Sep 2022 19:32:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34731#M25457</guid>
      <dc:creator>kennyg</dc:creator>
      <dc:date>2022-09-01T19:32:11Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34732#M25458</link>
      <description>&lt;P&gt;Hi @K G​&amp;nbsp;to eliminate Databricks from the scope, please try creating a VM in the same subnet where Databricks is deployed and check the connectivity. If it's working fine, then we can see what is wrong on Databricks' side. If the connectivity test fails from a VM on AWS, then it's out of Databricks scope, and it is advised you get the help of your networking team to sort this connection issue.&lt;/P&gt;</description>
      <pubDate>Fri, 02 Sep 2022 07:32:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34732#M25458</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2022-09-02T07:32:00Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34733#M25459</link>
      <description>&lt;P&gt;@Prabakar Ammeappin​&amp;nbsp;Is there VM type you recommend I spun up an Amazon Linux VM and was able to retrieve the private IP with %sh host -t a &amp;lt;VM hostname&amp;gt;&amp;nbsp; command. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I cannot use the %sh nc -v &amp;lt;hostname&amp;gt; &amp;lt;port number&amp;gt; command because it needs a port number.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please advise&lt;/P&gt;</description>
      <pubDate>Fri, 02 Sep 2022 11:06:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34733#M25459</guid>
      <dc:creator>kennyg</dc:creator>
      <dc:date>2022-09-02T11:06:25Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34734#M25460</link>
      <description>&lt;P&gt;Hi @K G​&amp;nbsp;if you are testing the connection for redshift, then the port number is already provided by @Rohit Rajendran​&amp;nbsp;.&lt;/P&gt;&lt;P&gt;nc -v &amp;lt;redshift host&amp;gt; 5439&lt;/P&gt;&lt;P&gt;Spin up a Ubuntu VM in the same subnet where Databricks is deployed and run this command in the VM.&lt;/P&gt;</description>
      <pubDate>Fri, 02 Sep 2022 11:13:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34734#M25460</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2022-09-02T11:13:49Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34735#M25461</link>
      <description>&lt;P&gt;@Prabakar Ammeappin​&amp;nbsp; Worked with the networking team and was able to use nc -v &amp;lt;redshit host&amp;gt; 5439 and connect thank you! I am able to read data from a table, read data from a query. However when I run:&lt;/P&gt;&lt;P&gt;df.write \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.format("com.databricks.spark.redshift") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("url", &amp;lt;jdbc redshift hostname&amp;gt;") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("user", "&amp;lt;redshift username&amp;gt;") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("password", "&amp;lt;redshift password&amp;gt;") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("dbtable", "&amp;lt;dbschema.dbtablename&amp;gt;") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("tempdir", "s3a://&amp;lt;name of&amp;gt;/&amp;lt;bucketpath&amp;gt;") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("forward_spark_s3_credentials", "true") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.mode("overwrite") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.save()&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I get this error:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Py4JJavaError: An error occurred while calling o1357.save.&lt;/P&gt;&lt;P&gt;: java.sql.SQLException: Exception thrown in awaitResult: &lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.executeInterruptibly(RedshiftJDBCWrapper.scala:223)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.executeInterruptibly(RedshiftJDBCWrapper.scala:197)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftWriter.$anonfun$doRedshiftLoad$4(RedshiftWriter.scala:169)&lt;/P&gt;&lt;P&gt;	at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)&lt;/P&gt;&lt;P&gt;	at com.databricks.backend.daemon.driver.ProgressReporter$.withStatusCode(ProgressReporter.scala:377)&lt;/P&gt;&lt;P&gt;	at com.databricks.backend.daemon.driver.ProgressReporter$.withStatusCode(ProgressReporter.scala:363)&lt;/P&gt;&lt;P&gt;	at 
com.databricks.spark.util.SparkDatabricksProgressReporter$.withStatusCode(ProgressReporter.scala:34)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftWriter.$anonfun$doRedshiftLoad$3(RedshiftWriter.scala:169)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftWriter.$anonfun$doRedshiftLoad$3$adapted(RedshiftWriter.scala:155)&lt;/P&gt;&lt;P&gt;	at scala.Option.foreach(Option.scala:407)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftWriter.doRedshiftLoad(RedshiftWriter.scala:155)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.RedshiftWriter.saveToRedshift(RedshiftWriter.scala:448)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.DefaultSource.createRelation(DefaultSource.scala:115)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:80)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:78)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:89)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$1(QueryExecution.scala:160)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$8(SQLExecution.scala:239)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:386)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:186)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:968)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:141)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:336)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:160)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:156)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:575)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:167)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:575)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:268)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:264)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:551)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:156)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:324)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:156)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:141)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:132)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:186)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:959)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:427)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:396)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:258)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)&lt;/P&gt;&lt;P&gt;	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)&lt;/P&gt;&lt;P&gt;	at java.lang.reflect.Method.invoke(Method.java:498)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)&lt;/P&gt;&lt;P&gt;	at py4j.Gateway.invoke(Gateway.java:295)&lt;/P&gt;&lt;P&gt;	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)&lt;/P&gt;&lt;P&gt;	at py4j.commands.CallCommand.execute(CallCommand.java:79)&lt;/P&gt;&lt;P&gt;	at py4j.GatewayConnection.run(GatewayConnection.java:251)&lt;/P&gt;&lt;P&gt;	at java.lang.Thread.run(Thread.java:748)&lt;/P&gt;&lt;P&gt;Caused by: com.amazon.redshift.util.RedshiftException: ERROR: Load into table 'ref_cip_detailed_series_testkg' failed.  
Check 'stl_load_errors' system table for details.&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:2607)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.QueryExecutorImpl.processResultsOnThread(QueryExecutorImpl.java:2275)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:1880)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:1872)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:368)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.jdbc.RedshiftStatementImpl.executeInternal(RedshiftStatementImpl.java:514)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.jdbc.RedshiftStatementImpl.execute(RedshiftStatementImpl.java:435)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.jdbc.RedshiftPreparedStatement.executeWithFlags(RedshiftPreparedStatement.java:200)&lt;/P&gt;&lt;P&gt;	at com.amazon.redshift.jdbc.RedshiftPreparedStatement.execute(RedshiftPreparedStatement.java:184)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.$anonfun$executeInterruptibly$1(RedshiftJDBCWrapper.scala:197)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.$anonfun$executeInterruptibly$1$adapted(RedshiftJDBCWrapper.scala:197)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.redshift.JDBCWrapper.$anonfun$executeInterruptibly$2(RedshiftJDBCWrapper.scala:215)&lt;/P&gt;&lt;P&gt;	at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)&lt;/P&gt;&lt;P&gt;	at scala.util.Success.$anonfun$map$1(Try.scala:255)&lt;/P&gt;&lt;P&gt;	at scala.util.Success.map(Try.scala:213)&lt;/P&gt;&lt;P&gt;	at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)&lt;/P&gt;&lt;P&gt;	at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)&lt;/P&gt;&lt;P&gt;	at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)&lt;/P&gt;&lt;P&gt;	at 
scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/P&gt;&lt;P&gt;	... 1 more&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please advise. Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 02 Sep 2022 14:28:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34735#M25461</guid>
      <dc:creator>kennyg</dc:creator>
      <dc:date>2022-09-02T14:28:50Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting Databricks Spark Cluster to Postgresql RDS Instance</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34736#M25462</link>
      <description>&lt;P&gt;This looks like an error from redshift. Please check this aws document.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.aws.amazon.com/redshift/latest/dg/r_STL_LOADERROR_DETAIL.html" target="_blank"&gt;https://docs.aws.amazon.com/redshift/latest/dg/r_STL_LOADERROR_DETAIL.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 04 Sep 2022 19:32:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-databricks-spark-cluster-to-postgresql-rds-instance/m-p/34736#M25462</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2022-09-04T19:32:29Z</dc:date>
    </item>
  </channel>
</rss>

