<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>Topic: Unable to load data from Redshift in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/unable-to-load-data-from-redshift/m-p/3871#M769</link>
    <description>Unable to load data from Redshift in Data Engineering</description>
    <pubDate>Tue, 30 May 2023 17:57:11 GMT</pubDate>
    <dc:creator>Edwin</dc:creator>
    <dc:date>2023-05-30T17:57:11Z</dc:date>
    <item>
      <title>Unable to load data from Redshift</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-data-from-redshift/m-p/3871#M769</link>
      <description>&lt;P&gt;I've been trying to connect to Redshift following &lt;A href="https://docs.databricks.com/external-data/amazon-redshift.html" target="_blank"&gt;Databricks' documentation&lt;/A&gt;. I've validated that my cluster is on runtime version 11.3 and that I have read/write privileges on the tempdir bucket, but I'm still unable to load data from Redshift into a Spark DataFrame in a notebook.&lt;/P&gt;&lt;P&gt;My code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;rsSQL = f"select * from {schema}.{tbl_name} where {field} in ('{data_date}')"
rsRDD = (spark.read
  .format("redshift")
  .option("query", rsSQL)
  .option("tempdir", "s3://MYBUCKET")
  .option("url", f"jdbc:redshift://{rs['host']}")
  .option("user", rs['user'])
  .option("password", rs['pwd'])
  .option("forward_spark_s3_credentials", True)
  .load()
)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Error received:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;Py4JJavaError: An error occurred while calling o659.load.
: java.lang.NullPointerException
	at com.databricks.spark.redshift.RedshiftRelation.$anonfun$schema$1(RedshiftRelation.scala:80)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.redshift.RedshiftRelation.schema$lzycompute(RedshiftRelation.scala:73)
	at com.databricks.spark.redshift.RedshiftRelation.schema(RedshiftRelation.scala:72)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:498)
	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:375)
	at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:331)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:331)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:223)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:306)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;According to the documentation, Databricks Runtime 11.2 and above include the Redshift JDBC driver, accessible using the redshift keyword for the format option, but this doesn't seem to be working for me. Is there something I'm missing?&lt;/P&gt;
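&lt;P&gt;For reference, here is a minimal sketch of the general read pattern I understand the linked documentation to describe. The host, port (5439 is the Redshift default), database name, bucket prefix, and credentials below are placeholders, not values from my environment:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# Hedged sketch of the documented read pattern; all connection values are placeholders.
df = (spark.read
  .format("redshift")
  # A full Redshift JDBC URL includes host, port, and database name.
  .option("url", "jdbc:redshift://example-cluster.abc123.us-east-1.redshift.amazonaws.com:5439/dev")
  .option("query", "select 1")
  # Temporary S3 staging location the connector unloads into.
  .option("tempdir", "s3://MYBUCKET/redshift-temp/")
  .option("user", "example_user")
  .option("password", "example_password")
  # Forward the cluster's S3 credentials to Redshift for the UNLOAD/COPY step.
  .option("forward_spark_s3_credentials", "true")
  .load()
)&lt;/CODE&gt;&lt;/PRE&gt;</description>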
      <pubDate>Tue, 30 May 2023 17:57:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-data-from-redshift/m-p/3871#M769</guid>
      <dc:creator>Edwin</dc:creator>
      <dc:date>2023-05-30T17:57:11Z</dc:date>
    </item>
  </channel>
</rss>

