<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to query sql warehouse tables with spark? in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/how-to-query-sql-warehouse-tables-with-spark/m-p/56518#M2154</link>
    <description>&lt;P&gt;Hey there... I managed to query my data following this guide &lt;A href="https://learn.microsoft.com/en-us/azure/databricks/dev-tools/python-sql-connector" target="_blank" rel="noopener"&gt;https://learn.microsoft.com/en-us/azure/databricks/dev-tools/python-sql-connector&lt;/A&gt;&lt;BR /&gt;using databricks sql&lt;/P&gt;&lt;PRE&gt;#!/usr/bin/env python3&lt;BR /&gt;&lt;BR /&gt;from databricks import sql&lt;BR /&gt;&lt;BR /&gt;with sql.connect(server_hostname = "adb-xxx.azuredatabricks.net",&lt;BR /&gt;                 http_path = "/sql/1.0/warehouses/xxx",&lt;BR /&gt;                 access_token = "xxx") as connection:&lt;BR /&gt;   with connection.cursor() as cursor:&lt;BR /&gt;   ##https://learn.microsoft.com/en-us/azure/databricks/quer&lt;BR /&gt;   cursor.execute("SELECT * FROM democatalog.users.people LIMIT 2")&lt;BR /&gt;   result = cursor.fetchall()&lt;BR /&gt;   for row in result:&lt;BR /&gt;     print(row)&lt;/PRE&gt;&lt;P&gt;But how do I translate this into pyspark?&lt;/P&gt;&lt;P&gt;Below is my first jumbled up attempt. clearly lots of stuff is still missing.&lt;/P&gt;&lt;P&gt;It took me a day to just find out how to include the missing jars. 
Now I don't know how to connect and address&lt;/P&gt;&lt;P&gt;my &lt;EM&gt;people&lt;/EM&gt; table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;#!/usr/bin/env python3&lt;BR /&gt;&lt;BR /&gt;from pyspark.sql import SparkSession&lt;BR /&gt;from delta.tables import&lt;BR /&gt;&lt;BR /&gt;additional_libraries = [&lt;BR /&gt;"io.delta:delta-core_2.12:2.4.0",&lt;BR /&gt;"com.databricks:spark-xml_2.12:0.17.0",&lt;BR /&gt;]&lt;BR /&gt;&lt;BR /&gt;# Set up the SparkSession: https://docs.delta.io/latest/quick-start.html&lt;BR /&gt;spark = SparkSession.builder \&lt;BR /&gt;    .appName("Databricks Table Query") \&lt;BR /&gt;    .config("spark.??????", server_hostname) \&lt;BR /&gt;    .config("spark.??????", access_token) \&lt;BR /&gt;    .config("spark.??????", http_path) \&lt;BR /&gt;    .config("spark.jars.packages", ",".join(additional_libraries)) \&lt;BR /&gt;    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \&lt;BR /&gt;    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \&lt;BR /&gt;    .config("spark.sql.execution.arrow.pyspark.enabled", "true") \&lt;BR /&gt;    .getOrCreate()&lt;BR /&gt;&lt;BR /&gt;# Specify the catalog, schema, and table&lt;BR /&gt;catalog = "democatalog"&lt;BR /&gt;schema = "users"&lt;BR /&gt;table = "people"&lt;BR /&gt;volume = 'sajt'&lt;BR /&gt;# ???....&lt;BR /&gt;# delta_table_identifier = f"{catalog}.{schema}.{table}"&lt;BR /&gt;# deltaTable = DeltaTable.forName(spark, delta_table_identifier)&lt;BR /&gt;# df = deltaTable.toDF()&lt;BR /&gt;# df.show()&lt;/PRE&gt;&lt;P&gt;Thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 05 Jan 2024 15:37:42 GMT</pubDate>
    <dc:creator>mobe</dc:creator>
    <dc:date>2024-01-05T15:37:42Z</dc:date>
    <item>
      <title>How to query sql warehouse tables with spark?</title>
      <link>https://community.databricks.com/t5/get-started-discussions/how-to-query-sql-warehouse-tables-with-spark/m-p/56518#M2154</link>
      <description>&lt;P&gt;Hey there... I managed to query my data following this guide &lt;A href="https://learn.microsoft.com/en-us/azure/databricks/dev-tools/python-sql-connector" target="_blank" rel="noopener"&gt;https://learn.microsoft.com/en-us/azure/databricks/dev-tools/python-sql-connector&lt;/A&gt;&lt;BR /&gt;using databricks sql&lt;/P&gt;&lt;PRE&gt;#!/usr/bin/env python3&lt;BR /&gt;&lt;BR /&gt;from databricks import sql&lt;BR /&gt;&lt;BR /&gt;with sql.connect(server_hostname = "adb-xxx.azuredatabricks.net",&lt;BR /&gt;                 http_path = "/sql/1.0/warehouses/xxx",&lt;BR /&gt;                 access_token = "xxx") as connection:&lt;BR /&gt;   with connection.cursor() as cursor:&lt;BR /&gt;   ##https://learn.microsoft.com/en-us/azure/databricks/quer&lt;BR /&gt;   cursor.execute("SELECT * FROM democatalog.users.people LIMIT 2")&lt;BR /&gt;   result = cursor.fetchall()&lt;BR /&gt;   for row in result:&lt;BR /&gt;     print(row)&lt;/PRE&gt;&lt;P&gt;But how do I translate this into pyspark?&lt;/P&gt;&lt;P&gt;Below is my first jumbled up attempt. clearly lots of stuff is still missing.&lt;/P&gt;&lt;P&gt;It took me a day to just find out how to include the missing jars. 
Now I don't know how to connect and address&lt;/P&gt;&lt;P&gt;my &lt;EM&gt;people&lt;/EM&gt; table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;#!/usr/bin/env python3&lt;BR /&gt;&lt;BR /&gt;from pyspark.sql import SparkSession&lt;BR /&gt;from delta.tables import&lt;BR /&gt;&lt;BR /&gt;additional_libraries = [&lt;BR /&gt;"io.delta:delta-core_2.12:2.4.0",&lt;BR /&gt;"com.databricks:spark-xml_2.12:0.17.0",&lt;BR /&gt;]&lt;BR /&gt;&lt;BR /&gt;# Set up the SparkSession: https://docs.delta.io/latest/quick-start.html&lt;BR /&gt;spark = SparkSession.builder \&lt;BR /&gt;    .appName("Databricks Table Query") \&lt;BR /&gt;    .config("spark.??????", server_hostname) \&lt;BR /&gt;    .config("spark.??????", access_token) \&lt;BR /&gt;    .config("spark.??????", http_path) \&lt;BR /&gt;    .config("spark.jars.packages", ",".join(additional_libraries)) \&lt;BR /&gt;    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \&lt;BR /&gt;    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \&lt;BR /&gt;    .config("spark.sql.execution.arrow.pyspark.enabled", "true") \&lt;BR /&gt;    .getOrCreate()&lt;BR /&gt;&lt;BR /&gt;# Specify the catalog, schema, and table&lt;BR /&gt;catalog = "democatalog"&lt;BR /&gt;schema = "users"&lt;BR /&gt;table = "people"&lt;BR /&gt;volume = 'sajt'&lt;BR /&gt;# ???....&lt;BR /&gt;# delta_table_identifier = f"{catalog}.{schema}.{table}"&lt;BR /&gt;# deltaTable = DeltaTable.forName(spark, delta_table_identifier)&lt;BR /&gt;# df = deltaTable.toDF()&lt;BR /&gt;# df.show()&lt;/PRE&gt;&lt;P&gt;Thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 05 Jan 2024 15:37:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/how-to-query-sql-warehouse-tables-with-spark/m-p/56518#M2154</guid>
      <dc:creator>mobe</dc:creator>
      <dc:date>2024-01-05T15:37:42Z</dc:date>
    </item>
    <item>
      <title>Re: How to query sql warehouse tables with spark?</title>
      <link>https://community.databricks.com/t5/get-started-discussions/how-to-query-sql-warehouse-tables-with-spark/m-p/56979#M2186</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/97578"&gt;@mobe&lt;/a&gt;&amp;nbsp; - Please refer to the GitHub link for more examples -&amp;nbsp;&lt;A href="https://github.com/databricks/databricks-sql-python/blob/main/examples" target="_blank"&gt;https://github.com/databricks/databricks-sql-python/blob/main/examples&lt;/A&gt;.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks, Shan&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jan 2024 17:50:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/how-to-query-sql-warehouse-tables-with-spark/m-p/56979#M2186</guid>
      <dc:creator>shan_chandra</dc:creator>
      <dc:date>2024-01-11T17:50:45Z</dc:date>
    </item>
  </channel>
</rss>

