package com.example.databricks;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.jdbc.JdbcDialect;
import org.apache.spark.sql.jdbc.JdbcDialects;
public class DatabricksJDBCApp {
public static void main(String[] args) {
// Initialize Spark Session
SparkSession spark = SparkSession.builder()
.appName("Databricks JDBC Example")
.master("local")
.getOrCreate();
string pwd = "XXXXXXXXXXXXXXXXXXXXXXXX";
string warehouseId = "XXXXXXXXXXXXXXX";
String host = "XXXXXXXXXXXXXXXXXXX"
// JDBC URL to connect to Databricks
String url = "jdbc:databricks://host:443;" +
"transportMode=http;ssl=1;" +
"HttpPath=/sql/1.0/warehouses/warehouseId;" +
"UID=token;PWD="pwd;
// Specify schema and table
String dbTable = "framework_databricks.test_table3";
// JDBC Driver Class
String driver = "com.databricks.client.jdbc.Driver";
Dataset<Row> databricksDF = spark.read()
.format("jdbc")
.option("url", url)
.option("dbtable", dbTable)
.option("driver", driver)
.load();
// Show schema and data
databricksDF.printSchema();
System.out.println("Row Count: " + databricksDF.count());
databricksDF.show();
// Stop Spark session
spark.stop();
}
}
spark version :: 3.5.0
databricks jdbc version :: 2.6.40
Actual output ::
Row Count: 2
+---+----+
| id|name|
+---+----+
| id|name|
| id|name|
+---+----+
Expected output ::
Row Count: 2
+----+-----+
| id |name |
+----+-----+
| one|Alice|
| two|Bob |
+----+-----+
I am trying to integrate Databricks into my Java application; the code above is the snippet I use for the JDBC connection. When reading data from a Databricks table, the DataFrame returns only the column headers as row values (expected and actual outputs shown above). After some searching, I tried specifying the table name both with and without the schema prefix, but the issue persists.