<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic JDBC driver CPU consumption in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132369#M49445</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am using the JDBC driver to execute an insert statement with several thousand rows (~4MB). It takes several seconds to complete and for some reason consumes one full CPU core while doing so.&lt;/P&gt;&lt;P&gt;It seems like a lot of the time is spent in this method:&lt;/P&gt;&lt;P&gt;com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.stripCatalogName&lt;/P&gt;&lt;P&gt;Sample stack trace:&lt;/P&gt;&lt;LI-CODE lang="java"&gt;void java.util.regex.Pattern.compile()
void java.util.regex.Pattern.&amp;lt;init&amp;gt;(String, int)
Pattern java.util.regex.Pattern.compile(String, int)
String com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.RemoveCatalogFromQueryStringInternal(String, String, ILogger)
String com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.stripCatalogName(String, ILogger, HiveJDBCSettings, IWarningListener)
void com.databricks.client.hivecommon.dataengine.HiveJDBCNativeQueryExecutor.&amp;lt;init&amp;gt;(ILogger, IHiveClient, HiveJDBCStatement, String, HiveJDBCCommonConnection, boolean, ConnSettingRequestMap, boolean, boolean)
IQueryExecutor com.databricks.client.hivecommon.dataengine.HiveJDBCDataEngine.prepare(String)
void com.databricks.client.jdbc.common.SPreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.jdbc.jdbc41.S41PreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.jdbc.jdbc42.S42PreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.hivecommon.jdbc42.Hive42PreparedStatement.&amp;lt;init&amp;gt;(String, HiveJDBCStatement, SConnection, int)
SPreparedStatement com.databricks.client.spark.jdbc.SparkJDBCObjectFactory.createPreparedStatement(String, IStatement, SConnection, int)
IJDBCPreparedStatement com.databricks.client.jdbc.common.JDBCObjectFactory.newPreparedStatement(String, IStatement, SConnection, int)
IJDBCPreparedStatement com.databricks.client.jdbc.common.SConnection$6.create(IStatement)
IJDBCStatement com.databricks.client.jdbc.common.SConnection$6.create(IStatement)
IJDBCStatement com.databricks.client.jdbc.common.SConnection$StatementCreator.create()
IJDBCPreparedStatement com.databricks.client.jdbc.common.SConnection.prepareStatement(String, int, int)
PreparedStatement com.databricks.client.jdbc.common.SConnection.prepareStatement(String, int, int)&lt;/LI-CODE&gt;&lt;P&gt;How can this be fixed so it would not be CPU bound?&lt;/P&gt;&lt;P&gt;Driver version:&lt;/P&gt;&lt;DIV&gt;&lt;PRE&gt;&lt;SPAN&gt;com.databricks:databricks-jdbc:2.6.40&lt;/SPAN&gt;&lt;/PRE&gt;&lt;/DIV&gt;</description>
    <pubDate>Thu, 18 Sep 2025 07:38:18 GMT</pubDate>
    <dc:creator>ivni</dc:creator>
    <dc:date>2025-09-18T07:38:18Z</dc:date>
    <item>
      <title>JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132369#M49445</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am using the JDBC driver to execute an insert statement with several thousand rows (~4MB). It takes several seconds to complete and for some reason consumes one full CPU core while doing so.&lt;/P&gt;&lt;P&gt;It seems like a lot of the time is spent in this method:&lt;/P&gt;&lt;P&gt;com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.stripCatalogName&lt;/P&gt;&lt;P&gt;Sample stack trace:&lt;/P&gt;&lt;LI-CODE lang="java"&gt;void java.util.regex.Pattern.compile()
void java.util.regex.Pattern.&amp;lt;init&amp;gt;(String, int)
Pattern java.util.regex.Pattern.compile(String, int)
String com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.RemoveCatalogFromQueryStringInternal(String, String, ILogger)
String com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.stripCatalogName(String, ILogger, HiveJDBCSettings, IWarningListener)
void com.databricks.client.hivecommon.dataengine.HiveJDBCNativeQueryExecutor.&amp;lt;init&amp;gt;(ILogger, IHiveClient, HiveJDBCStatement, String, HiveJDBCCommonConnection, boolean, ConnSettingRequestMap, boolean, boolean)
IQueryExecutor com.databricks.client.hivecommon.dataengine.HiveJDBCDataEngine.prepare(String)
void com.databricks.client.jdbc.common.SPreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.jdbc.jdbc41.S41PreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.jdbc.jdbc42.S42PreparedStatement.&amp;lt;init&amp;gt;(String, IStatement, SConnection, int)
void com.databricks.client.hivecommon.jdbc42.Hive42PreparedStatement.&amp;lt;init&amp;gt;(String, HiveJDBCStatement, SConnection, int)
SPreparedStatement com.databricks.client.spark.jdbc.SparkJDBCObjectFactory.createPreparedStatement(String, IStatement, SConnection, int)
IJDBCPreparedStatement com.databricks.client.jdbc.common.JDBCObjectFactory.newPreparedStatement(String, IStatement, SConnection, int)
IJDBCPreparedStatement com.databricks.client.jdbc.common.SConnection$6.create(IStatement)
IJDBCStatement com.databricks.client.jdbc.common.SConnection$6.create(IStatement)
IJDBCStatement com.databricks.client.jdbc.common.SConnection$StatementCreator.create()
IJDBCPreparedStatement com.databricks.client.jdbc.common.SConnection.prepareStatement(String, int, int)
PreparedStatement com.databricks.client.jdbc.common.SConnection.prepareStatement(String, int, int)&lt;/LI-CODE&gt;&lt;P&gt;How can this be fixed so it would not be CPU bound?&lt;/P&gt;&lt;P&gt;Driver version:&lt;/P&gt;&lt;DIV&gt;&lt;PRE&gt;&lt;SPAN&gt;com.databricks:databricks-jdbc:2.6.40&lt;/SPAN&gt;&lt;/PRE&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 18 Sep 2025 07:38:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132369#M49445</guid>
      <dc:creator>ivni</dc:creator>
      <dc:date>2025-09-18T07:38:18Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132376#M49447</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/185360"&gt;@ivni&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Yes, that method could be CPU intensive. According to the driver's docs, it removes the catalog name from the query statement. But it does this via regex patterns - this is a heavy operation from a CPU perspective, especially if you have a lot of complex queries.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="szymon_dybczak_0-1758181838729.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/20087i471E19315FD7FE52/image-size/medium?v=v2&amp;amp;px=400" role="button" title="szymon_dybczak_0-1758181838729.png" alt="szymon_dybczak_0-1758181838729.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;What you can try is adding &lt;STRONG&gt;useNativeQuery=1&amp;nbsp;&lt;/STRONG&gt;to your connection string. With that setting, the driver passes the SQL queries verbatim to Databricks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 18 Sep 2025 07:52:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132376#M49447</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-09-18T07:52:50Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132548#M49540</link>
      <description>&lt;P&gt;Thank you for the suggestion, but&amp;nbsp;&lt;STRONG&gt;useNativeQuery=1&amp;nbsp;&lt;/STRONG&gt;doesn't seem to reduce CPU usage. Usage example:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;        String sql = Files.readString(Path.of("insert.sql"));
        String url = "jdbc:databricks://host.cloud.databricks.com:443/data;connschema=schema;transportMode=http;ssl=1;AuthMech=3;httpPath=/path;useNativeQuery=1";

        Properties props = new Properties();
        props.setProperty("user", "token");
        props.setProperty("password", "&amp;lt;token&amp;gt;");
        props.setProperty("useNativeQuery", "1");
        Driver driver = DriverManager.getDriver(url);
        try (Connection conn = driver.connect(url, props);
             Statement st = conn.createStatement()) {
            st.execute(sql);
        }&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;Any other suggestions?&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 11:15:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132548#M49540</guid>
      <dc:creator>ivni</dc:creator>
      <dc:date>2025-09-19T11:15:05Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132549#M49541</link>
      <description>&lt;P&gt;Hi,&amp;nbsp;&lt;/P&gt;&lt;P&gt;You can also try disabling this by setting StripCatalogName=0 in your JDBC connection string.&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 11:49:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132549#M49541</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-09-19T11:49:31Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132551#M49542</link>
      <description>&lt;P&gt;&lt;SPAN&gt;StripCatalogName=0 doesn't seem to have any effect either.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 12:01:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132551#M49542</guid>
      <dc:creator>ivni</dc:creator>
      <dc:date>2025-09-19T12:01:37Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132554#M49544</link>
      <description>&lt;P&gt;Ok, one last thing. Try explicitly adding the catalog and schema (ConnCatalog and ConnSchema) to the JDBC connection string:&lt;/P&gt;&lt;P&gt;ConnCatalog=your_catalog;ConnSchema=your_schema;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 12:04:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132554#M49544</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-09-19T12:04:02Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132561#M49550</link>
      <description>&lt;P&gt;So I guess something like this?&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;jdbc:databricks://host.cloud.databricks.com:443;httpPath=/path;ConnCatalog=data;ConnSchema=schema;transportMode=http;ssl=1;AuthMech=3;useNativeQuery=1;StripCatalogName=0&lt;/LI-CODE&gt;&lt;P&gt;These measures don't seem to influence CPU consumption.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 12:51:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132561#M49550</guid>
      <dc:creator>ivni</dc:creator>
      <dc:date>2025-09-19T12:51:03Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132562#M49551</link>
      <description>&lt;P&gt;Could you check the stack trace once again, then? In your previous message you wrote that most of the time is spent in the method below:&lt;/P&gt;&lt;P&gt;com.databricks.client.hivecommon.utils.HiveCommonQueryTranslationUtils.stripCatalogName&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;What does it look like now?&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 13:04:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132562#M49551</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-09-19T13:04:42Z</dc:date>
    </item>
    <item>
      <title>Re: JDBC driver CPU consumption</title>
      <link>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132570#M49554</link>
      <description>&lt;P&gt;It is still there:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="ivni_0-1758291599928.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/20119i7B944074137FAAF6/image-size/medium?v=v2&amp;amp;px=400" role="button" title="ivni_0-1758291599928.png" alt="ivni_0-1758291599928.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Sep 2025 14:22:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/jdbc-driver-cpu-consumption/m-p/132570#M49554</guid>
      <dc:creator>ivni</dc:creator>
      <dc:date>2025-09-19T14:22:56Z</dc:date>
    </item>
  </channel>
</rss>

