Data Engineering
Join discussions on data engineering best practices, architectures, and optimization strategies within the Databricks Community. Exchange insights and solutions with fellow data engineers.

Optimal Batch Size for Batch Insert Queries using JDBC for Delta Tables

ridrasura
New Contributor III

Hi,

I am currently experimenting with databricks-jdbc 2.6.29 and trying to execute batch insert queries.

What is the optimal batch size recommended by Databricks for performing batch insert queries?

Currently, it seems that values are inserted row by row even when a batch insert is performed.

Is there a recommended way to perform a multi-row insert?

Note: I am using commons-dbutils as a wrapper over JDBC.

import com.databricks.client.jdbc.DataSource; // DataSource class shipped with the Databricks JDBC driver (2.6.25+)
import org.apache.commons.dbutils.QueryRunner;

import java.sql.Connection;
import java.sql.SQLException;

public static void insertRecordsBatchDbUtils() {
    String dbUrl = "jdbc:databricks://#####################";

    // Authenticate with a personal access token ("token" as the user ID).
    DataSource d = new DataSource();
    d.setURL(dbUrl);
    d.setUserID("token");
    d.setPassword("##############################");

    QueryRunner queryRunner = new QueryRunner();
    try (Connection con = d.getConnection()) {
        String sql = "INSERT INTO events.segment_debuggability_test VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

        // commons-dbutils batch(): one parameter array per row, executed as a JDBC batch.
        int[] result = queryRunner.batch(con, sql, new Object[][]{
                {"130", "bolt_regular_customer", "advertiser public", "DELETE", "3455642", "bolt", "org3", "org", "file_34", "azure_blob_path_1", "M-Particle", "success", "10/10/2022 20:00", "compliance success", "1"},
                {"132", "swiggy_customer", "advertiser private", "ADD", "3455634", "swiggy", "org5", "org", "file_209", "s3_path_134", "Private Segment", "success", "25/10/2022 20:00", "compliance success", "1"},
                {"122", "zomato_customer", "advertiser public", "ADD", "34556445", "zomato", "org45", "org", "file_090", "s3_path_161", "S3 Segment Upload", "failure", "13/10/2022 20:00", "compliance failure : advertiser-non-compliant", "1"}});

        System.out.println("Inserted Records :: " + result.length);
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
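
For reference, the kind of multi-row insert I had in mind is sketched below: build a single INSERT ... VALUES (...), (...), (...) statement so all rows travel in one statement rather than one round trip per row. This is only a sketch assuming the same 15-column table as above; I have not confirmed it is the approach Databricks recommends.

// Sketch only: one multi-row INSERT statement (additionally requires java.sql.PreparedStatement).
public static void insertRecordsMultiRow(Connection con, Object[][] rows) throws SQLException {
    String placeholders = "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; // 15 columns, as above
    StringBuilder sql = new StringBuilder("INSERT INTO events.segment_debuggability_test VALUES ");
    for (int i = 0; i < rows.length; i++) {
        sql.append(i == 0 ? "" : ", ").append(placeholders);
    }
    try (PreparedStatement ps = con.prepareStatement(sql.toString())) {
        int idx = 1; // bind every row's columns into the single statement
        for (Object[] row : rows) {
            for (Object col : row) {
                ps.setObject(idx++, col);
            }
        }
        ps.executeUpdate();
    }
}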

1 REPLY

ridrasura
New Contributor III

Just an observation: by enabling the auto optimize table-level properties, I was able to see the batch inserts writing records into a single file.

https://docs.databricks.com/optimizations/auto-optimize.html
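
For anyone else trying this, the properties described in that doc can be set on the table as in the sketch below (con is an open java.sql.Connection as in the snippet above; the property names come from the linked page):

// Sketch: enable auto optimize on the Delta table via plain JDBC (requires java.sql.Statement).
try (Statement stmt = con.createStatement()) {
    stmt.execute("ALTER TABLE events.segment_debuggability_test SET TBLPROPERTIES ("
            + "'delta.autoOptimize.optimizeWrite' = 'true', "
            + "'delta.autoOptimize.autoCompact' = 'true')");
}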
