cancel
Showing results for 
Search instead for 
Did you mean: 
Data Engineering
cancel
Showing results for 
Search instead for 
Did you mean: 

How to get the total number of records in a delta table from the stats, without querying it?

User16869510359
Esteemed Contributor
 
1 ACCEPTED SOLUTION

Accepted Solutions

User16869510359
Esteemed Contributor

The code below gets the number of records in a Delta table without querying the table itself: it sums the per-file `numRecords` statistic recorded in the table's Delta log.

%scala
import com.databricks.sql.transaction.tahoe.DeltaLog
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
 
// Placeholder value: replace with the storage location of the Delta table to inspect.
val deltaTablePath = "location of the table"
 
/**
 * Returns the total number of records in a Delta table by summing the per-file
 * `numRecords` statistic from the table's latest snapshot, instead of running a
 * count over the table data.
 *
 * Relies on the notebook-provided `spark` session.
 *
 * @param deltaTablePath location of the Delta table
 * @return the sum of `numRecords` over all files in the snapshot, or `0` when that
 *         sum is null (no files, or no file carries the statistic)
 */
def getRecordCount(deltaTablePath: String): Long = {
  val snapshot = DeltaLog.forTable(spark, new Path(deltaTablePath)).update()
  // `stats` is a JSON string on each file entry; parse it with the snapshot's stats schema.
  val parsedStats = from_json(col("stats"), snapshot.statsSchema)
  // NOTE(review): files whose stats lack `numRecords` contribute nothing to the sum, so the
  // result may undercount if statistics were not collected for some writes -- confirm.
  snapshot.allFiles
    .select(parsedStats.getField("numRecords").as("numRecords"))
    // A global aggregate always yields exactly one row; coalesce turns a null sum into 0.
    .agg(coalesce(sum(col("numRecords")), lit(0L)).cast("long"))
    .first()
    .getLong(0)
}
// Print the record count obtained for the table at `deltaTablePath`.
println(getRecordCount(deltaTablePath))

View solution in original post

1 REPLY 1

User16869510359
Esteemed Contributor

The code below gets the number of records in a Delta table without querying the table itself: it sums the per-file `numRecords` statistic recorded in the table's Delta log.

%scala
import com.databricks.sql.transaction.tahoe.DeltaLog
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
 
// Placeholder value: replace with the storage location of the Delta table to inspect.
val deltaTablePath = "location of the table"
 
/**
 * Returns the total number of records in a Delta table by summing the per-file
 * `numRecords` statistic from the table's latest snapshot, instead of running a
 * count over the table data.
 *
 * Relies on the notebook-provided `spark` session.
 *
 * @param deltaTablePath location of the Delta table
 * @return the sum of `numRecords` over all files in the snapshot, or `0` when that
 *         sum is null (no files, or no file carries the statistic)
 */
def getRecordCount(deltaTablePath: String): Long = {
  val snapshot = DeltaLog.forTable(spark, new Path(deltaTablePath)).update()
  // `stats` is a JSON string on each file entry; parse it with the snapshot's stats schema.
  val parsedStats = from_json(col("stats"), snapshot.statsSchema)
  // NOTE(review): files whose stats lack `numRecords` contribute nothing to the sum, so the
  // result may undercount if statistics were not collected for some writes -- confirm.
  snapshot.allFiles
    .select(parsedStats.getField("numRecords").as("numRecords"))
    // A global aggregate always yields exactly one row; coalesce turns a null sum into 0.
    .agg(coalesce(sum(col("numRecords")), lit(0L)).cast("long"))
    .first()
    .getLong(0)
}
// Print the record count obtained for the table at `deltaTablePath`.
println(getRecordCount(deltaTablePath))

Welcome to Databricks Community: Let's learn, network and celebrate together

Join our fast-growing data practitioner and expert community of 80K+ members, ready to discover, help and collaborate together while making meaningful connections. 

Click here to register and join today! 

Engage in exciting technical discussions, join a group with your peers and meet our Featured Members.