<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Show Vacuum operation result (files deleted) without DRY RUN in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26358#M18434</link>
    <description>&lt;PRE&gt;&lt;CODE&gt;SELECT * FROM (DESCRIBE HISTORY table)x WHERE operation IN ('VACUUM END', 'VACUUM START');&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;that gives us required information:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="imagen.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2058iE0915EC36145561F/image-size/large?v=v2&amp;amp;px=999" role="button" title="imagen.png" alt="imagen.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 07 Mar 2022 10:44:07 GMT</pubDate>
    <dc:creator>Hubert-Dudek</dc:creator>
    <dc:date>2022-03-07T10:44:07Z</dc:date>
    <item>
      <title>Show Vacuum operation result (files deleted) without DRY RUN</title>
      <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26356#M18432</link>
      <description>&lt;P&gt;Hi, I'm running some scheduled vacuum jobs and would like to know how many files were deleted without making all the computation twice, with and without DRY RUN. Is there a way to accomplish this?&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;</description>
      <pubDate>Mon, 07 Mar 2022 01:09:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26356#M18432</guid>
      <dc:creator>alejandrofm</dc:creator>
      <dc:date>2022-03-07T01:09:33Z</dc:date>
    </item>
    <item>
      <title>Re: Show Vacuum operation result (files deleted) without DRY RUN</title>
      <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26357#M18433</link>
      <description>&lt;P&gt;Hi @Alejandro Martinez&amp;nbsp;:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;I don't think we have any such command to get the statistics before vacuum and after vacuum.&lt;/P&gt;&lt;P&gt;&amp;nbsp;At least I haven't come across any.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;If you want to capture more details, maybe you can write a function to capture the statistics as below.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Data files size:&lt;/P&gt;&lt;P&gt;&amp;nbsp;Data files count:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Before:&lt;/P&gt;&lt;P&gt;	var getDataFileSize = 0	&lt;/P&gt;&lt;P&gt;	val getDataFileCount = dbutils.fs.ls(&amp;lt;Your Table Path&amp;gt;).toList.size&lt;/P&gt;&lt;P&gt;	dbutils.fs.ls(&amp;lt;Your Table Path&amp;gt;)&lt;/P&gt;&lt;P&gt;	&amp;nbsp;.foreach&lt;/P&gt;&lt;P&gt;	&amp;nbsp;{&lt;/P&gt;&lt;P&gt;		&amp;nbsp;&amp;nbsp;file =&amp;gt;&lt;/P&gt;&lt;P&gt;		&amp;nbsp;&amp;nbsp;getDataFileSize = getDataFileSize + file.size&lt;/P&gt;&lt;P&gt;	&amp;nbsp;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;After:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; Repeat above&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Let's see if other community members have better ideas on this.&lt;/P&gt;</description>
      <pubDate>Mon, 07 Mar 2022 05:51:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26357#M18433</guid>
      <dc:creator>RKNutalapati</dc:creator>
      <dc:date>2022-03-07T05:51:23Z</dc:date>
    </item>
    <item>
      <title>Re: Show Vacuum operation result (files deleted) without DRY RUN</title>
      <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26358#M18434</link>
      <description>&lt;PRE&gt;&lt;CODE&gt;SELECT * FROM (DESCRIBE HISTORY table)x WHERE operation IN ('VACUUM END', 'VACUUM START');&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;that gives us required information:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="imagen.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2058iE0915EC36145561F/image-size/large?v=v2&amp;amp;px=999" role="button" title="imagen.png" alt="imagen.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 07 Mar 2022 10:44:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26358#M18434</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-03-07T10:44:07Z</dc:date>
    </item>
    <item>
      <title>Re: Show Vacuum operation result (files deleted) without DRY RUN</title>
      <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26359#M18435</link>
      <description>&lt;P&gt;Thank you! Not the solution I was looking for, but it seems nothing better exists...yet so going for that.&lt;/P&gt;&lt;P&gt;Thanks!!!&lt;/P&gt;</description>
      <pubDate>Mon, 07 Mar 2022 14:13:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26359#M18435</guid>
      <dc:creator>alejandrofm</dc:creator>
      <dc:date>2022-03-07T14:13:20Z</dc:date>
    </item>
    <item>
      <title>Re: Show Vacuum operation result (files deleted) without DRY RUN</title>
      <link>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26360#M18436</link>
      <description>&lt;P&gt;We have to enable logging to capture the logs for vacuum.&lt;/P&gt;&lt;P&gt;spark.conf.set("spark.databricks.delta.vacuum.logging.enabled","true")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 07 Mar 2022 14:22:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/show-vacuum-operation-result-files-deleted-without-dry-run/m-p/26360#M18436</guid>
      <dc:creator>RKNutalapati</dc:creator>
      <dc:date>2022-03-07T14:22:17Z</dc:date>
    </item>
  </channel>
</rss>

