<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Vacuum on external tables that we mount on ADLS in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27717#M19572</link>
    <description>&lt;P&gt;Vacuum will actually remove unused files (without the dry run option), depending on the retention interval.&lt;/P&gt;&lt;P&gt;Check &lt;A href="https://community.databricks.com/s/feed/0D58Y00009KBQasSAH" alt="https://community.databricks.com/s/feed/0D58Y00009KBQasSAH" target="_blank"&gt;this topic&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 13 Oct 2022 10:33:20 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2022-10-13T10:33:20Z</dc:date>
    <item>
      <title>Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27713#M19568</link>
      <description>&lt;P&gt;I want to know the best process for removing files on ADLS after an OPTIMIZE and a VACUUM dry run have completed.&lt;/P&gt;</description>
      <pubDate>Wed, 12 Oct 2022 19:19:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27713#M19568</guid>
      <dc:creator>ravikanthranjit</dc:creator>
      <dc:date>2022-10-12T19:19:46Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27714#M19569</link>
      <description>&lt;P&gt;Credit to one of the community members, from whom I took the file-existence check code.&lt;/P&gt;</description>
      <pubDate>Wed, 12 Oct 2022 19:49:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27714#M19569</guid>
      <dc:creator>ravikanthranjit</dc:creator>
      <dc:date>2022-10-12T19:49:14Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27715#M19570</link>
      <description>&lt;P&gt;I would like community members' feedback on the code below, which works for a specified table; it can be parameterized and run.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;But is this the best way to manage (delete) unwanted files of Delta tables that are stored externally in ADLS? Please let me know.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;def file_exists_delete(path):
    try:
        dbutils.fs.ls(path)
        dbutils.fs.rm(path)
        print('removed the file '+path)
        return True
    except Exception as e:
        if 'java.io.FileNotFoundException' in str(e):
            return False
        else:
            raise
 
  
 #Copy in Separate Cell
spark.sql("OPTIMIZE tbl_name")
df=spark.sql("VACUUM tbl_name RETAIN 0 HOURS DRY RUN")
&amp;nbsp;
&amp;nbsp;
#Copy in Separate Cell
df_collect=df.collect()
&amp;nbsp;
#Copy in Separate Cell and execute
for row in df_collect:
     file_exists_delete(row[0])&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 12 Oct 2022 19:53:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27715#M19570</guid>
      <dc:creator>ravikanthranjit</dc:creator>
      <dc:date>2022-10-12T19:53:35Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27716#M19571</link>
      <description>&lt;P&gt;Do not remove files from Delta Lake tables manually.  That is why vacuum exists.&lt;/P&gt;&lt;P&gt;Manual deletion can lead to a corrupt table.&lt;/P&gt;&lt;P&gt;Why not just run a vacuum without the dry run? &lt;/P&gt;</description>
      <pubDate>Thu, 13 Oct 2022 08:38:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27716#M19571</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-10-13T08:38:57Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27717#M19572</link>
      <description>&lt;P&gt;Vacuum will actually remove unused files (without the dry run option), depending on the retention interval.&lt;/P&gt;&lt;P&gt;Check &lt;A href="https://community.databricks.com/s/feed/0D58Y00009KBQasSAH" alt="https://community.databricks.com/s/feed/0D58Y00009KBQasSAH" target="_blank"&gt;this topic&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 13 Oct 2022 10:33:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27717#M19572</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-10-13T10:33:20Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27718#M19573</link>
      <description>&lt;P&gt;If you have external delta files, you can use the Python API to clean them up by path:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from delta.tables import *
&amp;nbsp;
deltaTable = DeltaTable.forPath(spark, pathToTable)
&amp;nbsp;
deltaTable.vacuum()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 16 Oct 2022 18:56:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27718#M19573</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-10-16T18:56:07Z</dc:date>
    </item>
    <item>
      <title>Re: Vacuum on external tables that we mount on ADLS</title>
      <link>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27719#M19574</link>
      <description>&lt;P&gt;Hi @Ravikanth Narayanabhatla​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope all is well! &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or&lt;B&gt; mark an answer as best&lt;/B&gt;? Else please let us know if you need more help.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 20 Nov 2022 06:41:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/vacuum-on-external-tables-that-we-mount-on-adls/m-p/27719#M19574</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-11-20T06:41:57Z</dc:date>
    </item>
  </channel>
</rss>

