<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: copy files from azure to s3 in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12559#M7359</link>
    <description>&lt;P&gt;Agreed, Azure Data Factory is definitely a better approach if all you are wanting to do is copy files to/from Azure Storage.&lt;/P&gt;</description>
    <pubDate>Wed, 11 Jan 2023 19:52:09 GMT</pubDate>
    <dc:creator>BigMF</dc:creator>
    <dc:date>2023-01-11T19:52:09Z</dc:date>
    <item>
      <title>copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12557#M7357</link>
      <description>&lt;P&gt;I am trying to copy files from azure to s3. I've created a solution by comparing file lists and copy manually to a temp file and upload. However, I just found AutoLoader and I would like to use that &lt;A href="https://docs.databricks.com/ingestion/auto-loader/index.html" target="_blank"&gt;https://docs.databricks.com/ingestion/auto-loader/index.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The problem is, it is not clear from the documentation how to pass to the streamReader the azure blob storage credentials: tenant_id, container, account_url, client_id, client_secret and the azure_path. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What is the API to do that?&lt;/P&gt;</description>
      <pubDate>Wed, 11 Jan 2023 17:46:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12557#M7357</guid>
      <dc:creator>chanansh</dc:creator>
      <dc:date>2023-01-11T17:46:12Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12558#M7358</link>
      <description>&lt;P&gt;Copying files using a data factory can be cheaper and faster.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you want access to Blob Storage / Azure Data Lake storage, you can also make a permanent mount in databricks. I described how to do it here &lt;A href="https://community.databricks.com/s/feed/0D53f00001eQGOHCA4" alt="https://community.databricks.com/s/feed/0D53f00001eQGOHCA4" target="_blank"&gt;https://community.databricks.com/s/feed/0D53f00001eQGOHCA4&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 11 Jan 2023 18:04:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12558#M7358</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-11T18:04:32Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12559#M7359</link>
      <description>&lt;P&gt;Agreed, Azure Data Factory is definitely a better approach if all you are wanting to do is copy files to/from Azure Storage.&lt;/P&gt;</description>
      <pubDate>Wed, 11 Jan 2023 19:52:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12559#M7359</guid>
      <dc:creator>BigMF</dc:creator>
      <dc:date>2023-01-11T19:52:09Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12560#M7360</link>
      <description>&lt;P&gt;I am not an azure user. I only have read permissions from the blob.&lt;/P&gt;</description>
      <pubDate>Wed, 11 Jan 2023 19:55:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12560#M7360</guid>
      <dc:creator>chanansh</dc:creator>
      <dc:date>2023-01-11T19:55:17Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12561#M7361</link>
      <description>&lt;P&gt;I need it to update all the time so I need it to keep working continuously. Anyway I only have read permissions for the azure blob.&lt;/P&gt;</description>
      <pubDate>Wed, 11 Jan 2023 19:56:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12561#M7361</guid>
      <dc:creator>chanansh</dc:creator>
      <dc:date>2023-01-11T19:56:23Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12562#M7362</link>
      <description>&lt;P&gt;ADF can be scheduled to run as often as needed or triggered based on files showing up in a container. However, based on your other statement below, it appears you are not working in an Azure environment and only have access to the storage container. I guess you could use Databricks to copy file but it seems wasteful. An analogy I would use is using a metal toolbox full of tools that are very useful for specific things and you use the box to hammer a nail in.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2023 02:18:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12562#M7362</guid>
      <dc:creator>BigMF</dc:creator>
      <dc:date>2023-01-12T02:18:33Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12563#M7363</link>
      <description>&lt;P&gt;Autoloader is the solution for me but I don't know how to set credentials &lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2023 05:58:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12563#M7363</guid>
      <dc:creator>chanansh</dc:creator>
      <dc:date>2023-01-12T05:58:53Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12564#M7364</link>
      <description>&lt;P&gt;You can also use AWS Data Pipeline.&lt;/P&gt;&lt;P&gt;What I have read is that we are talking about a plain copy, no transformations.&lt;/P&gt;&lt;P&gt;In that case firing up a spark cluster is way too much overhead, and way too expensive.&lt;/P&gt;&lt;P&gt;If you lack permissions to connect to the azure blob, I would try to fix that and not trying to find a way around by using Databricks.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2023 10:35:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12564#M7364</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-01-12T10:35:21Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12565#M7365</link>
      <description>&lt;P&gt;I want to use AutoLoader. I just need to know how to pass credentials to the StreamReader &lt;/P&gt;</description>
      <pubDate>Sun, 15 Jan 2023 11:36:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12565#M7365</guid>
      <dc:creator>chanansh</dc:creator>
      <dc:date>2023-01-15T11:36:12Z</dc:date>
    </item>
    <item>
      <title>Re: copy files from azure to s3</title>
      <link>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12566#M7366</link>
      <description>&lt;P&gt;Just use tools like Goodsync and Gs Richcopy 360 to copy directly from blob to S3, I think you will never face problems like that &lt;/P&gt;</description>
      <pubDate>Fri, 20 Jan 2023 13:06:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/copy-files-from-azure-to-s3/m-p/12566#M7366</guid>
      <dc:creator>Falokun</dc:creator>
      <dc:date>2023-01-20T13:06:27Z</dc:date>
    </item>
  </channel>
</rss>

