<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Downloading and storing a PDF file to FileStore not working in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36170#M26067</link>
    <description>&lt;P&gt;This worked, thanks.&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 29 Jun 2023 14:14:03 GMT</pubDate>
    <dc:creator>msa_2j212</dc:creator>
    <dc:date>2023-06-29T14:14:03Z</dc:date>
    <item>
      <title>Downloading and storing a PDF file to FileStore not working</title>
      <link>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36160#M26062</link>
      <description>&lt;P&gt;I'm trying to download a PDF file and store it in FileStore using this code in a Notebook:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;with open('/dbfs/FileStore/file.pdf', 'wb') as f:
    f.write(requests.get('https://url.com/file.pdf').content)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;But I'm getting this error:&lt;/P&gt;&lt;P&gt;&lt;FONT face="courier new,courier"&gt;&lt;SPAN class=""&gt;FileNotFoundError&lt;/SPAN&gt;&lt;SPAN&gt;: [Errno 2] No such file or directory&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;What am I doing wrong?&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 29 Jun 2023 10:47:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36160#M26062</guid>
      <dc:creator>msa_2j212</dc:creator>
      <dc:date>2023-06-29T10:47:26Z</dc:date>
    </item>
    <item>
      <title>Re: Downloading and storing a PDF file to FileStore not working</title>
      <link>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36165#M26064</link>
      <description>&lt;P&gt;It might be easier to use the curl command. In a notebook you can run it as a shell command, or use Python, to first load the file into local driver temp storage&lt;/P&gt;&lt;PRE&gt;%sh curl https://url.com/file.pdf --output /tmp/file.pdf&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;or in python&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;import&lt;/SPAN&gt; &lt;SPAN class=""&gt;urllib&lt;/SPAN&gt;
&lt;SPAN class=""&gt;urllib&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;request&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;urlretrieve&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"https://url.com/file.pdf"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"/tmp/file.pdf"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;Then move the file to DBFS&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;dbutils&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;fs&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;mv&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"file:/tmp/file.pdf"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"dbfs:/FileStore/file.pdf"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 29 Jun 2023 13:09:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36165#M26064</guid>
      <dc:creator>Brian2</dc:creator>
      <dc:date>2023-06-29T13:09:37Z</dc:date>
    </item>
    <item>
      <title>Re: Downloading and storing a PDF file to FileStore not working</title>
      <link>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36170#M26067</link>
      <description>&lt;P&gt;This worked, thanks.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 29 Jun 2023 14:14:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/downloading-and-storing-a-pdf-file-to-filestore-not-working/m-p/36170#M26067</guid>
      <dc:creator>msa_2j212</dc:creator>
      <dc:date>2023-06-29T14:14:03Z</dc:date>
    </item>
  </channel>
</rss>

