<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66584#M33158</link>
    <description>&lt;P&gt;Hi &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/14792"&gt;@-werners-&lt;/a&gt;&amp;nbsp;, &amp;nbsp;User wants data in landing table like this only, they have some data like None as well... And can have some case when statement based on blank value and null value in next layer&lt;/P&gt;</description>
    <pubDate>Thu, 18 Apr 2024 10:48:03 GMT</pubDate>
    <dc:creator>RakeshRakesh_De</dc:creator>
    <dc:date>2024-04-18T10:48:03Z</dc:date>
    <item>
      <title>Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66574#M33152</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am trying to read a file which has some blank values in a column. We know Spark converts blank values to null while reading; how can I read blank/empty values as empty values? I tried DBR 13.2 and 14.3.&lt;/P&gt;&lt;P&gt;I have tried all possible ways but it's not working.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;display(spark.read.option(&lt;/SPAN&gt;&lt;SPAN&gt;"emptyValue"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;""&lt;/SPAN&gt;&lt;SPAN&gt;).csv(&lt;/SPAN&gt;&lt;SPAN&gt;'/FileStore/tables/test2.csv'&lt;/SPAN&gt;&lt;SPAN&gt;,header=&lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;,inferSchema=&lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;display(spark.read.option("emptyValue","None").csv('/FileStore/tables/test2.csv',header=True,inferSchema=True&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;spark.read.option(&lt;/SPAN&gt;&lt;SPAN&gt;"nullValue"&lt;/SPAN&gt;&lt;SPAN&gt;, "&lt;/SPAN&gt;&lt;SPAN&gt;None"&lt;/SPAN&gt;&lt;SPAN&gt;).csv(&lt;/SPAN&gt;&lt;SPAN&gt;'/FileStore/tables/test2.csv'&lt;/SPAN&gt;&lt;SPAN&gt;,header=&lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;,inferSchema=&lt;/SPAN&gt;&lt;SPAN&gt;False&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;Sample input CSV file below&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="RakeshRakesh_De_0-1713431921922.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/7109i6CA7EBCB8EAB90D5/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="RakeshRakesh_De_0-1713431921922.png" alt="RakeshRakesh_De_0-1713431921922.png" 
/&gt;&lt;/span&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 18 Apr 2024 09:25:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66574#M33152</guid>
      <dc:creator>RakeshRakesh_De</dc:creator>
      <dc:date>2024-04-18T09:25:08Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66581#M33155</link>
      <description>&lt;P&gt;May I ask why you do not want null?&amp;nbsp; It is THE way to indicate a value is missing (and gives you filtering possibilities using isNull/isNotNull).&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 10:06:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66581#M33155</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-04-18T10:06:48Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66584#M33158</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/14792"&gt;@-werners-&lt;/a&gt;&amp;nbsp;, &amp;nbsp;User wants data in landing table like this only, they have some data like None as well... And can have some case when statement based on blank value and null value in next layer&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 10:48:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66584#M33158</guid>
      <dc:creator>RakeshRakesh_De</dc:creator>
      <dc:date>2024-04-18T10:48:03Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66585#M33159</link>
      <description>&lt;P&gt;.option("nullValue", "") should do the trick.&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 10:51:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66585#M33159</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-04-18T10:51:31Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66602#M33164</link>
      <description>&lt;DIV class=""&gt;&lt;P&gt;&lt;SPAN&gt;.option("nullValue", "")&lt;/SPAN&gt;&lt;BR /&gt;Empty strings are interpreted as null values by default. If you set&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;nullValue&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;to anything but&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;"", like&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;"null"&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;or&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;"none", empty strings will be read as empty strings and not as null values anymore.&lt;BR /&gt;&lt;BR /&gt;Please check:&lt;BR /&gt;&lt;A href="https://stackoverflow.com/questions/64317510/read-spark-csv-with-empty-values-without-converting-to-null" target="_blank" rel="noopener"&gt;dataframe - Read spark csv with empty values without converting to null - Stack Overflow&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 18 Apr 2024 14:16:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66602#M33164</guid>
      <dc:creator>Riyakh</dc:creator>
      <dc:date>2024-04-18T14:16:13Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66605#M33165</link>
      <description>&lt;P&gt;Don't quote something from Stack Overflow, because those answers were tried on old versions of Spark. Have you tried this yourself to verify whether it really works in Spark 3?&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 14:34:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66605#M33165</guid>
      <dc:creator>RakeshRakesh_De</dc:creator>
      <dc:date>2024-04-18T14:34:29Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66610#M33167</link>
      <description>&lt;P&gt;AFAIK .option("nullValue", "") should do the trick.&amp;nbsp; But I tested it myself on your example and indeed it does not work.&lt;BR /&gt;I'm going to do some checking...&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 15:40:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/66610#M33167</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-04-18T15:40:35Z</dc:date>
    </item>
    <item>
      <title>Re: Spark CSV file read option to read blank/empty value from file as empty value only instead Null</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/67165#M33294</link>
      <description>&lt;P&gt;OK, after some tests:&lt;BR /&gt;The trick is to surround the text in your CSV with quotes.&amp;nbsp; That way Spark can actually distinguish between a missing value and an empty value.&amp;nbsp; Missing values are null and can only be converted to something else explicitly (e.g. by using coalesce).&lt;BR /&gt;When a column contains '', nullValue = "''" will create an empty value and not null.&lt;BR /&gt;The same goes for emptyValue if you want.&lt;BR /&gt;Not sure if it is workable for you though.&lt;/P&gt;</description>
      <pubDate>Wed, 24 Apr 2024 07:43:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-csv-file-read-option-to-read-blank-empty-value-from-file/m-p/67165#M33294</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-04-24T07:43:18Z</dc:date>
    </item>
  </channel>
</rss>

