<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: DataFrame to CSV write has issues due to multiple commas inside an row value in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66384#M7093</link>
    <description>&lt;P&gt;&lt;SPAN&gt;Hello,&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;You will need to use some CSV configuration parameters&lt;/P&gt;&lt;P&gt;follow the documentation below&lt;BR /&gt;&lt;A href="https://spark.apache.org/docs/latest/sql-data-sources-csv.html" target="_blank"&gt;https://spark.apache.org/docs/latest/sql-data-sources-csv.html&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 16 Apr 2024 19:05:00 GMT</pubDate>
    <dc:creator>ThomazRossito</dc:creator>
    <dc:date>2024-04-16T19:05:00Z</dc:date>
    <item>
      <title>DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66080#M7087</link>
      <description>&lt;P&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;Hi all&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;iam&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;w&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;k&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;n a&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;J&lt;/SPAN&gt;&lt;SPAN class=""&gt;S&lt;/SPAN&gt;&lt;SPAN class=""&gt;O&lt;/SPAN&gt;&lt;SPAN class=""&gt;N&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;f&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;l&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;w&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN 
class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;b&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;C&lt;/SPAN&gt;&lt;SPAN class=""&gt;S&lt;/SPAN&gt;&lt;SPAN class=""&gt;V&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;f&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;iam&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;f&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;l&lt;/SPAN&gt;&lt;SPAN class=""&gt;l&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN 
class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;u&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;w&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;J&lt;/SPAN&gt;&lt;SPAN class=""&gt;S&lt;/SPAN&gt;&lt;SPAN class=""&gt;O&lt;/SPAN&gt;&lt;SPAN class=""&gt;N&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;b&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN 
class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;p&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;a&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;l&lt;/SPAN&gt;&lt;SPAN class=""&gt;u&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;l&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;m&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;d&lt;/SPAN&gt;&lt;SPAN class=""&gt;u&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;g&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;t&lt;/SPAN&gt;&lt;SPAN class=""&gt;h&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;v&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;i&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN 
class=""&gt;n&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;p&lt;/SPAN&gt;&lt;SPAN class=""&gt;r&lt;/SPAN&gt;&lt;SPAN class=""&gt;o&lt;/SPAN&gt;&lt;SPAN class=""&gt;c&lt;/SPAN&gt;&lt;SPAN class=""&gt;e&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;s&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;i tried several methods to modify data and tried to escape the comma comes under the row value but it doesent works at the same time i should make sure the JSON code has to be in correct syntax as that will be used in some other place within the project&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;the sample pyspark code and data that i worked with&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType


schema = StructType([
    StructField("IDcol1", IntegerType(), True),
    StructField("IDcol2", IntegerType(), True),
    StructField("AdditionalRequestParameters", StringType(), True),
    StructField("RequestURL", StringType(), True)
])

data = [
    (1, 2, "{'Locale':'en','KnowledgeType':[{'Name':'IndustryKnowledge'},{'Name':'KnowledgeIndustry'}],'SegmentCountry':[{'Country':'US','IndustrySegment':'IP'}],'Setversion':'VersionValue','Flags':null,'ScalingID':0,'VersionInfo':{'PracticeSubType':'SubPracticeValue','Version':'VersionValue'}}", 'https://abc/something/API/2021v3/nothing/data'),
    
]

df = spark.createDataFrame(data, schema=schema)
df.display()&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;by displaying as a dataframe it's no doubt it works fine&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="sai_sathya_0-1712850570456.png" style="width: 595px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/7024i3FDF7209F28257AD/image-dimensions/595x119/is-moderation-mode/true?v=v2" width="595" height="119" role="button" title="sai_sathya_0-1712850570456.png" alt="sai_sathya_0-1712850570456.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;and that is how the expected data should be, but while writing it into a CSV file in my ADLS it misbehaves and creates a new column for every comma that comes under the JSON column&lt;/P&gt;&lt;P&gt;anyway I am unable to read the csv data; i tried display() and show(), and when i look into the csv file that was generated in the container this is what i was able to find&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="sai_sathya_1-1712850991923.png" style="width: 763px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/7025iF3C842997B8F8087/image-dimensions/763x31/is-moderation-mode/true?v=v2" width="763" height="31" role="button" title="sai_sathya_1-1712850991923.png" alt="sai_sathya_1-1712850991923.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;please help me with how to handle these commas. Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 11 Apr 2024 16:06:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66080#M7087</guid>
      <dc:creator>sai_sathya</dc:creator>
      <dc:date>2024-04-11T16:06:25Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66213#M7088</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I managed to parse using the "json_tuple" function&lt;/P&gt;&lt;P&gt;There are other functions that can help&lt;BR /&gt;schema_of_json&lt;BR /&gt;get_json_object&lt;BR /&gt;from_json&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;dd = &lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;df.select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;json_tuple&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"AdditionalRequestParameters"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"Locale"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Setversion"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Flags"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"ScalingID"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"VersionInfo"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.toDF&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Locale"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Setversion"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Flags"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"ScalingID"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"VersionInfo"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"*"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; json_tuple&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"VersionInfo"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"PracticeSubType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; 
&lt;SPAN&gt;"Version"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.drop&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"VersionInfo"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.toDF&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Locale"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Setversion"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Flags"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"ScalingID"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"PracticeSubType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Version"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.withColumn&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; regexp_replace&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"[\[\]]"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;""&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"*"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; json_tuple&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"Country"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"IndustrySegment"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.drop&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"SegmentCountry"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.toDF&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Locale"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; 
&lt;SPAN&gt;"Setversion"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Flags"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"ScalingID"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"PracticeSubType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Version"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Country"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"IndustrySegment"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.withColumn&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; regexp_replace&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"[\[\]]"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;""&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"*"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; json_tuple&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt; &lt;SPAN&gt;"Name"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Name"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.drop&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"KnowledgeType"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.toDF&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Locale"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Setversion"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Flags"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"ScalingID"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"PracticeSubType"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Version"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; 
&lt;SPAN&gt;"Country"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"IndustrySegment"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Name"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt; &lt;SPAN&gt;"Name1"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;dd.display()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Sun, 14 Apr 2024 14:39:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66213#M7088</guid>
      <dc:creator>ThomazRossito</dc:creator>
      <dc:date>2024-04-14T14:39:17Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66214#M7089</link>
      <description>&lt;P&gt;OHH ,thankyou for your time, that was working well for but that was extracting the json data from the json column which is ok but our real issue is when we try to write the dataframe into an csv we get values from&amp;nbsp;AdditionalRequestParameters column that gets splitted into many columns due to comma contains inside the data and finally instead of having 4 columns while reading the csv file we get many number of columns . please help me if any technique can be used to handle this issue . thanks&lt;/P&gt;</description>
      <pubDate>Sun, 14 Apr 2024 16:19:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66214#M7089</guid>
      <dc:creator>sai_sathya</dc:creator>
      <dc:date>2024-04-14T16:19:48Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66320#M7090</link>
      <description>&lt;P&gt;Hi Sai,&lt;/P&gt;
&lt;P&gt;I assume that the problem comes not from the PySpark, but from Excel.&lt;/P&gt;
&lt;P&gt;I tried to reproduce the error and didn't find a way - that's a good thing, right? Please try the following:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;df.write.format("csv").save("/Volumes/&amp;lt;my_catalog_name&amp;gt;/&amp;lt;my_schema_name&amp;gt;/&amp;lt;my_volume_name&amp;gt;/&amp;lt;file_name&amp;gt;")&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;(&lt;A href="https://docs.databricks.com/en/connect/unity-catalog/volumes.html" target="_self"&gt;how to create a Volume&lt;/A&gt; ; Volumes have a lot of value, but if you prefer to use a path in your object storage it's also ok)&lt;/P&gt;
&lt;DIV&gt;Once the file is downloaded locally, open it in Excel. It will not recognize the columns. Click on the Data tab in the ribbon and find the "Text to Columns" button. In the opened dialog : Delimited -&amp;gt; Comma -&amp;gt; Finish.&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Hope it helps,&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Best,&lt;/DIV&gt;
&lt;DIV&gt;Artem&lt;/DIV&gt;</description>
      <pubDate>Tue, 16 Apr 2024 11:58:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66320#M7090</guid>
      <dc:creator>artsheiko</dc:creator>
      <dc:date>2024-04-16T11:58:45Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66357#M7091</link>
      <description>&lt;P&gt;sounds an cool option but here we are leveraging Azure Data Lake Storage as an medium of storage and we directly write the data into the preferred location within ADLS so thats where things gets complicated , any idea?&lt;/P&gt;</description>
      <pubDate>Tue, 16 Apr 2024 14:39:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66357#M7091</guid>
      <dc:creator>sai_sathya</dc:creator>
      <dc:date>2024-04-16T14:39:22Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66371#M7092</link>
      <description>&lt;P&gt;Regardless on how you create the file (however, take a look at Volumes, I'm 100% you'll see the value), please try to proceed with an approach with Excel I described above&lt;/P&gt;</description>
      <pubDate>Tue, 16 Apr 2024 16:10:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66371#M7092</guid>
      <dc:creator>artsheiko</dc:creator>
      <dc:date>2024-04-16T16:10:37Z</dc:date>
    </item>
    <item>
      <title>Re: DataFrame to CSV write has issues due to multiple commas inside an row value</title>
      <link>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66384#M7093</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Hello,&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;You will need to use some CSV configuration parameters&lt;/P&gt;&lt;P&gt;follow the documentation below&lt;BR /&gt;&lt;A href="https://spark.apache.org/docs/latest/sql-data-sources-csv.html" target="_blank"&gt;https://spark.apache.org/docs/latest/sql-data-sources-csv.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 16 Apr 2024 19:05:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/dataframe-to-csv-write-has-issues-due-to-multiple-commas-inside/m-p/66384#M7093</guid>
      <dc:creator>ThomazRossito</dc:creator>
      <dc:date>2024-04-16T19:05:00Z</dc:date>
    </item>
  </channel>
</rss>

