<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic transform a dataframe column as concatenated string in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/52719#M29555</link>
    <description>&lt;P&gt;Hello,&lt;BR /&gt;I have a single column dataframe and I want to transform the content into a string&lt;/P&gt;&lt;P&gt;EG df=&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;abc&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;def&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;xyz&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;To&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;abc, def, xyz&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Fri, 17 Nov 2023 16:48:25 GMT</pubDate>
    <dc:creator>geertvanhove</dc:creator>
    <dc:date>2023-11-17T16:48:25Z</dc:date>
    <item>
      <title>transform a dataframe column as concatenated string</title>
      <link>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/52719#M29555</link>
      <description>&lt;P&gt;Hello,&lt;BR /&gt;I have a single column dataframe and I want to transform the content into a string&lt;/P&gt;&lt;P&gt;EG df=&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;abc&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;def&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;xyz&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;To&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="623"&gt;&lt;P&gt;abc, def, xyz&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 17 Nov 2023 16:48:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/52719#M29555</guid>
      <dc:creator>geertvanhove</dc:creator>
      <dc:date>2023-11-17T16:48:25Z</dc:date>
    </item>
    <item>
      <title>Re: transform a dataframe column as concatenated string</title>
      <link>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53637#M29850</link>
      <description>&lt;P&gt;I get following error:&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;'DataFrame' object has no attribute 'tolist'&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 23 Nov 2023 14:00:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53637#M29850</guid>
      <dc:creator>geertvanhove</dc:creator>
      <dc:date>2023-11-23T14:00:47Z</dc:date>
    </item>
    <item>
      <title>Re: transform a dataframe column as concatenated string</title>
      <link>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53639#M29852</link>
      <description>&lt;P&gt;sure:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;%python&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.functions &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; from_json, col, concat_ws&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt; &lt;SPAN&gt;*&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;schema &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;StructType&lt;/SPAN&gt;&lt;SPAN&gt;([&lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'meterDateTime'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;StringType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'meterId'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;LongType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'meteringState'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;StringType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'value'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;DoubleType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'versionTimestamp'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;StringType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'file_name'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;StringType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;False&lt;/SPAN&gt;&lt;SPAN&gt;), &lt;/SPAN&gt;&lt;SPAN&gt;StructField&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'file_modification_time'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;TimestampType&lt;/SPAN&gt;&lt;SPAN&gt;(), &lt;/SPAN&gt;&lt;SPAN&gt;False&lt;/SPAN&gt;&lt;SPAN&gt;)])&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;df &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; ( spark&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .read&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"json"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;schema&lt;/SPAN&gt;&lt;SPAN&gt;(schema)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;load&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;'&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;path_sep&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;/*/*/*/*.json'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"meterId"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; )&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;result &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;', '&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;join&lt;/SPAN&gt;&lt;SPAN&gt;(df.&lt;/SPAN&gt;&lt;SPAN&gt;tolist&lt;/SPAN&gt;&lt;SPAN&gt;())&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(result) &lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 23 Nov 2023 14:24:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53639#M29852</guid>
      <dc:creator>geertvanhove</dc:creator>
      <dc:date>2023-11-23T14:24:30Z</dc:date>
    </item>
    <item>
      <title>Re: transform a dataframe column as concatenated string</title>
      <link>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53648#M29856</link>
      <description>&lt;P&gt;Thanks, but can you give me an example for this because following still gives me an error:&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;result &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;', '&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;join&lt;/SPAN&gt;&lt;SPAN&gt;(df.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"meterId"&lt;/SPAN&gt;&lt;SPAN&gt;)).&lt;/SPAN&gt;&lt;SPAN&gt;tolist&lt;/SPAN&gt;&lt;SPAN&gt;())&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 23 Nov 2023 15:01:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/transform-a-dataframe-column-as-concatenated-string/m-p/53648#M29856</guid>
      <dc:creator>geertvanhove</dc:creator>
      <dc:date>2023-11-23T15:01:05Z</dc:date>
    </item>
  </channel>
</rss>

