<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>Topic: Pyspark serialization in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75339#M34935</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I was looking for comprehensive documentation on implementing serialization in pyspark; most of the places I have seen are all about serialization with scala. Could you point out where I can get a detailed explanation on it?&lt;/P&gt;</description>
    <pubDate>Fri, 21 Jun 2024 11:50:53 GMT</pubDate>
    <dc:creator>yusufd</dc:creator>
    <dc:date>2024-06-21T11:50:53Z</dc:date>
    <item>
      <title>Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75339#M34935</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I was looking for comprehensive documentation on implementing serialization in pyspark; most of the places I have seen are all about serialization with scala. Could you point out where I can get a detailed explanation on it?&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jun 2024 11:50:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75339#M34935</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-06-21T11:50:53Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75789#M35060</link>
      <description>&lt;P&gt;This is awesome. Thank you for replying.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to ask one more thing before we close this: in Scala-Spark, Java serialization is the default, and additionally we have Kryo serialization as well, which is better. So these are not applicable in pyspark, if I understand correctly. Kindly confirm.&lt;/P&gt;</description>
      <pubDate>Wed, 26 Jun 2024 07:37:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75789#M35060</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-06-26T07:37:20Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75812#M35064</link>
      <description>&lt;P&gt;This is awesome. Thank you for replying.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to ask one more thing before we close this: in Scala-Spark, Java serialization is the default, and additionally we have Kryo serialization as well, which is better. So, can we use them in pyspark as well?&lt;/P&gt;&lt;P&gt;Another important thing: the code below creates a SparkContext locally, which doesn't work on Databricks. When I try to change the SparkContext arguments, I get an error (screenshot attached). How can we resolve this? Ultimately I don't want to run Spark locally, but on Databricks. I would appreciate it if you could answer this.&lt;/P&gt;&lt;P&gt;Thanks for the support.&lt;/P&gt;</description>
      <pubDate>Wed, 26 Jun 2024 09:50:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/75812#M35064</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-06-26T09:50:14Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76286#M35181</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp;Could you clarify on my query? Eagerly awaiting response.&lt;/P&gt;</description>
      <pubDate>Mon, 01 Jul 2024 11:27:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76286#M35181</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-07-01T11:27:26Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76310#M35196</link>
      <description>&lt;P&gt;Thank you&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp; for the prompt reply. This clears the things and also distinguishes between spark-scala and pyspark. Appreciate your explanation. Will apply this and also share any findings based on this which will help the community!&lt;/P&gt;</description>
      <pubDate>Mon, 01 Jul 2024 13:05:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76310#M35196</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-07-01T13:05:46Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark serialization</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76311#M35197</link>
      <description>&lt;P&gt;This is great to know!&lt;/P&gt;&lt;P&gt;Thank you for the explanation.&lt;/P&gt;</description>
      <pubDate>Mon, 01 Jul 2024 13:06:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-serialization/m-p/76311#M35197</guid>
      <dc:creator>yusufd</dc:creator>
      <dc:date>2024-07-01T13:06:58Z</dc:date>
    </item>
  </channel>
</rss>

