<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Date schema issues with pyspark dataframe creation in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23820#M16518</link>
    <description>&lt;P&gt;Hi @Kevin Kim, could you please try upgrading the Spark version? Also, please provide the full error logs.&lt;/P&gt;</description>
    <pubDate>Fri, 04 Nov 2022 05:06:32 GMT</pubDate>
    <dc:creator>Debayan</dc:creator>
    <dc:date>2022-11-04T05:06:32Z</dc:date>
    <item>
      <title>Date schema issues with pyspark dataframe creation</title>
      <link>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23819#M16517</link>
      <description>&lt;P&gt;I'm having some issues with creating a dataframe with a date column. Could I know what is wrong?&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import SparkSession
from pyspark.sql.types import StructType
from pyspark.sql.types import DateType, FloatType
 
spark = SparkSession.builder.appName('DataFrame').getOrCreate()
schema = StructType() \
      .add("DATE", DateType(), True) \
      .add("A", FloatType(), True) \
      .add("B", FloatType(), True)
&amp;nbsp;
df = spark.read.format("csv").option("header", True).option("dateFormat", "MM/dd/yyyy").schema(schema).load("test.csv")
&amp;nbsp;
df.show()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This is the error I'm getting:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 158.0 failed 4 times, most recent failure: Lost task 0.3 in stage 158.0 (TID 1823) (10.237.208.145 executor 5): org.apache.spark.SparkUpgradeException: [INCONSISTENT_BEHAVIOR_CROSS_VERSION.PARSE_DATETIME_BY_NEW_PARSER] You may get a different result due to the upgrading to Spark &amp;gt;= 3.0:&lt;/P&gt;</description>
      <pubDate>Fri, 04 Nov 2022 03:34:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23819#M16517</guid>
      <dc:creator>ckwan48</dc:creator>
      <dc:date>2022-11-04T03:34:25Z</dc:date>
    </item>
    <item>
      <title>Re: Date schema issues with pyspark dataframe creation</title>
      <link>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23822#M16520</link>
      <description>&lt;P&gt;Hi @Kaniz Fatma,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I actually changed the date format to 'M/d/Y' and it didn't throw any errors. I found in my csv file that it had dates like '3/1/2022'. Could that be the issue? But some dates were also like '12/1/2022'. So I'm kind of confused.&lt;/P&gt;</description>
      <pubDate>Tue, 22 Nov 2022 17:22:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23822#M16520</guid>
      <dc:creator>ckwan48</dc:creator>
      <dc:date>2022-11-22T17:22:05Z</dc:date>
    </item>
    <item>
      <title>Re: Date schema issues with pyspark dataframe creation</title>
      <link>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23820#M16518</link>
      <description>&lt;P&gt;Hi @Kevin Kim, could you please try upgrading the Spark version? Also, please provide the full error logs.&lt;/P&gt;</description>
      <pubDate>Fri, 04 Nov 2022 05:06:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/date-schema-issues-with-pyspark-dataframe-creation/m-p/23820#M16518</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2022-11-04T05:06:32Z</dc:date>
    </item>
  </channel>
</rss>

