<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: CHAR/VARCHAR fields sometimes show as STRING in a view in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/char-varchar-fields-sometimes-show-as-string-in-a-view/m-p/121669#M46515</link>
    <description>&lt;P&gt;In Spark SQL&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;string&amp;nbsp;is the&amp;nbsp;canonical type&amp;nbsp;for all textual data.&lt;/LI&gt;&lt;LI&gt;char(n)&amp;nbsp;and&amp;nbsp;varchar(n)&amp;nbsp;are&amp;nbsp;parsed and stored as metadata, but&amp;nbsp;internally treated as&amp;nbsp;string.&lt;/LI&gt;&lt;LI&gt;When you create a&amp;nbsp;view, Spark&amp;nbsp;does not preserve the original&amp;nbsp;char(n)&amp;nbsp;or&amp;nbsp;varchar(n)&amp;nbsp;types&amp;nbsp;— it&amp;nbsp;normalizes everything to&amp;nbsp;string.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;This is why:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;In your first view (without comments), Spark is able to&amp;nbsp;preserve the metadata&amp;nbsp;from the underlying table for&amp;nbsp;str2&amp;nbsp;and&amp;nbsp;str3.&lt;/LI&gt;&lt;LI&gt;But when you&amp;nbsp;add column comments, Spark&amp;nbsp;reconstructs the schema&amp;nbsp;from the&amp;nbsp;SELECT&amp;nbsp;clause and&amp;nbsp;drops the original metadata, defaulting all string-like types to&amp;nbsp;string.&lt;/LI&gt;&lt;/UL&gt;</description>
    <pubDate>Thu, 12 Jun 2025 20:01:04 GMT</pubDate>
    <dc:creator>UmaMahesh1</dc:creator>
    <dc:date>2025-06-12T20:01:04Z</dc:date>
    <item>
      <title>CHAR/VARCHAR fields sometimes show as STRING in a view</title>
      <link>https://community.databricks.com/t5/data-engineering/char-varchar-fields-sometimes-show-as-string-in-a-view/m-p/121664#M46514</link>
      <description>&lt;P&gt;We've found an interesting behavior where `char` and `varchar` fields in a table show as the `string` type in a view. Consider the following table and view:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;create or replace table thirty_day_tables.kit_varchar_string (
  str1 string,
  str2 char(10),
  str3 varchar(10)
);

create or replace view thirty_day_tables.kit_varchar_string_vw as 
select str1, str2, str3, cast("dummy" as varchar(10)) as str4 
from thirty_day_tables.kit_varchar_string;&lt;/LI-CODE&gt;&lt;P&gt;In this case, it mostly works as I expected &lt;U&gt;except&lt;/U&gt; for the manufactured field `str4`. The output for `&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;describe&lt;/SPAN&gt;&lt;SPAN&gt; thirty_day_tables.kit_varchar_string_vw`:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;TABLE border="1" width="100%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;col_name&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;data_type&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;comment&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str1&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;string&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;null&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str2&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;char(10)&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;null&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str3&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;varchar(10)&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;null&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;str4&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;&lt;FONT color="#FF0000"&gt;string&lt;/FONT&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;null&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Why does str4 show as a string despite the explicit cast to a varchar(10)?&lt;/P&gt;&lt;P&gt;Even stranger, if I add a comment to each column I get a different result:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;create or replace view thirty_day_tables.kit_varchar_string_vw (
  str1 comment "OK - expected to be a string in the view",
  str2 comment "ERROR - expected to be a char(10) in the view",
  str3 comment "ERROR - expected to be a varchar(10) in the view",
  str4 comment "ERROR - expected to be a varchar(10) in the view"
 ) as 
select str1, str2, str3, cast("dummy" as varchar(10)) as str4 
from thirty_day_tables.kit_varchar_string;&lt;/LI-CODE&gt;&lt;P&gt;The output for `&lt;SPAN&gt;describe&lt;/SPAN&gt;&lt;SPAN&gt; thirty_day_tables.kit_varchar_string_vw`:&lt;/SPAN&gt;&lt;/P&gt;&lt;TABLE border="1" width="100%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;col_name&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;data_type&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;STRONG&gt;comment&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str1&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;string&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;OK - expected to be a string in the view&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str2&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;FONT color="#FF0000"&gt;string&lt;/FONT&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;ERROR - expected to be a char(10) in the view&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;str3&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;&lt;FONT color="#FF0000"&gt;string&lt;/FONT&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="30px"&gt;ERROR - expected to be a varchar(10) in the view&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;str4&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;&lt;FONT color="#FF0000"&gt;string&lt;/FONT&gt;&lt;/TD&gt;&lt;TD width="33.333333333333336%" height="21px"&gt;ERROR - expected to be a varchar(10) in the view&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;Why do all the columns show as string???&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jun 2025 19:42:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/char-varchar-fields-sometimes-show-as-string-in-a-view/m-p/121664#M46514</guid>
      <dc:creator>kmenke-em</dc:creator>
      <dc:date>2025-06-12T19:42:48Z</dc:date>
    </item>
    <item>
      <title>Re: CHAR/VARCHAR fields sometimes show as STRING in a view</title>
      <link>https://community.databricks.com/t5/data-engineering/char-varchar-fields-sometimes-show-as-string-in-a-view/m-p/121669#M46515</link>
      <description>&lt;P&gt;In Spark SQL&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;string&amp;nbsp;is the&amp;nbsp;canonical type&amp;nbsp;for all textual data.&lt;/LI&gt;&lt;LI&gt;char(n)&amp;nbsp;and&amp;nbsp;varchar(n)&amp;nbsp;are&amp;nbsp;parsed and stored as metadata, but&amp;nbsp;internally treated as&amp;nbsp;string.&lt;/LI&gt;&lt;LI&gt;When you create a&amp;nbsp;view, Spark&amp;nbsp;does not preserve the original&amp;nbsp;char(n)&amp;nbsp;or&amp;nbsp;varchar(n)&amp;nbsp;types&amp;nbsp;— it&amp;nbsp;normalizes everything to&amp;nbsp;string.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;This is why:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;In your first view (without comments), Spark is able to&amp;nbsp;preserve the metadata&amp;nbsp;from the underlying table for&amp;nbsp;str2&amp;nbsp;and&amp;nbsp;str3.&lt;/LI&gt;&lt;LI&gt;But when you&amp;nbsp;add column comments, Spark&amp;nbsp;reconstructs the schema&amp;nbsp;from the&amp;nbsp;SELECT&amp;nbsp;clause and&amp;nbsp;drops the original metadata, defaulting all string-like types to&amp;nbsp;string.&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Thu, 12 Jun 2025 20:01:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/char-varchar-fields-sometimes-show-as-string-in-a-view/m-p/121669#M46515</guid>
      <dc:creator>UmaMahesh1</dc:creator>
      <dc:date>2025-06-12T20:01:04Z</dc:date>
    </item>
  </channel>
</rss>

