<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: NameError: name 'col' is not defined in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/nameerror-name-col-is-not-defined/m-p/27858#M19704</link>
    <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://users/31015/mudassar45.html" target="_blank"&gt;@mudassar45@gmail.com&lt;/A&gt; &lt;/P&gt;
&lt;P&gt;As the documentation describes, col() is a generic column not yet associated with a DataFrame. Please refer to the code below.&lt;/P&gt;
&lt;P&gt;display(peopleDF.select("firstName").filter("firstName = 'An'"))&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 22 Aug 2019 09:18:12 GMT</pubDate>
    <dc:creator>MOHAN_KUMARL_N</dc:creator>
    <dc:date>2019-08-22T09:18:12Z</dc:date>
    <item>
      <title>NameError: name 'col' is not defined</title>
      <link>https://community.databricks.com/t5/data-engineering/nameerror-name-col-is-not-defined/m-p/27857#M19703</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I'm executing the code below using Python in a notebook, and it appears that the col() function is not being recognized.&lt;/P&gt;
&lt;P&gt;I want to know if the col() function belongs to any specific DataFrame library or Python library. I don't want to use the PySpark API and would like to write code using the SQL DataFrames API.&lt;/P&gt;
&lt;P&gt;I am trying to run the code below and getting the error: NameError: name 'col' is not defined&lt;/P&gt;
&lt;P&gt;peopleDF = spark.read.parquet("/mnt/training/dataframes/people-10m.parquet") peopleDF.printSchema() peopleDF.show() peopleDF.select(col("firstName")).filter(col("firstName"))=="An"&lt;/P&gt;
&lt;P&gt;As per the Spark documentation:&lt;/P&gt;
&lt;P&gt;&lt;A href="https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.Column" target="test_blank"&gt;https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.Column&lt;/A&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;df("columnName")            // On a specific `df` DataFrame.
col("columnName")           // A generic column not yet associated with a DataFrame.
col("columnName.field")     // Extracting a struct field
col("`a.column.with.dots`") // Escape `.` in column names.
$"columnName"               // Scala short hand for a named column.&lt;/CODE&gt;&lt;/PRE&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 21 Aug 2019 22:15:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/nameerror-name-col-is-not-defined/m-p/27857#M19703</guid>
      <dc:creator>Maser_AZ</dc:creator>
      <dc:date>2019-08-21T22:15:22Z</dc:date>
    </item>
    <item>
      <title>Re: NameError: name 'col' is not defined</title>
      <link>https://community.databricks.com/t5/data-engineering/nameerror-name-col-is-not-defined/m-p/27858#M19704</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://users/31015/mudassar45.html" target="_blank"&gt;@mudassar45@gmail.com&lt;/A&gt; &lt;/P&gt;
&lt;P&gt;As the documentation describes, col() is a generic column not yet associated with a DataFrame. Please refer to the code below.&lt;/P&gt;
&lt;P&gt;display(peopleDF.select("firstName").filter("firstName = 'An'"))&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 22 Aug 2019 09:18:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/nameerror-name-col-is-not-defined/m-p/27858#M19704</guid>
      <dc:creator>MOHAN_KUMARL_N</dc:creator>
      <dc:date>2019-08-22T09:18:12Z</dc:date>
    </item>
  </channel>
</rss>

