<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Calling a .py Function using DF from another file in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/calling-a-py-function-using-df-from-another-file/m-p/57463#M30792</link>
    <description>&lt;P&gt;You should create a UDF on top of the &lt;STRONG&gt;getMediumText&lt;/STRONG&gt; function and then use the UDF in the SQL statement.&lt;/P&gt;</description>
    <pubDate>Tue, 16 Jan 2024 14:37:48 GMT</pubDate>
    <dc:creator>Lakshay</dc:creator>
    <dc:date>2024-01-16T14:37:48Z</dc:date>
    <item>
      <title>Calling a .py Function using DF from another file</title>
      <link>https://community.databricks.com/t5/data-engineering/calling-a-py-function-using-df-from-another-file/m-p/57444#M30783</link>
      <description>&lt;P&gt;I have created a file NBF_TextTranslation&lt;/P&gt;&lt;PRE&gt;spark = SparkSession.builder.getOrCreate()

df_TextTranslation = spark.read.&lt;SPAN class=""&gt;format&lt;/SPAN&gt;(&lt;SPAN class=""&gt;'delta'&lt;/SPAN&gt;).load(textTranslation_path)

&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;getMediumText&lt;/SPAN&gt;(&lt;SPAN class=""&gt;TextID, PlantName&lt;/SPAN&gt;):
   df1 = spark.sql(&lt;SPAN class=""&gt;"SELECT TextID, PlantName, LanguageID, Short, Medium, Extended, Active FROM delta.`{0}` tt"&lt;/SPAN&gt;.&lt;SPAN class=""&gt;format&lt;/SPAN&gt;(textTranslation_path))
   df2 = df1.&lt;SPAN class=""&gt;filter&lt;/SPAN&gt;((df1.PlantName == PlantName) &amp;amp; (df1.TextID == TextID) &amp;amp; (df1.LanguageID == &lt;SPAN class=""&gt;'1033'&lt;/SPAN&gt;) &amp;amp; (df1.Active == &lt;SPAN class=""&gt;"True"&lt;/SPAN&gt;))
    medium_Value = df2.select(&lt;SPAN class=""&gt;'Medium'&lt;/SPAN&gt;).first()[&lt;SPAN class=""&gt;0&lt;/SPAN&gt;]
   &lt;SPAN class=""&gt;return&lt;/SPAN&gt; medium_Value&lt;/PRE&gt;&lt;P&gt;I am calling my function from file DimDepartment_Amit&lt;/P&gt;&lt;PRE&gt;%run &lt;SPAN class=""&gt;"../Functions/NBF_TextTranslation"&lt;/SPAN&gt;
TextID = &lt;SPAN class=""&gt;'106905303'&lt;/SPAN&gt;
PlantName = &lt;SPAN class=""&gt;'BKIZ'&lt;/SPAN&gt;
LanguageID = &lt;SPAN class=""&gt;1033&lt;/SPAN&gt;

medium_value = getMediumText(TextID, PlantName)
&lt;SPAN class=""&gt;print&lt;/SPAN&gt;(medium_value)&lt;/PRE&gt;&lt;P&gt;This part works, but it fails when I call the function from SQL:&lt;/P&gt;&lt;PRE&gt;sqlStatement =  &lt;SPAN class=""&gt;"\
    SELECT\
        PlantName as BK_PlantName,\
        TextId,\
        getMediumText(TextId, PlantName) as DepartmentName,\
    FROM delta.`{0}`"&lt;/SPAN&gt;.&lt;SPAN class=""&gt;format&lt;/SPAN&gt;(src_path_Department)

df_stgDimDepartment = spark.sql(sqlStatement)

df_stgDimDepartment.show()&lt;/PRE&gt;&lt;P&gt;I am getting the error below:&lt;/P&gt;&lt;P&gt;Cannot resolve function&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;getMediumText&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;on search path [system.builtin,&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;system.session,&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;spark_catalog.default]. SQLSTATE: 42883; line 1 pos 167&lt;/P&gt;</description>
      <pubDate>Tue, 16 Jan 2024 12:08:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/calling-a-py-function-using-df-from-another-file/m-p/57444#M30783</guid>
      <dc:creator>Amit_Garg</dc:creator>
      <dc:date>2024-01-16T12:08:57Z</dc:date>
    </item>
    <item>
      <title>Re: Calling a .py Function using DF from another file</title>
      <link>https://community.databricks.com/t5/data-engineering/calling-a-py-function-using-df-from-another-file/m-p/57463#M30792</link>
      <description>&lt;P&gt;You should create a UDF on top of the &lt;STRONG&gt;getMediumText&lt;/STRONG&gt; function and then use the UDF in the SQL statement.&lt;/P&gt;</description>
      <pubDate>Tue, 16 Jan 2024 14:37:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/calling-a-py-function-using-df-from-another-file/m-p/57463#M30792</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2024-01-16T14:37:48Z</dc:date>
    </item>
  </channel>
</rss>

