<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Throwing IndexoutofBound Exception in Pyspark in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13618#M706</link>
    <description>&lt;P&gt;You might have to share the code above the cell. Please paste the code using the code editor and not as an image.&lt;/P&gt;</description>
    <pubDate>Thu, 14 Jul 2022 09:56:22 GMT</pubDate>
    <dc:creator>AmanSehgal</dc:creator>
    <dc:date>2022-07-14T09:56:22Z</dc:date>
    <item>
      <title>Throwing IndexoutofBound Exception in Pyspark</title>
      <link>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13617#M705</link>
      <description>&lt;P&gt;Hello All,&lt;/P&gt;&lt;P&gt;I am trying to read the data and trying to group the data in order to pass it to predict function via @F.pandas_udf method.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;#Loading Model 
pkl_model = pickle.load(open(filepath,'rb')) 
&amp;nbsp;
 # build schema for output labels
 filter_schema=[]
  t = T.StructField("anomaly_prediction", T.IntegerType(),True)
  filter_schema.append(t)         
  
  t1 = T.StructField("anomaly_score", T.DoubleType(),True)
  filter_schema.append(t1)         
  
  return_schema = T.StructType(df.select(df.columns).schema.fields+filter_schema)                                       
&amp;nbsp;
  @F.pandas_udf(return_schema, F.PandasUDFType.GROUPED_MAP)
  def inferdata(data):
    dt = data[labelnames].to_numpy()
    #dt = np.asarray(dt).astype('float64')
    score, pred = pkl_model.predict(dt)
    print('score and prediction is ',score, pred)
    data["anomaly_prediction"] = pred
    data["anomaly_score"] = score
    return(data)
  
  df = df.groupby('filename').apply(inferdata)
  print(df.show(2))&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;But it is throwing an error: &lt;/P&gt;&lt;P&gt;"java.lang.IndexOutOfBoundsException: index: 16384, length: 4 (expected: range(0, 16384))"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="error_db"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1699iE96C3D726451C7BF/image-size/large?v=v2&amp;amp;px=999" role="button" title="error_db" alt="error_db" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="error_2_db"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1708iEF88A621DF0DFAFF/image-size/large?v=v2&amp;amp;px=999" role="button" title="error_2_db" alt="error_2_db" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="error_3_db"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1698i66C4C5E057D7C586/image-size/large?v=v2&amp;amp;px=999" role="button" title="error_3_db" alt="error_3_db" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have attached the code snippet and error images for your reference. I have been stuck with this problem for a week.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Could anybody please help me to resolve this issue?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 14 Jul 2022 05:25:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13617#M705</guid>
      <dc:creator>Santhanalakshmi</dc:creator>
      <dc:date>2022-07-14T05:25:17Z</dc:date>
    </item>
    <item>
      <title>Re: Throwing IndexoutofBound Exception in Pyspark</title>
      <link>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13618#M706</link>
      <description>&lt;P&gt;You might have to share the code above the cell. Please paste the code using the code editor and not as an image.&lt;/P&gt;</description>
      <pubDate>Thu, 14 Jul 2022 09:56:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13618#M706</guid>
      <dc:creator>AmanSehgal</dc:creator>
      <dc:date>2022-07-14T09:56:22Z</dc:date>
    </item>
    <item>
      <title>Re: Throwing IndexoutofBound Exception in Pyspark</title>
      <link>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13619#M707</link>
      <description>&lt;P&gt;Thanks, I have updated the code in the cell.&lt;/P&gt;</description>
      <pubDate>Thu, 14 Jul 2022 10:28:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13619#M707</guid>
      <dc:creator>Santhanalakshmi</dc:creator>
      <dc:date>2022-07-14T10:28:14Z</dc:date>
    </item>
    <item>
      <title>Re: Throwing IndexoutofBound Exception in Pyspark</title>
      <link>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13620#M708</link>
      <description>&lt;P&gt;@Santhanalakshmi Manoharan&amp;nbsp; Was this issue resolved? I am also getting the same error; any guidance would be of great help.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Appreciate your help.&lt;/P&gt;</description>
      <pubDate>Tue, 18 Apr 2023 20:30:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/throwing-indexoutofbound-exception-in-pyspark/m-p/13620#M708</guid>
      <dc:creator>Vindhya</dc:creator>
      <dc:date>2023-04-18T20:30:14Z</dc:date>
    </item>
  </channel>
</rss>

