<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Cannot Reproduce Result scikit-learn random forest in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/cannot-reproduce-result-scikit-learn-random-forest/m-p/27657#M19518</link>
    <description>&lt;P&gt;I'm running some machine learning experiments in Databricks. For the random forest algorithm, each time I restart the cluster the training output changes even though the random state is set. Does anyone have any clue about this issue?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Note: I tried the same algorithm with the same code in an Anaconda environment on my local machine; there is no difference in the result even when I restart the machine.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 18 Feb 2022 10:37:31 GMT</pubDate>
    <dc:creator>umair</dc:creator>
    <dc:date>2022-02-18T10:37:31Z</dc:date>
    <item>
      <title>Cannot Reproduce Result scikit-learn random forest</title>
      <link>https://community.databricks.com/t5/data-engineering/cannot-reproduce-result-scikit-learn-random-forest/m-p/27657#M19518</link>
      <description>&lt;P&gt;I'm running some machine learning experiments in Databricks. For the random forest algorithm, each time I restart the cluster the training output changes even though the random state is set. Does anyone have any clue about this issue?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Note: I tried the same algorithm with the same code in an Anaconda environment on my local machine; there is no difference in the result even when I restart the machine.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Feb 2022 10:37:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/cannot-reproduce-result-scikit-learn-random-forest/m-p/27657#M19518</guid>
      <dc:creator>umair</dc:creator>
      <dc:date>2022-02-18T10:37:31Z</dc:date>
    </item>
    <item>
      <title>Re: Cannot Reproduce Result scikit-learn random forest</title>
      <link>https://community.databricks.com/t5/data-engineering/cannot-reproduce-result-scikit-learn-random-forest/m-p/27659#M19520</link>
      <description>&lt;P&gt;RF is non-deterministic by its nature.&lt;/P&gt;&lt;P&gt;However, as you mentioned, you can control this by using random_state.&lt;/P&gt;&lt;P&gt;This will guarantee a deterministic result ON A CERTAIN SYSTEM, but not necessarily across systems.&lt;/P&gt;&lt;P&gt;&lt;A href="https://stackoverflow.com/questions/70020127/is-result-of-train-test-split-the-same-on-different-machines-with-set-random-sta" alt="https://stackoverflow.com/questions/70020127/is-result-of-train-test-split-the-same-on-different-machines-with-set-random-sta" target="_blank"&gt;SO has a topic about this, check it out, very interesting&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 21 Feb 2022 10:43:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/cannot-reproduce-result-scikit-learn-random-forest/m-p/27659#M19520</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-02-21T10:43:33Z</dc:date>
    </item>
  </channel>
</rss>

