<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>How Human Feedback Shapes AI Training in DatabricksTV</title>
    <link>https://community.databricks.com/t5/databrickstv/how-human-feedback-shapes-ai-training/ba-p/113200</link>
    <description>&lt;P&gt;&lt;SPAN&gt;In this episode, Brandon Cui, Research Scientist at MosaicML and Databricks, dives into cutting-edge advancements in AI model optimization, focusing on Reward Models and Reinforcement Learning from Human Feedback (RLHF).&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Brandon highlights a fundamental insight into AI training:&lt;BR /&gt;&lt;STRONG&gt;&lt;EM&gt;"Language models, no matter what size or capability, if you give them enough time to generate, they will have the ability to actually create the right answer. What you need to do is you need to be able to surface this answer."&lt;/EM&gt;&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;IFRAME src="https://www.youtube.com/embed/6aIDtb9IKEU?si=WaYoWGNqnqxMoeFf" width="560" height="315" frameborder="0" allowfullscreen="" title="YouTube video player" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin"&gt;&lt;/IFRAME&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 31 Mar 2025 12:02:22 GMT</pubDate>
    <dc:creator>Demetrios_MLOps</dc:creator>
    <dc:date>2025-03-31T12:02:22Z</dc:date>
    <item>
      <title>How Human Feedback Shapes AI Training</title>
      <link>https://community.databricks.com/t5/databrickstv/how-human-feedback-shapes-ai-training/ba-p/113200</link>
      <description>&lt;P&gt;&lt;SPAN&gt;In this episode, Brandon Cui, Research Scientist at MosaicML and Databricks, dives into cutting-edge advancements in AI model optimization, focusing on Reward Models and Reinforcement Learning from Human Feedback (RLHF).&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Brandon highlights a fundamental insight into AI training:&lt;BR /&gt;&lt;STRONG&gt;&lt;EM&gt;"Language models, no matter what size or capability, if you give them enough time to generate, they will have the ability to actually create the right answer. What you need to do is you need to be able to surface this answer."&lt;/EM&gt;&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;
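&lt;P&gt;&lt;SPAN&gt;That insight is commonly operationalized as best-of-N sampling: draw several candidate answers, then let a reward model surface the best one. The sketch below is a minimal, hypothetical illustration in Python; &lt;CODE&gt;generate&lt;/CODE&gt; and &lt;CODE&gt;reward_model&lt;/CODE&gt; are stand-ins for your own LLM and reward-model calls, not any specific Databricks or MosaicML API.&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;# Best-of-N sampling sketch: generate n candidates, score each with a
# reward model, and return the highest-scoring answer.
from typing import Callable, List

def best_of_n(prompt: str,
              generate: Callable[[str], str],             # hypothetical LLM call
              reward_model: Callable[[str, str], float],  # hypothetical scorer
              n: int = 8) -&gt; str:
    # Sample n independent completions for the same prompt.
    candidates: List[str] = [generate(prompt) for _ in range(n)]
    # Surface the answer the reward model rates highest.
    return max(candidates, key=lambda answer: reward_model(prompt, answer))
&lt;/CODE&gt;&lt;/PRE&gt;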
&lt;P&gt;&lt;IFRAME src="https://www.youtube.com/embed/6aIDtb9IKEU?si=WaYoWGNqnqxMoeFf" width="560" height="315" frameborder="0" allowfullscreen="" title="YouTube video player" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin"&gt;&lt;/IFRAME&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 31 Mar 2025 12:02:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/databrickstv/how-human-feedback-shapes-ai-training/ba-p/113200</guid>
      <dc:creator>Demetrios_MLOps</dc:creator>
      <dc:date>2025-03-31T12:02:22Z</dc:date>
    </item>
  </channel>
</rss>