<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Improve Query Performance in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/improve-query-performance/m-p/61417#M6593</link>
    <description>&lt;P&gt;Hello&lt;/P&gt;&lt;P&gt;I have a query in one of my notebooks that took around 3.5 hours on D12_V2 cluster and workers between 5 to 25 .&lt;/P&gt;&lt;P&gt;is there a way to write the query in diffrenet way in order to improve performance and cost :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;select /*+ BROADCAST(b) */ MD5(CONCAT(NVL(client_id, ''), '|', NVL(event_name, ''), '|', NVL(CAST(event_date as STRING),''), '|', NVL(CAST(client_id_type as STRING),''), '|', NVL(CAST(policy_number as STRING), ''))) as event_gk,client_id,client_id_type,event_date,event_name,event_group,event_domain,event_sub_domain,source_system,policy_number,agent_id,event_details,event_status,event_performed_by,event_addressed_to,event_application_date,fund_number,loan_number,cancel_reason,amount_bruto,amount_neto,premia_bruto,anaf,kod_pnia,premia_neto,premia_discount,premia_paid,premia_late,premia_preserved,vif_policy,policy_owner_id,policy_owner_id_type,policy_payer_id,policy_payer_id_type,application_code,application_name,action_code,action_name,page_title,page_info,page_details,page_url,page_variables,page_arrived_from_url,page_action,user_agent,user_ip,agent_user_name,log_id,session_id,instance_guid,flow_guid,addressing_system_code,addressing_system_name,call_center_name,call_center_type,case_number,case_topic,case_sub_topic,case_source,case_status,case_next_step,case_connect_type,media,case_number_of_actions,case_is_recurrent,case_is_justified,case_preservation_details,treatment_number,treatment_type,treatment_description,treatment_status,treatment_transfer,next_treatment_number,next_treatment_days_to,client_type,client_level,client_asiron,call_center_team_id,call_center_emp_id,call_center_emp_id_next_treatment,campaign_code,campaign_name,campaign_objective,channel_name,call_center_rtdm_action,call_center_rtdm_remark,verint_categories,cti_interaction_id,cti_session_id,cti_call_date,cti_interaction_count,cti_direction,cti_ivr_time_sec,cti_wait_time_sec,cti_talk_time_sec,cti_held_time_sec,cti_conference_time_sec,cti_external_time_sec,rtdm_request_comment,rtdm_response_comment,rtdm_result,preservation_tool,treatment_channel,rtdm_process_id,is_last_record_on_process,rtdm_call_center_name,rtdm_call_center_details,document_id,document_type_id,document_type_name,document_create_date,document_receive_date,document_source,request_number,process_name,track_name,department_name,message_id,message_type_id,message_type_name,message_content,claim_number,claim_type,claim_coverage_group,claim_classificaton,claim_specialty,claim_diagnosis,claim_hearing_decision,claim_legal_status,claim_decision_line_number,claim_payment_line_number,claim_line_type,claim_line_coverage_group,claim_line_coverage_name,claim_line_opertion,claim_line_covered_event,claim_line_supplier_type,claim_line_supplier_id,claim_line_decision_type,claim_line_decision_comment,claim_line_required_amount,claim_line_decision_amount,claim_line_waiver_amount,claim_line_expense_amount,claim_line_paid_amount,claim_line_required_percentage,claim_line_decided_percentage,claim_line_canceled,claim_line_storno,claim_sub_decision_line_number,hdl_create_date,hdl_update_date,source_script &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;from ( select a.* FROM raw_df a LEFT JOIN prc.clients_to_delete b ON a.client_id = b.client_id AND a.client_id_type = b.client_id_type WHERE b.client_id IS NULL )final&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;thank in advanced&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Amir&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 22 Feb 2024 08:01:58 GMT</pubDate>
    <dc:creator>Krubug</dc:creator>
    <dc:date>2024-02-22T08:01:58Z</dc:date>
    <item>
      <title>Improve Query Performance</title>
      <link>https://community.databricks.com/t5/get-started-discussions/improve-query-performance/m-p/61417#M6593</link>
      <description>&lt;P&gt;Hello&lt;/P&gt;&lt;P&gt;I have a query in one of my notebooks that took around 3.5 hours on D12_V2 cluster and workers between 5 to 25 .&lt;/P&gt;&lt;P&gt;is there a way to write the query in diffrenet way in order to improve performance and cost :&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;select /*+ BROADCAST(b) */ MD5(CONCAT(NVL(client_id, ''), '|', NVL(event_name, ''), '|', NVL(CAST(event_date as STRING),''), '|', NVL(CAST(client_id_type as STRING),''), '|', NVL(CAST(policy_number as STRING), ''))) as event_gk,client_id,client_id_type,event_date,event_name,event_group,event_domain,event_sub_domain,source_system,policy_number,agent_id,event_details,event_status,event_performed_by,event_addressed_to,event_application_date,fund_number,loan_number,cancel_reason,amount_bruto,amount_neto,premia_bruto,anaf,kod_pnia,premia_neto,premia_discount,premia_paid,premia_late,premia_preserved,vif_policy,policy_owner_id,policy_owner_id_type,policy_payer_id,policy_payer_id_type,application_code,application_name,action_code,action_name,page_title,page_info,page_details,page_url,page_variables,page_arrived_from_url,page_action,user_agent,user_ip,agent_user_name,log_id,session_id,instance_guid,flow_guid,addressing_system_code,addressing_system_name,call_center_name,call_center_type,case_number,case_topic,case_sub_topic,case_source,case_status,case_next_step,case_connect_type,media,case_number_of_actions,case_is_recurrent,case_is_justified,case_preservation_details,treatment_number,treatment_type,treatment_description,treatment_status,treatment_transfer,next_treatment_number,next_treatment_days_to,client_type,client_level,client_asiron,call_center_team_id,call_center_emp_id,call_center_emp_id_next_treatment,campaign_code,campaign_name,campaign_objective,channel_name,call_center_rtdm_action,call_center_rtdm_remark,verint_categories,cti_interaction_id,cti_session_id,cti_call_date,cti_interaction_count,cti_direction,cti_ivr_time_sec,cti_wait_time_sec,cti_talk_time_sec,cti_held_time_sec,cti_conference_time_sec,cti_external_time_sec,rtdm_request_comment,rtdm_response_comment,rtdm_result,preservation_tool,treatment_channel,rtdm_process_id,is_last_record_on_process,rtdm_call_center_name,rtdm_call_center_details,document_id,document_type_id,document_type_name,document_create_date,document_receive_date,document_source,request_number,process_name,track_name,department_name,message_id,message_type_id,message_type_name,message_content,claim_number,claim_type,claim_coverage_group,claim_classificaton,claim_specialty,claim_diagnosis,claim_hearing_decision,claim_legal_status,claim_decision_line_number,claim_payment_line_number,claim_line_type,claim_line_coverage_group,claim_line_coverage_name,claim_line_opertion,claim_line_covered_event,claim_line_supplier_type,claim_line_supplier_id,claim_line_decision_type,claim_line_decision_comment,claim_line_required_amount,claim_line_decision_amount,claim_line_waiver_amount,claim_line_expense_amount,claim_line_paid_amount,claim_line_required_percentage,claim_line_decided_percentage,claim_line_canceled,claim_line_storno,claim_sub_decision_line_number,hdl_create_date,hdl_update_date,source_script &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;from ( select a.* FROM raw_df a LEFT JOIN prc.clients_to_delete b ON a.client_id = b.client_id AND a.client_id_type = b.client_id_type WHERE b.client_id IS NULL )final&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;thank in advanced&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Amir&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 22 Feb 2024 08:01:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/improve-query-performance/m-p/61417#M6593</guid>
      <dc:creator>Krubug</dc:creator>
      <dc:date>2024-02-22T08:01:58Z</dc:date>
    </item>
  </channel>
</rss>

