Spark SQL execution did not complete even after 12 hours, i ran it on i3.xlarge with 4 worker nodes.
only two worker nodes showed as running, with CPU at 100%
what should i do differently?
--SQL
INSERT into attribute_results
...
SELECT /*+ BROADCAST(t) RANGE_JOIN(ta, 46) */
..,
..,
..
from
(select * from transactions where txn_date ='2025-05-01' ) t
INNER JOIN transaction_attributes ta
ON t.txn_date BETWEEN ta.analysis_start_date AND ta.analysis_end_date;
select count(1) from transactions;
2,782,521
select count(1) from transactions where txn_date ='2025-05-01' ;
92,387
select count(1) from transaction_attributes ta
where to_date('2025-05-01') BETWEEN ta.analysis_start_date AND ta.analysis_end_date
43,589,999