I am trying to find the cost per Task in each Job every time it was executed (daily) but currently getting very huge numbers due to duplicates, can someone help me ?
WITH workspace AS (
SELECT
account_id,
workspace_id,
workspace_name,
workspace_url,
status,
workspace_name
FROM system.access.workspaces_latest
),
usage_with_ws_filtered_by_date AS (
SELECT
u.*,
w.workspace_name,
w.workspace_url,
w.workspace_full_name
FROM system.billing.usage u
INNER JOIN workspace w ON u.workspace_id = w.workspace_id
WHERE u.billing_origin_product = 'JOBS'
AND u.usage_date BETWEEN DATE_ADD(CURRENT_DATE(), -30) AND CURRENT_DATE()
),
task_usage AS (
SELECT
u.workspace_id,
u.workspace_name,
u.workspace_url,
u.workspace_full_name,
u.usage_metadata.job_id,
u.usage_metadata.job_run_id AS run_id,
t.task_key,
t.change_time AS task_change_time,
u.usage_start_time,
u.usage_end_time,
u.usage_quantity,
u.sku_name,
u.identity_metadata.run_as AS user,
u.usage_date
FROM usage_with_ws_filtered_by_date u
INNER JOIN system.lakeflow.job_tasks t
ON u.workspace_id = t.workspace_id
AND u.usage_metadata.job_id = t.job_id
),
task_costs AS (
SELECT
tu.*,
lp.pricing.default AS unit_price,
tu.usage_quantity * lp.pricing.default AS cost
FROM task_usage tu
LEFT JOIN system.billing.list_prices lp
ON tu.sku_name = lp.sku_name
AND tu.usage_start_time >= lp.price_start_time
AND (tu.usage_end_time <= lp.price_end_time OR lp.price_end_time IS NULL)
AND lp.currency_code = 'USD'
)
SELECT
tc.workspace_id,
tc.workspace_name,
tc.workspace_url,
tc.user,
tc.job_id,
tc.run_id,
tc.task_key,
tc.task_change_time,
tc.usage_start_time,
tc.usage_end_time,
tc.usage_date,
SUM(tc.cost) AS total_cost
FROM task_costs tc
GROUP BY
tc.workspace_id, tc.workspace_name, tc.workspace_url, tc.user, tc.job_id, tc.run_id,
tc.task_key, tc.task_change_time, tc.usage_start_time, tc.usage_end_time, tc.usage_date
ORDER BY
tc.usage_date DESC, tc.workspace_id, tc.job_id, tc.run_id, tc.task_key