I have built this script to collect Ganglia metrics, but the Ganglia stderr and stdout files are 0 bytes in size. It doesn't work.
I have put this script in the Workspace because, following the Databricks migration, all init scripts should be placed in the Workspace rather than on DBFS, but it doesn't work. Do you know how I can solve this issue?
#!/bin/bash
#
# Cluster init script: on the driver node, generate and launch a background
# collector that periodically dumps Ganglia CPU metrics to XML files.
#
# Environment variables read:
#   DB_IS_DRIVER                  - must be "TRUE" for the script to do anything
#   DB_CLUSTER_NAME               - used in log messages and in the output path
#   DB_DRIVER_IP                  - used in the output file name
#   GANGLIA_METRICS_ENABLED       - must be "true" to enable collection
#   GANGLIA_METRICS_POLL_INTERVAL - seconds between polls (default 15 when
#                                   unset or not a non-negative integer)
#
# Files written:
#   /tmp/gather_ganglia_metrics.sh                      - generated collector
#   /Workspace/ganglia_metrics/<cluster>/<ip>_<ts>.xml  - one file per poll
#
# NOTE: variables must be written as "$VAR", never "\$VAR". A backslash makes
# the shell treat "$VAR" as literal text, so a comparison such as
# [ \$DB_IS_DRIVER != "TRUE" ] is always true and the script would exit
# immediately on every node without producing any output.
#
# NOTE(review): confirm that /Workspace is writable from the init-script
# context on the target runtime and that Ganglia is available there.
set -euo pipefail # Exit on errors, unset variables and failed pipelines

# This script must run only on the driver node
if [[ "${DB_IS_DRIVER:-}" != "TRUE" ]]; then
  exit 0
fi

if [[ "${GANGLIA_METRICS_ENABLED:-}" != "true" ]]; then
  echo "Ganglia metrics on ${DB_CLUSTER_NAME:-} are not enabled"
  exit 0
fi

echo "Enabling collection of metrics on ${DB_CLUSTER_NAME:-}"

# The quoted delimiter ('EOF') writes the body verbatim: every $VAR below is
# expanded later, when the collector itself runs, not while it is generated.
# '>' (not '>>') so that a re-run replaces the collector instead of appending
# a second copy to it.
cat <<'EOF' >/tmp/gather_ganglia_metrics.sh
#!/bin/bash
# Polls the local Ganglia endpoint forever and stores each response as XML.
ROOT_PATH="/Workspace/ganglia_metrics"
LOGS_DIR="$ROOT_PATH/$DB_CLUSTER_NAME"

# Assign poll interval: accept only a plain non-negative integer
re='^[0-9]+$'
if [[ "${GANGLIA_METRICS_POLL_INTERVAL:-}" =~ $re ]]; then
  POLL_INTERVAL=$GANGLIA_METRICS_POLL_INTERVAL
else
  POLL_INTERVAL=15
fi
echo "Poll interval is $POLL_INTERVAL seconds"

if [[ ! -d "$LOGS_DIR" ]]; then
  sudo mkdir -p -- "$LOGS_DIR"
  echo "directory $LOGS_DIR is created"
fi

while true; do
  LOG_TIMESTAMP=$(date '+%Y%m%d%H%M%S')
  LOG_PATH="$LOGS_DIR/${DB_DRIVER_IP}_${LOG_TIMESTAMP}.xml"
  echo "$LOG_PATH"
  # The URL prefix is quoted so '*' is never treated as a filename glob; the
  # brace list stays unquoted so bash expands it into one URL per metric.
  # --silent keeps the progress meter out of the logs, --show-error still
  # reports failures.
  curl --silent --show-error \
    "http://localhost:8652/cluster/*/"{cpu_aidle,cpu_idle,cpu_nice,cpu_num,cpu_report,cpu_speed,cpu_system} \
    >>"$LOG_PATH"
  sleep "$POLL_INTERVAL"
done
EOF

# Only the driver reaches this point (non-driver nodes exited above), so no
# further DB_IS_DRIVER check is needed before launching the collector.
sudo chmod a+x /tmp/gather_ganglia_metrics.sh
# nohup + '&' lets the collector keep running after this init script returns.
nohup /tmp/gather_ganglia_metrics.sh &