#!/bin/bash
# Ray Worker Entrypoint
# Connects to Ray head node and registers custom GPU resources
#
# Environment variables:
#   RAY_HEAD_SVC            - Ray head service name (default: ray-head-svc)
#   GPU_RESOURCE            - Custom GPU resource name (default: gpu_amd)
#   NUM_GPUS                - Number of GPUs to register (default: 1)
#   RAY_OBJECT_STORE_MEMORY - Object store memory limit (optional)

set -euo pipefail

# Ensure Ray CLI is in PATH (works across all base images)
export PATH="/home/ray/.local/bin:/home/ray/anaconda3/bin:${PATH}"

# Fail fast if the Ray CLI is missing. Without this guard, the
# `$(ray --version)` substitution below silently prints nothing —
# command-substitution failures inside echo arguments are not
# caught by `set -e`.
if ! command -v ray >/dev/null 2>&1; then
  echo "ERROR: 'ray' CLI not found in PATH" >&2
  exit 1
fi

# Configuration with defaults
RAY_HEAD_ADDRESS="${RAY_HEAD_SVC:-ray-head-svc}:6379"
GPU_RESOURCE="${GPU_RESOURCE:-gpu_amd}"
NUM_GPUS="${NUM_GPUS:-1}"

# Log startup info
echo "============================================="
echo "Ray Worker Starting"
echo "============================================="
echo " Head address: ${RAY_HEAD_ADDRESS}"
echo " GPU resource: ${GPU_RESOURCE}"
echo " Num GPUs: ${NUM_GPUS}"
echo " Python: $(python3 --version)"
echo " Ray version: $(ray --version)"
echo "============================================="

# Wait for Ray head to be available (with retry).
# `until` is a shell condition, so a failing health-check does not
# trip `set -e`; stderr of the probe is deliberately discarded since
# "not up yet" is the expected state while we poll.
MAX_RETRIES=30
RETRY_INTERVAL=5
retry_count=0

echo "Waiting for Ray head node..."
until ray health-check --address="${RAY_HEAD_ADDRESS}" 2>/dev/null; do
  retry_count=$((retry_count + 1))
  if (( retry_count >= MAX_RETRIES )); then
    echo "ERROR: Ray head not available after ${MAX_RETRIES} attempts" >&2
    exit 1
  fi
  echo " Attempt ${retry_count}/${MAX_RETRIES} - retrying in ${RETRY_INTERVAL}s..."
  sleep "${RETRY_INTERVAL}"
done
echo "Ray head is ready!"
# Build ray start command with optional args.
# NOTE(review): the custom resource count was previously hard-coded to 1,
# while --num-gpus tracked NUM_GPUS. With NUM_GPUS>1 only a single task
# could ever claim the custom resource, so register NUM_GPUS units instead
# (identical behavior for the default NUM_GPUS=1).
RAY_START_ARGS=(
  --address="${RAY_HEAD_ADDRESS}"
  --num-gpus="${NUM_GPUS}"
  --resources="{\"${GPU_RESOURCE}\": ${NUM_GPUS}}"
  --block
)

# Add object store memory limit if specified
if [ -n "${RAY_OBJECT_STORE_MEMORY:-}" ]; then
  RAY_START_ARGS+=(--object-store-memory="${RAY_OBJECT_STORE_MEMORY}")
fi

# Start Ray worker. `exec` replaces this shell so the ray process
# becomes PID 1's child and receives container signals directly.
echo "Starting Ray worker with resources: {\"${GPU_RESOURCE}\": ${NUM_GPUS}}"
exec ray start "${RAY_START_ARGS[@]}"