- Add Dockerfiles for nvidia, rdna2, strixhalo, and intel GPU targets
- Add ray-serve modules (embeddings, whisper, tts, llm, reranker)
- Add Gitea Actions workflow for automated builds
- Add Makefile for local development
- Update README with comprehensive documentation
54 lines · 1.3 KiB · Docker
# syntax=docker/dockerfile:1
# NVIDIA GPU Ray Worker for elminster (RTX 2070)
# Used for: Whisper STT, TTS
#
# Build from llm-workflows root:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest -f dockerfiles/Dockerfile.ray-worker-nvidia .
#
FROM rayproject/ray:2.53.0-py311-cu121

LABEL maintainer="billy-davies-2"
LABEL description="Ray worker for NVIDIA GPUs (Whisper, TTS)"
LABEL gpu.target="nvidia-cuda"

WORKDIR /app

# System deps for audio processing require root; drop back to the
# image's non-root `ray` user immediately afterwards. `update`,
# `install`, and the apt-list cleanup share one layer so the stale
# package index never persists in the image. Packages sorted
# alphabetically for diffability.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
USER ray

# Python inference stack: STT (faster-whisper, openai-whisper),
# TTS (Coqui TTS + audio libs), and the serving layer (fastapi/uvicorn).
# NOTE(review): all versions are unpinned (hadolint DL3013), so rebuilds
# are not reproducible — pin versions (e.g. faster-whisper==x.y.z) once
# the stack is validated against the CUDA 12.1 base.
RUN pip install --no-cache-dir \
        TTS \
        fastapi \
        faster-whisper \
        httpx \
        librosa \
        openai-whisper \
        pydantic \
        pydub \
        soundfile \
        torch \
        torchaudio \
        uvicorn

# Ray Serve application code; /app on PYTHONPATH so `ray_serve.*`
# modules are importable by the Serve deployments.
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
ENV PYTHONPATH=/app

# Entrypoint script: --chmod sets the execute bit at copy time,
# replacing the follow-up `RUN chmod +x` (which added a whole extra
# layer just to flip a permission bit).
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Default runtime configuration — one ENV instruction per logical
# group; all of these are expected to be overridden at deploy time
# (e.g. by the RayCluster pod spec).
ENV CUDA_VISIBLE_DEVICES=0 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_nvidia" \
    NUM_GPUS="1"

# Exec form: the entrypoint script runs as PID 1 and receives SIGTERM
# directly on `docker stop`.
ENTRYPOINT ["/app/ray-entrypoint.sh"]