Implements ADR-0024 (Ray Repository Structure): Ray Serve deployments for GPU-shared AI inference, published as a PyPI package for dynamic code loading. Deployments: LLM, embeddings, reranker, Whisper, TTS. A CI/CD workflow publishes to Gitea PyPI on push to main. Extracted from the kuberay-images repo per ADR-0024.
16 lines
469 B
Python
16 lines
469 B
Python
# Ray Serve deployments for GPU-shared AI inference
# Published to Gitea PyPI as ray-serve-apps

# Re-export each deployment module's `app` object under a
# deployment-specific alias so callers can import all of them from the
# package root instead of reaching into the individual serve_* modules.
from ray_serve.serve_embeddings import app as embeddings_app
from ray_serve.serve_llm import app as llm_app
from ray_serve.serve_reranker import app as reranker_app
from ray_serve.serve_tts import app as tts_app
from ray_serve.serve_whisper import app as whisper_app

# Public API of the package: exactly the five aliases imported above.
__all__ = [
    "embeddings_app",
    "llm_app",
    "reranker_app",
    "tts_app",
    "whisper_app",
]