%% Data Analytics Lakehouse Architecture
%% Related: ADR-0033
flowchart TB
%% Ingestion: the three source nodes that the edges below connect to the
%% processing engines. Labels use an explicit <br/> so the two-line layout
%% does not depend on how the renderer treats raw newlines inside quotes.
subgraph Ingestion["Data Ingestion"]
    Kafka["Kafka<br/>Event Streams"]
    APIs["REST APIs<br/>Batch Loads"]
    Files["File Drops<br/>S3/NFS"]
end
%% Processing: one nested subgraph per engine family (batch, stream,
%% real-time). Labels use an explicit <br/> so the two-line layout does not
%% depend on how the renderer treats raw newlines inside quotes.
subgraph Processing["Processing Layer"]
    subgraph Batch["Batch Processing"]
        Spark["Apache Spark<br/>spark-operator"]
    end
    subgraph Stream["Stream Processing"]
        Flink["Apache Flink<br/>flink-operator"]
    end
    subgraph Realtime["Real-time"]
        RisingWave["RisingWave<br/>Streaming SQL"]
    end
end
%% Catalog: Nessie and Iceberg nodes. Labels use an explicit <br/> so the
%% two-line layout does not depend on how the renderer treats raw newlines
%% inside quotes.
subgraph Catalog["Lakehouse Catalog"]
    Nessie["Nessie<br/>Git-like Versioning"]
    Iceberg["Apache Iceberg<br/>Table Format"]
end
%% Storage: object store and the file format node. Labels use an explicit
%% <br/> so the two-line layout does not depend on how the renderer treats
%% raw newlines inside quotes.
subgraph Storage["Storage Layer"]
    S3["S3 (MinIO)<br/>Object Storage"]
    Parquet["Parquet Files<br/>Columnar Format"]
end
%% Query: the single SQL engine node. The label uses an explicit <br/> so the
%% two-line layout does not depend on how the renderer treats raw newlines
%% inside quotes.
subgraph Query["Query Layer"]
    Trino["Trino<br/>Distributed SQL"]
end
%% Serving: the three consumer nodes that Trino links to. Labels use an
%% explicit <br/> so the two-line layout does not depend on how the renderer
%% treats raw newlines inside quotes.
subgraph Serve["Serving Layer"]
    Grafana["Grafana<br/>Dashboards"]
    Jupyter["JupyterHub<br/>Notebooks"]
    Apps["Applications<br/>REST APIs"]
end
%% Metadata: the database node that Nessie links to. The label uses an
%% explicit <br/> so the two-line layout does not depend on how the renderer
%% treats raw newlines inside quotes.
subgraph Metadata["Metadata Store"]
    PostgreSQL["CloudNativePG<br/>analytics-db"]
end
%% Ingestion -> processing engines
Kafka --> Flink & RisingWave
APIs & Files --> Spark

%% Every processing engine links to the Nessie catalog
Spark & Flink & RisingWave --> Nessie

%% Catalog -> table format -> object storage -> files
Nessie --> Iceberg --> S3 --> Parquet

%% Catalog -> metadata database
Nessie --> PostgreSQL

%% Trino links to the catalog nodes and to each serving-layer node
Trino --> Nessie & Iceberg & Grafana & Jupyter & Apps
%% Styling: each class definition is followed directly by the nodes it applies to.

%% Ingestion sources
classDef ingest fill:#4a5568,stroke:#718096,color:#fff
class Kafka,APIs,Files ingest

%% Batch processing
classDef batch fill:#3182ce,stroke:#2b6cb0,color:#fff
class Spark batch

%% Stream and real-time processing share one style
classDef stream fill:#38a169,stroke:#2f855a,color:#fff
class Flink,RisingWave stream

%% Lakehouse catalog
classDef catalog fill:#d69e2e,stroke:#b7791f,color:#fff
class Nessie,Iceberg catalog

%% Storage layer
classDef storage fill:#718096,stroke:#4a5568,color:#fff
class S3,Parquet storage

%% Query layer
classDef query fill:#805ad5,stroke:#6b46c1,color:#fff
class Trino query

%% Serving layer
classDef serve fill:#e53e3e,stroke:#c53030,color:#fff
class Grafana,Jupyter,Apps serve

%% Metadata store
classDef meta fill:#319795,stroke:#2c7a7b,color:#fff
class PostgreSQL meta