Files
homelab-design/diagrams/analytics-lakehouse.mmd

86 lines
2.3 KiB
Plaintext

%% Data Analytics Lakehouse Architecture
%% Related: ADR-0033
flowchart TB
%% Entry points of the diagram: event streams, batch API loads and file drops.
subgraph Ingestion["Data Ingestion"]
    Kafka["Kafka<br/>Event Streams"]
    APIs["REST APIs<br/>Batch Loads"]
    Files["File Drops<br/>S3/NFS"]
end
%% Processing layer: one nested subgraph per engine group (batch, stream, real-time),
%% each currently holding a single engine node.
subgraph Processing["Processing Layer"]
    subgraph Batch["Batch Processing"]
        Spark["Apache Spark<br/>spark-operator"]
    end

    subgraph Stream["Stream Processing"]
        Flink["Apache Flink<br/>flink-operator"]
    end

    subgraph Realtime["Real-time"]
        RisingWave["RisingWave<br/>Streaming SQL"]
    end
end
%% Lakehouse catalog: the Nessie versioning node and the Iceberg table-format node.
subgraph Catalog["Lakehouse Catalog"]
    Nessie["Nessie<br/>Git-like Versioning"]
    Iceberg["Apache Iceberg<br/>Table Format"]
end
%% Storage layer: the object store and the columnar files drawn beneath it.
subgraph Storage["Storage Layer"]
    S3["S3 (MinIO)<br/>Object Storage"]
    Parquet["Parquet Files<br/>Columnar Format"]
end
%% Query layer: a single distributed SQL engine node.
subgraph Query["Query Layer"]
    Trino["Trino<br/>Distributed SQL"]
end
%% Serving layer: dashboards, notebooks and applications.
subgraph Serve["Serving Layer"]
    Grafana["Grafana<br/>Dashboards"]
    Jupyter["JupyterHub<br/>Notebooks"]
    Apps["Applications<br/>REST APIs"]
end
%% Metadata store: one database node (CloudNativePG, labelled analytics-db).
subgraph Metadata["Metadata Store"]
    PostgreSQL["CloudNativePG<br/>analytics-db"]
end
%% Edges. Fan-out and fan-in are written with the ampersand shorthand and expand to
%% the same individual links, in the same order, as one-link-per-line notation.

%% Ingestion sources into the processing engines.
Kafka --> Flink & RisingWave
APIs & Files --> Spark

%% All three engines point at the Nessie catalog node.
Spark & Flink & RisingWave --> Nessie

%% Catalog, then table format, then object storage, then files.
Nessie --> Iceberg --> S3 --> Parquet

%% Nessie links to the node in the Metadata Store subgraph.
Nessie --> PostgreSQL

%% NOTE(review): these two arrows run from Trino towards the catalog, while the
%% ingestion arrows above run from source to consumer. Confirm the intended arrow
%% semantics (data flow versus depends-on) before relying on direction here.
Trino --> Nessie & Iceberg

%% Trino out to the serving-layer nodes.
Trino --> Grafana & Jupyter & Apps
%% Colour scheme: each classDef is followed directly by the nodes that use it.
%% Every class sets a fill, a stroke and white label text.

classDef ingest fill:#4a5568,stroke:#718096,color:#fff
class Kafka,APIs,Files ingest

classDef batch fill:#3182ce,stroke:#2b6cb0,color:#fff
class Spark batch

%% Flink and RisingWave share this class although they sit in separate subgraphs.
classDef stream fill:#38a169,stroke:#2f855a,color:#fff
class Flink,RisingWave stream

classDef catalog fill:#d69e2e,stroke:#b7791f,color:#fff
class Nessie,Iceberg catalog

classDef storage fill:#718096,stroke:#4a5568,color:#fff
class S3,Parquet storage

classDef query fill:#805ad5,stroke:#6b46c1,color:#fff
class Trino query

classDef serve fill:#e53e3e,stroke:#c53030,color:#fff
class Grafana,Jupyter,Apps serve

classDef meta fill:#319795,stroke:#2c7a7b,color:#fff
class PostgreSQL meta