%% Data Analytics Lakehouse Architecture
%% Related: ADR-0033
%%
%% Top-to-bottom flowchart of the analytics platform. Each subgraph groups the
%% components of one layer; the arrows below the subgraphs wire the layers
%% together, and the classDef/class statements at the bottom colour the nodes
%% by layer.

flowchart TB
    %% Ingestion: the three entry points drawn for incoming data.
    subgraph Ingestion["Data Ingestion"]
        Kafka["Kafka<br/>Event Streams"]
        APIs["REST APIs<br/>Batch Loads"]
        Files["File Drops<br/>S3/NFS"]
    end

    %% Processing: one nested subgraph per processing mode.
    subgraph Processing["Processing Layer"]
        subgraph Batch["Batch Processing"]
            Spark["Apache Spark<br/>spark-operator"]
        end
        subgraph Stream["Stream Processing"]
            Flink["Apache Flink<br/>flink-operator"]
        end
        subgraph Realtime["Real-time"]
            RisingWave["RisingWave<br/>Streaming SQL"]
        end
    end

    %% Catalog: Nessie and the Iceberg table format.
    subgraph Catalog["Lakehouse Catalog"]
        Nessie["Nessie<br/>Git-like Versioning"]
        Iceberg["Apache Iceberg<br/>Table Format"]
    end

    %% Storage: object store and the file format kept in it.
    subgraph Storage["Storage Layer"]
        S3["S3 (MinIO)<br/>Object Storage"]
        Parquet["Parquet Files<br/>Columnar Format"]
    end

    %% Query: the single SQL engine in the diagram.
    subgraph Query["Query Layer"]
        Trino["Trino<br/>Distributed SQL"]
    end

    %% Serving: consumers drawn downstream of Trino.
    subgraph Serve["Serving Layer"]
        Grafana["Grafana<br/>Dashboards"]
        Jupyter["JupyterHub<br/>Notebooks"]
        Apps["Applications<br/>REST APIs"]
    end

    %% Metadata: the PostgreSQL node that Nessie points at.
    subgraph Metadata["Metadata Store"]
        PostgreSQL["CloudNativePG<br/>analytics-db"]
    end

    %% Ingestion to processing: Kafka feeds both streaming engines,
    %% while REST APIs and file drops feed Spark.
    Kafka --> Flink
    Kafka --> RisingWave
    APIs --> Spark
    Files --> Spark

    %% Processing to catalog: all three engines point at Nessie.
    Spark --> Nessie
    Flink --> Nessie
    RisingWave --> Nessie

    %% Catalog to storage chain.
    Nessie --> Iceberg
    Iceberg --> S3
    S3 --> Parquet

    %% Catalog to metadata store.
    Nessie --> PostgreSQL

    %% Query engine to catalog.
    %% NOTE(review): these two arrows point from Trino into the catalog, whereas
    %% the other arrows appear to follow data flow - confirm the intended direction.
    Trino --> Nessie
    Trino --> Iceberg

    %% Query engine to serving layer.
    Trino --> Grafana
    Trino --> Jupyter
    Trino --> Apps

    %% Style classes, one per layer.
    classDef ingest fill:#4a5568,stroke:#718096,color:#fff
    classDef batch fill:#3182ce,stroke:#2b6cb0,color:#fff
    classDef stream fill:#38a169,stroke:#2f855a,color:#fff
    classDef catalog fill:#d69e2e,stroke:#b7791f,color:#fff
    classDef storage fill:#718096,stroke:#4a5568,color:#fff
    classDef query fill:#805ad5,stroke:#6b46c1,color:#fff
    classDef serve fill:#e53e3e,stroke:#c53030,color:#fff
    classDef meta fill:#319795,stroke:#2c7a7b,color:#fff

    %% Class assignments. Flink and RisingWave share the stream class.
    class Kafka,APIs,Files ingest
    class Spark batch
    class Flink,RisingWave stream
    class Nessie,Iceberg catalog
    class S3,Parquet storage
    class Trino query
    class Grafana,Jupyter,Apps serve
    class PostgreSQL meta