diff --git a/Makefile b/Makefile index e531170..efed701 100644 --- a/Makefile +++ b/Makefile @@ -162,18 +162,6 @@ test-all: eunit ## Запустить ВСЕ тесты (EUnit + API) # ============================================================================ # LOAD TESTING -#3. Мониторинг во время нагрузочного теста -#Во время теста полезно следить за состоянием ноды: -# -#Через Docker (если приложение в контейнере): -#bash -#docker stats eventhub -#docker exec eventhub /app/bin/eventhub remote_console -#Внутри консоли Erlang можно выполнить: -# -#erlang -#observer:start(). % графический мониторинг -#recon:proc_count(5). % топ-5 процессов по памяти (если установлен recon) # ============================================================================ tsung-test: ## Запустить нагрузочный тест Tsung @echo "Запуск нагрузочного теста Tsung..." diff --git a/docker/Dockerfile b/docker/Dockerfile index fde0ffe..0dbff6b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,6 +17,7 @@ COPY src/ src/ # Копируем sys.config из src/config/ в config/ COPY src/config/sys.config ./config/sys.config +COPY src/config/vm.args ./config/vm.args RUN rebar3 as prod release RUN rebar3 as prod tar @@ -38,7 +39,9 @@ RUN mkdir -p /app/data && chmod 777 /app/data WORKDIR /app EXPOSE 8080 8081 8445 8446 +ENV PATH="/app/erts-16.3.1/bin:$PATH" + ENV RELX_REPLACE_OS_VARS=true ENV MNESIA_DIR=/app/data -CMD ["/app/bin/eventhub", "foreground"] \ No newline at end of file +CMD ["/app/bin/eventhub", "foreground"] \ No newline at end of file diff --git a/docker/ObserverWeb.Dockerfile b/docker/ObserverWeb.Dockerfile new file mode 100644 index 0000000..e4992bd --- /dev/null +++ b/docker/ObserverWeb.Dockerfile @@ -0,0 +1,27 @@ +FROM erlang:28-alpine + +RUN apk add --no-cache \ + elixir \ + nodejs \ + npm \ + git \ + inotify-tools + +WORKDIR /app + +RUN git clone https://github.com/thiagoesteves/observer_web.git . 
+ +RUN mix local.hex --force && mix local.rebar --force && mix deps.get + +RUN mkdir -p priv/static && \ + touch priv/static/app.css && touch priv/static/app.js && \ + cd assets && npm install && cd .. && \ + mix assets.build + +# Copy our dev.exs, which auto-connects to the EventHub nodes +COPY docker/observer_web/dev.exs . + +EXPOSE 4000 + +# Start the server only (no manual Node.connect); exec makes elixir PID 1, cookie comes from RELEASE_COOKIE +CMD exec elixir --sname observer_web@observer_web --cookie "${RELEASE_COOKIE:-eventhub_cookie}" -S mix run --no-halt dev.exs \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 20efa3d..6959196 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: eventhub-node1: build: @@ -18,6 +16,7 @@ services: - ADMIN_HTTP_PORT=8445 - ADMIN_WS_PORT=8446 - MNESIA_DIR=/app/data + - RELEASE_COOKIE=eventhub_cookie volumes: - eventhub-node1-data:/app/data networks: @@ -41,6 +40,7 @@ services: - ADMIN_HTTP_PORT=8445 - ADMIN_WS_PORT=8446 - MNESIA_DIR=/app/data + - RELEASE_COOKIE=eventhub_cookie - JOIN_NODES=eventhub-node1@eventhub-node1 volumes: - eventhub-node2-data:/app/data @@ -67,6 +67,7 @@ services: - ADMIN_HTTP_PORT=8445 - ADMIN_WS_PORT=8446 - MNESIA_DIR=/app/data + - RELEASE_COOKIE=eventhub_cookie - JOIN_NODES=eventhub-node1@eventhub-node1 volumes: - eventhub-node3-data:/app/data @@ -76,10 +77,58 @@ services: - eventhub-node1 restart: unless-stopped + observer_web: + build: + context: .. 
+ dockerfile: docker/ObserverWeb.Dockerfile + container_name: observer_web + ports: + - "4000:4000" + environment: + - RELEASE_COOKIE=eventhub_cookie + networks: + - eventhub-net + restart: unless-stopped + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + ports: + - "9090:9090" + networks: + - eventhub-net + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + depends_on: + - prometheus + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set} + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning + - ./grafana/eventhub-erlang.json:/etc/grafana/dashboards/eventhub-erlang.json:ro + networks: + - eventhub-net + restart: unless-stopped + volumes: eventhub-node1-data: eventhub-node2-data: eventhub-node3-data: + prometheus-data: + grafana-data: networks: eventhub-net: diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh deleted file mode 100644 index a4c35d8..0000000 --- a/docker/docker-entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -set -e - -# Создаём директорию Mnesia, если не существует -mkdir -p ${MNESIA_DIR} - -# Запускаем приложение -exec /app/bin/eventhub foreground \ No newline at end of file diff --git a/docker/grafana/eventhub-erlang.json b/docker/grafana/eventhub-erlang.json new file mode 100644 index 0000000..269f671 --- /dev/null +++ b/docker/grafana/eventhub-erlang.json @@ -0,0 +1,1543 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": "Prometheus", + "expr": "sum(up{job=~\"eventhub.*\", instance=~\"$instance\"})", + "refId": "A" + } + ], + "title": "Nodes Up", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": "Prometheus", + "expr": "sum(erlang_vm_process_count{instance=~\"$instance\"})", + "refId": "A" + } + ], + "title": "Total Processes", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 
5, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": "Prometheus", + "expr": "avg(erlang_vm_process_count{instance=~\"$instance\"} / erlang_vm_process_limit{instance=~\"$instance\"}) * 100", + "refId": "A" + } + ], + "title": "Avg Process Usage %", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": "Prometheus", + "expr": "avg(sum by (instance) (erlang_vm_memory_bytes_total{instance=~\"$instance\"}))", + "refId": "A" + } + ], + "title": "Avg Memory per Node", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 101, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_memory_bytes_total{kind=\"system\", instance=~\"$instance\"}", + "legendFormat": "System - {{instance}}", + "refId": "A" + }, + { + "expr": "erlang_vm_memory_bytes_total{kind=\"processes\", instance=~\"$instance\"}", + "legendFormat": "Processes - {{instance}}", + "refId": "B" + }, + { + "expr": "erlang_vm_memory_system_bytes_total{usage=\"atom\", instance=~\"$instance\"}", + "legendFormat": "Atom - {{instance}}", + "refId": "C" + }, + { + "expr": "erlang_vm_memory_system_bytes_total{usage=\"binary\", instance=~\"$instance\"}", + "legendFormat": "Binary - {{instance}}", + "refId": "D" + }, + { + "expr": "erlang_vm_memory_system_bytes_total{usage=\"ets\", instance=~\"$instance\"}", + "legendFormat": "ETS - {{instance}}", + "refId": "E" + } + ], + "title": "Memory Breakdown (Stacked)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 102, + "panels": [], + "title": "Processes & Atoms", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Process Limit/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 5 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^Atom Limit/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 5 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_process_count{instance=~\"$instance\"}", + "legendFormat": "Process Count - {{instance}}", + "refId": "A" + }, + { + "expr": "erlang_vm_process_limit{instance=~\"$instance\"}", + "legendFormat": "Process Limit - {{instance}}", + "refId": "B" + }, + { + "expr": "erlang_vm_atom_count{instance=~\"$instance\"}", + "legendFormat": "Atom Count - {{instance}}", + "refId": "C" + }, + { + "expr": "erlang_vm_atom_limit{instance=~\"$instance\"}", + "legendFormat": "Atom Limit - {{instance}}", + "refId": "D" + } + ], + "title": "Process & Atom Limits", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_process_count{instance=~\"$instance\"} / erlang_vm_process_limit{instance=~\"$instance\"} * 100", + "legendFormat": "Process Usage % - {{instance}}", + "refId": "A" + }, + { + "expr": "erlang_vm_atom_count{instance=~\"$instance\"} / erlang_vm_atom_limit{instance=~\"$instance\"} * 100", + "legendFormat": "Atom Usage % - {{instance}}", + "refId": "B" + } + ], + "title": "Capacity Utilization (%)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 103, + "panels": [], + "title": "Mnesia", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + 
"options": "/^Committed Tx Rate/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^Failed Tx Rate/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(erlang_mnesia_committed_transactions{instance=~\"$instance\"}[1m])", + "legendFormat": "Committed Tx Rate - {{instance}}", + "refId": "A" + }, + { + "expr": "rate(erlang_mnesia_failed_transactions{instance=~\"$instance\"}[1m])", + "legendFormat": "Failed Tx Rate - {{instance}}", + "refId": "B" + } + ], + "title": "Transaction Rates", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { 
+ "expr": "erlang_mnesia_held_locks{instance=~\"$instance\"}", + "legendFormat": "Held Locks - {{instance}}", + "refId": "A" + }, + { + "expr": "erlang_mnesia_lock_queue{instance=~\"$instance\"}", + "legendFormat": "Lock Queue - {{instance}}", + "refId": "B" + }, + { + "expr": "erlang_mnesia_transaction_participants{instance=~\"$instance\"}", + "legendFormat": "Participants - {{instance}}", + "refId": "C" + }, + { + "expr": "erlang_mnesia_transaction_coordinators{instance=~\"$instance\"}", + "legendFormat": "Coordinators - {{instance}}", + "refId": "D" + } + ], + "title": "Locks & Participants", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 104, + "panels": [], + "title": "HTTP (Cowboy)", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Errors Rate/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { 
+ "expr": "rate(cowboy_requests_total{instance=~\"$instance\"}[1m])", + "legendFormat": "Requests Rate - {{instance}}", + "refId": "A" + }, + { + "expr": "rate(cowboy_errors_total{instance=~\"$instance\"}[1m])", + "legendFormat": "Errors Rate - {{instance}}", + "refId": "B" + } + ], + "title": "Request & Error Rates", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 37 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(cowboy_requests_total{instance=~\"$instance\"})", + "refId": "A" + } + ], + "title": "Total Requests", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 37 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "sum(cowboy_errors_total{instance=~\"$instance\"})", + "refId": "A" + } + ], + "title": "Total Errors", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 105, + "panels": [], + "title": "Distribution", + "type": "row" + }, + { + 
"datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "sum" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(erlang_vm_dist_recv_bytes{instance=~\"$instance\"}[1m])", + "legendFormat": "Recv - {{peer}}", + "refId": "A" + }, + { + "expr": "rate(erlang_vm_dist_send_bytes{instance=~\"$instance\"}[1m])", + "legendFormat": "Sent - {{peer}}", + "refId": "B" + } + ], + "title": "Traffic per Peer", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + { + "type": "value", + "options": { + "0": 
{ + "text": "Disconnected", + "color": "red" + }, + "1": { + "text": "Connected", + "color": "green" + } + } + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 46 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_dist_node_state{instance=~\"$instance\"}", + "legendFormat": "State - {{peer}}", + "refId": "A" + } + ], + "title": "Node Connection State", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 46 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_dist_node_queue_size_bytes{instance=~\"$instance\"}", + "legendFormat": "Queue - {{peer}}", + "refId": "A" + } + ], + "title": "Node Queue Size", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 
0, + "y": 54 + }, + "id": 106, + "panels": [], + "title": "VM Internals", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Bytes Reclaimed Rate/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(erlang_vm_statistics_garbage_collection_number_of_gcs{instance=~\"$instance\"}[1m])", + "legendFormat": "GC Rate - {{instance}}", + "refId": "A" + }, + { + "expr": "rate(erlang_vm_statistics_garbage_collection_bytes_reclaimed{instance=~\"$instance\"}[1m])", + "legendFormat": "Bytes Reclaimed Rate - {{instance}}", + "refId": "B" + }, + { + "expr": "rate(erlang_vm_statistics_reductions_total{instance=~\"$instance\"}[1m])", + "legendFormat": "Reductions Rate - {{instance}}", + "refId": "C" + } + ], + "title": "GC & Reductions", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "erlang_vm_statistics_run_queues_length{instance=~\"$instance\"}", + "legendFormat": "Run Queue - {{instance}}", + "refId": "A" + }, + { + "expr": "erlang_vm_statistics_dirty_cpu_run_queue_length{instance=~\"$instance\"}", + "legendFormat": "Dirty CPU Queue - {{instance}}", + "refId": "B" + }, + { + "expr": "erlang_vm_statistics_dirty_io_run_queue_length{instance=~\"$instance\"}", + "legendFormat": "Dirty IO Queue - {{instance}}", + "refId": "C" + } + ], + "title": "Scheduler Queues", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "eventhub", + "erlang" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "Prometheus", + "definition": "label_values(up{job=~\"eventhub.*\"}, instance)", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "options": [], + "query": "label_values(up{job=~\"eventhub.*\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + 
] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "EventHub - Erlang Dashboard", + "version": 0 +} \ No newline at end of file diff --git a/docker/grafana/provisioning/dashboards/dashboard.yml b/docker/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..ff95e0b --- /dev/null +++ b/docker/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,13 @@ +apiVersion: 1 + +providers: + - name: 'EventHub' + orgId: 1 + folder: '' + type: file + disableDeletion: true + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /etc/grafana/dashboards + foldersFromFilesStructure: true \ No newline at end of file diff --git a/docker/grafana/provisioning/datasources/prometheus.yml b/docker/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..27d6bb9 --- /dev/null +++ b/docker/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 # имя сервиса Prometheus в docker-compose + isDefault: true + editable: false \ No newline at end of file diff --git a/docker/observer_web/dev.exs b/docker/observer_web/dev.exs new file mode 100644 index 0000000..b28aef0 --- /dev/null +++ b/docker/observer_web/dev.exs @@ -0,0 +1,115 @@ +# Development server for Observer Web + +# Phoenix + +defmodule WebDev.Router do + use Phoenix.Router, helpers: false + + import Observer.Web.Router + + pipeline :browser do + plug(:fetch_session) + end + + scope "/" do + pipe_through(:browser) + + observer_dashboard("/observer") + end +end + +defmodule WebDev.Endpoint do + use Phoenix.Endpoint, otp_app: :observer_web + + socket("/live", Phoenix.LiveView.Socket) + socket("/phoenix/live_reload/socket", Phoenix.LiveReloader.Socket) + + plug(Phoenix.LiveReloader) + plug(Phoenix.CodeReloader) + + plug(Plug.Session, + store: :cookie, + key: "_observer_web_key", + 
signing_salt: "/VEDsdfsffMnp5" + ) + + plug(WebDev.Router) +end + +defmodule WebDev.ErrorHTML do + use Phoenix.Component + + def render(template, _assigns) do + Phoenix.Controller.status_message_from_template(template) + end +end + +# Configuration + +port = "PORT" |> System.get_env("4000") |> String.to_integer() + +Application.put_env(:observer_web, WebDev.Endpoint, + adapter: Bandit.PhoenixAdapter, + check_origin: false, + debug_errors: true, + http: [port: port], + live_view: [signing_salt: "eX7TFPY6Y/+XQ1o2pOUW3DjgAoXGTAdX"], + pubsub_server: ObserverWeb.PubSub, + render_errors: [formats: [html: WebDev.ErrorHTML], layout: false], + secret_key_base: "jAu3udxm+8tIRDXLLKo+EupAlEvdLsnNG82O8e9nqylpBM9gP8AjUnZ4PWNttztU", + url: [host: "localhost"], + watchers: [ + esbuild: {Esbuild, :install_and_run, [:default, ~w(--sourcemap=inline --watch)]}, + tailwind: {Tailwind, :install_and_run, [:default, ~w(--watch)]} + ], + live_reload: [ + patterns: [ + ~r"priv/static/.*(js|css|png|jpeg|jpg|gif|svg)$", + ~r"lib/web/components/.*(ex)$", + ~r"lib/web/live/.*(ex)$" + ] + ] +) + +Application.put_env(:phoenix, :serve_endpoints, true) +Application.put_env(:phoenix, :persistent, true) + +Task.async(fn -> + # Stop the default Telemetry server to start a new one with new defaults + mode = "OBSERVER_WEB_TELEMETRY_MODE" |> System.get_env("local") |> String.to_atom() + + retention_period = + "OBSERVER_WEB_TELEMETRY_RETENTION_PERIOD" |> System.get_env("1800000") |> String.to_integer() + + telemetry_module = ObserverWeb.Telemetry.Storage + :ok = Supervisor.terminate_child(ObserverWeb.Application, telemetry_module) + :ok = Supervisor.delete_child(ObserverWeb.Application, telemetry_module) + + {:ok, _} = + Supervisor.start_child( + ObserverWeb.Application, + {telemetry_module, [mode: mode, data_retention_period: retention_period]} + ) + + {:ok, _} = Supervisor.start_child(ObserverWeb.Application, WebDev.Endpoint) + + Process.sleep(:infinity) +end) + +# 
============================================================
+# Auto-connect to the EventHub nodes after a startup delay
+# ============================================================
+Task.start(fn ->
+  :timer.sleep(7000) # give Phoenix time to finish starting
+  nodes = [
+    :"eventhub-node1@eventhub-node1",
+    :"eventhub-node2@eventhub-node2",
+    :"eventhub-node3@eventhub-node3"
+  ]
+  Enum.each(nodes, fn node ->
+    case Node.connect(node) do # returns true | false | :ignored (local node not alive)
+      true -> IO.puts("Connected to #{node}")
+      failed when failed in [false, :ignored] -> IO.puts("Failed to connect to #{node} (will retry manually)")
+    end
+  end)
+end)
\ No newline at end of file
diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml
new file mode 100644
index 0000000..2db29e1
--- /dev/null
+++ b/docker/prometheus/prometheus.yml
@@ -0,0 +1,22 @@
+global:
+  scrape_interval: 5s
+
+scrape_configs:
+  - job_name: 'eventhub-node1'
+    static_configs:
+      - targets: ['eventhub-node1:8080'] # http://localhost:8080/metrics/default
+        labels:
+          node: 'node1'
+    metrics_path: '/metrics/default'
+  - job_name: 'eventhub-node2'
+    static_configs:
+      - targets: ['eventhub-node2:8080']
+        labels:
+          node: 'node2'
+    metrics_path: '/metrics/default'
+  - job_name: 'eventhub-node3'
+    static_configs:
+      - targets: ['eventhub-node3:8080']
+        labels:
+          node: 'node3'
+    metrics_path: '/metrics/default'
\ No newline at end of file
diff --git a/rebar.config b/rebar.config
index cdc751c..8fbf25d 100644
--- a/rebar.config
+++ b/rebar.config
@@ -8,7 +8,8 @@
     {jose, "1.11.10"},
     {argon2, "1.2.0"},
     {meck, "0.9.2"},
-    {gun, "2.0.0"}
+    {gun, "2.0.0"},
+    {prometheus_cowboy, "0.2.0"}
 ]}.
{shell, [ @@ -25,7 +26,7 @@ {profiles, [ {prod, [ {relx, [ - {release, {eventhub, "0.0.1"}, [eventhub, sasl]}, + {release, {eventhub, "0.0.1"}, [eventhub, sasl, runtime_tools, os_mon, prometheus_cowboy]}, {include_erts, true}, {extended_start_script, true}, {sys_config, "./src/config/sys.config"} diff --git a/src/config/vm.args b/src/config/vm.args new file mode 100644 index 0000000..c23151d --- /dev/null +++ b/src/config/vm.args @@ -0,0 +1,3 @@ +-sname ${NODE_NAME} +-setcookie ${RELEASE_COOKIE} +-kernel inet_dist_use_interface {0,0,0,0} \ No newline at end of file diff --git a/src/eventhub_app.erl b/src/eventhub_app.erl index c93095a..1df5c9e 100644 --- a/src/eventhub_app.erl +++ b/src/eventhub_app.erl @@ -14,6 +14,9 @@ start(_StartType, _StartArgs) -> ok = infra_mnesia:wait_for_tables(), start_http(), start_admin_http(), + % Запускаем сборщик метрик Prometheus + application:ensure_all_started(prometheus), + application:ensure_all_started(prometheus_cowboy), {ok, Pid}; Error -> Error @@ -27,6 +30,7 @@ start_http() -> Dispatch = cowboy_router:compile([ {'_', [ + {"/metrics/[:registry]", prometheus_cowboy2_handler, []}, {"/health", handler_health, []}, {"/v1/register", handler_register, []}, {"/v1/login", handler_login, []},