From e6710a32835df3503859abc48e793d5f14bd9baf Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 8 Oct 2025 09:06:38 +0200 Subject: [PATCH 01/51] Add pulsar to docker-compose --- deploy/dockerephemeral/docker-compose.yaml | 43 +++++++++++++++++++ .../pulsar-config/create-superuser.sh | 11 +++++ 2 files changed, 54 insertions(+) create mode 100755 deploy/dockerephemeral/pulsar-config/create-superuser.sh diff --git a/deploy/dockerephemeral/docker-compose.yaml b/deploy/dockerephemeral/docker-compose.yaml index 96eb142a54..46010ab43b 100644 --- a/deploy/dockerephemeral/docker-compose.yaml +++ b/deploy/dockerephemeral/docker-compose.yaml @@ -419,6 +419,49 @@ services: # - DNS_SERVER_RECURSION_ALLOWED_NETWORKS=127.0.0.1, 192.168.1.0/24 #Comma separated list of IP addresses or network addresses to allow recursion. Valid only for `UseSpecifiedNetworkACL` recursion option. This option is obsolete and DNS_SERVER_RECURSION_NETWORK_ACL should be used instead. # - DNS_SERVER_ENABLE_BLOCKING=false #Sets the DNS server to block domain names using Blocked Zone and Block List Zone. + pulsar: + image: apachepulsar/pulsar:latest + container_name: pulsar + ports: + - 6650:6650 + - 8080:8080 + networks: + - demo_wire + command: bin/pulsar standalone + + # Admin GUI + # Url: http://localhost:9527 + # Username: pulsar + # Password: walter-frosch + pulsar-manager: + image: apachepulsar/pulsar-manager:latest + container_name: pulsar-manager + ports: + - 9527:9527 + - 7750:7750 + networks: + - demo_wire + environment: + - "SPRING_CONFIGURATION_FILE=/pulsar-manager/pulsar-manager/application.properties" + healthcheck: + test: + ["CMD-SHELL", "curl http://127.0.0.1:7750/actuator/health || exit 1 "] + interval: 3s + timeout: 5s + retries: 100 + + pulsar-manager-init: + container_name: pulsar-manager-init + image: curlimages/curl:latest + networks: + - demo_wire + command: create-superuser + volumes: + - ./pulsar-config/create-superuser.sh:/bin/create-superuser + depends_on: + pulsar-manager: + condition: service_healthy + volumes: redis-node-1-data: redis-node-2-data: diff --git a/deploy/dockerephemeral/pulsar-config/create-superuser.sh b/deploy/dockerephemeral/pulsar-config/create-superuser.sh new file mode 100755 index 0000000000..73ebbccc39 --- /dev/null +++ b/deploy/dockerephemeral/pulsar-config/create-superuser.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env sh + +# Create a superuser in pulsar-manager. +# username: pulsar +# password: walter-frosch +CSRF_TOKEN=$(curl http://pulsar-manager:7750/pulsar-manager/csrf-token) +curl -v -H "X-XSRF-TOKEN: $CSRF_TOKEN" \ + -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \ + -H "Content-Type: application/json" \ + -X PUT http://pulsar-manager:7750/pulsar-manager/users/superuser \ + -d '{"name": "pulsar", "password": "walter-frosch", "description": "test", "email": "username@test.org"}' From 3900186ee3dd73f2decde3d43a9050e57b6dacfa Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 13 Nov 2025 19:11:00 +0100 Subject: [PATCH 02/51] Add pulsar-client-hs to nix env --- nix/default.nix | 8 ++++++++ nix/haskell-pins.nix | 12 ++++++++++++ nix/manual-overrides.nix | 9 ++++++++- nix/wire-server.nix | 2 +- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/nix/default.nix b/nix/default.nix index b945b4adc9..ba5181c4fa 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -4,6 +4,14 @@ let pkgs = import sources.nixpkgs { config.allowUnfree = true; overlays = [ + # TODO: We do this to get a patched version of `libpulsar`. And, then + # have to declare `pulsar-client-hs` elsewhere. Good enough for + # experimenting. Should be fixed before merge. + (import (builtins.fetchGit + { + url = "https://github.com/wireapp/pulsar-hs.git"; + rev = "d6021c45c088be67446ba92472eb993b8aafa947"; + } + "/nix/overlay.nix")) # All wire-server specific packages (import ./overlay.nix) (import ./overlay-docs.nix) diff --git a/nix/haskell-pins.nix b/nix/haskell-pins.nix index 8184e90576..f49763633b 100644 --- a/nix/haskell-pins.nix +++ b/nix/haskell-pins.nix @@ -71,6 +71,18 @@ let }; }; + pulsar-hs = { + src = fetchgit { + url = "https://github.com/wireapp/pulsar-hs"; + rev = "d6021c45c088be67446ba92472eb993b8aafa947"; + hash = "sha256-afztcgLmunGN7smOqU+FH0pm0HLoSIJmtDFkq19uQtQ="; + }; + packages = { + pulsar-client-hs = "pulsar-client-hs"; + pulsar-admin = "pulsar-admin"; + }; + }; + # -------------------- # END maintained by us # -------------------- diff --git a/nix/manual-overrides.nix b/nix/manual-overrides.nix index 189db41faa..37196ab87c 100644 --- a/nix/manual-overrides.nix +++ b/nix/manual-overrides.nix @@ -1,4 +1,4 @@ -{ libsodium, protobuf, hlib, mls-test-cli, fetchurl, curl, pkg-config, postgresql, openssl, ... }: +{ libsodium, protobuf, hlib, mls-test-cli, fetchurl, curl, pkg-config, postgresql, openssl, libpulsar, ... }: # FUTUREWORK: Figure out a way to detect if some of these packages are not # actually marked broken, so we can cleanup this file on every nixpkgs bump. hself: hsuper: { @@ -102,4 +102,11 @@ hself: hsuper: { unix ]; }); + + pulsar-client-hs = hlib.doJailbreak (hlib.overrideCabal hsuper.pulsar-client-hs (drv: { + # TODO: This should just replace `pulsar` (broken package, not related to + # message brokers) with the `libpulsar` patched by us. However, we only + # want to try `pulsar-client-hs` now... + librarySystemDepends = [ libpulsar ]; + })); } diff --git a/nix/wire-server.nix b/nix/wire-server.nix index 5063f8c4f8..6b56753837 100644 --- a/nix/wire-server.nix +++ b/nix/wire-server.nix @@ -158,7 +158,7 @@ let ]; manualOverrides = import ./manual-overrides.nix (with pkgs; { - inherit (pkgs) libsodium protobuf fetchpatch fetchurl curl pkg-config postgresql openssl; + inherit (pkgs) libsodium protobuf fetchpatch fetchurl curl pkg-config postgresql openssl libpulsar; inherit hlib mls-test-cli; }); From 4085e809d91c805861f2b68f61bf9b9a3a8f7d8d Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 13 Nov 2025 19:11:20 +0100 Subject: [PATCH 03/51] Pretend to use pulsar-client-hs in Cannon --- services/cannon/cannon.cabal | 1 + services/cannon/default.nix | 2 ++ 2 files changed, 3 insertions(+) diff --git a/services/cannon/cannon.cabal b/services/cannon/cannon.cabal index c526d3cd9c..3ce70d06c2 100644 --- a/services/cannon/cannon.cabal +++ b/services/cannon/cannon.cabal @@ -108,6 +108,7 @@ library , metrics-wai >=0.4 , mwc-random >=0.13 , prometheus-client + , pulsar-client-hs , retry >=0.7 , safe-exceptions , servant-conduit diff --git a/services/cannon/default.nix b/services/cannon/default.nix index 0544d33564..b654c145f0 100644 --- a/services/cannon/default.nix +++ b/services/cannon/default.nix @@ -34,6 +34,7 @@ , metrics-wai , mwc-random , prometheus-client +, pulsar-client-hs , QuickCheck , random , retry @@ -95,6 +96,7 @@ mkDerivation { metrics-wai mwc-random prometheus-client + pulsar-client-hs retry safe-exceptions servant-conduit From d8c55b0d110e6dcbf8f8a1f7d05de5e8f948d988 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 14 Nov 2025 15:50:02 +0100 Subject: [PATCH 04/51] Upate pulsar-hs pin --- nix/default.nix | 2 +- nix/haskell-pins.nix | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/default.nix b/nix/default.nix index ba5181c4fa..42f1bcf21d 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -10,7 +10,7 @@ let (import (builtins.fetchGit { url = "https://github.com/wireapp/pulsar-hs.git"; - rev = "d6021c45c088be67446ba92472eb993b8aafa947"; + rev = "c5e8520b0c3efbd022659ceb642fb73e903bd933"; } + "/nix/overlay.nix")) # All wire-server specific packages (import ./overlay.nix) diff --git a/nix/haskell-pins.nix b/nix/haskell-pins.nix index f49763633b..c50f0e0bb7 100644 --- a/nix/haskell-pins.nix +++ b/nix/haskell-pins.nix @@ -74,8 +74,8 @@ let pulsar-hs = { src = fetchgit { url = "https://github.com/wireapp/pulsar-hs"; - rev = "d6021c45c088be67446ba92472eb993b8aafa947"; - hash = "sha256-afztcgLmunGN7smOqU+FH0pm0HLoSIJmtDFkq19uQtQ="; + rev = "c5e8520b0c3efbd022659ceb642fb73e903bd933"; + hash = "sha256-0TNnhpM/PXCnZ8+Jw5CfDt8ZkeBrakCQ0DwyJQRRFrk="; }; packages = { pulsar-client-hs = "pulsar-client-hs"; From 91de5337b411888ba42643d111224b89a2c2afc5 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 17 Nov 2025 10:40:24 +0100 Subject: [PATCH 05/51] Better start with Gundeck --- services/cannon/cannon.cabal | 1 - services/cannon/default.nix | 2 -- services/gundeck/default.nix | 2 ++ services/gundeck/gundeck.cabal | 1 + 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/services/cannon/cannon.cabal b/services/cannon/cannon.cabal index 3ce70d06c2..c526d3cd9c 100644 --- a/services/cannon/cannon.cabal +++ b/services/cannon/cannon.cabal @@ -108,7 +108,6 @@ library , metrics-wai >=0.4 , mwc-random >=0.13 , prometheus-client - , pulsar-client-hs , retry >=0.7 , safe-exceptions , servant-conduit diff --git a/services/cannon/default.nix b/services/cannon/default.nix index b654c145f0..0544d33564 100644 --- a/services/cannon/default.nix +++ b/services/cannon/default.nix @@ -34,7 +34,6 @@ , metrics-wai , mwc-random , prometheus-client -, pulsar-client-hs , QuickCheck , random , retry @@ -96,7 +95,6 @@ mkDerivation { metrics-wai mwc-random prometheus-client - pulsar-client-hs retry safe-exceptions servant-conduit diff --git a/services/gundeck/default.nix b/services/gundeck/default.nix index a709d333df..2da6e313ca 100644 --- a/services/gundeck/default.nix +++ b/services/gundeck/default.nix @@ -52,6 +52,7 @@ , optparse-applicative , prometheus-client , psqueues +, pulsar-client-hs , QuickCheck , quickcheck-instances , quickcheck-state-machine @@ -133,6 +134,7 @@ mkDerivation { network-uri prometheus-client psqueues + pulsar-client-hs raw-strings-qq resourcet retry diff --git a/services/gundeck/gundeck.cabal b/services/gundeck/gundeck.cabal index 470cd7b5ae..3cd14ea3c9 100644 --- a/services/gundeck/gundeck.cabal +++ b/services/gundeck/gundeck.cabal @@ -149,6 +149,7 @@ library , network-uri >=2.6 , prometheus-client , psqueues >=0.2.2 + , pulsar-client-hs , raw-strings-qq , resourcet >=1.1 , retry >=0.5 From 35762772d7856254ac34f66a6ac90f2bad5a0de6 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 17 Nov 2025 17:49:38 +0100 Subject: [PATCH 06/51] Replace pulsar-hs overlay Otherwisem, HLS goes insane... :/ --- nix/default.nix | 8 -------- nix/manual-overrides.nix | 2 ++ nix/overlay.nix | 12 ++++++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/nix/default.nix b/nix/default.nix index 42f1bcf21d..b945b4adc9 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -4,14 +4,6 @@ let pkgs = import sources.nixpkgs { config.allowUnfree = true; overlays = [ - # TODO: We do this to get a patched version of `libpulsar`. And, then - # have to declare `pulsar-client-hs` elsewhere. Good enough for - # experimenting. Should be fixed before merge. - (import (builtins.fetchGit - { - url = "https://github.com/wireapp/pulsar-hs.git"; - rev = "c5e8520b0c3efbd022659ceb642fb73e903bd933"; - } + "/nix/overlay.nix")) # All wire-server specific packages (import ./overlay.nix) (import ./overlay-docs.nix) diff --git a/nix/manual-overrides.nix b/nix/manual-overrides.nix index 37196ab87c..2571a57cae 100644 --- a/nix/manual-overrides.nix +++ b/nix/manual-overrides.nix @@ -108,5 +108,7 @@ hself: hsuper: { # message brokers) with the `libpulsar` patched by us. However, we only # want to try `pulsar-client-hs` now... librarySystemDepends = [ libpulsar ]; + # This is really evil: Nix ignores Cabal's ghc-options! + configureFlags = [ "--ghc-options=-optl-Wl,--wrap=pulsar_client_configuration_set_logger_t" ]; })); } diff --git a/nix/overlay.nix b/nix/overlay.nix index 36af565994..d17ede6895 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -83,4 +83,16 @@ self: super: { rabbitmqadmin = super.callPackage ./pkgs/rabbitmqadmin { }; sbomqs = super.callPackage ./pkgs/sbomqs { }; + + # TODO: Quick and dirty patching. Using the overlay causes troubles to HLS + # (why ever...). This is good enough for experimenting, though. + libpulsar = super.libpulsar.overrideAttrs (old: { + patches = (old.patches or [ ]) ++ [ + (builtins.fetchGit + { + url = "https://github.com/wireapp/pulsar-hs.git"; + rev = "c5e8520b0c3efbd022659ceb642fb73e903bd933"; + } + /nix/add-stdbool-table-view.patch) + ]; + }); } From d2366c5e4a56333ffe9f2896755b4c0f1a4ccd4d Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 17 Nov 2025 18:38:18 +0100 Subject: [PATCH 07/51] Push Gundeck notifications also to Pulsar We simply push both for now, RabbitMQ and Pulsar. --- services/gundeck/src/Gundeck/Push.hs | 52 ++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 3609bb5dfb..232189479e 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -54,6 +54,7 @@ import Control.Lens (to, view, (.~), (^.)) import Control.Monad.Catch import Control.Monad.Except (throwError) import Data.Aeson qualified as Aeson +import Data.ByteString qualified as B import Data.ByteString.Conversion (toByteString') import Data.Id import Data.List.Extra qualified as List @@ -84,9 +85,11 @@ import Network.AMQP (Message (..)) import Network.AMQP qualified as Q import Network.HTTP.Types import Network.Wai.Utilities +import Pulsar.Client qualified as Pulsar import System.Logger.Class (msg, val, (+++), (.=), (~~)) import System.Logger.Class qualified as Log import UnliftIO (pooledMapConcurrentlyN) +import UnliftIO.Resource import Util.Options import Wire.API.Internal.Notification import Wire.API.Notification @@ -115,6 +118,7 @@ class (MonadThrow m) => MonadPushAll m where mpaRunWithBudget :: Int -> a -> m a -> m a mpaGetClients :: Set UserId -> m UserClientsFull mpaPublishToRabbitMq :: Text -> Text -> Q.Message -> m () + mpaPublishToPulsar :: Text -> Q.Message -> m () instance MonadPushAll Gundeck where mpaNotificationTTL = view (options . settings . notificationTTL) @@ -128,12 +132,39 @@ instance MonadPushAll Gundeck where mpaRunWithBudget = runWithBudget'' mpaGetClients = getClients mpaPublishToRabbitMq = publishToRabbitMq + mpaPublishToPulsar = publishToPulsar publishToRabbitMq :: Text -> Text -> Q.Message -> Gundeck () publishToRabbitMq exchangeName routingKey qMsg = do chan <- getRabbitMqChan void $ liftIO $ Q.publishMsg chan exchangeName routingKey qMsg +publishToPulsar :: Text -> Q.Message -> Gundeck () +publishToPulsar routingKey qMsg = do + Pulsar.withClient Pulsar.defaultClientConfiguration "pulsar://localhost:6650" $ + Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName onPulsarError $ do + result <- runResourceT $ do + (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ B.toStrict (Q.msgBody qMsg)} + lift $ Pulsar.sendMessage message + lift $ logPulsarResult result + where + topicName = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ Text.unpack routingKey + + onPulsarError :: Pulsar.RawResult -> Gundeck () + onPulsarError result = + case Pulsar.renderResult result of + Just r -> Log.err $ Log.msg errorMsg . Log.field "error" (show r) + Nothing -> Log.err $ Log.msg errorMsg . Log.field "error" (show (Pulsar.unRawResult result)) + + errorMsg :: String + errorMsg = "Failed to create Pulsar producer." :: String + + logPulsarResult :: Pulsar.RawResult -> Gundeck () + logPulsarResult result = + case Pulsar.renderResult result of + Just r -> Log.err $ Log.msg errorMsg . Log.field "error" (show r) + Nothing -> Log.err $ Log.msg errorMsg . Log.field "error" (show (Pulsar.unRawResult result)) + -- | Another layer of wrap around 'runWithBudget'. runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a runWithBudget'' budget fallback action = do @@ -247,7 +278,7 @@ pushAll pushes = do pushAllLegacy legacyNotifs allUserClients rabbitmqNotifs <- mapM mkNewNotification rabbitmqPushes - pushAllViaRabbitMq rabbitmqNotifs allUserClients + pushAllViaMessageBroker rabbitmqNotifs allUserClients -- Note that Cells needs all notifications because it doesn't matter whether -- some recipients have rabbitmq clients or not. @@ -297,9 +328,10 @@ pushNativeWithBudget notif psh dontPush = do mpaRunWithBudget cost () $ mpaPushNative notif (psh ^. pushNativePriority) =<< nativeTargets psh rcps' dontPush -pushAllViaRabbitMq :: (MonadPushAll m, MonadMapAsync m, MonadNativeTargets m) => [NewNotification] -> UserClientsFull -> m () -pushAllViaRabbitMq newNotifs userClientsFull = do +pushAllViaMessageBroker :: (MonadPushAll m, MonadMapAsync m, MonadNativeTargets m) => [NewNotification] -> UserClientsFull -> m () +pushAllViaMessageBroker newNotifs userClientsFull = do for_ newNotifs $ pushViaRabbitMq + for_ newNotifs $ pushViaPulsar mpaForkIO $ do for_ newNotifs $ \newNotif -> do let cassandraClients = Map.map (Set.filter $ not . supportsConsumableNotifications) userClientsFull.userClientsFull @@ -320,6 +352,20 @@ pushViaRabbitMq newNotif = do for_ routingKeys $ \routingKey -> mpaPublishToRabbitMq userNotificationExchangeName routingKey qMsg +pushViaPulsar :: (MonadPushAll m) => NewNotification -> m () +pushViaPulsar newNotif = do + qMsg <- mkMessage newNotif.nnNotification + let routingKeys = + Set.unions $ + flip Set.map (Set.fromList . toList $ newNotif.nnRecipients) \r -> + case r._recipientClients of + RecipientClientsAll -> + Set.singleton $ userRoutingKey r._recipientId + RecipientClientsSome (toList -> cs) -> + Set.fromList $ map (clientRoutingKey r._recipientId) cs + for_ routingKeys $ \routingKey -> + mpaPublishToPulsar routingKey qMsg + pushAllToCells :: (MonadPushAll m, Log.MonadLogger m) => [NewNotification] -> m () pushAllToCells newNotifs = do mQueue <- mpaCellsEventQueue From a79b6567f0122d16dc2fc7736ecedf4b68d40f10 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 17 Nov 2025 18:39:14 +0100 Subject: [PATCH 08/51] Add todo --- services/gundeck/src/Gundeck/Push.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 232189479e..57982fc91b 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -330,6 +330,7 @@ pushNativeWithBudget notif psh dontPush = do pushAllViaMessageBroker :: (MonadPushAll m, MonadMapAsync m, MonadNativeTargets m) => [NewNotification] -> UserClientsFull -> m () pushAllViaMessageBroker newNotifs userClientsFull = do + -- TODO: Stop pushing to RabbitMQ to prevent the rabbit from slowing us down. for_ newNotifs $ pushViaRabbitMq for_ newNotifs $ pushViaPulsar mpaForkIO $ do From 92fd10f3957b13676c65395445efb74155c6c99f Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Tue, 18 Nov 2025 08:49:14 +0100 Subject: [PATCH 09/51] Prepare user-notifications topic in test setup --- deploy/dockerephemeral/docker-compose.yaml | 9 +++++---- .../{create-superuser.sh => init-pulsar.sh} | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) rename deploy/dockerephemeral/pulsar-config/{create-superuser.sh => init-pulsar.sh} (65%) diff --git a/deploy/dockerephemeral/docker-compose.yaml b/deploy/dockerephemeral/docker-compose.yaml index 46010ab43b..ca7b7d47bb 100644 --- a/deploy/dockerephemeral/docker-compose.yaml +++ b/deploy/dockerephemeral/docker-compose.yaml @@ -422,6 +422,7 @@ services: pulsar: image: apachepulsar/pulsar:latest container_name: pulsar + # TODO: Change the mapping of port 8080. It has already been taken by nginz. ports: - 6650:6650 - 8080:8080 @@ -450,14 +451,14 @@ services: timeout: 5s retries: 100 - pulsar-manager-init: - container_name: pulsar-manager-init + pulsar-init: + container_name: pulsar-init image: curlimages/curl:latest networks: - demo_wire - command: create-superuser + command: init-pulsar volumes: - - ./pulsar-config/create-superuser.sh:/bin/create-superuser + - ./pulsar-config/init-pulsar.sh:/bin/init-pulsar depends_on: pulsar-manager: condition: service_healthy diff --git a/deploy/dockerephemeral/pulsar-config/create-superuser.sh b/deploy/dockerephemeral/pulsar-config/init-pulsar.sh similarity index 65% rename from deploy/dockerephemeral/pulsar-config/create-superuser.sh rename to deploy/dockerephemeral/pulsar-config/init-pulsar.sh index 73ebbccc39..44169d45c7 100755 --- a/deploy/dockerephemeral/pulsar-config/create-superuser.sh +++ b/deploy/dockerephemeral/pulsar-config/init-pulsar.sh @@ -9,3 +9,9 @@ curl -v -H "X-XSRF-TOKEN: $CSRF_TOKEN" \ -H "Content-Type: application/json" \ -X PUT http://pulsar-manager:7750/pulsar-manager/users/superuser \ -d '{"name": "pulsar", "password": "walter-frosch", "description": "test", "email": "username@test.org"}' + +curl -v -X PUT http://pulsar:8080/admin/v2/tenants/wire \ + -H "Content-Type: application/json" \ + -d '{"adminRoles": ["pulsar"], "allowedClusters": ["standalone"]}' + +curl -v -X PUT http://pulsar:8080/admin/v2/namespaces/wire/user-notifications From 830ee0e8fe17e0beffa75c40b8384e82688d7bed Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Tue, 18 Nov 2025 09:24:13 +0100 Subject: [PATCH 10/51] Improved logging on Pulsar notification push --- services/gundeck/src/Gundeck/Push.hs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 57982fc91b..582e87632a 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -142,16 +142,16 @@ publishToRabbitMq exchangeName routingKey qMsg = do publishToPulsar :: Text -> Q.Message -> Gundeck () publishToPulsar routingKey qMsg = do Pulsar.withClient Pulsar.defaultClientConfiguration "pulsar://localhost:6650" $ - Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName onPulsarError $ do + Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName logPulsarError $ do result <- runResourceT $ do (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ B.toStrict (Q.msgBody qMsg)} lift $ Pulsar.sendMessage message lift $ logPulsarResult result where - topicName = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ Text.unpack routingKey + topicName = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ Text.unpack routingKey - onPulsarError :: Pulsar.RawResult -> Gundeck () - onPulsarError result = + logPulsarError :: Pulsar.RawResult -> Gundeck () + logPulsarError result = case Pulsar.renderResult result of Just r -> Log.err $ Log.msg errorMsg . Log.field "error" (show r) Nothing -> Log.err $ Log.msg errorMsg . Log.field "error" (show (Pulsar.unRawResult result)) @@ -162,8 +162,11 @@ publishToPulsar routingKey qMsg = do logPulsarResult :: Pulsar.RawResult -> Gundeck () logPulsarResult result = case Pulsar.renderResult result of - Just r -> Log.err $ Log.msg errorMsg . Log.field "error" (show r) - Nothing -> Log.err $ Log.msg errorMsg . Log.field "error" (show (Pulsar.unRawResult result)) + Just r -> Log.err $ Log.msg resultMsg . Log.field "result" (show r) + Nothing -> Log.err $ Log.msg resultMsg . Log.field "result" (show (Pulsar.unRawResult result)) + + resultMsg :: String + resultMsg = "Result of sending Pulsar message." :: String -- | Another layer of wrap around 'runWithBudget'. runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a From f0dc1e3d3e1b9d36627ff8022220a12c478d9c40 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Tue, 18 Nov 2025 09:51:15 +0100 Subject: [PATCH 11/51] Configure message TTL Keep them around for a while. --- deploy/dockerephemeral/pulsar-config/init-pulsar.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deploy/dockerephemeral/pulsar-config/init-pulsar.sh b/deploy/dockerephemeral/pulsar-config/init-pulsar.sh index 44169d45c7..5e39894f11 100755 --- a/deploy/dockerephemeral/pulsar-config/init-pulsar.sh +++ b/deploy/dockerephemeral/pulsar-config/init-pulsar.sh @@ -14,4 +14,7 @@ curl -v -X PUT http://pulsar:8080/admin/v2/tenants/wire \ -H "Content-Type: application/json" \ -d '{"adminRoles": ["pulsar"], "allowedClusters": ["standalone"]}' -curl -v -X PUT http://pulsar:8080/admin/v2/namespaces/wire/user-notifications +curl -v -X PUT \ + http://pulsar:8080/admin/v2/namespaces/wire/user-notifications \ + -H "Content-Type: application/json" \ + -d '{"message_ttl_in_seconds":3600}' From 69fb21daad97fd3f396d307371849c37a36843fd Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Tue, 18 Nov 2025 09:51:56 +0100 Subject: [PATCH 12/51] Add logger to C client This gives us more insights about what't happening. --- services/gundeck/src/Gundeck/Push.hs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 582e87632a..f66d7965e7 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -86,6 +86,7 @@ import Network.AMQP qualified as Q import Network.HTTP.Types import Network.Wai.Utilities import Pulsar.Client qualified as Pulsar +import System.Logger qualified as Logger import System.Logger.Class (msg, val, (+++), (.=), (~~)) import System.Logger.Class qualified as Log import UnliftIO (pooledMapConcurrentlyN) @@ -141,7 +142,8 @@ publishToRabbitMq exchangeName routingKey qMsg = do publishToPulsar :: Text -> Q.Message -> Gundeck () publishToPulsar routingKey qMsg = do - Pulsar.withClient Pulsar.defaultClientConfiguration "pulsar://localhost:6650" $ + logger <- view applog + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (internalLogger logger)}) "pulsar://localhost:6650" $ Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName logPulsarError $ do result <- runResourceT $ do (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ B.toStrict (Q.msgBody qMsg)} @@ -168,6 +170,12 @@ publishToPulsar routingKey qMsg = do resultMsg :: String resultMsg = "Result of sending Pulsar message." :: String + -- TODO: Far from perfect, ignores log level and uses no fields + internalLogger :: Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () + internalLogger logger level file line message = + Logger.debug logger $ + Log.msg ("[" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message) + -- | Another layer of wrap around 'runWithBudget'. runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a runWithBudget'' budget fallback action = do From c8236b4865741b08613fc9c730a4ec811a7b08e1 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 19 Nov 2025 09:41:16 +0100 Subject: [PATCH 13/51] WIP: Use Pulsar for notification websockets (Cannon) --- .../src/Wire/API/Event/WebSocketProtocol.hs | 8 +- nix/haskell-pins.nix | 4 +- services/cannon/cannon.cabal | 5 +- services/cannon/default.nix | 8 + services/cannon/src/Cannon/API/Public.hs | 4 +- ...tMqConsumerApp.hs => PulsarConsumerApp.hs} | 202 ++++++++++++++---- 6 files changed, 177 insertions(+), 54 deletions(-) rename services/cannon/src/Cannon/{RabbitMqConsumerApp.hs => PulsarConsumerApp.hs} (58%) diff --git a/libs/wire-api/src/Wire/API/Event/WebSocketProtocol.hs b/libs/wire-api/src/Wire/API/Event/WebSocketProtocol.hs index 493e029cb0..548584e34a 100644 --- a/libs/wire-api/src/Wire/API/Event/WebSocketProtocol.hs +++ b/libs/wire-api/src/Wire/API/Event/WebSocketProtocol.hs @@ -24,13 +24,13 @@ import Data.Aeson (FromJSON, ToJSON) import Data.Aeson qualified as A import Data.Aeson.Types qualified as A import Data.Schema -import Data.Word import Imports import Wire.API.Internal.Notification import Wire.Arbitrary data AckData = AckData - { deliveryTag :: Word64, + { deliveryTag :: String, -- TODO: Maybe, use Pulsar.Client.MessageId? + -- | Acknowledge all deliveryTags <= 'deliveryTag', see RabbitMQ -- documenation: -- https://www.rabbitmq.com/docs/confirms#consumer-acks-multiple-parameter @@ -49,7 +49,7 @@ instance ToSchema AckData where data EventData = EventData { event :: QueuedNotification, - deliveryTag :: Word64 + deliveryTag :: String -- TODO: Maybe, use Pulsar.Client.MessageId? } deriving (Show, Eq, Generic) deriving (Arbitrary) via (GenericUniform EventData) @@ -64,7 +64,7 @@ instance ToSchema EventData where data SynchronizationData = SynchronizationData { markerId :: Text, - deliveryTag :: Word64 + deliveryTag :: String -- TODO: Maybe, use Pulsar.Client.MessageId? } deriving (Show, Eq, Generic) deriving (Arbitrary) via (GenericUniform SynchronizationData) diff --git a/nix/haskell-pins.nix b/nix/haskell-pins.nix index c50f0e0bb7..9253b42c60 100644 --- a/nix/haskell-pins.nix +++ b/nix/haskell-pins.nix @@ -74,8 +74,8 @@ let pulsar-hs = { src = fetchgit { url = "https://github.com/wireapp/pulsar-hs"; - rev = "c5e8520b0c3efbd022659ceb642fb73e903bd933"; - hash = "sha256-0TNnhpM/PXCnZ8+Jw5CfDt8ZkeBrakCQ0DwyJQRRFrk="; + rev = "1473ac3d78760d20d29e29c31077aad62b37a25a"; + hash = "sha256-wqWSw9JerP5QyOJHf5AB5vbceUUp2RoRNmA3Qt9+/rI="; }; packages = { pulsar-client-hs = "pulsar-client-hs"; diff --git a/services/cannon/cannon.cabal b/services/cannon/cannon.cabal index c526d3cd9c..7b1a15ab08 100644 --- a/services/cannon/cannon.cabal +++ b/services/cannon/cannon.cabal @@ -23,8 +23,8 @@ library Cannon.App Cannon.Dict Cannon.Options + Cannon.PulsarConsumerApp Cannon.RabbitMq - Cannon.RabbitMqConsumerApp Cannon.Run Cannon.Types Cannon.WS @@ -85,6 +85,7 @@ library , api-field-json-th >=0.1.0.2 , async >=2.0 , base >=4.6 && <5 + , base64 , bilge >=0.12 , binary , bytestring >=0.10 @@ -108,6 +109,7 @@ library , metrics-wai >=0.4 , mwc-random >=0.13 , prometheus-client + , pulsar-client-hs , retry >=0.7 , safe-exceptions , servant-conduit @@ -119,6 +121,7 @@ library , types-common >=0.16 , unix , unliftio + , utf8-string , vector >=0.10 , wai >=3.0 , wai-extra >=3.0 diff --git a/services/cannon/default.nix b/services/cannon/default.nix index 0544d33564..a21a613452 100644 --- a/services/cannon/default.nix +++ b/services/cannon/default.nix @@ -8,6 +8,7 @@ , api-field-json-th , async , base +, base64 , bilge , binary , bytestring @@ -34,6 +35,8 @@ , metrics-wai , mwc-random , prometheus-client +, proto-lens +, pulsar-client-hs , QuickCheck , random , retry @@ -50,6 +53,7 @@ , types-common , unix , unliftio +, utf8-string , uuid , vector , wai @@ -72,6 +76,7 @@ mkDerivation { api-field-json-th async base + base64 bilge binary bytestring @@ -95,6 +100,8 @@ mkDerivation { metrics-wai mwc-random prometheus-client + proto-lens + pulsar-client-hs retry safe-exceptions servant-conduit @@ -106,6 +113,7 @@ mkDerivation { types-common unix unliftio + utf8-string vector wai wai-extra diff --git a/services/cannon/src/Cannon/API/Public.hs b/services/cannon/src/Cannon/API/Public.hs index 45a56c38b6..6fd3ab661d 100644 --- a/services/cannon/src/Cannon/API/Public.hs +++ b/services/cannon/src/Cannon/API/Public.hs @@ -21,7 +21,7 @@ module Cannon.API.Public where import Cannon.App (wsapp) -import Cannon.RabbitMqConsumerApp (rabbitMQWebSocketApp) +import Cannon.PulsarConsumerApp (pulsarWebSocketApp) import Cannon.Types import Cannon.WS import Control.Monad.IO.Class @@ -47,4 +47,4 @@ streamData userId connId clientId con = do consumeEvents :: UserId -> Maybe ClientId -> Maybe Text -> PendingConnection -> Cannon () consumeEvents userId mClientId mSyncMarker con = do e <- wsenv - liftIO $ rabbitMQWebSocketApp userId mClientId mSyncMarker e con + liftIO $ pulsarWebSocketApp userId mClientId mSyncMarker e con diff --git a/services/cannon/src/Cannon/RabbitMqConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs similarity index 58% rename from services/cannon/src/Cannon/RabbitMqConsumerApp.hs rename to services/cannon/src/Cannon/PulsarConsumerApp.hs index 16c3c0e53f..05bb289b03 100644 --- a/services/cannon/src/Cannon/RabbitMqConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -1,45 +1,37 @@ {-# LANGUAGE RecordWildCards #-} --- This file is part of the Wire Server implementation. --- --- Copyright (C) 2025 Wire Swiss GmbH --- --- This program is free software: you can redistribute it and/or modify it under --- the terms of the GNU Affero General Public License as published by the Free --- Software Foundation, either version 3 of the License, or (at your option) any --- later version. --- --- This program is distributed in the hope that it will be useful, but WITHOUT --- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS --- FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more --- details. --- --- You should have received a copy of the GNU Affero General Public License along --- with this program. If not, see . - -module Cannon.RabbitMqConsumerApp (rabbitMQWebSocketApp) where +module Cannon.PulsarConsumerApp (pulsarWebSocketApp) where import Cannon.App (rejectOnError) import Cannon.Options import Cannon.RabbitMq import Cannon.WS hiding (env) import Cassandra as C hiding (batch) +import Conduit (runResourceT) import Control.Concurrent.Async -import Control.Exception (Handler (..), bracket, catch, catches, handle, throwIO, try) +import Control.Exception (Handler (..), catches) +import Control.Exception.Base import Control.Lens hiding ((#)) import Control.Monad.Codensity import Data.Aeson hiding (Key) +import Data.Aeson qualified as A +import Data.Base64.Types +import Data.ByteString qualified as BS +import Data.ByteString.Base64 +import Data.ByteString.UTF8 qualified as BSUTF8 import Data.Id import Data.Text -import Data.Text qualified as Text +import Data.Text qualified as T import Data.Text.Lazy qualified as TL import Data.Text.Lazy.Encoding qualified as TLE +import Debug.Trace import Imports hiding (min, threadDelay) import Network.AMQP (newQueue) import Network.AMQP qualified as Q import Network.WebSockets import Network.WebSockets qualified as WS import Network.WebSockets.Connection +import Pulsar.Client qualified as Pulsar import System.Logger qualified as Log import System.Timeout import Wire.API.Event.WebSocketProtocol @@ -50,11 +42,76 @@ data InactivityTimeout = InactivityTimeout instance Exception InactivityTimeout -rabbitMQWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp -rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = +-- TODO: The name is a misleading. However, while developing, it's useful to keep the analogies with RabbitMQ. +data PulsarChannel = PulsarChannel + {msgVar :: MVar (Maybe (ByteString, ByteString))} + +data PulsarQueueInfo = PulsarQueueInfo + { queueNames :: [Text], + messageCount :: Int + } + deriving (Show) + +data PulsarMessage = PulsarMessage + { msgBody :: Text, + msgContentType :: String, + -- TODO: This could be a sum type + msgType :: Maybe String + } + deriving (Generic) + +instance FromJSON PulsarMessage + +instance ToJSON PulsarMessage + +createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) +createPulsarChannel uid mCid = do + msgVar :: MVar (Maybe (ByteString, ByteString)) <- lift newEmptyMVar + let queueNames = case mCid of + Nothing -> [userRoutingKey uid, temporaryRoutingKey uid] + Just cid -> pure $ clientNotificationQueueName uid cid + + liftIO $ for_ queueNames $ \qn -> + do + traceM $ "Connecting ..." + -- TODO: Add logger + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ do + -- TODO: Is the `default` namespace correct? Not `user-notifications`? + let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/default/" ++ unpack qn + -- Pulsar.Topic + -- { Pulsar.type' = Pulsar.Persistent, + -- Pulsar.tenant = "wire", + -- Pulsar.namespace = "default", + -- Pulsar.name = Pulsar.TopicName qn + -- } + -- subscription = Pulsar.Subscription Pulsar.Shared "cannon-websocket" + traceM $ "newConsumer " ++ show topic + -- Pulsar.Consumer {..} <- liftIO . handle (\(e :: SomeException) -> trace ("Caugth" ++ show e) (throw e)) $ Pulsar.newConsumer topic subscription + -- TODO: Add error logger + Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket" topic undefined $ do + traceM $ "Ready" + env :: Pulsar.Consumer <- ask + void . liftIO . async . forever . flip runReaderT env $ do + Pulsar.receiveMessage (onError) $ do + content <- Pulsar.messageContent + msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize + putMVar msgVar (Just (msgId, content)) + Pulsar.acknowledgeMessage + -- pMsg@(Pulsar.Message pMsgId _) <- fetch + -- putMVar msgVar (Just pMsg) + -- ack pMsgId + pure () + pure (PulsarChannel msgVar, PulsarQueueInfo queueNames undefined) + where + -- TODO: Log error + onError = undefined + +pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp +pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = handle handleTooManyChannels . lowerCodensity $ do - (chan, queueInfo) <- createChannel uid mcid e.pool createQueue + (chan, queueInfo) <- createPulsarChannel uid mcid + traceM $ "pulsarWebSocketApp " ++ show queueInfo conn <- Codensity $ bracket openWebSocket closeWebSocket activity <- liftIO newEmptyMVar let wsConn = @@ -77,8 +134,9 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = ] $ do traverse_ (sendFullSyncMessageIfNeeded wsConn uid e) mcid - traverse_ (Q.publishMsg chan.inner "" queueInfo.queueName . mkSynchronizationMessage e.notificationTTL) (mcid *> mSyncMarkerId) - sendNotifications chan wsConn + -- traverse_ (Q.publishMsg chan.inner "" queueInfo.queueName . mkSynchronizationMessage e.notificationTTL) (mcid *> mSyncMarkerId) + traverse_ (publishSyncMessage queueInfo.queueNames . mkSynchronizationMessage) mSyncMarkerId + sendNotifications chan queueInfo wsConn let monitor = do timeout wsConn.activityTimeout (takeMVar wsConn.activity) >>= \case @@ -93,6 +151,27 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = liftIO $ wait main where + publishSyncMessage :: [Text] -> ByteString -> IO () + publishSyncMessage queueNames message = for_ queueNames $ \qn -> + -- TODO: Set logger + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ + let topic = Pulsar.TopicName $ "persistent://wire/default" ++ unpack qn + in -- TODO: Set logging callback + Pulsar.withProducer Pulsar.defaultProducerConfiguration topic undefined $ do + -- Pulsar.runPulsar (Pulsar.connect Pulsar.defaultConnectData) $ do + -- let topic = + -- Pulsar.Topic + -- { Pulsar.type' = Pulsar.Persistent, + -- Pulsar.tenant = "wire", + -- Pulsar.namespace = "default", + -- Pulsar.name = Pulsar.TopicName qn + -- } + -- TODO: log result + result <- runResourceT $ do + (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} + lift $ Pulsar.sendMessage message' + pure () + logClient = Log.field "user" (idToText uid) . Log.field "client" (maybe "" clientToText mcid) @@ -106,19 +185,23 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = -- ignore any exceptions when sending the close message void . try @SomeException $ WS.sendClose wsConn ("" :: ByteString) - getEventData :: RabbitMqChannel -> IO (Either EventData SynchronizationData) + getMessagePulsar :: PulsarChannel -> IO (ByteString, ByteString) + getMessagePulsar chan = takeMVar chan.msgVar >>= maybe (throwIO ChannelClosed) pure + + getEventData :: PulsarChannel -> IO (Either EventData SynchronizationData) getEventData chan = do - (msg, envelope) <- getMessage chan - case msg.msgType of + (msgId, msg) <- getMessagePulsar chan + decMsg :: PulsarMessage <- either (\err -> logParseError err >> error "Unexpected parse error") pure $ A.eitherDecode (BS.fromStrict msg) + case decMsg.msgType of Just "synchronization" -> do let syncData = SynchronizationData - { markerId = TL.toStrict $ TLE.decodeUtf8 msg.msgBody, - deliveryTag = envelope.envDeliveryTag + { markerId = TL.toStrict $ TLE.decodeUtf8 (BS.fromStrict msg), + deliveryTag = encodeMsgId msgId } pure $ Right syncData _ -> do - case eitherDecode @QueuedNotification msg.msgBody of + case eitherDecode @QueuedNotification (BS.fromStrict msg) of Left err -> do logParseError err -- This message cannot be parsed, make sure it doesn't requeue. There @@ -129,7 +212,9 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = -- en masse, if at some point we decide that Events should not be -- pushed as JSONs, hopefully we think of the parsing side if/when -- that happens. - Q.rejectEnv envelope False + + -- TODO: We cannot reject, yet. This would require a change in Supernova. See https://pulsar.apache.org/docs/4.1.x/client-libraries-websocket/#negatively-acknowledge-messages + -- Q.rejectEnv envelope False -- try again getEventData chan Right notif -> do @@ -138,7 +223,7 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = Left $ EventData { event = notif, - deliveryTag = envelope.envDeliveryTag + deliveryTag = encodeMsgId msgId } handleWebSocketExceptions wsConn = @@ -218,17 +303,18 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = (queueName, messageCount, _) <- Q.declareQueue chan $ queueOpts (clientNotificationQueueName uid cid) k $ QueueInfo queueName messageCount - mkSynchronizationMessage ttl markerId = - Q.newMsg - { Q.msgBody = TLE.encodeUtf8 (TL.fromStrict markerId), - Q.msgContentType = Just "text/plain; charset=utf-8", - Q.msgDeliveryMode = Just Q.Persistent, - Q.msgExpiration = Just (Text.pack $ show ttl), - Q.msgType = Just "synchronization" - } - - sendNotifications :: RabbitMqChannel -> WSConnection -> IO () - sendNotifications chan wsConn = do + mkSynchronizationMessage :: StrictText -> ByteString + mkSynchronizationMessage markerId = + -- TODO: Check all fromStrict/toStrict calls: It makes not sense to be "sometimes lazy". + BS.toStrict . encode $ + PulsarMessage + { msgBody = markerId, + msgContentType = "text/plain; charset=utf-8", + msgType = Just "synchronization" + } + + sendNotifications :: PulsarChannel -> PulsarQueueInfo -> WSConnection -> IO () + sendNotifications chan queueInfo wsConn = do let consumeRabbitMq = forever $ do eventData <- getEventData chan let msg = case eventData of @@ -246,13 +332,39 @@ rabbitMQWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckFullSync -> throwIO UnexpectedAck AckMessage ackData -> do logAckReceived ackData - void $ ackMessage chan ackData.deliveryTag ackData.multiple + -- TODO: ACKing for all queues seems to be a bit too rough... + -- TODO: Define logger + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ + for_ queueInfo.queueNames $ \qn -> do + let topic = Pulsar.TopicName $ "persistent://wire/default" ++ unpack qn + Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket-ack" (Pulsar.Topic topic) undefined $ do + consumer <- ask + Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ do + Pulsar.acknowledgeMessageId + -- let topic = + -- Pulsar.Topic + -- { Pulsar.type' = Pulsar.Persistent, + -- Pulsar.tenant = "wire", + -- Pulsar.namespace = "default", + -- Pulsar.name = Pulsar.TopicName qn + -- } + -- subscription = Pulsar.Subscription Pulsar.Shared "cannon-websocket-ack" + -- Pulsar.Consumer {..} <- Pulsar.newConsumer topic subscription + -- ack (decodeMsgId ackData.deliveryTag) + + -- void $ ackMessage chan ackData.deliveryTag ackData.multiple -- run both loops concurrently, so that -- - notifications are delivered without having to wait for acks -- - exceptions on either side do not cause a deadlock concurrently_ consumeRabbitMq consumeWebsocket + decodeMsgId :: String -> ByteString + decodeMsgId = either (error . unpack) id . decodeBase64Untyped . BSUTF8.fromString + + encodeMsgId :: ByteString -> String + encodeMsgId = T.unpack . extractBase64 . encodeBase64 + logParseError :: String -> IO () logParseError err = Log.err e.logg $ From d10d176cea9e4ee35ab47e71913909f13f5074ae Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 19 Nov 2025 12:22:25 +0100 Subject: [PATCH 14/51] Improve "logging" --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 73 ++++++++++--------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 05bb289b03..ab3e97fec9 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -26,8 +26,6 @@ import Data.Text.Lazy qualified as TL import Data.Text.Lazy.Encoding qualified as TLE import Debug.Trace import Imports hiding (min, threadDelay) -import Network.AMQP (newQueue) -import Network.AMQP qualified as Q import Network.WebSockets import Network.WebSockets qualified as WS import Network.WebSockets.Connection @@ -74,10 +72,9 @@ createPulsarChannel uid mCid = do liftIO $ for_ queueNames $ \qn -> do traceM $ "Connecting ..." - -- TODO: Add logger - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ do + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do -- TODO: Is the `default` namespace correct? Not `user-notifications`? - let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/default/" ++ unpack qn + let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn -- Pulsar.Topic -- { Pulsar.type' = Pulsar.Persistent, -- Pulsar.tenant = "wire", @@ -87,24 +84,42 @@ createPulsarChannel uid mCid = do -- subscription = Pulsar.Subscription Pulsar.Shared "cannon-websocket" traceM $ "newConsumer " ++ show topic -- Pulsar.Consumer {..} <- liftIO . handle (\(e :: SomeException) -> trace ("Caugth" ++ show e) (throw e)) $ Pulsar.newConsumer topic subscription - -- TODO: Add error logger - Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket" topic undefined $ do + Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket" topic (onPulsarError "createPulsarChannel consumer") $ do traceM $ "Ready" env :: Pulsar.Consumer <- ask void . liftIO . async . forever . flip runReaderT env $ do - Pulsar.receiveMessage (onError) $ do + Pulsar.receiveMessage (onPulsarError "receiveMessage") $ do content <- Pulsar.messageContent msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize putMVar msgVar (Just (msgId, content)) - Pulsar.acknowledgeMessage + void $ logPulsarResult "createPulsarChannel" <$> Pulsar.acknowledgeMessage -- pMsg@(Pulsar.Message pMsgId _) <- fetch -- putMVar msgVar (Just pMsg) -- ack pMsgId pure () pure (PulsarChannel msgVar, PulsarQueueInfo queueNames undefined) - where - -- TODO: Log error - onError = undefined + +-- TODO: Replace Debug.Trace with regular logging +onPulsarError :: String -> Pulsar.RawResult -> IO () +onPulsarError provenance result = + traceM $ + provenance ++ case Pulsar.renderResult result of + Just r -> " error: " ++ (show r) + Nothing -> " error: " ++ (show (Pulsar.unRawResult result)) + +-- TODO: Replace Debug.Trace with regular logging +pulsarClientLogger :: String -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () +pulsarClientLogger provenance level file line message = traceM $ provenance ++ " [" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message + +-- TODO: Replace Debug.Trace with regular logging +logPulsarResult :: String -> Pulsar.RawResult -> Pulsar.RawResult +logPulsarResult provenance result = + trace + ( provenance ++ case Pulsar.renderResult result of + Just r -> " result: " ++ (show r) + Nothing -> " result: " ++ (show (Pulsar.unRawResult result)) + ) + result pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = @@ -153,11 +168,10 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = where publishSyncMessage :: [Text] -> ByteString -> IO () publishSyncMessage queueNames message = for_ queueNames $ \qn -> - -- TODO: Set logger - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ - let topic = Pulsar.TopicName $ "persistent://wire/default" ++ unpack qn + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) "pulsar://localhost:6650" $ + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ unpack qn in -- TODO: Set logging callback - Pulsar.withProducer Pulsar.defaultProducerConfiguration topic undefined $ do + Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do -- Pulsar.runPulsar (Pulsar.connect Pulsar.defaultConnectData) $ do -- let topic = -- Pulsar.Topic @@ -170,6 +184,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = result <- runResourceT $ do (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} lift $ Pulsar.sendMessage message' + void . pure $ logPulsarResult "consumeWebsocket" result pure () logClient = @@ -288,21 +303,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = rejectBody = "" } - createQueue chan = case mcid of - Nothing -> Codensity $ \k -> do - (queueName, messageCount, _) <- - Q.declareQueue chan $ - newQueue - { Q.queueExclusive = True, - Q.queueAutoDelete = True - } - for_ [userRoutingKey uid, temporaryRoutingKey uid] $ - Q.bindQueue chan queueName userNotificationExchangeName - k $ QueueInfo {queueName = queueName, messageCount = messageCount} - Just cid -> Codensity $ \k -> do - (queueName, messageCount, _) <- Q.declareQueue chan $ queueOpts (clientNotificationQueueName uid cid) - k $ QueueInfo queueName messageCount - mkSynchronizationMessage :: StrictText -> ByteString mkSynchronizationMessage markerId = -- TODO: Check all fromStrict/toStrict calls: It makes not sense to be "sometimes lazy". @@ -334,13 +334,14 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = logAckReceived ackData -- TODO: ACKing for all queues seems to be a bit too rough... -- TODO: Define logger - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Nothing}) "pulsar://localhost:6650" $ + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ for_ queueInfo.queueNames $ \qn -> do - let topic = Pulsar.TopicName $ "persistent://wire/default" ++ unpack qn - Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket-ack" (Pulsar.Topic topic) undefined $ do + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ unpack qn + Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket-ack" (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do consumer <- ask - Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ do - Pulsar.acknowledgeMessageId + Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ + void $ + logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId -- let topic = -- Pulsar.Topic -- { Pulsar.type' = Pulsar.Persistent, From ab142c86f2d374d69138aea817291c06a5da076e Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 19 Nov 2025 15:40:08 +0100 Subject: [PATCH 15/51] Send PulsarMessage(s) via Pulsar Such that they can be decoded on the other end. --- libs/wire-api/src/Wire/API/Notification.hs | 13 ++++ .../cannon/src/Cannon/PulsarConsumerApp.hs | 77 ++++++++----------- services/gundeck/src/Gundeck/Push.hs | 14 +++- 3 files changed, 59 insertions(+), 45 deletions(-) diff --git a/libs/wire-api/src/Wire/API/Notification.hs b/libs/wire-api/src/Wire/API/Notification.hs index d3b5a40511..c3e4b34e8d 100644 --- a/libs/wire-api/src/Wire/API/Notification.hs +++ b/libs/wire-api/src/Wire/API/Notification.hs @@ -45,6 +45,7 @@ module Wire.API.Notification temporaryRoutingKey, clientRoutingKey, queueOpts, + PulsarMessage (..), ) where @@ -255,3 +256,15 @@ queueOpts qName = ) ] } + +data PulsarMessage = PulsarMessage + { msgBody :: Text, + msgContentType :: String, + -- TODO: This could be a sum type + msgType :: Maybe String + } + deriving (Generic) + +instance FromJSON PulsarMessage + +instance ToJSON PulsarMessage diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index ab3e97fec9..eaaf68ea5e 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -8,6 +8,7 @@ import Cannon.RabbitMq import Cannon.WS hiding (env) import Cassandra as C hiding (batch) import Conduit (runResourceT) +import Control.Concurrent (threadDelay) import Control.Concurrent.Async import Control.Exception (Handler (..), catches) import Control.Exception.Base @@ -22,6 +23,7 @@ import Data.ByteString.UTF8 qualified as BSUTF8 import Data.Id import Data.Text import Data.Text qualified as T +import Data.Text.Encoding qualified as TE import Data.Text.Lazy qualified as TL import Data.Text.Lazy.Encoding qualified as TLE import Debug.Trace @@ -45,59 +47,46 @@ data PulsarChannel = PulsarChannel {msgVar :: MVar (Maybe (ByteString, ByteString))} data PulsarQueueInfo = PulsarQueueInfo - { queueNames :: [Text], - messageCount :: Int - } + {queueNames :: [Text]} deriving (Show) -data PulsarMessage = PulsarMessage - { msgBody :: Text, - msgContentType :: String, - -- TODO: This could be a sum type - msgType :: Maybe String - } - deriving (Generic) - -instance FromJSON PulsarMessage - -instance ToJSON PulsarMessage - createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) createPulsarChannel uid mCid = do msgVar :: MVar (Maybe (ByteString, ByteString)) <- lift newEmptyMVar let queueNames = case mCid of Nothing -> [userRoutingKey uid, temporaryRoutingKey uid] - Just cid -> pure $ clientNotificationQueueName uid cid - + -- TODO: Decide how to deal with clients: Via multiple-subscriptions, maybe? + Just _cid -> [userRoutingKey uid] -- pure $ clientNotificationQueueName uid cid liftIO $ for_ queueNames $ \qn -> do traceM $ "Connecting ..." - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do + void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do -- TODO: Is the `default` namespace correct? Not `user-notifications`? let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn - -- Pulsar.Topic - -- { Pulsar.type' = Pulsar.Persistent, - -- Pulsar.tenant = "wire", - -- Pulsar.namespace = "default", - -- Pulsar.name = Pulsar.TopicName qn - -- } - -- subscription = Pulsar.Subscription Pulsar.Shared "cannon-websocket" traceM $ "newConsumer " ++ show topic -- Pulsar.Consumer {..} <- liftIO . handle (\(e :: SomeException) -> trace ("Caugth" ++ show e) (throw e)) $ Pulsar.newConsumer topic subscription - Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket" topic (onPulsarError "createPulsarChannel consumer") $ do - traceM $ "Ready" - env :: Pulsar.Consumer <- ask - void . liftIO . async . forever . flip runReaderT env $ do - Pulsar.receiveMessage (onPulsarError "receiveMessage") $ do - content <- Pulsar.messageContent - msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize - putMVar msgVar (Just (msgId, content)) - void $ logPulsarResult "createPulsarChannel" <$> Pulsar.acknowledgeMessage - -- pMsg@(Pulsar.Message pMsgId _) <- fetch - -- putMVar msgVar (Just pMsg) - -- ack pMsgId + Pulsar.withConsumer + ( Pulsar.defaultConsumerConfiguration + { Pulsar.consumerType = Just Pulsar.ConsumerShared, + Pulsar.consumerSubscriptionInitialPosition = Just Pulsar.Earliest + } + ) + ("cannon-websocket-" ++ unpack qn) + topic + (onPulsarError "createPulsarChannel consumer") + $ do + traceM $ "Ready" + void . forever $ do + Pulsar.receiveMessage (onPulsarError "receiveMessage") $ do + content <- Pulsar.messageContent + traceM $ "XXX - received message with content " ++ BSUTF8.toString content + msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize + putMVar msgVar (Just (msgId, content)) + void $ logPulsarResult "createPulsarChannel" <$> Pulsar.acknowledgeMessage pure () - pure (PulsarChannel msgVar, PulsarQueueInfo queueNames undefined) + liftIO $ threadDelay 1_000_000 + traceM "createPulsarChannel: Done" + pure (PulsarChannel msgVar, PulsarQueueInfo queueNames) -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () @@ -126,7 +115,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = handle handleTooManyChannels . lowerCodensity $ do (chan, queueInfo) <- createPulsarChannel uid mcid - traceM $ "pulsarWebSocketApp " ++ show queueInfo + traceM $ "XXX pulsarWebSocketApp " ++ show queueInfo conn <- Codensity $ bracket openWebSocket closeWebSocket activity <- liftIO newEmptyMVar let wsConn = @@ -169,7 +158,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = publishSyncMessage :: [Text] -> ByteString -> IO () publishSyncMessage queueNames message = for_ queueNames $ \qn -> Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) "pulsar://localhost:6650" $ - let topic = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ unpack qn + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn in -- TODO: Set logging callback Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do -- Pulsar.runPulsar (Pulsar.connect Pulsar.defaultConnectData) $ do @@ -216,7 +205,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = } pure $ Right syncData _ -> do - case eitherDecode @QueuedNotification (BS.fromStrict msg) of + case eitherDecode @QueuedNotification ((BS.fromStrict . TE.encodeUtf8) decMsg.msgBody) of Left err -> do logParseError err -- This message cannot be parsed, make sure it doesn't requeue. There @@ -336,8 +325,8 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = -- TODO: Define logger Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ for_ queueInfo.queueNames $ \qn -> do - let topic = Pulsar.TopicName $ "persistent://wire/user-notifications" ++ unpack qn - Pulsar.withConsumer Pulsar.defaultConsumerConfiguration "cannon-websocket-ack" (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn + Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-ack-" ++ unpack qn) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do consumer <- ask Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ void $ @@ -361,7 +350,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = concurrently_ consumeRabbitMq consumeWebsocket decodeMsgId :: String -> ByteString - decodeMsgId = either (error . unpack) id . decodeBase64Untyped . BSUTF8.fromString + decodeMsgId = either (error . ("decodeMsgId: " ++) . unpack) id . decodeBase64Untyped . BSUTF8.fromString encodeMsgId :: ByteString -> String encodeMsgId = T.unpack . extractBase64 . encodeBase64 diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index f66d7965e7..5df34f423d 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -53,8 +53,10 @@ import Control.Error import Control.Lens (to, view, (.~), (^.)) import Control.Monad.Catch import Control.Monad.Except (throwError) +import Data.Aeson qualified as A import Data.Aeson qualified as Aeson import Data.ByteString qualified as B +import Data.ByteString qualified as BS import Data.ByteString.Conversion (toByteString') import Data.Id import Data.List.Extra qualified as List @@ -63,6 +65,7 @@ import Data.Map qualified as Map import Data.Misc import Data.Set qualified as Set import Data.Text qualified as Text +import Data.Text.Encoding qualified as TE import Data.These import Data.Timeout import Data.UUID qualified as UUID @@ -146,7 +149,7 @@ publishToPulsar routingKey qMsg = do Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (internalLogger logger)}) "pulsar://localhost:6650" $ Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName logPulsarError $ do result <- runResourceT $ do - (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ B.toStrict (Q.msgBody qMsg)} + (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ BS.toStrict (A.encode pulsarMessage)} lift $ Pulsar.sendMessage message lift $ logPulsarResult result where @@ -176,6 +179,15 @@ publishToPulsar routingKey qMsg = do Logger.debug logger $ Log.msg ("[" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message) + pulsarMessage :: PulsarMessage + pulsarMessage = + PulsarMessage + { msgBody = TE.decodeUtf8 . B.toStrict $ (Q.msgBody qMsg), + msgContentType = "application/json", + -- TODO: This could be a sum type + msgType = Nothing + } + -- | Another layer of wrap around 'runWithBudget'. runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a runWithBudget'' budget fallback action = do From 0322be48bca543a9f98c8a5ad5579a532487a27f Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 19 Nov 2025 16:08:08 +0100 Subject: [PATCH 16/51] Do not push to RabbitMq This ist superfluous and would just make the rabbit explode. --- services/gundeck/src/Gundeck/Push.hs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 5df34f423d..d21ec08b1d 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -353,8 +353,6 @@ pushNativeWithBudget notif psh dontPush = do pushAllViaMessageBroker :: (MonadPushAll m, MonadMapAsync m, MonadNativeTargets m) => [NewNotification] -> UserClientsFull -> m () pushAllViaMessageBroker newNotifs userClientsFull = do - -- TODO: Stop pushing to RabbitMQ to prevent the rabbit from slowing us down. - for_ newNotifs $ pushViaRabbitMq for_ newNotifs $ pushViaPulsar mpaForkIO $ do for_ newNotifs $ \newNotif -> do @@ -362,20 +360,6 @@ pushAllViaMessageBroker newNotifs userClientsFull = do cassandraClientIds = Map.foldMapWithKey (\uid clients -> Set.map (\c -> (uid, c.clientId)) clients) cassandraClients pushNativeWithBudget newNotif.nnNotification newNotif.nnPush (Set.toList $ cassandraClientIds) -pushViaRabbitMq :: (MonadPushAll m) => NewNotification -> m () -pushViaRabbitMq newNotif = do - qMsg <- mkMessage newNotif.nnNotification - let routingKeys = - Set.unions $ - flip Set.map (Set.fromList . toList $ newNotif.nnRecipients) \r -> - case r._recipientClients of - RecipientClientsAll -> - Set.singleton $ userRoutingKey r._recipientId - RecipientClientsSome (toList -> cs) -> - Set.fromList $ map (clientRoutingKey r._recipientId) cs - for_ routingKeys $ \routingKey -> - mpaPublishToRabbitMq userNotificationExchangeName routingKey qMsg - pushViaPulsar :: (MonadPushAll m) => NewNotification -> m () pushViaPulsar newNotif = do qMsg <- mkMessage newNotif.nnNotification From bd4580877bfc996f686b0ae21b7e3e1b503c87cd Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 19 Nov 2025 17:38:01 +0100 Subject: [PATCH 17/51] Remove clutter --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index eaaf68ea5e..e9d0174b42 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -64,7 +64,6 @@ createPulsarChannel uid mCid = do -- TODO: Is the `default` namespace correct? Not `user-notifications`? let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn traceM $ "newConsumer " ++ show topic - -- Pulsar.Consumer {..} <- liftIO . handle (\(e :: SomeException) -> trace ("Caugth" ++ show e) (throw e)) $ Pulsar.newConsumer topic subscription Pulsar.withConsumer ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerShared, @@ -331,18 +330,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ void $ logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId - -- let topic = - -- Pulsar.Topic - -- { Pulsar.type' = Pulsar.Persistent, - -- Pulsar.tenant = "wire", - -- Pulsar.namespace = "default", - -- Pulsar.name = Pulsar.TopicName qn - -- } - -- subscription = Pulsar.Subscription Pulsar.Shared "cannon-websocket-ack" - -- Pulsar.Consumer {..} <- Pulsar.newConsumer topic subscription - -- ack (decodeMsgId ackData.deliveryTag) - - -- void $ ackMessage chan ackData.deliveryTag ackData.multiple -- run both loops concurrently, so that -- - notifications are delivered without having to wait for acks From 7e428f52875023523eaa1d92aff6398d9a84caeb Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 20 Nov 2025 13:49:40 +0100 Subject: [PATCH 18/51] Remove clutter --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index e9d0174b42..937f070c2e 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -61,7 +61,6 @@ createPulsarChannel uid mCid = do do traceM $ "Connecting ..." void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do - -- TODO: Is the `default` namespace correct? Not `user-notifications`? let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn traceM $ "newConsumer " ++ show topic Pulsar.withConsumer @@ -137,7 +136,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = ] $ do traverse_ (sendFullSyncMessageIfNeeded wsConn uid e) mcid - -- traverse_ (Q.publishMsg chan.inner "" queueInfo.queueName . mkSynchronizationMessage e.notificationTTL) (mcid *> mSyncMarkerId) traverse_ (publishSyncMessage queueInfo.queueNames . mkSynchronizationMessage) mSyncMarkerId sendNotifications chan queueInfo wsConn @@ -160,15 +158,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn in -- TODO: Set logging callback Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do - -- Pulsar.runPulsar (Pulsar.connect Pulsar.defaultConnectData) $ do - -- let topic = - -- Pulsar.Topic - -- { Pulsar.type' = Pulsar.Persistent, - -- Pulsar.tenant = "wire", - -- Pulsar.namespace = "default", - -- Pulsar.name = Pulsar.TopicName qn - -- } - -- TODO: log result result <- runResourceT $ do (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} lift $ Pulsar.sendMessage message' @@ -321,7 +310,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckMessage ackData -> do logAckReceived ackData -- TODO: ACKing for all queues seems to be a bit too rough... - -- TODO: Define logger Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ for_ queueInfo.queueNames $ \qn -> do let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn From 4c02e980744c1a04387b1b8bc3045a453f760381 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 20 Nov 2025 13:50:00 +0100 Subject: [PATCH 19/51] Fix subscriptions and topics --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 937f070c2e..12c8560451 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -47,29 +47,31 @@ data PulsarChannel = PulsarChannel {msgVar :: MVar (Maybe (ByteString, ByteString))} data PulsarQueueInfo = PulsarQueueInfo - {queueNames :: [Text]} + {subscription :: Text} deriving (Show) createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) createPulsarChannel uid mCid = do msgVar :: MVar (Maybe (ByteString, ByteString)) <- lift newEmptyMVar - let queueNames = case mCid of - Nothing -> [userRoutingKey uid, temporaryRoutingKey uid] - -- TODO: Decide how to deal with clients: Via multiple-subscriptions, maybe? - Just _cid -> [userRoutingKey uid] -- pure $ clientNotificationQueueName uid cid - liftIO $ for_ queueNames $ \qn -> + let subscription = case mCid of + Nothing -> temporaryRoutingKey uid + Just cid -> clientNotificationQueueName uid cid + subscriptionType = case mCid of + Nothing -> Pulsar.Latest + Just _cid -> Pulsar.Earliest + liftIO $ do traceM $ "Connecting ..." void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do - let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn + let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) traceM $ "newConsumer " ++ show topic Pulsar.withConsumer ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerShared, - Pulsar.consumerSubscriptionInitialPosition = Just Pulsar.Earliest + Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType } ) - ("cannon-websocket-" ++ unpack qn) + ("cannon-websocket-" ++ unpack subscription) topic (onPulsarError "createPulsarChannel consumer") $ do @@ -84,7 +86,7 @@ createPulsarChannel uid mCid = do pure () liftIO $ threadDelay 1_000_000 traceM "createPulsarChannel: Done" - pure (PulsarChannel msgVar, PulsarQueueInfo queueNames) + pure $ (PulsarChannel msgVar, PulsarQueueInfo subscription) -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () @@ -136,7 +138,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = ] $ do traverse_ (sendFullSyncMessageIfNeeded wsConn uid e) mcid - traverse_ (publishSyncMessage queueInfo.queueNames . mkSynchronizationMessage) mSyncMarkerId + traverse_ (publishSyncMessage uid . mkSynchronizationMessage) mSyncMarkerId sendNotifications chan queueInfo wsConn let monitor = do @@ -152,17 +154,16 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = liftIO $ wait main where - publishSyncMessage :: [Text] -> ByteString -> IO () - publishSyncMessage queueNames message = for_ queueNames $ \qn -> - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) "pulsar://localhost:6650" $ - let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn - in -- TODO: Set logging callback - Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do - result <- runResourceT $ do - (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} - lift $ Pulsar.sendMessage message' - void . pure $ logPulsarResult "consumeWebsocket" result - pure () + publishSyncMessage :: UserId -> ByteString -> IO () + publishSyncMessage userId message = + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) "pulsar://localhost:6650" $ do + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey userId) + Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do + result <- runResourceT $ do + (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} + lift $ Pulsar.sendMessage message' + void . pure $ logPulsarResult "consumeWebsocket" result + pure () logClient = Log.field "user" (idToText uid) @@ -310,14 +311,13 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckMessage ackData -> do logAckReceived ackData -- TODO: ACKing for all queues seems to be a bit too rough... - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ - for_ queueInfo.queueNames $ \qn -> do - let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack qn - Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-ack-" ++ unpack qn) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do - consumer <- ask - Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ - void $ - logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ do + let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) + Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-ack-" ++ unpack queueInfo.subscription) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do + consumer <- ask + Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ + void $ + logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId -- run both loops concurrently, so that -- - notifications are delivered without having to wait for acks From c0a00c4ed9c3404746b3b4ca3956025f63e6d0bf Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 20 Nov 2025 15:23:51 +0100 Subject: [PATCH 20/51] Fix SynchronizationData creation --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 12c8560451..4e464c995e 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -24,8 +24,6 @@ import Data.Id import Data.Text import Data.Text qualified as T import Data.Text.Encoding qualified as TE -import Data.Text.Lazy qualified as TL -import Data.Text.Lazy.Encoding qualified as TLE import Debug.Trace import Imports hiding (min, threadDelay) import Network.WebSockets @@ -189,7 +187,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = Just "synchronization" -> do let syncData = SynchronizationData - { markerId = TL.toStrict $ TLE.decodeUtf8 (BS.fromStrict msg), + { markerId = decMsg.msgBody, deliveryTag = encodeMsgId msgId } pure $ Right syncData From 5282cc032e29f3ec52ae1cbef9a7904d81e1d7eb Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 20 Nov 2025 15:24:20 +0100 Subject: [PATCH 21/51] Add TODO --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 4e464c995e..e1431935e2 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -128,7 +128,8 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = $ withAsync $ flip catches - [ handleClientMisbehaving conn, + [ -- TODO: Review exceptions. pulsar-hs and amqp exceptions surely differ. + handleClientMisbehaving conn, handleWebSocketExceptions conn, handleRabbitMqChannelException conn, handleInactivity conn, From 937bc6d000a961d4696f42dc705131fee13def0f Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 20 Nov 2025 15:25:05 +0100 Subject: [PATCH 22/51] Remove obsolete TODO --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index e1431935e2..112e243573 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -309,7 +309,6 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckFullSync -> throwIO UnexpectedAck AckMessage ackData -> do logAckReceived ackData - -- TODO: ACKing for all queues seems to be a bit too rough... Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ do let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-ack-" ++ unpack queueInfo.subscription) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do From 95ffc95c3985fa9a1ef49467aabb48adf6b0c513 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 21 Nov 2025 12:33:33 +0100 Subject: [PATCH 23/51] Close consumer when websocket closes Also, do not acknowledge messages outside of the originally receiving consumer context. --- libs/wire-api/src/Wire/API/Notification.hs | 2 +- .../cannon/src/Cannon/PulsarConsumerApp.hs | 117 ++++++++++++------ services/gundeck/src/Gundeck/API/Internal.hs | 4 +- services/gundeck/src/Gundeck/Client.hs | 55 ++++++-- services/gundeck/src/Gundeck/Push.hs | 5 +- 5 files changed, 129 insertions(+), 54 deletions(-) diff --git a/libs/wire-api/src/Wire/API/Notification.hs b/libs/wire-api/src/Wire/API/Notification.hs index c3e4b34e8d..fd8852ee29 100644 --- a/libs/wire-api/src/Wire/API/Notification.hs +++ b/libs/wire-api/src/Wire/API/Notification.hs @@ -263,7 +263,7 @@ data PulsarMessage = PulsarMessage -- TODO: This could be a sum type msgType :: Maybe String } - deriving (Generic) + deriving (Generic, Show) instance FromJSON PulsarMessage diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 112e243573..ff107666ad 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -10,6 +10,7 @@ import Cassandra as C hiding (batch) import Conduit (runResourceT) import Control.Concurrent (threadDelay) import Control.Concurrent.Async +import Control.Concurrent.Chan import Control.Exception (Handler (..), catches) import Control.Exception.Base import Control.Lens hiding ((#)) @@ -19,6 +20,7 @@ import Data.Aeson qualified as A import Data.Base64.Types import Data.ByteString qualified as BS import Data.ByteString.Base64 +import Data.ByteString.UTF8 import Data.ByteString.UTF8 qualified as BSUTF8 import Data.Id import Data.Text @@ -32,6 +34,7 @@ import Network.WebSockets.Connection import Pulsar.Client qualified as Pulsar import System.Logger qualified as Log import System.Timeout +import UnliftIO qualified import Wire.API.Event.WebSocketProtocol import Wire.API.Notification @@ -42,7 +45,10 @@ instance Exception InactivityTimeout -- TODO: The name is a misleading. However, while developing, it's useful to keep the analogies with RabbitMQ. data PulsarChannel = PulsarChannel - {msgVar :: MVar (Maybe (ByteString, ByteString))} + { -- TODO: Rename: msgChannel + msgVar :: Chan (ByteString, ByteString), + closeSignal :: MVar () + } data PulsarQueueInfo = PulsarQueueInfo {subscription :: Text} @@ -50,7 +56,8 @@ data PulsarQueueInfo = PulsarQueueInfo createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) createPulsarChannel uid mCid = do - msgVar :: MVar (Maybe (ByteString, ByteString)) <- lift newEmptyMVar + msgChannel :: Chan (ByteString, ByteString) <- lift newChan + closeSignal :: MVar () <- lift $ newEmptyMVar let subscription = case mCid of Nothing -> temporaryRoutingKey uid Just cid -> clientNotificationQueueName uid cid @@ -74,17 +81,24 @@ createPulsarChannel uid mCid = do (onPulsarError "createPulsarChannel consumer") $ do traceM $ "Ready" - void . forever $ do - Pulsar.receiveMessage (onPulsarError "receiveMessage") $ do - content <- Pulsar.messageContent - traceM $ "XXX - received message with content " ++ BSUTF8.toString content - msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize - putMVar msgVar (Just (msgId, content)) - void $ logPulsarResult "createPulsarChannel" <$> Pulsar.acknowledgeMessage + UnliftIO.race_ (receiveMsgs msgChannel) (blockOnCloseSignal closeSignal) pure () liftIO $ threadDelay 1_000_000 traceM "createPulsarChannel: Done" - pure $ (PulsarChannel msgVar, PulsarQueueInfo subscription) + pure $ (PulsarChannel msgChannel closeSignal, PulsarQueueInfo subscription) + where + receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> ReaderT Pulsar.Consumer m () + receiveMsgs msgChannel = forever $ do + Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage") $ do + content <- Pulsar.messageContent + traceM $ "XXX - received message with content " ++ BSUTF8.toString content + msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize + liftIO $ writeChan msgChannel (msgId, content) + traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content + void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage + + blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m () + blockOnCloseSignal = readMVar -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () @@ -152,6 +166,8 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = _ <- Codensity $ withAsync monitor liftIO $ wait main + -- TODO: This probably needs more exception handling... However, I'd like to see the principle working first. + putMVar chan.closeSignal () where publishSyncMessage :: UserId -> ByteString -> IO () publishSyncMessage userId message = @@ -177,12 +193,11 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = -- ignore any exceptions when sending the close message void . try @SomeException $ WS.sendClose wsConn ("" :: ByteString) - getMessagePulsar :: PulsarChannel -> IO (ByteString, ByteString) - getMessagePulsar chan = takeMVar chan.msgVar >>= maybe (throwIO ChannelClosed) pure - getEventData :: PulsarChannel -> IO (Either EventData SynchronizationData) getEventData chan = do - (msgId, msg) <- getMessagePulsar chan + traceM $ "getEventData called" + (msgId, msg) <- readChan chan.msgVar + traceM $ "getEventData received message" <> show (toString msg) decMsg :: PulsarMessage <- either (\err -> logParseError err >> error "Unexpected parse error") pure $ A.eitherDecode (BS.fromStrict msg) case decMsg.msgType of Just "synchronization" -> do @@ -194,21 +209,45 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = pure $ Right syncData _ -> do case eitherDecode @QueuedNotification ((BS.fromStrict . TE.encodeUtf8) decMsg.msgBody) of - Left err -> do - logParseError err - -- This message cannot be parsed, make sure it doesn't requeue. There - -- is no need to throw an error and kill the websocket as this is - -- probably caused by a bug or someone messing with RabbitMQ. - -- - -- The bug case is slightly dangerous as it could drop a lot of events - -- en masse, if at some point we decide that Events should not be - -- pushed as JSONs, hopefully we think of the parsing side if/when - -- that happens. - - -- TODO: We cannot reject, yet. This would require a change in Supernova. See https://pulsar.apache.org/docs/4.1.x/client-libraries-websocket/#negatively-acknowledge-messages - -- Q.rejectEnv envelope False - -- try again - getEventData chan + Left err -> + do + logParseError err + -- This message cannot be parsed, make sure it doesn't requeue. There + -- is no need to throw an error and kill the websocket as this is + -- probably caused by a bug or someone messing with RabbitMQ. + -- + -- The bug case is slightly dangerous as it could drop a lot of events + -- en masse, if at some point we decide that Events should not be + -- pushed as JSONs, hopefully we think of the parsing side if/when + -- that happens. + let subscription = case mcid of + Nothing -> temporaryRoutingKey uid + Just cid -> clientNotificationQueueName uid cid + subscriptionType = case mcid of + Nothing -> Pulsar.Latest + Just _cid -> Pulsar.Earliest + liftIO $ do + -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. + -- traceM $ "Connecting ..." + -- void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "getEventData")}) "pulsar://localhost:6650" $ do + -- let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) + -- traceM $ "newConsumer " ++ show topic + -- Pulsar.withConsumer + -- ( Pulsar.defaultConsumerConfiguration + -- { Pulsar.consumerType = Just Pulsar.ConsumerShared, + -- Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType + -- } + -- ) + -- ("cannon-websocket-" ++ unpack subscription) + -- topic + -- (onPulsarError "getEventData consumer") + -- $ do + -- consumer <- ask + -- Pulsar.withDeserializedMessageId consumer msgId $ + -- Pulsar.acknowledgeNegativeMessageId + + -- TODO: Deadlettering hasn't been configured, yet. See e.g. https://pulsar.apache.org/docs/4.1.x/client-libraries-websocket/#negatively-acknowledge-messages + getEventData chan Right notif -> do logEvent notif pure $ @@ -292,12 +331,14 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = sendNotifications :: PulsarChannel -> PulsarQueueInfo -> WSConnection -> IO () sendNotifications chan queueInfo wsConn = do + traceM $ "XXX - sendNotifications called " let consumeRabbitMq = forever $ do + traceM $ "XXX - sendNotifications consumeRabbitMq called " eventData <- getEventData chan let msg = case eventData of Left event -> EventMessage event Right sync -> EventSyncMessage sync - + traceM $ "XXX - sendNotifications sending ... " <> show msg catch (WS.sendBinaryData wsConn.inner (encode msg)) $ \(err :: SomeException) -> do logSendFailure err @@ -309,13 +350,15 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckFullSync -> throwIO UnexpectedAck AckMessage ackData -> do logAckReceived ackData - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ do - let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) - Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-ack-" ++ unpack queueInfo.subscription) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do - consumer <- ask - Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ - void $ - logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId + -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. + -- Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ do + -- let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) + -- Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-" ++ unpack queueInfo.subscription) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do + -- consumer <- ask + -- -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. + -- Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ + -- void $ + -- logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId -- run both loops concurrently, so that -- - notifications are delivered without having to wait for acks diff --git a/services/gundeck/src/Gundeck/API/Internal.hs b/services/gundeck/src/Gundeck/API/Internal.hs index c1c1591ab8..8c33a69545 100644 --- a/services/gundeck/src/Gundeck/API/Internal.hs +++ b/services/gundeck/src/Gundeck/API/Internal.hs @@ -69,6 +69,6 @@ getPushTokensH uid = PushTok.PushTokenList <$> (view PushTok.addrPushToken <$$> registerConsumableNotificationsClient :: UserId -> ClientId -> Gundeck NoContent registerConsumableNotificationsClient uid cid = do - chan <- getRabbitMqChan - void . liftIO $ setupConsumableNotifications chan uid cid + -- TODO: Care about long term subscriptions + -- liftIO $ setupConsumableNotifications uid cid pure NoContent diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index 9d89c2da80..ff30532810 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -19,12 +19,14 @@ module Gundeck.Client where import Control.Lens (view) import Data.Id +import Data.Text qualified as T +import Debug.Trace import Gundeck.Monad import Gundeck.Notification.Data qualified as Notifications import Gundeck.Push.Data qualified as Push import Gundeck.Push.Native import Imports -import Network.AMQP +import Pulsar.Client qualified as Pulsar import Wire.API.Notification unregister :: UserId -> ClientId -> Gundeck () @@ -42,16 +44,45 @@ removeUser user = do Notifications.deleteAll user setupConsumableNotifications :: - Channel -> UserId -> ClientId -> - IO Text -setupConsumableNotifications chan uid cid = do - let qName = clientNotificationQueueName uid cid - void $ - declareQueue - chan - (queueOpts qName) - for_ [userRoutingKey uid, clientRoutingKey uid cid] $ - bindQueue chan qName userNotificationExchangeName - pure qName + IO () +setupConsumableNotifications uid cid = do + -- A hacky way to create a Pulsar subscription + let subscription = "cannon-websocket-" ++ T.unpack (clientNotificationQueueName uid cid) + subscriptionType = Pulsar.Earliest + topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ T.unpack (userRoutingKey uid) + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications")}) "pulsar://localhost:6650" $ do + Pulsar.withConsumer + ( Pulsar.defaultConsumerConfiguration + { Pulsar.consumerType = Just Pulsar.ConsumerShared, + Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType + } + ) + subscription + topic + (onPulsarError "setupConsumableNotifications consumer") + (pure ()) + traceM $ "XXX - setupConsumableNotifications created subscription " <> show subscription <> " on topic " <> show topic + +-- TODO: Replace Debug.Trace with regular logging +onPulsarError :: String -> Pulsar.RawResult -> IO () +onPulsarError provenance result = + traceM $ + provenance ++ case Pulsar.renderResult result of + Just r -> " error: " ++ (show r) + Nothing -> " error: " ++ (show (Pulsar.unRawResult result)) + +-- TODO: Replace Debug.Trace with regular logging +pulsarClientLogger :: String -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () +pulsarClientLogger provenance level file line message = traceM $ provenance ++ " [" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message + +-- TODO: Replace Debug.Trace with regular logging +logPulsarResult :: String -> Pulsar.RawResult -> Pulsar.RawResult +logPulsarResult provenance result = + trace + ( provenance ++ case Pulsar.renderResult result of + Just r -> " result: " ++ (show r) + Nothing -> " result: " ++ (show (Pulsar.unRawResult result)) + ) + result diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index d21ec08b1d..11c74543ee 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -366,11 +366,12 @@ pushViaPulsar newNotif = do let routingKeys = Set.unions $ flip Set.map (Set.fromList . toList $ newNotif.nnRecipients) \r -> + -- TODO: This pattern match is pretty bogus now. case r._recipientClients of RecipientClientsAll -> Set.singleton $ userRoutingKey r._recipientId - RecipientClientsSome (toList -> cs) -> - Set.fromList $ map (clientRoutingKey r._recipientId) cs + RecipientClientsSome _ -> + Set.singleton $ userRoutingKey r._recipientId for_ routingKeys $ \routingKey -> mpaPublishToPulsar routingKey qMsg From 97dcdd332b9036f402aed690c4fc2552d57a8235 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 21 Nov 2025 14:02:27 +0100 Subject: [PATCH 24/51] WIP more consumer functionality --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index ff107666ad..20d7412135 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -47,7 +47,9 @@ instance Exception InactivityTimeout data PulsarChannel = PulsarChannel { -- TODO: Rename: msgChannel msgVar :: Chan (ByteString, ByteString), - closeSignal :: MVar () + closeSignal :: MVar (), + acknowledgeMessages :: Chan ByteString, + rejectMessages :: Chan ByteString } data PulsarQueueInfo = PulsarQueueInfo @@ -57,6 +59,8 @@ data PulsarQueueInfo = PulsarQueueInfo createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) createPulsarChannel uid mCid = do msgChannel :: Chan (ByteString, ByteString) <- lift newChan + acknowledgeMessages :: Chan ByteString <- lift newChan + rejectMessages :: Chan ByteString <- lift newChan closeSignal :: MVar () <- lift $ newEmptyMVar let subscription = case mCid of Nothing -> temporaryRoutingKey uid @@ -81,14 +85,24 @@ createPulsarChannel uid mCid = do (onPulsarError "createPulsarChannel consumer") $ do traceM $ "Ready" - UnliftIO.race_ (receiveMsgs msgChannel) (blockOnCloseSignal closeSignal) + receiveMsgsAsync :: Async () <- receiveMsgs msgChannel + blockOnCloseSignalAsync :: Async () <- blockOnCloseSignal closeSignal + void $ UnliftIO.waitAny [receiveMsgsAsync, blockOnCloseSignalAsync] pure () liftIO $ threadDelay 1_000_000 traceM "createPulsarChannel: Done" - pure $ (PulsarChannel msgChannel closeSignal, PulsarQueueInfo subscription) + pure $ + ( PulsarChannel + { msgVar = msgChannel, + closeSignal = closeSignal, + acknowledgeMessages = acknowledgeMessages, + rejectMessages = rejectMessages + }, + PulsarQueueInfo subscription + ) where - receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> ReaderT Pulsar.Consumer m () - receiveMsgs msgChannel = forever $ do + receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> ReaderT Pulsar.Consumer m (Async ()) + receiveMsgs msgChannel = UnliftIO.async . forever $ do Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage") $ do content <- Pulsar.messageContent traceM $ "XXX - received message with content " ++ BSUTF8.toString content @@ -97,8 +111,8 @@ createPulsarChannel uid mCid = do traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage - blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m () - blockOnCloseSignal = readMVar + blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) + blockOnCloseSignal = UnliftIO.async . readMVar -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () From 6530ee9aa3728b3d52ba201d5fd918f1924d98ed Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 21 Nov 2025 15:49:31 +0100 Subject: [PATCH 25/51] Acknowledge/reject messages --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 68 ++++++------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 20d7412135..eb14f427e6 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -87,8 +87,11 @@ createPulsarChannel uid mCid = do traceM $ "Ready" receiveMsgsAsync :: Async () <- receiveMsgs msgChannel blockOnCloseSignalAsync :: Async () <- blockOnCloseSignal closeSignal - void $ UnliftIO.waitAny [receiveMsgsAsync, blockOnCloseSignalAsync] + acknowledgeMsgsAsync <- acknowledgeMsgs acknowledgeMessages + rejectMsgsAsync <- rejectMsgs rejectMessages + void $ UnliftIO.waitAny [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] pure () + -- TODO: Get rid of this delay. liftIO $ threadDelay 1_000_000 traceM "createPulsarChannel: Done" pure $ @@ -114,6 +117,20 @@ createPulsarChannel uid mCid = do blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) blockOnCloseSignal = UnliftIO.async . readMVar + acknowledgeMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> ReaderT Pulsar.Consumer m (Async ()) + acknowledgeMsgs chan = + UnliftIO.async . forever $ do + msgId <- UnliftIO.readChan chan + consumer :: Pulsar.Consumer <- ask + logPulsarResult "createPulsarChannel - acknowledge message result: " <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) + + rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> ReaderT Pulsar.Consumer m (Async ()) + rejectMsgs chan = + UnliftIO.async . forever $ do + msgId <- UnliftIO.readChan chan + consumer :: Pulsar.Consumer <- ask + Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeNegativeMessageId + -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () onPulsarError provenance result = @@ -226,42 +243,9 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = Left err -> do logParseError err - -- This message cannot be parsed, make sure it doesn't requeue. There - -- is no need to throw an error and kill the websocket as this is - -- probably caused by a bug or someone messing with RabbitMQ. - -- - -- The bug case is slightly dangerous as it could drop a lot of events - -- en masse, if at some point we decide that Events should not be - -- pushed as JSONs, hopefully we think of the parsing side if/when - -- that happens. - let subscription = case mcid of - Nothing -> temporaryRoutingKey uid - Just cid -> clientNotificationQueueName uid cid - subscriptionType = case mcid of - Nothing -> Pulsar.Latest - Just _cid -> Pulsar.Earliest - liftIO $ do - -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. - -- traceM $ "Connecting ..." - -- void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "getEventData")}) "pulsar://localhost:6650" $ do - -- let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) - -- traceM $ "newConsumer " ++ show topic - -- Pulsar.withConsumer - -- ( Pulsar.defaultConsumerConfiguration - -- { Pulsar.consumerType = Just Pulsar.ConsumerShared, - -- Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType - -- } - -- ) - -- ("cannon-websocket-" ++ unpack subscription) - -- topic - -- (onPulsarError "getEventData consumer") - -- $ do - -- consumer <- ask - -- Pulsar.withDeserializedMessageId consumer msgId $ - -- Pulsar.acknowledgeNegativeMessageId - - -- TODO: Deadlettering hasn't been configured, yet. See e.g. https://pulsar.apache.org/docs/4.1.x/client-libraries-websocket/#negatively-acknowledge-messages - getEventData chan + writeChan chan.rejectMessages msgId + -- TODO: Deadlettering hasn't been configured, yet. See e.g. https://pulsar.apache.org/docs/4.1.x/client-libraries-websocket/#negatively-acknowledge-messages + getEventData chan Right notif -> do logEvent notif pure $ @@ -364,15 +348,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = AckFullSync -> throwIO UnexpectedAck AckMessage ackData -> do logAckReceived ackData - -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. - -- Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "sendNotifications")}) "pulsar://localhost:6650" $ do - -- let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) - -- Pulsar.withConsumer (Pulsar.defaultConsumerConfiguration {Pulsar.consumerType = Just Pulsar.ConsumerShared}) ("cannon-websocket-" ++ unpack queueInfo.subscription) (Pulsar.Topic topic) (onPulsarError "publishSyncMessage consumer") $ do - -- consumer <- ask - -- -- TODO: This trick probably doesn't work. We need to get into the context of the consumer. E.g. via a MVar. - -- Pulsar.withDeserializedMessageId consumer (decodeMsgId ackData.deliveryTag) $ - -- void $ - -- logPulsarResult "consumeWebsocket consumer" <$> Pulsar.acknowledgeMessageId + writeChan chan.acknowledgeMessages $ decodeMsgId ackData.deliveryTag -- run both loops concurrently, so that -- - notifications are delivered without having to wait for acks From c9f6b93b7deddda5f9166391b132124067d7fb18 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 24 Nov 2025 15:03:15 +0100 Subject: [PATCH 26/51] Cancel other Asyncs on close signal --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index eb14f427e6..932a7ea3f2 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -89,7 +89,7 @@ createPulsarChannel uid mCid = do blockOnCloseSignalAsync :: Async () <- blockOnCloseSignal closeSignal acknowledgeMsgsAsync <- acknowledgeMsgs acknowledgeMessages rejectMsgsAsync <- rejectMsgs rejectMessages - void $ UnliftIO.waitAny [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] + void $ UnliftIO.waitAnyCancel [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] pure () -- TODO: Get rid of this delay. liftIO $ threadDelay 1_000_000 From 4ab56c0cf9ab1cfaa3e51354bf91f88f75f9aa58 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 24 Nov 2025 15:04:58 +0100 Subject: [PATCH 27/51] Subscription lifecycle Create one when a client is registered. Attach to it when a WebSocket is opened. Do never unsubscribe as this deletes the subscription. --- nix/haskell-pins.nix | 4 ++-- services/cannon/src/Cannon/PulsarConsumerApp.hs | 4 ++-- services/gundeck/src/Gundeck/API/Internal.hs | 3 +-- services/gundeck/src/Gundeck/Client.hs | 6 +++--- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/nix/haskell-pins.nix b/nix/haskell-pins.nix index 9253b42c60..0fec6ff5a8 100644 --- a/nix/haskell-pins.nix +++ b/nix/haskell-pins.nix @@ -74,8 +74,8 @@ let pulsar-hs = { src = fetchgit { url = "https://github.com/wireapp/pulsar-hs"; - rev = "1473ac3d78760d20d29e29c31077aad62b37a25a"; - hash = "sha256-wqWSw9JerP5QyOJHf5AB5vbceUUp2RoRNmA3Qt9+/rI="; + rev = "8135d4ace819c6d4b8f5030a11c07aeaf8ad2498"; + hash = "sha256-w+s365QpHwAiLMwo6h/GfyKAQxTOYbPg0hZ3xkZ0GII="; }; packages = { pulsar-client-hs = "pulsar-client-hs"; diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 932a7ea3f2..650eb08635 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -74,9 +74,9 @@ createPulsarChannel uid mCid = do void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) traceM $ "newConsumer " ++ show topic - Pulsar.withConsumer + Pulsar.withConsumerNoUnsubscribe ( Pulsar.defaultConsumerConfiguration - { Pulsar.consumerType = Just Pulsar.ConsumerShared, + { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType } ) diff --git a/services/gundeck/src/Gundeck/API/Internal.hs b/services/gundeck/src/Gundeck/API/Internal.hs index 8c33a69545..1730697fb2 100644 --- a/services/gundeck/src/Gundeck/API/Internal.hs +++ b/services/gundeck/src/Gundeck/API/Internal.hs @@ -69,6 +69,5 @@ getPushTokensH uid = PushTok.PushTokenList <$> (view PushTok.addrPushToken <$$> registerConsumableNotificationsClient :: UserId -> ClientId -> Gundeck NoContent registerConsumableNotificationsClient uid cid = do - -- TODO: Care about long term subscriptions - -- liftIO $ setupConsumableNotifications uid cid + liftIO $ setupConsumableNotifications uid cid pure NoContent diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index ff30532810..1909f7460b 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -27,6 +27,7 @@ import Gundeck.Push.Data qualified as Push import Gundeck.Push.Native import Imports import Pulsar.Client qualified as Pulsar +import Pulsar.Subscription qualified as Pulsar import Wire.API.Notification unregister :: UserId -> ClientId -> Gundeck () @@ -53,16 +54,15 @@ setupConsumableNotifications uid cid = do subscriptionType = Pulsar.Earliest topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ T.unpack (userRoutingKey uid) Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications")}) "pulsar://localhost:6650" $ do - Pulsar.withConsumer + Pulsar.createSubscription ( Pulsar.defaultConsumerConfiguration - { Pulsar.consumerType = Just Pulsar.ConsumerShared, + { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType } ) subscription topic (onPulsarError "setupConsumableNotifications consumer") - (pure ()) traceM $ "XXX - setupConsumableNotifications created subscription " <> show subscription <> " on topic " <> show topic -- TODO: Replace Debug.Trace with regular logging From ece7c34775f323d93b4ff9580730fe0d0c331454 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 24 Nov 2025 17:52:51 +0100 Subject: [PATCH 28/51] Limit amount of non-acked messages --- services/cannon/cannon.cabal | 1 + services/cannon/default.nix | 4 +- .../cannon/src/Cannon/PulsarConsumerApp.hs | 40 ++++++++++++++----- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/services/cannon/cannon.cabal b/services/cannon/cannon.cabal index 7b1a15ab08..0bf130f499 100644 --- a/services/cannon/cannon.cabal +++ b/services/cannon/cannon.cabal @@ -114,6 +114,7 @@ library , safe-exceptions , servant-conduit , servant-server + , stm , strict >=0.3.2 , text >=1.1 , tinylog >=0.10 diff --git a/services/cannon/default.nix b/services/cannon/default.nix index a21a613452..0843d3738c 100644 --- a/services/cannon/default.nix +++ b/services/cannon/default.nix @@ -35,7 +35,6 @@ , metrics-wai , mwc-random , prometheus-client -, proto-lens , pulsar-client-hs , QuickCheck , random @@ -43,6 +42,7 @@ , safe-exceptions , servant-conduit , servant-server +, stm , strict , tasty , tasty-hunit @@ -100,12 +100,12 @@ mkDerivation { metrics-wai mwc-random prometheus-client - proto-lens pulsar-client-hs retry safe-exceptions servant-conduit servant-server + stm strict text tinylog diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 650eb08635..7af13238f3 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -15,6 +15,7 @@ import Control.Exception (Handler (..), catches) import Control.Exception.Base import Control.Lens hiding ((#)) import Control.Monad.Codensity +import Control.Monad.STM qualified as STM import Data.Aeson hiding (Key) import Data.Aeson qualified as A import Data.Base64.Types @@ -62,6 +63,7 @@ createPulsarChannel uid mCid = do acknowledgeMessages :: Chan ByteString <- lift newChan rejectMessages :: Chan ByteString <- lift newChan closeSignal :: MVar () <- lift $ newEmptyMVar + unackedMsgsCounter :: TVar Int <- newTVarIO 0 let subscription = case mCid of Nothing -> temporaryRoutingKey uid Just cid -> clientNotificationQueueName uid cid @@ -85,10 +87,10 @@ createPulsarChannel uid mCid = do (onPulsarError "createPulsarChannel consumer") $ do traceM $ "Ready" - receiveMsgsAsync :: Async () <- receiveMsgs msgChannel + receiveMsgsAsync :: Async () <- receiveMsgs msgChannel unackedMsgsCounter blockOnCloseSignalAsync :: Async () <- blockOnCloseSignal closeSignal - acknowledgeMsgsAsync <- acknowledgeMsgs acknowledgeMessages - rejectMsgsAsync <- rejectMsgs rejectMessages + acknowledgeMsgsAsync <- acknowledgeMsgs acknowledgeMessages unackedMsgsCounter + rejectMsgsAsync <- rejectMsgs rejectMessages unackedMsgsCounter void $ UnliftIO.waitAnyCancel [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] pure () -- TODO: Get rid of this delay. @@ -104,32 +106,48 @@ createPulsarChannel uid mCid = do PulsarQueueInfo subscription ) where - receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> ReaderT Pulsar.Consumer m (Async ()) - receiveMsgs msgChannel = UnliftIO.async . forever $ do + receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + receiveMsgs msgChannel unackedMsgsCounter = UnliftIO.async . forever $ do + liftIO $ waitUntilCounterBelow unackedMsgsCounter 500 Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage") $ do content <- Pulsar.messageContent traceM $ "XXX - received message with content " ++ BSUTF8.toString content msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize liftIO $ writeChan msgChannel (msgId, content) + liftIO $ incCounter unackedMsgsCounter traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content - void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage + -- void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) blockOnCloseSignal = UnliftIO.async . readMVar - acknowledgeMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> ReaderT Pulsar.Consumer m (Async ()) - acknowledgeMsgs chan = + acknowledgeMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + acknowledgeMsgs chan unackedMsgsCounter = UnliftIO.async . forever $ do msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask - logPulsarResult "createPulsarChannel - acknowledge message result: " <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) + traceM $ "acknowledgeMsgs" + void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) + liftIO $ decCounter unackedMsgsCounter - rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> ReaderT Pulsar.Consumer m (Async ()) - rejectMsgs chan = + rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + rejectMsgs chan unackedMsgsCounter = UnliftIO.async . forever $ do msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeNegativeMessageId + liftIO $ decCounter unackedMsgsCounter + + incCounter :: TVar Int -> IO () + incCounter tv = atomically $ modifyTVar' tv (+ 1) + + decCounter :: TVar Int -> IO () + decCounter tv = atomically $ modifyTVar' tv (subtract 1) + + waitUntilCounterBelow :: TVar Int -> Int -> IO () + waitUntilCounterBelow tv threshold = atomically $ do + v <- readTVar tv + STM.check (v < threshold) -- blocks (retry) until v < threshold -- TODO: Replace Debug.Trace with regular logging onPulsarError :: String -> Pulsar.RawResult -> IO () From 9b297817169d59386308e44ccaec32ce5436a700 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 24 Nov 2025 17:56:30 +0100 Subject: [PATCH 29/51] Remove RabbitMQ leftovers --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 7af13238f3..048014de8e 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -4,7 +4,6 @@ module Cannon.PulsarConsumerApp (pulsarWebSocketApp) where import Cannon.App (rejectOnError) import Cannon.Options -import Cannon.RabbitMq import Cannon.WS hiding (env) import Cassandra as C hiding (batch) import Conduit (runResourceT) @@ -173,7 +172,7 @@ logPulsarResult provenance result = pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = - handle handleTooManyChannels . lowerCodensity $ + lowerCodensity $ do (chan, queueInfo) <- createPulsarChannel uid mcid traceM $ "XXX pulsarWebSocketApp " ++ show queueInfo @@ -194,14 +193,13 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = [ -- TODO: Review exceptions. pulsar-hs and amqp exceptions surely differ. handleClientMisbehaving conn, handleWebSocketExceptions conn, - handleRabbitMqChannelException conn, handleInactivity conn, handleOtherExceptions conn ] $ do traverse_ (sendFullSyncMessageIfNeeded wsConn uid e) mcid traverse_ (publishSyncMessage uid . mkSynchronizationMessage) mSyncMarkerId - sendNotifications chan queueInfo wsConn + sendNotifications chan wsConn let monitor = do timeout wsConn.activityTimeout (takeMVar wsConn.activity) >>= \case @@ -316,25 +314,11 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = . logClient WS.sendCloseCode wsConn 1003 ("unexpected-ack" :: ByteString) - handleRabbitMqChannelException wsConn = do - Handler $ \ChannelClosed -> do - Log.debug e.logg $ Log.msg (Log.val "RabbitMQ channel closed") . logClient - WS.sendCloseCode wsConn 1001 ("" :: ByteString) - handleOtherExceptions wsConn = Handler $ \(err :: SomeException) -> do WS.sendCloseCode wsConn 1003 ("internal-error" :: ByteString) throwIO err - handleTooManyChannels TooManyChannels = - rejectRequestWith pendingConn $ - RejectRequest - { rejectCode = 503, - rejectMessage = "Service Unavailable", - rejectHeaders = [], - rejectBody = "" - } - mkSynchronizationMessage :: StrictText -> ByteString mkSynchronizationMessage markerId = -- TODO: Check all fromStrict/toStrict calls: It makes not sense to be "sometimes lazy". @@ -345,8 +329,8 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = msgType = Just "synchronization" } - sendNotifications :: PulsarChannel -> PulsarQueueInfo -> WSConnection -> IO () - sendNotifications chan queueInfo wsConn = do + sendNotifications :: PulsarChannel -> WSConnection -> IO () + sendNotifications chan wsConn = do traceM $ "XXX - sendNotifications called " let consumeRabbitMq = forever $ do traceM $ "XXX - sendNotifications consumeRabbitMq called " From 1cfc69cd9aac0950054e4b0793d2008de6bfc9c3 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 24 Nov 2025 18:12:21 +0100 Subject: [PATCH 30/51] A bit more expressive types --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 048014de8e..a9fcb392a1 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -46,21 +46,23 @@ instance Exception InactivityTimeout -- TODO: The name is a misleading. However, while developing, it's useful to keep the analogies with RabbitMQ. data PulsarChannel = PulsarChannel { -- TODO: Rename: msgChannel - msgVar :: Chan (ByteString, ByteString), + msgVar :: Chan (PulsarMsgId, ByteString), closeSignal :: MVar (), - acknowledgeMessages :: Chan ByteString, - rejectMessages :: Chan ByteString + acknowledgeMessages :: Chan PulsarMsgId, + rejectMessages :: Chan PulsarMsgId } -data PulsarQueueInfo = PulsarQueueInfo +newtype PulsarMsgId = PulsarMsgId {unPulsarMsgId :: ByteString} + +newtype PulsarQueueInfo = PulsarQueueInfo {subscription :: Text} deriving (Show) createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) createPulsarChannel uid mCid = do - msgChannel :: Chan (ByteString, ByteString) <- lift newChan - acknowledgeMessages :: Chan ByteString <- lift newChan - rejectMessages :: Chan ByteString <- lift newChan + msgChannel :: Chan (PulsarMsgId, ByteString) <- lift newChan + acknowledgeMessages :: Chan PulsarMsgId <- lift newChan + rejectMessages :: Chan PulsarMsgId <- lift newChan closeSignal :: MVar () <- lift $ newEmptyMVar unackedMsgsCounter :: TVar Int <- newTVarIO 0 let subscription = case mCid of @@ -105,14 +107,14 @@ createPulsarChannel uid mCid = do PulsarQueueInfo subscription ) where - receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (ByteString, ByteString) -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (PulsarMsgId, ByteString) -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) receiveMsgs msgChannel unackedMsgsCounter = UnliftIO.async . forever $ do liftIO $ waitUntilCounterBelow unackedMsgsCounter 500 Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage") $ do content <- Pulsar.messageContent traceM $ "XXX - received message with content " ++ BSUTF8.toString content - msgId :: ByteString <- Pulsar.messageId Pulsar.messageIdSerialize - liftIO $ writeChan msgChannel (msgId, content) + msgId <- Pulsar.messageId Pulsar.messageIdSerialize + liftIO $ writeChan msgChannel (PulsarMsgId msgId, content) liftIO $ incCounter unackedMsgsCounter traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content -- void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage @@ -120,19 +122,19 @@ createPulsarChannel uid mCid = do blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) blockOnCloseSignal = UnliftIO.async . readMVar - acknowledgeMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + acknowledgeMsgs :: (UnliftIO.MonadUnliftIO m) => Chan PulsarMsgId -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) acknowledgeMsgs chan unackedMsgsCounter = UnliftIO.async . forever $ do - msgId <- UnliftIO.readChan chan + PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask traceM $ "acknowledgeMsgs" void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) liftIO $ decCounter unackedMsgsCounter - rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan ByteString -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) + rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan PulsarMsgId -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) rejectMsgs chan unackedMsgsCounter = UnliftIO.async . forever $ do - msgId <- UnliftIO.readChan chan + PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeNegativeMessageId liftIO $ decCounter unackedMsgsCounter @@ -357,11 +359,11 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = -- - exceptions on either side do not cause a deadlock concurrently_ consumeRabbitMq consumeWebsocket - decodeMsgId :: String -> ByteString - decodeMsgId = either (error . ("decodeMsgId: " ++) . unpack) id . decodeBase64Untyped . BSUTF8.fromString + decodeMsgId :: String -> PulsarMsgId + decodeMsgId = either (error . ("decodeMsgId: " ++) . unpack) PulsarMsgId . decodeBase64Untyped . BSUTF8.fromString - encodeMsgId :: ByteString -> String - encodeMsgId = T.unpack . extractBase64 . encodeBase64 + encodeMsgId :: PulsarMsgId -> String + encodeMsgId = T.unpack . extractBase64 . encodeBase64 . unPulsarMsgId logParseError :: String -> IO () logParseError err = From ca4e41581e763a1216a32d377e292abac066ebfb Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 26 Nov 2025 18:05:58 +0100 Subject: [PATCH 31/51] Remove bad threadDelay --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index a9fcb392a1..745153b64f 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -7,7 +7,6 @@ import Cannon.Options import Cannon.WS hiding (env) import Cassandra as C hiding (batch) import Conduit (runResourceT) -import Control.Concurrent (threadDelay) import Control.Concurrent.Async import Control.Concurrent.Chan import Control.Exception (Handler (..), catches) @@ -94,8 +93,6 @@ createPulsarChannel uid mCid = do rejectMsgsAsync <- rejectMsgs rejectMessages unackedMsgsCounter void $ UnliftIO.waitAnyCancel [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] pure () - -- TODO: Get rid of this delay. - liftIO $ threadDelay 1_000_000 traceM "createPulsarChannel: Done" pure $ ( PulsarChannel From 5639ec6291ce81258bc24b4f82c169654ebff16d Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 08:58:02 +0100 Subject: [PATCH 32/51] Move Pulsar REST API port 8080 -> 5080 8080 already belongs to nginz in our local test env. --- deploy/dockerephemeral/docker-compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/dockerephemeral/docker-compose.yaml b/deploy/dockerephemeral/docker-compose.yaml index ca7b7d47bb..a0f379c183 100644 --- a/deploy/dockerephemeral/docker-compose.yaml +++ b/deploy/dockerephemeral/docker-compose.yaml @@ -422,10 +422,10 @@ services: pulsar: image: apachepulsar/pulsar:latest container_name: pulsar - # TODO: Change the mapping of port 8080. It has already been taken by nginz. + # 8080 belongs to nginz ports: - 6650:6650 - - 8080:8080 + - 5080:8080 networks: - demo_wire command: bin/pulsar standalone From 45f01f221548763a75cb5bc16bc2dafe65c3ccb8 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 10:42:01 +0100 Subject: [PATCH 33/51] Mark missing test mock implementation with `todo` --- services/gundeck/test/unit/MockGundeck.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/services/gundeck/test/unit/MockGundeck.hs b/services/gundeck/test/unit/MockGundeck.hs index aaef773618..1477c8585e 100644 --- a/services/gundeck/test/unit/MockGundeck.hs +++ b/services/gundeck/test/unit/MockGundeck.hs @@ -447,6 +447,7 @@ instance MonadPushAll MockGundeck where mpaRunWithBudget _ _ = id -- no throttling needed as long as we don't overdo it in the tests... mpaGetClients = mockGetClients mpaPublishToRabbitMq = mockPushRabbitMq + mpaPublishToPulsar = todo instance MonadNativeTargets MockGundeck where mntgtLogErr _ = pure () From 157fc49269dca114d3e788b5293036a300d02221 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 11:05:26 +0100 Subject: [PATCH 34/51] Make Pulsar Endpoint configurable --- libs/cassandra-util/src/Cassandra/Options.hs | 4 ++++ services/cannon/cannon.integration.yaml | 4 ++++ services/cannon/src/Cannon/Options.hs | 7 +++++-- services/cannon/src/Cannon/PulsarConsumerApp.hs | 10 +++++----- services/cannon/src/Cannon/Types.hs | 4 +++- services/cannon/src/Cannon/WS.hs | 6 ++++-- services/gundeck/gundeck.integration.yaml | 4 ++++ services/gundeck/src/Gundeck/API/Internal.hs | 5 ++++- services/gundeck/src/Gundeck/Client.hs | 6 ++++-- services/gundeck/src/Gundeck/Env.hs | 6 ++++-- services/gundeck/src/Gundeck/Options.hs | 1 + services/gundeck/src/Gundeck/Push.hs | 3 ++- 12 files changed, 44 insertions(+), 16 deletions(-) diff --git a/libs/cassandra-util/src/Cassandra/Options.hs b/libs/cassandra-util/src/Cassandra/Options.hs index 6ca3273636..e4be064b65 100644 --- a/libs/cassandra-util/src/Cassandra/Options.hs +++ b/libs/cassandra-util/src/Cassandra/Options.hs @@ -23,6 +23,7 @@ module Cassandra.Options where import Data.Aeson.TH +import Data.Text qualified as T import Imports data Endpoint = Endpoint @@ -47,3 +48,6 @@ data CassandraOpts = CassandraOpts deriving (Show, Generic) deriveFromJSON defaultOptions ''CassandraOpts + +toPulsarUrl :: Endpoint -> String +toPulsarUrl e = "pulsar://" <> T.unpack e.host <> ":" <> show e.port diff --git a/services/cannon/cannon.integration.yaml b/services/cannon/cannon.integration.yaml index 2df084dffc..cb578f1a50 100644 --- a/services/cannon/cannon.integration.yaml +++ b/services/cannon/cannon.integration.yaml @@ -30,6 +30,10 @@ rabbitmq: caCert: test/resources/rabbitmq-ca.pem insecureSkipVerifyTls: false +pulsar: + host: localhost + port: 6650 + drainOpts: gracePeriodSeconds: 1 millisecondsBetweenBatches: 500 diff --git a/services/cannon/src/Cannon/Options.hs b/services/cannon/src/Cannon/Options.hs index 3a1cee6988..4ac6af588e 100644 --- a/services/cannon/src/Cannon/Options.hs +++ b/services/cannon/src/Cannon/Options.hs @@ -44,10 +44,11 @@ module Cannon.Options DrainOpts, WSOpts (..), validateOpts, + pulsar, ) where -import Cassandra.Options (CassandraOpts) +import Cassandra.Options (CassandraOpts, Endpoint) import Control.Lens (makeFields) import Data.Aeson import Data.Aeson.APIFieldJsonTH @@ -130,7 +131,8 @@ data Opts = Opts _optsRabbitMqMaxConnections :: Int, -- | Maximum number of rabbitmq channels per connection. Must be strictly positive. _optsRabbitMqMaxChannels :: Int, - _optsNotificationTTL :: Int + _optsNotificationTTL :: Int, + _optsPulsar :: Endpoint } deriving (Show, Generic) @@ -159,3 +161,4 @@ instance FromJSON Opts where <*> o .:? "rabbitMqMaxConnections" .!= 1000 <*> o .:? "rabbitMqMaxChannels" .!= 300 <*> o .: "notificationTTL" + <*> o .: "pulsar" diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 745153b64f..94749f99d8 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -57,8 +57,8 @@ newtype PulsarQueueInfo = PulsarQueueInfo {subscription :: Text} deriving (Show) -createPulsarChannel :: UserId -> Maybe ClientId -> Codensity IO (PulsarChannel, PulsarQueueInfo) -createPulsarChannel uid mCid = do +createPulsarChannel :: UserId -> Maybe ClientId -> Env -> Codensity IO (PulsarChannel, PulsarQueueInfo) +createPulsarChannel uid mCid env = do msgChannel :: Chan (PulsarMsgId, ByteString) <- lift newChan acknowledgeMessages :: Chan PulsarMsgId <- lift newChan rejectMessages :: Chan PulsarMsgId <- lift newChan @@ -73,7 +73,7 @@ createPulsarChannel uid mCid = do liftIO $ do traceM $ "Connecting ..." - void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) "pulsar://localhost:6650" $ do + void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) env.pulsarUrl $ do let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) traceM $ "newConsumer " ++ show topic Pulsar.withConsumerNoUnsubscribe @@ -173,7 +173,7 @@ pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = lowerCodensity $ do - (chan, queueInfo) <- createPulsarChannel uid mcid + (chan, queueInfo) <- createPulsarChannel uid mcid e traceM $ "XXX pulsarWebSocketApp " ++ show queueInfo conn <- Codensity $ bracket openWebSocket closeWebSocket activity <- liftIO newEmptyMVar @@ -217,7 +217,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = where publishSyncMessage :: UserId -> ByteString -> IO () publishSyncMessage userId message = - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) "pulsar://localhost:6650" $ do + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) e.pulsarUrl $ do let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey userId) Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do result <- runResourceT $ do diff --git a/services/cannon/src/Cannon/Types.hs b/services/cannon/src/Cannon/Types.hs index f1499f308f..718393674e 100644 --- a/services/cannon/src/Cannon/Types.hs +++ b/services/cannon/src/Cannon/Types.hs @@ -38,8 +38,9 @@ import Cannon.RabbitMq import Cannon.WS (Clock, Key, Websocket) import Cannon.WS qualified as WS import Cassandra (ClientState) +import Cassandra.Options (toPulsarUrl) import Control.Concurrent.Async (mapConcurrently) -import Control.Lens ((^.)) +import Control.Lens (to, (^.)) import Control.Monad.Catch import Control.Monad.Codensity import Data.Id @@ -136,6 +137,7 @@ mkEnv external o cs l d conns p g t endpoint = do cs pool (o ^. notificationTTL) + (o ^. pulsar . to toPulsarUrl) pure $ Env o l d conns (RequestId defRequestId) wsEnv runCannon :: Env -> Cannon a -> IO a diff --git a/services/cannon/src/Cannon/WS.hs b/services/cannon/src/Cannon/WS.hs index d470019f44..b25811b3b4 100644 --- a/services/cannon/src/Cannon/WS.hs +++ b/services/cannon/src/Cannon/WS.hs @@ -159,7 +159,8 @@ data Env = Env wsOpts :: WSOpts, cassandra :: ClientState, pool :: RabbitMqPool, - notificationTTL :: Int + notificationTTL :: Int, + pulsarUrl :: String } setRequestId :: RequestId -> Env -> Env @@ -210,8 +211,9 @@ env :: ClientState -> RabbitMqPool -> Int -> + String -> Env -env externalHostname portnum gundeckHost gundeckPort logg manager websockets rabbitConnections rand clock drainOpts wsOpts cassandra pool notificationTTL = +env externalHostname portnum gundeckHost gundeckPort logg manager websockets rabbitConnections rand clock drainOpts wsOpts cassandra pool notificationTTL pulsarUrl = let upstream = (Bilge.host gundeckHost . Bilge.port gundeckPort $ empty) reqId = RequestId defRequestId in Env {..} diff --git a/services/gundeck/gundeck.integration.yaml b/services/gundeck/gundeck.integration.yaml index 1c33557402..4b376d721d 100644 --- a/services/gundeck/gundeck.integration.yaml +++ b/services/gundeck/gundeck.integration.yaml @@ -42,6 +42,10 @@ rabbitmq: caCert: test/resources/rabbitmq-ca.pem insecureSkipVerifyTls: false +pulsar: + host: localhost + port: 6650 + settings: httpPoolSize: 1024 notificationTTL: 24192200 diff --git a/services/gundeck/src/Gundeck/API/Internal.hs b/services/gundeck/src/Gundeck/API/Internal.hs index 1730697fb2..cfa76f4bd5 100644 --- a/services/gundeck/src/Gundeck/API/Internal.hs +++ b/services/gundeck/src/Gundeck/API/Internal.hs @@ -22,10 +22,12 @@ module Gundeck.API.Internal where import Cassandra qualified +import Cassandra.Options import Control.Lens (view) import Data.Id import Gundeck.Client import Gundeck.Client qualified as Client +import Gundeck.Env import Gundeck.Monad import Gundeck.Presence qualified as Presence import Gundeck.Push qualified as Push @@ -69,5 +71,6 @@ getPushTokensH uid = PushTok.PushTokenList <$> (view PushTok.addrPushToken <$$> registerConsumableNotificationsClient :: UserId -> ClientId -> Gundeck NoContent registerConsumableNotificationsClient uid cid = do - liftIO $ setupConsumableNotifications uid cid + pulsarEndpoint :: Endpoint <- view pulsar + liftIO $ setupConsumableNotifications uid cid pulsarEndpoint pure NoContent diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index 1909f7460b..a3048d8c08 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -17,6 +17,7 @@ module Gundeck.Client where +import Cassandra.Options import Control.Lens (view) import Data.Id import Data.Text qualified as T @@ -47,13 +48,14 @@ removeUser user = do setupConsumableNotifications :: UserId -> ClientId -> + Endpoint -> IO () -setupConsumableNotifications uid cid = do +setupConsumableNotifications uid cid pulsarEndpoint = do -- A hacky way to create a Pulsar subscription let subscription = "cannon-websocket-" ++ T.unpack (clientNotificationQueueName uid cid) subscriptionType = Pulsar.Earliest topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ T.unpack (userRoutingKey uid) - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications")}) "pulsar://localhost:6650" $ do + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications")}) (toPulsarUrl pulsarEndpoint) $ do Pulsar.createSubscription ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, diff --git a/services/gundeck/src/Gundeck/Env.hs b/services/gundeck/src/Gundeck/Env.hs index e3670c13a8..3a6cb6cd49 100644 --- a/services/gundeck/src/Gundeck/Env.hs +++ b/services/gundeck/src/Gundeck/Env.hs @@ -49,6 +49,7 @@ import Network.TLS as TLS import Network.TLS.Extra qualified as TLS import System.Logger qualified as Log import System.Logger.Extended qualified as Logger +import Util.Options (Endpoint) data Env = Env { _reqId :: !RequestId, @@ -61,7 +62,8 @@ data Env = Env _awsEnv :: !Aws.Env, _time :: !(IO Milliseconds), _threadBudgetState :: !(Maybe ThreadBudgetState), - _rabbitMqChannel :: MVar Channel + _rabbitMqChannel :: MVar Channel, + _pulsar :: Endpoint } makeLenses ''Env @@ -105,7 +107,7 @@ createEnv o = do } mtbs <- mkThreadBudgetState `mapM` (o ^. settings . maxConcurrentNativePushes) rabbitMqChannelMVar <- Q.mkRabbitMqChannelMVar l (Just "gundeck") (o ^. rabbitmq) - pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar + pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar (o ^. Opt.pulsar) reqIdMsg :: RequestId -> Logger.Msg -> Logger.Msg reqIdMsg = ("request" Logger..=) . unRequestId diff --git a/services/gundeck/src/Gundeck/Options.hs b/services/gundeck/src/Gundeck/Options.hs index ee55c98beb..1043c153ec 100644 --- a/services/gundeck/src/Gundeck/Options.hs +++ b/services/gundeck/src/Gundeck/Options.hs @@ -139,6 +139,7 @@ data Opts = Opts _rabbitmq :: !AmqpEndpoint, _discoUrl :: !(Maybe Text), _settings :: !Settings, + _pulsar :: !Endpoint, -- Logging -- | Log level (Debug, Info, etc) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 11c74543ee..afc17ec6c5 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -146,7 +146,8 @@ publishToRabbitMq exchangeName routingKey qMsg = do publishToPulsar :: Text -> Q.Message -> Gundeck () publishToPulsar routingKey qMsg = do logger <- view applog - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (internalLogger logger)}) "pulsar://localhost:6650" $ + pulsarEndpoint <- view Gundeck.Env.pulsar + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (internalLogger logger)}) (toPulsarUrl pulsarEndpoint) $ Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName logPulsarError $ do result <- runResourceT $ do (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ BS.toStrict (A.encode pulsarMessage)} From 7c819d04939c9ec0f6a0f926e3a7d66003735b33 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 11:11:50 +0100 Subject: [PATCH 35/51] Add Pulsar endpoint config to Helm charts We could configure more. However, we can do that when we exactly know what. --- charts/cannon/templates/configmap.yaml | 6 ++++++ charts/cannon/values.yaml | 4 ++++ charts/gundeck/templates/configmap.yaml | 6 ++++++ charts/gundeck/values.yaml | 4 ++++ 4 files changed, 20 insertions(+) diff --git a/charts/cannon/templates/configmap.yaml b/charts/cannon/templates/configmap.yaml index e7fb94bfba..b5190a812c 100644 --- a/charts/cannon/templates/configmap.yaml +++ b/charts/cannon/templates/configmap.yaml @@ -39,6 +39,12 @@ data: rabbitMqMaxConnections: {{ .config.rabbitMqMaxConnections }} rabbitMqMaxChannels: {{ .config.rabbitMqMaxChannels }} + {{- with .config.pulsar }} + pulsar: + host: {{ .host }} + port: {{ .port }} + {{- end }} + drainOpts: gracePeriodSeconds: {{ .config.drainOpts.gracePeriodSeconds }} millisecondsBetweenBatches: {{ .config.drainOpts.millisecondsBetweenBatches }} diff --git a/charts/cannon/values.yaml b/charts/cannon/values.yaml index d1428b21d8..dd05b5cc4b 100644 --- a/charts/cannon/values.yaml +++ b/charts/cannon/values.yaml @@ -29,6 +29,10 @@ config: # name: # key: + pulsar: + host: localhost + port: 6650 + # See also the section 'Controlling the speed of websocket draining during # cannon pod replacement' in docs/how-to/install/configuration-options.rst drainOpts: diff --git a/charts/gundeck/templates/configmap.yaml b/charts/gundeck/templates/configmap.yaml index 26b04fc933..89d9049509 100644 --- a/charts/gundeck/templates/configmap.yaml +++ b/charts/gundeck/templates/configmap.yaml @@ -41,6 +41,12 @@ data: {{- end }} {{- end }} + {{- with .pulsar }} + pulsar: + host: {{ .host }} + port: {{ .port }} + {{- end }} + redis: host: {{ .redis.host }} port: {{ .redis.port }} diff --git a/charts/gundeck/values.yaml b/charts/gundeck/values.yaml index 1a989c8510..04c0c1974a 100644 --- a/charts/gundeck/values.yaml +++ b/charts/gundeck/values.yaml @@ -50,6 +50,10 @@ config: # name: # key: + pulsar: + host: localhost + port: 6650 + # To enable additional writes during a migration: # redisAdditionalWrite: # host: redis-two From f1af4a298504ae7c559d33c31589bdda4086b00c Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 17:21:45 +0100 Subject: [PATCH 36/51] Fix Gundeck unit test setup We now also need to mock Pulsar. --- services/gundeck/test/unit/MockGundeck.hs | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/services/gundeck/test/unit/MockGundeck.hs b/services/gundeck/test/unit/MockGundeck.hs index 1477c8585e..baa323024e 100644 --- a/services/gundeck/test/unit/MockGundeck.hs +++ b/services/gundeck/test/unit/MockGundeck.hs @@ -109,8 +109,10 @@ data MockState = MockState -- | Non-transient notifications that are stored in the database first thing before -- delivery (so clients can always come back and pick them up later until they expire). _msCassQueue :: NotifQueue, - -- | A record of notifications that have been puhsed via RabbitMQ. - _msRabbitQueue :: Map (Text, Text) IntMultiSet + -- | A record of notifications that have been pushed via RabbitMQ. + _msRabbitQueue :: Map (Text, Text) IntMultiSet, + -- | A record of notifications that have been pushed via Pulsar. + _msPulsarQueue :: Map Text IntMultiSet } deriving (Eq) @@ -126,13 +128,13 @@ makeLenses ''MockEnv makeLenses ''MockState instance Show MockState where - show (MockState w n c r) = + show (MockState w n c r p) = intercalate "\n" - ["", "websocket: " <> show w, "native: " <> show n, "cassandra: " <> show c, "rabbitmq: " <> show r, ""] + ["", "websocket: " <> show w, "native: " <> show n, "cassandra: " <> show c, "rabbitmq: " <> show r, "pulsar: " <> show p, ""] emptyMockState :: MockState -emptyMockState = MockState mempty mempty mempty mempty +emptyMockState = MockState mempty mempty mempty mempty mempty -- these custom instances make for better error reports if tests fail. instance ToJSON MockEnv where @@ -447,7 +449,7 @@ instance MonadPushAll MockGundeck where mpaRunWithBudget _ _ = id -- no throttling needed as long as we don't overdo it in the tests... mpaGetClients = mockGetClients mpaPublishToRabbitMq = mockPushRabbitMq - mpaPublishToPulsar = todo + mpaPublishToPulsar = mockPushPulsar instance MonadNativeTargets MockGundeck where mntgtLogErr _ = pure () @@ -576,7 +578,8 @@ handlePushRabbit Push {..} = do forM_ _pushRecipients $ \(Recipient uid _ cids) -> do clients <- Set.toList . Set.unions . Map.elems . (.userClientsFull) <$> mpaGetClients (Set.singleton uid) let legacyClients = map (.clientId) $ filter (not . supportsConsumableNotifications) clients - let routingKeys = case cids of + -- TODO: This could surely be expressed in a more elegant manner. However, it should be correct. + let routingKeys = nub $ case cids of RecipientClientsAll -> case legacyClients of [] -> [userRoutingKey uid] @@ -584,9 +587,11 @@ handlePushRabbit Push {..} = do let rabbitClients = filter (`notElem` legacyClients) $ map (.clientId) clients in [userRoutingKey uid | not (null rabbitClients)] RecipientClientsSome cc -> - (clientRoutingKey uid <$> filter (`notElem` legacyClients) (toList cc)) + case filter (`notElem` legacyClients) (toList cc) of + [] -> [] + _xs -> [userRoutingKey uid] for routingKeys $ \routingKey -> - msRabbitQueue %= deliver ("user-notifications", routingKey) _pushPayload + msPulsarQueue %= deliver routingKey _pushPayload when _pushIsCellsEvent $ do msRabbitQueue %= deliver ("", "cells") _pushPayload @@ -653,6 +658,13 @@ mockPushRabbitMq exchange routingKey message = do Right (queuedNotif :: QueuedNotification) -> msRabbitQueue %= deliver (exchange, routingKey) (queuedNotif ^. queuedNotificationPayload) +mockPushPulsar :: Text -> AMQP.Message -> MockGundeck () +mockPushPulsar exchange message = do + case Aeson.eitherDecode message.msgBody of + Left e -> error $ "Invalid message body: " <> e + Right (queuedNotif :: QueuedNotification) -> + msPulsarQueue %= deliver exchange (List1 (queuedNotif ^. queuedNotificationPayload)) + mockLookupAddresses :: (HasCallStack, m ~ MockGundeck) => UserId -> From d22bcb670935d97369522972a97c792aefcb2b5f Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 17:33:57 +0100 Subject: [PATCH 37/51] Simplify expression --- services/gundeck/src/Gundeck/Push.hs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index afc17ec6c5..45b7348360 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -364,15 +364,9 @@ pushAllViaMessageBroker newNotifs userClientsFull = do pushViaPulsar :: (MonadPushAll m) => NewNotification -> m () pushViaPulsar newNotif = do qMsg <- mkMessage newNotif.nnNotification - let routingKeys = - Set.unions $ - flip Set.map (Set.fromList . toList $ newNotif.nnRecipients) \r -> - -- TODO: This pattern match is pretty bogus now. - case r._recipientClients of - RecipientClientsAll -> - Set.singleton $ userRoutingKey r._recipientId - RecipientClientsSome _ -> - Set.singleton $ userRoutingKey r._recipientId + let routingKeys = Set.unions $ + flip Set.map (Set.fromList . toList $ newNotif.nnRecipients) \r -> + Set.singleton $ userRoutingKey r._recipientId for_ routingKeys $ \routingKey -> mpaPublishToPulsar routingKey qMsg From ff2da8a8ab0c470d6a92dff12d7dac7b0fa960d3 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 17:43:32 +0100 Subject: [PATCH 38/51] Improve logging --- services/gundeck/src/Gundeck/Push.hs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 45b7348360..b8f148d66f 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -177,8 +177,17 @@ publishToPulsar routingKey qMsg = do -- TODO: Far from perfect, ignores log level and uses no fields internalLogger :: Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () internalLogger logger level file line message = - Logger.debug logger $ - Log.msg ("[" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message) + Logger.log logger (toLogLevel level) $ + Log.msg message + . Log.field "file" file + . Log.field "line" (show line) + where + toLogLevel :: Pulsar.LogLevel -> Log.Level + toLogLevel 0 = Log.Debug + toLogLevel 1 = Log.Info + toLogLevel 2 = Log.Warn + toLogLevel 3 = Log.Error + toLogLevel n = error ("Unknown Pulsar log level" <> show n) pulsarMessage :: PulsarMessage pulsarMessage = From 49a0c06a43f54c2af5cb3ba7b01bc26ffc968b86 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 18:00:20 +0100 Subject: [PATCH 39/51] Typo --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 94749f99d8..195898a74d 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -320,7 +320,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = mkSynchronizationMessage :: StrictText -> ByteString mkSynchronizationMessage markerId = - -- TODO: Check all fromStrict/toStrict calls: It makes not sense to be "sometimes lazy". + -- TODO: Check all fromStrict/toStrict calls: It makes no sense to be "sometimes lazy". BS.toStrict . encode $ PulsarMessage { msgBody = markerId, From 67c4117a54fb9301eb71e545f3b7538ef69077d8 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 18:00:44 +0100 Subject: [PATCH 40/51] Replace traceM with logging framework --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 72 +++++++++++-------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 195898a74d..776d08bc8f 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -73,7 +73,7 @@ createPulsarChannel uid mCid env = do liftIO $ do traceM $ "Connecting ..." - void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel")}) env.pulsarUrl $ do + void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel" env.logg)}) env.pulsarUrl $ do let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) traceM $ "newConsumer " ++ show topic Pulsar.withConsumerNoUnsubscribe @@ -84,7 +84,7 @@ createPulsarChannel uid mCid env = do ) ("cannon-websocket-" ++ unpack subscription) topic - (onPulsarError "createPulsarChannel consumer") + (onPulsarError "createPulsarChannel consumer" env.logg) $ do traceM $ "Ready" receiveMsgsAsync :: Async () <- receiveMsgs msgChannel unackedMsgsCounter @@ -107,7 +107,7 @@ createPulsarChannel uid mCid env = do receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (PulsarMsgId, ByteString) -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) receiveMsgs msgChannel unackedMsgsCounter = UnliftIO.async . forever $ do liftIO $ waitUntilCounterBelow unackedMsgsCounter 500 - Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage") $ do + Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage" env.logg) $ do content <- Pulsar.messageContent traceM $ "XXX - received message with content " ++ BSUTF8.toString content msgId <- Pulsar.messageId Pulsar.messageIdSerialize @@ -125,7 +125,7 @@ createPulsarChannel uid mCid env = do PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask traceM $ "acknowledgeMsgs" - void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) + void $ logPulsarResult "createPulsarChannel - acknowledge message result: " env.logg <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) liftIO $ decCounter unackedMsgsCounter rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan PulsarMsgId -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) @@ -147,27 +147,43 @@ createPulsarChannel uid mCid env = do v <- readTVar tv STM.check (v < threshold) -- blocks (retry) until v < threshold --- TODO: Replace Debug.Trace with regular logging -onPulsarError :: String -> Pulsar.RawResult -> IO () -onPulsarError provenance result = - traceM $ - provenance ++ case Pulsar.renderResult result of - Just r -> " error: " ++ (show r) - Nothing -> " error: " ++ (show (Pulsar.unRawResult result)) - --- TODO: Replace Debug.Trace with regular logging -pulsarClientLogger :: String -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () -pulsarClientLogger provenance level file line message = traceM $ provenance ++ " [" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message - --- TODO: Replace Debug.Trace with regular logging -logPulsarResult :: String -> Pulsar.RawResult -> Pulsar.RawResult -logPulsarResult provenance result = - trace - ( provenance ++ case Pulsar.renderResult result of - Just r -> " result: " ++ (show r) - Nothing -> " result: " ++ (show (Pulsar.unRawResult result)) - ) - result +onPulsarError :: String -> Log.Logger -> Pulsar.RawResult -> IO () +onPulsarError provenance logger result = + Log.err logger $ + Log.msg message + . Log.field "provenance" provenance + where + message = + "error: " <> pulsarResultToString result + +pulsarResultToString :: Pulsar.RawResult -> String +pulsarResultToString result = case Pulsar.renderResult result of + Just r -> show r + Nothing -> (show . Pulsar.unRawResult) result + +pulsarClientLogger :: String -> Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () +pulsarClientLogger provenance logger level file line message = + Log.log logger (toLogLevel level) $ + Log.msg message + . Log.field "file" file + . Log.field "line" (show line) + . Log.field "provenance" provenance + where + toLogLevel :: Pulsar.LogLevel -> Log.Level + toLogLevel 0 = Log.Debug + toLogLevel 1 = Log.Info + toLogLevel 2 = Log.Warn + toLogLevel 3 = Log.Error + toLogLevel n = error ("Unknown Pulsar log level" <> show n) + +logPulsarResult :: String -> Log.Logger -> Pulsar.RawResult -> IO () +logPulsarResult provenance logger result = + Log.debug logger $ + Log.msg message + . Log.field "provenance" provenance + where + message = + "result: " <> pulsarResultToString result pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = @@ -217,13 +233,13 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = where publishSyncMessage :: UserId -> ByteString -> IO () publishSyncMessage userId message = - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage")}) e.pulsarUrl $ do + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage" e.logg)}) e.pulsarUrl $ do let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey userId) - Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer") $ do + Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer" e.logg) $ do result <- runResourceT $ do (_, message') <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ message} lift $ Pulsar.sendMessage message' - void . pure $ logPulsarResult "consumeWebsocket" result + liftIO $ logPulsarResult "consumeWebsocket" e.logg result pure () logClient = From a7e006ebfe35c9409ba397273406b89f9f31ea47 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 19:00:19 +0100 Subject: [PATCH 41/51] Extract common Pulsar logging helpers --- libs/extended/default.nix | 2 + libs/extended/extended.cabal | 2 + libs/extended/src/Pulsar/Client/Logging.hs | 43 +++++++++++++++++++ .../cannon/src/Cannon/PulsarConsumerApp.hs | 42 ++---------------- services/gundeck/src/Gundeck/API/Internal.hs | 5 +-- services/gundeck/src/Gundeck/Client.hs | 43 ++++++------------- services/gundeck/src/Gundeck/Push.hs | 41 ++---------------- 7 files changed, 68 insertions(+), 110 deletions(-) create mode 100644 libs/extended/src/Pulsar/Client/Logging.hs diff --git a/libs/extended/default.nix b/libs/extended/default.nix index 4090a02a77..fc2c68d910 100644 --- a/libs/extended/default.nix +++ b/libs/extended/default.nix @@ -27,6 +27,7 @@ , metrics-wai , monad-control , prometheus-client +, pulsar-client-hs , retry , servant , servant-client @@ -70,6 +71,7 @@ mkDerivation { metrics-wai monad-control prometheus-client + pulsar-client-hs retry servant servant-client diff --git a/libs/extended/extended.cabal b/libs/extended/extended.cabal index a771fe1590..2821479d2c 100644 --- a/libs/extended/extended.cabal +++ b/libs/extended/extended.cabal @@ -23,6 +23,7 @@ library Hasql.Pool.Extended Network.AMQP.Extended Network.RabbitMqAdmin + Pulsar.Client.Logging Servant.API.Extended Servant.API.Extended.Endpath System.Logger.Extended @@ -98,6 +99,7 @@ library , metrics-wai , monad-control , prometheus-client + , pulsar-client-hs , retry , servant , servant-client diff --git a/libs/extended/src/Pulsar/Client/Logging.hs b/libs/extended/src/Pulsar/Client/Logging.hs new file mode 100644 index 0000000000..d333a24bfd --- /dev/null +++ b/libs/extended/src/Pulsar/Client/Logging.hs @@ -0,0 +1,43 @@ +module Pulsar.Client.Logging where + +import Imports +import Pulsar.Client qualified as Pulsar +import System.Logger qualified as Log + +onPulsarError :: (MonadIO m) => String -> Log.Logger -> Pulsar.RawResult -> m () +onPulsarError provenance logger result = + Log.err logger $ + Log.msg message + . Log.field "provenance" provenance + where + message = + "error: " <> pulsarResultToString result + +pulsarResultToString :: Pulsar.RawResult -> String +pulsarResultToString result = case Pulsar.renderResult result of + Just r -> show r + Nothing -> (show . Pulsar.unRawResult) result + +pulsarClientLogger :: (MonadIO m) => String -> Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> m () +pulsarClientLogger provenance logger level file line message = + Log.log logger (toLogLevel level) $ + Log.msg message + . Log.field "file" file + . Log.field "line" (show line) + . Log.field "provenance" provenance + where + toLogLevel :: Pulsar.LogLevel -> Log.Level + toLogLevel 0 = Log.Debug + toLogLevel 1 = Log.Info + toLogLevel 2 = Log.Warn + toLogLevel 3 = Log.Error + toLogLevel n = error ("Unknown Pulsar log level" <> show n) + +logPulsarResult :: (MonadIO m) => String -> Log.Logger -> Pulsar.RawResult -> m () +logPulsarResult provenance logger result = + Log.debug logger $ + Log.msg message + . Log.field "provenance" provenance + where + message = + "result: " <> pulsarResultToString result diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 776d08bc8f..dbaa58d971 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -31,6 +31,7 @@ import Network.WebSockets import Network.WebSockets qualified as WS import Network.WebSockets.Connection import Pulsar.Client qualified as Pulsar +import Pulsar.Client.Logging import System.Logger qualified as Log import System.Timeout import UnliftIO qualified @@ -125,7 +126,8 @@ createPulsarChannel uid mCid env = do PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask traceM $ "acknowledgeMsgs" - void $ logPulsarResult "createPulsarChannel - acknowledge message result: " env.logg <$> (Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId) + result <- liftIO $ Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId + liftIO $ logPulsarResult "createPulsarChannel - acknowledge message result: " env.logg result liftIO $ decCounter unackedMsgsCounter rejectMsgs :: (UnliftIO.MonadUnliftIO m) => Chan PulsarMsgId -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) @@ -147,44 +149,6 @@ createPulsarChannel uid mCid env = do v <- readTVar tv STM.check (v < threshold) -- blocks (retry) until v < threshold -onPulsarError :: String -> Log.Logger -> Pulsar.RawResult -> IO () -onPulsarError provenance logger result = - Log.err logger $ - Log.msg message - . Log.field "provenance" provenance - where - message = - "error: " <> pulsarResultToString result - -pulsarResultToString :: Pulsar.RawResult -> String -pulsarResultToString result = case Pulsar.renderResult result of - Just r -> show r - Nothing -> (show . Pulsar.unRawResult) result - -pulsarClientLogger :: String -> Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () -pulsarClientLogger provenance logger level file line message = - Log.log logger (toLogLevel level) $ - Log.msg message - . Log.field "file" file - . Log.field "line" (show line) - . Log.field "provenance" provenance - where - toLogLevel :: Pulsar.LogLevel -> Log.Level - toLogLevel 0 = Log.Debug - toLogLevel 1 = Log.Info - toLogLevel 2 = Log.Warn - toLogLevel 3 = Log.Error - toLogLevel n = error ("Unknown Pulsar log level" <> show n) - -logPulsarResult :: String -> Log.Logger -> Pulsar.RawResult -> IO () -logPulsarResult provenance logger result = - Log.debug logger $ - Log.msg message - . Log.field "provenance" provenance - where - message = - "result: " <> pulsarResultToString result - pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = lowerCodensity $ diff --git a/services/gundeck/src/Gundeck/API/Internal.hs b/services/gundeck/src/Gundeck/API/Internal.hs index cfa76f4bd5..482a02b285 100644 --- a/services/gundeck/src/Gundeck/API/Internal.hs +++ b/services/gundeck/src/Gundeck/API/Internal.hs @@ -22,12 +22,10 @@ module Gundeck.API.Internal where import Cassandra qualified -import Cassandra.Options import Control.Lens (view) import Data.Id import Gundeck.Client import Gundeck.Client qualified as Client -import Gundeck.Env import Gundeck.Monad import Gundeck.Presence qualified as Presence import Gundeck.Push qualified as Push @@ -71,6 +69,5 @@ getPushTokensH uid = PushTok.PushTokenList <$> (view PushTok.addrPushToken <$$> registerConsumableNotificationsClient :: UserId -> ClientId -> Gundeck NoContent registerConsumableNotificationsClient uid cid = do - pulsarEndpoint :: Endpoint <- view pulsar - liftIO $ setupConsumableNotifications uid cid pulsarEndpoint + setupConsumableNotifications uid cid pure NoContent diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index a3048d8c08..b988e3daa3 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -21,14 +21,16 @@ import Cassandra.Options import Control.Lens (view) import Data.Id import Data.Text qualified as T -import Debug.Trace +import Gundeck.Env import Gundeck.Monad import Gundeck.Notification.Data qualified as Notifications import Gundeck.Push.Data qualified as Push import Gundeck.Push.Native import Imports import Pulsar.Client qualified as Pulsar +import Pulsar.Client.Logging import Pulsar.Subscription qualified as Pulsar +import System.Logger qualified as Log import Wire.API.Notification unregister :: UserId -> ClientId -> Gundeck () @@ -48,14 +50,14 @@ removeUser user = do setupConsumableNotifications :: UserId -> ClientId -> - Endpoint -> - IO () -setupConsumableNotifications uid cid pulsarEndpoint = do - -- A hacky way to create a Pulsar subscription + Gundeck () +setupConsumableNotifications uid cid = do let subscription = "cannon-websocket-" ++ T.unpack (clientNotificationQueueName uid cid) subscriptionType = Pulsar.Earliest topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ T.unpack (userRoutingKey uid) - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications")}) (toPulsarUrl pulsarEndpoint) $ do + pulsarEndpoint :: Endpoint <- view pulsar + logger <- view applog + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications" logger)}) (toPulsarUrl pulsarEndpoint) $ do Pulsar.createSubscription ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, @@ -64,27 +66,8 @@ setupConsumableNotifications uid cid pulsarEndpoint = do ) subscription topic - (onPulsarError "setupConsumableNotifications consumer") - traceM $ "XXX - setupConsumableNotifications created subscription " <> show subscription <> " on topic " <> show topic - --- TODO: Replace Debug.Trace with regular logging -onPulsarError :: String -> Pulsar.RawResult -> IO () -onPulsarError provenance result = - traceM $ - provenance ++ case Pulsar.renderResult result of - Just r -> " error: " ++ (show r) - Nothing -> " error: " ++ (show (Pulsar.unRawResult result)) - --- TODO: Replace Debug.Trace with regular logging -pulsarClientLogger :: String -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () -pulsarClientLogger provenance level file line message = traceM $ provenance ++ " [" <> show level <> "] " <> file <> ":" <> show line <> ":" <> message - --- TODO: Replace Debug.Trace with regular logging -logPulsarResult :: String -> Pulsar.RawResult -> Pulsar.RawResult -logPulsarResult provenance result = - trace - ( provenance ++ case Pulsar.renderResult result of - Just r -> " result: " ++ (show r) - Nothing -> " result: " ++ (show (Pulsar.unRawResult result)) - ) - result + (onPulsarError "setupConsumableNotifications consumer" logger) + Log.debug logger $ + Log.msg @String "Subscription created" + . Log.field "topic" (show topic) + . Log.field "subscription" (show subscription) diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index b8f148d66f..50a698d334 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -89,7 +89,7 @@ import Network.AMQP qualified as Q import Network.HTTP.Types import Network.Wai.Utilities import Pulsar.Client qualified as Pulsar -import System.Logger qualified as Logger +import Pulsar.Client.Logging import System.Logger.Class (msg, val, (+++), (.=), (~~)) import System.Logger.Class qualified as Log import UnliftIO (pooledMapConcurrentlyN) @@ -147,48 +147,15 @@ publishToPulsar :: Text -> Q.Message -> Gundeck () publishToPulsar routingKey qMsg = do logger <- view applog pulsarEndpoint <- view Gundeck.Env.pulsar - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (internalLogger logger)}) (toPulsarUrl pulsarEndpoint) $ - Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName logPulsarError $ do + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishToPulsar" logger)}) (toPulsarUrl pulsarEndpoint) $ + Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName (onPulsarError "publishToPulsar" logger) $ do result <- runResourceT $ do (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ BS.toStrict (A.encode pulsarMessage)} lift $ Pulsar.sendMessage message - lift $ logPulsarResult result + logPulsarResult "publishToPulsar" logger result where topicName = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ Text.unpack routingKey - logPulsarError :: Pulsar.RawResult -> Gundeck () - logPulsarError result = - case Pulsar.renderResult result of - Just r -> Log.err $ Log.msg errorMsg . Log.field "error" (show r) - Nothing -> Log.err $ Log.msg errorMsg . Log.field "error" (show (Pulsar.unRawResult result)) - - errorMsg :: String - errorMsg = "Failed to create Pulsar producer." :: String - - logPulsarResult :: Pulsar.RawResult -> Gundeck () - logPulsarResult result = - case Pulsar.renderResult result of - Just r -> Log.err $ Log.msg resultMsg . Log.field "result" (show r) - Nothing -> Log.err $ Log.msg resultMsg . Log.field "result" (show (Pulsar.unRawResult result)) - - resultMsg :: String - resultMsg = "Result of sending Pulsar message." :: String - - -- TODO: Far from perfect, ignores log level and uses no fields - internalLogger :: Log.Logger -> Pulsar.LogLevel -> Pulsar.LogFile -> Pulsar.LogLine -> Pulsar.LogMessage -> IO () - internalLogger logger level file line message = - Logger.log logger (toLogLevel level) $ - Log.msg message - . Log.field "file" file - . Log.field "line" (show line) - where - toLogLevel :: Pulsar.LogLevel -> Log.Level - toLogLevel 0 = Log.Debug - toLogLevel 1 = Log.Info - toLogLevel 2 = Log.Warn - toLogLevel 3 = Log.Error - toLogLevel n = error ("Unknown Pulsar log level" <> show n) - pulsarMessage :: PulsarMessage pulsarMessage = PulsarMessage From aad4fe3f9293418dca1c19aa19d2d96c7249293c Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 19:00:36 +0100 Subject: [PATCH 42/51] Delete useless comment --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index dbaa58d971..1d87751cd5 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -115,7 +115,6 @@ createPulsarChannel uid mCid env = do liftIO $ writeChan msgChannel (PulsarMsgId msgId, content) liftIO $ incCounter unackedMsgsCounter traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content - -- void $ logPulsarResult "createPulsarChannel - acknowledge message result: " <$> Pulsar.acknowledgeMessage blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) blockOnCloseSignal = UnliftIO.async . readMVar From ebaa1bd8cc05bd3782f683a9be1b777049ec663d Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 19:19:37 +0100 Subject: [PATCH 43/51] Delete PulsarQueueInfo It served no purpose anymore... --- services/cannon/src/Cannon/PulsarConsumerApp.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 1d87751cd5..20f85e378e 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -54,11 +54,7 @@ data PulsarChannel = PulsarChannel newtype PulsarMsgId = PulsarMsgId {unPulsarMsgId :: ByteString} -newtype PulsarQueueInfo = PulsarQueueInfo - {subscription :: Text} - deriving (Show) - -createPulsarChannel :: UserId -> Maybe ClientId -> Env -> Codensity IO (PulsarChannel, PulsarQueueInfo) +createPulsarChannel :: UserId -> Maybe ClientId -> Env -> Codensity IO PulsarChannel createPulsarChannel uid mCid env = do msgChannel :: Chan (PulsarMsgId, ByteString) <- lift newChan acknowledgeMessages :: Chan PulsarMsgId <- lift newChan @@ -152,8 +148,7 @@ pulsarWebSocketApp :: UserId -> Maybe ClientId -> Maybe Text -> Env -> ServerApp pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = lowerCodensity $ do - (chan, queueInfo) <- createPulsarChannel uid mcid e - traceM $ "XXX pulsarWebSocketApp " ++ show queueInfo + chan <- createPulsarChannel uid mcid e conn <- Codensity $ bracket openWebSocket closeWebSocket activity <- liftIO newEmptyMVar let wsConn = From d1694427047250fc6395fa51f64c833e7d000943 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 1 Dec 2025 19:19:53 +0100 Subject: [PATCH 44/51] Replace traceM with serious logging --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 60 ++++++++++++------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index 20f85e378e..d1a6f94d14 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -25,7 +25,6 @@ import Data.Id import Data.Text import Data.Text qualified as T import Data.Text.Encoding qualified as TE -import Debug.Trace import Imports hiding (min, threadDelay) import Network.WebSockets import Network.WebSockets qualified as WS @@ -69,10 +68,10 @@ createPulsarChannel uid mCid env = do Just _cid -> Pulsar.Earliest liftIO $ do - traceM $ "Connecting ..." + Log.debug env.logg $ + Log.msg (Log.val "Connecting Pulsar consumer") + . Log.field "topic" (show topic) void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel" env.logg)}) env.pulsarUrl $ do - let topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) - traceM $ "newConsumer " ++ show topic Pulsar.withConsumerNoUnsubscribe ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, @@ -83,34 +82,44 @@ createPulsarChannel uid mCid env = do topic (onPulsarError "createPulsarChannel consumer" env.logg) $ do - traceM $ "Ready" receiveMsgsAsync :: Async () <- receiveMsgs msgChannel unackedMsgsCounter blockOnCloseSignalAsync :: Async () <- blockOnCloseSignal closeSignal acknowledgeMsgsAsync <- acknowledgeMsgs acknowledgeMessages unackedMsgsCounter rejectMsgsAsync <- rejectMsgs rejectMessages unackedMsgsCounter + Log.info env.logg $ + Log.msg (Log.val "Consumer ready. Waiting for external input.") + . Log.field "topic" (show topic) void $ UnliftIO.waitAnyCancel [receiveMsgsAsync, blockOnCloseSignalAsync, acknowledgeMsgsAsync, rejectMsgsAsync] pure () - traceM "createPulsarChannel: Done" + Log.debug env.logg $ + Log.msg @String "createPulsarChannel: Done" + . Log.field "topic" (show topic) pure $ - ( PulsarChannel - { msgVar = msgChannel, - closeSignal = closeSignal, - acknowledgeMessages = acknowledgeMessages, - rejectMessages = rejectMessages - }, - PulsarQueueInfo subscription - ) + PulsarChannel + { msgVar = msgChannel, + closeSignal = closeSignal, + acknowledgeMessages = acknowledgeMessages, + rejectMessages = rejectMessages + } where + topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey uid) + receiveMsgs :: (UnliftIO.MonadUnliftIO m) => Chan (PulsarMsgId, ByteString) -> TVar Int -> ReaderT Pulsar.Consumer m (Async ()) receiveMsgs msgChannel unackedMsgsCounter = UnliftIO.async . forever $ do liftIO $ waitUntilCounterBelow unackedMsgsCounter 500 Pulsar.receiveMessage (liftIO . onPulsarError "receiveMessage" env.logg) $ do content <- Pulsar.messageContent - traceM $ "XXX - received message with content " ++ BSUTF8.toString content + Log.debug env.logg $ + Log.msg @String "received message with content" + . Log.field "content" (BSUTF8.toString content) + . Log.field "topic" (show topic) msgId <- Pulsar.messageId Pulsar.messageIdSerialize liftIO $ writeChan msgChannel (PulsarMsgId msgId, content) liftIO $ incCounter unackedMsgsCounter - traceM $ "XXX - wrote message to channel:" ++ BSUTF8.toString content + Log.debug env.logg $ + Log.msg @String "wrote message to channel" + . Log.field "content" (BSUTF8.toString content) + . Log.field "topic" (show topic) blockOnCloseSignal :: (UnliftIO.MonadUnliftIO m) => MVar () -> m (Async ()) blockOnCloseSignal = UnliftIO.async . readMVar @@ -120,7 +129,9 @@ createPulsarChannel uid mCid env = do UnliftIO.async . forever $ do PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask - traceM $ "acknowledgeMsgs" + Log.debug env.logg $ + Log.msg @String "acknowledgeMsgs" + . Log.field "topic" (show topic) result <- liftIO $ Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeMessageId liftIO $ logPulsarResult "createPulsarChannel - acknowledge message result: " env.logg result liftIO $ decCounter unackedMsgsCounter @@ -130,6 +141,9 @@ createPulsarChannel uid mCid env = do UnliftIO.async . forever $ do PulsarMsgId msgId <- UnliftIO.readChan chan consumer :: Pulsar.Consumer <- ask + Log.debug env.logg $ + Log.msg @String "rejectMsgs" + . Log.field "topic" (show topic) Pulsar.withDeserializedMessageId consumer msgId Pulsar.acknowledgeNegativeMessageId liftIO $ decCounter unackedMsgsCounter @@ -215,9 +229,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = getEventData :: PulsarChannel -> IO (Either EventData SynchronizationData) getEventData chan = do - traceM $ "getEventData called" (msgId, msg) <- readChan chan.msgVar - traceM $ "getEventData received message" <> show (toString msg) decMsg :: PulsarMessage <- either (\err -> logParseError err >> error "Unexpected parse error") pure $ A.eitherDecode (BS.fromStrict msg) case decMsg.msgType of Just "synchronization" -> do @@ -304,14 +316,18 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = sendNotifications :: PulsarChannel -> WSConnection -> IO () sendNotifications chan wsConn = do - traceM $ "XXX - sendNotifications called " + Log.debug e.logg $ + Log.msg (Log.val "sendNotifications called ") let consumeRabbitMq = forever $ do - traceM $ "XXX - sendNotifications consumeRabbitMq called " + Log.debug e.logg $ + Log.msg (Log.val "sendNotifications consumeRabbitMq called ") eventData <- getEventData chan let msg = case eventData of Left event -> EventMessage event Right sync -> EventSyncMessage sync - traceM $ "XXX - sendNotifications sending ... " <> show msg + Log.debug e.logg $ + Log.msg @String "sendNotifications sending" + . Log.field "messahe" (show msg) catch (WS.sendBinaryData wsConn.inner (encode msg)) $ \(err :: SomeException) -> do logSendFailure err From 6527dcba21b29aa01ad93d4db699742411fb72de Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Tue, 2 Dec 2025 07:57:52 +0100 Subject: [PATCH 45/51] Fix warning about List1 --- services/gundeck/test/unit/MockGundeck.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/gundeck/test/unit/MockGundeck.hs b/services/gundeck/test/unit/MockGundeck.hs index baa323024e..e9c56580d1 100644 --- a/services/gundeck/test/unit/MockGundeck.hs +++ b/services/gundeck/test/unit/MockGundeck.hs @@ -663,7 +663,7 @@ mockPushPulsar exchange message = do case Aeson.eitherDecode message.msgBody of Left e -> error $ "Invalid message body: " <> e Right (queuedNotif :: QueuedNotification) -> - msPulsarQueue %= deliver exchange (List1 (queuedNotif ^. queuedNotificationPayload)) + msPulsarQueue %= deliver exchange (queuedNotif ^. queuedNotificationPayload) mockLookupAddresses :: (HasCallStack, m ~ MockGundeck) => From 6c4c031b38a2ed6ce3b3e13cfa6005eee47e1426 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Thu, 4 Dec 2025 18:51:53 +0100 Subject: [PATCH 46/51] registerConsumableNotificationsClient: Log all exceptions --- services/gundeck/src/Gundeck/API/Internal.hs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/services/gundeck/src/Gundeck/API/Internal.hs b/services/gundeck/src/Gundeck/API/Internal.hs index 482a02b285..3a311acf02 100644 --- a/services/gundeck/src/Gundeck/API/Internal.hs +++ b/services/gundeck/src/Gundeck/API/Internal.hs @@ -22,7 +22,9 @@ module Gundeck.API.Internal where import Cassandra qualified +import Control.Exception import Control.Lens (view) +import Control.Monad.Catch import Data.Id import Gundeck.Client import Gundeck.Client qualified as Client @@ -33,6 +35,7 @@ import Gundeck.Push.Data qualified as PushTok import Gundeck.Push.Native.Types qualified as PushTok import Imports import Servant +import System.Logger.Class import Wire.API.Push.Token qualified as PushTok import Wire.API.Push.V2 import Wire.API.Routes.Internal.Gundeck @@ -68,6 +71,14 @@ getPushTokensH :: UserId -> Gundeck PushTok.PushTokenList getPushTokensH uid = PushTok.PushTokenList <$> (view PushTok.addrPushToken <$$> PushTok.lookup uid Cassandra.All) registerConsumableNotificationsClient :: UserId -> ClientId -> Gundeck NoContent -registerConsumableNotificationsClient uid cid = do - setupConsumableNotifications uid cid - pure NoContent +registerConsumableNotificationsClient uid cid = + -- TODO: This error handling is crazy: However, if there is any exception we want to see during this debug phase. + Control.Monad.Catch.catch + ( do + setupConsumableNotifications uid cid + pure NoContent + ) + handler + where + handler :: SomeException -> Gundeck NoContent + handler e = System.Logger.Class.log Error (msg $ displayException e) >> pure NoContent From f590c30af7255aade22704b52383de8677865879 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 5 Dec 2025 11:38:12 +0100 Subject: [PATCH 47/51] Hi CI From affb323c4d377b5094ee837d78e4baeb062b3c19 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Fri, 5 Dec 2025 16:09:01 +0100 Subject: [PATCH 48/51] use Admin API to create subscriptions for clients I suspect that the protobuf variant leads to crashes. Let's see if the Admin REST API works better. --- charts/gundeck/templates/configmap.yaml | 6 +++ charts/gundeck/values.yaml | 4 ++ libs/cassandra-util/src/Cassandra/Options.hs | 3 ++ services/gundeck/default.nix | 4 ++ services/gundeck/gundeck.cabal | 2 + services/gundeck/gundeck.integration.yaml | 4 ++ services/gundeck/src/Gundeck/Client.hs | 48 ++++++++++++-------- services/gundeck/src/Gundeck/Env.hs | 5 +- services/gundeck/src/Gundeck/Options.hs | 1 + 9 files changed, 55 insertions(+), 22 deletions(-) diff --git a/charts/gundeck/templates/configmap.yaml b/charts/gundeck/templates/configmap.yaml index 89d9049509..b1c19ebfcd 100644 --- a/charts/gundeck/templates/configmap.yaml +++ b/charts/gundeck/templates/configmap.yaml @@ -47,6 +47,12 @@ data: port: {{ .port }} {{- end }} + {{- with .pulsarAdmin }} + pulsarAdmin: + host: {{ .host }} + port: {{ .port }} + {{- end }} + redis: host: {{ .redis.host }} port: {{ .redis.port }} diff --git a/charts/gundeck/values.yaml b/charts/gundeck/values.yaml index 04c0c1974a..d79857d7c4 100644 --- a/charts/gundeck/values.yaml +++ b/charts/gundeck/values.yaml @@ -54,6 +54,10 @@ config: host: localhost port: 6650 + pulsarAdmin: + host: localhost + port: 8080 + # To enable additional writes during a migration: # redisAdditionalWrite: # host: redis-two diff --git a/libs/cassandra-util/src/Cassandra/Options.hs b/libs/cassandra-util/src/Cassandra/Options.hs index e4be064b65..04ac0c2a37 100644 --- a/libs/cassandra-util/src/Cassandra/Options.hs +++ b/libs/cassandra-util/src/Cassandra/Options.hs @@ -51,3 +51,6 @@ deriveFromJSON defaultOptions ''CassandraOpts toPulsarUrl :: Endpoint -> String toPulsarUrl e = "pulsar://" <> T.unpack e.host <> ":" <> show e.port + +toPulsarAdminUrl :: Endpoint -> String +toPulsarAdminUrl e = "https://" <> T.unpack e.host <> ":" <> show e.port diff --git a/services/gundeck/default.nix b/services/gundeck/default.nix index 2da6e313ca..ce7025a2f6 100644 --- a/services/gundeck/default.nix +++ b/services/gundeck/default.nix @@ -52,6 +52,7 @@ , optparse-applicative , prometheus-client , psqueues +, pulsar-admin , pulsar-client-hs , QuickCheck , quickcheck-instances @@ -64,6 +65,7 @@ , safe-exceptions , scientific , servant +, servant-client , servant-server , string-conversions , tagged @@ -134,12 +136,14 @@ mkDerivation { network-uri prometheus-client psqueues + pulsar-admin pulsar-client-hs raw-strings-qq resourcet retry safe-exceptions servant + servant-client servant-server text these diff --git a/services/gundeck/gundeck.cabal b/services/gundeck/gundeck.cabal index 3cd14ea3c9..0c675c9295 100644 --- a/services/gundeck/gundeck.cabal +++ b/services/gundeck/gundeck.cabal @@ -149,12 +149,14 @@ library , network-uri >=2.6 , prometheus-client , psqueues >=0.2.2 + , pulsar-admin , pulsar-client-hs , raw-strings-qq , resourcet >=1.1 , retry >=0.5 , safe-exceptions , servant + , servant-client , servant-server , text >=1.1 , these diff --git a/services/gundeck/gundeck.integration.yaml b/services/gundeck/gundeck.integration.yaml index 4b376d721d..df39731dba 100644 --- a/services/gundeck/gundeck.integration.yaml +++ b/services/gundeck/gundeck.integration.yaml @@ -46,6 +46,10 @@ pulsar: host: localhost port: 6650 +pulsarAdmin: + host: localhost + port: 5080 + settings: httpPoolSize: 1024 notificationTTL: 24192200 diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index b988e3daa3..ab124020bf 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -20,16 +20,14 @@ module Gundeck.Client where import Cassandra.Options import Control.Lens (view) import Data.Id -import Data.Text qualified as T import Gundeck.Env import Gundeck.Monad import Gundeck.Notification.Data qualified as Notifications import Gundeck.Push.Data qualified as Push import Gundeck.Push.Native import Imports -import Pulsar.Client qualified as Pulsar -import Pulsar.Client.Logging -import Pulsar.Subscription qualified as Pulsar +import Pulsar.Admin +import Servant.Client import System.Logger qualified as Log import Wire.API.Notification @@ -52,22 +50,32 @@ setupConsumableNotifications :: ClientId -> Gundeck () setupConsumableNotifications uid cid = do - let subscription = "cannon-websocket-" ++ T.unpack (clientNotificationQueueName uid cid) - subscriptionType = Pulsar.Earliest - topic = Pulsar.Topic . Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ T.unpack (userRoutingKey uid) - pulsarEndpoint :: Endpoint <- view pulsar - logger <- view applog - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "setupConsumableNotifications" logger)}) (toPulsarUrl pulsarEndpoint) $ do - Pulsar.createSubscription - ( Pulsar.defaultConsumerConfiguration - { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, - Pulsar.consumerSubscriptionInitialPosition = Just subscriptionType + let -- Rebuilding `latest` here. See https://github.com/apache/pulsar/blob/master/pulsar-client/src/main/java/org/apache/pulsar/client/impl/ResetCursorData.java#L58 + resetCursorCfg = + ResetCursorData + { resetCursorDataBatchIndex = Just (-1), + resetCursorDataEntryId = Just $ fromIntegral (maxBound :: Int64), + resetCursorDataExcluded = Nothing, + resetCursorDataLedgerId = Just $ fromIntegral (maxBound :: Int64), + resetCursorDataPartitionIndex = Just (-1), + resetCursorDataProperties = Nothing + } + cfg = + PersistentTopicsCreateSubscriptionParameters + { persistentTopicsCreateSubscriptionTenant = "wire", + persistentTopicsCreateSubscriptionNamespace = "user-notifications", + persistentTopicsCreateSubscriptionTopic = userRoutingKey uid, + persistentTopicsCreateSubscriptionSubscriptionName = ("cannon-websocket-" :: Text) <> clientNotificationQueueName uid cid, + persistentTopicsCreateSubscriptionAuthoritative = Nothing, + persistentTopicsCreateSubscriptionReplicated = Nothing, + persistentTopicsCreateSubscriptionMessageId = resetCursorCfg } - ) - subscription - topic - (onPulsarError "setupConsumableNotifications consumer" logger) + httpManager <- view Gundeck.Monad.manager + pulsarAdminUrlString <- toPulsarAdminUrl <$> view pulsarAdmin + pulsarAdminUrl <- parseBaseUrl pulsarAdminUrlString + liftIO . void $ flip runClientM (mkClientEnv httpManager pulsarAdminUrl) $ persistentTopicsCreateSubscription cfg + logger <- view applog Log.debug logger $ Log.msg @String "Subscription created" - . Log.field "topic" (show topic) - . Log.field "subscription" (show subscription) + . Log.field "topic" (show cfg.persistentTopicsCreateSubscriptionTopic) + . Log.field "subscription" (show cfg.persistentTopicsCreateSubscriptionSubscriptionName) diff --git a/services/gundeck/src/Gundeck/Env.hs b/services/gundeck/src/Gundeck/Env.hs index 3a6cb6cd49..2cd4a031f7 100644 --- a/services/gundeck/src/Gundeck/Env.hs +++ b/services/gundeck/src/Gundeck/Env.hs @@ -63,7 +63,8 @@ data Env = Env _time :: !(IO Milliseconds), _threadBudgetState :: !(Maybe ThreadBudgetState), _rabbitMqChannel :: MVar Channel, - _pulsar :: Endpoint + _pulsar :: Endpoint, + _pulsarAdmin :: Endpoint } makeLenses ''Env @@ -107,7 +108,7 @@ createEnv o = do } mtbs <- mkThreadBudgetState `mapM` (o ^. settings . maxConcurrentNativePushes) rabbitMqChannelMVar <- Q.mkRabbitMqChannelMVar l (Just "gundeck") (o ^. rabbitmq) - pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar (o ^. Opt.pulsar) + pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar (o ^. Opt.pulsar) (o ^. Opt.pulsarAdmin) reqIdMsg :: RequestId -> Logger.Msg -> Logger.Msg reqIdMsg = ("request" Logger..=) . unRequestId diff --git a/services/gundeck/src/Gundeck/Options.hs b/services/gundeck/src/Gundeck/Options.hs index 1043c153ec..fb33151db6 100644 --- a/services/gundeck/src/Gundeck/Options.hs +++ b/services/gundeck/src/Gundeck/Options.hs @@ -140,6 +140,7 @@ data Opts = Opts _discoUrl :: !(Maybe Text), _settings :: !Settings, _pulsar :: !Endpoint, + _pulsarAdmin :: !Endpoint, -- Logging -- | Log level (Debug, Info, etc) From fdf55c455534db6d500d05ff02e95c782f345f5d Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Mon, 8 Dec 2025 15:22:34 +0100 Subject: [PATCH 49/51] WIP: Add notification benchmark test Juggling two branches makes things just more difficult in the long run... --- Makefile | 3 +- integration/integration.cabal | 2 + .../test/Test/NotificationsBenchmark.hs | 139 ++++++++++++++++++ integration/test/Testlib/Env.hs | 16 +- integration/test/Testlib/Options.hs | 11 +- integration/test/Testlib/Run.hs | 9 +- integration/test/Testlib/RunServices.hs | 4 +- integration/test/Testlib/Types.hs | 22 ++- services/integration.yaml | 4 + 9 files changed, 195 insertions(+), 15 deletions(-) create mode 100644 integration/test/Test/NotificationsBenchmark.hs diff --git a/Makefile b/Makefile index ecf4201345..d451a3b0b0 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,8 @@ fake-aws fake-aws-s3 fake-aws-sqs aws-ingress fluent-bit kibana backoffice \ calling-test demo-smtp elasticsearch-curator elasticsearch-external \ elasticsearch-ephemeral minio-external cassandra-external \ ingress-nginx-controller nginx-ingress-services reaper restund \ -k8ssandra-test-cluster ldap-scim-bridge wire-server-enterprise +k8ssandra-test-cluster ldap-scim-bridge wire-server-enterprise \ +integration KIND_CLUSTER_NAME := wire-server HELM_PARALLELISM ?= 1 # 1 for sequential tests; 6 for all-parallel tests # (run `psql -h localhost -p 5432 -d backendA -U wire-server -w` for the list of options for PSQL_DB) diff --git a/integration/integration.cabal b/integration/integration.cabal index 0cdf6972ef..70386331cf 100644 --- a/integration/integration.cabal +++ b/integration/integration.cabal @@ -184,6 +184,7 @@ library Test.MLS.Unreachable Test.NginxZAuthModule Test.Notifications + Test.NotificationsBenchmark Test.OAuth Test.PasswordReset Test.Presence @@ -299,6 +300,7 @@ library , split , stm , streaming-commons + , streamly , string-conversions , system-linux-proc , tagged diff --git a/integration/test/Test/NotificationsBenchmark.hs b/integration/test/Test/NotificationsBenchmark.hs new file mode 100644 index 0000000000..479e0564d7 --- /dev/null +++ b/integration/test/Test/NotificationsBenchmark.hs @@ -0,0 +1,139 @@ +module Test.NotificationsBenchmark where + +import API.Brig +import API.BrigCommon +import API.Common +import API.GundeckInternal +import Control.Concurrent +import Control.Monad.Codensity (Codensity (..)) +import Control.Monad.Reader (asks) +import Control.Monad.Reader.Class (local) +import Control.Retry +import qualified Data.Map.Strict as Map +import Data.String.Conversions (cs) +import Data.Time +import GHC.Conc (numCapabilities) +import GHC.Stack +import SetupHelpers +import qualified Streamly.Data.Fold.Prelude as Fold +import qualified Streamly.Data.Stream.Prelude as Stream +import System.Random +import qualified Test.Events as TestEvents +import Testlib.Prekeys +import Testlib.Prelude + +data TestRecipient = TestRecipient + { user :: Value, + clientIds :: [String] + } + deriving (Show) + +testBench :: (HasCallStack) => App () +testBench = do + shardingGroupCount <- asks (.shardingGroupCount) + shardingGroup <- asks (.shardingGroup) + maxUserNo <- asks (.maxUserNo) + + -- Preparation + let parCfg = Stream.maxThreads (numCapabilities * 2) . Stream.ordered False + toMap = Fold.foldl' (\kv (k, v) -> Map.insert k v kv) Map.empty + -- Later, we only read from this map. Thus, it doesn't have to be thread-safe. + userMap :: Map Word TestRecipient <- + Stream.fromList [0 :: Word .. maxUserNo] + & Stream.filter ((shardingGroup ==) . (`mod` shardingGroupCount)) + & Stream.parMapM parCfg (\i -> generateTestRecipient >>= \r -> pure (i, r)) + & Stream.fold toMap + + now <- liftIO getCurrentTime + + -- TODO: To be replaced with real data from the file. (See + -- https://wearezeta.atlassian.net/wiki/spaces/PET/pages/2118680620/Simulating+production-like+data) + let fakeData = zip (plusDelta now <$> [0 :: Word ..]) (cycle [0 .. maxUserNo]) + + Stream.fromList fakeData + & Stream.filter (\(_t, uNo) -> (uNo `mod` shardingGroupCount) == shardingGroup) + & Stream.parMapM parCfg (\(t, uNo) -> waitForTimeStamp t >> sendAndReceive uNo userMap) + & Stream.fold Fold.drain + +waitForTimeStamp :: UTCTime -> App () +waitForTimeStamp timestamp = liftIO $ do + now <- getCurrentTime + when (now < timestamp) + $ + -- Event comes from the simulated future: Wait here until now and timestamp are aligned. + let delta = diffTimeToMicroSeconds $ diffUTCTime timestamp now + in print ("Waiting " ++ show delta ++ " microseconds. (timestamp, now)" ++ show (timestamp, now)) + >> threadDelay delta + where + diffTimeToMicroSeconds :: NominalDiffTime -> Int + diffTimeToMicroSeconds dt = floor @Double (realToFrac dt * 1_000_000) + +plusDelta :: UTCTime -> Word -> UTCTime +plusDelta timestamp deltaMilliSeconds = addUTCTime (fromIntegral deltaMilliSeconds / 1000) timestamp + +sendAndReceive :: Word -> Map Word TestRecipient -> App () +sendAndReceive userNo userMap = do + print $ "pushing to user" ++ show userNo + let testRecipient = userMap Map.! (fromIntegral userNo) + alice = testRecipient.user + + r <- recipient alice + payload :: Value <- toJSON <$> liftIO randomPayload + now <- liftIO $ getCurrentTime + let push = + object + [ "recipients" .= [r], + "payload" + .= [ object + [ "foo" .= payload, + "sent_at" .= now + ] + ] + ] + + void $ postPush alice [push] >>= getBody 200 + + messageDeliveryTimeout <- asks $ fromIntegral . (.maxDeliveryDelay) + forM_ (testRecipient.clientIds) $ \(cid :: String) -> + runCodensity (TestEvents.createEventsWebSocket alice (Just cid)) $ \ws -> do + -- TODO: Tweak this value to the least acceptable event delivery duration + local (setTimeoutTo messageDeliveryTimeout) $ TestEvents.assertFindsEvent ws $ \e -> do + receivedAt <- liftIO getCurrentTime + sentAt :: UTCTime <- (e %. "payload.sent_at" >>= asByteString) <&> fromJust . decode . cs + print $ "Message sent/receive delta: " ++ show (diffUTCTime receivedAt sentAt) + + e %. "payload" `shouldMatch` [object ["foo" .= payload]] + where + -- \| Generate a random string with random length up to 2048 bytes + randomPayload :: IO String + randomPayload = + -- Measured with + -- `kubectl exec --namespace databases -it gundeck-gundeck-eks-eu-west-1a-sts-0 -- sh -c 'cqlsh -e "select blobAsText(payload) from gundeck.notifications LIMIT 5000;" ' | sed 's/^[ \t]*//;s/[ \t]*$//' | wc` + let len :: Int = 884 -- measured in prod + in mapM (\_ -> randomRIO ('\32', '\126')) [1 .. len] -- printable ASCII + +setTimeoutTo :: Int -> Env -> Env +setTimeoutTo tSecs env = env {timeOutSeconds = tSecs} + +generateTestRecipient :: (HasCallStack) => App TestRecipient +generateTestRecipient = do + print "generateTestRecipient" + user <- recover $ (randomUser OwnDomain def) + r <- randomRIO @Word (1, 8) + clientIds <- forM [0 .. r] $ \_ -> do + client <- + recover + $ addClient + user + def + { acapabilities = Just ["consumable-notifications"], + prekeys = Just $ take 10 somePrekeysRendered, + lastPrekey = Just $ head someLastPrekeysRendered + } + >>= getJSON 201 + objId client + + pure $ TestRecipient user clientIds + where + recover :: App a -> App a + recover = recoverAll (limitRetriesByCumulativeDelay 300 (exponentialBackoff 1_000_000)) . const diff --git a/integration/test/Testlib/Env.hs b/integration/test/Testlib/Env.hs index 67a90228ad..1980425272 100644 --- a/integration/test/Testlib/Env.hs +++ b/integration/test/Testlib/Env.hs @@ -62,8 +62,8 @@ serviceHostPort m Stern = m.stern serviceHostPort m FederatorInternal = m.federatorInternal serviceHostPort m WireServerEnterprise = m.wireServerEnterprise -mkGlobalEnv :: FilePath -> Codensity IO GlobalEnv -mkGlobalEnv cfgFile = do +mkGlobalEnv :: FilePath -> Word -> Codensity IO GlobalEnv +mkGlobalEnv cfgFile shardingGroup = do eith <- liftIO $ Yaml.decodeFileEither cfgFile intConfig <- liftIO $ case eith of Left err -> do @@ -145,7 +145,11 @@ mkGlobalEnv cfgFile = do gDNSMockServerConfig = intConfig.dnsMockServer, gCellsEventQueue = intConfig.cellsEventQueue, gCellsEventWatchersLock, - gCellsEventWatchers + gCellsEventWatchers, + gShardingGroupCount = intConfig.shardingGroupCount, + gShardingGroup = shardingGroup, + gMaxUserNo = intConfig.maxUserNo, + gMaxDeliveryDelay = intConfig.maxDeliveryDelay } where createSSLContext :: Maybe FilePath -> IO (Maybe OpenSSL.SSLContext) @@ -201,7 +205,11 @@ mkEnv currentTestName ge = do dnsMockServerConfig = ge.gDNSMockServerConfig, cellsEventQueue = ge.gCellsEventQueue, cellsEventWatchersLock = ge.gCellsEventWatchersLock, - cellsEventWatchers = ge.gCellsEventWatchers + cellsEventWatchers = ge.gCellsEventWatchers, + shardingGroupCount = ge.gShardingGroupCount, + shardingGroup = ge.gShardingGroup, + maxUserNo = ge.gMaxUserNo, + maxDeliveryDelay = ge.gMaxDeliveryDelay } allCiphersuites :: [Ciphersuite] diff --git a/integration/test/Testlib/Options.hs b/integration/test/Testlib/Options.hs index f109e13d8f..7e9bfc5b47 100644 --- a/integration/test/Testlib/Options.hs +++ b/integration/test/Testlib/Options.hs @@ -27,7 +27,9 @@ data TestOptions = TestOptions excludeTests :: [String], listTests :: Bool, xmlReport :: Maybe FilePath, - configFile :: String + configFile :: String, + shardingGroup :: Word + } parser :: Parser TestOptions @@ -64,6 +66,13 @@ parser = <> help "Use configuration FILE" <> value "services/integration.yaml" ) + <*> option + auto + ( long "sharding-group" + <> short 's' + <> help "The sharding group of this instance" + <> value 0 + ) optInfo :: ParserInfo TestOptions optInfo = diff --git a/integration/test/Testlib/Run.hs b/integration/test/Testlib/Run.hs index 1ae1ddf06d..7447230f44 100644 --- a/integration/test/Testlib/Run.hs +++ b/integration/test/Testlib/Run.hs @@ -123,6 +123,7 @@ main = do opts <- getOptions let f = testFilter opts cfg = opts.configFile + shardingGroup = opts.shardingGroup allTests <- mkAllTests let tests = @@ -132,10 +133,10 @@ main = do let qualifiedName = fromMaybe module_ (stripPrefix "Test." module_) <> "." <> name in (qualifiedName, summary, full, action) - if opts.listTests then doListTests tests else runTests tests opts.xmlReport cfg + if opts.listTests then doListTests tests else runTests tests opts.xmlReport cfg shardingGroup -runTests :: [(String, x, y, App ())] -> Maybe FilePath -> FilePath -> IO () -runTests tests mXMLOutput cfg = do +runTests :: [(String, x, y, App ())] -> Maybe FilePath -> FilePath -> Word -> IO () +runTests tests mXMLOutput cfg shardingGroup = do output <- newChan let displayOutput = readChan output >>= \case @@ -180,7 +181,7 @@ runTests tests mXMLOutput cfg = do where mkEnvs :: FilePath -> Codensity IO (GlobalEnv, Env) mkEnvs fp = do - g <- mkGlobalEnv fp + g <- mkGlobalEnv fp shardingGroup e <- mkEnv Nothing g pure (g, e) diff --git a/integration/test/Testlib/RunServices.hs b/integration/test/Testlib/RunServices.hs index 4a9f6403d4..5fbcabc2e2 100644 --- a/integration/test/Testlib/RunServices.hs +++ b/integration/test/Testlib/RunServices.hs @@ -88,8 +88,10 @@ main = do let cp = proc "sh" (["-c", "exec \"$@\"", "--"] <> opts.runSubprocess) (_, _, _, ph) <- createProcess cp exitWith =<< waitForProcess ph + -- The shardingGroup only matters for the testBench test; probably not here. + shardingGroup = 0 - runCodensity (mkGlobalEnv cfg >>= mkEnv Nothing) $ \env -> + runCodensity (mkGlobalEnv cfg shardingGroup >>= mkEnv Nothing) $ \env -> runAppWithEnv env $ lowerCodensity $ do diff --git a/integration/test/Testlib/Types.hs b/integration/test/Testlib/Types.hs index 29aa0b2b4b..99044af3c6 100644 --- a/integration/test/Testlib/Types.hs +++ b/integration/test/Testlib/Types.hs @@ -144,7 +144,11 @@ data GlobalEnv = GlobalEnv gDNSMockServerConfig :: DNSMockServerConfig, gCellsEventQueue :: String, gCellsEventWatchersLock :: MVar (), - gCellsEventWatchers :: IORef (Map String QueueWatcher) + gCellsEventWatchers :: IORef (Map String QueueWatcher), + gShardingGroupCount :: Word, + gShardingGroup :: Word, + gMaxUserNo :: Word, + gMaxDeliveryDelay :: Word } data IntegrationConfig = IntegrationConfig @@ -160,7 +164,10 @@ data IntegrationConfig = IntegrationConfig rabbitmqV1 :: RabbitMqAdminOpts, cassandra :: CassandraConfig, dnsMockServer :: DNSMockServerConfig, - cellsEventQueue :: String + cellsEventQueue :: String, + shardingGroupCount :: Word, + maxUserNo :: Word, + maxDeliveryDelay :: Word } deriving (Show, Generic) @@ -181,6 +188,9 @@ instance FromJSON IntegrationConfig where <*> o .: fromString "cassandra" <*> o .: fromString "dnsMockServer" <*> o .: fromString "cellsEventQueue" + <*> o .: fromString "shardingGroupCount" + <*> o .: fromString "maxUserNo" + <*> o .: fromString "maxDeliveryDelay" data ServiceMap = ServiceMap { brig :: HostPort, @@ -271,7 +281,11 @@ data Env = Env dnsMockServerConfig :: DNSMockServerConfig, cellsEventQueue :: String, cellsEventWatchersLock :: MVar (), - cellsEventWatchers :: IORef (Map String QueueWatcher) + cellsEventWatchers :: IORef (Map String QueueWatcher), + shardingGroupCount :: Word, + shardingGroup :: Word, + maxUserNo :: Word, + maxDeliveryDelay :: Word } data Response = Response @@ -446,7 +460,7 @@ hoistCodensity m = Codensity $ \k -> do getServiceMap :: (HasCallStack) => String -> App ServiceMap getServiceMap fedDomain = do env <- ask - assertJust ("Could not find service map for federation domain: " <> fedDomain) (Map.lookup fedDomain env.serviceMap) + assertJust ("Could not find service map for federation domain: " <> fedDomain <> " in " <> show (Map.keys env.serviceMap)) (Map.lookup fedDomain env.serviceMap) getMLSState :: App MLSState getMLSState = do diff --git a/services/integration.yaml b/services/integration.yaml index 427aa761d1..9ec920a603 100644 --- a/services/integration.yaml +++ b/services/integration.yaml @@ -330,3 +330,7 @@ integrationTestHostName: "localhost" additionalElasticSearch: https://localhost:9201 cellsEventQueue: cells_events + +shardingGroupCount: 1 +maxUserNo: 1000 +maxDeliveryDelay: 120 From cf311234e1cc0755e8bb9583e6adce7a6eb2f221 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 10 Dec 2025 09:03:25 +0100 Subject: [PATCH 50/51] Create one Pulsar client per app Consider pulsar-client-cpp to be (hopefully!) thread safe. To speed up further usage, create the Client once per app and re-use it whenever a client is needed. --- .../cannon/src/Cannon/PulsarConsumerApp.hs | 4 +- services/cannon/src/Cannon/Run.hs | 104 +++++++++--------- services/cannon/src/Cannon/Types.hs | 5 +- services/cannon/src/Cannon/WS.hs | 7 +- services/gundeck/default.nix | 1 + services/gundeck/gundeck.cabal | 2 + services/gundeck/src/Gundeck/Env.hs | 11 +- services/gundeck/src/Gundeck/Push.hs | 4 +- services/gundeck/src/Gundeck/Run.hs | 56 +++++----- services/gundeck/test/integration/Util.hs | 18 ++- 10 files changed, 119 insertions(+), 93 deletions(-) diff --git a/services/cannon/src/Cannon/PulsarConsumerApp.hs b/services/cannon/src/Cannon/PulsarConsumerApp.hs index d1a6f94d14..48512aa0e2 100644 --- a/services/cannon/src/Cannon/PulsarConsumerApp.hs +++ b/services/cannon/src/Cannon/PulsarConsumerApp.hs @@ -71,7 +71,7 @@ createPulsarChannel uid mCid env = do Log.debug env.logg $ Log.msg (Log.val "Connecting Pulsar consumer") . Log.field "topic" (show topic) - void . async $ Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "createPulsarChannel" env.logg)}) env.pulsarUrl $ do + void . async $ flip runReaderT env.pulsarClient $ do Pulsar.withConsumerNoUnsubscribe ( Pulsar.defaultConsumerConfiguration { Pulsar.consumerType = Just Pulsar.ConsumerExclusive, @@ -205,7 +205,7 @@ pulsarWebSocketApp uid mcid mSyncMarkerId e pendingConn = where publishSyncMessage :: UserId -> ByteString -> IO () publishSyncMessage userId message = - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishSyncMessage" e.logg)}) e.pulsarUrl $ do + flip runReaderT e.pulsarClient $ do let topic = Pulsar.TopicName $ "persistent://wire/user-notifications/" ++ unpack (userRoutingKey userId) Pulsar.withProducer Pulsar.defaultProducerConfiguration topic (onPulsarError "publishSyncMessage producer" e.logg) $ do result <- runResourceT $ do diff --git a/services/cannon/src/Cannon/Run.hs b/services/cannon/src/Cannon/Run.hs index 1583072882..094d11a89b 100644 --- a/services/cannon/src/Cannon/Run.hs +++ b/services/cannon/src/Cannon/Run.hs @@ -30,6 +30,7 @@ import Cannon.Options import Cannon.RabbitMq import Cannon.Types hiding (Env) import Cannon.WS hiding (drainOpts, env) +import Cassandra.Options (toPulsarUrl) import Cassandra.Util (defInitCassandra) import Control.Concurrent import Control.Concurrent.Async qualified as Async @@ -52,6 +53,8 @@ import OpenTelemetry.Instrumentation.Wai import OpenTelemetry.Trace hiding (Server) import OpenTelemetry.Trace qualified as Otel import Prometheus qualified as Prom +import Pulsar.Client qualified as Pulsar +import Pulsar.Client.Logging import Servant import System.IO.Strict qualified as Strict import System.Logger.Class qualified as LC @@ -68,61 +71,60 @@ import Wire.OpenTelemetry (withTracer) type CombinedAPI = CannonAPI :<|> Internal.API run :: Opts -> IO () -run o = lowerCodensity $ do - lift $ validateOpts o - tracer <- Codensity withTracer - when (o ^. drainOpts . millisecondsBetweenBatches == 0) $ - error "drainOpts.millisecondsBetweenBatches must not be set to 0." - when (o ^. drainOpts . gracePeriodSeconds == 0) $ - error "drainOpts.gracePeriodSeconds must not be set to 0." - ext <- lift loadExternal - g <- - Codensity $ - E.bracket - (L.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat)) - L.close - cassandra <- lift $ defInitCassandra (o ^. cassandraOpts) g +run o = do + g <- L.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat) + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "Cannon run" g)}) (toPulsarUrl (o ^. pulsar)) $ do + pulsarClient <- ask + liftIO . lowerCodensity $ do + lift $ validateOpts o + tracer <- Codensity withTracer + when (o ^. drainOpts . millisecondsBetweenBatches == 0) $ + error "drainOpts.millisecondsBetweenBatches must not be set to 0." + when (o ^. drainOpts . gracePeriodSeconds == 0) $ + error "drainOpts.gracePeriodSeconds must not be set to 0." + ext <- lift loadExternal + cassandra <- lift $ defInitCassandra (o ^. cassandraOpts) g - e <- do - d1 <- D.empty numDictSlices - d2 <- D.empty numDictSlices - man <- lift $ newManager defaultManagerSettings {managerConnCount = 128} - rnd <- lift createSystemRandom - clk <- lift mkClock - mkEnv ext o cassandra g d1 d2 man rnd clk (o ^. Cannon.Options.rabbitmq) + e <- do + d1 <- D.empty numDictSlices + d2 <- D.empty numDictSlices + man <- lift $ newManager defaultManagerSettings {managerConnCount = 128} + rnd <- lift createSystemRandom + clk <- lift mkClock + mkEnv ext o cassandra g d1 d2 man rnd clk (o ^. Cannon.Options.rabbitmq) pulsarClient - void $ Codensity $ Async.withAsync $ runCannon e refreshMetrics - let s = newSettings $ Server (o ^. cannon . host) (o ^. cannon . port) (applog e) (Just idleTimeout) + void $ Codensity $ Async.withAsync $ runCannon e refreshMetrics + let s = newSettings $ Server (o ^. cannon . host) (o ^. cannon . port) (applog e) (Just idleTimeout) - otelMiddleWare <- lift newOpenTelemetryWaiMiddleware - let middleware :: Wai.Middleware - middleware = - versionMiddleware (foldMap expandVersionExp (o ^. disabledAPIVersions)) - . requestIdMiddleware g defaultRequestIdHeaderName - . servantPrometheusMiddleware (Proxy @CombinedAPI) - . otelMiddleWare - . Gzip.gzip Gzip.def - . catchErrors g defaultRequestIdHeaderName - app :: Application - app = middleware (serve (Proxy @CombinedAPI) server) - server :: Servant.Server CombinedAPI - server = - hoistServer (Proxy @CannonAPI) (runCannonToServant e) publicAPIServer - :<|> hoistServer (Proxy @Internal.API) (runCannonToServant e) internalServer - tid <- lift myThreadId + otelMiddleWare <- lift newOpenTelemetryWaiMiddleware + let middleware :: Wai.Middleware + middleware = + versionMiddleware (foldMap expandVersionExp (o ^. disabledAPIVersions)) + . requestIdMiddleware g defaultRequestIdHeaderName + . servantPrometheusMiddleware (Proxy @CombinedAPI) + . otelMiddleWare + . Gzip.gzip Gzip.def + . catchErrors g defaultRequestIdHeaderName + app :: Application + app = middleware (serve (Proxy @CombinedAPI) server) + server :: Servant.Server CombinedAPI + server = + hoistServer (Proxy @CannonAPI) (runCannonToServant e) publicAPIServer + :<|> hoistServer (Proxy @Internal.API) (runCannonToServant e) internalServer + tid <- lift myThreadId - Codensity $ \k -> - inSpan tracer "cannon" defaultSpanArguments {kind = Otel.Server} (k ()) - lift $ - E.handle uncaughtExceptionHandler $ do - let handler = signalHandler (env e) (o ^. drainOpts) tid - void $ installHandler sigTERM handler Nothing - void $ installHandler sigINT handler Nothing - -- FUTUREWORK(@akshaymankar, @fisx): we may want to call `runSettingsWithShutdown` here, - -- but it's a sensitive change, and it looks like this is closing all the websockets at - -- the same time and then calling the drain script. I suspect this might be due to some - -- cleanup in wai. this needs to be tested very carefully when touched. - runSettings s app + Codensity $ \k -> + inSpan tracer "cannon" defaultSpanArguments {kind = Otel.Server} (k ()) + lift $ + E.handle uncaughtExceptionHandler $ do + let handler = signalHandler (env e) (o ^. drainOpts) tid + void $ installHandler sigTERM handler Nothing + void $ installHandler sigINT handler Nothing + -- FUTUREWORK(@akshaymankar, @fisx): we may want to call `runSettingsWithShutdown` here, + -- but it's a sensitive change, and it looks like this is closing all the websockets at + -- the same time and then calling the drain script. I suspect this might be due to some + -- cleanup in wai. this needs to be tested very carefully when touched. + runSettings s app where idleTimeout = fromIntegral $ maxPingInterval + 3 -- Each cannon instance advertises its own location (ip or dns name) to gundeck. diff --git a/services/cannon/src/Cannon/Types.hs b/services/cannon/src/Cannon/Types.hs index 718393674e..f7e8363e8e 100644 --- a/services/cannon/src/Cannon/Types.hs +++ b/services/cannon/src/Cannon/Types.hs @@ -50,6 +50,7 @@ import Imports import Network.AMQP qualified as Q import Network.AMQP.Extended (AmqpEndpoint) import Prometheus +import Pulsar.Client qualified as Pulsar import Servant qualified import System.Logger qualified as Logger import System.Logger.Class hiding (info) @@ -110,8 +111,9 @@ mkEnv :: GenIO -> Clock -> AmqpEndpoint -> + Pulsar.Client -> Codensity IO Env -mkEnv external o cs l d conns p g t endpoint = do +mkEnv external o cs l d conns p g t endpoint pulsarC = do let poolOpts = RabbitMqPoolOptions { endpoint = endpoint, @@ -138,6 +140,7 @@ mkEnv external o cs l d conns p g t endpoint = do pool (o ^. notificationTTL) (o ^. pulsar . to toPulsarUrl) + pulsarC pure $ Env o l d conns (RequestId defRequestId) wsEnv runCannon :: Env -> Cannon a -> IO a diff --git a/services/cannon/src/Cannon/WS.hs b/services/cannon/src/Cannon/WS.hs index b25811b3b4..5eab1debba 100644 --- a/services/cannon/src/Cannon/WS.hs +++ b/services/cannon/src/Cannon/WS.hs @@ -77,6 +77,7 @@ import Network.HTTP.Types.Method import Network.HTTP.Types.Status import Network.Wai.Utilities.Error import Network.WebSockets hiding (Request) +import Pulsar.Client qualified as Pulsar import System.Logger qualified as Logger import System.Logger.Class hiding (Error, Settings, close, (.=)) import System.Random.MWC (GenIO, uniform) @@ -160,7 +161,8 @@ data Env = Env cassandra :: ClientState, pool :: RabbitMqPool, notificationTTL :: Int, - pulsarUrl :: String + pulsarUrl :: String, + pulsarClient :: Pulsar.Client } setRequestId :: RequestId -> Env -> Env @@ -212,8 +214,9 @@ env :: RabbitMqPool -> Int -> String -> + Pulsar.Client -> Env -env externalHostname portnum gundeckHost gundeckPort logg manager websockets rabbitConnections rand clock drainOpts wsOpts cassandra pool notificationTTL pulsarUrl = +env externalHostname portnum gundeckHost gundeckPort logg manager websockets rabbitConnections rand clock drainOpts wsOpts cassandra pool notificationTTL pulsarUrl pulsarClient = let upstream = (Bilge.host gundeckHost . Bilge.port gundeckPort $ empty) reqId = RequestId defRequestId in Env {..} diff --git a/services/gundeck/default.nix b/services/gundeck/default.nix index ce7025a2f6..7a5936246a 100644 --- a/services/gundeck/default.nix +++ b/services/gundeck/default.nix @@ -186,6 +186,7 @@ mkDerivation { network network-uri optparse-applicative + pulsar-client-hs random retry safe diff --git a/services/gundeck/gundeck.cabal b/services/gundeck/gundeck.cabal index 0c675c9295..b6907f92f6 100644 --- a/services/gundeck/gundeck.cabal +++ b/services/gundeck/gundeck.cabal @@ -309,6 +309,7 @@ executable gundeck-integration , cassandra-util , containers , exceptions + , extended , gundeck , HsOpenSSL , http-client @@ -320,6 +321,7 @@ executable gundeck-integration , network , network-uri , optparse-applicative + , pulsar-client-hs , random , retry , safe diff --git a/services/gundeck/src/Gundeck/Env.hs b/services/gundeck/src/Gundeck/Env.hs index 2cd4a031f7..c35275d06d 100644 --- a/services/gundeck/src/Gundeck/Env.hs +++ b/services/gundeck/src/Gundeck/Env.hs @@ -47,6 +47,7 @@ import Network.HTTP.Client (responseTimeoutMicro) import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.TLS as TLS import Network.TLS.Extra qualified as TLS +import Pulsar.Client qualified as Pulsar import System.Logger qualified as Log import System.Logger.Extended qualified as Logger import Util.Options (Endpoint) @@ -64,14 +65,14 @@ data Env = Env _threadBudgetState :: !(Maybe ThreadBudgetState), _rabbitMqChannel :: MVar Channel, _pulsar :: Endpoint, - _pulsarAdmin :: Endpoint + _pulsarAdmin :: Endpoint, + _pulsarClient :: Pulsar.Client } makeLenses ''Env -createEnv :: Opts -> IO ([Async ()], Env) -createEnv o = do - l <- Logger.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat) +createEnv :: Opts -> Logger.Logger -> Pulsar.Client -> IO ([Async ()], Env) +createEnv o l pulsarClientArg = do n <- newManager tlsManagerSettings @@ -108,7 +109,7 @@ createEnv o = do } mtbs <- mkThreadBudgetState `mapM` (o ^. settings . maxConcurrentNativePushes) rabbitMqChannelMVar <- Q.mkRabbitMqChannelMVar l (Just "gundeck") (o ^. rabbitmq) - pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar (o ^. Opt.pulsar) (o ^. Opt.pulsarAdmin) + pure $! (rThread : rAdditionalThreads,) $! Env (RequestId defRequestId) o l n p r rAdditional a io mtbs rabbitMqChannelMVar (o ^. Opt.pulsar) (o ^. Opt.pulsarAdmin) pulsarClientArg reqIdMsg :: RequestId -> Logger.Msg -> Logger.Msg reqIdMsg = ("request" Logger..=) . unRequestId diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 50a698d334..cba938ac82 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -146,8 +146,8 @@ publishToRabbitMq exchangeName routingKey qMsg = do publishToPulsar :: Text -> Q.Message -> Gundeck () publishToPulsar routingKey qMsg = do logger <- view applog - pulsarEndpoint <- view Gundeck.Env.pulsar - Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishToPulsar" logger)}) (toPulsarUrl pulsarEndpoint) $ + pulsarC <- view Gundeck.Env.pulsarClient + flip runReaderT pulsarC $ Pulsar.withProducer Pulsar.defaultProducerConfiguration topicName (onPulsarError "publishToPulsar" logger) $ do result <- runResourceT $ do (_, message) <- Pulsar.buildMessage $ Pulsar.defaultMessageBuilder {Pulsar.content = Just $ BS.toStrict (A.encode pulsarMessage)} diff --git a/services/gundeck/src/Gundeck/Run.hs b/services/gundeck/src/Gundeck/Run.hs index f3f1ed140d..4a2117399d 100644 --- a/services/gundeck/src/Gundeck/Run.hs +++ b/services/gundeck/src/Gundeck/Run.hs @@ -70,10 +70,13 @@ import Network.Wai.Utilities.Server hiding (serverPort) import OpenTelemetry.Instrumentation.Wai (newOpenTelemetryWaiMiddleware) import OpenTelemetry.Trace (defaultSpanArguments, inSpan, kind) import OpenTelemetry.Trace qualified as Otel +import Pulsar.Client qualified as Pulsar +import Pulsar.Client.Logging import Servant (Handler (Handler), (:<|>) (..)) import Servant qualified import System.Logger qualified as Log import System.Logger.Class qualified as MonadLogger +import System.Logger.Extended qualified as Logger import UnliftIO.Async qualified as Async import Util.Options import Wire.API.Notification @@ -84,31 +87,34 @@ import Wire.OpenTelemetry run :: Opts -> IO () run opts = withTracer \tracer -> do - (rThreads, env) <- createEnv opts - let logger = env ^. applog - - runDirect env setUpRabbitMqExchangesAndQueues - - runClient (env ^. cstate) $ - versionCheck lastSchemaVersion - let s = newSettings $ defaultServer (unpack . host $ opts ^. gundeck) (port $ opts ^. gundeck) logger - let throttleMillis = fromMaybe defSqsThrottleMillis $ opts ^. (settings . sqsThrottleMillis) - - lst <- Async.async $ Aws.execute (env ^. awsEnv) (Aws.listen throttleMillis (runDirect env . onEvent)) - wtbs <- forM (env ^. threadBudgetState) $ \tbs -> Async.async $ runDirect env $ watchThreadBudgetState tbs 10 - wCollectAuth <- Async.async (collectAuthMetrics (Aws._awsEnv (Env._awsEnv env))) - - app <- middleware env <*> pure (mkApp env) - inSpan tracer "gundeck" defaultSpanArguments {kind = Otel.Server} (runSettingsWithShutdown s app Nothing) `finally` do - Log.info logger $ Log.msg (Log.val "Shutting down ...") - shutdown (env ^. cstate) - Async.cancel lst - Async.cancel wCollectAuth - forM_ wtbs Async.cancel - forM_ rThreads Async.cancel - Redis.disconnect =<< takeMVar (env ^. rstate) - whenJust (env ^. rstateAdditionalWrite) $ (=<<) Redis.disconnect . takeMVar - Log.close (env ^. applog) + logger <- Logger.mkLogger (opts ^. logLevel) (opts ^. logNetStrings) (opts ^. logFormat) + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "publishToPulsar" logger)}) (toPulsarUrl (opts ^. Gundeck.Options.pulsar)) $ do + pulsarC <- ask + liftIO $ do + (rThreads, env) <- createEnv opts logger pulsarC + + runDirect env setUpRabbitMqExchangesAndQueues + + runClient (env ^. cstate) $ + versionCheck lastSchemaVersion + let s = newSettings $ defaultServer (unpack . host $ opts ^. gundeck) (port $ opts ^. gundeck) logger + let throttleMillis = fromMaybe defSqsThrottleMillis $ opts ^. (settings . sqsThrottleMillis) + + lst <- Async.async $ Aws.execute (env ^. awsEnv) (Aws.listen throttleMillis (runDirect env . onEvent)) + wtbs <- forM (env ^. threadBudgetState) $ \tbs -> Async.async $ runDirect env $ watchThreadBudgetState tbs 10 + wCollectAuth <- Async.async (collectAuthMetrics (Aws._awsEnv (Env._awsEnv env))) + + app <- middleware env <*> pure (mkApp env) + inSpan tracer "gundeck" defaultSpanArguments {kind = Otel.Server} (runSettingsWithShutdown s app Nothing) `finally` do + Log.info logger $ Log.msg (Log.val "Shutting down ...") + shutdown (env ^. cstate) + Async.cancel lst + Async.cancel wCollectAuth + forM_ wtbs Async.cancel + forM_ rThreads Async.cancel + Redis.disconnect =<< takeMVar (env ^. rstate) + whenJust (env ^. rstateAdditionalWrite) $ (=<<) Redis.disconnect . takeMVar + Log.close (env ^. applog) where setUpRabbitMqExchangesAndQueues :: Gundeck () setUpRabbitMqExchangesAndQueues = do diff --git a/services/gundeck/test/integration/Util.hs b/services/gundeck/test/integration/Util.hs index d6790424b2..cb292baac4 100644 --- a/services/gundeck/test/integration/Util.hs +++ b/services/gundeck/test/integration/Util.hs @@ -18,6 +18,7 @@ module Util where import Bilge qualified +import Cassandra.Options import Control.Concurrent (forkFinally) import Control.Concurrent.Async (race_) import Control.Exception qualified as E @@ -33,17 +34,24 @@ import Imports import Network.Socket hiding (openSocket) import Network.Socket.ByteString (recv, sendAll) import Network.Wai.Utilities.MockServer (withMockServer) +import Pulsar.Client qualified as Pulsar +import Pulsar.Client.Logging +import System.Logger.Extended qualified as Logger import TestSetup withSettingsOverrides :: (Opts -> Opts) -> TestM a -> TestM a withSettingsOverrides f action = do ts <- ask let opts = f (view tsOpts ts) - (_rThreads, env) <- liftIO $ createEnv opts - liftIO . lowerCodensity $ do - let app = mkApp env - p <- withMockServer app - liftIO $ Bilge.runHttpT (ts ^. tsManager) $ runReaderT (runTestM action) $ ts & tsGundeck .~ GundeckR (mkRequest p) + logger <- liftIO $ Logger.mkLogger (opts ^. logLevel) (opts ^. logNetStrings) (opts ^. logFormat) + Pulsar.withClient (Pulsar.defaultClientConfiguration {Pulsar.clientLogger = Just (pulsarClientLogger "withSettingsOverrides" logger)}) (toPulsarUrl (opts ^. Gundeck.Options.pulsar)) $ do + pulsarC <- ask + liftIO $ do + (_rThreads, env) <- liftIO $ createEnv opts logger pulsarC + liftIO . lowerCodensity $ do + let app = mkApp env + p <- withMockServer app + liftIO $ Bilge.runHttpT (ts ^. tsManager) $ runReaderT (runTestM action) $ ts & tsGundeck .~ GundeckR (mkRequest p) where mkRequest p = Bilge.host "127.0.0.1" . Bilge.port p From 8225fc98d64a586b50b789dd29382df61e45c604 Mon Sep 17 00:00:00 2001 From: Sven Tennie Date: Wed, 10 Dec 2025 18:55:21 +0100 Subject: [PATCH 51/51] Adjust NotificationBenchmark for Pulsar - Ensure that each client is only attached once at a time (subscriptions are exclusive). This is done by switching to a more primitive concurrency model. - Adjust the message format a bit - Ensure that clients have distinct ids --- integration/default.nix | 2 + .../test/Test/NotificationsBenchmark.hs | 75 ++++++++++++++----- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/integration/default.nix b/integration/default.nix index 04163fc507..03512c4bc5 100644 --- a/integration/default.nix +++ b/integration/default.nix @@ -74,6 +74,7 @@ , split , stm , streaming-commons +, streamly , string-conversions , system-linux-proc , tagged @@ -179,6 +180,7 @@ mkDerivation { split stm streaming-commons + streamly string-conversions system-linux-proc tagged diff --git a/integration/test/Test/NotificationsBenchmark.hs b/integration/test/Test/NotificationsBenchmark.hs index 479e0564d7..c137414f22 100644 --- a/integration/test/Test/NotificationsBenchmark.hs +++ b/integration/test/Test/NotificationsBenchmark.hs @@ -5,14 +5,16 @@ import API.BrigCommon import API.Common import API.GundeckInternal import Control.Concurrent +import Control.Concurrent.Async (Async) +import Control.Concurrent.STM.TBQueue import Control.Monad.Codensity (Codensity (..)) -import Control.Monad.Reader (asks) +import Control.Monad.Reader (MonadReader (ask), asks) import Control.Monad.Reader.Class (local) import Control.Retry import qualified Data.Map.Strict as Map -import Data.String.Conversions (cs) import Data.Time -import GHC.Conc (numCapabilities) +import Debug.Trace +import GHC.Conc.Sync import GHC.Stack import SetupHelpers import qualified Streamly.Data.Fold.Prelude as Fold @@ -21,6 +23,7 @@ import System.Random import qualified Test.Events as TestEvents import Testlib.Prekeys import Testlib.Prelude +import UnliftIO (async, waitAnyCancel) data TestRecipient = TestRecipient { user :: Value, @@ -33,9 +36,11 @@ testBench = do shardingGroupCount <- asks (.shardingGroupCount) shardingGroup <- asks (.shardingGroup) maxUserNo <- asks (.maxUserNo) + env <- ask -- Preparation - let parCfg = Stream.maxThreads (numCapabilities * 2) . Stream.ordered False + let threadCount = min numCapabilities (fromIntegral maxUserNo) + parCfg = Stream.maxThreads threadCount . Stream.ordered False toMap = Fold.foldl' (\kv (k, v) -> Map.insert k v kv) Map.empty -- Later, we only read from this map. Thus, it doesn't have to be thread-safe. userMap :: Map Word TestRecipient <- @@ -48,12 +53,34 @@ testBench = do -- TODO: To be replaced with real data from the file. (See -- https://wearezeta.atlassian.net/wiki/spaces/PET/pages/2118680620/Simulating+production-like+data) - let fakeData = zip (plusDelta now <$> [0 :: Word ..]) (cycle [0 .. maxUserNo]) - - Stream.fromList fakeData - & Stream.filter (\(_t, uNo) -> (uNo `mod` shardingGroupCount) == shardingGroup) - & Stream.parMapM parCfg (\(t, uNo) -> waitForTimeStamp t >> sendAndReceive uNo userMap) - & Stream.fold Fold.drain + let fakeData = zip (plusDelta now <$> [0 :: Word ..]) (cycle [1 .. maxUserNo]) + + tbchans :: [(Int, TBQueue (UTCTime, Word))] <- liftIO $ forM [1 .. threadCount] $ \i -> do + q <- atomically $ newTBQueue 1 -- capacity 100, adjust as needed + pure (i, q) + + workers :: [Async ()] <- forM tbchans $ \(i, chan) -> liftIO + . async + -- . handle (\(e :: SomeException) -> traceM $ "Caught exception in worker id=" <> show i <> " e=" <> show e) + . forever + . runAppWithEnv env + $ do + traceM $ "worker taking from id=" <> show i + (t, uNo) <- liftIO $ atomically $ readTBQueue chan + traceM $ "worker took from id=" <> show i <> " val=" <> show (t, uNo) + sendAndReceive uNo userMap + traceM $ "worker end id=" <> show i + + producer <- async $ forM_ fakeData $ \(t, uNo) -> + when ((uNo `mod` shardingGroupCount) == shardingGroup) + $ let workerShard = fromIntegral uNo `mod` threadCount + (i, chan) = tbchans !! workerShard + in do + waitForTimeStamp t + traceM $ "producer putting to shard=" <> show workerShard <> " id=" <> show i <> " val=" <> show (t, uNo) + liftIO $ atomically $ writeTBQueue chan (t, uNo) + + liftIO . void . waitAnyCancel $ producer : workers waitForTimeStamp :: UTCTime -> App () waitForTimeStamp timestamp = liftIO $ do @@ -93,16 +120,26 @@ sendAndReceive userNo userMap = do void $ postPush alice [push] >>= getBody 200 + traceM $ "pushed to userNo=" <> show userNo <> " push=" <> show push + messageDeliveryTimeout <- asks $ fromIntegral . (.maxDeliveryDelay) - forM_ (testRecipient.clientIds) $ \(cid :: String) -> + traceM $ "XXX all clientIds " <> show testRecipient.clientIds + forM_ (testRecipient.clientIds) $ \(cid :: String) -> do + traceM $ "XXX using clientId=" <> show cid runCodensity (TestEvents.createEventsWebSocket alice (Just cid)) $ \ws -> do -- TODO: Tweak this value to the least acceptable event delivery duration local (setTimeoutTo messageDeliveryTimeout) $ TestEvents.assertFindsEvent ws $ \e -> do receivedAt <- liftIO getCurrentTime - sentAt :: UTCTime <- (e %. "payload.sent_at" >>= asByteString) <&> fromJust . decode . cs + p <- e %. "data.event.payload.0" + sentAt <- + (p %. "sent_at" >>= asString) + <&> ( \sentAtStr -> + fromMaybe (error ("Cannot parse timestamp: " <> sentAtStr)) $ parseUTC sentAtStr + ) print $ "Message sent/receive delta: " ++ show (diffUTCTime receivedAt sentAt) - e %. "payload" `shouldMatch` [object ["foo" .= payload]] + p %. "foo" `shouldMatch` payload + traceM $ "XXX Succeeded for clientId=" <> show cid where -- \| Generate a random string with random length up to 2048 bytes randomPayload :: IO String @@ -111,6 +148,8 @@ sendAndReceive userNo userMap = do -- `kubectl exec --namespace databases -it gundeck-gundeck-eks-eu-west-1a-sts-0 -- sh -c 'cqlsh -e "select blobAsText(payload) from gundeck.notifications LIMIT 5000;" ' | sed 's/^[ \t]*//;s/[ \t]*$//' | wc` let len :: Int = 884 -- measured in prod in mapM (\_ -> randomRIO ('\32', '\126')) [1 .. len] -- printable ASCII + parseUTC :: String -> Maybe UTCTime + parseUTC = parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S%QZ" setTimeoutTo :: Int -> Env -> Env setTimeoutTo tSecs env = env {timeOutSeconds = tSecs} @@ -119,16 +158,18 @@ generateTestRecipient :: (HasCallStack) => App TestRecipient generateTestRecipient = do print "generateTestRecipient" user <- recover $ (randomUser OwnDomain def) - r <- randomRIO @Word (1, 8) - clientIds <- forM [0 .. r] $ \_ -> do + r <- randomRIO @Int (1, 8) + clientIds <- forM [0 .. r] $ \i -> do client <- recover $ addClient user def { acapabilities = Just ["consumable-notifications"], - prekeys = Just $ take 10 somePrekeysRendered, - lastPrekey = Just $ head someLastPrekeysRendered + prekeys = Nothing, + lastPrekey = Just $ (someLastPrekeysRendered !! i), + clabel = "Test Client No. " <> show i, + model = "Test Model No. " <> show i } >>= getJSON 201 objId client