@@ -724,43 +724,100 @@ repair_amqqueue_nodes(Q0) ->
724724 {Name , _ } = amqqueue :get_pid (Q0 ),
725725 Members = ra_leaderboard :lookup_members (Name ),
726726 RaNodes = [N || {_ , N } <- Members ],
727- Nodes = get_nodes (Q0 ),
728- case lists :sort (RaNodes ) =:= lists :sort (Nodes ) of
727+ case rabbit_feature_flags :is_enabled (track_qq_members_uids ) of
728+ false ->
729+ Nodes = get_nodes (Q0 ),
730+ case lists :sort (RaNodes ) =:= lists :sort (Nodes ) of
731+ true ->
732+ % % up to date
733+ ok ;
734+ false ->
735+ % % update amqqueue record
736+ Fun = fun (Q ) ->
737+ TS0 = amqqueue :get_type_state (Q ),
738+ TS = TS0 #{nodes => RaNodes },
739+ amqqueue :set_type_state (Q , TS )
740+ end ,
741+ _ = rabbit_amqqueue :update (QName , Fun ),
742+ repaired
743+ end ;
744+ true ->
745+ {ok , Q0 } = rabbit_amqqueue :lookup (QName ),
746+ OldTypeState = amqqueue :get_type_state (Q0 ),
747+ case OldTypeState of
748+ #{nodes := List } when is_list (List ) ->
749+ repair_with_list_nodes (QName , Name , RaNodes , OldTypeState );
750+ #{nodes := Map } when is_map (Map ) ->
751+ repair_with_map_nodes (QName , Name , RaNodes , Map )
752+ end
753+ end .
754+
%% @doc Repair path used when the queue's type state still stores `nodes'
%% as a plain list. The UId of the Ra server `Name' is requested from every
%% node in `RaNodes' via gather_node_uids/3, and the queue record is rewritten
%% with the resulting node => UId map only when no node failed to answer.
%% Returns `repaired' when the record was updated, `ok' when it was left alone.
repair_with_list_nodes(QName, Name, RaNodes, _OldTypeState) ->
    {UidsByNode, Failures} = gather_node_uids(QName, Name, RaNodes),
    case Failures of
        [] ->
            %% Every member produced a usable UId: switch to the map format.
            StoreUids = fun(Queue) ->
                                TypeState = amqqueue:get_type_state(Queue),
                                amqqueue:set_type_state(
                                  Queue, TypeState#{nodes => UidsByNode})
                        end,
            _ = rabbit_amqqueue:update(QName, StoreUids),
            repaired;
        _ ->
            %% At least one UId could not be fetched; keep the current
            %% queue state untouched.
            ok
    end.
773+
%% @doc Repair path used when the queue's type state already stores `nodes'
%% as a map (`PreviousUidsMap', keyed by node).
%%
%% `RaNodes' is the member list to reconcile against. Entries for nodes that
%% are no longer members are dropped; UIds for newly seen members are fetched
%% with gather_node_uids/3 and only the ones that came back valid are added
%% (failures are logged by gather_node_uids/3 and otherwise ignored here).
%%
%% Returns `repaired' when the queue record was rewritten and `ok' when there
%% was nothing to write. The latter includes the case where the only
%% difference is new members whose UIds could not be fetched: the merged map
%% is then identical to the stored one, so no update is issued and the repair
%% is not reported as done.
repair_with_map_nodes(QName, Name, RaNodes, PreviousUidsMap) ->
    PrevNodes = maps:keys(PreviousUidsMap),
    case lists:sort(PrevNodes) =:= lists:sort(RaNodes) of
        true ->
            %% up to date
            ok;
        false ->
            NodesToAdd = RaNodes -- PrevNodes,
            {AddedNodesUids, _ErrorList} =
                gather_node_uids(QName, Name, NodesToAdd),
            %% Keep only the entries of nodes that are still members.
            RemainingNodesUids = maps:with(RaNodes, PreviousUidsMap),
            NewNodes = maps:merge(RemainingNodesUids, AddedNodesUids),
            case NewNodes =:= PreviousUidsMap of
                true ->
                    %% Nothing was removed and no new UId could be fetched:
                    %% avoid a no-op write and a misleading `repaired'.
                    ok;
                false ->
                    Fun = fun(Q) ->
                                  Ts0 = amqqueue:get_type_state(Q),
                                  Ts = Ts0#{nodes => NewNodes},
                                  amqqueue:set_type_state(Q, Ts)
                          end,
                    _ = rabbit_amqqueue:update(QName, Fun),
                    repaired
            end
    end.
763794
%% Calls ra_directory:uid_of(?RA_SYSTEM, Name) on every node in `RaNodes'
%% (bounded by ?RPC_TIMEOUT) and splits the replies in two:
%%   * a map node => UId for nodes that replied `{ok, UId}' with a UId other
%%     than `undefined';
%%   * a list of `{Node, Reply}' pairs, in `RaNodes' order, for every other
%%     reply (an `undefined' UId or a failed call).
%% Each entry of the second list is logged as a warning.
%% Returns `{UidsByNode, Failures}'.
gather_node_uids(QName, Name, RaNodes) ->
    Replies = erpc:multicall(RaNodes, ra_directory, uid_of,
                             [?RA_SYSTEM, Name], ?RPC_TIMEOUT),
    %% erpc:multicall/5 returns one reply per node, in the order of RaNodes.
    NodeReplies = lists:zip(RaNodes, Replies),

    UidsByNode = maps:from_list([{Node, UId}
                                 || {Node, {ok, UId}} <- NodeReplies,
                                    UId =/= undefined]),

    %% Everything that is not a successful reply carrying a defined UId.
    Failures = lists:filter(fun({_Node, {ok, UId}}) -> UId =:= undefined;
                               (_Other) -> true
                            end, NodeReplies),

    lists:foreach(
      fun({Node, Reply}) ->
              case Reply of
                  {ok, undefined} ->
                      ?LOG_WARNING(" Unexpected undefined uuid from node ~p "
                                   " for quorum ~ts during repair_amqqueue_nodes",
                                   [Node, rabbit_misc:rs(QName)]);
                  CallException ->
                      ?LOG_WARNING(" Call exception while retrieving uuid from node ~p "
                                   " for quorum ~ts during repair_amqqueue_nodes: ~p ",
                                   [Node, rabbit_misc:rs(QName), CallException])
              end
      end, Failures),

    {UidsByNode, Failures}.
820+
764821reductions (Name ) ->
765822 try
766823 {reductions , R } = process_info (whereis (Name ), reductions ),
@@ -821,7 +878,7 @@ recover(_Vhost, Queues) ->
821878 RaUId = ra_directory :uid_of (? RA_SYSTEM , Name ),
822879 case RaUId of
823880 undefined ->
824- ? LOG_WARNING (" Unexpected undefined uuid for current node for quorum queue ~ts during recover" ,
881+ ? LOG_WARNING (" Unexpected undefined uuid for current node for quorum ~ts during recover" ,
825882 [rabbit_misc :rs (QName )]);
826883 _ ->
827884 ok
@@ -838,7 +895,7 @@ recover(_Vhost, Queues) ->
838895 #{node () := _NewRaUId } ->
839896 % % Queue is aware but it does not match the one returned by
840897 % % ra_directory
841- rabbit_log :info (" Quorum queue ~ts : detected node uuid change, "
898+ rabbit_log :info (" Quorum ~ts : detected node uuid change, "
842899 " deleting old data directory" , [rabbit_misc :rs (QName )]),
843900 maybe_delete_data_dir (RaUId )
844901 end ,
0 commit comments