fix(gateway): harden manager calls and guild circuit-breaker ETS handling

This commit is contained in:
Hampus Kraft 2026-02-19 19:49:56 +00:00
parent 2264f1decb
commit b178c90879
No known key found for this signature in database
GPG Key ID: 6090864C465A454D
3 changed files with 74 additions and 31 deletions

View File

@ -136,7 +136,7 @@ update_circuit_state(GuildPid, Result, PrevState) ->
IsSuccess = is_success_result(Result),
case {IsSuccess, PrevState} of
{true, half_open} ->
ets:delete(?CIRCUIT_BREAKER_TABLE, GuildPid),
safe_delete(GuildPid),
ok;
{true, closed} ->
reset_failures(GuildPid);
@ -152,7 +152,7 @@ is_success_result(_) -> false.
reset_failures(GuildPid) ->
case safe_lookup(GuildPid) of
[{_, State}] ->
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{failures => 0}}),
safe_insert(GuildPid, State#{failures => 0}),
ok;
[] ->
ok
@ -163,27 +163,21 @@ record_failure(GuildPid) ->
Now = erlang:system_time(millisecond),
case safe_lookup(GuildPid) of
[] ->
ets:insert(
?CIRCUIT_BREAKER_TABLE,
{GuildPid, #{
state => closed,
failures => 1,
concurrent => 0
}}
),
safe_insert(GuildPid, #{
state => closed,
failures => 1,
concurrent => 0
}),
ok;
[{_, #{failures := F} = State}] when F + 1 >= ?FAILURE_THRESHOLD ->
ets:insert(
?CIRCUIT_BREAKER_TABLE,
{GuildPid, State#{
state => open,
failures => F + 1,
opened_at => Now
}}
),
safe_insert(GuildPid, State#{
state => open,
failures => F + 1,
opened_at => Now
}),
ok;
[{_, #{failures := F} = State}] ->
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{failures => F + 1}}),
safe_insert(GuildPid, State#{failures => F + 1}),
ok
end.
@ -191,19 +185,16 @@ record_failure(GuildPid) ->
acquire_slot(GuildPid) ->
case safe_lookup(GuildPid) of
[] ->
ets:insert(
?CIRCUIT_BREAKER_TABLE,
{GuildPid, #{
state => closed,
failures => 0,
concurrent => 1
}}
),
safe_insert(GuildPid, #{
state => closed,
failures => 0,
concurrent => 1
}),
ok;
[{_, #{concurrent := C}}] when C >= ?MAX_CONCURRENT ->
{error, too_many_requests};
[{_, #{concurrent := C} = State}] ->
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{concurrent => C + 1}}),
safe_insert(GuildPid, State#{concurrent => C + 1}),
ok
end.
@ -211,12 +202,36 @@ acquire_slot(GuildPid) ->
release_slot(GuildPid) ->
case safe_lookup(GuildPid) of
[{_, #{concurrent := C} = State}] when C > 0 ->
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{concurrent => C - 1}}),
safe_insert(GuildPid, State#{concurrent => C - 1}),
ok;
_ ->
ok
end.
-spec safe_insert(pid(), map()) -> ok.
safe_insert(GuildPid, State) ->
ensure_table(),
try ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State}) of
true -> ok
catch
error:badarg ->
ensure_table(),
try ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State}) of
true -> ok
catch
error:badarg -> ok
end
end.
-spec safe_delete(pid()) -> ok.
safe_delete(GuildPid) ->
ensure_table(),
try ets:delete(?CIRCUIT_BREAKER_TABLE, GuildPid) of
true -> ok
catch
error:badarg -> ok
end.
-spec safe_lookup(pid()) -> list().
safe_lookup(GuildPid) ->
try ets:lookup(?CIRCUIT_BREAKER_TABLE, GuildPid) of
@ -369,6 +384,18 @@ record_failure_opens_circuit_test() ->
?assertEqual(open, maps:get(state, State)),
Pid ! done.
record_failure_recreates_missing_table_test() ->
catch ets:delete(?CIRCUIT_BREAKER_TABLE),
Pid = spawn(fun() ->
receive
done -> ok
end
end),
?assertEqual(ok, record_failure(Pid)),
[{Pid, State}] = ets:lookup(?CIRCUIT_BREAKER_TABLE, Pid),
?assertEqual(1, maps:get(failures, State)),
Pid ! done.
is_success_result_test() ->
?assertEqual(true, is_success_result({ok, #{}})),
?assertEqual(false, is_success_result({error, timeout})),

View File

@ -248,7 +248,14 @@ call_shard(GuildId, Request, Timeout) ->
-spec call_via_manager(term(), pos_integer()) -> term().
call_via_manager(Request, Timeout) ->
gen_server:call(?MODULE, Request, Timeout + 1000).
case catch gen_server:call(?MODULE, Request, Timeout + 1000) of
{'EXIT', {timeout, _}} ->
{error, timeout};
{'EXIT', _} ->
{error, unavailable};
Reply ->
Reply
end.
-spec forward_call(guild_id(), term(), state()) -> {term(), state()}.
forward_call(GuildId, {start_or_lookup, _} = Request, State) ->

View File

@ -136,6 +136,8 @@ call_shard(SessionId, Request, Timeout) ->
case shard_pid_from_table(SessionId) of
{ok, Pid} ->
case catch gen_server:call(Pid, Request, Timeout) of
{'EXIT', {timeout, _}} ->
{error, timeout};
{'EXIT', _} ->
call_via_manager(SessionId, Request, Timeout);
Reply ->
@ -147,7 +149,14 @@ call_shard(SessionId, Request, Timeout) ->
-spec call_via_manager(session_id(), term(), pos_integer()) -> term().
call_via_manager(SessionId, Request, Timeout) ->
gen_server:call(?MODULE, {proxy_call, SessionId, Request, Timeout}, Timeout + 1000).
case catch gen_server:call(?MODULE, {proxy_call, SessionId, Request, Timeout}, Timeout + 1000) of
{'EXIT', {timeout, _}} ->
{error, timeout};
{'EXIT', _} ->
{error, unavailable};
Reply ->
Reply
end.
-spec forward_call(session_id(), term(), pos_integer(), state()) -> {term(), state()}.
forward_call(SessionId, Request, Timeout, State) ->