fix(gateway): harden manager calls and guild circuit-breaker ETS handling
This commit is contained in:
parent
2264f1decb
commit
b178c90879
@ -136,7 +136,7 @@ update_circuit_state(GuildPid, Result, PrevState) ->
|
|||||||
IsSuccess = is_success_result(Result),
|
IsSuccess = is_success_result(Result),
|
||||||
case {IsSuccess, PrevState} of
|
case {IsSuccess, PrevState} of
|
||||||
{true, half_open} ->
|
{true, half_open} ->
|
||||||
ets:delete(?CIRCUIT_BREAKER_TABLE, GuildPid),
|
safe_delete(GuildPid),
|
||||||
ok;
|
ok;
|
||||||
{true, closed} ->
|
{true, closed} ->
|
||||||
reset_failures(GuildPid);
|
reset_failures(GuildPid);
|
||||||
@ -152,7 +152,7 @@ is_success_result(_) -> false.
|
|||||||
reset_failures(GuildPid) ->
|
reset_failures(GuildPid) ->
|
||||||
case safe_lookup(GuildPid) of
|
case safe_lookup(GuildPid) of
|
||||||
[{_, State}] ->
|
[{_, State}] ->
|
||||||
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{failures => 0}}),
|
safe_insert(GuildPid, State#{failures => 0}),
|
||||||
ok;
|
ok;
|
||||||
[] ->
|
[] ->
|
||||||
ok
|
ok
|
||||||
@ -163,27 +163,21 @@ record_failure(GuildPid) ->
|
|||||||
Now = erlang:system_time(millisecond),
|
Now = erlang:system_time(millisecond),
|
||||||
case safe_lookup(GuildPid) of
|
case safe_lookup(GuildPid) of
|
||||||
[] ->
|
[] ->
|
||||||
ets:insert(
|
safe_insert(GuildPid, #{
|
||||||
?CIRCUIT_BREAKER_TABLE,
|
state => closed,
|
||||||
{GuildPid, #{
|
failures => 1,
|
||||||
state => closed,
|
concurrent => 0
|
||||||
failures => 1,
|
}),
|
||||||
concurrent => 0
|
|
||||||
}}
|
|
||||||
),
|
|
||||||
ok;
|
ok;
|
||||||
[{_, #{failures := F} = State}] when F + 1 >= ?FAILURE_THRESHOLD ->
|
[{_, #{failures := F} = State}] when F + 1 >= ?FAILURE_THRESHOLD ->
|
||||||
ets:insert(
|
safe_insert(GuildPid, State#{
|
||||||
?CIRCUIT_BREAKER_TABLE,
|
state => open,
|
||||||
{GuildPid, State#{
|
failures => F + 1,
|
||||||
state => open,
|
opened_at => Now
|
||||||
failures => F + 1,
|
}),
|
||||||
opened_at => Now
|
|
||||||
}}
|
|
||||||
),
|
|
||||||
ok;
|
ok;
|
||||||
[{_, #{failures := F} = State}] ->
|
[{_, #{failures := F} = State}] ->
|
||||||
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{failures => F + 1}}),
|
safe_insert(GuildPid, State#{failures => F + 1}),
|
||||||
ok
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
@ -191,19 +185,16 @@ record_failure(GuildPid) ->
|
|||||||
acquire_slot(GuildPid) ->
|
acquire_slot(GuildPid) ->
|
||||||
case safe_lookup(GuildPid) of
|
case safe_lookup(GuildPid) of
|
||||||
[] ->
|
[] ->
|
||||||
ets:insert(
|
safe_insert(GuildPid, #{
|
||||||
?CIRCUIT_BREAKER_TABLE,
|
state => closed,
|
||||||
{GuildPid, #{
|
failures => 0,
|
||||||
state => closed,
|
concurrent => 1
|
||||||
failures => 0,
|
}),
|
||||||
concurrent => 1
|
|
||||||
}}
|
|
||||||
),
|
|
||||||
ok;
|
ok;
|
||||||
[{_, #{concurrent := C}}] when C >= ?MAX_CONCURRENT ->
|
[{_, #{concurrent := C}}] when C >= ?MAX_CONCURRENT ->
|
||||||
{error, too_many_requests};
|
{error, too_many_requests};
|
||||||
[{_, #{concurrent := C} = State}] ->
|
[{_, #{concurrent := C} = State}] ->
|
||||||
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{concurrent => C + 1}}),
|
safe_insert(GuildPid, State#{concurrent => C + 1}),
|
||||||
ok
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
@ -211,12 +202,36 @@ acquire_slot(GuildPid) ->
|
|||||||
release_slot(GuildPid) ->
|
release_slot(GuildPid) ->
|
||||||
case safe_lookup(GuildPid) of
|
case safe_lookup(GuildPid) of
|
||||||
[{_, #{concurrent := C} = State}] when C > 0 ->
|
[{_, #{concurrent := C} = State}] when C > 0 ->
|
||||||
ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State#{concurrent => C - 1}}),
|
safe_insert(GuildPid, State#{concurrent => C - 1}),
|
||||||
ok;
|
ok;
|
||||||
_ ->
|
_ ->
|
||||||
ok
|
ok
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
-spec safe_insert(pid(), map()) -> ok.
|
||||||
|
safe_insert(GuildPid, State) ->
|
||||||
|
ensure_table(),
|
||||||
|
try ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State}) of
|
||||||
|
true -> ok
|
||||||
|
catch
|
||||||
|
error:badarg ->
|
||||||
|
ensure_table(),
|
||||||
|
try ets:insert(?CIRCUIT_BREAKER_TABLE, {GuildPid, State}) of
|
||||||
|
true -> ok
|
||||||
|
catch
|
||||||
|
error:badarg -> ok
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
|
-spec safe_delete(pid()) -> ok.
|
||||||
|
safe_delete(GuildPid) ->
|
||||||
|
ensure_table(),
|
||||||
|
try ets:delete(?CIRCUIT_BREAKER_TABLE, GuildPid) of
|
||||||
|
true -> ok
|
||||||
|
catch
|
||||||
|
error:badarg -> ok
|
||||||
|
end.
|
||||||
|
|
||||||
-spec safe_lookup(pid()) -> list().
|
-spec safe_lookup(pid()) -> list().
|
||||||
safe_lookup(GuildPid) ->
|
safe_lookup(GuildPid) ->
|
||||||
try ets:lookup(?CIRCUIT_BREAKER_TABLE, GuildPid) of
|
try ets:lookup(?CIRCUIT_BREAKER_TABLE, GuildPid) of
|
||||||
@ -369,6 +384,18 @@ record_failure_opens_circuit_test() ->
|
|||||||
?assertEqual(open, maps:get(state, State)),
|
?assertEqual(open, maps:get(state, State)),
|
||||||
Pid ! done.
|
Pid ! done.
|
||||||
|
|
||||||
|
record_failure_recreates_missing_table_test() ->
|
||||||
|
catch ets:delete(?CIRCUIT_BREAKER_TABLE),
|
||||||
|
Pid = spawn(fun() ->
|
||||||
|
receive
|
||||||
|
done -> ok
|
||||||
|
end
|
||||||
|
end),
|
||||||
|
?assertEqual(ok, record_failure(Pid)),
|
||||||
|
[{Pid, State}] = ets:lookup(?CIRCUIT_BREAKER_TABLE, Pid),
|
||||||
|
?assertEqual(1, maps:get(failures, State)),
|
||||||
|
Pid ! done.
|
||||||
|
|
||||||
is_success_result_test() ->
|
is_success_result_test() ->
|
||||||
?assertEqual(true, is_success_result({ok, #{}})),
|
?assertEqual(true, is_success_result({ok, #{}})),
|
||||||
?assertEqual(false, is_success_result({error, timeout})),
|
?assertEqual(false, is_success_result({error, timeout})),
|
||||||
|
|||||||
@ -248,7 +248,14 @@ call_shard(GuildId, Request, Timeout) ->
|
|||||||
|
|
||||||
-spec call_via_manager(term(), pos_integer()) -> term().
|
-spec call_via_manager(term(), pos_integer()) -> term().
|
||||||
call_via_manager(Request, Timeout) ->
|
call_via_manager(Request, Timeout) ->
|
||||||
gen_server:call(?MODULE, Request, Timeout + 1000).
|
case catch gen_server:call(?MODULE, Request, Timeout + 1000) of
|
||||||
|
{'EXIT', {timeout, _}} ->
|
||||||
|
{error, timeout};
|
||||||
|
{'EXIT', _} ->
|
||||||
|
{error, unavailable};
|
||||||
|
Reply ->
|
||||||
|
Reply
|
||||||
|
end.
|
||||||
|
|
||||||
-spec forward_call(guild_id(), term(), state()) -> {term(), state()}.
|
-spec forward_call(guild_id(), term(), state()) -> {term(), state()}.
|
||||||
forward_call(GuildId, {start_or_lookup, _} = Request, State) ->
|
forward_call(GuildId, {start_or_lookup, _} = Request, State) ->
|
||||||
|
|||||||
@ -136,6 +136,8 @@ call_shard(SessionId, Request, Timeout) ->
|
|||||||
case shard_pid_from_table(SessionId) of
|
case shard_pid_from_table(SessionId) of
|
||||||
{ok, Pid} ->
|
{ok, Pid} ->
|
||||||
case catch gen_server:call(Pid, Request, Timeout) of
|
case catch gen_server:call(Pid, Request, Timeout) of
|
||||||
|
{'EXIT', {timeout, _}} ->
|
||||||
|
{error, timeout};
|
||||||
{'EXIT', _} ->
|
{'EXIT', _} ->
|
||||||
call_via_manager(SessionId, Request, Timeout);
|
call_via_manager(SessionId, Request, Timeout);
|
||||||
Reply ->
|
Reply ->
|
||||||
@ -147,7 +149,14 @@ call_shard(SessionId, Request, Timeout) ->
|
|||||||
|
|
||||||
-spec call_via_manager(session_id(), term(), pos_integer()) -> term().
|
-spec call_via_manager(session_id(), term(), pos_integer()) -> term().
|
||||||
call_via_manager(SessionId, Request, Timeout) ->
|
call_via_manager(SessionId, Request, Timeout) ->
|
||||||
gen_server:call(?MODULE, {proxy_call, SessionId, Request, Timeout}, Timeout + 1000).
|
case catch gen_server:call(?MODULE, {proxy_call, SessionId, Request, Timeout}, Timeout + 1000) of
|
||||||
|
{'EXIT', {timeout, _}} ->
|
||||||
|
{error, timeout};
|
||||||
|
{'EXIT', _} ->
|
||||||
|
{error, unavailable};
|
||||||
|
Reply ->
|
||||||
|
Reply
|
||||||
|
end.
|
||||||
|
|
||||||
-spec forward_call(session_id(), term(), pos_integer(), state()) -> {term(), state()}.
|
-spec forward_call(session_id(), term(), pos_integer(), state()) -> {term(), state()}.
|
||||||
forward_call(SessionId, Request, Timeout, State) ->
|
forward_call(SessionId, Request, Timeout, State) ->
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user