Skip to content

Commit 2a9bdf9

Browse files
Add a task to clean up expired JWT failure rate limits (#380)
Co-authored-by: Manuel Iñaki Bilbao <manuel.bilbao@lambdaclass.com>
1 parent 9392c59 commit 2a9bdf9

7 files changed

Lines changed: 120 additions & 2 deletions

File tree

Cargo.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ tower-http = { version = "0.6", features = ["trace"] }
7474
tracing = "0.1.40"
7575
tracing-appender = "0.2.3"
7676
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] }
77+
tracing-test = { version = "0.2.5", features = ["no-env-filter"] }
7778
tree_hash = "0.9"
7879
tree_hash_derive = "0.9"
7980
typenum = "1.17.0"

config.example.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ port = 20000
165165
# Number of JWT authentication attempts a client can fail before blocking that client temporarily from Signer access
166166
# OPTIONAL, DEFAULT: 3
167167
jwt_auth_fail_limit = 3
168-
# How long to block a client from Signer access, in seconds, if it failed JWT authentication too many times
168+
# How long to block a client from Signer access, in seconds, if it failed JWT authentication too many times.
169+
# This also defines the interval at which failed attempts are regularly checked and expired ones are cleaned up.
169170
# OPTIONAL, DEFAULT: 300
170171
jwt_auth_fail_timeout_seconds = 300
171172

crates/common/src/config/signer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ pub struct SignerConfig {
8888
pub jwt_auth_fail_limit: u32,
8989

9090
/// Duration in seconds to rate limit an endpoint after the JWT auth failure
91-
/// limit has been reached
91+
/// limit has been reached. This also defines the interval at which failed
92+
/// attempts are regularly checked and expired ones are cleaned up.
9293
#[serde(default = "default_u32::<SIGNER_JWT_AUTH_FAIL_TIMEOUT_SECONDS_DEFAULT>")]
9394
pub jwt_auth_fail_timeout_seconds: u32,
9495

crates/signer/src/service.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,22 @@ impl SigningService {
143143
.route_layer(middleware::from_fn(log_request))
144144
.route(STATUS_PATH, get(handle_status));
145145

146+
// Run the JWT cleaning task
147+
let jwt_cleaning_task = tokio::spawn(async move {
148+
let mut interval = tokio::time::interval(state.jwt_auth_fail_timeout);
149+
loop {
150+
interval.tick().await;
151+
let mut failures = state.jwt_auth_failures.write().await;
152+
let before = failures.len();
153+
failures
154+
.retain(|_, info| info.last_failure.elapsed() < state.jwt_auth_fail_timeout);
155+
let after = failures.len();
156+
if before != after {
157+
debug!("Cleaned up {} old JWT auth failure entries", before - after);
158+
}
159+
}
160+
});
161+
146162
let server_result = if let Some(tls_config) = config.tls_certificates {
147163
if CryptoProvider::get_default().is_none() {
148164
// Install the AWS-LC provider if no default is set, usually for CI
@@ -184,6 +200,10 @@ impl SigningService {
184200
)
185201
.await
186202
};
203+
204+
// Shutdown the JWT cleaning task
205+
jwt_cleaning_task.abort();
206+
187207
server_result.wrap_err("signer service exited")
188208
}
189209

tests/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,6 @@ tracing.workspace = true
2121
tracing-subscriber.workspace = true
2222
tree_hash.workspace = true
2323
url.workspace = true
24+
25+
[dev-dependencies]
26+
tracing-test.workspace = true
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
use std::{collections::HashMap, time::Duration};
2+
3+
use alloy::primitives::b256;
4+
use cb_common::{
5+
commit::constants::GET_PUBKEYS_PATH,
6+
config::{ModuleSigningConfig, load_module_signing_configs},
7+
types::ModuleId,
8+
utils::create_jwt,
9+
};
10+
use cb_tests::{
11+
signer_service::start_server,
12+
utils::{self},
13+
};
14+
use eyre::Result;
15+
use reqwest::StatusCode;
16+
17+
const JWT_MODULE: &str = "test-module";
18+
const JWT_SECRET: &str = "test-jwt-secret";
19+
const ADMIN_SECRET: &str = "test-admin-secret";
20+
21+
async fn create_mod_signing_configs() -> HashMap<ModuleId, ModuleSigningConfig> {
22+
let mut cfg =
23+
utils::get_commit_boost_config(utils::get_pbs_static_config(utils::get_pbs_config(0)));
24+
25+
let module_id = ModuleId(JWT_MODULE.to_string());
26+
let signing_id = b256!("0101010101010101010101010101010101010101010101010101010101010101");
27+
28+
cfg.modules = Some(vec![utils::create_module_config(module_id.clone(), signing_id)]);
29+
30+
let jwts = HashMap::from([(module_id.clone(), JWT_SECRET.to_string())]);
31+
32+
load_module_signing_configs(&cfg, &jwts).unwrap()
33+
}
34+
35+
#[tokio::test]
36+
#[tracing_test::traced_test]
37+
async fn test_signer_jwt_fail_cleanup() -> Result<()> {
38+
// setup_test_env() isn't used because we want to capture logs with tracing_test
39+
let module_id = ModuleId(JWT_MODULE.to_string());
40+
let mod_cfgs = create_mod_signing_configs().await;
41+
let start_config = start_server(20102, &mod_cfgs, ADMIN_SECRET.to_string(), false).await?;
42+
let mod_cfg = mod_cfgs.get(&module_id).expect("JWT config for test module not found");
43+
44+
// Run as many pubkeys requests as the fail limit
45+
let jwt = create_jwt(&module_id, "incorrect secret", GET_PUBKEYS_PATH, None)?;
46+
let client = reqwest::Client::new();
47+
let url = format!("http://{}{}", start_config.endpoint, GET_PUBKEYS_PATH);
48+
for _ in 0..start_config.jwt_auth_fail_limit {
49+
let response = client.get(&url).bearer_auth(&jwt).send().await?;
50+
assert!(response.status() == StatusCode::UNAUTHORIZED);
51+
}
52+
53+
// Run another request - this should fail due to rate limiting now
54+
let jwt = create_jwt(&module_id, &mod_cfg.jwt_secret, GET_PUBKEYS_PATH, None)?;
55+
let response = client.get(&url).bearer_auth(&jwt).send().await?;
56+
assert!(response.status() == StatusCode::TOO_MANY_REQUESTS);
57+
58+
// Wait until the cleanup task should have run properly, takes a while for the
59+
// timing to work out
60+
tokio::time::sleep(Duration::from_secs(
61+
(start_config.jwt_auth_fail_timeout_seconds * 3) as u64,
62+
))
63+
.await;
64+
65+
// Make sure the cleanup message was logged - it's all internal state so without
66+
// refactoring or exposing it, this is the easiest way to check if it triggered
67+
assert!(logs_contain("Cleaned up 1 old JWT auth failure entries"));
68+
69+
Ok(())
70+
}

0 commit comments

Comments
 (0)