diff --git a/changelog/8209-pool-recycle-setting.yaml b/changelog/8209-pool-recycle-setting.yaml new file mode 100644 index 00000000000..d11fc8e8fd2 --- /dev/null +++ b/changelog/8209-pool-recycle-setting.yaml @@ -0,0 +1,4 @@ +type: Added +description: Added configurable pool_recycle setting for database connections +pr: 8209 +labels: [] diff --git a/src/fides/api/db/ctl_session.py b/src/fides/api/db/ctl_session.py index 55bba0726c1..48763ad57d3 100644 --- a/src/fides/api/db/ctl_session.py +++ b/src/fides/api/db/ctl_session.py @@ -34,6 +34,9 @@ pool_size=CONFIG.database.api_async_engine_pool_size, max_overflow=CONFIG.database.api_async_engine_max_overflow, pool_pre_ping=CONFIG.database.api_async_engine_pool_pre_ping, + pool_recycle=CONFIG.database.pool_recycle + if CONFIG.database.pool_recycle is not None + else -1, # -1 is SQLAlchemy's default (no recycling) ) async_session_factory = sessionmaker( async_engine, class_=AsyncSession, expire_on_commit=False @@ -60,6 +63,9 @@ pool_size=CONFIG.database.async_readonly_database_pool_size, max_overflow=CONFIG.database.async_readonly_database_max_overflow, pool_pre_ping=CONFIG.database.async_readonly_database_pre_ping, + pool_recycle=CONFIG.database.pool_recycle + if CONFIG.database.pool_recycle is not None + else -1, # -1 is SQLAlchemy's default (no recycling) # Don't rollback before returning a connection to the pool - this improves performance dramatically; # can be turned off via config but the default is to not reset on return pool_reset_on_return=( diff --git a/src/fides/api/db/session.py b/src/fides/api/db/session.py index e1ac84bcc33..dab4ddf7633 100644 --- a/src/fides/api/db/session.py +++ b/src/fides/api/db/session.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Callable, Dict +from typing import Any, Callable, Dict, Optional from loguru import logger from sqlalchemy import create_engine @@ -26,6 +26,7 @@ def get_db_engine( keepalives_interval: int | None = None, keepalives_count: int | None = None, pool_pre_ping: bool = True, + pool_recycle: Optional[int] = None, disable_pooling: bool = False, ) -> Engine: """Return a database engine. @@ -90,6 +91,8 @@ def get_db_engine( engine_args["pool_pre_ping"] = pool_pre_ping engine_args["pool_size"] = pool_size engine_args["max_overflow"] = max_overflow + if pool_recycle is not None: + engine_args["pool_recycle"] = pool_recycle return create_engine(database_uri, **engine_args) diff --git a/src/fides/api/tasks/__init__.py b/src/fides/api/tasks/__init__.py index ef077c441e7..9951b615874 100644 --- a/src/fides/api/tasks/__init__.py +++ b/src/fides/api/tasks/__init__.py @@ -89,6 +89,7 @@ def get_new_session(self) -> ContextManager[Session]: pool_size=CONFIG.database.task_engine_pool_size, max_overflow=CONFIG.database.task_engine_max_overflow, pool_pre_ping=CONFIG.database.task_engine_pool_pre_ping, + pool_recycle=CONFIG.database.pool_recycle, ) # same for the sessionmaker diff --git a/src/fides/common/session_management.py b/src/fides/common/session_management.py index 5024074b284..21c17af14e4 100644 --- a/src/fides/common/session_management.py +++ b/src/fides/common/session_management.py @@ -40,6 +40,7 @@ def get_api_session() -> Session: pool_size=CONFIG.database.api_engine_pool_size, max_overflow=CONFIG.database.api_engine_max_overflow, pool_pre_ping=CONFIG.database.api_engine_pool_pre_ping, + pool_recycle=CONFIG.database.pool_recycle, ) SessionLocal = get_db_session(CONFIG, engine=_engine) return SessionLocal() @@ -157,6 +158,7 @@ def get_readonly_api_session() -> Session: pool_size=CONFIG.database.api_engine_pool_size, max_overflow=CONFIG.database.api_engine_max_overflow, pool_pre_ping=CONFIG.database.api_engine_pool_pre_ping, + pool_recycle=CONFIG.database.pool_recycle, ) SessionLocal = get_db_session(CONFIG, engine=_readonly_engine) return SessionLocal() diff --git a/src/fides/config/database_settings.py b/src/fides/config/database_settings.py index e90da08e2b7..7390dc90f6e 100644 --- a/src/fides/config/database_settings.py +++ b/src/fides/config/database_settings.py @@ -55,6 +55,18 @@ class DatabaseSettings(FidesSettings): description="If true, the engine will pre-ping connections to ensure they are still valid before using them.", ) + # Pool Recycle (applies to all engines) + pool_recycle: Optional[int] = Field( + default=None, + gt=0, + description=( + "Number of seconds after which a database connection is automatically " + "recycled (closed and replaced). Useful when a connection proxy or " + "firewall imposes an idle connection timeout. Set this to a value lower " + "than the proxy/DB timeout. When unset (None), connections are never recycled." + ), + ) + # Async Engine Settings # Note: We purposely do not include async engine equivalents of the sync engine's # keepalives_* settings as they are not supported by asyncpg. diff --git a/tests/ctl/core/config/test_config.py b/tests/ctl/core/config/test_config.py index 00b93a69eed..21826b19d2a 100644 --- a/tests/ctl/core/config/test_config.py +++ b/tests/ctl/core/config/test_config.py @@ -71,6 +71,7 @@ def test_get_config_default() -> None: """Check that get_config loads default values when given an empty TOML.""" config = get_config() assert config.database.api_engine_pool_size == 50 + assert config.database.pool_recycle is None assert config.security.env == "prod" assert config.security.app_encryption_key == "" assert config.logging.level == "INFO" @@ -246,6 +247,28 @@ def test_get_alembic_config_with_special_char_in_database_url(): get_alembic_config(database_url) +@pytest.mark.unit +def test_database_settings_pool_recycle_defaults_to_none() -> None: + """pool_recycle is optional and defaults to None.""" + db_settings = DatabaseSettings() + assert db_settings.pool_recycle is None + + +@pytest.mark.unit +def test_database_settings_pool_recycle_accepts_positive() -> None: + """pool_recycle accepts a positive integer.""" + db_settings = DatabaseSettings(pool_recycle=1800) + assert db_settings.pool_recycle == 1800 + + +@pytest.mark.unit +@pytest.mark.parametrize("value", [0, -1, -5], ids=["zero", "neg_one", "neg_five"]) +def test_database_settings_pool_recycle_rejects_invalid(value: int) -> None: + """pool_recycle must be > 0 when set.""" + with pytest.raises(ValidationError): + DatabaseSettings(pool_recycle=value) + + @pytest.mark.unit def test_database_settings_migration_role_defaults_to_none() -> None: """migration_role is optional and defaults to None.""" diff --git a/tests/lib/test_session.py b/tests/lib/test_session.py index 9c8865b933d..7293cd99461 100644 --- a/tests/lib/test_session.py +++ b/tests/lib/test_session.py @@ -81,6 +81,24 @@ def test_config_with_keepalives(self) -> None: finally: engine.dispose() + def test_pool_recycle_passed_to_engine(self) -> None: + """pool_recycle is forwarded to the underlying QueuePool.""" + creator = make_sync_creator() + engine = session.get_db_engine(creator=creator, pool_size=1, pool_recycle=900) + try: + assert engine.pool._recycle == 900 + finally: + engine.dispose() + + def test_pool_recycle_default(self) -> None: + """Default pool_recycle (None) leaves SQLAlchemy's default of -1.""" + creator = make_sync_creator() + engine = session.get_db_engine(creator=creator, pool_size=1) + try: + assert engine.pool._recycle == -1 + finally: + engine.dispose() + def test_disable_pooling(self) -> None: """disable_pooling uses NullPool — no connections are kept.""" from sqlalchemy.pool import NullPool diff --git a/tests/ops/api/test_deps.py b/tests/ops/api/test_deps.py index 8dc11215c28..802bba1af1f 100644 --- a/tests/ops/api/test_deps.py +++ b/tests/ops/api/test_deps.py @@ -31,6 +31,14 @@ def mock_config_changed_db_engine_settings(): CONFIG.database.api_engine_max_overflow = max_overflow +@pytest.fixture +def mock_config_pool_recycle(): + original = CONFIG.database.pool_recycle + CONFIG.database.pool_recycle = 1800 + yield + CONFIG.database.pool_recycle = original + + @pytest.mark.usefixtures("mock_config") def test_get_cache_not_enabled(): with pytest.raises(RedisNotConfigured): @@ -53,3 +61,12 @@ def test_get_api_session(config_fixture, request): pool: QueuePool = engine.pool assert pool.size() == pool_size assert pool._max_overflow == max_overflow + + +@pytest.mark.usefixtures("mock_config_pool_recycle") +def test_get_api_session_pool_recycle(): + session_management._engine = None + session: Session = get_api_session() + engine: Engine = session.get_bind() + pool: QueuePool = engine.pool + assert pool._recycle == CONFIG.database.pool_recycle diff --git a/tests/ops/tasks/test_database_task.py b/tests/ops/tasks/test_database_task.py index c2907ade17a..ce799a0b64d 100644 --- a/tests/ops/tasks/test_database_task.py +++ b/tests/ops/tasks/test_database_task.py @@ -19,9 +19,12 @@ def mock_config_changed_db_engine_settings(self): CONFIG.database.task_engine_pool_size = pool_size + 5 max_overflow = CONFIG.database.task_engine_max_overflow CONFIG.database.task_engine_max_overflow = max_overflow + 5 + pool_recycle = CONFIG.database.pool_recycle + CONFIG.database.pool_recycle = 1800 yield CONFIG.database.task_engine_pool_size = pool_size CONFIG.database.task_engine_max_overflow = max_overflow + CONFIG.database.pool_recycle = pool_recycle @pytest.fixture def recovering_session_maker(self): @@ -42,6 +45,15 @@ def always_failing_session_maker(self): mock_maker.side_effect = OperationalError("connection failed", None, None) return mock_maker + @pytest.mark.usefixtures("mock_config_changed_db_engine_settings") + def test_task_engine_pool_recycle(self): + """pool_recycle from config is applied to the task engine.""" + task = DatabaseTask() + task._task_engine = None + task._sessionmaker = None + task.get_new_session() + assert task._task_engine.pool._recycle == CONFIG.database.pool_recycle + def test_retry_on_operational_error(self, recovering_session_maker): """Test that session creation retries on OperationalError"""