Skip to content

Commit a8d69f3

Browse files
johnscawgrant
authored andcommitted
feat: filter query results by user db_role (ENG-2477,ENG-2476,ENG-2478) (#47)
If the current user has a `db_role`, they should only see query results that they have generated, so that they don't see results which contain info about resources they don't have permission to view. feat: use per-user db role for query exec (ENG-2474) (#48) * feat: use per-user db role for query exec (ENG-2474) Login with per-user PG database role, if available, to ensure that RLS policies are applied to user queries. * Reject login from unknown SSO users * Use pre-filtered query for QueryResults rather than session-level `set role` * Add docstring with non-obvious context
1 parent 8dbad29 commit a8d69f3

4 files changed

Lines changed: 74 additions & 7 deletions

File tree

redash/authentication/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ def jwt_token_load_user_from_request(request):
191191

192192
if not valid_token:
193193
return None
194+
195+
if payload.get("stacklet:db_role") == "limited_visibility":
196+
raise Unauthorized("Unable to determine SSO identity")
194197

195198
# it might actually be a username or something, but it doesn't actually matter
196199
email = identity

redash/models/__init__.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import pytz
88
from sqlalchemy import UniqueConstraint, and_, cast, distinct, func, or_
99
from sqlalchemy.dialects.postgresql import ARRAY, DOUBLE_PRECISION, JSONB
10+
from flask_login import current_user
1011
from sqlalchemy.event import listens_for
1112
from sqlalchemy.ext.hybrid import hybrid_property
1213
from sqlalchemy.orm import (
@@ -36,6 +37,7 @@
3637
gfk_type,
3738
key_type,
3839
primary_key,
40+
BaseQuery,
3941
)
4042
from redash.models.changes import Change, ChangeTrackingMixin # noqa
4143
from redash.models.mixins import BelongsToOrgMixin, TimestampMixin
@@ -346,8 +348,11 @@ def unused(cls, days=7):
346348
)
347349

348350
@classmethod
349-
def get_latest(cls, data_source, query, max_age=0):
350-
query_hash = gen_query_hash(query)
351+
def get_latest(cls, data_source, query, max_age=0, is_hash=False):
352+
if is_hash:
353+
query_hash = query
354+
else:
355+
query_hash = gen_query_hash(query)
351356

352357
if max_age == -1 and settings.QUERY_RESULTS_EXPIRED_TTL_ENABLED:
353358
max_age = settings.QUERY_RESULTS_EXPIRED_TTL
@@ -391,6 +396,35 @@ def groups(self):
391396
return self.data_source.groups
392397

393398

399+
@listens_for(BaseQuery, "before_compile", retval=True)
400+
def prefilter_query_results(query):
401+
"""
402+
Ensure that a user with a db_role defined can only see QueryResults that
403+
they themselves created.
404+
405+
This is to ensure that they don't see results that might include resources
406+
from accounts that shouldn't be visibile to them. Ideally, this would use
407+
`set role` and the RLS policy that is applied to the table, but without a
408+
"post-query" type event, that's not really feasible.
409+
410+
The RLS policy on the redash.query_results table still applies to the
411+
arbitrary query that the user executes, so that they can't issue a query
412+
directly against that table and get around this check.
413+
"""
414+
for desc in query.column_descriptions:
415+
if desc['type'] is QueryResult:
416+
db_role = getattr(current_user, "db_role", None)
417+
if not db_role:
418+
continue
419+
limit = query._limit
420+
offset = query._offset
421+
query = query.limit(None).offset(None)
422+
query.offset(None)
423+
query = query.filter(desc['entity'].db_role == db_role)
424+
query = query.limit(limit).offset(offset)
425+
return query
426+
427+
394428
def should_schedule_next(previous_iteration, now, interval, time=None, day_of_week=None, failures=0):
395429
# if previous_iteration is None, it means the query has never been run before
396430
# so we should schedule it immediately

redash/query_runner/pg.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import logging
23
import os
34
import select
@@ -20,7 +21,7 @@
2021
JobTimeoutException,
2122
register,
2223
)
23-
24+
from redash import settings
2425
from redash.stacklet.auth import inject_iam_auth
2526

2627
logger = logging.getLogger(__name__)
@@ -251,12 +252,29 @@ def _get_tables(self, schema):
251252

252253
return list(schema.values())
253254

255+
def _gen_role_pass(self, role_name: str) -> str:
256+
"""
257+
Generate a password for a given role using the datasource secret and role name.
258+
"""
259+
secret = settings.DATASOURCE_SECRET_KEY
260+
return hashlib.sha256(f"{secret}:{role_name}".encode("utf-8")).hexdigest()
261+
254262
@inject_iam_auth
255-
def _get_connection(self):
263+
def _get_connection(self, user):
264+
if getattr(user, "db_role", None):
265+
auth_config = dict(
266+
user=user.db_role,
267+
password=self._gen_role_pass(user.db_role),
268+
)
269+
else:
270+
auth_config = dict(
271+
user=self.configuration.get("user"),
272+
password=self.configuration.get("password"),
273+
)
274+
logger.info(f"Connecting to datasource as {auth_config['user']}")
256275
self.ssl_config = _get_ssl_config(self.configuration)
257276
connection = psycopg2.connect(
258-
user=self.configuration.get("user"),
259-
password=self.configuration.get("password"),
277+
**auth_config,
260278
host=self.configuration.get("host"),
261279
port=self.configuration.get("port"),
262280
dbname=self.configuration.get("dbname"),
@@ -267,7 +285,7 @@ def _get_connection(self):
267285
return connection
268286

269287
def run_query(self, query, user):
270-
connection = self._get_connection()
288+
connection = self._get_connection(user)
271289
_wait(connection, timeout=10)
272290

273291
cursor = connection.cursor()

redash/serializers/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ def serialize_query(
137137
if with_visualizations:
138138
d["visualizations"] = [serialize_visualization(vis, with_query=False) for vis in query.visualizations]
139139

140+
if getattr(current_user, "db_role", None):
141+
# Override the latest_query_data_id for users with a db_role because
142+
# they may not actually be able to see that one due to their db_role
143+
# and may have one specific to them instead.
144+
latest_result = models.QueryResult.get_latest(
145+
data_source=query.data_source,
146+
query=query.query_hash,
147+
max_age=-1,
148+
is_hash=True,
149+
)
150+
d["latest_query_data_id"] = latest_result and latest_result.id or None
151+
140152
return d
141153

142154

0 commit comments

Comments
 (0)