Skip to content

Commit c953fa7

Browse files
authored
fix(api): resolve check_title filter to check_id for consistent finding-group counts (#10486)
1 parent 73907db commit c953fa7

File tree

6 files changed

+269
-35
lines changed

6 files changed

+269
-35
lines changed

api/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ All notable changes to the **Prowler API** are documented in this file.
1616
- Filter transient Neo4j defunct connection logs in Sentry `before_send` to suppress false-positive alerts handled by `RetryableSession` retries [(#10452)](https://github.com/prowler-cloud/prowler/pull/10452)
1717
- `MANAGE_ACCOUNT` permission no longer required for listing and creating tenants [(#10468)](https://github.com/prowler-cloud/prowler/pull/10468)
1818
- Finding groups muted filter, counters, metadata extraction and mute reaggregation [(#10477)](https://github.com/prowler-cloud/prowler/pull/10477)
19+
- Finding groups: resolve `check_title__icontains` to `check_id` so group counts stay consistent across providers; add `name__icontains` resource filter and `resource_group` field to the `/resources` response [(#10486)](https://github.com/prowler-cloud/prowler/pull/10486)
1920

2021
## [1.23.0] (Prowler v5.22.0)
2122

api/src/backend/api/filters.py

Lines changed: 65 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
ProviderGroup,
4545
ProviderSecret,
4646
Resource,
47+
ResourceFindingMapping,
4748
ResourceTag,
4849
Role,
4950
Scan,
@@ -197,17 +198,13 @@ class CommonFindingFilters(FilterSet):
197198
field_name="resource_services", lookup_expr="icontains"
198199
)
199200

200-
resource_uid = CharFilter(field_name="resources__uid")
201-
resource_uid__in = CharInFilter(field_name="resources__uid", lookup_expr="in")
202-
resource_uid__icontains = CharFilter(
203-
field_name="resources__uid", lookup_expr="icontains"
204-
)
201+
resource_uid = CharFilter(method="filter_resource_uid")
202+
resource_uid__in = CharInFilter(method="filter_resource_uid_in")
203+
resource_uid__icontains = CharFilter(method="filter_resource_uid_icontains")
205204

206-
resource_name = CharFilter(field_name="resources__name")
207-
resource_name__in = CharInFilter(field_name="resources__name", lookup_expr="in")
208-
resource_name__icontains = CharFilter(
209-
field_name="resources__name", lookup_expr="icontains"
210-
)
205+
resource_name = CharFilter(method="filter_resource_name")
206+
resource_name__in = CharInFilter(method="filter_resource_name_in")
207+
resource_name__icontains = CharFilter(method="filter_resource_name_icontains")
211208

212209
resource_type = CharFilter(method="filter_resource_type")
213210
resource_type__in = CharInFilter(field_name="resource_types", lookup_expr="overlap")
@@ -266,10 +263,49 @@ def filter_resource_tag(self, queryset, name, value):
266263
return queryset.filter(overall_query).distinct()
267264

268265
def filter_check_title_icontains(self, queryset, name, value):
266+
# Resolve from the summary table (has check_title column + trigram
267+
# GIN index) instead of scanning JSON in the findings table.
268+
matching_check_ids = (
269+
FindingGroupDailySummary.objects.filter(
270+
check_title__icontains=value,
271+
)
272+
.values_list("check_id", flat=True)
273+
.distinct()
274+
)
275+
return queryset.filter(check_id__in=matching_check_ids)
276+
277+
# --- Resource subquery filters ---
278+
# Resolve resource → ResourceFindingMapping (RFM) → finding_ids first, then filter findings
279+
# by id__in. This avoids a 3-way JOIN driven from the (huge)
280+
# findings side and lets PostgreSQL start from the resources
281+
# unique-constraint index instead.
282+
283+
@staticmethod
284+
def _finding_ids_for_resources(**lookup):
285+
return ResourceFindingMapping.objects.filter(
286+
resource__in=Resource.objects.filter(**lookup).values("id")
287+
).values("finding_id")
288+
289+
def filter_resource_uid(self, queryset, name, value):
290+
return queryset.filter(id__in=self._finding_ids_for_resources(uid=value))
291+
292+
def filter_resource_uid_in(self, queryset, name, value):
293+
return queryset.filter(id__in=self._finding_ids_for_resources(uid__in=value))
294+
295+
def filter_resource_uid_icontains(self, queryset, name, value):
269296
return queryset.filter(
270-
Q(check_metadata__CheckTitle__icontains=value)
271-
| Q(check_metadata__checktitle__icontains=value)
272-
| Q(check_metadata__Checktitle__icontains=value)
297+
id__in=self._finding_ids_for_resources(uid__icontains=value)
298+
)
299+
300+
def filter_resource_name(self, queryset, name, value):
301+
return queryset.filter(id__in=self._finding_ids_for_resources(name=value))
302+
303+
def filter_resource_name_in(self, queryset, name, value):
304+
return queryset.filter(id__in=self._finding_ids_for_resources(name__in=value))
305+
306+
def filter_resource_name_icontains(self, queryset, name, value):
307+
return queryset.filter(
308+
id__in=self._finding_ids_for_resources(name__icontains=value)
273309
)
274310

275311

@@ -919,7 +955,19 @@ class Meta:
919955
}
920956

921957

922-
class FindingGroupSummaryFilter(FilterSet):
958+
class _CheckTitleToCheckIdMixin:
959+
"""Resolve check_title search to check_ids so all provider rows are kept."""
960+
961+
def filter_check_title_to_check_ids(self, queryset, name, value):
962+
matching_check_ids = (
963+
queryset.filter(check_title__icontains=value)
964+
.values_list("check_id", flat=True)
965+
.distinct()
966+
)
967+
return queryset.filter(check_id__in=matching_check_ids)
968+
969+
970+
class FindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
923971
"""
924972
Filter for FindingGroupDailySummary queries.
925973
@@ -942,9 +990,7 @@ class FindingGroupSummaryFilter(FilterSet):
942990
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
943991
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
944992
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
945-
check_title__icontains = CharFilter(
946-
field_name="check_title", lookup_expr="icontains"
947-
)
993+
check_title__icontains = CharFilter(method="filter_check_title_to_check_ids")
948994

949995
# Provider filters
950996
provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact")
@@ -1032,7 +1078,7 @@ def _maybe_date_to_datetime(value):
10321078
return dt
10331079

10341080

1035-
class LatestFindingGroupSummaryFilter(FilterSet):
1081+
class LatestFindingGroupSummaryFilter(_CheckTitleToCheckIdMixin, FilterSet):
10361082
"""
10371083
Filter for FindingGroupDailySummary /latest endpoint.
10381084
@@ -1044,9 +1090,7 @@ class LatestFindingGroupSummaryFilter(FilterSet):
10441090
check_id = CharFilter(field_name="check_id", lookup_expr="exact")
10451091
check_id__in = CharInFilter(field_name="check_id", lookup_expr="in")
10461092
check_id__icontains = CharFilter(field_name="check_id", lookup_expr="icontains")
1047-
check_title__icontains = CharFilter(
1048-
field_name="check_title", lookup_expr="icontains"
1049-
)
1093+
check_title__icontains = CharFilter(method="filter_check_title_to_check_ids")
10501094

10511095
# Provider filters
10521096
provider_id = UUIDFilter(field_name="provider_id", lookup_expr="exact")

api/src/backend/api/tests/test_views.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15810,6 +15810,50 @@ def test_finding_groups_check_title_icontains(
1581015810
assert len(data) == 1
1581115811
assert data[0]["id"] == "s3_bucket_public_access"
1581215812

15813+
@pytest.mark.parametrize(
15814+
"extra_filters",
15815+
[
15816+
{},
15817+
{"filter[muted]": "include"},
15818+
],
15819+
ids=["summary_path", "finding_level_path"],
15820+
)
15821+
def test_check_title_icontains_includes_all_title_variants(
15822+
self,
15823+
authenticated_client,
15824+
finding_groups_title_variants_fixture,
15825+
extra_filters,
15826+
):
15827+
"""
15828+
Regression: two providers report the same check_id with different
15829+
checktitle values (e.g. after a Prowler version upgrade). Filtering
15830+
by check_title__icontains with a term that matches only ONE variant
15831+
must still return the finding group with counts from BOTH providers.
15832+
15833+
Parametrized to cover both aggregation paths:
15834+
- summary_path: default, uses _CheckTitleToCheckIdMixin on summaries
15835+
- finding_level_path: filter[muted]=include forces CommonFindingFilters
15836+
"""
15837+
params = {
15838+
"filter[inserted_at]": TODAY,
15839+
"filter[check_title.icontains]": "Ensure repository",
15840+
**extra_filters,
15841+
}
15842+
response = authenticated_client.get(
15843+
reverse("finding-group-list"),
15844+
params,
15845+
)
15846+
assert response.status_code == status.HTTP_200_OK
15847+
data = response.json()["data"]
15848+
assert len(data) == 1
15849+
assert data[0]["id"] == "github_secret_scanning_enabled"
15850+
attrs = data[0]["attributes"]
15851+
# Both providers' findings must be counted
15852+
assert attrs["fail_count"] == 2, (
15853+
"fail_count must include findings from both providers, "
15854+
"regardless of which title variant matches the search"
15855+
)
15856+
1581315857
def test_resources_not_found(self, authenticated_client):
1581415858
"""Test 404 returned for nonexistent check_id."""
1581515859
response = authenticated_client.get(
@@ -15851,6 +15895,44 @@ def test_resources_fields(self, authenticated_client, finding_groups_fixture):
1585115895
assert resource.get("region"), "resource.region must not be empty"
1585215896
assert resource.get("type"), "resource.type must not be empty"
1585315897

15898+
def test_resources_resource_group(
15899+
self, authenticated_client, finding_groups_fixture
15900+
):
15901+
"""Test resource_group is extracted from check_metadata.resourcegroup."""
15902+
response = authenticated_client.get(
15903+
reverse(
15904+
"finding-group-resources", kwargs={"pk": "s3_bucket_public_access"}
15905+
),
15906+
{"filter[inserted_at]": TODAY},
15907+
)
15908+
assert response.status_code == status.HTTP_200_OK
15909+
data = response.json()["data"]
15910+
assert len(data) == 2
15911+
for item in data:
15912+
resource = item["attributes"]["resource"]
15913+
assert (
15914+
resource["resource_group"] == "storage"
15915+
), "resource_group must be 'storage'"
15916+
15917+
def test_resources_name_icontains(
15918+
self, authenticated_client, finding_groups_fixture
15919+
):
15920+
"""Test resource_name__icontains filters resources by name substring."""
15921+
# s3_bucket_public_access has "My Instance 1" and "My Instance 2"
15922+
response = authenticated_client.get(
15923+
reverse(
15924+
"finding-group-resources", kwargs={"pk": "s3_bucket_public_access"}
15925+
),
15926+
{
15927+
"filter[inserted_at]": TODAY,
15928+
"filter[resource_name.icontains]": "Instance 1",
15929+
},
15930+
)
15931+
assert response.status_code == status.HTTP_200_OK
15932+
data = response.json()["data"]
15933+
assert len(data) == 1
15934+
assert "Instance 1" in data[0]["attributes"]["resource"]["name"]
15935+
1585415936
def test_resources_provider_info(
1585515937
self, authenticated_client, finding_groups_fixture
1585615938
):

api/src/backend/api/v1/serializers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4194,6 +4194,7 @@ class JSONAPIMeta:
41944194
"service": {"type": "string"},
41954195
"region": {"type": "string"},
41964196
"type": {"type": "string"},
4197+
"resource_group": {"type": "string"},
41974198
},
41984199
}
41994200
)
@@ -4205,6 +4206,7 @@ def get_resource(self, obj):
42054206
"service": obj.get("resource_service", ""),
42064207
"region": obj.get("resource_region", ""),
42074208
"type": obj.get("resource_type", ""),
4209+
"resource_group": obj.get("resource_group", ""),
42084210
}
42094211

42104212
@extend_schema_field(

api/src/backend/api/v1/views.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3483,7 +3483,7 @@ def list(self, request, *args, **kwargs):
34833483
request,
34843484
filtered_queryset,
34853485
manager=Finding.all_objects,
3486-
select_related=["scan"],
3486+
select_related=["scan__provider"],
34873487
prefetch_related=["resources"],
34883488
)
34893489

@@ -3653,7 +3653,7 @@ def latest(self, request):
36533653
tenant_id = request.tenant_id
36543654
filtered_queryset = self.filter_queryset(self.get_queryset())
36553655

3656-
latest_scan_ids = (
3656+
latest_scan_ids = list(
36573657
Scan.all_objects.filter(tenant_id=tenant_id, state=StateChoices.COMPLETED)
36583658
.order_by("provider_id", "-inserted_at")
36593659
.distinct("provider_id")
@@ -3667,7 +3667,7 @@ def latest(self, request):
36673667
request,
36683668
filtered_queryset,
36693669
manager=Finding.all_objects,
3670-
select_related=["scan"],
3670+
select_related=["scan__provider"],
36713671
prefetch_related=["resources"],
36723672
)
36733673

@@ -6878,8 +6878,15 @@ def retrieve(self, request, *args, **kwargs):
68786878
"resource_type__icontains": "type__icontains",
68796879
}
68806880

6881+
# Fields accepted directly by LatestResourceFilter (no translation needed)
6882+
_RESOURCE_FILTER_FIELDS = {
6883+
f"{field}__{lookup}"
6884+
for field, lookups in LatestResourceFilter.Meta.fields.items()
6885+
for lookup in lookups
6886+
} | set(LatestResourceFilter.Meta.fields.keys())
6887+
68816888
def _split_resource_filters(self, params: QueryDict) -> tuple[QueryDict, QueryDict]:
6882-
resource_keys = set(self.RESOURCE_FILTER_MAP)
6889+
resource_keys = set(self.RESOURCE_FILTER_MAP) | self._RESOURCE_FILTER_FIELDS
68836890
finding_params = QueryDict(mutable=True)
68846891
resource_params = QueryDict(mutable=True)
68856892
for key, values in params.lists():
@@ -6900,11 +6907,16 @@ def _resource_ids_from_params(
69006907
queryset = queryset.filter(tenant_id=tenant_id)
69016908

69026909
filter_params = QueryDict(mutable=True)
6903-
for key, mapped_key in self.RESOURCE_FILTER_MAP.items():
6904-
if key not in params:
6910+
for key, values in params.lists():
6911+
# Translate resource_* prefixed keys via the map
6912+
if key in self.RESOURCE_FILTER_MAP:
6913+
mapped_key = self.RESOURCE_FILTER_MAP[key]
6914+
elif key in self._RESOURCE_FILTER_FIELDS:
6915+
mapped_key = key
6916+
else:
69056917
continue
6918+
69066919
if key == "resources" or key.endswith("__in"):
6907-
values = params.getlist(key)
69086920
items: list[str] = []
69096921
for value in values:
69106922
if value is None:
@@ -7232,11 +7244,13 @@ def _build_resource_aggregation(
72327244
),
72337245
first_seen_at=Min("finding__first_seen_at"),
72347246
last_seen_at=Max("finding__inserted_at"),
7235-
# Max() picks an arbitrary reason when a resource has multiple
7236-
# muted findings; this is acceptable because mute rules are
7237-
# applied per-check so all findings for the same resource
7238-
# share the same muted_reason in practice.
7247+
# Max() on muted_reason / check_metadata.resourcegroup is safe
7248+
# because all findings for the same resource+check are expected to
7249+
# share identical values (mute rules and metadata are applied per-check).
72397250
muted_reason=Max("finding__muted_reason"),
7251+
resource_group=Max(
7252+
KeyTextTransform("resourcegroup", "finding__check_metadata")
7253+
),
72407254
)
72417255
.filter(resource_id__isnull=False)
72427256
.order_by("resource_id")
@@ -7273,6 +7287,7 @@ def _post_process_resources(self, resource_data):
72737287
"first_seen_at": row["first_seen_at"],
72747288
"last_seen_at": row["last_seen_at"],
72757289
"muted_reason": row.get("muted_reason"),
7290+
"resource_group": row.get("resource_group", ""),
72767291
}
72777292
)
72787293

@@ -7307,14 +7322,19 @@ def _build_aggregated_queryset(self, finding_params, latest=False):
73077322
raise ValidationError(filterset.errors)
73087323
filtered_queryset = filterset.qs
73097324
# Only include summaries from each provider's most recent date
7310-
# (within the filtered range)
7311-
filtered_queryset = filtered_queryset.annotate(
7325+
# (within the filtered range).
7326+
# We use a subquery to strip the Window annotation so it does not
7327+
# leak into the GROUP BY of _aggregate_daily_summaries.
7328+
latest_per_provider = filtered_queryset.annotate(
73127329
_max_provider_date=Window(
73137330
expression=Max("inserted_at"),
73147331
partition_by=[F("provider_id")],
73157332
),
73167333
).filter(inserted_at=F("_max_provider_date"))
7317-
return self._aggregate_daily_summaries(filtered_queryset)
7334+
clean_queryset = FindingGroupDailySummary.objects.filter(
7335+
pk__in=latest_per_provider.values("pk")
7336+
)
7337+
return self._aggregate_daily_summaries(clean_queryset)
73187338

73197339
def _sorted_paginated_response(self, request, aggregated_queryset):
73207340
"""Apply ordering, pagination, post-processing, and return the Response."""

0 commit comments

Comments
 (0)