Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 187 additions & 6 deletions src/sentry/preprod/size_analysis/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
FileSavingsResultGroup,
FilesInsightResult,
GroupsInsightResult,
ImageOptimizationInsightResult,
OptimizableImageFile,
StripBinaryFileInfo,
StripBinaryInsightResult,
)
from sentry.preprod.size_analysis.models import (
ComparisonResults,
Expand Down Expand Up @@ -546,6 +550,138 @@ def _compare_groups(
return diff_items


def _compare_optimizable_images(
head_files: list[OptimizableImageFile], base_files: list[OptimizableImageFile]
) -> list[DiffItem]:
"""Compare optimizable image files between head and base builds."""
Copy link
Member

@rbro112 rbro112 Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Would recommend removing these type of comments, usually they're not adding anything since the function is pretty self-explanatory

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point.

diff_items = []

head_files_dict = {f.file_path: f for f in head_files}
base_files_dict = {f.file_path: f for f in base_files}

all_paths = set(head_files_dict.keys()) | set(base_files_dict.keys())

for path in sorted(all_paths):
head_file = head_files_dict.get(path)
base_file = base_files_dict.get(path)

if head_file and base_file:
# File exists in both - check for savings change
size_diff = head_file.potential_savings - base_file.potential_savings

# Skip files with no savings change
if size_diff == 0:
continue

diff_type = DiffType.INCREASED if size_diff > 0 else DiffType.DECREASED

diff_items.append(
DiffItem(
size_diff=size_diff,
head_size=head_file.potential_savings,
base_size=base_file.potential_savings,
path=path,
item_type=None,
type=diff_type,
diff_items=None,
)
)
elif head_file:
# File added in head
diff_items.append(
DiffItem(
size_diff=head_file.potential_savings,
head_size=head_file.potential_savings,
base_size=None,
path=path,
item_type=None,
type=DiffType.ADDED,
diff_items=None,
)
)
elif base_file:
# File removed from head
diff_items.append(
DiffItem(
size_diff=-base_file.potential_savings,
head_size=None,
base_size=base_file.potential_savings,
path=path,
item_type=None,
type=DiffType.REMOVED,
diff_items=None,
)
)

return diff_items


def _compare_strip_binary_files(
head_files: list[StripBinaryFileInfo], base_files: list[StripBinaryFileInfo]
) -> list[DiffItem]:
"""Compare strip binary files between head and base builds."""
diff_items = []

head_files_dict = {f.file_path: f for f in head_files}
base_files_dict = {f.file_path: f for f in base_files}

all_paths = set(head_files_dict.keys()) | set(base_files_dict.keys())

for path in sorted(all_paths):
head_file = head_files_dict.get(path)
base_file = base_files_dict.get(path)

if head_file and base_file:
# File exists in both - check for savings change
size_diff = head_file.total_savings - base_file.total_savings

# Skip files with no savings change
if size_diff == 0:
continue

diff_type = DiffType.INCREASED if size_diff > 0 else DiffType.DECREASED

diff_items.append(
DiffItem(
size_diff=size_diff,
head_size=head_file.total_savings,
base_size=base_file.total_savings,
path=path,
item_type=None,
type=diff_type,
diff_items=None,
)
)
elif head_file:
# File added in head
diff_items.append(
DiffItem(
size_diff=head_file.total_savings,
head_size=head_file.total_savings,
base_size=None,
path=path,
item_type=None,
type=DiffType.ADDED,
diff_items=None,
)
)
elif base_file:
# File removed from head
diff_items.append(
DiffItem(
size_diff=-base_file.total_savings,
head_size=None,
base_size=base_file.total_savings,
path=path,
item_type=None,
type=DiffType.REMOVED,
diff_items=None,
)
)

return diff_items


def _diff_insight(
insight_type: str,
head_insight: BaseInsightResult | None,
Expand All @@ -564,6 +700,16 @@ def _diff_insight(
base_files = base_insight.files if isinstance(base_insight, FilesInsightResult) else []
head_groups = []
base_groups = base_insight.groups if isinstance(base_insight, GroupsInsightResult) else []
head_optimizable_images = []
base_optimizable_images = (
base_insight.optimizable_files
if isinstance(base_insight, ImageOptimizationInsightResult)
else []
)
head_strip_binary_files = []
base_strip_binary_files = (
base_insight.files if isinstance(base_insight, StripBinaryInsightResult) else []
)
elif base_insight is None:
# Should never happen, but here for mypy passing
assert head_insight is not None
Expand All @@ -574,6 +720,16 @@ def _diff_insight(
base_files = []
head_groups = head_insight.groups if isinstance(head_insight, GroupsInsightResult) else []
base_groups = []
head_optimizable_images = (
head_insight.optimizable_files
if isinstance(head_insight, ImageOptimizationInsightResult)
else []
)
base_optimizable_images = []
head_strip_binary_files = (
head_insight.files if isinstance(head_insight, StripBinaryInsightResult) else []
)
base_strip_binary_files = []
else:
status = "unresolved"
# Unresolved insight - compare both
Expand All @@ -582,9 +738,25 @@ def _diff_insight(
base_files = base_insight.files if isinstance(base_insight, FilesInsightResult) else []
head_groups = head_insight.groups if isinstance(head_insight, GroupsInsightResult) else []
base_groups = base_insight.groups if isinstance(base_insight, GroupsInsightResult) else []
head_optimizable_images = (
Copy link
Member

@rbro112 rbro112 Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be better as a follow up, but all these assignments might be better suited if we instead just check the instance type of the overall insight where our current file/group checks are (line 761). So something more like

if isinstance(head_insight, FilesInsightResults) and isinstance(base_insight, FilesInsightResults): 
  # Handle files insight
elif isinstance(head_insight, GroupsInsightsResults) ... :
  ...
elif ... 

I know you're just following existing patterns here, so no worries, but could be a nice cleanup now that there's more branching logic

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I saw that the existing pattern wasn't scaling well. Good call.

head_insight.optimizable_files
if isinstance(head_insight, ImageOptimizationInsightResult)
else []
)
base_optimizable_images = (
base_insight.optimizable_files
if isinstance(base_insight, ImageOptimizationInsightResult)
else []
)
head_strip_binary_files = (
head_insight.files if isinstance(head_insight, StripBinaryInsightResult) else []
)
base_strip_binary_files = (
base_insight.files if isinstance(base_insight, StripBinaryInsightResult) else []
)

file_diffs = []
group_diffs = []
file_diffs: list[DiffItem] = []
group_diffs: list[DiffItem] = []

if head_files or base_files:
file_diffs = _compare_files(head_files, base_files)
Expand All @@ -596,8 +768,16 @@ def _diff_insight(
# If no group diffs, we don't want to show an irrelevant insight diff item
if len(group_diffs) == 0:
return None

# TODO(EME-678) Implement non-File/GroupinsightsResult insight diffs in future
elif head_optimizable_images or base_optimizable_images:
file_diffs = _compare_optimizable_images(head_optimizable_images, base_optimizable_images)
# If no file diffs, we don't want to show an irrelevant insight diff item
if len(file_diffs) == 0:
return None
elif head_strip_binary_files or base_strip_binary_files:
file_diffs = _compare_strip_binary_files(head_strip_binary_files, base_strip_binary_files)
# If no file diffs, we don't want to show an irrelevant insight diff item
if len(file_diffs) == 0:
return None

return InsightDiffItem(
insight_type=insight_type,
Expand All @@ -621,11 +801,12 @@ def _compare_insights(
return insight_diff_items

# Convert insights to dictionaries for easier comparison
# Use isinstance check to filter out non-insight fields like 'platform' discriminator
head_insight_dict = (
{
field_name: value
for field_name, value in vars(head_insights).items()
if value is not None
if isinstance(value, BaseInsightResult)
}
if head_insights
else {}
Expand All @@ -634,7 +815,7 @@ def _compare_insights(
{
field_name: value
for field_name, value in vars(base_insights).items()
if value is not None
if isinstance(value, BaseInsightResult)
}
if base_insights
else {}
Expand Down
10 changes: 8 additions & 2 deletions src/sentry/preprod/size_analysis/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

from enum import Enum
from typing import Annotated, Literal

from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, Field

from sentry.preprod.models import PreprodArtifactSizeMetrics
from sentry.preprod.size_analysis.insight_models import (
Expand Down Expand Up @@ -31,6 +32,7 @@


class AndroidInsightResults(BaseModel):
platform: Literal["android"] = "android"
duplicate_files: DuplicateFilesInsightResult | None = None
webp_optimization: WebPOptimizationInsightResult | None = None
large_images: LargeImageFileInsightResult | None = None
Expand All @@ -41,6 +43,7 @@ class AndroidInsightResults(BaseModel):


class AppleInsightResults(BaseModel):
platform: Literal["apple"] = "apple"
duplicate_files: DuplicateFilesInsightResult | None = None
large_images: LargeImageFileInsightResult | None = None
large_audios: LargeAudioFileInsightResult | None = None
Expand Down Expand Up @@ -134,7 +137,10 @@ class SizeAnalysisResults(BaseModel):
download_size: int
install_size: int
treemap: TreemapResults | None = None
insights: AndroidInsightResults | AppleInsightResults | None = None
insights: (
Annotated[AndroidInsightResults | AppleInsightResults, Field(discriminator="platform")]
| None
) = None
Comment on lines +140 to +143
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The SizeAnalysisResults model now requires a platform field. Parsing old analysis data lacking this field via parse_raw() will raise an unhandled ValidationError, crashing the task.
Severity: HIGH

Suggested Fix

To ensure backward compatibility, add try/except ValidationError blocks around the parse_raw() calls to gracefully handle parsing failures for old data. The exception handler could attempt to re-parse the data with a default platform or log the failure without crashing the entire task. Alternatively, adjust the Pydantic model to handle the absence of the platform field in old data.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/preprod/size_analysis/models.py#L140-L143

Potential issue: The introduction of a Pydantic discriminated union on the `insights`
field in `SizeAnalysisResults` makes the `platform` field a required discriminator.
However, historical size analysis data stored in the database lacks this field. When a
size comparison task is run against an old baseline, the call to
`SizeAnalysisResults.parse_raw()` will fail with a `ValidationError`. Since these calls
are not wrapped in error handling blocks, the task will crash, leading to failed
analysis comparisons and incomplete results.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a valid issue. The platform field has a default value on both models:

class AndroidInsightResults(BaseModel):
    platform: Literal["android"] = "android"

class AppleInsightResults(BaseModel):
    platform: Literal["apple"] = "apple"

When the discriminator field is missing from historical data, Pydantic does not raise a ValidationError. Instead, it falls back to left-to-right union validation and uses the default value.

Verified in Django shell:

>>> old_data = '{"analysis_duration": 0.5, "insights": {"image_optimization": {"total_savings": 1200, "optimizable_files": []}}}'
>>> result = SizeAnalysisResults.parse_raw(old_data)
>>> result.insights
AndroidInsightResults(platform='android', ...)  # Parses successfully, no crash

The behavior is:

  • Old data (no platform field): Parses successfully, maps to AndroidInsightResults, iOS-specific fields silently dropped (existing behavior)
  • New data (with platform field): Parses successfully, correctly maps to the right model type

This is the intended backwards-compatible behavior documented in the PR.

analysis_version: str | None = None
file_analysis: FileAnalysis | None = None
app_components: list[AppComponent] | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const FILE_DIFF_INSIGHT_TYPES = [
'multiple_native_library_archs',
'video_compression',
'image_optimization',
'strip_binary',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing frontend entry for alternate_icons_optimization insight

Medium Severity

The backend now generates file-level diffs for ImageOptimizationInsightResult, which is used by both image_optimization and alternate_icons_optimization fields in AppleInsightResults. However, only 'strip_binary' was added to FILE_DIFF_INSIGHT_TYPES in the frontend - 'alternate_icons_optimization' is missing. When comparison data for alternate icons optimization exists, the frontend will silently drop it (returning null in the map), and users won't see the insight despite the backend generating valid comparison data.

Fix in Cursor Fix in Web

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ticket only talks to LocalizedStringInsightResult, ImageOptimizationInsightResult and StripBinaryInsightResult

];

const GROUP_DIFF_INSIGHT_TYPES = ['duplicate_files', 'loose_images'];
Expand Down
Loading
Loading