Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 157 additions & 15 deletions src/firetower/incidents/hooks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from dataclasses import dataclass
from typing import Any

from django.conf import settings
from django.contrib.auth.models import User
Expand All @@ -15,6 +16,7 @@
)
from firetower.integrations.services import (
DatadogService,
LinearService,
PagerDutyService,
SlackService,
)
Expand Down Expand Up @@ -857,6 +859,139 @@
logger.exception(f"Failed to post to feed channel for {ctx.channel_name}")


def _linear_issue_title(incident: Incident) -> str:
if incident.is_private:
return f"[{incident.incident_number}] Private Incident"
return f"[{incident.incident_number}] {incident.title}"


def _sync_linear_title(incident: Incident) -> None:
if not settings.LINEAR or not incident.linear_parent_issue_id:
return
try:
Comment thread
sentry[bot] marked this conversation as resolved.
linear_service = LinearService()
linear_service.update_issue(
incident.linear_parent_issue_id,
title=_linear_issue_title(incident),
)
except Exception:
logger.exception(
f"Failed to update Linear issue title for incident {incident.id}"
)


LINEAR_PARENT_DESCRIPTION = (
"Relate action items to this ticket to have them tracked by Firetower. "
"Child issues or other relations (related, blocking, etc.) will all work. "
"Do not update title or captain here, use Firetower for that."
)


MAX_CLAIM_ATTEMPTS = 5


def _claim_linear_issue(
Comment thread
spalmurray marked this conversation as resolved.
linear_service: LinearService,
incident: Incident,
team_id: str,
project_id: str | None,
) -> dict[str, Any] | None:
identifier = incident.incident_number

for _ in range(MAX_CLAIM_ATTEMPTS):
issue = linear_service.get_issue(identifier)
Comment thread
spalmurray marked this conversation as resolved.
if issue:
return issue
# Created issue may get a different identifier; discard it and retry lookup
Comment thread
sentry[bot] marked this conversation as resolved.
result = linear_service.create_issue("Placeholder", "", team_id, project_id)
if not result:
logger.warning(
f"Failed to create placeholder Linear issue for {identifier}"
)

Check warning on line 910 in src/firetower/incidents/hooks.py

View check run for this annotation

@sentry/warden / warden: code-review

_claim_linear_issue leaks up to N-1 orphaned Linear issues per incident

Each loop iteration that calls `create_issue` but then finds a mismatched identifier discards the created placeholder without deleting it, leaving abandoned Linear issues. With `MAX_CLAIM_ATTEMPTS = 5`, up to 4 orphaned placeholder issues can be created per incident if Linear assigns sequential identifiers slowly.
return None
Comment thread
spalmurray marked this conversation as resolved.

Comment thread
spalmurray marked this conversation as resolved.
return None


def create_linear_parent_issue(incident: Incident) -> None:
linear_config = settings.LINEAR
if not linear_config:
return
team_id = str(linear_config.get("TEAM_ID", ""))
if not team_id:
return

linear_link, created = ExternalLink.objects.get_or_create(
incident=incident,
type=ExternalLinkType.LINEAR,
defaults={"url": ""},

Check warning on line 927 in src/firetower/incidents/hooks.py

View check run for this annotation

@sentry/warden / warden: find-bugs

TOCTOU race allows duplicate Linear parent issues when `linear_parent_issue_id` is not yet persisted

The guard on line 927 checks the stale in-memory `incident.linear_parent_issue_id` rather than the DB value; a second concurrent call sees `created=False` (ExternalLink already exists) but `incident.linear_parent_issue_id` is still `None`, so it bypasses the guard and creates a second Linear issue. Add `incident.refresh_from_db(fields=["linear_parent_issue_id"])` before the guard check.
)
if not created and incident.linear_parent_issue_id:
logger.info(f"Incident {incident.id} already has a Linear link, skipping")
return
Comment thread
cursor[bot] marked this conversation as resolved.
Comment thread
spalmurray marked this conversation as resolved.

try:
linear_service = LinearService()
project_id = str(linear_config.get("PROJECT_ID", "")) or None
sync_identifiers = linear_config.get("SYNC_IDENTIFIERS", False)
title = _linear_issue_title(incident)

if sync_identifiers:
issue = _claim_linear_issue(linear_service, incident, team_id, project_id)
if not issue:
linear_link.delete()
logger.warning(
f"Failed to claim Linear issue for incident {incident.id}"
)
return

states = linear_service.get_workflow_states(team_id)
started_state_id = states.get("started") if states else None
if not linear_service.update_issue(
issue["id"],
title=title,
description=LINEAR_PARENT_DESCRIPTION,
state_id=started_state_id,
):
linear_link.delete()
logger.warning(
f"Failed to update claimed Linear issue for incident {incident.id}"
)
return
else:
issue = linear_service.create_issue(
title, LINEAR_PARENT_DESCRIPTION, team_id, project_id
)
if not issue:
linear_link.delete()
logger.warning(
f"Failed to create Linear issue for incident {incident.id}"
)
return
Comment thread
sentry[bot] marked this conversation as resolved.
Comment thread
sentry[bot] marked this conversation as resolved.

linear_link.url = issue["url"]
linear_link.save(update_fields=["url"])

incident.linear_parent_issue_id = issue["id"]
incident.save(update_fields=["linear_parent_issue_id"])
except Exception:
linear_link.delete()
logger.exception(
f"Failed to create Linear parent issue for incident {incident.id}"
)
return

try:
incident_url = _build_incident_url(incident)
linear_service.create_attachment(
issue["id"], incident_url, f"Firetower: {incident.incident_number}"
)
except Exception:
logger.exception(
f"Failed to create Linear attachment for incident {incident.id}"
)


def on_incident_created(incident: Incident) -> None:
# Use get_or_create to atomically claim the ExternalLink row before calling
# the Slack API. If two concurrent requests both reach this point, only one
Expand Down Expand Up @@ -962,7 +1097,14 @@
# appears first in the channel.
_create_datadog_notebook(incident, channel_id)
_create_troubleshooting_doc(incident, channel_id)

try:

Check warning on line 1101 in src/firetower/incidents/hooks.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[4P8-6WD] TOCTOU race allows duplicate Linear parent issues when `linear_parent_issue_id` is not yet persisted (additional location)

The guard on line 927 checks the stale in-memory `incident.linear_parent_issue_id` rather than the DB value; a second concurrent call sees `created=False` (ExternalLink already exists) but `incident.linear_parent_issue_id` is still `None`, so it bypasses the guard and creates a second Linear issue. Add `incident.refresh_from_db(fields=["linear_parent_issue_id"])` before the guard check.
create_linear_parent_issue(incident)
except Exception:
logger.exception(
f"Failed to create Linear parent issue for incident {incident.id}"
)


def on_status_changed(incident: Incident, old_status: str) -> None:
channel_id: str | None = None
Expand Down Expand Up @@ -1046,31 +1188,31 @@
def on_title_changed(incident: Incident) -> None:
try:
channel_id = _get_channel_id(incident)
if not channel_id:
return

_slack_service.set_channel_topic(channel_id, build_channel_topic(incident))
if channel_id:
_slack_service.set_channel_topic(channel_id, build_channel_topic(incident))
except Exception:
logger.exception(f"Error in on_title_changed for incident {incident.id}")

_sync_linear_title(incident)


def on_visibility_changed(incident: Incident) -> None:
try:
channel_id = _get_channel_id(incident)
if not channel_id:
return

visibility = "private" if incident.is_private else "public"
incident_url = _build_incident_url(incident)
message = (
f"This incident has been marked as *{visibility}* in Firetower. "
f"If you want to make this channel {visibility}, you will need a Slack admin to make the change.\n"
f"<{incident_url}|View in Firetower>"
)
_slack_service.post_message(channel_id, message)
if channel_id:
visibility = "private" if incident.is_private else "public"
incident_url = _build_incident_url(incident)
message = (
f"This incident has been marked as *{visibility}* in Firetower. "
f"If you want to make this channel {visibility}, you will need a Slack admin to make the change.\n"
f"<{incident_url}|View in Firetower>"
)
_slack_service.post_message(channel_id, message)
except Exception:
logger.exception(f"Error in on_visibility_changed for incident {incident.id}")

_sync_linear_title(incident)


def on_captain_changed(incident: Incident) -> None:
try:
Expand Down
12 changes: 10 additions & 2 deletions src/firetower/incidents/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,16 @@
stats = ActionItemsSyncStats()

if not incident.linear_parent_issue_id:
stats.skipped = True
return stats
# Backfill for incidents created before Linear integration

Check warning on line 219 in src/firetower/incidents/services.py

View check run for this annotation

@sentry/warden / warden: code-review

Concurrent calls to sync_action_items_from_linear can create duplicate Linear parent issues

Two concurrent requests (e.g. simultaneous GET `/action-items/` calls for the same incident) both see `linear_parent_issue_id` as `None`, both enter the backfill branch, and both call `create_linear_parent_issue`. The guard inside that function (`not created and incident.linear_parent_issue_id`) uses the stale in-memory `incident` object, so the second caller proceeds past it and creates a second Linear issue. Consider adding a `select_for_update()` on the incident row before the backfill check, or re-fetching `linear_parent_issue_id` from the DB inside `create_linear_parent_issue` instead of checking the in-memory value.
from firetower.incidents.hooks import ( # noqa: PLC0415
create_linear_parent_issue,
)

create_linear_parent_issue(incident)
incident.refresh_from_db(fields=["linear_parent_issue_id"])

Check warning on line 225 in src/firetower/incidents/services.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[4P8-6WD] TOCTOU race allows duplicate Linear parent issues when `linear_parent_issue_id` is not yet persisted (additional location)

The guard on line 927 checks the stale in-memory `incident.linear_parent_issue_id` rather than the DB value; a second concurrent call sees `created=False` (ExternalLink already exists) but `incident.linear_parent_issue_id` is still `None`, so it bypasses the guard and creates a second Linear issue. Add `incident.refresh_from_db(fields=["linear_parent_issue_id"])` before the guard check.
if not incident.linear_parent_issue_id:
stats.skipped = True
return stats
Comment thread
spalmurray marked this conversation as resolved.

if not force and incident.action_items_last_synced_at:
time_since_sync = timezone.now() - incident.action_items_last_synced_at
Expand Down
Loading
Loading