Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions ami/main/management/commands/import_taxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ class Command(BaseCommand):
This is a very specific command for importing taxa from an exiting format. A more general
import command with support for all taxon ranks & fields should be written.


@TODO: Add --project parameter(s) to scope the taxa list to specific projects.
This would allow multiple projects to have taxa lists with the same name.
Usage would be: --project project-slug --project another-project-slug

Example taxa.json
```
Expand Down Expand Up @@ -234,7 +236,8 @@ def handle(self, *args, **options):
else:
list_name = pathlib.Path(fname).stem

taxalist, created = TaxaList.objects.get_or_create(name=list_name)
# Uses get_or_create_for_project with project=None to create a global list
taxalist, created = TaxaList.objects.get_or_create_for_project(name=list_name, project=None)
if created:
self.stdout.write(self.style.SUCCESS('Successfully created taxa list "%s"' % taxalist))

Expand Down
7 changes: 6 additions & 1 deletion ami/main/management/commands/update_taxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class Command(BaseCommand):
```

You can include any column that exists in the Taxon model.

@TODO: Add --project parameter(s) to scope the taxa list to specific projects.
This would allow multiple projects to have taxa lists with the same name.
Usage would be: --project project-slug --project another-project-slug
"""

help = "Update existing taxa with new data from a CSV file."
Expand Down Expand Up @@ -123,10 +127,11 @@ def handle(self, *args, **options):
incoming_taxa = read_csv(fname)

# Get or create taxa list if specified
# Uses get_or_create_for_project with project=None to create a global list
taxalist = None
if options["list"]:
list_name = options["list"]
taxalist, created = TaxaList.objects.get_or_create(name=list_name)
taxalist, created = TaxaList.objects.get_or_create_for_project(name=list_name, project=None)
if created:
self.stdout.write(self.style.SUCCESS(f"Created new taxa list '{list_name}'"))
else:
Expand Down
40 changes: 40 additions & 0 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3609,6 +3609,44 @@ def save(self, update_calculated_fields=True, *args, **kwargs):
self.update_calculated_fields(save=True)


class TaxaListQuerySet(BaseQuerySet):
def get_or_create_for_project(
self, name: str, project: "Project | None" = None, **defaults
) -> tuple["TaxaList", bool]:
"""
Get or create a TaxaList with uniqueness scoped to project.

- If project is None: looks for/creates a global list (no project associations)
- If project is provided: looks for/creates a list associated with that project

Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The **defaults parameter is documented in the type signature but not used in the 'get' path (line 3633). This is intentional since defaults only apply during creation, but could be confusing. Consider adding a docstring parameter description explaining that defaults are only used when creating a new TaxaList, not when retrieving an existing one.

Suggested change
Parameters:
name: The name of the taxa list to look up or create.
project: The project whose namespace the taxa list belongs to, or None
for a global (project-less) taxa list.
**defaults: Extra fields used only when creating a new TaxaList. If a
matching TaxaList already exists, these values are ignored and the
existing instance is returned unchanged.

Copilot uses AI. Check for mistakes.
Returns:
Tuple of (TaxaList, created: bool)
"""
if project is None:
# Global list: find list with this name that has no project associations
qs = self.filter(name=name).annotate(project_count=models.Count("projects")).filter(project_count=0)
else:
# Project-specific: find list with this name in this project
qs = self.filter(name=name, projects=project)

try:
return qs.get(), False
except self.model.DoesNotExist:
taxa_list = self.create(name=name, **defaults)
if project:
taxa_list.projects.add(project)
return taxa_list, True
except self.model.MultipleObjectsReturned:
# Handle existing duplicates gracefully - return the oldest one
taxa_list = qs.order_by("created_at").first()
assert taxa_list is not None # We know there's at least one
return taxa_list, False
Comment on lines +3625 to +3643
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method has a potential race condition. Between the DoesNotExist check and the create operation (followed by projects.add), another thread could create the same TaxaList, leading to duplicates. Consider wrapping the entire try-except block in a transaction.atomic() context manager and using select_for_update() on the queryset to prevent race conditions. Alternatively, handle IntegrityError if a unique constraint is added in the future.

Suggested change
if project is None:
# Global list: find list with this name that has no project associations
qs = self.filter(name=name).annotate(project_count=models.Count("projects")).filter(project_count=0)
else:
# Project-specific: find list with this name in this project
qs = self.filter(name=name, projects=project)
try:
return qs.get(), False
except self.model.DoesNotExist:
taxa_list = self.create(name=name, **defaults)
if project:
taxa_list.projects.add(project)
return taxa_list, True
except self.model.MultipleObjectsReturned:
# Handle existing duplicates gracefully - return the oldest one
taxa_list = qs.order_by("created_at").first()
assert taxa_list is not None # We know there's at least one
return taxa_list, False
with transaction.atomic():
if project is None:
# Global list: find list with this name that has no project associations
qs = (
self.filter(name=name)
.annotate(project_count=models.Count("projects"))
.filter(project_count=0)
)
else:
# Project-specific: find list with this name in this project
qs = self.filter(name=name, projects=project)
try:
# Lock matching rows, if any, to avoid concurrent modifications
return qs.select_for_update().get(), False
except self.model.DoesNotExist:
try:
taxa_list = self.create(name=name, **defaults)
except IntegrityError:
# Another transaction may have created the same TaxaList concurrently.
# Re-fetch and return the existing instance.
taxa_list = qs.get()
return taxa_list, False
else:
if project:
taxa_list.projects.add(project)
return taxa_list, True
except self.model.MultipleObjectsReturned:
# Handle existing duplicates gracefully - return the oldest one
taxa_list = qs.order_by("created_at").first()
assert taxa_list is not None # We know there's at least one
return taxa_list, False

Copilot uses AI. Check for mistakes.
Comment on lines +3613 to +3643
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new get_or_create_for_project method lacks test coverage. Consider adding tests that verify: 1) Creating a new global list (project=None), 2) Creating a new project-specific list, 3) Retrieving an existing global list, 4) Retrieving an existing project-specific list, 5) Handling duplicate names gracefully by returning the oldest, and 6) Ensuring that global lists and project-specific lists with the same name are properly distinguished.

Copilot uses AI. Check for mistakes.


class TaxaListManager(models.Manager.from_queryset(TaxaListQuerySet)):
pass


@final
class TaxaList(BaseModel):
"""A checklist of taxa"""
Expand All @@ -3619,6 +3657,8 @@ class TaxaList(BaseModel):
taxa = models.ManyToManyField(Taxon, related_name="lists")
projects = models.ManyToManyField("Project", related_name="taxa_lists")

objects: TaxaListManager = TaxaListManager()

class Meta:
ordering = ["-created_at"]
verbose_name_plural = "Taxa Lists"
Expand Down
3 changes: 2 additions & 1 deletion ami/ml/models/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,9 @@ def get_or_create_taxon_for_classification(

:return: The Taxon object
"""
taxa_list, created = TaxaList.objects.get_or_create(
taxa_list, created = TaxaList.objects.get_or_create_for_project(
name=f"Taxa returned by {algorithm.name}",
project=None, # Algorithm taxa lists are global
)
if created:
logger.info(f"Created new taxa list {taxa_list}")
Expand Down