Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
bc170bb
Initial plan
Copilot May 11, 2026
02ba4f7
Add abundance-profile dendrogram refinement for metabolite grouping
Copilot May 11, 2026
b4775a9
Address validation feedback in abundance grouping helper
Copilot May 11, 2026
f4c00f6
Polish abundance grouping helper robustness and clarity
Copilot May 11, 2026
2c51ed5
Apply minor cleanup from review feedback
Copilot May 11, 2026
e26d2f7
Add processing-tab controls for abundance similarity grouping
Copilot May 11, 2026
875b256
Refine naming for abundance threshold in grouping flow
Copilot May 11, 2026
b800c82
Add abundance profiles tab in experiment results
Copilot May 11, 2026
ffe00cf
Polish abundance profile plot axis label handling
Copilot May 11, 2026
670e617
Refine abundance plotting and zero-aware abundance grouping
Copilot May 11, 2026
6a8aba3
Refine post-reintegration grouping and boxplot grouping layout
Copilot May 11, 2026
ec98811
Address validation feedback on grouping and plot code clarity
Copilot May 11, 2026
1a3446b
Polish constant naming and grouping comments
Copilot May 11, 2026
3454eb5
Document grouping helpers and centralize boxplot constants
Copilot May 11, 2026
508a5f7
Refactor UI components and enhance functionality
chrboku May 11, 2026
2fea1cb
Implement sample peaks tab with navigation and dynamic plotting
chrboku May 11, 2026
5cab61c
minor ui adaption
chrboku May 12, 2026
330c387
minor adaptions
chrboku May 13, 2026
33e6c64
minor adaptions
chrboku May 15, 2026
192f304
added test import for databases
chrboku May 15, 2026
35e6524
minor adaption
chrboku May 15, 2026
81bcc9e
Add MSMS similarity tools, overview, and flexible MGF export
Copilot May 17, 2026
bcba628
Polish MSMS export/similarity code review feedback
Copilot May 17, 2026
930446a
Refine MSMS export code readability and consistency
Copilot May 17, 2026
8c77bf4
Tidy MSMS helper naming and filename sanitization
Copilot May 17, 2026
e8fa614
- improved summary dialog
chrboku May 19, 2026
3c9b563
performance improvements for grouping
chrboku May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 0 additions & 16 deletions generateGUIS.bat

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies = [
"context>=0.1.0",
"polars>=1.39.3",
"desktop-notifier>=6.2.0",
"matchms>=0.33.0",
]

[project.optional-dependencies]
Expand Down
3,568 changes: 2,742 additions & 826 deletions src/MExtract.py

Large diffs are not rendered by default.

68 changes: 58 additions & 10 deletions src/annotateResultMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def annotateWithDatabases(
processedElement,
pwMaxSet=None,
pwValSet=None,
db_info_messages=None,
):
"""
Annotate metabolites by searching in databases using PolarsDB.
Expand All @@ -167,7 +168,7 @@ def annotateWithDatabases(

Args:
file: Path to the results file (PolarsDB format)
sheet_name: Name of the sheet to read from (e.g., "4_Reintegrated")
sheet_name: Name of the sheet to read from (e.g., "3_Reintegrated")
new_sheet_name: Name of the sheet to write to (e.g., "6_Annotated")
dbFiles: List of database file paths
useAdducts: List of adduct definitions [[name, mzoffset, polarity, charge, mCount], ...]
Expand All @@ -180,6 +181,8 @@ def annotateWithDatabases(
processedElement: Element to check in formulas (e.g., "C")
pwMaxSet: Progress callback for max value
pwValSet: Progress callback for current value
db_info_messages: Optional list; if provided, import summary messages are appended to it
(used to write the DB_info log sheet)

Returns:
List of annotation column names added
Expand All @@ -203,21 +206,31 @@ def annotateWithDatabases(
dbNames = []

# Import database files and collect database names
logging.info(f"Importing {len(dbFiles)} database file(s)")
logging.info(f"\n\n#########################################\nImporting {len(dbFiles)} database file(s)")
for dbFile in dbFiles:
logging.info(f"\n-------------------------\nImporting database file: {dbFile}")
dbName = dbFile[dbFile.rfind("/") + 1 : dbFile.rfind(".")]
dbNames.append(dbName)
errors = []
try:
imported, notImported = db.addEntriesFromFile(dbName, dbFile)
if notImported > 0:
logging.warning(f"Warning: {notImported} entries from database '{dbName}' were not imported successfully")
dbNames.append(dbName)
logging.info(f" Imported {imported} entries from database '{dbName}'")
imported, not_imported = db.addEntriesFromFile(dbName, dbFile, error_collector=errors)
if db_info_messages is not None:
db_info_messages.append(f"Database: {dbName} (file: {dbFile})")
db_info_messages.append(f" Imported: {imported} entries successfully")
if not_imported > 0:
db_info_messages.append(f" Not imported: {not_imported} entries due to errors")
for err in errors:
db_info_messages.append(f" {err}")
except IOError as e:
logging.error(f"Cannot open database file '{dbName}' at '{dbFile}': {e}")
logging.error(f" - Cannot process database file '{dbName}' at '{dbFile}': {e}")
if db_info_messages is not None:
db_info_messages.append(f"Database: {dbName} (file: {dbFile})")
db_info_messages.append(f" Fatal error: {e}")
continue
logging.info(f"\nFinished importing databases. Total imported entries: MZ: {len(db.dbEntriesMZ)}, Neutral: {len(db.dbEntriesNeutral)}")

# Optimize database for searching
logging.info("Optimizing database for searching")
logging.info("\nOptimizing database for searching")
db.optimizeDB()

# Add all necessary annotation columns BEFORE searching
Expand Down Expand Up @@ -363,7 +376,7 @@ def searchForRow(self, row):
if pwMaxSet is not None:
pwMaxSet(total_rows)

logging.info(f"Searching database hits for {total_rows} metabolites")
logging.info(f"Searching database hits for {total_rows} metabolites, parameters are ppm: {ppm}, correctppmPosMode: {correctppmPosMode}, correctppmNegMode: {correctppmNegMode}, rtError: {rtError}, useRt: {useRt}, checkXnInHits: {checkXnInHits}, processedElement: {processedElement}")

# Collect all hits for compound-focused sheet
all_compound_hits = []
Expand All @@ -378,6 +391,8 @@ def searchForRow(self, row):
# Search for database hits
hits_per_db, hit_objects = searcher.searchForRow(row)

print(f"Row {row_idx + 1}/{total_rows}, mz {row.get('MZ')}, rt {row.get('RT')}, charge {row.get('Charge')}, polarity {row.get('Ionisation_Mode')}, Xn {row.get('Xn')}\n - Found hits in {len(hits_per_db)} databases")

# Update only if there are hits
if hits_per_db:
for dbName, hit_data in hits_per_db.items():
Expand Down Expand Up @@ -483,6 +498,39 @@ def searchForRow(self, row):
return annotationColumns


def testDatabaseImports(dbFiles):
    """
    Test-import database files and return per-db import results without writing to any output file.

    Every file is imported into its own newly created ``searchDatabases.DBSearch``
    object; nothing is returned from or kept of that object besides the counts.

    Args:
        dbFiles: List of database file paths

    Returns:
        List of dicts, one per entry of dbFiles and in the same order:
        [{'db_name': str, 'db_file': str, 'imported': int, 'not_imported': int, 'errors': list[str]}, ...]
        If the import of a file raises, 'imported' and 'not_imported' are both 0
        and 'errors' starts with a "Fatal error: <exception>" message.
    """
    # Local import so the function does not rely on a module-level `import os`.
    import os

    results = []
    for dbFile in dbFiles:
        # Database name = file name without directory and without its last extension.
        # os.path is used instead of manual rfind("/") / rfind(".") slicing, which
        # dropped the last character for names without an extension and returned
        # an empty name when the only dot was inside a directory component.
        dbName = os.path.splitext(os.path.basename(dbFile))[0]

        # The list is handed to addEntriesFromFile as error_collector
        # (presumably filled with one message per rejected entry -- TODO confirm).
        errors = []
        db = searchDatabases.DBSearch()
        try:
            imported, not_imported = db.addEntriesFromFile(dbName, dbFile, error_collector=errors)
        except Exception as e:
            # Deliberately broad: this is a dry-run report, so any failure of one
            # file is recorded in its result entry instead of aborting the loop.
            errors.insert(0, f"Fatal error: {e}")
            imported = 0
            not_imported = 0

        results.append(
            {
                "db_name": dbName,
                "db_file": dbFile,
                "imported": imported,
                "not_imported": not_imported,
                "errors": errors,
            }
        )
    return results


def annotateWithSumFormulas(
file,
sheet_name,
Expand Down
Loading