chrboku · Copilot · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/docs/screenshots/experiment-results-abundance-profiles-tab.png b/docs/screenshots/experiment-results-abundance-profiles-tab.png
diff --git a/docs/screenshots/experiment-results-msms-tools.png b/docs/screenshots/experiment-results-msms-tools.png
diff --git a/docs/screenshots/processing-tab-abundance-similarity-controls.png b/docs/screenshots/processing-tab-abundance-similarity-controls.png
diff --git a/generateGUIS.bat b/generateGUIS.bat
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,6 +45,7 @@ dependencies = [
     "context>=0.1.0",
     "polars>=1.39.3",
     "desktop-notifier>=6.2.0",
+    "matchms>=0.33.0",
 ]
 
 [project.optional-dependencies]

diff --git a/src/MExtract.py b/src/MExtract.py
diff --git a/src/annotateResultMatrix.py b/src/annotateResultMatrix.py
@@ -153,6 +153,7 @@ def annotateWithDatabases(
     processedElement,
     pwMaxSet=None,
     pwValSet=None,
+    db_info_messages=None,
 ):
     """
     Annotate metabolites by searching in databases using PolarsDB.
@@ -167,7 +168,7 @@ def annotateWithDatabases(
 
     Args:
         file: Path to the results file (PolarsDB format)
-        sheet_name: Name of the sheet to read from (e.g., "4_Reintegrated")
+        sheet_name: Name of the sheet to read from (e.g., "3_Reintegrated")
         new_sheet_name: Name of the sheet to write to (e.g., "6_Annotated")
         dbFiles: List of database file paths
         useAdducts: List of adduct definitions [[name, mzoffset, polarity, charge, mCount], ...]
@@ -180,6 +181,8 @@ def annotateWithDatabases(
         processedElement: Element to check in formulas (e.g., "C")
         pwMaxSet: Progress callback for max value
         pwValSet: Progress callback for current value
+        db_info_messages: Optional list; if provided, import summary messages are appended to it
+            (used to write the DB_info log sheet)
 
     Returns:
         List of annotation column names added
@@ -203,21 +206,31 @@ def annotateWithDatabases(
     dbNames = []
 
     # Import database files and collect database names
-    logging.info(f"Importing {len(dbFiles)} database file(s)")
+    logging.info(f"\n\n#########################################\nImporting {len(dbFiles)} database file(s)")
     for dbFile in dbFiles:
+        logging.info(f"\n-------------------------\nImporting database file: {dbFile}")
         dbName = dbFile[dbFile.rfind("/") + 1 : dbFile.rfind(".")]
+        dbNames.append(dbName)
+        errors = []
         try:
-            imported, notImported = db.addEntriesFromFile(dbName, dbFile)
-            if notImported > 0:
-                logging.warning(f"Warning: {notImported} entries from database '{dbName}' were not imported successfully")
-            dbNames.append(dbName)
-            logging.info(f"  Imported {imported} entries from database '{dbName}'")
+            imported, not_imported = db.addEntriesFromFile(dbName, dbFile, error_collector=errors)
+            if db_info_messages is not None:
+                db_info_messages.append(f"Database: {dbName} (file: {dbFile})")
+                db_info_messages.append(f"  Imported: {imported} entries successfully")
+                if not_imported > 0:
+                    db_info_messages.append(f"  Not imported: {not_imported} entries due to errors")
+                for err in errors:
+                    db_info_messages.append(f"  {err}")
         except IOError as e:
-            logging.error(f"Cannot open database file '{dbName}' at '{dbFile}': {e}")
+            logging.error(f"   - Cannot process database file '{dbName}' at '{dbFile}': {e}")
+            if db_info_messages is not None:
+                db_info_messages.append(f"Database: {dbName} (file: {dbFile})")
+                db_info_messages.append(f"  Fatal error: {e}")
             continue
+    logging.info(f"\nFinished importing databases. Total imported entries: MZ: {len(db.dbEntriesMZ)}, Neutral: {len(db.dbEntriesNeutral)}")
 
     # Optimize database for searching
-    logging.info("Optimizing database for searching")
+    logging.info("\nOptimizing database for searching")
     db.optimizeDB()
 
     # Add all necessary annotation columns BEFORE searching
@@ -363,7 +376,7 @@ def searchForRow(self, row):
     if pwMaxSet is not None:
         pwMaxSet(total_rows)
 
-    logging.info(f"Searching database hits for {total_rows} metabolites")
+    logging.info(f"Searching database hits for {total_rows} metabolites, parameters are ppm: {ppm}, correctppmPosMode: {correctppmPosMode}, correctppmNegMode: {correctppmNegMode}, rtError: {rtError}, useRt: {useRt}, checkXnInHits: {checkXnInHits}, processedElement: {processedElement}")
 
     # Collect all hits for compound-focused sheet
     all_compound_hits = []
@@ -378,6 +391,8 @@ def searchForRow(self, row):
         # Search for database hits
         hits_per_db, hit_objects = searcher.searchForRow(row)
 
+        print(f"Row {row_idx + 1}/{total_rows}, mz {row.get('MZ')}, rt {row.get('RT')}, charge {row.get('Charge')}, polarity {row.get('Ionisation_Mode')}, Xn {row.get('Xn')}\n   - Found hits in {len(hits_per_db)} databases")
+
         # Update only if there are hits
         if hits_per_db:
             for dbName, hit_data in hits_per_db.items():
@@ -483,6 +498,39 @@ def searchForRow(self, row):
     return annotationColumns
 
 
+def testDatabaseImports(dbFiles):
+    """
+    Test-import database files and return per-db import results without writing to any output file.
+
+    Every file is imported into its own, freshly created DBSearch instance.
+
+    Args:
+        dbFiles: List of database file paths
+
+    Returns:
+        List of dicts: [{'db_name': str, 'db_file': str, 'imported': int, 'not_imported': int, 'errors': list[str]}, ...]
+        If an import fails, both counts are 0 and "Fatal error: <exception>" is the first entry of 'errors'.
+    """
+    import os  # local import, only needed to derive the database name from the path
+    results = []
+    for dbFile in dbFiles:
+        # File name without its last extension. Unlike slicing at rfind("/") / rfind("."), this also works for
+        # names without an extension, dots in directory names and (on Windows) backslash separators.
+        dbName = os.path.splitext(os.path.basename(dbFile))[0]
+        errors = []
+        imported = not_imported = 0
+        try:
+            # Created inside the try so a failing constructor is reported per database instead of aborting the run
+            db = searchDatabases.DBSearch()
+            imported, not_imported = db.addEntriesFromFile(dbName, dbFile, error_collector=errors)
+        except Exception as e:
+            errors.insert(0, f"Fatal error: {e}")
+        results.append(
+            {"db_name": dbName, "db_file": dbFile, "imported": imported, "not_imported": not_imported, "errors": errors}
+        )
+    return results
+
+
 def annotateWithSumFormulas(
     file,
     sheet_name,