From e42e3864e79d387eba9d9f9e436e4d39d56646b1 Mon Sep 17 00:00:00 2001 From: polrus Date: Wed, 6 May 2026 15:21:10 +0100 Subject: [PATCH] feat(gsea): add pathway genes to GSEA results --- app/services/gsea.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/app/services/gsea.py b/app/services/gsea.py index b86142f..1d1aeb1 100644 --- a/app/services/gsea.py +++ b/app/services/gsea.py @@ -244,10 +244,13 @@ def run_gsea_from_dataframe( else: res_df["Link"] = "https://www.ebi.ac.uk/chembl/visualise" - # --- Size = number of genes defined in GMT --- + # --- Size and full gene list from GMT --- res_df["Pathway size"] = res_df["ID"].map( lambda x: len(id_to_genes.get(x, [])) if pd.notna(x) and x != "" else 0 ) + res_df["Pathway genes"] = res_df["ID"].map( + lambda x: ",".join(id_to_genes.get(x, [])) if pd.notna(x) and x != "" else "" + ) rename_map = { "Term": "Pathway", @@ -274,7 +277,8 @@ def run_gsea_from_dataframe( res_df.groupby( [ "ID", "Link", "Pathway", "ES", "NES", "FDR", "p-value", - "Sidak's p-value", "Number of input genes", "Leading edge genes", "Pathway size", + "Sidak's p-value", "Number of input genes", "Leading edge genes", + "Pathway size", "Pathway genes", ], dropna=False, )["Parent pathway"] @@ -310,7 +314,7 @@ def safe_int_col(df_, col_name): }) # Ensure string columns are properly handled - string_columns = ['Leading edge genes', 'Parent pathway'] + string_columns = ['Leading edge genes', 'Pathway genes', 'Parent pathway'] for col in string_columns: if col in res_df.columns: res_df[col] = res_df[col].astype(str).replace('nan', '')