Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions app/services/gsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,13 @@ def run_gsea_from_dataframe(
else:
res_df["Link"] = "https://www.ebi.ac.uk/chembl/visualise"

# --- Size = number of genes defined in GMT ---
# --- Size and full gene list from GMT ---
res_df["Pathway size"] = res_df["ID"].map(
lambda x: len(id_to_genes.get(x, [])) if pd.notna(x) and x != "" else 0
)
res_df["Pathway genes"] = res_df["ID"].map(
lambda x: ",".join(id_to_genes.get(x, [])) if pd.notna(x) and x != "" else ""
)

rename_map = {
"Term": "Pathway",
Expand All @@ -274,7 +277,8 @@ def run_gsea_from_dataframe(
res_df.groupby(
[
"ID", "Link", "Pathway", "ES", "NES", "FDR", "p-value",
"Sidak's p-value", "Number of input genes", "Leading edge genes", "Pathway size",
"Sidak's p-value", "Number of input genes", "Leading edge genes",
"Pathway size", "Pathway genes",
],
dropna=False,
)["Parent pathway"]
Expand Down Expand Up @@ -310,7 +314,7 @@ def safe_int_col(df_, col_name):
})

# Ensure string columns are properly handled
string_columns = ['Leading edge genes', 'Parent pathway']
string_columns = ['Leading edge genes', 'Pathway genes', 'Parent pathway']
for col in string_columns:
if col in res_df.columns:
res_df[col] = res_df[col].astype(str).replace('nan', '')
Expand Down
Loading