diff --git a/assets/NO_FILE b/assets/DUMMY_MSA similarity index 100% rename from assets/NO_FILE rename to assets/DUMMY_MSA diff --git a/assets/NO_FILE_PAE b/assets/DUMMY_PAE similarity index 100% rename from assets/NO_FILE_PAE rename to assets/DUMMY_PAE diff --git a/assets/comparison_template.html b/assets/comparison_template.html deleted file mode 100644 index fffe4b133..000000000 --- a/assets/comparison_template.html +++ /dev/null @@ -1,820 +0,0 @@ - - - - - - - Protein structure comparison - - - - - - - - - - - - - - - -
- -
- -
- - - -
- -
- - - - - -
- -
- -
- -
-
Navigation
-
-
- Scroll up/down - to zoom in and out -
-
- Click + drag - to rotate the structure -
-
- CTRL + click + drag - to move the structure -
-
- Click - an atom to bring it into focus -
-
-
-
-
Display
-
- - -
-
-
-
- -
-
-
- -
-
    -
    - -
    -
    -
    Information
    -
    -
    Program: *prog_name*
    -
    ID: *sample_name*
    -
    - Average pLDDT: - -
    -
    -
    -
    -
    Download
    -
    - - -
    -
    -
    -
    -
    -
    pLDDT
    -
    -
    -
    -
    -
    -
    -
    Sequence Coverage
    -
    -
    - -
    - -
    -
    -
    - - - -
    -
    -

    - The Australian BioCommons - is supported by - Bioplatforms Australia -

    -

    - Bioplatforms Australia - is enabled by - NCRIS -

    -
    -
    -
    - - - diff --git a/assets/report_template.html b/assets/report_template.html index 48f644970..71031d0f2 100644 --- a/assets/report_template.html +++ b/assets/report_template.html @@ -2,9 +2,8 @@ - - Protein structure prediction + Protein structure report - + - - -
    @@ -293,12 +244,12 @@
    Information
    -
    Program: *prog_name*
    -
    ID: *sample_name*
    +
    Program:
    +
    ID:
    Average pLDDT: - +
    @@ -423,66 +374,44 @@ -
    -
    -
    -
    -
    -
    -
    - - +
    +
    Sequence Coverage – MSA
    -
    - -
    +
    +
    -
    +
    Residue confidence - pLDDT
    -
    -
    -
    +
    -
    -
    + +
    Residue-pair alignment error - PAE
    -
    -
    -
    -
    +
    +
    +
    @@ -539,7 +468,7 @@
    ' + html = html.replace('', f'{config_script}\n', 1) + + # Generate sequence coverage plot from first MSA file + seq_cov_html = None + if msa_files: + # Filter out tools that don't generate MSAs (e.g. ESMFold) - if MSA file is a dummy placeholder, skip the section entirely + + valid_msa = [(m, _tool_program_label(m)) for m in msa_files if not os.path.basename(m).startswith("DUMMY_")] + + if valid_msa: + seq_cov_sections = [] + for msa_file, tool_label in valid_msa: + seq_cov_fig = generate_sequence_coverage_plot(msa_file) + # In comparison mode, label each coverage plot with its tool name + if report_type == "comparison" and len(valid_msa) > 1: + seq_cov_fig.update_layout( + title=dict(text=f"Sequence Coverage — {tool_label}") + ) + seq_cov_sections.append( + seq_cov_fig.to_html( + full_html=False, + include_plotlyjs="cdn", + config=PLOTLY_CONFIG, + ) + ) + seq_cov_html = "\n".join(seq_cov_sections) + + # Replace or remove optional sections + if seq_cov_html: + html = html.replace('
    ', seq_cov_html, 1) else: - for i, plddt_values_str in enumerate(output_data): - plddt_per_model[i] = [] - plddt_per_model[i] = [float(x) for x in plddt_values_str.strip().split()] + html = re.sub(r'.*?', '', html, flags=re.DOTALL) - fig = go.Figure() - for idx, (model_name, value_plddt) in enumerate(plddt_per_model.items()): - rank_label = os.path.splitext(pdb[idx])[0] - fig.add_trace( - go.Scatter( - x=list(range(len(value_plddt))), - y=value_plddt, - mode="lines", - name=rank_label, - text=[f"({i}, {value:.2f})" for i, value in enumerate(value_plddt)], - hoverinfo="text", - ) - ) - fig.update_layout( - title=dict(text="Predicted LDDT per position", x=0.5, xanchor="center"), - xaxis=dict( - title="Positions", showline=True, linecolor="black", gridcolor="WhiteSmoke", minallowed=0, maxallowed=len(value_plddt)-1 - ), - yaxis=dict( - title="Predicted LDDT", - range=[0, 100], - fixedrange=True, - showline=True, - linecolor="black", - gridcolor="WhiteSmoke", - ), - legend=dict(yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1), - plot_bgcolor="white", - width=600, - height=600, - modebar_remove=["toImage", "zoomIn", "zoomOut"], - ) - html_content = fig.to_html( + # Generate the pLDDT plot and convert to HTML + plddt_fig = generate_plddt_plot(parsed_structures, labels=model_labels) + plddt_html = plddt_fig.to_html( full_html=False, include_plotlyjs="cdn", - config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + config=PLOTLY_CONFIG, ) + html = html.replace('
    ', plddt_html, 1) - with open( - f"{out_dir}/{name+('_' if name else '')}coverage_LDDT.html", "w" - ) as out_file: - out_file.write(html_content) - - if args.pae and not args.pae.endswith('NO_FILE_PAE'): - pae_fig = generate_pae_plot(args.pae, out_dir, name) - pae_html_content = pae_fig.to_html( + # Generate PAE plot from first PAE file (TODO: toggle PAE with model selection), Not used in comparison report + if pae_files: + pae_fig = generate_pae_plot(pae_files[0]) + pae_html = pae_fig.to_html( full_html=False, include_plotlyjs="cdn", - config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, - ) - with open( - f"{out_dir}/{name+('_' if name else '')}PAE.html", "w" - ) as pae_out_file: - pae_out_file.write(pae_html_content) - -def generate_plots(msa_path, plddt_paths, name, out_dir): - msa = [] - with open(msa_path, "r") as in_file: - for line in in_file: - msa.append([int(x) for x in line.strip().split()]) - - seqid = [] - for sequence in msa: - matches = [ - 1.0 if first == other else 0.0 for first, other in zip(msa[0], sequence) - ] - seqid.append(sum(matches) / len(matches)) - - seqid_sort = sorted(range(len(seqid)), key=seqid.__getitem__) - - non_gaps = [] - for sequence in msa: - non_gaps.append( - [float(num != 21) if num != 21 else float("nan") for num in sequence] - ) - - sorted_non_gaps = [non_gaps[i] for i in seqid_sort] - final = [] - for sorted_seq, identity in zip(sorted_non_gaps, [seqid[i] for i in seqid_sort]): - final.append( - [ - value * identity if not isinstance(value, str) else value - for value in sorted_seq - ] + config=PLOTLY_CONFIG, ) - - # Plotting Sequence Coverage using Plotly - fig = go.Figure() - fig.add_trace( - go.Heatmap( - z=final, - colorscale="Rainbow", - zmin=0, - zmax=1, - ) - ) - fig.update_layout( - title="Sequence coverage", xaxis_title="Positions", yaxis_title="Sequences" - ) - # Save as interactive HTML instead of an image - fig.savefig(f"{out_dir}/{name+('_' if name else '')}seq_coverage.png") - - # Plotting Predicted LDDT per position using Plotly - plddt_per_model = OrderedDict() - plddt_paths.sort() - for plddt_path in plddt_paths: - with open(plddt_path, "r") as in_file: - plddt_per_model[os.path.basename(plddt_path)[:-4]] = [ - float(x) for x in in_file.read().strip().split() - ] - - i = 0 - for model_name, value_plddt in plddt_per_model.items(): - fig = go.Figure() - fig.add_trace( - go.Scatter( - x=list(range(len(value_plddt))), - y=value_plddt, - mode="lines", - name=model_name, - ) - ) - fig.update_layout(title="Predicted LDDT per Position") - fig.savefig(f"{out_dir}/{name+('_' if name else '')}coverage_LDDT_{i}.png") - i += 1 - - - -def align_structures(structures): - parser = PDB.PDBParser(QUIET=True) - structures = [ - parser.get_structure(f"Structure_{i}", pdb) for i, pdb in enumerate(structures) - ] - ref_structure = structures[0] - - common_atoms = set( - f"{atom.get_parent().get_parent().get_id()}-{atom.get_parent().get_id()[1]}-{atom.name}" - for atom in ref_structure.get_atoms() if not atom.element == 'H' - ) - #print(common_atoms) - for i, structure in enumerate(structures[1:], start=1): - common_atoms = common_atoms.intersection( - set( - f"{atom.get_parent().get_parent().get_id()}-{atom.get_parent().get_id()[1]}-{atom.name}" - for atom in structure.get_atoms() - ) - ) - - ref_atoms = [ - atom - for atom in ref_structure.get_atoms() - if f"{atom.get_parent().get_parent().get_id()}-{atom.get_parent().get_id()[1]}-{atom.name}" in common_atoms - ] - # print(ref_atoms) - super_imposer = PDB.Superimposer() - aligned_structures = [structures[0]] # Include the reference structure in the list - - for i, structure in enumerate(structures[1:], start=1): - target_atoms = [ - atom - for atom in structure.get_atoms() - if f"{atom.get_parent().get_parent().get_id()}-{atom.get_parent().get_id()[1]}-{atom.name}" in common_atoms - ] - - super_imposer.set_atoms(ref_atoms, target_atoms) - super_imposer.apply(structure.get_atoms()) - - aligned_structure = f"aligned_structure_{i}.pdb" - io = PDB.PDBIO() - io.set_structure(structure) - io.save(aligned_structure) - aligned_structures.append(aligned_structure) - - return aligned_structures - - -def pdb_to_lddt(struct_files, generate_tsv): - struct_files_sorted = struct_files - struct_files_sorted.sort() - - output_lddt = [] - averages = [] - - for struct_file in struct_files_sorted: - plddt_values = [] - - if struct_file.endswith('.pdb'): - parser = PDB.PDBParser(QUIET=True) - suffix = ".pdb" - elif struct_file.endswith('.cif'): - parser = PDB.MMCIFParser(QUIET=True) - suffix = ".cif" - else: - raise NotImplementedError("Reporting only supported for .pdb and .cif filetypes") - structure = parser.get_structure("", struct_file) - - for residue in structure.get_residues(): - res_pLDDT_tot = 0 - res_atom_count = 0 - - for atom in residue.get_atoms(): - res_atom_count +=1 - res_pLDDT_tot += atom.get_bfactor() - - # Residue-level mean for ESMfold atom-level pLDDT - res_pLDDT_ave = res_pLDDT_tot/res_atom_count - - if res_pLDDT_ave < 1.0: - res_pLDDT_ave *= 100 - plddt_values.append(res_pLDDT_ave) - - # Calculate the average PLDDT value for the current file - if plddt_values: - avg_plddt = sum(plddt_values) / len(plddt_values) - averages.append(round(avg_plddt, 3)) - else: - averages.append(0.0) - - if generate_tsv == "y": - output_file = f"{struct_file.replace(suffix, '')}_plddt.tsv" - with open(output_file, "w") as outfile: - outfile.write(" ".join(map(str, plddt_values)) + "\n") - output_lddt.append(output_file) - else: - plddt_values_string = " ".join(map(str, plddt_values)) - output_lddt.append(plddt_values_string) - - return output_lddt, averages - - -print("Starting...") - -version = "1.0.0" -model_name = { - "esmfold": "ESMFold", - "alphafold2": "AlphaFold2", - "alphafold3": "Alphafold3", - "colabfold": "ColabFold", - "rosettafold_all_atom": "RosettaFold All-Atom", - "helixfold3": "HelixFold3", - "rosettafold2na": "RoseTTAFold2NA", - "boltz": "Boltz" -} - -parser = argparse.ArgumentParser() -parser.add_argument("--type", dest="in_type") -parser.add_argument( - "--generate_tsv", choices=["y", "n"], default="n", dest="generate_tsv" -) -parser.add_argument("--msa", dest="msa", default="NO_FILE") -parser.add_argument("--pdb", dest="pdb", required=True, nargs="+") -parser.add_argument("--pae", dest="pae", default="NO_FILE") -parser.add_argument("--name", dest="name") -parser.add_argument("--output_dir", dest="output_dir") -parser.add_argument("--html_template", dest="html_template") -parser.add_argument("--version", action="version", version=f"{version}") -parser.set_defaults(output_dir="") -parser.set_defaults(in_type="esmfold") -parser.set_defaults(name="") -args = parser.parse_args() - -lddt_data, lddt_averages = pdb_to_lddt(args.pdb, args.generate_tsv) - -generate_output_images( - args.msa, lddt_data, args.name, args.output_dir, args.in_type, args.generate_tsv, args.pdb -) - -print("generating html report...") -structures = args.pdb -structures.sort() -aligned_structures = align_structures(structures) - -io = PDB.PDBIO() -ref_structure_path = "aligned_structure_0.pdb" -io.set_structure(aligned_structures[0]) -io.save(ref_structure_path) -aligned_structures[0] = ref_structure_path - -proteinfold_template = open(args.html_template, "r").read() -proteinfold_template = proteinfold_template.replace("*sample_name*", args.name) -proteinfold_template = proteinfold_template.replace( - "*prog_name*", model_name[args.in_type.lower()] -) - -args_pdb_array_js = ",\n".join([f'"{model}"' for model in structures]) -proteinfold_template = re.sub( - r"const MODELS = \[.*?\];", # Match the existing MODELS array in HTML template - f"const MODELS = [\n {args_pdb_array_js}\n];", # Replace with the new array - proteinfold_template, - flags=re.DOTALL, -) - -averages_js_array = f"const LDDT_AVERAGES = {lddt_averages};" -proteinfold_template = proteinfold_template.replace( - "const LDDT_AVERAGES = [];", averages_js_array -) - -i = 0 -for structure in aligned_structures: - proteinfold_template = proteinfold_template.replace( - f"*_data_ranked_{i}.pdb*", open(structure, "r").read().replace("\n", "\\n") - ) - i += 1 - -if not args.msa.endswith("NO_FILE"): - image_path = f"{args.output_dir}/{args.name}_{args.in_type}_seq_coverage.png" - with open(image_path, "rb") as in_file: - proteinfold_template = proteinfold_template.replace( - "seq_coverage.png", - f"data:image/png;base64,{base64.b64encode(in_file.read()).decode('utf-8')}", - ) -else: - pattern = r'
    .*?(.*?)*?
    \s*
    \s*
    \s*' - proteinfold_template = re.sub(pattern, "", proteinfold_template, flags=re.DOTALL) - -with open( - f"{args.output_dir}/{args.name + ('_' if args.name else '')}coverage_LDDT.html", - "r", -) as in_file: - lddt_html = in_file.read() - proteinfold_template = proteinfold_template.replace( - '
    ', lddt_html + html = html.replace('
    ', pae_html, 1) + else: + html = re.sub(r'.*?', '', html, flags=re.DOTALL) + + # Write the final HTML report + with open(f"{out_dir}/{name}_{report_type}_report.html", "w") as out_file: + out_file.write(html) + +def main(): + parser = argparse.ArgumentParser(description="Generate protein structure reports.") + parser.add_argument("--name", required=True, help="Name of the report.") + parser.add_argument("--output_dir", required=True, help="Output directory for the report.") + parser.add_argument("--structs", required=True, nargs="+", help="List of structure file paths (.pdb or .cif).") + parser.add_argument("--msa", nargs="+", default=None, help="MSA file path(s).") + parser.add_argument("--pae", nargs="+", default=None, help="PAE file path(s).") + parser.add_argument("--prog", default="proteinfold", choices=["proteinfold", "alphafold2", "alphafold3", "esmfold", "colabfold", "rosettafold-all-atom", "rosettafold2na", "helixfold3", "boltz", "comparison"], type=str.lower, help="The program used to generate the structures.") + parser.add_argument("--report_type", default="standard", choices=["standard", "comparison"], help="The type of report to generate.") + parser.add_argument("--html_template", required=True, help="Path to the HTML report template.") + + args = parser.parse_args() + + print("Generating report.....") + + html_template = args.html_template + + ## Both these values could be missing - ESMFold for MSA, many others for PAE + #if args.msa and os.path.basename(args.msa[0]).startswith("DUMMY_"): + # args.msa = None + #if args.pae and os.path.basename(args.pae[0]).startswith("DUMMY_"): + # args.pae = None + ## But caught by a more broad catch-all below for any future metrics, just MSA and PAE are the most explicit cases so left as examples + for attr in vars(args): + val = getattr(args, attr) + if isinstance(val, list) and val and os.path.basename(val[0]).startswith("DUMMY_"): + setattr(args, attr, None) + + generate_report( + name=args.name, + out_dir=args.output_dir, + structures=args.structs, + num_structs_limit=5, + msa_files=args.msa, + pae_files=args.pae, + prog=args.prog, + report_type=args.report_type, + html_template=html_template, ) -if not args.pae.endswith("NO_FILE_PAE"): - with open( - f"{args.output_dir}/{args.name + ('_' if args.name else '')}PAE.html", - "r", - ) as pae_in_file: - pae_html = pae_in_file.read() - proteinfold_template = proteinfold_template.replace( - '
    ', pae_html - ) -else: - pattern = r'
    .*?(.*?)*?
    \s*' - proteinfold_template = re.sub(pattern, "", proteinfold_template, flags=re.DOTALL) - -with open( - f"{args.output_dir}/{args.name}_{args.in_type.lower()}_report.html", "w" -) as out_file: - out_file.write(proteinfold_template) +if __name__ == "__main__": + main() diff --git a/bin/plot_utils.py b/bin/plot_utils.py new file mode 100644 index 000000000..bf85049ac --- /dev/null +++ b/bin/plot_utils.py @@ -0,0 +1,333 @@ +import plotly.graph_objects as go +from Bio import PDB +from io import StringIO +import numpy as np +import os + +def structure_to_pdb_string(structure): + """Serialize a BioPython Structure object to a PDB-format string in memory. + Useful util to work with object directly and not have to write intermediate to disk + """ + io = PDB.PDBIO() + io.set_structure(structure) + string_io = StringIO() + io.save(string_io) + return string_io.getvalue() + + +def reset_residue_numbers(structure): + """ + Resets residue numbering in a PDB file, because ESMFold starts renumbering + at 1 for each chain and increments only when encountering a new residue. + """ + if str(structure).endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif str(structure).endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + + struct_obj = parser.get_structure("structure", structure) + + for model in struct_obj: + for chain in model: + for idx, residue in enumerate(chain.get_residues(), start=1): + # Do a swap in place to renumber the residue, the other entries in the tuple can stay the same + # See: https://biopython.org/docs/1.76/api/Bio.PDB.Chain.html#Bio.PDB.Chain.Chain.__getitem__ + het_atom, _, insertion_code = residue.get_id() + residue.id = (het_atom, idx, insertion_code) + + return struct_obj + +# TODO: Barcelona team to implement AF3 +def sort_structures_by_rank(structures, prog): + """ + Sorts a list of structures based on their rank. Handles different program naming conventions. + + Returns: + List of structure files sorted by rank (always returns list, even for single structures) + """ + + #TODO: some new modes don't have rank sorting logic implemented yet, *i.e.* rosettafold2na + if prog == "alphafold2": + # AlphaFold2 structures are named with [run]/ranked_[rank].pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).replace('ranked_', '').split('.')[0])) + elif prog == "colabfold": + # ColabFold structures are named with [run]_unrelaxed_rank_[rank]_alphafold2_ptm_model_[num]_seed_[seed].pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_')[3])) + elif prog == "helixfold3": + # HelixFold3 structures are named with .../[run]/[run]-rank[rank]/predicted_structure.pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.dirname(x).split('rank')[-1])) + elif prog == "boltz": + # Boltz structures are named with ..._model_[diffusion_samples-1].[pdb|cif] + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_model_')[-1].split('.')[0])) + elif prog == "esmfold" or prog == "rosettafold-all-atom": + # ESMFold and RoseTTAFold only produce one structure + sorted_structures = structures if isinstance(structures, list) else [structures] + else: + print(f"Warning: Sorting not implemented for {prog}. Using original order.") + sorted_structures = structures if isinstance(structures, list) else [structures] + + return sorted_structures if isinstance(sorted_structures, list) else [sorted_structures] + +def align_structures(structures): + """ + Align multiple structures against the first (reference) structure. + Uses common atoms for superimposition (handles cases where structures aren't complete). + + Returns: + List of BioPython structure objects aligned to the first structure + """ + if not structures: + raise ValueError("No structures provided for alignment.") + + parsed_structures = [] + # Conceivably there could be a mix of structure file types, particularly in comparison mode + for idx, structure in enumerate(structures): + if structure.endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif structure.endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + parsed_structures.append(parser.get_structure(f"structure-{idx}", structure)) + + ref_structure = parsed_structures[0] + + def get_atom_ids(structure): + # Note: this is a *set* of atom_ids due to the {} surrounding the comprehension + return {(atom.get_parent().get_parent().get_id(), atom.get_parent().get_id(), atom.name) for atom in structure.get_atoms() if atom.element != 'H'} + + # Find common atoms across all structures (progressive intersection) + # This allows alignment even if structures are incomplete or have different atom coverage + common_atoms = get_atom_ids(ref_structure) + for structure in parsed_structures[1:]: + common_atoms.intersection_update(get_atom_ids(structure)) + + if not common_atoms: + raise ValueError("No common atoms found between structures for alignment.") + + def extract_atoms(structure, atom_ids): + # Must return a sorted list (not set) so ref/target atoms correspond positionally + atoms = [atom for atom in structure.get_atoms() + if (atom.get_parent().get_parent().get_id(), atom.get_parent().get_id(), atom.name) in atom_ids] + return sorted(atoms, key=lambda a: (a.get_parent().get_parent().get_id(), a.get_parent().get_id(), a.name)) + + ref_atoms = extract_atoms(ref_structure, common_atoms) + + # The aligned structures will be the parsed structures aligned to the common atoms of the reference structure + super_imposer = PDB.Superimposer() + aligned_structures = [ref_structure] # Reference needs no alignment + for idx, structure in enumerate(parsed_structures[1:], start=1): + target_atoms = extract_atoms(structure, common_atoms) + super_imposer.set_atoms(list(ref_atoms), list(target_atoms)) + super_imposer.apply(structure.get_atoms()) + aligned_structures.append(structure) + + return aligned_structures + +def plddt_from_struct_b_factor(structure): + """ + Extracts residue pLDDT values from the b-factor column using BioPython. + Accepts either a file path (str/Path) or a pre-parsed BioPython Structure object. + """ + if isinstance(structure, (str, os.PathLike)): + if str(structure).endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif str(structure).endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + struct_obj = parser.get_structure(os.path.basename(str(structure)), str(structure)) + else: + # Already a BioPython structure object + struct_obj = structure + + res_plddts = [] + + for model in struct_obj: + for chain in model: + for residue in chain: + atom_list = residue.get_unpacked_list() + atom_plddt_tot = 0 + # Handle both atom-wise and residue-wise pLDDT values + for atom in residue: + atom_plddt = atom.get_bfactor() + atom_plddt_tot += atom_plddt + + res_plddt = float(atom_plddt_tot / len(atom_list)) if atom_list else 0.0 + + # Ensure values are in [0, 100] range + if res_plddt < 1: + res_plddt *= 100 + + res_plddts.append(res_plddt) + + res_plddts = np.array(res_plddts) + res_plddts = np.round(res_plddts, 2) + + return res_plddts + +def generate_plddt_plot(structures, labels=None): + """ + Generate a Plotly figure for pLDDT per position for given structures. + """ + # Support labelling from external scheme, otherwise default to Rank order-based labels + if labels is None: + labels = [f"Rank {idx}" for idx in range(len(structures))] + + fig = go.Figure() + max_residues = 0 + for idx, struct in enumerate(structures): + plddts = plddt_from_struct_b_factor(struct) + max_residues = max(max_residues, len(plddts)) + fig.add_trace( + go.Scatter( + x=list(range(len(plddts))), + y=plddts, + mode="lines", + name=labels[idx], + text=[f"({pos}, {value:.2f})" for pos, value in enumerate(plddts)], + hoverinfo="text", + ) + ) + fig.update_layout( + xaxis=dict( + title="Residue position", showline=True, linecolor="black", gridcolor="WhiteSmoke", + minallowed=0, + maxallowed=max_residues - 1, #prevent scrolling past residues, just zoom-ins. Max across all structures is just being very defensive in case of tool differences (though shouldn't happen) + ), + yaxis=dict( + title="pLDDT", + range=[0, 100], + minallowed=0, + maxallowed=100, #prevent scrolling, just zoom-ins + showline=True, + linecolor="black", + gridcolor="WhiteSmoke", + ), + legend=dict( + yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1 + ), + plot_bgcolor="white", + autosize=True, + ) + + return fig + +def process_msas(msa_path): + msa = np.loadtxt(msa_path, dtype=int) + + query_sequence = msa[0] + seqid_match = np.mean(msa == query_sequence, axis=1) + + # Sort sequences by sequence identity + seqid_sort_indices = np.argsort(seqid_match) + sorted_msa = msa[seqid_sort_indices] + sorted_seqid = seqid_match[seqid_sort_indices] + + non_gaps_msas = np.where(sorted_msa != 21, 1.0, np.nan) + + # Scale non-gap positions by sequence identity + final_msas = non_gaps_msas * sorted_seqid[:, None] + + return final_msas, non_gaps_msas + +def generate_sequence_coverage_plot(msa_path): + """ + Generate an interactive Plotly heatmap for sequence coverage with depth overlay. + """ + # Pastel rainbow_r: matplotlib rainbow_r colours blended ~60% with white + PASTEL_RAINBOW_R = [ + [0.00, "#CC99FF"], # pale violet (low identity) + [0.17, "#9999FF"], # pale blue + [0.33, "#99FFFF"], # pale cyan + [0.50, "#99FF99"], # pale green + [0.67, "#FFFF99"], # pale yellow + [0.83, "#FFCC99"], # pale orange + [1.00, "#FF9999"], # pale red (high identity) + ] + + final_msas, non_gaps_msas = process_msas(msa_path) + n_seqs = final_msas.shape[0] + seq_depth_counts = np.sum(~np.isnan(non_gaps_msas), axis=0) + + fig = go.Figure() + + # Heatmap — sequence identity, NaN gaps rendered as white + fig.add_trace( + go.Heatmap( + z=final_msas, + colorscale=PASTEL_RAINBOW_R, + zmin=0, + zmax=1, + colorbar=dict( + title=dict(text="Sequence
    identity", side="right"), + thickness=15, + len=0.75, + ), + name="", + ) + ) + + # Coverage depth line — same y-axis as heatmap (both in units of sequences) + fig.add_trace( + go.Scatter( + x=list(range(len(seq_depth_counts))), + y=seq_depth_counts, + mode="lines", + line=dict(color="black", width=1.5), + name="Coverage depth", + ) + ) + + fig.update_layout( + xaxis=dict( + title="Residue position", + showline=True, + linecolor="black", + gridcolor="WhiteSmoke", + fixedrange=True, + ), + yaxis=dict( + title="Sequences", + range=[0, n_seqs], + showline=True, + linecolor="black", + gridcolor="WhiteSmoke", + fixedrange=True, + ), + plot_bgcolor="white", + legend=dict(yanchor="bottom", y=0.02, xanchor="right", x=0.98), + autosize=True, + ) + + return fig + +def generate_pae_plot(pae_path): + """ + Generate an interactive Plotly heatmap for Predicted Aligned Error (PAE) data. + """ + pae = np.genfromtxt(pae_path, delimiter="\t") + max_pae = 31.75 # Capped from AlphaFold's value + fig = go.Figure() + + # Add heatmap with green colorscale + fig.add_trace( + go.Heatmap( + z=pae, + colorscale="Greens_r", + zmin=0, + zmax=max_pae, + colorbar={"title": "PAE (Å)"}, + ) + ) + + fig.update_layout( + title=dict(text="Predicted Aligned Error", x=0.5, xanchor="center"), + xaxis=dict(title="Scored Residue"), + yaxis=dict(title="Aligned Residue"), + autosize=True, + ) + + return fig diff --git a/main.nf b/main.nf index 857415a56..3c046cc8b 100644 --- a/main.nf +++ b/main.nf @@ -67,15 +67,14 @@ workflow NFCORE_PROTEINFOLD { main: ch_samplesheet = samplesheet - ch_multiqc = channel.empty() ch_versions = channel.empty() ch_report_input = channel.empty() ch_top_ranked_model = channel.empty() requested_modes = params.mode.toLowerCase().split(",") requested_modes_size = requested_modes.size() - ch_dummy_file = channel.fromPath("$projectDir/assets/NO_FILE") - ch_dummy_file_pae = channel.fromPath("$projectDir/assets/NO_FILE_PAE") + ch_dummy_msa = channel.fromPath("$projectDir/assets/DUMMY_MSA") + ch_dummy_pae = channel.fromPath("$projectDir/assets/DUMMY_PAE") // // WORKFLOW: Run alphafold2 @@ -136,25 +135,10 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, PREPARE_ALPHAFOLD2_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) - ch_report_input = ch_report_input - .mix(ALPHAFOLD2 - .out - .pdb - .map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) + ch_report_input = ch_report_input + .mix( + ALPHAFOLD2.out.pdb .join(ALPHAFOLD2.out.msa) .join(ALPHAFOLD2.out.pae) ) @@ -209,27 +193,10 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD3_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD3.out.multiqc_report) ch_versions = ch_versions.mix(ALPHAFOLD3.out.versions) ch_report_input = ch_report_input .mix( - ALPHAFOLD3 - .out - .pdb - .map { it -> - [ - it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /.*_ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + ALPHAFOLD3.out.pdb .join(ALPHAFOLD3.out.msa) .join(ALPHAFOLD3.out.pae) ) @@ -270,22 +237,10 @@ workflow NFCORE_PROTEINFOLD { params.colabfold_num_recycles ) - ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) ch_versions = ch_versions.mix(COLABFOLD.out.versions) - ch_report_input = ch_report_input - .mix(COLABFOLD.out.pdb.map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /_relaxed_rank_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + ch_report_input = ch_report_input + .mix( + COLABFOLD.out.pdb .join(COLABFOLD.out.msa) .join(COLABFOLD.out.pae) ) @@ -320,13 +275,13 @@ workflow NFCORE_PROTEINFOLD { params.esmfold_num_recycles ) - ch_multiqc = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ESMFOLD.out.versions) - ch_report_input = ch_report_input.mix( - ESMFOLD.out.pdb - .combine(ch_dummy_file) - .combine(ch_dummy_file_pae) - ) + ch_report_input = ch_report_input + .mix( + ESMFOLD.out.pdb + .combine(ch_dummy_msa) + .combine(ch_dummy_pae) + ) ch_top_ranked_model = ch_top_ranked_model.mix(ESMFOLD.out.pdb) } @@ -364,13 +319,14 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.rfaa_paper_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD_ALL_ATOM.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) - ch_report_input = ch_report_input.mix(ROSETTAFOLD_ALL_ATOM.out.pdb - .join(ROSETTAFOLD_ALL_ATOM.out.msa) - .join(ROSETTAFOLD_ALL_ATOM.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD_ALL_ATOM.out.pdb) + ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) + ch_report_input = ch_report_input + .mix( + ROSETTAFOLD_ALL_ATOM.out.pdb + .join(ROSETTAFOLD_ALL_ATOM.out.msa) + .join(ROSETTAFOLD_ALL_ATOM.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD_ALL_ATOM.out.pdb) } // @@ -434,22 +390,10 @@ workflow NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) - ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) - ch_report_input = ch_report_input - .mix(HELIXFOLD3.out.pdb.map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + ch_report_input = ch_report_input + .mix( + HELIXFOLD3.out.pdb .join(HELIXFOLD3.out.msa) .join(HELIXFOLD3.out.pae) ) @@ -495,18 +439,14 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD2NA_DBS.out.rna, PREPARE_ROSETTAFOLD2NA_DBS.out.rosettafold2na_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD2NA.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) - ch_report_input = ch_report_input - .mix( - ROSETTAFOLD2NA - .out - .pdb - .map { meta, pdb -> [ meta, [ pdb ] ] } - .join(ROSETTAFOLD2NA.out.msa) - .join(ROSETTAFOLD2NA.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD2NA.out.pdb) + ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) + ch_report_input = ch_report_input + .mix( + ROSETTAFOLD2NA.out.pdb + .join(ROSETTAFOLD2NA.out.msa) + .join(ROSETTAFOLD2NA.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD2NA.out.pdb) } // WORKFLOW: Run Boltz @@ -553,47 +493,30 @@ workflow NFCORE_PROTEINFOLD { PREPARE_COLABFOLD_DBS_BOLTZ.out.uniref30, params.use_msa_server ) - ch_multiqc = ch_multiqc.mix(BOLTZ.out.multiqc_report) - ch_versions = ch_versions.mix(BOLTZ.out.versions) - ch_report_input = ch_report_input.mix( - BOLTZ.out.pdb - .join(BOLTZ.out.msa) - .join(BOLTZ.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(BOLTZ.out.top_ranked_pdb) + ch_versions = ch_versions.mix(BOLTZ.out.versions) + ch_report_input = ch_report_input + .mix( + BOLTZ.out.pdb + .join(BOLTZ.out.msa) + .join(BOLTZ.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(BOLTZ.out.top_ranked_pdb) } // // POST PROCESSING: generate visualisation reports // - ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() - ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.empty() - ch_multiqc_logo = params.multiqc_logo ? channel.fromPath( params.multiqc_logo ).first() : channel.empty() - ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) - ch_comparison_template = channel.value(file("$projectDir/assets/comparison_template.html", checkIfExists: true)) + ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) POST_PROCESSING( - params.skip_visualisation, requested_modes_size, ch_report_input, ch_report_template, - ch_comparison_template, - params.skip_foldseek, - params.foldseek_db, - params.foldseek_db_path, - params.skip_multiqc, - params.outdir, ch_versions, - ch_multiqc, - ch_multiqc_config, - ch_multiqc_custom_config, - ch_multiqc_logo, - ch_multiqc_methods_description, ch_top_ranked_model ) emit: - multiqc_report = ch_multiqc + multiqc_report = POST_PROCESSING.out.multiqc_report } /* diff --git a/modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch/main.nf index ee567e2d8..da569daef 100644 --- a/modules/local/colabfold_batch/main.nf +++ b/modules/local/colabfold_batch/main.nf @@ -17,10 +17,10 @@ process COLABFOLD_BATCH { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_colabfold.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/*relaxed_rank_*.pdb") , emit: pdb + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_colabfold_msa.tsv") , emit: msa - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc - tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms path "versions.yml" , emit: versions @@ -87,6 +87,9 @@ process COLABFOLD_BATCH { touch ./raw/${meta.id}_scores_rank.json touch ./${meta.id}_0_pae.tsv touch ./${meta.id}_ptm.tsv + touch ./${meta.id}_iptm.tsv + touch ./${meta.id}_chainwise_ptm.tsv + touch ./${meta.id}_chainwise_iptm.tsv touch ./${meta.id}_plddt.tsv touch ./${meta.id}_colabfold_msa.tsv diff --git a/modules/local/compare_structures/environment.yml b/modules/local/compare_structures/environment.yml index 9f657a6fd..7feaeba52 100644 --- a/modules/local/compare_structures/environment.yml +++ b/modules/local/compare_structures/environment.yml @@ -4,6 +4,5 @@ channels: - bioconda dependencies: - conda-forge::biopython=1.84 - - conda-forge::matplotlib=3.9.2 - - conda-forge::pip=24.2 - conda-forge::plotly=5.24.1 + - conda-forge::pip=24.2 diff --git a/modules/local/compare_structures/main.nf b/modules/local/compare_structures/main.nf index b5fb52fc1..d92945944 100644 --- a/modules/local/compare_structures/main.nf +++ b/modules/local/compare_structures/main.nf @@ -24,9 +24,11 @@ process COMPARE_STRUCTURES { def args = task.ext.args ?: '' """ - generate_comparison_report.py \\ + generate_report.py \\ + --report_type comparison \\ + --prog comparison \\ --msa ${msa.join(' ')} \\ - --pdb ${pdb.join(' ')} \\ + --structs ${pdb.join(' ')} \\ --html_template ${template} \\ --output_dir ./ \\ --name ${meta.id} \\ @@ -35,18 +37,18 @@ process COMPARE_STRUCTURES { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') - generate_comparison_report.py: \$(python3 --version) + generate_report.py: \$(python3 --version) END_VERSIONS """ stub: """ - touch test_alphafold2_report.html + touch test_comparison_report.html cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') - generate_comparison_report.py: \$(python3 --version) + generate_report.py: \$(python3 --version) END_VERSIONS """ } diff --git a/modules/local/generate_report/environment.yml b/modules/local/generate_report/environment.yml index 07a5b9f11..36061e2ff 100644 --- a/modules/local/generate_report/environment.yml +++ b/modules/local/generate_report/environment.yml @@ -4,6 +4,5 @@ channels: - bioconda dependencies: - conda-forge::biopython=1.84 - - conda-forge::matplotlib=3.9.2 - - conda-forge::pip=24.2 - conda-forge::plotly=5.24.1 + - conda-forge::pip=24.2 diff --git a/modules/local/generate_report/main.nf b/modules/local/generate_report/main.nf index f33599828..408a86228 100644 --- a/modules/local/generate_report/main.nf +++ b/modules/local/generate_report/main.nf @@ -13,8 +13,6 @@ process GENERATE_REPORT { output: tuple val(meta), path ("*report.html") , emit: report - tuple val(meta), path ("*seq_coverage.png"), optional: true, emit: sequence_coverage - tuple val(meta), path ("*_LDDT.html") , emit: plddt path "versions.yml" , emit: versions when: @@ -25,14 +23,15 @@ process GENERATE_REPORT { """ generate_report.py \\ - --type ${meta.model} \\ + --report_type standard \\ + --prog ${meta.model} \\ --msa ${msa} \\ --pae ${pae} \\ - --pdb ${pdb.join(' ')} \\ + --structs ${pdb.join(' ')} \\ --html_template ${template} \\ --output_dir ./ \\ --name ${meta.id} \\ - $args \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -44,8 +43,6 @@ process GENERATE_REPORT { stub: """ touch test_alphafold2_report.html - touch test_seq_coverage.png - touch test_LDDT.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index eea900702..ac6ba5e4b 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -29,11 +29,11 @@ process RUN_ALPHAFOLD2 { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa // Note: alphafold2_model_preset == "monomer" the pae file won't exist, thus the optional - tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms path "versions.yml" , emit: versions @@ -104,6 +104,8 @@ process RUN_ALPHAFOLD2 { touch "${meta.id}_0_pae.tsv" touch "${meta.id}_ptm.tsv" touch "${meta.id}_iptm.tsv" + touch "${meta.id}_chainwise_ptm.tsv" + touch "${meta.id}_chainwise_iptm.tsv" mkdir "raw" touch "raw/ranked_0.pdb" touch "raw/ranked_1.pdb" diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index 30a581a32..8655dac86 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -27,11 +27,11 @@ process RUN_ALPHAFOLD2_PRED { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc //Note: alphafold2_model_preset == "monomer" the pae file won't exist. - tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms path "versions.yml" , emit: versions @@ -81,6 +81,10 @@ process RUN_ALPHAFOLD2_PRED { touch "${meta.id}_plddt.tsv" touch "${meta.id}_alphafold2_msa.tsv" touch "${meta.id}_0_pae.tsv" + touch "${meta.id}_ptm.tsv" + touch "${meta.id}_iptm.tsv" + touch "${meta.id}_chainwise_ptm.tsv" + touch "${meta.id}_chainwise_iptm.tsv" mkdir "raw/" touch "raw/ranked_0.pdb" touch "raw/ranked_1.pdb" diff --git a/modules/local/run_alphafold3/main.nf b/modules/local/run_alphafold3/main.nf index 48e38815e..e8f77e2e4 100644 --- a/modules/local/run_alphafold3/main.nf +++ b/modules/local/run_alphafold3/main.nf @@ -21,11 +21,14 @@ process RUN_ALPHAFOLD3 { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold3.cif") , emit: top_ranked_cif tuple val(meta), path ("raw/*ranked_*.cif") , emit: cif - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_alphafold3_msa.tsv") , emit: msa tuple val(meta), path ("${meta.id}_0_pae.tsv") , emit: pae + tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: paes tuple val(meta), path ("${meta.id}_ptm.tsv") , emit: ptms tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms + tuple val(meta), path ("${meta.id}_chainwise_ptm.tsv") , optional: true, emit: chainwise_ptm + tuple val(meta), path ("${meta.id}_chainwise_iptm.tsv") , optional: true, emit: chainwise_iptm path "versions.yml" , emit: versions when: @@ -134,6 +137,8 @@ process RUN_ALPHAFOLD3 { touch ${prefix}_0_pae.tsv touch ${prefix}_ptm.tsv touch ${prefix}_iptm.tsv + touch ${prefix}_chainwise_ptm.tsv + touch ${prefix}_chainwise_iptm.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_boltz/main.nf b/modules/local/run_boltz/main.nf index 0960aa4e9..9a8f019c6 100644 --- a/modules/local/run_boltz/main.nf +++ b/modules/local/run_boltz/main.nf @@ -18,21 +18,22 @@ process RUN_BOLTZ { output: tuple val(meta), path ("boltz_results_${meta.id}") , optional: true, emit: intermediates - tuple val(meta), path ("boltz_results_*/processed/msa/*.npz") , emit: msa - tuple val(meta), path ("boltz_results_*/processed/structures/*.npz") , emit: structures + tuple val(meta), path ("boltz_results_*/processed/structures/*.npz") , emit: structures_npz + tuple val(meta), path ("boltz_results_*/predictions/*/plddt_*model_0.npz") , emit: plddt_npz + tuple val(meta), path ("boltz_results_*/processed/msa/*.npz") , emit: msa_npz + tuple val(meta), path ("boltz_results_*/predictions/*/pae_*model_0.npz") , emit: pae_npz tuple val(meta), path ("boltz_results_*/predictions/*/confidence*.json") , emit: confidence - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc tuple val(meta), path ("${meta.id}_boltz.pdb") , emit: top_ranked_pdb tuple val(meta), path ("boltz_results_*/predictions/*/*.pdb") , emit: pdb - tuple val(meta), path ("boltz_results_*/predictions/*/plddt_*model_0.npz") , emit: plddt - tuple val(meta), path ("boltz_results_*/predictions/*/pae_*model_0.npz") , emit: pae - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt_raw - tuple val(meta), path ("${meta.id}_boltz_msa.tsv") , emit: msa_raw - tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: pae_raw - tuple val(meta), path ("${meta.id}_ptm.tsv") , emit: ptm_raw - tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptm_raw - tuple val(meta), path ("${meta.id}_chainwise_ptm.tsv") , emit: summary_chainwise_ptm_raw - tuple val(meta), path ("${meta.id}_chainwise_iptm.tsv") , optional: true, emit: chainwise_iptm_raw + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt + tuple val(meta), path ("${meta.id}_boltz_msa.tsv") , emit: msa + // Could potential remove the optional, but Boltz has the --write_full_pae False flag. + tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes + tuple val(meta), path ("${meta.id}_ptm.tsv") , emit: ptms + tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms + tuple val(meta), path ("${meta.id}_chainwise_ptm.tsv") , optional: true, emit: chainwise_ptm + tuple val(meta), path ("${meta.id}_chainwise_iptm.tsv") , optional: true, emit: chainwise_iptm path "versions.yml", emit: versions when: diff --git a/modules/local/run_esmfold/main.nf b/modules/local/run_esmfold/main.nf index dc4394c75..767d65082 100644 --- a/modules/local/run_esmfold/main.nf +++ b/modules/local/run_esmfold/main.nf @@ -13,7 +13,7 @@ process RUN_ESMFOLD { output: tuple val(meta), path ("${meta.id}_esmfold.pdb") , emit: top_ranked_pdb tuple val(meta), path ("*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt path "versions.yml" , emit: versions when: diff --git a/modules/local/run_helixfold3/main.nf b/modules/local/run_helixfold3/main.nf index b22417775..b3e8952e1 100644 --- a/modules/local/run_helixfold3/main.nf +++ b/modules/local/run_helixfold3/main.nf @@ -30,7 +30,7 @@ process RUN_HELIXFOLD3 { tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_helixfold3_msa.tsv") , emit: msa // If ${meta.id}-rank*/all_results.json" doesn't have PAE vales in the key, this will be empty tuple val(meta), path ("${meta.id}_1_pae.tsv") , emit: pae @@ -114,6 +114,8 @@ process RUN_HELIXFOLD3 { touch "${meta.id}_helixfold3_msa.tsv" touch "${meta.id}_ptm.tsv" touch "${meta.id}_iptm.tsv" + touch "${meta.id}_chainwise_ptm.tsv" + touch "${meta.id}_chainwise_iptm.tsv" touch "${meta.id}_1_pae.tsv" touch "${meta.id}_2_pae.tsv" touch "${meta.id}_3_pae.tsv" diff --git a/modules/local/run_rosettafold2na/main.nf b/modules/local/run_rosettafold2na/main.nf index 2650610e7..aeeec467a 100644 --- a/modules/local/run_rosettafold2na/main.nf +++ b/modules/local/run_rosettafold2na/main.nf @@ -20,7 +20,7 @@ process RUN_ROSETTAFOLD2NA { path ("raw/**") , emit: raw tuple val(meta), path("${meta.id}_rosettafold2na.pdb") , emit: top_ranked_pdb tuple val(meta), path("raw/*.pdb") , emit: pdb - tuple val(meta), path("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path("${meta.id}_rosettafold2na_msa.tsv") , emit: msa tuple val(meta), path("${meta.id}_0_pae.tsv") , emit: pae path "versions.yml" , emit: versions diff --git a/modules/local/run_rosettafold_all_atom/main.nf b/modules/local/run_rosettafold_all_atom/main.nf index 2c1147fa0..00154c388 100644 --- a/modules/local/run_rosettafold_all_atom/main.nf +++ b/modules/local/run_rosettafold_all_atom/main.nf @@ -20,7 +20,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { output: path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_rosettafold_all_atom.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_rosettafold_all_atom_msa.tsv") , emit: msa // I think there should always be PAE from the .pt PyTorch model. extract_metrics.py has condition import torch to handle this tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: paes diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index d100361ce..3c819e295 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -19,88 +19,65 @@ include { MULTIQC } from '../../modules/nf-core/multiqc/main' workflow POST_PROCESSING { take: - skip_visualisation requested_modes_size ch_report_input ch_report_template - ch_comparison_template - skip_foldseek - foldseek_db - foldseek_db_path - skip_multiqc - outdir ch_versions - ch_multiqc_rep - ch_multiqc_config - ch_multiqc_custom_config - ch_multiqc_logo - ch_multiqc_methods_description ch_top_ranked_model main: ch_comparison_report_files = channel.empty() - if (!skip_visualisation){ + if (!params.skip_visualisation){ + ch_report_input + .multiMap { meta, pdbs, msa, pae -> + full: [meta, pdbs, msa, pae] + msa_only: [meta, msa] + } + .set { ch_report_split } + GENERATE_REPORT( - ch_report_input, + ch_report_split.full, ch_report_template ) ch_versions = ch_versions.mix(GENERATE_REPORT.out.versions) if (requested_modes_size > 1){ - ch_dummy_file = channel.fromPath("$projectDir/assets/NO_FILE") - - def esm = ch_top_ranked_model.filter { it ->it[0].model == 'esmfold' } - def not_esm = ch_top_ranked_model.filter { it -> it[0].model != 'esmfold' } - - esm = esm - .map { it -> - [it[0], it[1]] - } - .merge(ch_dummy_file) - - not_esm = not_esm - .map { it -> [it[0], it[1]] } - .join(GENERATE_REPORT.out.sequence_coverage) - - not_esm.mix(esm).set{ch_comparison_report_files} - - ch_comparison_report_files - .map { it -> - [["id": it[0].id], it[0], it[1], it[2]] + // Multi-mode comparison: group top-ranked structures and MSA data from all modes + ch_top_ranked_model + .join(ch_report_split.msa_only) + .map { meta, pdb, msa -> + [["id": meta.id], meta, pdb, msa] } .groupTuple(by: [0], size: requested_modes_size) - .map { it -> - it[0].models=it[1].join(','); - [it[0], it[2], it[3]] + .map { key, model_meta_list, pdbs, msas -> + def models_str = model_meta_list.collect { it.model }.join(',') + [key + [models: models_str], pdbs, msas] } - .set { ch_comparison_report_input } + .multiMap { meta, pdbs, msas -> + def valid_msas = msas.findAll { !it.name.startsWith("DUMMY_") } + pdbs: [meta, pdbs.collect { it.name }] + msas: [meta, valid_msas.collect { it.name }] + allfiles: (pdbs + valid_msas).unique() + } + .set { ch_split } COMPARE_STRUCTURES( - ch_comparison_report_input - .map { it -> - [it[0], it[1].collect { file -> file.name} ] - }, - ch_comparison_report_input - .map { it -> - [ it[0], it[2].collect { file -> file.name } ] - }, - ch_comparison_report_input - .map { it -> - (it[1] + it[2]).unique() - }, - ch_comparison_template + ch_split.pdbs, + ch_split.msas, + ch_split.allfiles, + ch_report_template ) ch_versions = ch_versions.mix(COMPARE_STRUCTURES.out.versions) } } - if (!skip_foldseek) { + if (!params.skip_foldseek) { ch_foldseek_db = channel.value([ [ - id: foldseek_db, + id: params.foldseek_db, ], - file(foldseek_db_path, checkIfExists: true) + file(params.foldseek_db_path, checkIfExists: true) ]) FOLDSEEK_EASYSEARCH( ch_top_ranked_model, @@ -113,7 +90,7 @@ workflow POST_PROCESSING { // softwareVersionsToYAML(ch_versions) .collectFile( - storeDir: "${outdir}/pipeline_info", + storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'proteinfold_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true @@ -124,31 +101,25 @@ workflow POST_PROCESSING { // ch_multiqc_report = channel.empty() - if (!skip_multiqc) { - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) - ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_methods_description)) + if (!params.skip_multiqc) { + ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath(params.multiqc_config).first() : channel.empty() + ch_multiqc_logo = params.multiqc_logo ? channel.fromPath(params.multiqc_logo).first() : channel.empty() + ch_multiqc_methods_desc = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_methods_desc)) - ch_multiqc_files = channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + .mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + .mix(ch_collated_versions) MULTIQC ( - ch_multiqc_rep - .combine( - ch_multiqc_files - .collect() - .map { it -> [it] } - ) - .map { it -> [ it[0], it[1] + it[2] ] }, + ch_multiqc_files.collect().map { [[id: 'proteinfold', model: 'proteinfold'], it] }, ch_multiqc_config, - ch_multiqc_custom_config - .collect() - .ifEmpty([]), - ch_multiqc_logo - .collect() - .ifEmpty([]), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), [], [] ) diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf index acac78d49..16a481bbb 100644 --- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ -220,11 +220,12 @@ def getColabfoldAlphafold2ParamsPath() { return path } -def modeChannel(ch, mode) { +def modeChannel(ch, mode, asList = false) { return ch.map { meta, value -> def meta_clone = meta.clone() meta_clone.model = mode - [ meta_clone, value ] + def v = asList ? ((value instanceof List) ? value : [value]) : value + [ meta_clone, v ] } } diff --git a/tests/alphafold2_download.nf.test.snap b/tests/alphafold2_download.nf.test.snap index 7396db3e8..ee2d34625 100644 --- a/tests/alphafold2_download.nf.test.snap +++ b/tests/alphafold2_download.nf.test.snap @@ -10,18 +10,18 @@ "aria2": null }, "COMBINE_UNIPROT": { - "sed": 4.7 + "sed": 4.5 }, "DOWNLOAD_PDBMMCIF": { - "sed": 4.9, - "rsync": "3.3.0" + "sed": 4.5, + "rsync": "3.1.3" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_ALPHAFOLD2": { - "python": "unknown", + "python": "3.10.8", "alphafold2": "unknown", "jax": "unknown", "jaxlib": "unknown", @@ -119,10 +119,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:29:19.367471847", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:42:46.521360256" + } } } \ No newline at end of file diff --git a/tests/alphafold2_split.nf.test.snap b/tests/alphafold2_split.nf.test.snap index 4e07e946c..39b7c69ff 100644 --- a/tests/alphafold2_split.nf.test.snap +++ b/tests/alphafold2_split.nf.test.snap @@ -4,17 +4,17 @@ 7, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_ALPHAFOLD2_MSA": { - "python": null, + "python": "3.10.8", "alphafold2": "unknown", "numpy": "unknown", "biopython": "unknown" }, "RUN_ALPHAFOLD2_PRED": { - "python": null, + "python": "3.10.8", "alphafold2": "unknown", "jax": "unknown", "jaxlib": "unknown", @@ -30,14 +30,18 @@ "alphafold2/split_msa_prediction", "alphafold2/split_msa_prediction/T1024", "alphafold2/split_msa_prediction/T1024/T1024_alphafold2_msa.tsv", + "alphafold2/split_msa_prediction/T1024/T1024_iptm.tsv", "alphafold2/split_msa_prediction/T1024/T1024_plddt.tsv", + "alphafold2/split_msa_prediction/T1024/T1024_ptm.tsv", "alphafold2/split_msa_prediction/T1024/msa", "alphafold2/split_msa_prediction/T1024/msa/features.pkl", "alphafold2/split_msa_prediction/T1024/paes", "alphafold2/split_msa_prediction/T1024/paes/T1024_0_pae.tsv", "alphafold2/split_msa_prediction/T1026", "alphafold2/split_msa_prediction/T1026/T1026_alphafold2_msa.tsv", + "alphafold2/split_msa_prediction/T1026/T1026_iptm.tsv", "alphafold2/split_msa_prediction/T1026/T1026_plddt.tsv", + "alphafold2/split_msa_prediction/T1026/T1026_ptm.tsv", "alphafold2/split_msa_prediction/T1026/msa", "alphafold2/split_msa_prediction/T1026/msa/features.pkl", "alphafold2/split_msa_prediction/T1026/paes", @@ -56,11 +60,15 @@ ], [ "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -69,10 +77,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:29:36.167251574", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:45:45.745906851" + } } } \ No newline at end of file diff --git a/tests/alphafold3.nf.test.snap b/tests/alphafold3.nf.test.snap index b7907d944..fbd8af4f1 100644 --- a/tests/alphafold3.nf.test.snap +++ b/tests/alphafold3.nf.test.snap @@ -4,20 +4,20 @@ 11, { "FASTA_TO_ALPHAFOLD3_JSON": { - "python": "3.13.7" + "python": "3.10.8" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MMCIF2PDB_MODELS": { - "python": "3.12.7" + "python": "3.10.8" }, "MMCIF2PDB_TOP_RANKED": { - "python": "3.12.7" + "python": "3.10.8" }, "RUN_ALPHAFOLD3": { - "python": "unknown", + "python": "3.10.8", "alphafold3": "unknown", "jax": "unknown", "jaxlib": "unknown", @@ -34,6 +34,8 @@ "alphafold3", "alphafold3/T1024", "alphafold3/T1024/T1024_alphafold3_msa.tsv", + "alphafold3/T1024/T1024_chainwise_iptm.tsv", + "alphafold3/T1024/T1024_chainwise_ptm.tsv", "alphafold3/T1024/T1024_iptm.tsv", "alphafold3/T1024/T1024_plddt.tsv", "alphafold3/T1024/T1024_ptm.tsv", @@ -41,6 +43,8 @@ "alphafold3/T1024/paes/T1024_0_pae.tsv", "alphafold3/T1026", "alphafold3/T1026/T1026_alphafold3_msa.tsv", + "alphafold3/T1026/T1026_chainwise_iptm.tsv", + "alphafold3/T1026/T1026_chainwise_ptm.tsv", "alphafold3/T1026/T1026_iptm.tsv", "alphafold3/T1026/T1026_plddt.tsv", "alphafold3/T1026/T1026_ptm.tsv", @@ -76,11 +80,15 @@ ], [ "T1024_alphafold3_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_chainwise_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold3_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_chainwise_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -104,10 +112,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-03-31T18:10:37.500286431", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:49:12.687346079" + } } } \ No newline at end of file diff --git a/tests/boltz.nf.test.snap b/tests/boltz.nf.test.snap index 872fb60bd..82c48f5ed 100644 --- a/tests/boltz.nf.test.snap +++ b/tests/boltz.nf.test.snap @@ -4,8 +4,8 @@ 11, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MMSEQS_COLABFOLDSEARCH": { "colabfold_search": "unknown", @@ -15,7 +15,7 @@ "boltz": "unknown" }, "SPLIT_MSA": { - "python": "3.8.3" + "python": "3.10.8" }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -94,10 +94,10 @@ "B.csv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-03-31T18:15:20.627421945", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:53:06.410999759" + } } } \ No newline at end of file diff --git a/tests/colabfold_download.nf.test.snap b/tests/colabfold_download.nf.test.snap index 17f1bc576..a479d72a2 100644 --- a/tests/colabfold_download.nf.test.snap +++ b/tests/colabfold_download.nf.test.snap @@ -11,11 +11,11 @@ "colabfold_batch": "unknown" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MULTIFASTA_TO_CSV": { - "sed": 4.7 + "sed": 4.5 }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -29,12 +29,14 @@ "colabfold", "colabfold/T1024", "colabfold/T1024/T1024_colabfold_msa.tsv", + "colabfold/T1024/T1024_iptm.tsv", "colabfold/T1024/T1024_plddt.tsv", "colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", "colabfold/T1024/paes/T1024_0_pae.tsv", "colabfold/T1026", "colabfold/T1026/T1026_colabfold_msa.tsv", + "colabfold/T1026/T1026_iptm.tsv", "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", @@ -58,10 +60,12 @@ "file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ], "T1024_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -71,10 +75,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:30:46.467096595", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:56:29.675960575" + } } } \ No newline at end of file diff --git a/tests/colabfold_local.nf.test.snap b/tests/colabfold_local.nf.test.snap index e83fe4e2f..c0975a6ca 100644 --- a/tests/colabfold_local.nf.test.snap +++ b/tests/colabfold_local.nf.test.snap @@ -8,15 +8,15 @@ "colabfold_batch": "unknown" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MMSEQS_COLABFOLDSEARCH": { "colabfold_search": "unknown", "mmseqs": null }, "MULTIFASTA_TO_CSV": { - "sed": 4.7 + "sed": 4.5 }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -26,12 +26,14 @@ "colabfold", "colabfold/T1024", "colabfold/T1024/T1024_colabfold_msa.tsv", + "colabfold/T1024/T1024_iptm.tsv", "colabfold/T1024/T1024_plddt.tsv", "colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", "colabfold/T1024/paes/T1024_0_pae.tsv", "colabfold/T1026", "colabfold/T1026/T1026_colabfold_msa.tsv", + "colabfold/T1026/T1026_iptm.tsv", "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", @@ -52,10 +54,12 @@ ], [ "T1024_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -65,10 +69,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:31:17.651387259", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:59:52.019356523" + } } } \ No newline at end of file diff --git a/tests/colabfold_webserver.nf.test.snap b/tests/colabfold_webserver.nf.test.snap index 45182d1b0..15ef91916 100644 --- a/tests/colabfold_webserver.nf.test.snap +++ b/tests/colabfold_webserver.nf.test.snap @@ -8,11 +8,11 @@ "colabfold_batch": "unknown" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MULTIFASTA_TO_CSV": { - "sed": 4.7 + "sed": 4.5 }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -22,12 +22,14 @@ "colabfold", "colabfold/T1024", "colabfold/T1024/T1024_colabfold_msa.tsv", + "colabfold/T1024/T1024_iptm.tsv", "colabfold/T1024/T1024_plddt.tsv", "colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", "colabfold/T1024/paes/T1024_0_pae.tsv", "colabfold/T1026", "colabfold/T1026/T1026_colabfold_msa.tsv", + "colabfold/T1026/T1026_iptm.tsv", "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", @@ -48,10 +50,12 @@ ], [ "T1024_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -61,10 +65,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:31:59.021522555", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:02:44.668327349" + } } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 35fd6fee5..b202b6225 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -3,11 +3,11 @@ "content": [ { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_ALPHAFOLD2": { - "python": "unknown", + "python": "3.10.8", "alphafold2": "unknown", "jax": "unknown", "jaxlib": "unknown", @@ -64,10 +64,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:32:24.264902816", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T08:35:24.77961009" + } } } \ No newline at end of file diff --git a/tests/esmfold.nf.test.snap b/tests/esmfold.nf.test.snap index a23b1b5e7..0b8311e64 100644 --- a/tests/esmfold.nf.test.snap +++ b/tests/esmfold.nf.test.snap @@ -4,12 +4,12 @@ 5, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_ESMFOLD": { "esm-fold": "1.0.3", - "python": "unknown", + "python": "3.10.8", "pytorch": "unknown", "openfold": "unknown", "numpy": "unknown", @@ -45,10 +45,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-03-31T18:11:55.994664003", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:07:29.828926981" + } } } \ No newline at end of file diff --git a/tests/helixfold3.nf.test.snap b/tests/helixfold3.nf.test.snap index b41900a73..35ff2b3c6 100644 --- a/tests/helixfold3.nf.test.snap +++ b/tests/helixfold3.nf.test.snap @@ -4,11 +4,11 @@ 7, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_HELIXFOLD3": { - "python": "unknown" + "python": "3.10.8" }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -83,10 +83,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:33:05.481348845", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:10:21.963345902" + } } } \ No newline at end of file diff --git a/tests/rosettafold2na.nf.test.snap b/tests/rosettafold2na.nf.test.snap index fa79216f7..5540048c1 100644 --- a/tests/rosettafold2na.nf.test.snap +++ b/tests/rosettafold2na.nf.test.snap @@ -4,14 +4,14 @@ 4, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "ROSETTAFOLD2NA_FASTA": { "python": "$(python3 --version 2>/dev/null | sed 's/Python //g' || echo \"unknown\")" }, "RUN_ROSETTAFOLD2NA": { - "python": "unknown", + "python": "3.10.8", "rosettafold2na": "v0.2" }, "Workflow": { @@ -47,10 +47,10 @@ "rna_complex.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-03-31T18:12:20.628048711", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:13:14.502746624" + } } } \ No newline at end of file diff --git a/tests/rosettafold_all_atom.nf.test.snap b/tests/rosettafold_all_atom.nf.test.snap index 520a2e630..e224aa096 100644 --- a/tests/rosettafold_all_atom.nf.test.snap +++ b/tests/rosettafold_all_atom.nf.test.snap @@ -4,11 +4,11 @@ 7, { "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "RUN_ROSETTAFOLD_ALL_ATOM": { - "python": "unknown", + "python": "3.10.8", "rosettafold-all-atom": "unknown" }, "Workflow": { @@ -61,10 +61,10 @@ "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-03-31T18:12:32.445481634", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:16:06.423724852" + } } } \ No newline at end of file diff --git a/tests/split_fasta.nf.test.snap b/tests/split_fasta.nf.test.snap index fd2192bba..3b64db869 100644 --- a/tests/split_fasta.nf.test.snap +++ b/tests/split_fasta.nf.test.snap @@ -8,15 +8,15 @@ "colabfold_batch": "unknown" }, "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" + "python": "3.10.8", + "generate_report.py": "Python 3.10.8" }, "MMSEQS_COLABFOLDSEARCH": { "colabfold_search": "unknown", "mmseqs": null }, "MULTIFASTA_TO_CSV": { - "sed": 4.7 + "sed": 4.5 }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" @@ -26,12 +26,14 @@ "colabfold", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv", + "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_iptm.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv", + "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_iptm.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv", "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes", @@ -52,10 +54,12 @@ ], [ "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -65,10 +69,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-04-02T15:34:14.250153819", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-27T09:19:28.818804127" + } } } \ No newline at end of file diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 1c17cea1c..584e69e91 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -11,6 +11,8 @@ include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -49,7 +51,6 @@ workflow ALPHAFOLD2 { ch_top_ranked_pdb = channel.empty() ch_msa = channel.empty() ch_pae = channel.empty() - ch_multiqc_report = channel.empty() if (alphafold2_model_preset != 'multimer') { ch_samplesheet @@ -83,16 +84,6 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold2" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2.out.top_ranked_pdb) ch_msa = ch_msa.mix(RUN_ALPHAFOLD2.out.msa) @@ -143,16 +134,6 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2_PRED - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold2" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2_PRED.out.top_ranked_pdb) ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) ch_msa = ch_msa.mix(RUN_ALPHAFOLD2_PRED.out.msa) @@ -160,44 +141,16 @@ workflow ALPHAFOLD2 { ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } - ch_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - - ch_msa - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } - .set { ch_msa_final } - - ch_pae - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } - .set { ch_pae_final } - - ch_top_ranked_pdb_final = ch_top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } + modeChannel(ch_pdb, "alphafold2", true).set { ch_pdb_final } + modeChannel(ch_msa, "alphafold2").set { ch_msa_final } + modeChannel(ch_pae, "alphafold2").set { ch_pae_final } + ch_top_ranked_pdb_final = modeChannel(ch_top_ranked_pdb, "alphafold2") emit: top_ranked_pdb = ch_top_ranked_pdb_final // channel: [ meta, /path/to/*.pdb ] pdb = ch_pdb_final // channel: [ meta, /path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] // Would prefer channel: [ meta, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ meta, /path/to/*_0_pae.tsv] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/alphafold3.nf b/workflows/alphafold3.nf index 84ff42186..cd5deaf0e 100644 --- a/workflows/alphafold3.nf +++ b/workflows/alphafold3.nf @@ -12,6 +12,8 @@ include { RUN_ALPHAFOLD3 } from '../modules/local/run_alphafo include { MMCIF2PDB as MMCIF2PDB_TOP_RANKED } from '../modules/local/mmcif2pdb/main.nf' include { MMCIF2PDB as MMCIF2PDB_MODELS } from '../modules/local/mmcif2pdb/main.nf' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -41,7 +43,6 @@ workflow ALPHAFOLD3 { ch_pdb_final = channel.empty() ch_top_ranked_pdb = channel.empty() ch_msa_final = channel.empty() - ch_multiqc_report = channel.empty() FASTA_TO_ALPHAFOLD3_JSON(ch_samplesheet) ch_versions = ch_versions.mix(FASTA_TO_ALPHAFOLD3_JSON.out.versions) @@ -84,16 +85,7 @@ workflow ALPHAFOLD3 { ) ch_versions = ch_versions.mix(MMCIF2PDB_MODELS.out.versions) - MMCIF2PDB_MODELS - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } + modeChannel(MMCIF2PDB_MODELS.out.pdb, "alphafold3", true).set { ch_pdb_final } // Convert top ranked mmcif to pdb MMCIF2PDB_TOP_RANKED ( @@ -103,55 +95,19 @@ workflow ALPHAFOLD3 { ) ch_versions = ch_versions.mix(MMCIF2PDB_TOP_RANKED.out.versions) - MMCIF2PDB_TOP_RANKED - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } + modeChannel(MMCIF2PDB_TOP_RANKED.out.pdb, "alphafold3").set { ch_top_ranked_pdb } // Prepare msa input - RUN_ALPHAFOLD3 - .out - .msa - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_msa_final } - - // Prepare report input - RUN_ALPHAFOLD3 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold3" ], it.flatten() ] - } - .set { ch_multiqc_report } + modeChannel(RUN_ALPHAFOLD3.out.msa, "alphafold3").set { ch_msa_final } // Prepare dummy pae input - RUN_ALPHAFOLD3 - .out - .pae - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_pae_final } + modeChannel(RUN_ALPHAFOLD3.out.pae, "alphafold3").set { ch_pae_final } emit: top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] pdb = ch_pdb_final // channel: [ meta, /path/to/*.pdb, ...,/path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] pae = ch_pae_final // channel: [ meta, path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/boltz.nf b/workflows/boltz.nf index de82b05e7..5545f8100 100644 --- a/workflows/boltz.nf +++ b/workflows/boltz.nf @@ -29,6 +29,7 @@ include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' // // MODULE: Boltz @@ -135,62 +136,18 @@ workflow BOLTZ { ch_mols ) - RUN_BOLTZ - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set {ch_pdb} - - RUN_BOLTZ - .out - .top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } - - RUN_BOLTZ - .out - .msa_raw - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_msa } - - RUN_BOLTZ - .out - .pae_raw - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_pae } - - RUN_BOLTZ - .out - .multiqc - .map { it -> it[1] } - .collect(sort: true) - .map { it -> [ [ "model": "boltz"], it.flatten() ] } - .set { ch_multiqc_report } + modeChannel(RUN_BOLTZ.out.pdb, "boltz").set { ch_pdb } + modeChannel(RUN_BOLTZ.out.top_ranked_pdb, "boltz").set { ch_top_ranked_pdb } + modeChannel(RUN_BOLTZ.out.msa, "boltz").set { ch_msa } + modeChannel(RUN_BOLTZ.out.pae, "boltz").set { ch_pae } ch_versions = ch_versions.mix(RUN_BOLTZ.out.versions) emit: versions = ch_versions msa = ch_msa - structures = RUN_BOLTZ.out.structures + structures_npz = RUN_BOLTZ.out.structures_npz confidence = RUN_BOLTZ.out.confidence - multiqc_report = ch_multiqc_report top_ranked_pdb = ch_top_ranked_pdb pdb = ch_pdb pae = ch_pae diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 312a22b4a..33be42113 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -37,7 +37,6 @@ workflow COLABFOLD { num_recycles // int: Number of recycles for colabfold main: - ch_multiqc_report = channel.empty() if (params.use_msa_server) { // @@ -89,46 +88,16 @@ workflow COLABFOLD { ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } - COLABFOLD_BATCH - .out - .top_ranked_pdb - .map { it -> - def meta_clone = it[0].clone(); - meta_clone.model = "colabfold"; - [ meta_clone, it[1] ] - } - .set { ch_top_ranked_pdb } - - COLABFOLD_BATCH - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "colabfold"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - + modeChannel(COLABFOLD_BATCH.out.top_ranked_pdb, "colabfold").set { ch_top_ranked_pdb } + modeChannel(COLABFOLD_BATCH.out.pdb, "colabfold", true).set { ch_pdb_final } modeChannel(COLABFOLD_BATCH.out.msa, "colabfold").set { ch_msa_final } modeChannel(COLABFOLD_BATCH.out.pae, "colabfold").set { ch_pae_final } - COLABFOLD_BATCH - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model":"colabfold"], it.flatten() ] - } - .set { ch_multiqc_report } - emit: top_ranked_pdb = ch_top_ranked_pdb // channel: [ meta, /path/to/*.pdb ] pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 5a221d986..834063c52 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -56,21 +56,10 @@ workflow ESMFOLD { ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) } - RUN_ESMFOLD - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "esmfold"], it.flatten() ] - } - .set { ch_multiqc_report } - - modeChannel(RUN_ESMFOLD.out.pdb, "esmfold").set { ch_pdb_final } + modeChannel(RUN_ESMFOLD.out.pdb, "esmfold", true).set { ch_pdb_final } emit: pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index 9defb5c43..d506c26dd 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -47,7 +47,6 @@ workflow HELIXFOLD3 { main: ch_pdb = channel.empty() ch_top_ranked_pdb = channel.empty() - ch_multiqc_report = channel.empty() // // SUBWORKFLOW: Run helixfold3 @@ -77,40 +76,11 @@ workflow HELIXFOLD3 { ch_helixfold3_maxit_src ) - RUN_HELIXFOLD3 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "helixfold3" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3.out.pdb) ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) - RUN_HELIXFOLD3 - .out - .top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "helixfold3"; - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } - - RUN_HELIXFOLD3 - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "helixfold3"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - + modeChannel(RUN_HELIXFOLD3.out.top_ranked_pdb, "helixfold3").set { ch_top_ranked_pdb } + modeChannel(RUN_HELIXFOLD3.out.pdb, "helixfold3", true).set { ch_pdb_final } modeChannel(RUN_HELIXFOLD3.out.msa, "helixfold3").set { ch_msa_final } modeChannel(RUN_HELIXFOLD3.out.pae, "helixfold3").set { ch_pae_final } @@ -119,7 +89,6 @@ workflow HELIXFOLD3 { pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/rosettafold2na.nf b/workflows/rosettafold2na.nf index dfcf92861..0ffcaea4a 100644 --- a/workflows/rosettafold2na.nf +++ b/workflows/rosettafold2na.nf @@ -10,6 +10,8 @@ include { ROSETTAFOLD2NA_FASTA } from '../modules/local/rosettafold2na_fasta' include { RUN_ROSETTAFOLD2NA } from '../modules/local/run_rosettafold2na' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -28,7 +30,6 @@ workflow ROSETTAFOLD2NA { ch_rosettafold2na_weights // channel: path(rosettafold2na_weights) main: - ch_multiqc_report = channel.empty() ROSETTAFOLD2NA_FASTA( ch_samplesheet @@ -45,51 +46,14 @@ workflow ROSETTAFOLD2NA { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD2NA.out.versions) - RUN_ROSETTAFOLD2NA - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "rosettafold2na" ], it.flatten() ] - } - .set { ch_multiqc_report } - - RUN_ROSETTAFOLD2NA - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_pdb_final } - - RUN_ROSETTAFOLD2NA - .out - .pae - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_pae_final } - - RUN_ROSETTAFOLD2NA - .out - .msa - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_msa_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.pdb, "rosettafold2na", true).set { ch_pdb_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.pae, "rosettafold2na").set { ch_pae_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.msa, "rosettafold2na").set { ch_msa_final } emit: pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index bd576087b..7c202fead 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -36,7 +36,6 @@ workflow ROSETTAFOLD_ALL_ATOM { ch_rfaa_paper_weights // channel: path(rfaa_paper_weightsch_dummy_file // channel: path(NO_file) main: - ch_multiqc_report = channel.empty() ch_samplesheet.branch { it -> fasta: it[1].extension == "fasta" || it[1].extension == "fa" @@ -64,17 +63,7 @@ workflow ROSETTAFOLD_ALL_ATOM { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) - RUN_ROSETTAFOLD_ALL_ATOM - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "rosettafold_all_atom" ], it.flatten() ] - } - .set { ch_multiqc_report } - - modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pdb, "rosettafold_all_atom").set { ch_pdb_final } + modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pdb, "rosettafold_all_atom", true).set { ch_pdb_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.msa, "rosettafold_all_atom").set { ch_msa_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pae, "rosettafold_all_atom").set { ch_pae_final } @@ -82,7 +71,6 @@ workflow ROSETTAFOLD_ALL_ATOM { pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] }