diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 index de236ed958..c273b54f90 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 @@ -204,11 +204,10 @@ runcmd: - /usr/local/bin/set-ssh.sh - /usr/local/bin/install_cuda_toolkit.sh - - mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab + - mkdir -p {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path}}/hpc_tools 
/hpc_tools nfs defaults,_netdev 0 0" >> /etc/fstab @@ -221,17 +220,22 @@ - bash /usr/local/bin/doca-install.sh && bash /usr/local/bin/configure-ib-network.sh - yes | cp /etc/slurm/epilog.d/slurmd.service /usr/lib/systemd/system/ - /usr/local/bin/check_slurm_controller_status.sh - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/lib/slurm + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - chown -R {{ munge_user }}:{{ munge_group }} /etc/munge/munge.key - - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + - chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - - mkdir -p /var/spool/slurmd - - chmod {{ file_mode_755 }} /var/spool/slurmd - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd +{% for epath in slurm_epilog_custom_paths %} + - bash -c 'if [ ! -f "{{ epath }}" ]; then mkdir -p "$(dirname "{{ epath }}")"; printf "#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n" > "{{ epath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}"; chmod {{ file_mode_755 }} "{{ epath }}"; fi' +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + - bash -c 'if [ ! 
-f "{{ ppath }}" ]; then mkdir -p "$(dirname "{{ ppath }}")"; printf "#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n" > "{{ ppath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}"; chmod {{ file_mode_755 }} "{{ ppath }}"; fi' +{% endfor %} + - mkdir -p {{ slurm_slurmd_spool_dir_effective }} + - chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - setenforce 0 - systemctl enable firewalld - systemctl start firewalld diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 index 3195fad9e3..b7b23c1d33 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 @@ -214,11 +214,10 @@ # Ensure Slurm NFS root is mounted at client_mount_path (e.g. 
/share_omnia) - mkdir -p {{ client_mount_path }}/slurm/ssh - - mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab + - mkdir -p {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path}}/hpc_tools /hpc_tools nfs defaults,_netdev 0 0" >> /etc/fstab @@ -233,17 +232,22 @@ - bash /usr/local/bin/doca-install.sh && bash /usr/local/bin/configure-ib-network.sh - yes | cp /etc/slurm/epilog.d/slurmd.service /usr/lib/systemd/system/ - /usr/local/bin/check_slurm_controller_status.sh - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - - chown -R {{ slurm_user }}:{{ slurm_user 
}} /var/lib/slurm + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - chown -R {{ munge_user }}:{{ munge_group }} /etc/munge/munge.key - - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + - chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - - mkdir -p /var/spool/slurmd - - chmod {{ file_mode_755 }} /var/spool/slurmd - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd +{% for epath in slurm_epilog_custom_paths %} + - bash -c 'if [ ! -f "{{ epath }}" ]; then mkdir -p "$(dirname "{{ epath }}")"; printf "#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n" > "{{ epath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}"; chmod {{ file_mode_755 }} "{{ epath }}"; fi' +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + - bash -c 'if [ ! 
-f "{{ ppath }}" ]; then mkdir -p "$(dirname "{{ ppath }}")"; printf "#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n" > "{{ ppath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}"; chmod {{ file_mode_755 }} "{{ ppath }}"; fi' +{% endfor %} + - mkdir -p {{ slurm_slurmd_spool_dir_effective }} + - chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - setenforce 0 - systemctl enable firewalld - systemctl start firewalld diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 index f869d7d8fe..8b3d771592 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 @@ -116,11 +116,10 @@ runcmd: - /usr/local/bin/set-ssh.sh - - mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab + - mkdir -p {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ 
slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path}}/hpc_tools/container_images /hpc_tools/container_images nfs defaults,_netdev 0 0" >> /etc/fstab @@ -134,17 +133,22 @@ - bash /usr/local/bin/doca-install.sh && bash /usr/local/bin/configure-ib-network.sh - yes | cp /etc/slurm/epilog.d/slurmd.service /usr/lib/systemd/system/ - /usr/local/bin/check_slurm_controller_status.sh - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/lib/slurm + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - chown -R {{ munge_user }}:{{ munge_group }} /etc/munge/munge.key - - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + - chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - - mkdir -p /var/spool/slurmd - - chmod {{ file_mode_755 }} /var/spool/slurmd - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd +{% for epath in slurm_epilog_custom_paths %} + - bash -c 'if [ ! 
-f "{{ epath }}" ]; then mkdir -p "$(dirname "{{ epath }}")"; printf "#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n" > "{{ epath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}"; chmod {{ file_mode_755 }} "{{ epath }}"; fi' +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + - bash -c 'if [ ! -f "{{ ppath }}" ]; then mkdir -p "$(dirname "{{ ppath }}")"; printf "#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n" > "{{ ppath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}"; chmod {{ file_mode_755 }} "{{ ppath }}"; fi' +{% endfor %} + - mkdir -p {{ slurm_slurmd_spool_dir_effective }} + - chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - setenforce 0 - systemctl enable firewalld - systemctl start firewalld diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 index 82646da1c6..4e68ba8d81 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 @@ -123,10 +123,10 @@ - /usr/local/bin/set-ssh.sh # Ensure Slurm NFS root is mounted at client_mount_path (e.g. 
/share_omnia) - mkdir -p {{ client_mount_path }}/slurm/ssh - - mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts + - mkdir -p {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts - echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab @@ -144,18 +144,23 @@ - bash /usr/local/bin/doca-install.sh && bash /usr/local/bin/configure-ib-network.sh - yes | cp /etc/slurm/epilog.d/slurmd.service /usr/lib/systemd/system/ - /usr/local/bin/check_slurm_controller_status.sh - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/lib/slurm + - chown -R {{ 
slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - chown -R {{ munge_user }}:{{ munge_group }} /etc/munge/munge.key - - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + - chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh - - mkdir -p /var/spool/slurmd - - chmod {{ file_mode_755 }} /var/spool/slurmd - - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd +{% for epath in slurm_epilog_custom_paths %} + - bash -c 'if [ ! -f "{{ epath }}" ]; then mkdir -p "$(dirname "{{ epath }}")"; printf "#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n" > "{{ epath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}"; chmod {{ file_mode_755 }} "{{ epath }}"; fi' +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + - bash -c 'if [ ! 
-f "{{ ppath }}" ]; then mkdir -p "$(dirname "{{ ppath }}")"; printf "#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n" > "{{ ppath }}"; chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}"; chmod {{ file_mode_755 }} "{{ ppath }}"; fi' +{% endfor %} + - mkdir -p {{ slurm_slurmd_spool_dir_effective }} + - chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + - chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} - setenforce 0 - systemctl enable firewalld - systemctl start firewalld diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 index 2f2721d7eb..a8c3b8d88c 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 @@ -469,15 +469,18 @@ # slurm user and group created in the users module # Create directories for nfs and mount all - - mkdir -p /var/log/slurm /etc/slurm {{ home_dir }} /etc/my.cnf.d /etc/munge /var/lib/mysql /var/log/mariadb /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts + - mkdir -p {{ slurm_ctld_log_dir_effective }} {{ slurmdbd_log_dir_effective }} {{ slurm_ctld_pid_dir_effective }} {{ slurmdbd_pid_dir_effective }} {{ slurm_state_save_location_effective }} {% if slurm_sched_log_dir_effective %}{{ slurm_sched_log_dir_effective }} {% endif %}/etc/slurm {{ home_dir }} /etc/my.cnf.d /etc/munge /var/lib/mysql /var/log/mariadb /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts - echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm /etc/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path 
}}/$(hostname -s)/etc/my.cnf.d /etc/my.cnf.d nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/mariadb /var/log/mariadb nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_ctld_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab +{% if slurmdbd_log_dir_effective != slurm_ctld_log_dir_effective %} + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurmdbd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab +{% endif %} {% if powervault_config is not defined %} - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/lib/mysql /var/lib/mysql nfs defaults,_netdev 0 0" >> /etc/fstab - - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmctld /var/spool/slurmctld nfs defaults,_netdev 0 0" >> /etc/fstab + - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmctld {{ slurm_state_save_location_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab {% endif %} - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 index cc784bdd10..3dc8f65514 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 @@ -237,13 +237,12 @@ echo "[INFO] ===== Starting directory creation and NFS mounts for Pulp cert, Slurm and Munge (aarch64) =====" echo "[INFO] Creating base directories for Slurm and Munge" - mkdir -pv /var/log/slurm /var/run/slurm 
/var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts + mkdir -pv {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts echo "[INFO] Updating /etc/fstab with NFS entries for Pulp cert, Slurm and Munge paths" - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab + echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ cloud_init_nfs_path}}/hpc_tools/container_images /hpc_tools/container_images nfs defaults,_netdev 0 0" >> /etc/fstab @@ -274,22 +273,43 @@ bash /usr/local/bin/check_slurm_controller_status.sh echo "[INFO] Setting ownership for Slurm directories" - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - chown -R {{ slurm_user }}:{{ slurm_user }} /var/lib/slurm + chown -R {{ 
slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] Setting permissions for Slurm directories" - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] Ensuring Slurm epilog directory and logout script permissions" chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh +{% for epath in slurm_epilog_custom_paths %} + + echo "[INFO] Checking custom epilog script: {{ epath }}" + if [ ! -f "{{ epath }}" ]; then + echo "[INFO] Creating stub epilog script at {{ epath }}" + mkdir -p "$(dirname '{{ epath }}')" + printf '#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n' > "{{ epath }}" + chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}" + chmod {{ file_mode_755 }} "{{ epath }}" + fi +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + + echo "[INFO] Checking custom prolog script: {{ ppath }}" + if [ ! 
-f "{{ ppath }}" ]; then + echo "[INFO] Creating stub prolog script at {{ ppath }}" + mkdir -p "$(dirname '{{ ppath }}')" + printf '#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n' > "{{ ppath }}" + chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}" + chmod {{ file_mode_755 }} "{{ ppath }}" + fi +{% endfor %} - echo "[INFO] Creating and configuring /var/spool/slurmd" - mkdir -p /var/spool/slurmd - chmod {{ file_mode_755 }} /var/spool/slurmd - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd + echo "[INFO] Creating and configuring slurmd spool directory" + mkdir -p {{ slurm_slurmd_spool_dir_effective }} + chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] ===== Completed slurmd setup (aarch64) =====" @@ -448,4 +468,6 @@ - /root/ldms_sampler.sh {% endif %} + - systemctl restart slurmd + - echo "Cloud-Init has completed successfully." diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 index 5128aee1d1..62a4e9e063 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 @@ -256,12 +256,12 @@ # Ensure Slurm NFS root is mounted at client_mount_path (e.g. 
/share_omnia) mkdir -p {{ client_mount_path }}/slurm/ssh echo "[INFO] Creating base directories for Pulp cert, Slurm and Munge" - mkdir -pv /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts + mkdir -pv {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} {{ slurm_epilog_dirs_all | join(' ') }} {% for d in slurm_prolog_dirs_all %}{{ d }} {% endfor %}/etc/munge /cert /var/log/track /var/lib/packages /hpc_tools/container_images /hpc_tools/scripts echo "[INFO] Updating /etc/fstab with NFS entries for Pulp cert, Slurm and Munge paths" echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab - echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd /var/spool/slurmd nfs defaults,_netdev 0 0" >> /etc/fstab + echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm {{ slurm_slurmd_log_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab + echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool/slurmd {{ slurm_slurmd_spool_dir_effective }} nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/munge /etc/munge nfs defaults,_netdev 0 0" >> /etc/fstab echo "{{ trackfile_nfs_path }} /var/log/track nfs defaults,_netdev 0 0" >> /etc/fstab @@ -292,22 +292,43 @@ bash /usr/local/bin/check_slurm_controller_status.sh echo "[INFO] Setting ownership for Slurm directories" - chown -R {{ slurm_user }}:{{ slurm_user }} /var/log/slurm - chown -R {{ slurm_user }}:{{ slurm_user }} /var/run/slurm - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool - chown -R {{ slurm_user }}:{{ slurm_user }} 
/var/lib/slurm + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_log_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_pid_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] Setting permissions for Slurm directories" - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm + chmod {{ file_mode_755 }} {{ slurm_slurmd_log_dir_effective }} {{ slurm_slurmd_pid_dir_effective }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] Ensuring Slurm epilog directory and logout script permissions" chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh +{% for epath in slurm_epilog_custom_paths %} + + echo "[INFO] Checking custom epilog script: {{ epath }}" + if [ ! -f "{{ epath }}" ]; then + echo "[INFO] Creating stub epilog script at {{ epath }}" + mkdir -p "$(dirname '{{ epath }}')" + printf '#!/bin/bash\n# Custom epilog script placeholder\n# Add your epilog commands here\n' > "{{ epath }}" + chown {{ slurm_user }}:{{ slurm_user }} "{{ epath }}" + chmod {{ file_mode_755 }} "{{ epath }}" + fi +{% endfor %} +{% for ppath in slurm_prolog_custom_paths %} + + echo "[INFO] Checking custom prolog script: {{ ppath }}" + if [ ! 
-f "{{ ppath }}" ]; then + echo "[INFO] Creating stub prolog script at {{ ppath }}" + mkdir -p "$(dirname '{{ ppath }}')" + printf '#!/bin/bash\n# Custom prolog script placeholder\n# Add your prolog commands here\n' > "{{ ppath }}" + chown {{ slurm_user }}:{{ slurm_user }} "{{ ppath }}" + chmod {{ file_mode_755 }} "{{ ppath }}" + fi +{% endfor %} - echo "[INFO] Creating and configuring /var/spool/slurmd" - mkdir -p /var/spool/slurmd - chmod {{ file_mode_755 }} /var/spool/slurmd - chown -R {{ slurm_user }}:{{ slurm_user }} /var/spool/slurmd + echo "[INFO] Creating and configuring slurmd spool directory" + mkdir -p {{ slurm_slurmd_spool_dir_effective }} + chmod {{ file_mode_755 }} {{ slurm_slurmd_spool_dir_effective }} + chown -R {{ slurm_user }}:{{ slurm_user }} {{ slurm_slurmd_spool_dir_effective }} echo "[INFO] ===== Completed slurmd setup =====" @@ -462,4 +483,6 @@ - /root/ldms_sampler.sh {% endif %} + - systemctl restart slurmd + - echo "Cloud-Init has completed successfully." diff --git a/discovery/roles/slurm_config/tasks/confs.yml b/discovery/roles/slurm_config/tasks/confs.yml index 1ff30acf34..641efc7ab9 100644 --- a/discovery/roles/slurm_config/tasks/confs.yml +++ b/discovery/roles/slurm_config/tasks/confs.yml @@ -112,6 +112,12 @@ slurm_conf_dict: "{{ (merged_conf.results | selectattr('item.key', 'equalto', 'slurm') | first).conf_dict }}" when: "'slurm' in conf_merge_dict" +- name: Extract effective path parameters from merged configs + ansible.builtin.include_tasks: extract_path_overrides.yml + +- name: Validate path parameters are absolute + ansible.builtin.include_tasks: validate_path_overrides.yml + - name: Get nodes from normal partition and compare with cmpt_list ansible.builtin.set_fact: normal_partition: "{{ slurm_conf_dict.PartitionName | default([]) | selectattr('PartitionName', 'equalto', slurm_partition_name) | first | default({}) }}" @@ -134,17 +140,17 @@ - nodes_in_normal_not_in_cmpt is defined - nodes_in_normal_not_in_cmpt | length > 0 -- 
name: Create directories from conf values +- name: Create directories from conf values (NFS server-side always uses defaults) ansible.builtin.include_tasks: exist_dir.yml loop: - "{{ ctld_list - | product([slurm_conf_dict.get('StateSaveLocation', '/var/spool/slurmctld'), - (slurm_conf_dict.get('SlurmctldLogFile', '/var/log/slurmctld.log') | dirname), - (slurm_conf_dict.get('SlurmctldPidFile', '/var/run/slurmctld.pid') | dirname)]) }}" + | product(['/var/spool/slurmctld', + '/var/log/slurm', + '/var/run']) }}" - "{{ (cmpt_list + login_list + compiler_login_list) - | product([slurm_conf_dict.get('SlurmdSpoolDir', '/var/spool/slurmd'), - (slurm_conf_dict.get('SlurmdLogFile', '/var/log/slurmd.log') | dirname), - (slurm_conf_dict.get('SlurmdPidFile', '/var/run/slurmd.pid') | dirname)]) }}" + | product(['/var/spool/slurmd', + '/var/log/slurm', + '/var/run']) }}" loop_control: loop_var: product diff --git a/discovery/roles/slurm_config/tasks/extract_path_overrides.yml b/discovery/roles/slurm_config/tasks/extract_path_overrides.yml new file mode 100644 index 0000000000..ab1bf17aa6 --- /dev/null +++ b/discovery/roles/slurm_config/tasks/extract_path_overrides.yml @@ -0,0 +1,221 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+---
+
+# ── Extract merged dicts ──────────────────────────────────────────────
+# Each *_merged_dict fact is set only when its name is a key of conf_merge_dict.
+- name: Extract slurm.conf merged dict
+  ansible.builtin.set_fact:
+    slurm_merged_dict: "{{ (merged_conf.results | selectattr('item.key', 'equalto', 'slurm') | first).conf_dict }}"
+  when: "'slurm' in conf_merge_dict"
+
+- name: Extract slurmdbd.conf merged dict
+  ansible.builtin.set_fact:
+    slurmdbd_merged_dict: "{{ (merged_conf.results | selectattr('item.key', 'equalto', 'slurmdbd') | first).conf_dict }}"
+  when: "'slurmdbd' in conf_merge_dict"
+
+- name: Extract cgroup.conf merged dict
+  ansible.builtin.set_fact:
+    cgroup_merged_dict: "{{ (merged_conf.results | selectattr('item.key', 'equalto', 'cgroup') | first).conf_dict }}"
+  when: "'cgroup' in conf_merge_dict"
+
+# ── slurm.conf: controller path params ────────────────────────────────
+# A list value contributes its first element; log/pid file parameters are reduced to their directory.
+- name: Extract effective controller directories from slurm.conf
+  ansible.builtin.set_fact:
+    slurm_ctld_log_dir_effective: >-
+      {{ (slurm_merged_dict.get('SlurmctldLogFile', ['/var/log/slurm/slurmctld.log'])
+      | first if slurm_merged_dict.get('SlurmctldLogFile') is iterable
+      and slurm_merged_dict.get('SlurmctldLogFile') is not string
+      else slurm_merged_dict.get('SlurmctldLogFile', '/var/log/slurm/slurmctld.log'))
+      | dirname }}
+    slurm_state_save_location_effective: >-
+      {{ (slurm_merged_dict.get('StateSaveLocation', ['/var/spool/slurmctld'])
+      | first if slurm_merged_dict.get('StateSaveLocation') is iterable
+      and slurm_merged_dict.get('StateSaveLocation') is not string
+      else slurm_merged_dict.get('StateSaveLocation', '/var/spool/slurmctld')) }}
+    slurm_ctld_pid_dir_effective: >-
+      {{ (slurm_merged_dict.get('SlurmctldPidFile', ['/var/run/slurmctld.pid'])
+      | first if slurm_merged_dict.get('SlurmctldPidFile') is iterable
+      and slurm_merged_dict.get('SlurmctldPidFile') is not string
+      else slurm_merged_dict.get('SlurmctldPidFile', '/var/run/slurmctld.pid'))
+      | dirname }}
+    slurm_sched_log_dir_effective: >-
+      {{ ((slurm_merged_dict.get('SlurmSchedLogFile', [''])
+      | first if slurm_merged_dict.get('SlurmSchedLogFile') is iterable
+      and slurm_merged_dict.get('SlurmSchedLogFile') is not string
+      else slurm_merged_dict.get('SlurmSchedLogFile', ''))
+      | default('', true) | dirname | default('', true)) }}
+  when: slurm_merged_dict is defined
+
+# ── slurm.conf: compute path params ──────────────────────────────────
+# Same list/string handling as the controller parameters; Prolog has no default and may resolve to ''.
+- name: Extract effective compute directories from slurm.conf
+  ansible.builtin.set_fact:
+    slurm_slurmd_log_dir_effective: >-
+      {{ (slurm_merged_dict.get('SlurmdLogFile', ['/var/log/slurm/slurmd.log'])
+      | first if slurm_merged_dict.get('SlurmdLogFile') is iterable
+      and slurm_merged_dict.get('SlurmdLogFile') is not string
+      else slurm_merged_dict.get('SlurmdLogFile', '/var/log/slurm/slurmd.log'))
+      | dirname }}
+    slurm_slurmd_spool_dir_effective: >-
+      {{ (slurm_merged_dict.get('SlurmdSpoolDir', ['/var/spool/slurmd'])
+      | first if slurm_merged_dict.get('SlurmdSpoolDir') is iterable
+      and slurm_merged_dict.get('SlurmdSpoolDir') is not string
+      else slurm_merged_dict.get('SlurmdSpoolDir', '/var/spool/slurmd')) }}
+    slurm_slurmd_pid_dir_effective: >-
+      {{ (slurm_merged_dict.get('SlurmdPidFile', ['/var/run/slurmd.pid'])
+      | first if slurm_merged_dict.get('SlurmdPidFile') is iterable
+      and slurm_merged_dict.get('SlurmdPidFile') is not string
+      else slurm_merged_dict.get('SlurmdPidFile', '/var/run/slurmd.pid'))
+      | dirname }}
+    slurm_epilog_dir_effective: >-
+      {{ (slurm_merged_dict.get('Epilog', ['/etc/slurm/epilog.d/logout_user.sh'])
+      | first if slurm_merged_dict.get('Epilog') is iterable
+      and slurm_merged_dict.get('Epilog') is not string
+      else slurm_merged_dict.get('Epilog', '/etc/slurm/epilog.d/logout_user.sh'))
+      | dirname }}
+    slurm_prolog_dir_effective: >-
+      {{ ((slurm_merged_dict.get('Prolog', [''])
+      | first if slurm_merged_dict.get('Prolog') is iterable
+      and slurm_merged_dict.get('Prolog') is not string
+      else slurm_merged_dict.get('Prolog', ''))
+      | default('', true) | dirname | default('', true)) }}
+  when: slurm_merged_dict is defined
+
+# ── slurm.conf: all epilog/prolog dirs and custom file paths ─────────
+# An absent Epilog falls back to the default script so these lists agree with slurm_epilog_dir_effective.
+- name: Extract all epilog paths from merged Epilog list
+  ansible.builtin.set_fact:
+    slurm_epilog_paths_all: >-
+      {{ (slurm_merged_dict.get('Epilog', [])
+      if slurm_merged_dict.get('Epilog') is iterable
+      and slurm_merged_dict.get('Epilog') is not string
+      else [slurm_merged_dict.get('Epilog', '/etc/slurm/epilog.d/logout_user.sh')])
+      | reject('equalto', '') | list }}
+    slurm_epilog_dirs_all: >-
+      {{ (slurm_merged_dict.get('Epilog', [])
+      if slurm_merged_dict.get('Epilog') is iterable
+      and slurm_merged_dict.get('Epilog') is not string
+      else [slurm_merged_dict.get('Epilog', '/etc/slurm/epilog.d/logout_user.sh')])
+      | map('dirname') | unique | reject('equalto', '') | list }}
+  when: slurm_merged_dict is defined
+
+- name: Extract custom epilog paths (non-default)
+  ansible.builtin.set_fact:
+    slurm_epilog_custom_paths: >-
+      {{ slurm_epilog_paths_all | reject('search', '^/etc/slurm/epilog\\.d/') | list }}
+  when: slurm_merged_dict is defined
+
+- name: Extract all prolog paths from merged Prolog list
+  ansible.builtin.set_fact:
+    slurm_prolog_paths_all: >-
+      {{ (slurm_merged_dict.get('Prolog', [])
+      if slurm_merged_dict.get('Prolog') is iterable
+      and slurm_merged_dict.get('Prolog') is not string
+      else [slurm_merged_dict.get('Prolog', '')])
+      | reject('equalto', '') | list }}
+    slurm_prolog_dirs_all: >-
+      {{ (slurm_merged_dict.get('Prolog', [])
+      if slurm_merged_dict.get('Prolog') is iterable
+      and slurm_merged_dict.get('Prolog') is not string
+      else [slurm_merged_dict.get('Prolog', '')])
+      | map('dirname') | unique | reject('equalto', '') | list }}
+  when: slurm_merged_dict is defined
+
+- name: Extract custom prolog paths (non-default)
+  ansible.builtin.set_fact:
+    slurm_prolog_custom_paths: >-
+      {{ slurm_prolog_paths_all | list }}
+  when: slurm_merged_dict is defined
+
+# ── slurm.conf: plugin dir (both controller and compute) ─────────────
+
+- name: Extract effective plugin directory from slurm.conf
+  ansible.builtin.set_fact:
+    slurm_plugin_dir_effective: >-
+      {{ (slurm_merged_dict.get('PluginDir', ['/usr/lib64/slurm'])
+      | first if slurm_merged_dict.get('PluginDir') is iterable
+      and slurm_merged_dict.get('PluginDir') is not string
+      else slurm_merged_dict.get('PluginDir', '/usr/lib64/slurm')) }}
+  when: slurm_merged_dict is defined
+
+# ── slurmdbd.conf path params ────────────────────────────────────────
+
+- name: Extract effective directories from slurmdbd.conf
+  ansible.builtin.set_fact:
+    slurmdbd_log_dir_effective: >-
+      {{ (slurmdbd_merged_dict.get('LogFile', ['/var/log/slurm/slurmdbd.log'])
+      | first if slurmdbd_merged_dict.get('LogFile') is iterable
+      and slurmdbd_merged_dict.get('LogFile') is not string
+      else slurmdbd_merged_dict.get('LogFile', '/var/log/slurm/slurmdbd.log'))
+      | dirname }}
+    slurmdbd_pid_dir_effective: >-
+      {{ (slurmdbd_merged_dict.get('PidFile', ['/var/run/slurmdbd.pid'])
+      | first if slurmdbd_merged_dict.get('PidFile') is iterable
+      and slurmdbd_merged_dict.get('PidFile') is not string
+      else slurmdbd_merged_dict.get('PidFile', '/var/run/slurmdbd.pid'))
+      | dirname }}
+    slurmdbd_plugin_dir_effective: >-
+      {{ (slurmdbd_merged_dict.get('PluginDir', ['/usr/lib64/slurm'])
+      | first if slurmdbd_merged_dict.get('PluginDir') is iterable
+      and slurmdbd_merged_dict.get('PluginDir') is not string
+      else slurmdbd_merged_dict.get('PluginDir', '/usr/lib64/slurm')) }}
+  when: slurmdbd_merged_dict is defined
+
+# ── cgroup.conf path params ──────────────────────────────────────────
+
+- name: Extract effective cgroup mountpoint from cgroup.conf
+  ansible.builtin.set_fact:
+    slurm_cgroup_mountpoint_effective: >-
+      {{ ((cgroup_merged_dict.get('CgroupMountpoint', [''])
+      | first if cgroup_merged_dict.get('CgroupMountpoint') is iterable
+      and cgroup_merged_dict.get('CgroupMountpoint') is not string
+      else cgroup_merged_dict.get('CgroupMountpoint', ''))
+      | default('', true)) }}
+  when: cgroup_merged_dict is defined
+
+# ── Defaults when confs are not merged ────────────────────────────────
+# These values mirror the fallbacks the extraction tasks use when a key is absent.
+- name: Set default effective directories if slurm.conf not merged
+  ansible.builtin.set_fact:
+    slurm_ctld_log_dir_effective: "/var/log/slurm"
+    slurm_slurmd_log_dir_effective: "/var/log/slurm"
+    slurm_state_save_location_effective: "/var/spool/slurmctld"
+    slurm_slurmd_spool_dir_effective: "/var/spool/slurmd"
+    slurm_ctld_pid_dir_effective: "/var/run"
+    slurm_slurmd_pid_dir_effective: "/var/run"
+    slurm_epilog_dir_effective: "/etc/slurm/epilog.d"
+    slurm_prolog_dir_effective: ""
+    slurm_sched_log_dir_effective: ""
+    slurm_plugin_dir_effective: "/usr/lib64/slurm"
+    slurm_epilog_dirs_all: ["/etc/slurm/epilog.d"]
+    slurm_epilog_paths_all: ["/etc/slurm/epilog.d/logout_user.sh"]
+    slurm_epilog_custom_paths: []
+    slurm_prolog_dirs_all: []
+    slurm_prolog_paths_all: []
+    slurm_prolog_custom_paths: []
+  when: slurm_merged_dict is not defined
+
+- name: Set default effective directories if slurmdbd.conf not merged
+  ansible.builtin.set_fact:
+    slurmdbd_log_dir_effective: "/var/log/slurm"
+    slurmdbd_pid_dir_effective: "/var/run"
+    slurmdbd_plugin_dir_effective: "/usr/lib64/slurm"
+  when: slurmdbd_merged_dict is not defined
+
+- name: Set default effective cgroup mountpoint if cgroup.conf not merged
+  ansible.builtin.set_fact:
+    slurm_cgroup_mountpoint_effective: ""
+  when: cgroup_merged_dict is not defined
diff --git a/discovery/roles/slurm_config/tasks/validate_path_overrides.yml b/discovery/roles/slurm_config/tasks/validate_path_overrides.yml
new file mode 100644
index 0000000000..c4a1783b02
--- /dev/null
+++ b/discovery/roles/slurm_config/tasks/validate_path_overrides.yml
@@ -0,0 +1,107 @@
+# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# ── slurm.conf path validation ───────────────────────────────────────
+# Unset or empty values are skipped; a list value fails if any non-empty element is not absolute.
+- name: Validate slurm.conf path parameters are absolute
+  ansible.builtin.fail:
+    msg: "slurm.conf {{ item }} must be an absolute path (start with /). Current value: {{ slurm_merged_dict.get(item) }}"
+  when:
+    - slurm_merged_dict is defined
+    - slurm_merged_dict.get(item) is defined
+    - slurm_merged_dict.get(item) is not none
+    - >-
+      (slurm_merged_dict.get(item) is string
+      and slurm_merged_dict.get(item) | length > 0)
+      or (slurm_merged_dict.get(item) is iterable
+      and slurm_merged_dict.get(item) | list | length > 0)
+    - >-
+      ([slurm_merged_dict.get(item)]
+      if slurm_merged_dict.get(item) is string
+      else slurm_merged_dict.get(item) | list)
+      | map('string') | reject('equalto', '') | reject('match', '^/') | list | length > 0
+  loop:
+    - SlurmctldLogFile
+    - SlurmdLogFile
+    - StateSaveLocation
+    - SlurmdSpoolDir
+    - SlurmctldPidFile
+    - SlurmdPidFile
+    - Epilog
+    - Prolog
+    - EpilogSlurmctld
+    - PrologSlurmctld
+    - SlurmSchedLogFile
+    - PluginDir
+    - PlugStackConfig
+    - SrunEpilog
+    - SrunProlog
+    - TaskEpilog
+    - TaskProlog
+    - HealthCheckProgram
+    - RebootProgram
+    - UnkillableStepProgram
+    - ResvEpilog
+    - ResvProlog
+    - TmpFS
+    - JobCompLoc
+    - JobCredentialPrivateKey
+    - JobCredentialPublicCertificate
+
+# ── slurmdbd.conf path validation ────────────────────────────────────
+
+- name: Validate slurmdbd.conf path parameters are absolute
+  ansible.builtin.fail:
+    msg: "slurmdbd.conf {{ item }} must be an absolute path (start with /). Current value: {{ slurmdbd_merged_dict.get(item) }}"
+  when:
+    - slurmdbd_merged_dict is defined
+    - slurmdbd_merged_dict.get(item) is defined
+    - slurmdbd_merged_dict.get(item) is not none
+    - >-
+      (slurmdbd_merged_dict.get(item) is string
+      and slurmdbd_merged_dict.get(item) | length > 0)
+      or (slurmdbd_merged_dict.get(item) is iterable
+      and slurmdbd_merged_dict.get(item) | list | length > 0)
+    - >-
+      ([slurmdbd_merged_dict.get(item)]
+      if slurmdbd_merged_dict.get(item) is string
+      else slurmdbd_merged_dict.get(item) | list)
+      | map('string') | reject('equalto', '') | reject('match', '^/') | list | length > 0
+  loop:
+    - LogFile
+    - PidFile
+    - PluginDir
+
+# ── cgroup.conf path validation ──────────────────────────────────────
+
+- name: Validate cgroup.conf path parameters are absolute
+  ansible.builtin.fail:
+    msg: "cgroup.conf {{ item }} must be an absolute path (start with /). Current value: {{ cgroup_merged_dict.get(item) }}"
+  when:
+    - cgroup_merged_dict is defined
+    - cgroup_merged_dict.get(item) is defined
+    - cgroup_merged_dict.get(item) is not none
+    - >-
+      (cgroup_merged_dict.get(item) is string
+      and cgroup_merged_dict.get(item) | length > 0)
+      or (cgroup_merged_dict.get(item) is iterable
+      and cgroup_merged_dict.get(item) | list | length > 0)
+    - >-
+      ([cgroup_merged_dict.get(item)]
+      if cgroup_merged_dict.get(item) is string
+      else cgroup_merged_dict.get(item) | list)
+      | map('string') | reject('equalto', '') | reject('match', '^/') | list | length > 0
+  loop:
+    - CgroupMountpoint