Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Omnia 1.x Documentation is hosted on [Read The Docs 1.x](https://omnia-doc.readt

Omnia 2.x Documentation is hosted on [Read The Docs 2.x](https://omnia.readthedocs.io/en/latest/index.html).

Current Status: ![GitHub](https://readthedocs.org/projects/omnia-doc/badge/?version=latest)
Current Status: ![GitHub](https://readthedocs.org/projects/omnia/badge/?version=latest)

## Licensing

Expand Down
2 changes: 1 addition & 1 deletion build_image_aarch64/roles/prepare_arm_node/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@

- name: Build full Podman image path
ansible.builtin.set_fact:
pulp_aarch_image: "{{ hostvars['localhost']['oim_pxe_ip'] }}:2225/dellhpcomniaaisolution/image-build-aarch64:latest"
pulp_aarch_image: "{{ hostvars['localhost']['oim_pxe_ip'] }}:2225/dellhpcomniaaisolution/image-build-aarch64:1.0"

- name: Pull aarch64 image using Podman
ansible.builtin.command:
Expand Down
1 change: 0 additions & 1 deletion build_image_aarch64/roles/prepare_arm_node/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ aarch64_regctl_url: "https://github.com/regclient/regclient/releases/latest/down
pulp_repo_file_path: "/etc/yum.repos.d/pulp.repo"
pulp_webserver_cert_path: "/opt/omnia/pulp/settings/certs/pulp_webserver.crt"
anchors_path: "/etc/pki/ca-trust/source/anchors/pulp_webserver.crt"
regctl_tar_path: "omnia/offline_repo/cluster/aarch64/rhel/10.0/tarball/regctl-linux-arm64/regctl-linux-arm64.tar.gz"
regctl_bin_path: "/usr/local/bin/regctl"

# Error messages
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@
- groupadd -r {{ slurm_group_name }}
- useradd -r -g {{ slurm_group_name }} -d {{ home_dir }} -s /sbin/nologin {{ user }}

- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /var/log/track
- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track
- echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
Expand Down Expand Up @@ -213,6 +214,8 @@
- systemctl start slurmd
- systemctl daemon-reexec
- systemctl restart sshd
- cp /cert/pulp_webserver.crt /etc/pki/ca-trust/source/anchors && update-ca-trust
- sed -i 's/^gpgcheck=1/gpgcheck=0/' /etc/dnf/dnf.conf

{% if hostvars['localhost']['openldap_support'] %}
- /usr/local/bin/update_ldap_conf.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@
- groupadd -r {{ slurm_group_name }}
- useradd -r -g {{ slurm_group_name }} -d {{ home_dir }} -s /sbin/nologin {{ user }}

- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /var/log/track
- mkdir -p /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track
- echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
- echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
Expand Down Expand Up @@ -135,6 +136,8 @@
- systemctl start slurmd
- systemctl daemon-reexec
- systemctl restart sshd
- cp /cert/pulp_webserver.crt /etc/pki/ca-trust/source/anchors && update-ca-trust
- sed -i 's/^gpgcheck=1/gpgcheck=0/' /etc/dnf/dnf.conf

{% if hostvars['localhost']['openldap_support'] %}
- /usr/local/bin/update_ldap_conf.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,13 @@
LOGFILE="/var/log/configure_dirs_and_mounts.log"
exec > >(tee -a "$LOGFILE") 2>&1

echo "[INFO] ===== Starting directory creation and NFS mounts for Slurm and Munge (aarch64) ====="
echo "[INFO] ===== Starting directory creation and NFS mounts for Pulp cert, Slurm and Munge (aarch64) ====="

echo "[INFO] Creating base directories for Slurm and Munge"
mkdir -pv /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /var/log/track
echo "[INFO] Creating base directories for Pulp cert, Slurm and Munge"
mkdir -pv /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm /etc/slurm/epilog.d /etc/munge /cert /var/log/track

echo "[INFO] Updating /etc/fstab with NFS entries for Slurm and Munge paths"
echo "[INFO] Updating /etc/fstab with NFS entries for Pulp cert, Slurm and Munge paths"
echo "{{ cloud_init_nfs_path }}/cert /cert nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/log/slurm /var/log/slurm nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/var/spool /var/spool nfs defaults,_netdev 0 0" >> /etc/fstab
echo "{{ cloud_init_nfs_path }}/$(hostname -s)/etc/slurm/epilog.d /etc/slurm/epilog.d nfs defaults,_netdev 0 0" >> /etc/fstab
Expand Down Expand Up @@ -376,6 +377,8 @@
- useradd -r -g {{ slurm_group_name }} -d {{ home_dir }} -s /sbin/nologin {{ user }}

- /usr/local/bin/configure_dirs_and_mounts.sh
- cp /cert/pulp_webserver.crt /etc/pki/ca-trust/source/anchors && update-ca-trust
- sed -i 's/^gpgcheck=1/gpgcheck=0/' /etc/dnf/dnf.conf
- /usr/local/bin/configure_slurmd_setup.sh
- /usr/local/bin/configure_munge_and_pam.sh

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,16 +184,16 @@
{ "src" : "FilePmdMapped", "dst" : "FilePmdMapped", "type" : "u64" },
{ "src" : "CmaTotal", "dst" : "CmaTotal", "type" : "u64" },
{ "src" : "CmaFree", "dst" : "CmaFree", "type" : "u64" },
{ "src" : "Unaccepted", "dst" : "Unaccepted", "type" : "u64" },
{ "src" : "Unaccepted", "dst" : "Unaccepted", "type" : "u64", "fill" : 0 },
{ "src" : "HugePages_Total", "dst" : "HugePages_Total", "type" : "u64" },
{ "src" : "HugePages_Free", "dst" : "HugePages_Free", "type" : "u64" },
{ "src" : "HugePages_Rsvd", "dst" : "HugePages_Rsvd", "type" : "u64" },
{ "src" : "HugePages_Surp", "dst" : "HugePages_Surp", "type" : "u64" },
{ "src" : "Hugepagesize", "dst" : "Hugepagesize", "type" : "u64" },
{ "src" : "Hugetlb", "dst" : "Hugetlb", "type" : "u64" },
{ "src" : "DirectMap4k", "dst" : "DirectMap4k", "type" : "u64" },
{ "src" : "DirectMap2M", "dst" : "DirectMap2M", "type" : "u64" },
{ "src" : "DirectMap1G", "dst" : "DirectMap1G", "type" : "u64" }
{ "src" : "DirectMap4k", "dst" : "DirectMap4k", "type" : "u64", "fill" : 0 },
{ "src" : "DirectMap2M", "dst" : "DirectMap2M", "type" : "u64", "fill" : 0 },
{ "src" : "DirectMap1G", "dst" : "DirectMap1G", "type" : "u64", "fill" : 0 }
],
"indices" : [
]
Expand Down Expand Up @@ -567,7 +567,7 @@
{ "src" : "thp_scan_exceed_none_pte", "dst" : "thp_scan_exceed_none_pte", "type" : "u64" },
{ "src" : "thp_scan_exceed_swap_pte", "dst" : "thp_scan_exceed_swap_pte", "type" : "u64" },
{ "src" : "thp_scan_exceed_share_pte", "dst" : "thp_scan_exceed_share_pte", "type" : "u64" },
{ "src" : "thp_split_pud", "dst" : "thp_split_pud", "type" : "u64" },
{ "src" : "thp_split_pud", "dst" : "thp_split_pud", "type" : "u64", "fill" : 0 },
{ "src" : "thp_zero_page_alloc", "dst" : "thp_zero_page_alloc", "type" : "u64" },
{ "src" : "thp_zero_page_alloc_failed", "dst" : "thp_zero_page_alloc_failed", "type" : "u64" },
{ "src" : "thp_swpout", "dst" : "thp_swpout", "type" : "u64" },
Expand All @@ -584,13 +584,13 @@
{ "src" : "zswpin", "dst" : "zswpin", "type" : "u64" },
{ "src" : "zswpout", "dst" : "zswpout", "type" : "u64" },
{ "src" : "zswpwb", "dst" : "zswpwb", "type" : "u64" },
{ "src" : "direct_map_level2_splits", "dst" : "direct_map_level2_splits", "type" : "u64" },
{ "src" : "direct_map_level3_splits", "dst" : "direct_map_level3_splits", "type" : "u64" },
{ "src" : "direct_map_level2_splits", "dst" : "direct_map_level2_splits", "type" : "u64", "fill" : 0 },
{ "src" : "direct_map_level3_splits", "dst" : "direct_map_level3_splits", "type" : "u64", "fill" : 0 },
{ "src" : "vma_lock_success", "dst" : "vma_lock_success", "type" : "u64" },
{ "src" : "vma_lock_abort", "dst" : "vma_lock_abort", "type" : "u64" },
{ "src" : "vma_lock_retry", "dst" : "vma_lock_retry", "type" : "u64" },
{ "src" : "vma_lock_miss", "dst" : "vma_lock_miss", "type" : "u64" },
{ "src" : "nr_unaccepted", "dst" : "nr_unaccepted", "type" : "u64" },
{ "src" : "nr_unaccepted", "dst" : "nr_unaccepted", "type" : "u64", "fill" : 0 },
{ "src" : "nr_unstable", "dst" : "nr_unstable", "type" : "u64" }
],
"indices" : [
Expand Down Expand Up @@ -728,11 +728,13 @@
"8BE378143DF8894C6C911EE1934E5BF166BAD9C012013D1E9F1361F0ACC249E1" : "loadavg_decomp",
"EF4141E721CF871A14A0751296C04A439BD78F448721145DB896EB024D7C3829" : "lustre_llite_decomp",
"EF957A75E226C57176D45950B7281DB1775E4EC86DFE4F7921C8E5210FD2A7EB" : "meminfo_decomp",
"1DFDD62FB6C37AE8A96FA04C5D7975BBFCCBE4C8A12A86678A2AF259F49A1BA4" : "meminfo_decomp",
"E8B9CC8D83FB4E5B779071E801CA351B69DCB9E9CE2601A0B127A2977F11C62A" : "procnetdev2_decomp",
"78935B2B0B932E5FDFD20CF29B561B842978B4A5E75663A3AEB02FD5E3F7712E" : "procstat2_decomp",
"FB038D1C7A059BD675F0C06447F8644AD064583026174B998B904729D23F9487" : "slingshot_info_decomp",
"181972BDD114E997CC71AD6979056DA3C172B640F130DB143649E1355C4F5599" : "slingshot_metrics_decomp",
"85CE1C60D0570924DAE5B17758912D1A3ADA2091ABD946E06B9A0240F53F4FD8" : "vmstat_decomp",
"9292CFE0558DBE06EF95BE5B97A9FA13A3F66CF1523D3E175816F3F0D9C66DD4" : "vmstat_decomp",
"F76BA26012C2F1F481AB0C1E0672D438ECFE0C4F7B2B4942AA7067A1FCE51A75" : "mt_slurm_decomp"
}
}
2 changes: 1 addition & 1 deletion input/config/aarch64/rhel/10.0/default_packages.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
{"package": "kexec-tools", "type": "rpm", "repo_name": "aarch64_baseos"},
{"package": "which", "type": "rpm", "repo_name": "aarch64_baseos"},
{"package": "iperf3", "type": "rpm", "repo_name": "aarch64_appstream"},
{ "package": "docker.io/dellhpcomniaaisolution/image-build-aarch64", "tag": "latest", "type": "image" }
{ "package": "docker.io/dellhpcomniaaisolution/image-build-aarch64", "tag": "1.0", "type": "image" }
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@
ansible.builtin.set_fact:
kube_compute_nodes: >-
{{ service_cluster_metadata | dict2items
| selectattr('value.parent_status', 'defined')
| selectattr('value.parent_status', 'equalto', true)
| selectattr("value.role", "defined")
| selectattr("value.role", "search", "^service_kube_node")
| sort(attribute="key") | list }}
Expand Down