Skip to content
8 changes: 8 additions & 0 deletions 01_install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ if [ -z "${METAL3_DEV_ENV:-}" ]; then
# Patch metal3-dev-env to use Ansible 8.x on centos9/rhel9.
sed -i '/ANSIBLE_VERSION/{ s/10\.7\.0/8.7.0/; }' lib/common.sh

# Go tarball defaults hardcode linux-amd64; use GOARCH passed as extra var.
# Upstream fix: https://github.com/metal3-io/metal3-dev-env/pull/1694
GO_DEFAULTS="vm-setup/roles/packages_installation/defaults/main.yml"
if grep -q 'linux-amd64' "${GO_DEFAULTS}"; then
sed -i 's/go_tarball: "go{{ go_version }}.linux-amd64.tar.gz"/go_tarball: "go{{ go_version }}.linux-{{ GOARCH | default('\''amd64'\'') }}.tar.gz"/' \
"${GO_DEFAULTS}"
fi

popd
fi

Expand Down
10 changes: 10 additions & 0 deletions 02_configure_host.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ export VNC_CONSOLE=true
if [[ $(uname -m) == "aarch64" ]]; then
VNC_CONSOLE=false
echo "libvirt_cdrombus: scsi" >> vm_setup_vars.yml
# On native aarch64 KVM (e.g. AWS Graviton), the upstream metal3-dev-env
# template hardcodes cortex-a57 for all aarch64 VMs. That model only works
# under qemu emulation; native KVM requires host-passthrough. Narrow the
# CPU conditional so native aarch64 falls through to host-passthrough,
# without affecting the <os> or <features> sections that must still fire.
# Upstream fix: https://github.com/metal3-io/metal3-dev-env/pull/1694
TEMPLATE="${VM_SETUP_PATH}/roles/libvirt/templates/baremetalvm.xml.j2"

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same

# Patch the template only when it exists and still contains the unqualified
# '{% if is_aarch64 %}' conditional.
# How the sed program works: on every line matching '{% if is_aarch64 %}' it
# appends the following line to the pattern space (N); only when the joined
# text contains '<!--' is the conditional rewritten to
# '{% if is_aarch64 and libvirt_domain_type == 'qemu' %}'. Matches whose next
# line has no '<!--' are left unchanged.
# The '"'"' sequences close the single-quoted sed script, emit a literal
# single quote, and reopen the script.
if [ -f "${TEMPLATE}" ] && grep -q '{% if is_aarch64 %}' "${TEMPLATE}"; then
sed -i '/{% if is_aarch64 %}/{N; /<!--/s/{% if is_aarch64 %}/{% if is_aarch64 and libvirt_domain_type == '"'"'qemu'"'"' %}/}' "${TEMPLATE}"
fi
fi

# playbooks depend on it
Expand Down
2 changes: 1 addition & 1 deletion agent/01_agent_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if [[ "${MIRROR_COMMAND}" == oc-mirror ]]; then
oc_mirror_file=oc-mirror.tar.gz
oc_mirror_exec=${oc_mirror_file%%.*}
if [[ ! -f "/usr/local/bin/${oc_mirror_exec}" ]]; then
curl -O -L https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/${oc_mirror_file}
curl -O -L https://mirror.openshift.com/pub/openshift-v4/$(uname -m)/clients/ocp/stable/${oc_mirror_file}
tar xzf ${oc_mirror_file}
chmod +x "${oc_mirror_exec}"
sudo mv -f "${oc_mirror_exec}" /usr/local/bin
Expand Down
104 changes: 98 additions & 6 deletions agent/06_agent_create_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ shopt -s nocasematch
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
ARCH=$(uname -m)

# Bus passed as target.bus= to the virt-xml CD-ROM attachments in this script:
# "scsi" when ARCH is aarch64, "sata" for every other architecture.
case "${ARCH}" in
    aarch64) CDROM_BUS="scsi" ;;
    *)       CDROM_BUS="sata" ;;
esac

LOGDIR="${SCRIPTDIR}/logs"
source "$SCRIPTDIR/logging.sh"
source "$SCRIPTDIR/common.sh"
Expand Down Expand Up @@ -143,13 +148,13 @@ function attach_agent_iso() {
for (( n=0; n<${2}; n++ ))
do
name=${CLUSTER_NAME}_${1}_${n}
sudo virt-xml "${name}" --add-device --disk "${agent_iso}",device=cdrom,target.dev=sdc
sudo virt-xml "${name}" --add-device --disk "${agent_iso}",device=cdrom,target.dev=sdc,target.bus=${CDROM_BUS}
if [ "${AGENT_USE_APPLIANCE_MODEL}" == true ]; then
if [ "${AGENT_APPLIANCE_HOTPLUG}" == true ]; then
# Add the device with no image. It will be added later using change-media when config-drive is created
sudo virt-xml "${name}" --add-device --disk device=cdrom,target.dev="${config_image_drive}"
sudo virt-xml "${name}" --add-device --disk device=cdrom,target.dev="${config_image_drive}",target.bus=${CDROM_BUS}
else
sudo virt-xml "${name}" --add-device --disk "${config_image_dir}/agentconfig.noarch.iso,device=cdrom,target.dev=${config_image_drive}"
sudo virt-xml "${name}" --add-device --disk "${config_image_dir}/agentconfig.noarch.iso,device=cdrom,target.dev=${config_image_drive},target.bus=${CDROM_BUS}"
fi
fi
sudo virt-xml "${name}" --edit target=sda --disk="boot_order=1"
Expand All @@ -175,8 +180,8 @@ function attach_appliance_diskimage() {
# Attach the appliance disk image and the config ISO
sudo virt-xml "${name}" --remove-device --disk all
sudo virt-xml "${name}" --add-device --disk "${disk_image}",device=disk,target.dev=sda
sudo virt-xml "${name}" --add-device --disk "${config_image_dir}/agentconfig.noarch.iso,device=cdrom,target.dev=${config_image_drive}"
sudo virt-xml "${name}" --add-device --disk "${config_image_dir}/agentconfig.noarch.iso,device=cdrom,target.dev=${config_image_drive},target.bus=${CDROM_BUS}"

# Boot machine from the appliance disk image
sudo virt-xml "${name}" --edit target=sda --disk="boot_order=1" --start
done
Expand All @@ -190,12 +195,20 @@ function attach_agent_iso_no_registry() {
for (( n=0; n<${2}; n++ ))
do
name=${CLUSTER_NAME}_${1}_${n}
sudo virt-xml "${name}" --add-device --disk "${agent_iso_no_registry}",device=cdrom,target.dev=sdc
sudo virt-xml "${name}" --add-device --disk "${agent_iso_no_registry}",device=cdrom,target.dev=sdc,target.bus=${CDROM_BUS}
sudo virt-xml "${name}" --edit target=sda --disk="boot_order=1"
sudo virt-xml "${name}" --edit target=sdc --disk="boot_order=2" --start
done
}

#######################################
# Eject the media from CD-ROM target "sdc" on every VM of one role.
# Runs `virsh change-media <domain> sdc --eject --live` for the domains
# ${CLUSTER_NAME}_<role>_0 .. ${CLUSTER_NAME}_<role>_<count-1>.
# Globals:
#   CLUSTER_NAME (read)
# Arguments:
#   $1 - role part of the domain name (callers pass master, worker, arbiter)
#   $2 - number of VMs of that role; an empty value is treated as 0
# Returns:
#   Status of the last virsh invocation, or 0 when count is 0.
#######################################
function eject_agent_iso() {
    local role=${1}
    # Default to 0 so an empty count does not break the arithmetic loop.
    local count=${2:-0}
    # Keep the loop index and domain name out of the caller's scope.
    local n name

    for (( n=0; n<count; n++ )); do
        name=${CLUSTER_NAME}_${role}_${n}
        sudo virsh change-media "${name}" sdc --eject --live
    done
}

function automate_rendezvousIP_selection(){
for (( n=0; n<${2}; n++ ))
do
Expand Down Expand Up @@ -228,6 +241,69 @@ function check_assisted_install_UI(){
done
}

#######################################
# Poll the assisted-service REST API on the rendezvous host (port 3001) until
# every expected host reports status "installed".
# Globals:
#   NUM_MASTERS, NUM_WORKERS, NUM_ARBITERS (read) - summed to get the number
#   of hosts that must reach "installed".
# Calls:
#   getRendezvousIP, wrap_if_ipv6 (defined outside this block), curl, jq.
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 once all expected hosts are "installed";
#   1 if any host is in "error"/"cancelled" state, or if the polling budget
#   (120 attempts, 30s apart) is exhausted.
# Notes:
#   xtrace is switched off while polling to keep the log readable and is
#   restored to the caller's previous setting on every exit path.
#######################################
function wait_for_hosts_installed() {
    local rendezvousIP
    rendezvousIP=$(getRendezvousIP)
    # Assigned separately from `local` so the helper's exit status is not masked.
    local api_host
    api_host=$(wrap_if_ipv6 "${rendezvousIP}")
    local base_url="http://${api_host}:3001"
    local clusters_url="${base_url}/api/assisted-install/v2/clusters"
    local expected_hosts=$(( NUM_MASTERS + NUM_WORKERS + NUM_ARBITERS ))
    local max_attempts=120
    local sleep_seconds=30
    local cluster_id=""
    local attempt hosts_url hosts_json statuses error_count installed_count
    # Empty while still polling; set to the final return code when done.
    local rc=""

    echo "aarch64: waiting for all ${expected_hosts} hosts to reach 'installed' status before ejecting CDROM..."

    # Remember the caller's xtrace setting instead of forcing it on afterwards.
    local xtrace_was_on=false
    if shopt -qo xtrace; then
        xtrace_was_on=true
    fi
    set +x

    for (( attempt=1; attempt<=max_attempts; attempt++ )); do
        # The cluster id is looked up once and reused for later attempts.
        if [[ -z "${cluster_id}" ]]; then
            # Best effort: the API may not be listening yet, so failures are retried.
            cluster_id=$(curl -s -f "${clusters_url}" 2>/dev/null | jq -r '.[0].id // empty' 2>/dev/null) || true
            if [[ -z "${cluster_id}" ]]; then
                echo " Attempt ${attempt}/${max_attempts}: assisted-service API not ready, retrying in ${sleep_seconds}s..."
                sleep "${sleep_seconds}"
                continue
            fi
        fi

        hosts_url="${clusters_url}/${cluster_id}/hosts"
        hosts_json=$(curl -s -f "${hosts_url}" 2>/dev/null) || true

        if [[ -z "${hosts_json}" ]]; then
            echo " Attempt ${attempt}/${max_attempts}: could not reach hosts endpoint, retrying in ${sleep_seconds}s..."
            sleep "${sleep_seconds}"
            continue
        fi

        statuses=$(echo "${hosts_json}" | jq -r '[.[].status] | join(",")' 2>/dev/null) || true

        error_count=$(echo "${hosts_json}" | jq '[.[].status | select(. == "error" or . == "cancelled")] | length' 2>/dev/null) || true
        # An unparsable response yields an empty count; treat it as zero.
        error_count=${error_count:-0}
        if [[ "${error_count}" -gt 0 ]]; then
            echo "ERROR: ${error_count} host(s) in error/cancelled state. Statuses: ${statuses}"
            rc=1
            break
        fi

        installed_count=$(echo "${hosts_json}" | jq '[.[].status | select(. == "installed")] | length' 2>/dev/null) || true
        installed_count=${installed_count:-0}

        echo " Attempt ${attempt}/${max_attempts}: ${installed_count}/${expected_hosts} hosts installed (statuses: ${statuses:-unknown})"

        if [[ "${installed_count}" -eq "${expected_hosts}" ]]; then
            echo "aarch64: all ${expected_hosts} hosts have reached 'installed' status"
            rc=0
            break
        fi

        sleep "${sleep_seconds}"
    done

    # Loop ran to completion without a verdict: polling budget exhausted.
    if [[ -z "${rc}" ]]; then
        echo "ERROR: timed out after $((max_attempts * sleep_seconds / 60)) minutes waiting for hosts to reach 'installed' status"
        rc=1
    fi

    if [[ "${xtrace_was_on}" == true ]]; then
        set -x
    fi
    return "${rc}"
}

function get_node0_ip() {
# shellcheck disable=SC2059
node0_name=$(printf "${MASTER_HOSTNAME_FORMAT}" 0)
Expand Down Expand Up @@ -545,6 +621,14 @@ case "${AGENT_E2E_TEST_BOOT_MODE}" in
attach_agent_iso worker "$NUM_WORKERS"
attach_agent_iso arbiter "$NUM_ARBITERS"

if [[ "${ARCH}" == "aarch64" ]]; then
wait_for_hosts_installed || echo "WARNING: proceeding with CDROM eject despite host status issues"
eject_agent_iso master "$NUM_MASTERS"
eject_agent_iso worker "$NUM_WORKERS"
eject_agent_iso arbiter "$NUM_ARBITERS"
echo "aarch64: CDROM media ejected from all VMs"
fi

;;

"PXE" )
Expand Down Expand Up @@ -619,6 +703,14 @@ case "${AGENT_E2E_TEST_BOOT_MODE}" in

echo "Waiting for 2 mins to arrive at agent-tui screen"
sleep 120

if [[ "${ARCH}" == "aarch64" ]]; then
eject_agent_iso master "$NUM_MASTERS"
eject_agent_iso worker "$NUM_WORKERS"
eject_agent_iso arbiter "$NUM_ARBITERS"
echo "aarch64: CDROM media ejected from all VMs"
fi

automate_rendezvousIP_selection master "$NUM_MASTERS"
automate_rendezvousIP_selection worker "$NUM_WORKERS"
automate_rendezvousIP_selection arbiter "$NUM_ARBITERS"
Expand Down
2 changes: 1 addition & 1 deletion agent/07_agent_add_extraworker_nodes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ case "${AGENT_E2E_TEST_BOOT_MODE}" in

for (( n=0; n < NUM_EXTRA_WORKERS; n++ ))
do
sudo virt-xml "${CLUSTER_NAME}_extraworker_${n}" --add-device --disk "$OCP_DIR/add-node//node.x86_64.iso,device=cdrom,target.dev=sdc"
sudo virt-xml "${CLUSTER_NAME}_extraworker_${n}" --add-device --disk "$OCP_DIR/add-node//node.$(uname -m).iso,device=cdrom,target.dev=sdc"
sudo virt-xml "${CLUSTER_NAME}_extraworker_${n}" --edit target=sda --disk="boot_order=1"
sudo virt-xml "${CLUSTER_NAME}_extraworker_${n}" --edit target=sdc --disk="boot_order=2" --start
done
Expand Down
2 changes: 1 addition & 1 deletion agent/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export EXTRA_MANIFESTS_PATH="${OCP_DIR}/openshift"
# in install-config.yaml, OR
# 3. ISCSI, to contain the iPXE file needed for iSCSI booting
export BOOT_SERVER_DIR=${WORKING_DIR}/boot-artifacts
export PXE_BOOT_FILE=agent.x86_64.ipxe
export PXE_BOOT_FILE=agent.$(uname -m).ipxe
# FIXME: agent/common.sh is sourced without network.sh
# wrap_if_ipv6 and PROVISIONING_HOST_EXTERNAL_IP are undefined
# errors masked by export which returns true
Expand Down
4 changes: 2 additions & 2 deletions agent/iscsi_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function agent_create_iscsi_network() {
<ip address="${ISCSI_NETWORK_SUBNET}.1" netmask="255.255.255.0">
<dhcp>
<range start='${ISCSI_NETWORK_SUBNET}.20' end='${ISCSI_NETWORK_SUBNET}.120'/>
<bootp file='http://${ISCSI_NETWORK_SUBNET}.1:8089/agent.x86_64-iscsi.ipxe'/>
<bootp file='http://${ISCSI_NETWORK_SUBNET}.1:8089/agent.$(uname -m)-iscsi.ipxe'/>
</dhcp>
</ip>
</network>
Expand Down Expand Up @@ -79,7 +79,7 @@ function agent_create_iscsi_pxe_file() {

# Set 'hostname' variable in file. It will be resolved by host during PXE boot
# in order to access a unique target for this host.
cat > "${boot_dir}/agent.x86_64-iscsi.ipxe" << EOF
cat > "${boot_dir}/agent.$(uname -m)-iscsi.ipxe" << EOF
#!ipxe
set initiator-iqn ${ISCSI_INITIATOR_BASE}:\${hostname}
sanboot --keep iscsi:${ISCSI_NETWORK_SUBNET}.1::::${ISCSI_INITIATOR_BASE}:\${hostname}
Expand Down
6 changes: 3 additions & 3 deletions agent/iso_no_registry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ function create_agent_iso_no_registry() {

# Deletes all files and directories under asset_dir
# example, ocp/ostest/iso_builder/4.19.*
# except the final generated ISO file (agent-ove.x86_64.iso),
# except the final generated ISO file (agent-ove.${ARCH}.iso),
# to free up disk space while preserving the built artifact.
# Note: This optional cleanup is relevant only when the
# AGENT_CLEANUP_ISO_BUILDER_CACHE_LOCAL_DEV is set to true,
Expand All @@ -119,8 +119,8 @@ function cleanup_diskspace_agent_iso_noregistry() {

echo "Cleaning up directory: $dir"

# Delete all files and symlinks except the agent-ove.x86_64.iso
sudo find "$dir" \( -type f -o -type l \) ! -name 'agent-ove.x86_64.iso' -print -delete
# Delete all files and symlinks except the agent-ove ISO
sudo find "$dir" \( -type f -o -type l \) ! -name "agent-ove.${ARCH}.iso" -print -delete

# Remove any empty directories left behind
sudo find "$dir" -type d -empty -print -delete
Expand Down
2 changes: 1 addition & 1 deletion rhcos.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
if $OPENSHIFT_INSTALLER coreos print-stream-json >/dev/null 2>&1; then
$OPENSHIFT_INSTALLER coreos print-stream-json > "$OCP_DIR/rhcos.json"
TOP_LEVEL_FORMAT="$(jq -r '.architectures.x86_64.artifacts.openstack.formats | keys[]' "$OCP_DIR/rhcos.json" | head -n1)"
TOP_LEVEL_FORMAT="$(jq -r ".architectures.$(uname -m).artifacts.openstack.formats | keys[]" "$OCP_DIR/rhcos.json" | head -n1)"
MACHINE_OS_INSTALLER_IMAGE_URL=$(jq -r ".architectures.$(uname -m).artifacts.openstack.formats[\"${TOP_LEVEL_FORMAT}\"].disk.location" "$OCP_DIR/rhcos.json")
export MACHINE_OS_INSTALLER_IMAGE_URL
MACHINE_OS_INSTALLER_IMAGE_SHA256=$(jq -r ".architectures.$(uname -m).artifacts.openstack.formats[\"${TOP_LEVEL_FORMAT}\"].disk[\"sha256\"]" "$OCP_DIR/rhcos.json")
Expand Down