Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions isvctl/configs/providers/aws/config/eks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,22 @@ commands:
TF_VAR_test_pool_labels_json: '{"isv.ncp.validation/workload":"gpu-compute"}'
TF_VAR_test_pool_node_type: "gpu"

# Throwaway pool for the delete leg (K8S06-03): created in the setup phase
# and removed in the test phase. It is CPU-only and uses its own state file
# so that no other check depends on it.
- name: create_test_delete_node_pool
phase: setup
command: "../scripts/eks/create_node_pool.sh"
output_schema: node_pool
timeout: 900 # 15 min - EKS node group join can take 5-8 min
env:
TF_AUTO_APPROVE: "true"
NODE_POOL_STATE_FILE: "terraform-delete.tfstate"
TF_VAR_test_pool_name: "isv-test-delete-pool"
TF_VAR_test_pool_instance_types: '["m6i.large"]'
TF_VAR_test_pool_desired_size: "1"
TF_VAR_test_pool_node_type: "cpu"

# Scale the CPU pool to a new target count. Re-applies the
# terraform-node-pool module with a bumped desired_size; create_node_pool.sh
# is reused because `terraform apply` is idempotent. The emitted
Expand All @@ -134,6 +150,16 @@ commands:
TF_VAR_test_pool_taints_json: '[{"key":"isv.ncp.validation/dedicated","value":"test","effect":"NoSchedule"}]'
TF_VAR_test_pool_node_type: "cpu"

# Delete the throwaway pool (K8S06-03). This runs in the test phase so the
# matching K8sNodePoolCheck can assert the pool reaches zero nodes after all
# test steps.
- name: delete_test_node_pool
phase: test
command: "../scripts/eks/destroy_node_pool.sh"
timeout: 900
env:
TF_AUTO_APPROVE: "true"
NODE_POOL_STATE_FILE: "terraform-delete.tfstate"

# Teardown order: destroy both test node pools before tearing down the
# cluster so their ENIs/instances are freed before the VPC comes down.
# Each destroy targets its pool's state file (matching create).
Expand Down Expand Up @@ -163,6 +189,16 @@ commands:
TF_AUTO_APPROVE: "true"
SHARED_VPC_CLUSTER_STATE_FILE: "terraform.tfstate"

# Safety net in case the test phase (which normally deletes it) didn't
# run; idempotent when the state is already empty.
- name: destroy_test_delete_node_pool
phase: teardown
command: "../scripts/eks/destroy_node_pool.sh"
timeout: 900
env:
TF_AUTO_APPROVE: "true"
NODE_POOL_STATE_FILE: "terraform-delete.tfstate"

- name: teardown
phase: teardown
command: "../scripts/eks/teardown.sh"
Expand Down
14 changes: 13 additions & 1 deletion isvctl/configs/providers/aws/scripts/eks/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ EFS_SC=$(kubectl get sc -o json 2>/dev/null \

STATIC_VOLUME_HANDLE=""
STATIC_DRIVER_NAME=""
STATIC_VOLUME_AZ=""
if [ -n "$BLOCK_SC" ]; then
NODE_AZ=$(kubectl get nodes -l nvidia.com/gpu.present=true \
-o jsonpath='{.items[0].metadata.labels.topology\.kubernetes\.io/zone}' 2>/dev/null || echo "")
Expand Down Expand Up @@ -449,6 +450,16 @@ if [ -n "$BLOCK_SC" ]; then

if [ -n "$STATIC_VOLUME_HANDLE" ]; then
STATIC_DRIVER_NAME="ebs.csi.aws.com"
# EBS volumes are zonal; the static PV must pin its consumer pod to
# the volume's AZ or the attach hangs cross-zone. Read the actual AZ
# (covers both the freshly-created and reused-volume paths).
STATIC_VOLUME_AZ=$(aws ec2 describe-volumes \
--volume-ids "$STATIC_VOLUME_HANDLE" \
--region "$AWS_REGION" \
--query 'Volumes[0].AvailabilityZone' --output text 2>/dev/null || echo "")
if [ "$STATIC_VOLUME_AZ" = "None" ]; then
STATIC_VOLUME_AZ=""
fi
fi
else
echo "Warning: could not determine worker-node AZ; skipping standalone EBS volume creation" >&2
Expand Down Expand Up @@ -490,7 +501,8 @@ cat << EOF
"shared_fs_storage_class": "${EFS_SC}",
"nfs_storage_class": "${EFS_SC}",
"static_volume_handle": "${STATIC_VOLUME_HANDLE}",
"static_driver_name": "${STATIC_DRIVER_NAME}"
"static_driver_name": "${STATIC_DRIVER_NAME}",
"static_volume_az": "${STATIC_VOLUME_AZ}"
},
"aws": {
"region": "${AWS_REGION}",
Expand Down
3 changes: 3 additions & 0 deletions isvctl/configs/suites/k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,9 @@ tests:
static_pv:
volume_handle: "{{ steps.setup.csi.static_volume_handle | default('', true) }}"
csi_driver: "{{ steps.setup.csi.static_driver_name | default('', true) }}"
# Zonal block volumes (EBS/PD/Disk) must pin their consumer pod to
# the volume's AZ; empty for zone-agnostic backends.
zone: "{{ steps.setup.csi.static_volume_az | default('', true) }}"
fs_type: "ext4"
capacity: "1Gi"
access_mode: "ReadWriteOnce"
Expand Down
62 changes: 61 additions & 1 deletion isvtest/src/isvtest/validations/k8s_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,11 @@ class K8sCsiProvisioningModesCheck(BaseValidation):
static_pv.capacity: PV ``spec.capacity.storage`` and the matching
PVC request size (default: ``1Gi``).
static_pv.access_mode: PV / PVC access mode (default: ``ReadWriteOnce``).
static_pv.zone: ``topology.kubernetes.io/zone`` the backing volume
lives in. Set for zonal block backends (AWS EBS, GCE PD, Azure
Disk) so the consumer pod is scheduled in the volume's zone and
the attach does not hang cross-zone. Unset for zone-agnostic
backends (e.g. EFS).
bind_timeout_s: Max wait for PVC Bind and mount-pod Ready
(default: 180).
namespace_prefix: Prefix for the ephemeral namespace
Expand Down Expand Up @@ -1561,6 +1566,7 @@ def run(self) -> None:
capacity=str(static_pv_cfg.get("capacity") or "1Gi"),
access_mode=str(static_pv_cfg.get("access_mode") or "ReadWriteOnce"),
bind_timeout=bind_timeout,
zone=str(static_pv_cfg.get("zone") or ""),
)
if not static_ok:
any_failed = True
Expand Down Expand Up @@ -1685,6 +1691,7 @@ def _run_static(
capacity: str,
access_mode: str,
bind_timeout: int,
zone: str = "",
) -> bool:
"""Run the ``static`` subtest: pre-create PV + PVC → Bound → mount + canary."""
pvc_name = f"csi-prov-static-{uuid.uuid4().hex[:6]}"
Expand All @@ -1698,6 +1705,7 @@ def _run_static(
capacity=capacity,
access_mode=access_mode,
claim_name=pvc_name,
zone=zone,
)
if returncode != 0:
self.report_subtest(
Expand Down Expand Up @@ -1778,6 +1786,7 @@ def _apply_pv(
capacity: str,
access_mode: str,
claim_name: str,
zone: str = "",
) -> tuple[int, str]:
"""Render the static PV manifest and apply it."""

Expand All @@ -1792,6 +1801,7 @@ def _mutate(doc: dict[str, Any]) -> dict[str, Any]:
access_mode=access_mode,
claim_namespace=self._namespace,
claim_name=claim_name,
zone=zone,
)

return self._run_kubectl_apply(render_k8s_manifest(_PV_MANIFEST, _mutate))
Expand Down Expand Up @@ -1895,12 +1905,19 @@ def _set_pv_fields(
access_mode: str,
claim_namespace: str,
claim_name: str,
zone: str = "",
) -> dict[str, Any]:
"""Mutate a parsed PersistentVolume manifest in place with the requested fields.

``claimRef`` pre-reserves the PV for the matching PVC so the binding is
deterministic and cannot race against another claim landing in the same
cluster while the static probe runs.

When ``zone`` is set, a ``spec.nodeAffinity`` on
``topology.kubernetes.io/zone`` pins the volume to that zone. Zonal block
backends (AWS EBS, GCE PD, Azure Disk) can only attach to a node in the
volume's own zone; without this the scheduler may place the consumer pod
in a different zone and the attach hangs until the mount timeout.
"""
metadata = doc.setdefault("metadata", {})
metadata["name"] = name
Expand All @@ -1919,6 +1936,24 @@ def _set_pv_fields(
"namespace": claim_namespace,
"name": claim_name,
}
if zone:
spec["nodeAffinity"] = {
"required": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "topology.kubernetes.io/zone",
"operator": "In",
"values": [zone],
}
]
}
]
}
}
else:
spec.pop("nodeAffinity", None)
return doc


Expand All @@ -1929,12 +1964,37 @@ def _set_mount_pod_fields(
name: str,
pvc_name: str,
) -> dict[str, Any]:
"""Mutate a parsed mount-pod manifest in place, binding its single PVC volume."""
"""Mutate a parsed mount-pod manifest in place, binding its single PVC volume.

The pod is kept off transient test-provisioning node pools (nodes carrying
the ``isv.ncp.validation/pool`` marker). Those pools are created/scaled/
deleted within the same run by node-pool CRUD checks, so a freshly joined
node may not yet have the CSI node-plugin DaemonSet pod running - a probe
pod landing there hangs at mount until the bind timeout. Baseline cluster
nodes never carry the marker, so this is a no-op for providers that do not
provision test pools (single-node k3s/minikube/microk8s included).
"""
metadata = doc.setdefault("metadata", {})
metadata["name"] = name
metadata["namespace"] = namespace

spec = doc.setdefault("spec", {})
spec["affinity"] = {
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "isv.ncp.validation/pool",
"operator": "DoesNotExist",
}
]
}
]
}
}
}
volumes = spec.setdefault("volumes", [])
if not volumes:
volumes.append({"name": "data"})
Expand Down
38 changes: 38 additions & 0 deletions isvtest/tests/test_k8s_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1911,6 +1911,35 @@ def test_missing_sections_are_created(self) -> None:
assert out["spec"]["csi"]["volumeHandle"] == "vh"
assert out["spec"]["claimRef"]["name"] == "pvc"

def test_no_zone_omits_node_affinity(self) -> None:
    """Without ``zone`` (zone-agnostic backend, e.g. EFS) the PV gets no ``nodeAffinity``."""
    pv_doc = self._base_doc()
    mutated = _set_pv_fields(pv_doc, **self._args())
    # No zone was passed, so the spec must not carry any zone pin.
    pv_spec = mutated["spec"]
    assert "nodeAffinity" not in pv_spec

def test_zone_sets_topology_node_affinity(self) -> None:
    """Passing ``zone`` pins the PV to that zone via a required node-affinity term."""
    mutated = _set_pv_fields(self._base_doc(), zone="us-west-2a", **self._args())
    required = mutated["spec"]["nodeAffinity"]["required"]
    # Only the first expression of the first selector term is inspected.
    first_term = required["nodeSelectorTerms"][0]
    first_expr = first_term["matchExpressions"][0]
    assert first_expr == {
        "key": "topology.kubernetes.io/zone",
        "operator": "In",
        "values": ["us-west-2a"],
    }

def _args(self) -> dict[str, Any]:
"""Common required kwargs for ``_set_pv_fields``."""
return {
"name": "pv-z",
"driver": "ebs.csi.aws.com",
"volume_handle": "vol-1",
"fs_type": "ext4",
"capacity": "1Gi",
"access_mode": "ReadWriteOnce",
"claim_namespace": "ns",
"claim_name": "pvc",
}


class TestSetMountPodFields:
"""Tests for ``_set_mount_pod_fields`` - the BusyBox mount-pod mutator."""
Expand Down Expand Up @@ -1943,6 +1972,15 @@ def test_sets_name_namespace_and_pvc(self) -> None:
# only touches volumes so the mount path stays /data as documented.
assert out["spec"]["containers"][0]["volumeMounts"][0]["mountPath"] == "/data"

def test_excludes_test_pool_nodes(self) -> None:
    """The mount pod requires that the ``isv.ncp.validation/pool`` node label be absent."""
    mutated = _set_mount_pod_fields(self._base_doc(), namespace="ns1", name="probe-1", pvc_name="pvc-1")
    node_affinity = mutated["spec"]["affinity"]["nodeAffinity"]
    required = node_affinity["requiredDuringSchedulingIgnoredDuringExecution"]
    # Only the first expression of the first selector term is inspected.
    first_expr = required["nodeSelectorTerms"][0]["matchExpressions"][0]
    assert first_expr == {"key": "isv.ncp.validation/pool", "operator": "DoesNotExist"}


class TestK8sCsiProvisioningModesCheck:
"""Tests for ``K8sCsiProvisioningModesCheck``."""
Expand Down