Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ To release a new version, please select a new version number (usually plus 1 to

Pending
+++++++
* `az aks create` and `az aks nodepool add`: Add `--enable-osdisk-full-caching` (preview) to enable the full-cache ephemeral OS disk feature for a node pool. Requires AFEC registration `Microsoft.ContainerService/FullCachePreview`. Property is immutable after node pool creation.

20.0.0b8
+++++++
Expand Down
16 changes: 16 additions & 0 deletions src/aks-preview/azext_aks_preview/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@
- name: --node-osdisk-type
type: string
short-summary: OS disk type to be used for machines in a given agent pool. Defaults to 'Ephemeral' when possible in conjunction with VM size and OS disk size. May not be changed for this pool after creation. ('Ephemeral' or 'Managed')
- name: --enable-osdisk-fc --enable-osdisk-full-caching
type: bool
short-summary: Enable the full-cache ephemeral OS disk feature for the default node pool.
long-summary: |-
When enabled, the entire operating system is cached on the local
ephemeral OS disk to mitigate E17 events caused by network failures.
Requires Ephemeral OS disk and a VM size with sufficient cache.
Comment thread
weiliu2dev marked this conversation as resolved.
This property is immutable after the node pool is created.
- name: --node-osdisk-diskencryptionset-id -d
type: string
short-summary: ResourceId of the disk encryption set to use for enabling encryption at rest on agent node os disk.
Expand Down Expand Up @@ -2106,6 +2114,14 @@
- name: --node-osdisk-type
type: string
short-summary: OS disk type to be used for machines in a given agent pool. Defaults to 'Ephemeral' when possible in conjunction with VM size and OS disk size. May not be changed for this pool after creation. ('Ephemeral' or 'Managed')
- name: --enable-osdisk-fc --enable-osdisk-full-caching
type: bool
short-summary: Enable the full-cache ephemeral OS disk feature for the node pool.
long-summary: |-
When enabled, the entire operating system is cached on the local
ephemeral OS disk to mitigate E17 events caused by network failures.
Requires Ephemeral OS disk and a VM size with sufficient cache.
Comment thread
weiliu2dev marked this conversation as resolved.
This property is immutable after the node pool is created.
- name: --max-pods -m
type: int
short-summary: The maximum number of pods deployable to a node.
Expand Down
15 changes: 15 additions & 0 deletions src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@
validate_force_upgrade_disable_and_enable_parameters,
validate_azure_service_mesh_revision,
validate_artifact_streaming,
validate_os_disk_full_caching,
validate_custom_endpoints,
validate_bootstrap_container_registry_resource_id,
validate_gateway_prefix_size,
Expand Down Expand Up @@ -899,6 +900,13 @@ def load_arguments(self, _):
)
c.argument("node_osdisk_type", arg_type=get_enum_type(node_os_disk_types))
c.argument("node_osdisk_size", type=int)
c.argument(
"enable_os_disk_full_caching",
options_list=["--enable-osdisk-full-caching", "--enable-osdisk-fc"],
action="store_true",
validator=validate_os_disk_full_caching,
is_preview=True,
)
c.argument("max_pods", type=int, options_list=["--max-pods", "-m"])
c.argument("vm_set_type", validator=validate_vm_set_type)
c.argument(
Expand Down Expand Up @@ -2067,6 +2075,13 @@ def load_arguments(self, _):
c.argument("node_taints", validator=validate_nodepool_taints)
c.argument("node_osdisk_type", arg_type=get_enum_type(node_os_disk_types))
c.argument("node_osdisk_size", type=int)
c.argument(
"enable_os_disk_full_caching",
options_list=["--enable-osdisk-full-caching", "--enable-osdisk-fc"],
action="store_true",
validator=validate_os_disk_full_caching,
is_preview=True,
)
# upgrade strategy
c.argument("upgrade_strategy", arg_type=get_enum_type(upgrade_strategies))
# rolling upgrade params
Expand Down
19 changes: 18 additions & 1 deletion src/aks-preview/azext_aks_preview/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
CONST_MANAGED_CLUSTER_SKU_TIER_STANDARD,
CONST_NETWORK_POD_IP_ALLOCATION_MODE_DYNAMIC_INDIVIDUAL,
CONST_NETWORK_POD_IP_ALLOCATION_MODE_STATIC_BLOCK,
CONST_NODEPOOL_MODE_GATEWAY, CONST_OS_SKU_AZURELINUX,
CONST_NODEPOOL_MODE_GATEWAY, CONST_OS_DISK_TYPE_MANAGED,
CONST_OS_SKU_AZURELINUX,
CONST_OS_SKU_CBLMARINER, CONST_OS_SKU_MARINER)
from azext_aks_preview._helpers import _fuzzy_match
from azure.cli.core import keys
Expand Down Expand Up @@ -953,6 +954,22 @@ def validate_asm_egress_name(namespace):
)


def validate_os_disk_full_caching(namespace):
    """Fail fast when full-cache OS disk is combined with a Managed OS disk.

    Full-cache OS disk requires Ephemeral storage, so the conflicting
    combination is rejected at the CLI rather than after an ARM round-trip.
    Nothing is checked unless ``enable_os_disk_full_caching`` is truthy on the
    namespace, and only an explicit ``Managed`` OS disk type is rejected; an
    unset OS disk type is accepted.

    :param namespace: parsed argument namespace of the command
    :raises ArgumentUsageError: if the flag is set and ``node_osdisk_type``
        equals ``CONST_OS_DISK_TYPE_MANAGED``
    """
    # The second operand is evaluated only when the flag is set.
    if getattr(namespace, "enable_os_disk_full_caching", False) and (
        getattr(namespace, "node_osdisk_type", None) == CONST_OS_DISK_TYPE_MANAGED
    ):
        raise ArgumentUsageError(
            "--enable-osdisk-full-caching requires Ephemeral OS disk; "
            "it cannot be used with --node-osdisk-type Managed."
        )


def validate_artifact_streaming(namespace):
"""Validates artifact streaming flags for mutual exclusivity and OS support."""
enable_artifact_streaming = getattr(namespace, "enable_artifact_streaming", False)
Expand Down
23 changes: 23 additions & 0 deletions src/aks-preview/azext_aks_preview/agentpool_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,19 @@ def get_enable_artifact_streaming(self) -> bool:
)
return enable_artifact_streaming

def get_enable_os_disk_full_caching(self) -> bool:
    """Obtain the value of enable_os_disk_full_caching.

    The value from the raw parameters is returned by default. In CREATE mode,
    a non-None value already present on the attached agentpool takes
    precedence over the raw parameter.

    :return: bool
    """
    raw_value = self.raw_param.get("enable_os_disk_full_caching")

    # Outside CREATE mode the attached agentpool is never consulted.
    if self.decorator_mode != DecoratorMode.CREATE:
        return raw_value
    if not self.agentpool:
        return raw_value

    attached_value = self.agentpool.enable_os_disk_full_caching
    if attached_value is None:
        return raw_value
    return attached_value

def get_enable_managed_gpu(self) -> Union[bool, None]:
"""Obtain the value of enable_managed_gpu.
:return: bool
Expand Down Expand Up @@ -1327,6 +1340,14 @@ def set_up_artifact_streaming(self, agentpool: AgentPool) -> AgentPool:
agentpool.artifact_streaming_profile.enabled = True
return agentpool

def set_up_os_disk_full_caching(self, agentpool: AgentPool) -> AgentPool:
    """Set up enable_os_disk_full_caching property for the AgentPool object.

    The property is set to True only when the context reports the feature as
    enabled; otherwise the agentpool is returned without touching the field.

    :param agentpool: the AgentPool object being constructed
    :return: the same AgentPool object
    """
    self._ensure_agentpool(agentpool)

    full_caching_requested = self.context.get_enable_os_disk_full_caching()
    if not full_caching_requested:
        return agentpool

    agentpool.enable_os_disk_full_caching = True
    return agentpool

def set_up_managed_gpu(self, agentpool: AgentPool) -> AgentPool:
"""Set up managed GPU property for the AgentPool object."""
self._ensure_agentpool(agentpool)
Expand Down Expand Up @@ -1606,6 +1627,8 @@ def construct_agentpool_profile_preview(self) -> AgentPool:
agentpool = self.set_up_init_taints(agentpool)
# set up artifact streaming
agentpool = self.set_up_artifact_streaming(agentpool)
# set up os disk full caching
agentpool = self.set_up_os_disk_full_caching(agentpool)
# set up managed gpu
agentpool = self.set_up_managed_gpu(agentpool)
# set up skip_gpu_driver_install
Expand Down
2 changes: 2 additions & 0 deletions src/aks-preview/azext_aks_preview/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,7 @@ def aks_create(
nodepool_initialization_taints=None,
node_osdisk_type=None,
node_osdisk_size=0,
enable_os_disk_full_caching=False,
vm_set_type=None,
zones=None,
ppg=None,
Expand Down Expand Up @@ -1936,6 +1937,7 @@ def aks_agentpool_add(
node_taints=None,
node_osdisk_type=None,
node_osdisk_size=0,
enable_os_disk_full_caching=False,
max_surge=None,
drain_timeout=None,
node_soak_duration=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,61 @@ def common_get_enable_artifact_streaming(self):
ctx_2.attach_agentpool(agentpool_2)
self.assertEqual(ctx_2.get_enable_artifact_streaming(), None)

def common_get_enable_os_disk_full_caching(self):
    """Shared checks for AKSPreviewAgentPoolContext.get_enable_os_disk_full_caching.

    Covers the raw-parameter default, the attached-agentpool override in
    CREATE mode (in both directions), and the absence of that override in
    UPDATE mode.
    """
    def build_context(raw_flag, mode):
        # Context whose raw parameters carry only the flag under test.
        return AKSPreviewAgentPoolContext(
            self.cmd,
            AKSAgentPoolParamDict({"enable_os_disk_full_caching": raw_flag}),
            self.models,
            mode,
            self.agentpool_decorator_mode,
        )

    def attach_pool_with(context, pool_flag):
        # Attach an initialized agentpool carrying the given property value.
        pool = self.create_initialized_agentpool_instance(
            enable_os_disk_full_caching=pool_flag
        )
        context.attach_agentpool(pool)

    # default: store_true flag not provided -> raw is False
    default_ctx = build_context(False, DecoratorMode.CREATE)
    self.assertEqual(default_ctx.get_enable_os_disk_full_caching(), False)
    # CREATE: value on attached agentpool overrides default False
    attach_pool_with(default_ctx, True)
    self.assertEqual(default_ctx.get_enable_os_disk_full_caching(), True)

    # explicit True from raw param, no attached agentpool
    raw_true_ctx = build_context(True, DecoratorMode.CREATE)
    self.assertEqual(raw_true_ctx.get_enable_os_disk_full_caching(), True)

    # priority: raw True is overridden by attached agentpool False in CREATE mode
    overridden_ctx = build_context(True, DecoratorMode.CREATE)
    attach_pool_with(overridden_ctx, False)
    self.assertEqual(overridden_ctx.get_enable_os_disk_full_caching(), False)

    # UPDATE mode gate: attached agentpool MUST NOT override raw_param
    update_ctx = build_context(False, DecoratorMode.UPDATE)
    attach_pool_with(update_ctx, True)
    self.assertEqual(update_ctx.get_enable_os_disk_full_caching(), False)

def common_get_disable_artifact_streaming(self):
# default
ctx_1 = AKSPreviewAgentPoolContext(
Expand Down Expand Up @@ -1155,6 +1210,9 @@ def test_get_workload_runtime(self):
def test_get_enable_artifact_streaming(self):
self.common_get_enable_artifact_streaming()

def test_get_enable_os_disk_full_caching(self):
    # Delegates to the shared scenario in common_get_enable_os_disk_full_caching,
    # which runs against this class's agentpool_decorator_mode.
    self.common_get_enable_os_disk_full_caching()

def test_get_disable_artifact_streaming(self):
self.common_get_disable_artifact_streaming()

Expand Down Expand Up @@ -1257,7 +1315,10 @@ def test_get_workload_runtime(self):

def test_get_enable_artifact_streaming(self):
self.common_get_enable_artifact_streaming()


def test_get_enable_os_disk_full_caching(self):
    # Delegates to the shared scenario in common_get_enable_os_disk_full_caching,
    # which runs against this class's agentpool_decorator_mode.
    self.common_get_enable_os_disk_full_caching()

def test_get_enable_managed_gpu(self):
self.common_get_enable_managed_gpu()

Expand Down Expand Up @@ -1583,6 +1644,41 @@ def common_set_up_artifact_streaming(self):
)
self.assertEqual(dec_agentpool_1, ground_truth_agentpool_1)

def common_set_up_os_disk_full_caching(self):
    """Shared checks for AKSPreviewAgentPoolAddDecorator.set_up_os_disk_full_caching.

    Verifies that a None agentpool is rejected with CLIInternalError, that a
    False flag leaves the agentpool equal to a freshly initialized one, and
    that a True flag sets enable_os_disk_full_caching on the agentpool.
    """
    def build_decorator(flag_value):
        # Decorator whose raw parameters carry only the flag under test.
        return AKSPreviewAgentPoolAddDecorator(
            self.cmd,
            self.client,
            {"enable_os_disk_full_caching": flag_value},
            self.resource_type,
            self.agentpool_decorator_mode,
        )

    def run_set_up(decorator):
        # Attach a fresh agentpool, apply the set-up step, restore defaults.
        pool = self.create_initialized_agentpool_instance(restore_defaults=False)
        decorator.context.attach_agentpool(pool)
        configured_pool = decorator.set_up_os_disk_full_caching(pool)
        return self._restore_defaults_in_agentpool(configured_pool)

    # default: store_true flag not provided -> raw is False -> field stays unset
    unset_decorator = build_decorator(False)
    with self.assertRaises(CLIInternalError):
        unset_decorator.set_up_os_disk_full_caching(None)
    self.assertEqual(
        run_set_up(unset_decorator),
        self.create_initialized_agentpool_instance(),
    )

    # explicit True -> field set to True
    enabled_decorator = build_decorator(True)
    self.assertEqual(
        run_set_up(enabled_decorator),
        self.create_initialized_agentpool_instance(
            enable_os_disk_full_caching=True
        ),
    )

def common_set_up_managed_gpu(self):
dec_1 = AKSPreviewAgentPoolAddDecorator(
self.cmd,
Expand Down Expand Up @@ -2157,6 +2253,9 @@ def test_set_up_gpu_propertes(self):
def test_set_up_artifact_streaming(self):
self.common_set_up_artifact_streaming()

def test_set_up_os_disk_full_caching(self):
    # Delegates to the shared scenario in common_set_up_os_disk_full_caching,
    # which runs against this class's agentpool_decorator_mode.
    self.common_set_up_os_disk_full_caching()

def test_set_up_managed_gpu(self):
self.common_set_up_managed_gpu()

Expand Down Expand Up @@ -2305,7 +2404,10 @@ def test_set_up_gpu_propertes(self):

def test_set_up_artifact_streaming(self):
self.common_set_up_artifact_streaming()


def test_set_up_os_disk_full_caching(self):
    # Delegates to the shared scenario in common_set_up_os_disk_full_caching,
    # which runs against this class's agentpool_decorator_mode.
    self.common_set_up_os_disk_full_caching()

def test_set_up_managed_gpu(self):
self.common_set_up_managed_gpu()

Expand Down
Loading
Loading