diff --git a/.gitignore b/.gitignore index 6603f927d..ee7e248e6 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,9 @@ dist .ruff_cache .env .tox + +# Ignore charts directory +simplyblock_core/scripts/charts/charts/ + +# Ignore Helm requirements lock file +simplyblock_core/scripts/charts/requirements.lock diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..37d1834ca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2025 simplyblock GmbH + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/docker/Dockerfile b/docker/Dockerfile index ce1a83ae1..c8999b47d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,29 @@ # syntax=docker/dockerfile:1 FROM simplyblock/simplyblock:base_image +LABEL name="simplyblock" +LABEL vendor="Simplyblock" +LABEL version="1.0.0" +LABEL release="1" +LABEL summary="Simplyblock control plane component" +LABEL description="Simplyblock control plane container" +LABEL maintainer="developers@simplyblock.io" + +COPY LICENSE /licenses/LICENSE + WORKDIR /app COPY requirements.txt . -RUN pip3 install -r requirements.txt +RUN pip3 install --no-cache-dir -r requirements.txt + COPY . /app RUN python setup.py install + +RUN if [ -d /usr/share/terminfo ]; then \ + find /usr/share/terminfo -lname '*ncr260vt300wpp*' -exec rm -f {} + ; \ + rm -f /usr/share/terminfo/n/ncr260vt300wpp || true ; \ + fi + diff --git a/docker/Dockerfile_base b/docker/Dockerfile_base index 226188c96..735d331b1 100644 --- a/docker/Dockerfile_base +++ b/docker/Dockerfile_base @@ -38,3 +38,4 @@ RUN pip3 install setuptools --upgrade COPY requirements.txt requirements.txt RUN pip3 install -r requirements.txt + diff --git a/docs/talos.md b/docs/talos.md index 47ff817d5..f1406ef38 100644 --- a/docs/talos.md +++ b/docs/talos.md @@ -19,26 +19,12 @@ kubectl label namespace simplyblock \ --overwrite ``` - -Patch the host machine so that OpenEBS could work - Create a machine config patch with the contents below and save as patch.yaml ``` cat > patch.yaml <<'EOF' machine: sysctls: vm.nr_hugepages: "1024" - nodeLabels: - openebs.io/engine: mayastor - kubelet: - extraMounts: - - destination: /var/openebs/local - type: bind - source: /var/openebs/local - options: - - rbind - - rshared - - rw EOF talosctl -e -n patch mc -p @patch.yaml diff --git a/e2e/__init__.py b/e2e/__init__.py index e8cae33f7..31164238e 100644 --- a/e2e/__init__.py +++ b/e2e/__init__.py @@ -55,6 +55,7 @@ from stress_test.continuous_failover_ha_geomtery import 
RandomMultiGeometryFailoverTest from stress_test.continuous_failover_ha_2node import RandomMultiClient2NodeFailoverTest from stress_test.continuous_failover_ha_rdma import RandomRDMAFailoverTest +from stress_test.continuous_failover_ha_multi_client_quick_outage import RandomRapidFailoverNoGap from e2e_tests.upgrade_tests.major_upgrade import TestMajorUpgrade @@ -96,8 +97,8 @@ def get_all_tests(custom=True, ha_test=False): TestLvolFioNpcs0, TestLvolFioNpcs1, TestLvolFioNpcs2, - TestLvolFioQOSBW, - TestLvolFioQOSIOPS, + # TestLvolFioQOSBW, + # TestLvolFioQOSIOPS, TestSingleNodeOutage, # TestSingleNodeReboot, # TestHASingleNodeReboot, @@ -147,6 +148,7 @@ def get_stress_tests(): RandomMultiGeometryFailoverTest, RandomMultiClient2NodeFailoverTest, RandomRDMAFailoverTest, + RandomRapidFailoverNoGap, ] return tests @@ -161,4 +163,4 @@ def get_load_tests(): tests = [ TestLvolOutageLoadTest ] - return tests \ No newline at end of file + return tests diff --git a/e2e/continuous_log_collector.py b/e2e/continuous_log_collector.py index 48f06fd80..d1ea68c38 100644 --- a/e2e/continuous_log_collector.py +++ b/e2e/continuous_log_collector.py @@ -1,6 +1,5 @@ import os from datetime import datetime -from pathlib import Path from utils.ssh_utils import SshUtils, RunnerK8sLog from logger_config import setup_logger @@ -22,7 +21,7 @@ def __init__(self,docker_logs_path=None): def get_log_directory(self): timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - return os.path.join(Path.home(), "container-logs", f"manual-logs-{timestamp}") + return os.path.join('/mnt/nfs_share/', f"snapshot-repliction-from-replicated-clone-{timestamp}") def collect_logs(self, test_name): all_nodes = set() @@ -75,4 +74,4 @@ def collect_logs(self, test_name): if __name__ == "__main__": collector = ContinuousLogCollector() - collector.collect_logs(test_name="Manual") + collector.collect_logs(test_name="snapshot-repliction-from-replicated-clone") diff --git a/e2e/e2e_tests/cluster_test_base.py 
b/e2e/e2e_tests/cluster_test_base.py index 5077544b0..d37222c88 100644 --- a/e2e/e2e_tests/cluster_test_base.py +++ b/e2e/e2e_tests/cluster_test_base.py @@ -401,13 +401,17 @@ def collect_management_details(self, post_teardown=False): cmd = f"{self.base_cmd} sn check {result['uuid']} >& {base_path}/node{node}_check{suffix}.txt" self.ssh_obj.exec_command(self.mgmt_nodes[0], cmd) + cmd = f"{self.base_cmd} sn get {result['uuid']} >& {base_path}/node{node}_get{suffix}.txt" + self.ssh_obj.exec_command(self.mgmt_nodes[0], cmd) + node+=1 - for node in self.fio_node: + all_nodes = self.storage_nodes + self.mgmt_nodes + self.client_machines + for node in all_nodes: base_path = os.path.join(self.docker_logs_path, node) - cmd = f"journalctl -k >& {base_path}/jounalctl_{node}.txt" + cmd = f"journalctl -k --no-tail >& {base_path}/jounalctl_{node}-final.txt" self.ssh_obj.exec_command(node, cmd) - cmd = f"dmesg -T >& {base_path}/dmesg_{node}.txt" + cmd = f"dmesg -T >& {base_path}/dmesg_{node}-final.txt" self.ssh_obj.exec_command(node, cmd) def teardown(self, delete_lvols=True, close_ssh=True): diff --git a/e2e/e2e_tests/single_node_multi_fio_perf.py b/e2e/e2e_tests/single_node_multi_fio_perf.py index 86a75c4d5..681cc1742 100644 --- a/e2e/e2e_tests/single_node_multi_fio_perf.py +++ b/e2e/e2e_tests/single_node_multi_fio_perf.py @@ -187,10 +187,11 @@ def cleanup_lvols(self, lvol_configs): self.logger.info("Starting cleanup of LVOLs") for config in lvol_configs: lvol_name = config['lvol_name'] - self.ssh_obj.unmount_path(node=self.client_machines[0], - device=self.lvol_devices[lvol_name]['MountPath']) - self.ssh_obj.remove_dir(node=self.client_machines[0], - dir_path=self.lvol_devices[lvol_name]['MountPath']) + if config['mount']: + self.ssh_obj.unmount_path(node=self.client_machines[0], + device=self.lvol_devices[lvol_name]['MountPath']) + self.ssh_obj.remove_dir(node=self.client_machines[0], + dir_path=self.lvol_devices[lvol_name]['MountPath']) lvol_id = 
self.sbcli_utils.get_lvol_id(lvol_name=lvol_name) subsystems = self.ssh_obj.get_nvme_subsystems(node=self.client_machines[0], nqn_filter=lvol_id) diff --git a/e2e/stress_test/continuous_failover_ha_multi_client.py b/e2e/stress_test/continuous_failover_ha_multi_client.py index a2869482d..0f0c9f94e 100644 --- a/e2e/stress_test/continuous_failover_ha_multi_client.py +++ b/e2e/stress_test/continuous_failover_ha_multi_client.py @@ -42,6 +42,7 @@ def __init__(self, **kwargs): self.sn_nodes = [] self.current_outage_node = None self.snapshot_names = [] + self.current_outage_nodes = [] self.disconnect_thread = None self.outage_start_time = None self.outage_end_time = None @@ -60,8 +61,7 @@ def __init__(self, **kwargs): # self.outage_types = ["graceful_shutdown", "container_stop", "interface_full_network_interrupt", # "interface_partial_network_interrupt", # "partial_nw"] - self.outage_types = ["graceful_shutdown", "container_stop", "interface_full_network_interrupt", - "interface_partial_network_interrupt"] + self.outage_types = ["graceful_shutdown", "container_stop", "interface_full_network_interrupt"] # self.outage_types = ["partial_nw"] self.blocked_ports = None self.outage_log_file = os.path.join("logs", f"outage_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") @@ -111,7 +111,26 @@ def create_lvols_with_fio(self, count): lvol_name = f"{self.lvol_name}_{i}" if not is_crypto else f"c{self.lvol_name}_{i}" self.logger.info(f"Creating lvol with Name: {lvol_name}, fs type: {fs_type}, crypto: {is_crypto}") try: - if self.current_outage_node: + self.logger.info(f"Current Outage Node: {self.current_outage_nodes}") + if self.current_outage_nodes: + self.logger.info(f"Primary vs secondary: {self.sn_primary_secondary_map}") + skip_nodes = [node for node in self.sn_primary_secondary_map if self.sn_primary_secondary_map[node] in self.current_outage_nodes] + self.logger.info(f"Skip Nodes: {skip_nodes}") + for node in self.current_outage_nodes: + skip_nodes.append(node) + 
self.logger.info(f"Skip Nodes: {skip_nodes}") + self.logger.info(f"Storage Nodes with sec: {self.sn_nodes_with_sec}") + host_id = [node for node in self.sn_nodes_with_sec if node not in skip_nodes] + self.sbcli_utils.add_lvol( + lvol_name=lvol_name, + pool_name=self.pool_name, + size=self.lvol_size, + crypto=is_crypto, + key1=self.lvol_crypt_keys[0], + key2=self.lvol_crypt_keys[1], + host_id=host_id[0] + ) + elif self.current_outage_node: skip_nodes = [node for node in self.sn_primary_secondary_map if self.sn_primary_secondary_map[node] == self.current_outage_node] skip_nodes.append(self.current_outage_node) skip_nodes.append(self.sn_primary_secondary_map[self.current_outage_node]) @@ -276,7 +295,7 @@ def create_lvols_with_fio(self, count): "iodepth": 1, "numjobs": 5, "time_based": True, - "runtime": 2000, + "runtime": 3000, "log_avg_msec": 1000, "iolog_file": self.lvol_mount_details[lvol_name]["iolog_base_path"], }, @@ -306,11 +325,11 @@ def perform_random_outage(self): node_ip = node_details[0]["mgmt_ip"] node_rpc_port = node_details[0]["rpc_port"] - sleep_n_sec(120) + sleep_n_sec(5) for node in self.sn_nodes_with_sec: - self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], - storage_node_id=node) - + # self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], + # storage_node_id=node) + self.logger.info("Skipping lvstore dump!!") for node in self.sn_nodes_with_sec: cur_node_details = self.sbcli_utils.get_storage_node_details(node) cur_node_ip = cur_node_details[0]["mgmt_ip"] @@ -417,7 +436,7 @@ def perform_random_outage(self): self.disconnect_thread = threading.Thread( target=self.ssh_obj.disconnect_all_active_interfaces, - args=(node_ip, active_interfaces, 600), + args=(node_ip, active_interfaces, 300), ) self.disconnect_thread.start() elif outage_type == "interface_partial_network_interrupt": @@ -430,7 +449,7 @@ def perform_random_outage(self): self.disconnect_thread = threading.Thread( target=self.ssh_obj.disconnect_all_active_interfaces, - args=(node_ip, 
active_interfaces, 600), + args=(node_ip, active_interfaces, 300), ) self.disconnect_thread.start() elif outage_type == "partial_nw": @@ -478,12 +497,12 @@ def perform_random_outage(self): self.ssh_obj.disconnect_lvol_node_device(node=self.lvol_mount_details[lvol]["Client"], device=self.lvol_mount_details[lvol]["Device"]) if outage_type != "partial_nw" or outage_type != "partial_nw_single_port": - sleep_n_sec(120) + sleep_n_sec(10) return outage_type - def restart_nodes_after_failover(self, outage_type): + def restart_nodes_after_failover(self, outage_type, restart=False): """Perform steps for node restart.""" node_details = self.sbcli_utils.get_storage_node_details(self.current_outage_node) node_ip = node_details[0]["mgmt_ip"] @@ -543,14 +562,48 @@ def restart_nodes_after_failover(self, outage_type): self.ssh_obj.exec_command(node=self.lvol_mount_details[lvol]["Client"], command=connect) elif outage_type == "container_stop": - self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=1000) - # Log the restart event - self.log_outage_event(self.current_outage_node, outage_type, "Node restarted", outage_time=1) + if restart: + max_retries = 10 + retry_delay = 10 # seconds + + # Retry mechanism for restarting the node + for attempt in range(max_retries): + try: + force=False + if attempt == max_retries - 1: + force=True + self.logger.info("[CHECK] Restarting Node via CLI with Force flag as via API Fails.") + else: + self.logger.info("[CHECK] Restarting Node via CLI as via API Fails.") + self.ssh_obj.restart_node(node=self.mgmt_nodes[0], + node_id=self.current_outage_node, + force=force) + # else: + # self.sbcli_utils.restart_node(node_uuid=self.current_outage_node, expected_error_code=[503]) + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=1000) + break # Exit loop if successful + except Exception as _: + if attempt < max_retries - 2: + self.logger.info(f"Attempt {attempt + 1} failed to 
restart node. Retrying in {retry_delay} seconds...") + sleep_n_sec(retry_delay) + elif attempt < max_retries - 1: + self.logger.info(f"Attempt {attempt + 1} failed to restart node via API. Retrying in {retry_delay} seconds via CMD...") + sleep_n_sec(retry_delay) + else: + self.logger.info("Max retries reached. Failed to restart node.") + raise # Rethrow the last exception + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=1000) + # Log the restart event + self.log_outage_event(self.current_outage_node, outage_type, "Node restarted", outage_time=0) + else: + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=1000) + # Log the restart event + self.log_outage_event(self.current_outage_node, outage_type, "Node restarted", outage_time=2) elif "network_interrupt" in outage_type: self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=1000) # Log the restart event - self.log_outage_event(self.current_outage_node, outage_type, "Node restarted", outage_time=11) + self.log_outage_event(self.current_outage_node, outage_type, "Node restarted", outage_time=6) if not self.k8s_test: for node in self.storage_nodes: @@ -608,9 +661,9 @@ def restart_nodes_after_failover(self, outage_type): # sleep_n_sec(30) for node in self.sn_nodes_with_sec: - self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], - storage_node_id=node) - + # self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], + # storage_node_id=node) + self.logger.info("Skipping lvstore dump!!") def create_snapshots_and_clones(self): """Create snapshots and clones during an outage.""" @@ -777,7 +830,7 @@ def create_snapshots_and_clones(self): "iodepth": 1, "numjobs": 5, "time_based": True, - "runtime": 2000, + "runtime": 3000, "log_avg_msec": 1000, "iolog_file": self.clone_mount_details[clone_name]["iolog_base_path"], }, @@ -786,22 +839,23 @@ def create_snapshots_and_clones(self): self.fio_threads.append(fio_thread) 
self.logger.info(f"Created snapshot {snapshot_name} and clone {clone_name}.") - self.sbcli_utils.resize_lvol(lvol_id=self.lvol_mount_details[lvol]["ID"], - new_size=f"{self.int_lvol_size}G") + if self.lvol_mount_details[lvol]["ID"]: + self.sbcli_utils.resize_lvol(lvol_id=self.lvol_mount_details[lvol]["ID"], + new_size=f"{self.int_lvol_size}G") sleep_n_sec(10) - self.sbcli_utils.resize_lvol(lvol_id=self.clone_mount_details[clone_name]["ID"], - new_size=f"{self.int_lvol_size}G") - + if self.clone_mount_details[clone_name]["ID"]: + self.sbcli_utils.resize_lvol(lvol_id=self.clone_mount_details[clone_name]["ID"], + new_size=f"{self.int_lvol_size}G") + def delete_random_lvols(self, count): """Delete random lvols during an outage.""" skip_nodes = [node for node in self.sn_primary_secondary_map if self.sn_primary_secondary_map[node] == self.current_outage_node] skip_nodes.append(self.current_outage_node) skip_nodes.append(self.sn_primary_secondary_map[self.current_outage_node]) - skip_nodes_lvol = [] - self.logger.info(f"Skipping Nodes: {skip_nodes_lvol}") + self.logger.info(f"Skipping Nodes: {skip_nodes}") available_lvols = [ - lvol for node, lvols in self.node_vs_lvol.items() if node not in skip_nodes_lvol for lvol in lvols + lvol for node, lvols in self.node_vs_lvol.items() if node not in skip_nodes for lvol in lvols ] self.logger.info(f"Available Lvols: {available_lvols}") if len(available_lvols) < count: @@ -922,7 +976,7 @@ def perform_failover_during_outage(self): storage_node_id=node, logs_path=self.docker_logs_path ) - self.create_lvols_with_fio(3) + self.create_lvols_with_fio(5) if not self.k8s_test: for node in self.storage_nodes: self.ssh_obj.restart_docker_logging( @@ -1041,7 +1095,7 @@ def restart_fio(self, iteration): "iodepth": 1, "numjobs": 5, "time_based": True, - "runtime": 2000, + "runtime": 3000, "log_avg_msec": 1000, "iolog_file": self.lvol_mount_details[lvol]["iolog_base_path"], }, @@ -1150,7 +1204,7 @@ def run(self): storage_node_id=node, 
logs_path=self.docker_logs_path ) - self.create_lvols_with_fio(5) + self.create_lvols_with_fio(3) if not self.k8s_test: for node in self.storage_nodes: self.ssh_obj.restart_docker_logging( @@ -1175,7 +1229,7 @@ def run(self): else: self.logger.info(f"Current outage node: {self.current_outage_node} is secondary node. Skipping delete and create") if outage_type != "partial_nw" or outage_type != "partial_nw_single_port": - sleep_n_sec(280) + sleep_n_sec(100) for node in self.sn_nodes_with_sec: cur_node_details = self.sbcli_utils.get_storage_node_details(node) cur_node_ip = cur_node_details[0]["mgmt_ip"] @@ -1195,7 +1249,7 @@ def run(self): ) self.logger.info("Waiting for fallback.") if outage_type != "partial_nw" or outage_type != "partial_nw_single_port": - sleep_n_sec(100) + sleep_n_sec(15) time_duration = self.common_utils.calculate_time_duration( start_timestamp=self.outage_start_time, end_timestamp=self.outage_end_time @@ -1213,23 +1267,24 @@ def run(self): no_task_ok = outage_type in {"partial_nw", "partial_nw_single_port", "lvol_disconnect_primary"} if not self.sbcli_utils.is_secondary_node(self.current_outage_node): self.validate_migration_for_node(self.outage_start_time, 2000, None, 60, no_task_ok=no_task_ok) + # pass for clone, clone_details in self.clone_mount_details.items(): self.common_utils.validate_fio_test(clone_details["Client"], log_file=clone_details["Log"]) - # self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/local-{clone}_fio*"]) - # self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/{clone}_fio_iolog*"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/local-{clone}_fio*"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/{clone}_fio_iolog*"]) for lvol, lvol_details in self.lvol_mount_details.items(): self.common_utils.validate_fio_test(lvol_details["Client"], log_file=lvol_details["Log"]) - # self.ssh_obj.delete_files(lvol_details["Client"], 
[f"{self.log_path}/local-{lvol}_fio*"]) - # self.ssh_obj.delete_files(lvol_details["Client"], [f"{self.log_path}/{lvol}_fio_iolog*"]) + self.ssh_obj.delete_files(lvol_details["Client"], [f"{self.log_path}/local-{lvol}_fio*"]) + self.ssh_obj.delete_files(lvol_details["Client"], [f"{self.log_path}/{lvol}_fio_iolog*"]) # Perform failover and manage resources during outage outage_type = self.perform_failover_during_outage() if outage_type != "partial_nw" or outage_type != "partial_nw_single_port": - sleep_n_sec(100) + sleep_n_sec(15) time_duration = self.common_utils.calculate_time_duration( start_timestamp=self.outage_start_time, end_timestamp=self.outage_end_time diff --git a/e2e/stress_test/continuous_failover_ha_multi_client_quick_outage.py b/e2e/stress_test/continuous_failover_ha_multi_client_quick_outage.py new file mode 100644 index 000000000..c2c1051a2 --- /dev/null +++ b/e2e/stress_test/continuous_failover_ha_multi_client_quick_outage.py @@ -0,0 +1,534 @@ +# stress_test/continuous_failover_ha_multi_client_quick_outage.py +# Fast outages with long-running FIO, no churn beyond initial setup. 
+# - Create lvols, snapshots, clones ONCE at the beginning +# - Start 60min FIO on all mounts (lvols + clones) +# - Run fast outages (as soon as node is ONLINE again) +# - Every 5 outages: wait for all FIO to complete, validate, then (optionally) wait for migration window +# - Graceful shutdown: suspend -> wait SUSPENDED -> shutdown -> wait OFFLINE -> keep offline ~8 min (500s) -> restart +# - After any restart: 15–30s idle then immediately next outage + +import os +import random +import string +import threading +from datetime import datetime +from utils.common_utils import sleep_n_sec +from exceptions.custom_exception import LvolNotConnectException +from stress_test.lvol_ha_stress_fio import TestLvolHACluster + + +def _rand_id(n=15, first_alpha=True): + letters = string.ascii_uppercase + digits = string.digits + allc = letters + digits + if first_alpha: + return random.choice(letters) + ''.join(random.choices(allc, k=n-1)) + return ''.join(random.choices(allc, k=n)) + + +class RandomRapidFailoverNoGap(TestLvolHACluster): + """ + - Minimal churn (only bootstrap creates) + - Long FIO (60 mins) on every lvol/clone + - Outage pacing: next outage right after ONLINE; add 15–30s buffer post-restart + - Validate FIO and pause for migration every 5 outages + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + # Base knobs + self.total_lvols = 20 + self.lvol_size = "40G" + self.fio_size = "15G" + + # Validation cadence & FIO runtime + self.validate_every = 5 + self._iter = 0 + self._per_wave_fio_runtime = 3600 # 60 minutes + self._fio_wait_timeout = 5000 # wait for all to finish + + # Internal state + self.fio_threads = [] + self.lvol_mount_details = {} + self.clone_mount_details = {} + self.sn_nodes = [] + self.sn_nodes_with_sec = [] + self.sn_primary_secondary_map = {} + self.node_vs_lvol = {} + self.snapshot_names = [] + self.snap_vs_node = {} + self.current_outage_node = None + self.outage_start_time = None + self.outage_end_time = None + self.first_outage_ts 
= None # track the first outage for migration window + self.test_name = "longfio_nochurn_rapid_outages" + + self.outage_types = [ + "graceful_shutdown", + "container_stop", + # "interface_full_network_interrupt", + ] + + # Names + self.lvol_base = f"lvl{_rand_id(12)}" + self.clone_base = f"cln{_rand_id(12)}" + self.snap_base = f"snap{_rand_id(12)}" + + # Logging file for outages + self.outage_log_file = os.path.join("logs", f"outage_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") + self._init_outage_log() + + # ---------- small utilities ---------- + + def _init_outage_log(self): + os.makedirs(os.path.dirname(self.outage_log_file), exist_ok=True) + with open(self.outage_log_file, "w") as f: + f.write("Timestamp,Node,Outage_Type,Event\n") + + def _log_outage_event(self, node, outage_type, event): + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + with open(self.outage_log_file, "a") as f: + f.write(f"{ts},{node},{outage_type},{event}\n") + + def _short_bs(self): + # return f"{2 ** random.randint(2, 7)}K" # 4K–128K + return f"{2 ** 6}K" + + def _pick_outage(self): + random.shuffle(self.outage_types) + return self.outage_types[0] + + # ---------- cluster bootstrap ---------- + + def _wait_cluster_active(self, timeout=900, poll=5): + """ + Poll `sbctl cluster list` until status ACTIVE. + Avoids 400 in_activation when creating lvol/snap/clone during bring-up. 
+ """ + end = datetime.now().timestamp() + timeout + while datetime.now().timestamp() < end: + try: + info = self.ssh_obj.cluster_list(self.mgmt_nodes[0], self.cluster_id) # must wrap "sbctl cluster list" + self.logger.info(info) + # Expect a single row with Status + status = str(info).upper() + if "ACTIVE" in status: + return + except Exception as e: + self.logger.info(f"ERROR: {e}") + sleep_n_sec(poll) + raise RuntimeError("Cluster did not become ACTIVE within timeout") + + def _bootstrap_cluster(self): + # Ensure Cluster is ACTIVE + self._wait_cluster_active() + + # create pool + self.sbcli_utils.add_storage_pool(pool_name=self.pool_name) + + # discover storage nodes + storage_nodes = self.sbcli_utils.get_storage_nodes() + for res in storage_nodes['results']: + self.sn_nodes.append(res["uuid"]) + self.sn_nodes_with_sec.append(res["uuid"]) + self.sn_primary_secondary_map[res["uuid"]] = res["secondary_node_id"] + + self.logger.info(f"[LFNG] SN sec map: {self.sn_primary_secondary_map}") + + # initial lvols + mount + then later clone from snapshots + self._create_lvols(count=self.total_lvols) # start_fio=False → we launch after clones + self._seed_snapshots_and_clones() # also mounts clones + + # Start 30 min FIO on all (lvols + clones) + self._kick_fio_for_all(runtime=self._per_wave_fio_runtime) + + # start container logs + if not self.k8s_test: + for node in self.storage_nodes: + self.ssh_obj.restart_docker_logging( + node_ip=node, + containers=self.container_nodes[node], + log_dir=os.path.join(self.docker_logs_path, node), + test_name=self.test_name + ) + else: + self.runner_k8s_log.restart_logging() + + # ---------- lvol / fio helpers ---------- + + def _create_lvols(self, count=1): + for _ in range(count): + fs_type = random.choice(["ext4", "xfs"]) + is_crypto = random.choice([True, False]) + name_core = f"{self.lvol_base}_{_rand_id(6, first_alpha=False)}" + lvol_name = name_core if not is_crypto else f"c{name_core}" + + kwargs = dict( + lvol_name=lvol_name, + 
pool_name=self.pool_name, + size=self.lvol_size, + crypto=is_crypto, + key1=self.lvol_crypt_keys[0], + key2=self.lvol_crypt_keys[1], + ) + + # Avoid outage node & partner during initial placement + if self.current_outage_node: + skip_nodes = [self.current_outage_node, self.sn_primary_secondary_map.get(self.current_outage_node)] + skip_nodes += [p for p, s in self.sn_primary_secondary_map.items() if s == self.current_outage_node] + host_id = [n for n in self.sn_nodes_with_sec if n not in skip_nodes] + if host_id: + kwargs["host_id"] = host_id[0] + + # Ensure cluster ACTIVE before creating + self._wait_cluster_active() + + try: + self.sbcli_utils.add_lvol(**kwargs) + except Exception as e: + self.logger.warning(f"[LFNG] lvol create failed ({lvol_name}) → {e}; retry once after ACTIVE gate") + self._wait_cluster_active() + self.sbcli_utils.add_lvol(**kwargs) + + # record + lvol_id = self.sbcli_utils.get_lvol_id(lvol_name) + self.lvol_mount_details[lvol_name] = { + "ID": lvol_id, + "Command": None, + "Mount": None, + "Device": None, + "MD5": None, + "FS": fs_type, + "Log": f"{self.log_path}/{lvol_name}.log", + "snapshots": [], + "iolog_base_path": f"{self.log_path}/{lvol_name}_fio_iolog", + } + + # refresh list + self.ssh_obj.exec_command(node=self.mgmt_nodes[0], command=f"{self.base_cmd} lvol list", supress_logs=True) + + # track node placement + lvol_node_id = self.sbcli_utils.get_lvol_details(lvol_id=lvol_id)[0]["node_id"] + self.node_vs_lvol.setdefault(lvol_node_id, []).append(lvol_name) + + # connect + connect_ls = self.sbcli_utils.get_lvol_connect_str(lvol_name=lvol_name) + self.lvol_mount_details[lvol_name]["Command"] = connect_ls + + client_node = random.choice(self.fio_node) + self.lvol_mount_details[lvol_name]["Client"] = client_node + + initial = self.ssh_obj.get_devices(node=client_node) + for c in connect_ls: + _, err = self.ssh_obj.exec_command(node=client_node, command=c) + if err: + nqn = self.sbcli_utils.get_lvol_details(lvol_id=lvol_id)[0]["nqn"] + 
self.ssh_obj.disconnect_nvme(node=client_node, nqn_grep=nqn) + self.logger.info(f"[LFNG] connect error → clean lvol {lvol_name}") + self.sbcli_utils.delete_lvol(lvol_name=lvol_name, max_attempt=20, skip_error=True) + sleep_n_sec(3) + del self.lvol_mount_details[lvol_name] + self.node_vs_lvol[lvol_node_id].remove(lvol_name) + break + + final = self.ssh_obj.get_devices(node=client_node) + new_dev = None + for d in final: + if d not in initial: + new_dev = f"/dev/{d.strip()}" + break + if not new_dev: + raise LvolNotConnectException("LVOL did not connect") + + self.lvol_mount_details[lvol_name]["Device"] = new_dev + self.ssh_obj.format_disk(node=client_node, device=new_dev, fs_type=fs_type) + + mnt = f"{self.mount_path}/{lvol_name}" + self.ssh_obj.mount_path(node=client_node, device=new_dev, mount_path=mnt) + self.lvol_mount_details[lvol_name]["Mount"] = mnt + + # clean old logs + self.ssh_obj.delete_files(client_node, [ + f"{mnt}/*fio*", + f"{self.log_path}/local-{lvol_name}_fio*", + f"{self.log_path}/{lvol_name}_fio_iolog*" + ]) + + def _seed_snapshots_and_clones(self): + """Create one snapshot and one clone per lvol (best effort). 
Mount clones on same client.""" + for lvol, det in list(self.lvol_mount_details.items()): + # Ensure ACTIVE + self._wait_cluster_active() + + snap_name = f"{self.snap_base}_{_rand_id(8, first_alpha=False)}" + out, err = self.ssh_obj.add_snapshot(self.mgmt_nodes[0], det["ID"], snap_name) + if "(False," in str(out) or "(False," in str(err): + self.logger.warning(f"[LFNG] snapshot create failed for {lvol} → skip clone") + continue + + self.snapshot_names.append(snap_name) + node_id = self.sbcli_utils.get_lvol_details(lvol_id=det["ID"])[0]["node_id"] + self.snap_vs_node[snap_name] = node_id + det["snapshots"].append(snap_name) + + snap_id = self.ssh_obj.get_snapshot_id(self.mgmt_nodes[0], snap_name) + clone_name = f"{self.clone_base}_{_rand_id(8, first_alpha=False)}" + try: + self.ssh_obj.add_clone(self.mgmt_nodes[0], snap_id, clone_name) + except Exception as e: + self.logger.warning(f"[LFNG] clone create failed for {lvol} → {e}") + continue + + # connect clone + fs_type = det["FS"] + client = det["Client"] + + self.clone_mount_details[clone_name] = { + "ID": self.sbcli_utils.get_lvol_id(clone_name), + "Command": None, + "Mount": None, + "Device": None, + "MD5": None, + "FS": fs_type, + "Log": f"{self.log_path}/{clone_name}.log", + "snapshot": snap_name, + "Client": client, + "iolog_base_path": f"{self.log_path}/{clone_name}_fio_iolog", + } + + connect_ls = self.sbcli_utils.get_lvol_connect_str(lvol_name=clone_name) + self.clone_mount_details[clone_name]["Command"] = connect_ls + + initial = self.ssh_obj.get_devices(node=client) + for c in connect_ls: + _, err = self.ssh_obj.exec_command(node=client, command=c) + if err: + nqn = self.sbcli_utils.get_lvol_details(lvol_id=self.clone_mount_details[clone_name]["ID"])[0]["nqn"] + self.ssh_obj.disconnect_nvme(node=client, nqn_grep=nqn) + self.logger.info("[LFNG] connect clone error → cleanup") + self.sbcli_utils.delete_lvol(lvol_name=clone_name, max_attempt=20, skip_error=True) + sleep_n_sec(3) + del 
self.clone_mount_details[clone_name] + continue + + final = self.ssh_obj.get_devices(node=client) + new_dev = None + for d in final: + if d not in initial: + new_dev = f"/dev/{d.strip()}" + break + if not new_dev: + raise LvolNotConnectException("Clone did not connect") + + self.clone_mount_details[clone_name]["Device"] = new_dev + if fs_type == "xfs": + self.ssh_obj.clone_mount_gen_uuid(client, new_dev) + mnt = f"{self.mount_path}/{clone_name}" + self.ssh_obj.mount_path(node=client, device=new_dev, mount_path=mnt) + self.clone_mount_details[clone_name]["Mount"] = mnt + + # purge old logs + self.ssh_obj.delete_files(client, [ + f"{self.log_path}/local-{clone_name}_fio*", + f"{self.log_path}/{clone_name}_fio_iolog*", + f"{mnt}/*fio*" + ]) + + def _kick_fio_for_all(self, runtime=None): + """Start verified fio (PID-checked; auto-rerun) for all lvols + clones.""" + # small stagger to avoid SSH bursts + def _launch(name, det): + self.ssh_obj.run_fio_test( + det["Client"], None, det["Mount"], det["Log"], + size=self.fio_size, name=f"{name}_fio", rw="randrw", + bs=self._short_bs(), nrfiles=8, iodepth=1, numjobs=2, + time_based=True, runtime=runtime, log_avg_msec=1000, + iolog_file=det["iolog_base_path"], max_latency="30s", + verify="md5", verify_dump=1, verify_fatal=1, retries=6, + use_latency=False + ) + + for lvol, det in self.lvol_mount_details.items(): + self.ssh_obj.delete_files(det["Client"], [f"/mnt/{lvol}/*"]) + t = threading.Thread(target=_launch, args=(lvol, det)) + t.start() + self.fio_threads.append(t) + sleep_n_sec(0.2) + + for cname, det in self.clone_mount_details.items(): + self.ssh_obj.delete_files(det["Client"], [f"/mnt/{cname}/*"]) + t = threading.Thread(target=_launch, args=(cname, det)) + t.start() + self.fio_threads.append(t) + sleep_n_sec(0.2) + + # ---------- outage flow ---------- + + def _perform_outage(self): + random.shuffle(self.sn_nodes) + self.current_outage_node = self.sn_nodes[0] + outage_type = self._pick_outage() + + if 
self.first_outage_ts is None: + self.first_outage_ts = int(datetime.now().timestamp()) + + cur_node_details = self.sbcli_utils.get_storage_node_details(self.current_outage_node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + storage_node_id=self.current_outage_node, + logs_path=self.docker_logs_path + ) + + # self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], + # storage_node_id=self.current_outage_node) + + self.outage_start_time = int(datetime.now().timestamp()) + self._log_outage_event(self.current_outage_node, outage_type, "Outage started") + self.logger.info(f"[LFNG] Outage={outage_type} node={self.current_outage_node}") + + node_details = self.sbcli_utils.get_storage_node_details(self.current_outage_node) + node_ip = node_details[0]["mgmt_ip"] + node_rpc_port = node_details[0]["rpc_port"] + + if outage_type == "graceful_shutdown": + # suspend -> wait SUSPENDED -> shutdown -> wait OFFLINE + try: + self.logger.info(f"[LFNG] Suspending node via: sbcli-dev sn suspend {self.current_outage_node}") + self.sbcli_utils.suspend_node(node_uuid=self.current_outage_node, expected_error_code=[503]) + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "suspended", timeout=600) + except Exception: + self.logger.warning("[LFNG] Suspend failed from API; ignoring if already suspended") + + try: + self.sbcli_utils.shutdown_node(node_uuid=self.current_outage_node, force=True, expected_error_code=[503]) + except Exception: + self.ssh_obj.shutdown_node(node=self.mgmt_nodes[0], node_id=self.current_outage_node, force=True) + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "offline", timeout=900) + + for node in self.sn_nodes_with_sec: + if node != self.current_outage_node: + cur_node_details = self.sbcli_utils.get_storage_node_details(node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + 
storage_node_id=node, + logs_path=self.docker_logs_path + ) + # Keep node strictly offline for 5 minutes + sleep_n_sec(500) + + elif outage_type == "container_stop": + self.ssh_obj.stop_spdk_process(node_ip, node_rpc_port) + + elif outage_type == "interface_full_network_interrupt": + # Down all active data interfaces for ~300s (5 minutes) with ping verification + active = self.ssh_obj.get_active_interfaces(node_ip) + self.ssh_obj.disconnect_all_active_interfaces(node_ip, active, 300) + sleep_n_sec(280) + + return outage_type + + def restart_nodes_after_failover(self, outage_type): + + self.logger.info(f"[LFNG] Recover outage={outage_type} node={self.current_outage_node}") + + cur_node_details = self.sbcli_utils.get_storage_node_details(self.sn_primary_secondary_map[self.current_outage_node]) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + storage_node_id=self.sn_primary_secondary_map[self.current_outage_node], + logs_path=self.docker_logs_path + ) + + # Only wait for ONLINE (skip deep health) + if outage_type == 'graceful_shutdown': + try: + self.ssh_obj.restart_node(self.mgmt_nodes[0], node_id=self.current_outage_node, force=True) + except Exception: + pass + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=900) + elif outage_type == 'container_stop': + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=900) + elif "network_interrupt" in outage_type: + self.sbcli_utils.wait_for_storage_node_status(self.current_outage_node, "online", timeout=900) + + self._log_outage_event(self.current_outage_node, outage_type, "Node online") + self.outage_end_time = int(datetime.now().timestamp()) + + cur_node_details = self.sbcli_utils.get_storage_node_details(self.current_outage_node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + 
storage_node_id=self.current_outage_node, + logs_path=self.docker_logs_path + ) + + # keep container log streaming going + if not self.k8s_test: + for node in self.storage_nodes: + self.ssh_obj.restart_docker_logging( + node_ip=node, + containers=self.container_nodes[node], + log_dir=os.path.join(self.docker_logs_path, node), + test_name=self.test_name + ) + else: + self.runner_k8s_log.restart_logging() + + # small cool-down before next outage to reduce SSH churn + # sleep_n_sec(random.randint(1, 5)) + + # ---------- main ---------- + + def run(self): + self.logger.info("[LFNG] Starting RandomRapidFailoverNoGap") + self._bootstrap_cluster() + sleep_n_sec(5) + + iteration = 1 + while True: + outage_type = self._perform_outage() + self.restart_nodes_after_failover(outage_type) + + self._iter += 1 + if self._iter % self.validate_every == 0: + self.logger.info(f"[LFNG] {self._iter} outages → wait & validate all FIO") + # Join launch threads so we know all jobs issued + for t in self.fio_threads: + t.join(timeout=10) + self.fio_threads = [] + + # Wait for all fio jobs to end (they’re 30min jobs) + self.common_utils.manage_fio_threads(self.fio_node, [], timeout=self._fio_wait_timeout) + + for node in self.sn_nodes_with_sec: + cur_node_details = self.sbcli_utils.get_storage_node_details(node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + storage_node_id=node, + logs_path=self.docker_logs_path + ) + + self.ssh_obj.dump_lvstore(node_ip=self.mgmt_nodes[0], + storage_node_id=node) + + # Validate logs + for lvol, det in self.lvol_mount_details.items(): + self.common_utils.validate_fio_test(det["Client"], log_file=det["Log"]) + for cname, det in self.clone_mount_details.items(): + self.common_utils.validate_fio_test(det["Client"], log_file=det["Log"]) + + # Optional: wait for migration window after FIO completes + # (replace with your actual migration-check, if any) + self.logger.info("[LFNG] FIO validated; 
pausing briefly for migration window") + sleep_n_sec(10) + + # Re-kick next 30min wave + self._kick_fio_for_all(runtime=self._per_wave_fio_runtime) + self.logger.info("[LFNG] Next FIO wave started") + + self.logger.info(f"[LFNG] Iter {iteration} complete → starting next outage ASAP") + iteration += 1 \ No newline at end of file diff --git a/e2e/stress_test/continuous_failover_ha_multi_outage.py b/e2e/stress_test/continuous_failover_ha_multi_outage.py index fb5f6d507..e96a0b547 100644 --- a/e2e/stress_test/continuous_failover_ha_multi_outage.py +++ b/e2e/stress_test/continuous_failover_ha_multi_outage.py @@ -1,5 +1,6 @@ from utils.common_utils import sleep_n_sec from datetime import datetime +from collections import defaultdict from stress_test.continuous_failover_ha_multi_client import RandomMultiClientFailoverTest from exceptions.custom_exception import LvolNotConnectException import threading @@ -8,13 +9,20 @@ import os +generated_sequences = set() + def generate_random_sequence(length): letters = string.ascii_uppercase numbers = string.digits all_chars = letters + numbers - first_char = random.choice(letters) - remaining_chars = ''.join(random.choices(all_chars, k=length - 1)) - return first_char + remaining_chars + + while True: + first_char = random.choice(letters) + remaining_chars = ''.join(random.choices(all_chars, k=length-1)) + result = first_char + remaining_chars + if result not in generated_sequences: + generated_sequences.add(result) + return result class RandomMultiClientMultiFailoverTest(RandomMultiClientFailoverTest): @@ -25,7 +33,7 @@ class RandomMultiClientMultiFailoverTest(RandomMultiClientFailoverTest): def __init__(self, **kwargs): super().__init__(**kwargs) - self.total_lvols = 20 + self.total_lvols = 40 self.lvol_name = f"lvl{generate_random_sequence(15)}" self.clone_name = f"cln{generate_random_sequence(15)}" self.snapshot_name = f"snap{generate_random_sequence(15)}" @@ -48,9 +56,12 @@ def __init__(self, **kwargs): 
self.lvols_without_sec_connect = [] self.test_name = "n_plus_k_failover_multi_client_ha" self.outage_types = [ + "graceful_shutdown", + "interface_full_network_interrupt" + ] + self.outage_types2 = [ "container_stop", "graceful_shutdown", - "interface_partial_network_interrupt", "interface_full_network_interrupt" ] self.blocked_ports = None @@ -61,30 +72,101 @@ def _initialize_outage_log(self): with open(self.outage_log_file, 'w') as log: log.write("Timestamp,Node,Outage_Type,Event\n") - def log_outage_event(self, node, outage_type, event): - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + def log_outage_event(self, node, outage_type, event, outage_time=0): + """Log an outage event to the outage log file. + + Args: + node (str): Node UUID or IP where the event occurred. + outage_type (str): Type of outage (e.g., port_network_interrupt, container_stop, graceful_shutdown). + event (str): Event description (e.g., 'Outage started', 'Node restarted'). + outage_time (int): Minutes to add to self.outage_start_time. If 0/None, use current time. 
+ """ + # Compute timestamp + if outage_time: + # Uses self.outage_start_time (epoch seconds) + outage_time (minutes) + base_epoch = getattr(self, "outage_start_time", None) + if isinstance(base_epoch, (int, float)) and base_epoch > 0: + ts_dt = datetime.fromtimestamp(int(base_epoch) + int(outage_time) * 60) + else: + # Fallback to now if outage_start_time is missing/invalid + ts_dt = datetime.now() + else: + ts_dt = datetime.now() + + timestamp = ts_dt.strftime('%Y-%m-%d %H:%M:%S') + + # Write the log line with open(self.outage_log_file, 'a') as log: log.write(f"{timestamp},{node},{outage_type},{event}\n") + def _build_reverse_secondary_map(self): + rev = defaultdict(set) # secondary -> {primary,...} + for p, s in self.sn_primary_secondary_map.items(): + if s: + rev[s].add(p) + return rev + + def _pick_outage_nodes(self, primary_candidates, k): + rev = self._build_reverse_secondary_map() + order = primary_candidates[:] + + random.shuffle(order) + + chosen, blocked = [], set() + for node in order: + if node in blocked: + continue + + chosen.append(node) + blocked.add(node) # itself + sec = self.sn_primary_secondary_map.get(node) + if sec: + blocked.add(sec) # its secondary + blocked.update(rev.get(node, ())) # any primary whose secondary == node + + if len(chosen) == k: + break + + if len(chosen) < k: + raise Exception( + f"Cannot pick {k} nodes without primary/secondary conflicts; only {len(chosen)} possible with current topology." + ) + return chosen + def perform_n_plus_k_outages(self): """ - Perform K (self.npcs) parallel outages as part of N+K configuration. - Ensure only primary nodes are selected for outage. + Select K outage nodes such that no two are in a primary/secondary + relationship (in either direction). Candidates = keys of the map. 
""" - primary_nodes = [node for node in self.sn_nodes if not self.sbcli_utils.is_secondary_node(node)] + # Candidates are nodes that are primary *for someone* (map keys) + primary_candidates = list(self.sn_primary_secondary_map.keys()) + self.current_outage_nodes = [] - if len(primary_nodes) < self.npcs: - raise Exception(f"Not enough primary nodes to perform {self.npcs} outages. Found only {len(primary_nodes)}.") + if len(primary_candidates) < self.npcs: + raise Exception( + f"Need {self.npcs} outage nodes, but only {len(primary_candidates)} primary-role nodes exist." + ) - outage_nodes = random.sample(primary_nodes, k=self.npcs) + outage_nodes = self._pick_outage_nodes(primary_candidates, self.npcs) + self.logger.info(f"Selected outage nodes: {outage_nodes}") outage_combinations = [] - + outage_num = 0 for node in outage_nodes: - outage_type = random.choice(self.outage_types) + if outage_num == 0: + outage_type = random.choice(self.outage_types) + outage_num = 1 + else: + outage_type = random.choice(self.outage_types2) node_details = self.sbcli_utils.get_storage_node_details(node) node_ip = node_details[0]["mgmt_ip"] node_rpc_port = node_details[0]["rpc_port"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=node_ip, + storage_node_id=node, + logs_path=self.docker_logs_path + ) + self.logger.info(f"Performing {outage_type} on primary node {node}.") self.log_outage_event(node, outage_type, "Outage started") @@ -105,26 +187,74 @@ def perform_n_plus_k_outages(self): def _graceful_shutdown_node(self, node): try: - self.sbcli_utils.suspend_node(node_uuid=node, expected_error_code=[503]) - self.sbcli_utils.wait_for_storage_node_status(node, "suspended", timeout=1000) - self.sbcli_utils.shutdown_node(node_uuid=node, expected_error_code=[503]) - self.sbcli_utils.wait_for_storage_node_status(node, "offline", timeout=1000) + sleep_n_sec(10) + max_retries = 10 + retry_delay = 10 # seconds + # Retry mechanism for suspending the node + for attempt in range(max_retries): + 
try: + if attempt == max_retries - 1: + self.logger.info("[CHECK] Suspending Node via CLI as via API Fails.") + self.ssh_obj.suspend_node(node=self.mgmt_nodes[0], + node_id=node) + else: + self.sbcli_utils.suspend_node(node_uuid=node, expected_error_code=[503]) + self.sbcli_utils.wait_for_storage_node_status(node, "suspended", timeout=1000) + break # Exit loop if successful + except Exception as _: + if attempt < max_retries - 2: + self.logger.info(f"Attempt {attempt + 1} failed to suspend node. Retrying in {retry_delay} seconds...") + sleep_n_sec(retry_delay) + elif attempt < max_retries - 1: + self.logger.info(f"Attempt {attempt + 1} failed to suspend node via API. Retrying in {retry_delay} seconds via CMD...") + sleep_n_sec(retry_delay) + else: + self.logger.info("Max retries reached. Failed to suspend node.") + raise # Rethrow the last exception + + sleep_n_sec(10) # Wait before shutting down + + # Retry mechanism for shutting down the node + for attempt in range(max_retries): + try: + if attempt == max_retries - 1: + self.logger.info("[CHECK] Shutting down Node via CLI as via API Fails.") + self.ssh_obj.shutdown_node(node=self.mgmt_nodes[0], + node_id=node, + force=True) + else: + self.sbcli_utils.shutdown_node(node_uuid=node, force=True, + expected_error_code=[503]) + self.sbcli_utils.wait_for_storage_node_status(node, "offline", timeout=1000) + break # Exit loop if successful + except Exception as _: + if attempt < max_retries - 2: + self.logger.info(f"Attempt {attempt + 1} failed to shutdown node. Retrying in {retry_delay} seconds...") + sleep_n_sec(retry_delay) + elif attempt < max_retries - 1: + self.logger.info(f"Attempt {attempt + 1} failed to shutdown node via API. Retrying in {retry_delay} seconds via CMD...") + sleep_n_sec(retry_delay) + else: + self.logger.info("Max retries reached. 
Failed to shutdown node.") + raise # Rethrow the last exception except Exception as e: self.logger.error(f"Failed graceful shutdown for node {node}: {str(e)}") def _disconnect_partial_interface(self, node, node_ip): active_interfaces = [nic["if_name"] for nic in self.sbcli_utils.get_storage_node_details(node)[0]["data_nics"]] + active_interfaces = ['eth1'] self.disconnect_thread = threading.Thread( target=self.ssh_obj.disconnect_all_active_interfaces, - args=(node_ip, active_interfaces, 600) + args=(node_ip, active_interfaces, 300) ) self.disconnect_thread.start() def _disconnect_full_interface(self, node, node_ip): + self.logger.info("Handling full interface based network interruption...") active_interfaces = self.ssh_obj.get_active_interfaces(node_ip) self.disconnect_thread = threading.Thread( target=self.ssh_obj.disconnect_all_active_interfaces, - args=(node_ip, active_interfaces, 600) + args=(node_ip, active_interfaces, 300) ) self.disconnect_thread.start() @@ -134,50 +264,81 @@ def delete_random_lvols(self, count): lvol for node, lvols in self.node_vs_lvol.items() if node not in self.current_outage_nodes for lvol in lvols ] + + self.logger.info(f"Available Lvols: {available_lvols}") if len(available_lvols) < count: self.logger.warning("Not enough lvols available to delete the requested count.") count = len(available_lvols) for lvol in random.sample(available_lvols, count): - self.logger.info(f"Deleting lvol {lvol}") + self.logger.info(f"Deleting lvol {lvol}.") snapshots = self.lvol_mount_details[lvol]["snapshots"] to_delete = [] - - # Handle dependent clones for clone_name, clone_details in self.clone_mount_details.items(): if clone_details["snapshot"] in snapshots: - self.common_utils.validate_fio_test(clone_details["Client"], clone_details["Log"]) + self.common_utils.validate_fio_test(clone_details["Client"], + log_file=clone_details["Log"]) self.ssh_obj.find_process_name(clone_details["Client"], f"{clone_name}_fio", return_pid=False) fio_pids = 
self.ssh_obj.find_process_name(clone_details["Client"], f"{clone_name}_fio", return_pid=True) + sleep_n_sec(10) for pid in fio_pids: self.ssh_obj.kill_processes(clone_details["Client"], pid=pid) + attempt = 1 + while len(fio_pids) > 2: + self.ssh_obj.find_process_name(clone_details["Client"], f"{clone_name}_fio", return_pid=False) + fio_pids = self.ssh_obj.find_process_name(clone_details["Client"], f"{clone_name}_fio", return_pid=True) + if attempt >= 30: + raise Exception("FIO not killed on clone") + attempt += 1 + sleep_n_sec(20) + + sleep_n_sec(10) self.ssh_obj.unmount_path(clone_details["Client"], f"/mnt/{clone_name}") self.ssh_obj.remove_dir(clone_details["Client"], dir_path=f"/mnt/{clone_name}") self.disconnect_lvol(clone_details['ID']) - self.sbcli_utils.delete_lvol(clone_name) + self.sbcli_utils.delete_lvol(clone_name, max_attempt=20, skip_error=True) + sleep_n_sec(30) if clone_name in self.lvols_without_sec_connect: self.lvols_without_sec_connect.remove(clone_name) to_delete.append(clone_name) - + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/local-{clone_name}_fio*"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/{clone_name}_fio_iolog*"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"/mnt/{clone_name}/*"]) + # self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/{clone_name}*.log"]) for del_key in to_delete: del self.clone_mount_details[del_key] - - # Delete snapshots for snapshot in snapshots: snapshot_id = self.ssh_obj.get_snapshot_id(self.mgmt_nodes[0], snapshot) + # snapshot_node = self.snap_vs_node[snapshot] + # if snapshot_node not in skip_nodes: self.ssh_obj.delete_snapshot(self.mgmt_nodes[0], snapshot_id=snapshot_id) self.snapshot_names.remove(snapshot) - # Stop FIO and cleanup lvol - self.common_utils.validate_fio_test(self.lvol_mount_details[lvol]["Client"], self.lvol_mount_details[lvol]["Log"]) + 
self.common_utils.validate_fio_test(self.lvol_mount_details[lvol]["Client"], + log_file=self.lvol_mount_details[lvol]["Log"]) self.ssh_obj.find_process_name(self.lvol_mount_details[lvol]["Client"], f"{lvol}_fio", return_pid=False) + sleep_n_sec(10) fio_pids = self.ssh_obj.find_process_name(self.lvol_mount_details[lvol]["Client"], f"{lvol}_fio", return_pid=True) for pid in fio_pids: self.ssh_obj.kill_processes(self.lvol_mount_details[lvol]["Client"], pid=pid) + attempt = 1 + while len(fio_pids) > 2: + self.ssh_obj.find_process_name(self.lvol_mount_details[lvol]["Client"], f"{lvol}_fio", return_pid=False) + fio_pids = self.ssh_obj.find_process_name(self.lvol_mount_details[lvol]["Client"], f"{lvol}_fio", return_pid=True) + if attempt >= 30: + raise Exception("FIO not killed on lvols") + attempt += 1 + sleep_n_sec(20) + + sleep_n_sec(10) self.ssh_obj.unmount_path(self.lvol_mount_details[lvol]["Client"], f"/mnt/{lvol}") self.ssh_obj.remove_dir(self.lvol_mount_details[lvol]["Client"], dir_path=f"/mnt/{lvol}") self.disconnect_lvol(self.lvol_mount_details[lvol]['ID']) - self.sbcli_utils.delete_lvol(lvol) + self.sbcli_utils.delete_lvol(lvol, max_attempt=20, skip_error=True) + self.ssh_obj.delete_files(self.lvol_mount_details[lvol]["Client"], [f"{self.log_path}/local-{lvol}_fio*"]) + self.ssh_obj.delete_files(self.lvol_mount_details[lvol]["Client"], [f"{self.log_path}/{lvol}_fio_iolog*"]) + self.ssh_obj.delete_files(self.lvol_mount_details[lvol]["Client"], [f"/mnt/{lvol}/*"]) + # self.ssh_obj.delete_files(self.lvol_mount_details[lvol]["Client"], [f"{self.log_path}/{lvol}*.log"]) if lvol in self.lvols_without_sec_connect: self.lvols_without_sec_connect.remove(lvol) del self.lvol_mount_details[lvol] @@ -190,14 +351,19 @@ def delete_random_lvols(self, count): def create_snapshots_and_clones(self): """Create snapshots and clones during an outage, avoiding lvols on outage nodes.""" self.int_lvol_size += 1 + skip_nodes = [node for node in self.sn_primary_secondary_map if 
self.sn_primary_secondary_map[node] in self.current_outage_nodes] + self.logger.info(f"Skip Nodes: {skip_nodes}") + for node in self.current_outage_nodes: + skip_nodes.append(node) + self.logger.info(f"Skip Nodes: {skip_nodes}") available_lvols = [ lvol for node, lvols in self.node_vs_lvol.items() - if node not in self.current_outage_nodes for lvol in lvols + if node not in skip_nodes for lvol in lvols ] if not available_lvols: self.logger.warning("No available lvols to create snapshots and clones.") return - + self.logger.info(f"Available lvols: {available_lvols}") for _ in range(3): random.shuffle(available_lvols) lvol = available_lvols[0] @@ -205,69 +371,140 @@ def create_snapshots_and_clones(self): temp_name = generate_random_sequence(5) if snapshot_name in self.snapshot_names: snapshot_name = f"{snapshot_name}_{temp_name}" - try: output, error = self.ssh_obj.add_snapshot(self.mgmt_nodes[0], self.lvol_mount_details[lvol]["ID"], snapshot_name) - if "(False," in output or "(False," in error: - raise Exception(output or error) + if "(False," in output: + raise Exception(output) + if "(False," in error: + raise Exception(error) except Exception as e: - self.logger.warning(f"Snapshot creation failed: {e}") - continue - + self.logger.warning(f"Snap creation fails with {str(e)}. 
Retrying with different name.") + try: + snapshot_name = f"snap_{lvol}" + temp_name = generate_random_sequence(5) + snapshot_name = f"{snapshot_name}_{temp_name}" + self.ssh_obj.add_snapshot(self.mgmt_nodes[0], self.lvol_mount_details[lvol]["ID"], snapshot_name) + except Exception as exp: + self.logger.warning(f"Retry Snap creation fails with {str(exp)}.") + continue + self.snapshot_names.append(snapshot_name) + lvol_node_id = self.sbcli_utils.get_lvol_details( + lvol_id=self.lvol_mount_details[lvol]["ID"])[0]["node_id"] + self.snap_vs_node[snapshot_name] = lvol_node_id self.lvol_mount_details[lvol]["snapshots"].append(snapshot_name) - clone_name = f"clone_{generate_random_sequence(15)}" + if clone_name in list(self.clone_mount_details): + clone_name = f"{clone_name}_{temp_name}" sleep_n_sec(30) snapshot_id = self.ssh_obj.get_snapshot_id(self.mgmt_nodes[0], snapshot_name) try: self.ssh_obj.add_clone(self.mgmt_nodes[0], snapshot_id, clone_name) except Exception as e: - self.logger.warning(f"Clone creation failed: {e}") - continue - + self.logger.warning(f"Clone creation fails with {str(e)}. 
Retrying with different name.") + try: + clone_name = f"clone_{generate_random_sequence(15)}" + temp_name = generate_random_sequence(5) + clone_name = f"{clone_name}_{temp_name}" + self.ssh_obj.add_clone(self.mgmt_nodes[0], snapshot_id, clone_name) + except Exception as exp: + self.logger.warning(f"Retry Clone creation fails with {str(exp)}.") + continue fs_type = self.lvol_mount_details[lvol]["FS"] client = self.lvol_mount_details[lvol]["Client"] self.clone_mount_details[clone_name] = { - "ID": self.sbcli_utils.get_lvol_id(clone_name), - "Command": None, - "Mount": None, - "Device": None, - "MD5": None, - "FS": fs_type, - "Log": f"{self.log_path}/{clone_name}.log", - "snapshot": snapshot_name, - "Client": client + "ID": self.sbcli_utils.get_lvol_id(clone_name), + "Command": None, + "Mount": None, + "Device": None, + "MD5": None, + "FS": fs_type, + "Log": f"{self.log_path}/{clone_name}.log", + "snapshot": snapshot_name, + "Client": client, + "iolog_base_path": f"{self.log_path}/{clone_name}_fio_iolog" } + self.logger.info(f"Created clone {clone_name}.") + + sleep_n_sec(3) + + self.ssh_obj.exec_command(node=self.mgmt_nodes[0], + command=f"{self.base_cmd} lvol list") + connect_ls = self.sbcli_utils.get_lvol_connect_str(lvol_name=clone_name) self.clone_mount_details[clone_name]["Command"] = connect_ls + + # if self.secondary_outage: + # connect_ls = [connect_ls[0]] + # self.lvols_without_sec_connect.append(clone_name) + initial_devices = self.ssh_obj.get_devices(node=client) for connect_str in connect_ls: _, error = self.ssh_obj.exec_command(node=client, command=connect_str) if error: - self.logger.warning(f"Clone connect failed: {error}") + lvol_details = self.sbcli_utils.get_lvol_details(lvol_id=self.clone_mount_details[clone_name]["ID"]) + nqn = lvol_details[0]["nqn"] + self.ssh_obj.disconnect_nvme(node=client, nqn_grep=nqn) + self.logger.info(f"Connecting clone {clone_name} has error: {error}. 
Disconnect all connections for that clone!!") + self.sbcli_utils.delete_lvol(lvol_name=clone_name, max_attempt=20, skip_error=True) + sleep_n_sec(30) + del self.clone_mount_details[clone_name] continue + sleep_n_sec(3) final_devices = self.ssh_obj.get_devices(node=client) - lvol_device = next((f"/dev/{d.strip()}" for d in final_devices if d not in initial_devices), None) + lvol_device = None + for device in final_devices: + if device not in initial_devices: + lvol_device = f"/dev/{device.strip()}" + break if not lvol_device: - raise LvolNotConnectException("Clone device not found") + raise LvolNotConnectException("LVOL did not connect") self.clone_mount_details[clone_name]["Device"] = lvol_device + # Mount and Run FIO if fs_type == "xfs": self.ssh_obj.clone_mount_gen_uuid(client, lvol_device) - mount_point = f"{self.mount_path}/{clone_name}" self.ssh_obj.mount_path(node=client, device=lvol_device, mount_path=mount_point) self.clone_mount_details[clone_name]["Mount"] = mount_point + # clone_node_id = self.sbcli_utils.get_lvol_details( + # lvol_id=self.lvol_mount_details[clone_name]["ID"])[0]["node_id"] + + # self.node_vs_lvol[clone_node_id].append(clone_name) + + sleep_n_sec(10) + self.ssh_obj.delete_files(client, [f"{mount_point}/*fio*"]) self.ssh_obj.delete_files(client, [f"{self.log_path}/local-{clone_name}_fio*"]) - + self.ssh_obj.delete_files(client, [f"{self.log_path}/{clone_name}_fio_iolog*"]) + + sleep_n_sec(5) + + # Start FIO + # fio_thread = threading.Thread( + # target=self.ssh_obj.run_fio_test, + # args=(client, None, self.clone_mount_details[clone_name]["Mount"], self.clone_mount_details[clone_name]["Log"]), + # kwargs={ + # "size": self.fio_size, + # "name": f"{clone_name}_fio", + # "rw": "randrw", + # "bs": f"{2 ** random.randint(2, 7)}K", + # "nrfiles": 16, + # "iodepth": 1, + # "numjobs": 5, + # "time_based": True, + # "runtime": 2000, + # "log_avg_msec": 1000, + # "iolog_file": self.clone_mount_details[clone_name]["iolog_base_path"], + # "debug": 
True, + # }, + # ) fio_thread = threading.Thread( target=self.ssh_obj.run_fio_test, - args=(client, None, mount_point, self.clone_mount_details[clone_name]["Log"]), + args=(client, None, self.clone_mount_details[clone_name]["Mount"], self.clone_mount_details[clone_name]["Log"]), kwargs={ "size": self.fio_size, "name": f"{clone_name}_fio", @@ -278,15 +515,21 @@ def create_snapshots_and_clones(self): "numjobs": 5, "time_based": True, "runtime": 2000, + "log_avg_msec": 1000, + "iolog_file": self.clone_mount_details[clone_name]["iolog_base_path"], }, ) fio_thread.start() self.fio_threads.append(fio_thread) + self.logger.info(f"Created snapshot {snapshot_name} and clone {clone_name}.") - self.logger.info(f"Created snapshot {snapshot_name} and clone {clone_name}") - self.sbcli_utils.resize_lvol(self.lvol_mount_details[lvol]["ID"], f"{self.int_lvol_size}G") + if self.lvol_mount_details[lvol]["ID"]: + self.sbcli_utils.resize_lvol(lvol_id=self.lvol_mount_details[lvol]["ID"], + new_size=f"{self.int_lvol_size}G") sleep_n_sec(10) - self.sbcli_utils.resize_lvol(self.clone_mount_details[clone_name]["ID"], f"{self.int_lvol_size}G") + if self.clone_mount_details[clone_name]["ID"]: + self.sbcli_utils.resize_lvol(lvol_id=self.clone_mount_details[clone_name]["ID"], + new_size=f"{self.int_lvol_size}G") def run(self): @@ -301,6 +544,8 @@ def run(self): for result in storage_nodes['results']: self.sn_nodes.append(result["uuid"]) self.sn_nodes_with_sec.append(result["uuid"]) + self.sn_primary_secondary_map[result["uuid"]] = result["secondary_node_id"] + self.logger.info(f"Secondary node map: {self.sn_primary_secondary_map}") sleep_n_sec(30) @@ -320,11 +565,23 @@ def run(self): for node, outage_type in outage_events: self.current_outage_node = node - self.restart_nodes_after_failover(outage_type) + if outage_type == "container_stop" and self.npcs > 1: + self.restart_nodes_after_failover(outage_type, True) + else: + self.restart_nodes_after_failover(outage_type) self.logger.info("Waiting 
for fallback recovery.") sleep_n_sec(100) + for node in self.sn_nodes_with_sec: + cur_node_details = self.sbcli_utils.get_storage_node_details(node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + storage_node_id=node, + logs_path=self.docker_logs_path + ) + time_duration = self.common_utils.calculate_time_duration( start_timestamp=self.outage_start_time, end_timestamp=self.outage_end_time @@ -343,12 +600,27 @@ def run(self): # for node, outage_type in outage_events: # if not self.sbcli_utils.is_secondary_node(node): self.validate_migration_for_node(self.outage_start_time, 2000, None, 60, no_task_ok=no_task_ok) + self.common_utils.manage_fio_threads(self.fio_node, self.fio_threads, timeout=20000) for clone, clone_details in self.clone_mount_details.items(): self.common_utils.validate_fio_test(clone_details["Client"], clone_details["Log"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/local-{clone}_fio*"]) + self.ssh_obj.delete_files(clone_details["Client"], [f"{self.log_path}/{clone}_fio_iolog*"]) for lvol, lvol_details in self.lvol_mount_details.items(): self.common_utils.validate_fio_test(lvol_details["Client"], lvol_details["Log"]) + self.ssh_obj.delete_files(lvol_details["Client"], [f"{self.log_path}/local-{lvol}_fio*"]) + self.ssh_obj.delete_files(lvol_details["Client"], [f"{self.log_path}/{lvol}_fio_iolog*"]) self.logger.info(f"N+K failover iteration {iteration} complete.") + + for node in self.sn_nodes_with_sec: + cur_node_details = self.sbcli_utils.get_storage_node_details(node) + cur_node_ip = cur_node_details[0]["mgmt_ip"] + self.ssh_obj.fetch_distrib_logs( + storage_node_ip=cur_node_ip, + storage_node_id=node, + logs_path=self.docker_logs_path + ) iteration += 1 + diff --git a/e2e/utils/ssh_utils.py b/e2e/utils/ssh_utils.py index bd06f06f7..a50a61726 100644 --- a/e2e/utils/ssh_utils.py +++ b/e2e/utils/ssh_utils.py @@ -13,6 +13,10 @@ import string import re import 
subprocess +import shlex +import socket +from collections import defaultdict +from typing import Optional, List SSH_KEY_LOCATION = os.path.join(Path.home(), ".ssh", os.environ.get("KEY_NAME")) @@ -47,31 +51,227 @@ def __init__(self, bastion_server): self.log_monitor_threads = {} self.log_monitor_stop_flags = {} self.ssh_semaphore = threading.Semaphore(10) # Max 10 SSH calls in parallel (tune as needed) + self._bastion_client = None + self._reconnect_locks = defaultdict(threading.Lock) + self.ssh_pass = None + + def _candidate_usernames(self, explicit_user) -> List[str]: + if explicit_user: + if isinstance(explicit_user, (list, tuple)): + return list(explicit_user) + return [str(explicit_user)] + return ["ec2-user", "ubuntu", "rocky", "root"] + + def _load_private_keys(self) -> List[paramiko.PKey]: + """ + Try Ed25519 then RSA. If SSH_KEY_LOCATION/env points to a file, use it. + Else try ~/.ssh/id_ed25519 and ~/.ssh/id_rsa. If SSH_KEY_PATH is a dir, load all files from it. + """ + paths = [] + # explicit single file via KEY_NAME → SSH_KEY_LOCATION + if SSH_KEY_LOCATION and os.path.isfile(SSH_KEY_LOCATION): + paths.append(SSH_KEY_LOCATION) + # defaults + home = os.path.join(Path.home(), ".ssh") + paths.extend([os.path.join(home, "id_ed25519"), os.path.join(home, "id_rsa")]) + + keys = [] + seen = set() + for p in paths: + if not os.path.exists(p) or p in seen: + continue + seen.add(p) + try: + keys.append(paramiko.Ed25519Key.from_private_key_file(p)) + continue + except Exception: + pass + try: + keys.append(paramiko.RSAKey.from_private_key_file(p)) + except Exception: + pass + if not keys and not self.ssh_pass: + raise FileNotFoundError("No usable SSH private key found and SSH_PASS not set.") + return keys + + def _try_connect(self, host: str, username: str, pkey: Optional[paramiko.PKey], password: Optional[str], sock=None, timeout=30): + cli = paramiko.SSHClient() + cli.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + cli.connect( + hostname=host, + 
username=username, + pkey=pkey, + password=(password if pkey is None else None), + timeout=timeout, + banner_timeout=timeout, + auth_timeout=timeout, + allow_agent=False, + look_for_keys=False, + sock=sock + ) + return cli + + # def connect(self, address: str, port: int = 22, + # bastion_server_address: str = None, + # username: str = "ec2-user", + # is_bastion_server: bool = False): + # """Connect to cluster nodes""" + # # --- prep usernames list --- + # default_users = ["ec2-user", "ubuntu", "rocky", "root"] + # if getattr(self, "ssh_user", None): + # if isinstance(self.ssh_user, (list, tuple)): + # usernames = list(self.ssh_user) + # else: + # usernames = [str(self.ssh_user)] + # else: + # usernames = default_users + + # # Load key (Ed25519 -> RSA fallback) + # if not os.path.exists(SSH_KEY_LOCATION): + # raise FileNotFoundError(f"SSH private key not found at {SSH_KEY_LOCATION}") + # try: + # private_key = paramiko.Ed25519Key(filename=SSH_KEY_LOCATION) + # except Exception: + # private_key = paramiko.RSAKey.from_private_key_file(SSH_KEY_LOCATION) + + # # Helper to store/replace a connection + # def _store(host, client): + # if self.ssh_connections.get(host): + # try: + # self.ssh_connections[host].close() + # except Exception: + # pass + # self.ssh_connections[host] = client + + # # ---------- direct connection ---------- + # bastion_server_address = bastion_server_address or self.bastion_server + # if not bastion_server_address: + # self.logger.info(f"Connecting directly to {address} on port {port}...") + # last_err = None + # for user in usernames: + # ssh = paramiko.SSHClient() + # ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + # try: + # ssh.connect( + # hostname=address, + # username=user, + # port=port, + # pkey=private_key, + # timeout=300, + # banner_timeout=30, + # auth_timeout=30, + # allow_agent=False, + # look_for_keys=False, + # ) + # self.logger.info(f"Connected directly to {address} as '{user}'.") + # _store(address, ssh) + # return + 
# except Exception as e: + # last_err = e + # self.logger.info(f"Direct login failed for '{user}': {repr(e)}") + # try: + # ssh.close() + # except Exception: + # pass + # raise Exception(f"All usernames failed for {address}. Last error: {repr(last_err)}") + + # # ---------- connect to bastion ---------- + # self.logger.info(f"Connecting to bastion server {bastion_server_address}...") + # bastion_ssh = paramiko.SSHClient() + # bastion_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + # last_err = None + # bastion_user_used = None + # for b_user in usernames: + # try: + # bastion_ssh.connect( + # hostname=bastion_server_address, + # username=b_user, + # port=port, + # pkey=private_key, + # timeout=300, + # banner_timeout=30, + # auth_timeout=30, + # allow_agent=False, + # look_for_keys=False, + # ) + # self.logger.info(f"Connected to bastion as '{b_user}'.") + # _store(bastion_server_address, bastion_ssh) + # bastion_user_used = b_user + # break + # except Exception as e: + # last_err = e + # self.logger.info(f"Bastion login failed for '{b_user}': {repr(e)}") + # if bastion_user_used is None: + # raise Exception(f"All usernames failed for bastion {bastion_server_address}. Last error: {repr(last_err)}") + # if is_bastion_server: + # return # caller only needed bastion + + # # ---------- tunnel to target through bastion ---------- + # self.logger.info(f"Connecting to target server {address} through bastion server...") + # transport = bastion_ssh.get_transport() + # last_err = None + # for user in usernames: + # # IMPORTANT: open a NEW channel for each username attempt + # try: + # channel = transport.open_channel( + # "direct-tcpip", + # (address, port), + # ("localhost", 0), + # ) + # except paramiko.ssh_exception.ChannelException as ce: + # self.logger.error( + # f"Channel open failed: {repr(ce)} — check AllowTcpForwarding/PermitOpen on bastion." 
+ # ) + # raise + # target_ssh = paramiko.SSHClient() + # target_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + # try: + # target_ssh.connect( + # address, + # username=user, + # port=port, + # sock=channel, + # pkey=private_key, + # timeout=300, + # banner_timeout=30, + # auth_timeout=30, + # allow_agent=False, + # look_for_keys=False, + # ) + # self.logger.info(f"Connected to {address} as '{user}' via bastion '{bastion_user_used}'.") + # _store(address, target_ssh) + # return + # except Exception as e: + # last_err = e + # self.logger.info(f"Target login failed for '{user}': {repr(e)}") + # try: + # target_ssh.close() + # except Exception: + # pass + # try: + # channel.close() + # except Exception: + # pass + + # raise Exception( + # f"Tunnel established, but all usernames failed for target {address}. Last error: {repr(last_err)}" + # ) def connect(self, address: str, port: int = 22, bastion_server_address: str = None, username: str = "ec2-user", is_bastion_server: bool = False): - """Connect to cluster nodes""" - # --- prep usernames list --- - default_users = ["ec2-user", "ubuntu", "rocky", "root"] - if getattr(self, "ssh_user", None): - if isinstance(self.ssh_user, (list, tuple)): - usernames = list(self.ssh_user) - else: - usernames = [str(self.ssh_user)] - else: - usernames = default_users + """ + Connect to a host directly or via bastion, trying multiple usernames and keys, + with optional password fallback. 
+ """ + # Resolve bastion + bastion_server_address = bastion_server_address or self.bastion_server - # Load key (Ed25519 -> RSA fallback) - if not os.path.exists(SSH_KEY_LOCATION): - raise FileNotFoundError(f"SSH private key not found at {SSH_KEY_LOCATION}") - try: - private_key = paramiko.Ed25519Key(filename=SSH_KEY_LOCATION) - except Exception: - private_key = paramiko.RSAKey.from_private_key_file(SSH_KEY_LOCATION) + usernames = self._candidate_usernames(self.ssh_user or username) + keys = self._load_private_keys() + password = self.ssh_pass - # Helper to store/replace a connection def _store(host, client): if self.ssh_connections.get(host): try: @@ -80,230 +280,291 @@ def _store(host, client): pass self.ssh_connections[host] = client - # ---------- direct connection ---------- - bastion_server_address = bastion_server_address or self.bastion_server + # --- NO BASTION: direct connect --- if not bastion_server_address: - self.logger.info(f"Connecting directly to {address} on port {port}...") last_err = None + self.logger.info(f"Connecting directly to {address} on port {port}...") for user in usernames: - ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - try: - ssh.connect( - hostname=address, - username=user, - port=port, - pkey=private_key, - timeout=300, - banner_timeout=30, - auth_timeout=30, - allow_agent=False, - look_for_keys=False, - ) - self.logger.info(f"Connected directly to {address} as '{user}'.") - _store(address, ssh) - return - except Exception as e: - last_err = e - self.logger.info(f"Direct login failed for '{user}': {repr(e)}") + # try keys + for key in keys: try: - ssh.close() - except Exception: - pass + cli = self._try_connect(address, user, key, None, timeout=30) + self.logger.info(f"Connected directly to {address} as '{user}'.") + _store(address, cli) + return + except Exception as e: + last_err = e + # then password + if password: + try: + cli = self._try_connect(address, user, None, password, 
timeout=30) + self.logger.info(f"Connected directly to {address} as '{user}' (password).") + _store(address, cli) + return + except Exception as e: + last_err = e raise Exception(f"All usernames failed for {address}. Last error: {repr(last_err)}") - # ---------- connect to bastion ---------- - self.logger.info(f"Connecting to bastion server {bastion_server_address}...") - bastion_ssh = paramiko.SSHClient() - bastion_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - last_err = None - bastion_user_used = None - for b_user in usernames: - try: - bastion_ssh.connect( - hostname=bastion_server_address, - username=b_user, - port=port, - pkey=private_key, - timeout=300, - banner_timeout=30, - auth_timeout=30, - allow_agent=False, - look_for_keys=False, - ) - self.logger.info(f"Connected to bastion as '{b_user}'.") - _store(bastion_server_address, bastion_ssh) - bastion_user_used = b_user + # --- VIA BASTION --- + # ensure bastion client (reuse if alive) + if (not self._bastion_client) or (not self._bastion_client.get_transport()) or (not self._bastion_client.get_transport().is_active()): + last_err = None + self.logger.info(f"Connecting to bastion server {bastion_server_address}...") + for b_user in self._candidate_usernames(self.ssh_user or username): + for key in keys: + try: + cli = self._try_connect(bastion_server_address, b_user, key, None, timeout=30) + self._bastion_client = cli + self.logger.info(f"Connected to bastion as '{b_user}'.") + break + except Exception as e: + last_err = e + else: + if password: + try: + cli = self._try_connect(bastion_server_address, b_user, None, password, timeout=30) + self._bastion_client = cli + self.logger.info(f"Connected to bastion as '{b_user}' (password).") + break + except Exception as e: + last_err = e + continue break - except Exception as e: - last_err = e - self.logger.info(f"Bastion login failed for '{b_user}': {repr(e)}") - if bastion_user_used is None: - raise Exception(f"All usernames failed for bastion 
{bastion_server_address}. Last error: {repr(last_err)}") + if (not self._bastion_client) or (not self._bastion_client.get_transport()) or (not self._bastion_client.get_transport().is_active()): + raise Exception(f"All usernames failed for bastion {bastion_server_address}. Last error: {repr(last_err)}") + if is_bastion_server: - return # caller only needed bastion + # caller only wanted bastion connection open + _store(bastion_server_address, self._bastion_client) + return - # ---------- tunnel to target through bastion ---------- + # open a channel through bastion → target self.logger.info(f"Connecting to target server {address} through bastion server...") - transport = bastion_ssh.get_transport() + bastion_transport = self._bastion_client.get_transport() + last_err = None for user in usernames: - # IMPORTANT: open a NEW channel for each username attempt - try: - channel = transport.open_channel( - "direct-tcpip", - (address, port), - ("localhost", 0), - ) - except paramiko.ssh_exception.ChannelException as ce: - self.logger.error( - f"Channel open failed: {repr(ce)} — check AllowTcpForwarding/PermitOpen on bastion." 
- ) - raise - target_ssh = paramiko.SSHClient() - target_ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - try: - target_ssh.connect( - address, - username=user, - port=port, - sock=channel, - pkey=private_key, - timeout=300, - banner_timeout=30, - auth_timeout=30, - allow_agent=False, - look_for_keys=False, - ) - self.logger.info(f"Connected to {address} as '{user}' via bastion '{bastion_user_used}'.") - _store(address, target_ssh) - return - except Exception as e: - last_err = e - self.logger.info(f"Target login failed for '{user}': {repr(e)}") + # new channel for each attempt + chan = bastion_transport.open_channel("direct-tcpip", (address, port), ("127.0.0.1", 0)) + # try keys + for key in keys: try: - target_ssh.close() - except Exception: - pass + cli = self._try_connect(address, user, key, None, sock=chan, timeout=30) + self.logger.info(f"Connected to {address} as '{user}' via bastion.") + _store(address, cli) + return + except Exception as e: + last_err = e + # then password + if password: try: - channel.close() - except Exception: - pass - - raise Exception( - f"Tunnel established, but all usernames failed for target {address}. Last error: {repr(last_err)}" - ) - + cli = self._try_connect(address, user, None, password, sock=chan, timeout=30) + self.logger.info(f"Connected to {address} as '{user}' via bastion (password).") + _store(address, cli) + return + except Exception as e: + last_err = e + try: + chan.close() + except Exception: + pass + + raise Exception(f"Tunnel established, but all usernames failed for target {address}. Last error: {repr(last_err)}") + + + + # def exec_command(self, node, command, timeout=360, max_retries=3, stream_callback=None, supress_logs=False): + # """Executes a command on a given machine with streaming output and retry mechanism. + + # Args: + # node (str): Machine to run command on. + # command (str): Command to run. + # timeout (int): Timeout in seconds. 
+ # max_retries (int): Number of retries in case of failures. + # stream_callback (callable, optional): A callback function for streaming output. Defaults to None. + + # Returns: + # tuple: Final output and error strings after command execution. + # """ + # retry_count = 0 + # while retry_count < max_retries: + # with self.ssh_semaphore: + # ssh_connection = self.ssh_connections.get(node) + # try: + # # Ensure the SSH connection is active, otherwise reconnect + # if not ssh_connection or not ssh_connection.get_transport().is_active() or retry_count > 0: + # self.logger.info(f"Reconnecting SSH to node {node}") + # self.connect( + # address=node, + # is_bastion_server=True if node == self.bastion_server else False + # ) + # ssh_connection = self.ssh_connections[node] + + # if not supress_logs: + # self.logger.info(f"Executing command: {command}") + # stdin, stdout, stderr = ssh_connection.exec_command(command, timeout=timeout) + + # output = [] + # error = [] + + # # Read stdout and stderr dynamically if stream_callback is provided + # if stream_callback: + # while not stdout.channel.exit_status_ready(): + # # Process stdout + # if stdout.channel.recv_ready(): + # chunk = stdout.channel.recv(1024).decode() + # output.append(chunk) + # stream_callback(chunk, is_error=False) # Callback for stdout + + # # Process stderr + # if stderr.channel.recv_stderr_ready(): + # chunk = stderr.channel.recv_stderr(1024).decode() + # error.append(chunk) + # stream_callback(chunk, is_error=True) # Callback for stderr + + # time.sleep(0.1) + + # # Finalize any remaining output + # if stdout.channel.recv_ready(): + # chunk = stdout.channel.recv(1024).decode() + # output.append(chunk) + # stream_callback(chunk, is_error=False) + + # if stderr.channel.recv_stderr_ready(): + # chunk = stderr.channel.recv_stderr(1024).decode() + # error.append(chunk) + # stream_callback(chunk, is_error=True) + # else: + # # Default behavior: Read the entire output at once + # output = stdout.read().decode() 
+ # error = stderr.read().decode() + + # # Combine the output into strings + # output = "".join(output) if isinstance(output, list) else output + # error = "".join(error) if isinstance(error, list) else error + + # # Log the results + # if output: + # if not supress_logs: + # self.logger.info(f"Command output: {output}") + # if error: + # if not supress_logs: + # self.logger.error(f"Command error: {error}") + + # if not output and not error: + # if not supress_logs: + # self.logger.warning(f"Command '{command}' executed but returned no output or error.") + + # return output, error + + # except EOFError as e: + # self.logger.error(f"EOFError occurred while executing command '{command}': {e}. Retrying ({retry_count + 1}/{max_retries})...") + # retry_count += 1 + # time.sleep(2) # Short delay before retrying + + # except paramiko.SSHException as e: + # self.logger.error(f"SSH command failed: {e}. Retrying ({retry_count + 1}/{max_retries})...") + # retry_count += 1 + # time.sleep(2) # Short delay before retrying + + # except paramiko.buffered_pipe.PipeTimeout as e: + # self.logger.error(f"SSH command failed: {e}. Retrying ({retry_count + 1}/{max_retries})...") + # retry_count += 1 + # time.sleep(2) # Short delay before retrying + + # except Exception as e: + # self.logger.error(f"SSH command failed (General Exception): {e}. Retrying ({retry_count + 1}/{max_retries})...") + # retry_count += 1 + # time.sleep(2) # Short delay before retrying + + # # If we exhaust retries, return failure + # self.logger.error(f"Failed to execute command '{command}' on node {node} after {max_retries} retries.") + # return "", "Command failed after max retries" def exec_command(self, node, command, timeout=360, max_retries=3, stream_callback=None, supress_logs=False): - """Executes a command on a given machine with streaming output and retry mechanism. - - Args: - node (str): Machine to run command on. - command (str): Command to run. - timeout (int): Timeout in seconds. 
- max_retries (int): Number of retries in case of failures. - stream_callback (callable, optional): A callback function for streaming output. Defaults to None. - - Returns: - tuple: Final output and error strings after command execution. """ - retry_count = 0 - while retry_count < max_retries: + Execute a command with auto-reconnect (serialized per node), optional streaming, + and proper exit-status capture to reduce “ran but no output” confusion. + """ + retry = 0 + while retry < max_retries: with self.ssh_semaphore: - ssh_connection = self.ssh_connections.get(node) + # serialize reconnect attempts per node + lock = self._reconnect_locks[node] + with lock: + ssh = self.ssh_connections.get(node) + if not ssh or not ssh.get_transport() or not ssh.get_transport().is_active() or retry > 0: + if not supress_logs: + self.logger.info(f"Reconnecting SSH to node {node}") + # if node is the bastion itself + self.connect(node, is_bastion_server=(node == self.bastion_server)) + ssh = self.ssh_connections[node] + try: - # Ensure the SSH connection is active, otherwise reconnect - if not ssh_connection or not ssh_connection.get_transport().is_active() or retry_count > 0: - self.logger.info(f"Reconnecting SSH to node {node}") - self.connect( - address=node, - is_bastion_server=True if node == self.bastion_server else False - ) - ssh_connection = self.ssh_connections[node] - if not supress_logs: self.logger.info(f"Executing command: {command}") - stdin, stdout, stderr = ssh_connection.exec_command(command, timeout=timeout) + stdin, stdout, stderr = ssh.exec_command(command, timeout=timeout) + output_chunks, error_chunks = [], [] - output = [] - error = [] - - # Read stdout and stderr dynamically if stream_callback is provided if stream_callback: while not stdout.channel.exit_status_ready(): - # Process stdout if stdout.channel.recv_ready(): - chunk = stdout.channel.recv(1024).decode() - output.append(chunk) - stream_callback(chunk, is_error=False) # Callback for stdout - - # 
Process stderr + chunk = stdout.channel.recv(8192).decode(errors="replace") + output_chunks.append(chunk) + stream_callback(chunk, is_error=False) if stderr.channel.recv_stderr_ready(): - chunk = stderr.channel.recv_stderr(1024).decode() - error.append(chunk) - stream_callback(chunk, is_error=True) # Callback for stderr - - time.sleep(0.1) - - # Finalize any remaining output - if stdout.channel.recv_ready(): - chunk = stdout.channel.recv(1024).decode() - output.append(chunk) + chunk = stderr.channel.recv_stderr(8192).decode(errors="replace") + error_chunks.append(chunk) + stream_callback(chunk, is_error=True) + time.sleep(0.05) + + # flush remaining + while stdout.channel.recv_ready(): + chunk = stdout.channel.recv(8192).decode(errors="replace") + output_chunks.append(chunk) stream_callback(chunk, is_error=False) - - if stderr.channel.recv_stderr_ready(): - chunk = stderr.channel.recv_stderr(1024).decode() - error.append(chunk) + while stderr.channel.recv_stderr_ready(): + chunk = stderr.channel.recv_stderr(8192).decode(errors="replace") + error_chunks.append(chunk) stream_callback(chunk, is_error=True) + + exit_status = stdout.channel.recv_exit_status() + out = "".join(output_chunks) + err = "".join(error_chunks) else: - # Default behavior: Read the entire output at once - output = stdout.read().decode() - error = stderr.read().decode() + out = stdout.read().decode(errors="replace") + err = stderr.read().decode(errors="replace") + exit_status = stdout.channel.recv_exit_status() - # Combine the output into strings - output = "".join(output) if isinstance(output, list) else output - error = "".join(error) if isinstance(error, list) else error + if (not supress_logs) and out: + self.logger.info(f"Command output: {out.strip()[:2000]}") + if (not supress_logs) and err: + self.logger.error(f"Command error: {err.strip()[:2000]}") - # Log the results - if output: - if not supress_logs: - self.logger.info(f"Command output: {output}") - if error: - if not supress_logs: - 
self.logger.error(f"Command error: {error}") + if exit_status != 0 and not err: + # some tools write nothing on stderr but non-zero exit + err = f"Non-zero exit status: {exit_status}" - if not output and not error: + if not out and not err: if not supress_logs: self.logger.warning(f"Command '{command}' executed but returned no output or error.") - return output, error - - except EOFError as e: - self.logger.error(f"EOFError occurred while executing command '{command}': {e}. Retrying ({retry_count + 1}/{max_retries})...") - retry_count += 1 - time.sleep(2) # Short delay before retrying + return out, err - except paramiko.SSHException as e: - self.logger.error(f"SSH command failed: {e}. Retrying ({retry_count + 1}/{max_retries})...") - retry_count += 1 - time.sleep(2) # Short delay before retrying - - except paramiko.buffered_pipe.PipeTimeout as e: - self.logger.error(f"SSH command failed: {e}. Retrying ({retry_count + 1}/{max_retries})...") - retry_count += 1 - time.sleep(2) # Short delay before retrying + except (EOFError, paramiko.SSHException, paramiko.buffered_pipe.PipeTimeout, socket.error) as e: + retry += 1 + self.logger.error(f"SSH command failed ({type(e).__name__}): {e}. Retrying ({retry}/{max_retries})...") + time.sleep(min(2 * retry, 5)) except Exception as e: - self.logger.error(f"SSH command failed (General Exception): {e}. Retrying ({retry_count + 1}/{max_retries})...") - retry_count += 1 - time.sleep(2) # Short delay before retrying + retry += 1 + self.logger.error(f"SSH command failed (General): {e}. 
Retrying ({retry}/{max_retries})...") + time.sleep(min(2 * retry, 5)) - # If we exhaust retries, return failure self.logger.error(f"Failed to execute command '{command}' on node {node} after {max_retries} retries.") return "", "Command failed after max retries" - + def format_disk(self, node, device, fs_type="ext4"): """Format disk on the given node @@ -362,14 +623,133 @@ def get_devices(self, node): return output.strip().split() - def run_fio_test(self, node, device=None, directory=None, log_file=None, **kwargs): - """Run FIO Tests with given params and proper logging for MD5 error timestamp tracing. + # def run_fio_test(self, node, device=None, directory=None, log_file=None, **kwargs): + # """ + # Run FIO with optional 'ensure_running' that verifies process presence and retries start up to N times. + + # kwargs: + # - ensure_running: bool (default False) + # - max_start_retries: int (default 3) + # """ + # location = "" + # if device: + # location = f"--filename={device}" + # if directory: + # location = f"--directory={directory}" + + # runtime = kwargs.get("runtime", 3600) + # name = kwargs.get("name", f"fio_{_rid(6)}") + # ioengine = kwargs.get("ioengine", "libaio") + # iodepth = kwargs.get("iodepth", 1) + # time_based = "--time_based" if kwargs.get("time_based", True) else "" + # rw = kwargs.get("rw", "randrw") + # bs = kwargs.get("bs", "4K") + # size = kwargs.get("size", "1G") + # rwmixread = kwargs.get("rwmixread", 70) + # numjobs = kwargs.get("numjobs", 2) + # nrfiles = kwargs.get("nrfiles", 8) + # log_avg_ms = kwargs.get("log_avg_msec", 1000) + # output_fmt = f' --output-format={kwargs["output_format"]} ' if kwargs.get("output_format") else '' + # output_file = f" --output={kwargs['output_file']} " if kwargs.get("output_file") else '' + # iolog_base = kwargs.get("iolog_file") + + # iolog_opt = f"--write_iolog={iolog_base}" if iolog_base else "" + # log_opt = f"--log_avg_msec={log_avg_ms}" if log_avg_ms else "" + + # command = ( + # f"sudo fio --name={name} 
{location} --ioengine={ioengine} --direct=1 --iodepth={iodepth} " + # f"{time_based} --runtime={runtime} --rw={rw} --max_latency=20s --bs={bs} --size={size} --rwmixread={rwmixread} " + # f"--verify=md5 --verify_dump=1 --verify_fatal=1 --numjobs={numjobs} --nrfiles={nrfiles} " + # f"{log_opt} {iolog_opt} {output_fmt}{output_file}" + # ) + # if kwargs.get("debug"): + # command += " --debug=all" + # if log_file: + # command += f" > {log_file} 2>&1" + + # ensure_running = bool(kwargs.get("ensure_running", False)) + # max_start_retries = int(kwargs.get("max_start_retries", 3)) + + # launch_retries = 3 + # for attempt in range(1, launch_retries + 1): + + # try: + # self.logger.info(f"Starting FIO on {node}: {name} → {location} (attempt {attempt}/{launch_retries})") + # self.exec_command(node=node, command=f"sudo {command}", max_retries=2) + # break + # except Exception as e: + # self.logger.error(f"FIO start failed: {e}") + # if attempt == launch_retries: + # raise + # time.sleep(1.0 * attempt) + + # # Ensure process is up (pgrep name) + # start_retries = 6 + # for i in range(start_retries): + # out, err = self.exec_command( + # node=node, + # command=f"pgrep -fa 'fio.*{name}' || true", + # max_retries=1, + # ) + # if out.strip(): + # self.logger.info(f"FIO is running for {name}: {out.strip().splitlines()[0]}") + # return + # # Not running yet → small backoff and try again + # time.sleep(2 + i) + # # If still not, try re-launch quickly + # if i >= 2: + # self.logger.warning(f"FIO still not running for {name}; re-issuing start (try {i-1}/{start_retries-3})") + # try: + # self.exec_command(node=node, command=f"sudo {command}", max_retries=1) + # except Exception as e: + # self.logger.warning(f"Re-start attempt raised: {e}") + + # # If we get here, fio didn’t stick + # raise RuntimeError(f"FIO failed to stay running for job {name} on {node}") + + # def _is_running(): + # # Use pgrep on job name (fio --name=) for a quick check + # # Fall back to ps+grep if pgrep not present. 
+ # try: + # out, _ = self.exec_command(node=node, command=f"pgrep -fl 'fio.*--name={name}'", max_retries=1) + # return bool(out.strip()) + # except Exception: + # out, _ = self.exec_command(node=node, command=f"ps ax | grep -E 'fio.*--name={name}' | grep -v grep || true", max_retries=1) + # return bool(out.strip()) + + # # Try to start; handle EOF/channel close by reconnect+retry + # attempts = 0 + # while True: + # attempts += 1 + # try: + # self.exec_command(node=node, command=command, max_retries=3) + # except Exception as e: + # # Channel/EOF during start is common in churn; retry a few times + # if attempts < max_start_retries: + # self.logger.error(f"FIO start error ({e}); retrying {attempts}/{max_start_retries} in 2s") + # time.sleep(2) + # continue + # else: + # raise + + # if not ensure_running: + # return + + # # Verify started; retry if not + # time.sleep(1.0) + # if _is_running(): + # return + + # if attempts >= max_start_retries: + # raise RuntimeError(f"FIO failed to start after {max_start_retries} attempts for job '{name}'") + + # self.logger.warning(f"FIO not detected running for '{name}'; retrying start {attempts}/{max_start_retries}") + # time.sleep(1.0) - Args: - node (str): Node to perform ssh operation on - device (str): Device path. Defaults to None. - directory (str, optional): Directory to run test on. Defaults to None. - log_file (str, optional): Log file to redirect output to. Defaults to None. + def run_fio_test(self, node, device=None, directory=None, log_file=None, **kwargs): + """ + Start FIO in a detached tmux session so it survives SSH channel drops during fast outages. + Verifies process presence and re-kicks a few times if missing. 
""" location = "" if device: @@ -377,72 +757,63 @@ def run_fio_test(self, node, device=None, directory=None, log_file=None, **kwarg if directory: location = f"--directory={directory}" - runtime = kwargs.get("runtime", 3600) - rw = kwargs.get("rw", "randrw") - name = kwargs.get("name", "test") - ioengine = kwargs.get("ioengine", "libaio") - iodepth = kwargs.get("iodepth", 1) - bs = kwargs.get("bs", "4k") - rwmixread = kwargs.get("rwmixread", 70) - size = kwargs.get("size", "10MiB") - time_based = "--time_based" if kwargs.get("time_based", True) else "" - numjobs = kwargs.get("numjobs", 1) - nrfiles = kwargs.get("nrfiles", 1) - - output_format = f' --output-format={kwargs["output_format"]} ' if kwargs.get("output_format") else '' + runtime = kwargs.get("runtime", 3600) + name = kwargs.get("name", f"fio_{_rid(6)}") + ioengine = kwargs.get("ioengine", "libaio") + iodepth = kwargs.get("iodepth", 1) + time_based = "--time_based" if kwargs.get("time_based", True) else "" + rw = kwargs.get("rw", "randrw") + bs = kwargs.get("bs", "4K") + size = kwargs.get("size", "1G") + rwmixread = kwargs.get("rwmixread", 70) + numjobs = kwargs.get("numjobs", 2) + nrfiles = kwargs.get("nrfiles", 8) + log_avg_ms = kwargs.get("log_avg_msec", 1000) + max_latency = kwargs.get("max_latency", "20s") + use_latency = kwargs.get("use_latency", True) + output_fmt = f' --output-format={kwargs["output_format"]} ' if kwargs.get("output_format") else '' output_file = f" --output={kwargs['output_file']} " if kwargs.get("output_file") else '' + iolog_base = kwargs.get("iolog_file") - log_avg_msec = kwargs.get("log_avg_msec", 1000) - log_avg_msec_opt = f"--log_avg_msec={log_avg_msec}" if log_avg_msec else "" - - iolog_base = kwargs.get("iolog_file", None) - iolog_opt = f"--write_iolog={iolog_base}" if iolog_base else "" - verify_md5 = "--verify=md5" if iodepth == 1 else "" + iolog_opt = f"--write_iolog={iolog_base}" if iolog_base else "" + log_opt = f"--log_avg_msec={log_avg_ms}" if log_avg_ms else "" + 
latency = f" --max_latency={max_latency}" if use_latency else "" - command = ( - f"sudo fio --name={name} {location} --ioengine={ioengine} --direct=1 --iodepth={iodepth} " - f"{time_based} --runtime={runtime} --rw={rw} --max_latency=30s --bs={bs} --size={size} --rwmixread={rwmixread} " - f"{verify_md5} --verify_dump=1 --verify_fatal=1 --numjobs={numjobs} --nrfiles={nrfiles} " - f"{log_avg_msec_opt} {iolog_opt} " - f"{output_format}{output_file}" - ) - # timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - # log_file = log_file or f"/tmp/{name}_{timestamp}.log" + # raw fio command + fio_cmd = ( + f"fio --name={name} {location} --ioengine={ioengine} --direct=1 --iodepth={iodepth} " + f"{time_based} --runtime={runtime} --rw={rw} {latency} --bs={bs} --size={size} --rwmixread={rwmixread} " + f"--verify=md5 --verify_dump=1 --verify_fatal=1 --numjobs={numjobs} --nrfiles={nrfiles} " + f"{log_opt} {iolog_opt} {output_fmt}{output_file}" + ).strip() if kwargs.get("debug"): - command += " --debug=all" + fio_cmd += " --debug=all" + # run fio under tmux so HUP/SSH channel drops don't kill it + session = f"fio_{name}" if log_file: - command += f" > {log_file} 2>&1" - - # else: - # command += " --debug=verify" - - # awk_ts = " | awk '{ print strftime(\"[%Y-%m-%d %H:%M:%S]\"), $0; fflush(); }' | " - # command += awk_ts - # command += f"tee {log_file}" - - self.logger.info(f"Executing FIO command:\n{command}") + fio_cmd = f"{fio_cmd} > {log_file} 2>&1" + + start_cmd = f"sudo tmux new-session -d -s {session} \"{fio_cmd}\" || sudo tmux kill-session -t {session} 2>/dev/null || true; sudo tmux new-session -d -s {session} \"{fio_cmd}\"" + self.logger.info(f"Starting FIO on {node}: {name} in tmux session '{session}'") + self.exec_command(node=node, command=start_cmd, max_retries=2) + + # Ensure process is up: check tmux & pgrep + for i in range(8): + out, _ = self.exec_command(node=node, command=f"pgrep -fa 'fio.*{name}' || true", max_retries=1, supress_logs=True) + tmux_ok, _ = 
self.exec_command(node=node, command=f"sudo tmux has-session -t {session} 2>/dev/null || echo MISSING", max_retries=1, supress_logs=True) + if out.strip() and "MISSING" not in tmux_ok: + self.logger.info(f"FIO is running for {name}: {out.strip().splitlines()[0]}") + return + if i >= 2: + self.logger.warning(f"FIO not detected yet for {name}; re-issuing start (try {i-1}/5)") + self.exec_command(node=node, command=start_cmd, max_retries=1, supress_logs=True) + time.sleep(2 + i) - start_time = time.time() - output, error = self.exec_command(node=node, command=command, timeout=runtime * 2) - end_time = time.time() - - total_time = end_time - start_time - self.fio_runtime[name] = start_time - self.logger.info(f"Total time taken to run the command: {total_time:.2f} seconds") - - # Return all generated iolog files (one per job) - iolog_files = [f"{iolog_base}.{i}" for i in range(numjobs)] - return { - "output": output, - "error": error, - "start_time": start_time, - "end_time": end_time, - "iolog_files": iolog_files, - } + raise RuntimeError(f"FIO failed to stay running for job {name} on {node}") - + def find_process_name(self, node, process_name, return_pid=False): if return_pid: command = "ps -ef | grep -i '%s' | awk '{print $2}'" % process_name @@ -700,15 +1071,35 @@ def get_lvol_id(self, node, lvol_name): return output.strip().split() def get_snapshot_id(self, node, snapshot_name): - cmd = "%s snapshot list | grep -i '%s ' | awk '{print $2}'" % (self.base_cmd, snapshot_name) - output, error = self.exec_command(node=node, command=cmd) + start = time.time() + deadline = start + 600 # 10 minutes + wait_interval = 10 # seconds between checks + snapshot_id = "" + + while time.time() < deadline: + cmd = "%s snapshot list | grep -i '%s ' | awk '{print $2}'" % (self.base_cmd, snapshot_name) + output, error = self.exec_command(node=node, command=cmd) + if output.strip(): + if hasattr(self, "logger"): + self.logger.info(f"Snapshot '{snapshot_name}' is visible with ID: 
{snapshot_id}") + break + time.sleep(wait_interval) + + if not output.strip(): + if hasattr(self, "logger"): + self.logger.error(f"Timed out waiting for snapshot '{snapshot_name}' to appear within 10 minutes.") return output.strip() def add_snapshot(self, node, lvol_id, snapshot_name): cmd = f"{self.base_cmd} -d snapshot add {lvol_id} {snapshot_name}" output, error = self.exec_command(node=node, command=cmd) - return output, error + + snapshot_id = self.get_snapshot_id(node=node, snapshot_name=snapshot_name) + + if not snapshot_id: + if hasattr(self, "logger"): + self.logger.error(f"Timed out waiting for snapshot '{snapshot_name}' to appear within 10 minutes.") def add_clone(self, node, snapshot_id, clone_name): cmd = f"{self.base_cmd} -d snapshot clone {snapshot_id} {clone_name}" @@ -971,30 +1362,81 @@ def get_active_interfaces(self, node_ip): return [] - def disconnect_all_active_interfaces(self, node_ip, interfaces, reconnect_time=300): - """ - Disconnect all active network interfaces on a node in a single SSH call. + # def disconnect_all_active_interfaces(self, node_ip, interfaces, reconnect_time=300): + # """ + # Disconnect all active network interfaces on a node in a single SSH call. + + # Args: + # node_ip (str): IP of the target node. + # interfaces (list): List of active network interfaces to disconnect. 
+ # """ + # if not interfaces: + # self.logger.warning(f"No active interfaces to disconnect on node {node_ip}.") + # return + + # # Combine disconnect commands for all interfaces + # disconnect_cmds = " && ".join([f"sudo nmcli connection down {iface}" for iface in interfaces]) + # reconnect_cmds = " && ".join([f"sudo nmcli connection up {iface}" for iface in interfaces]) + + # cmd = ( + # f'nohup sh -c "{disconnect_cmds} && sleep {reconnect_time} && {reconnect_cmds}" &' + # ) + # self.logger.info(f"Executing combined disconnect command on node {node_ip}: {cmd}") + # try: + # self.exec_command(node_ip, cmd) + # except Exception as e: + # self.logger.error(f"Failed to execute combined disconnect command on {node_ip}: {e}") + + def _ping_once(self, ip: str, count: int = 1, wait: int = 1) -> bool: + try: + # Use system ping; True means "ping success" + res = subprocess.run(["ping", "-c", str(count), "-W", str(wait), ip], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return res.returncode == 0 + except Exception: + return False - Args: - node_ip (str): IP of the target node. - interfaces (list): List of active network interfaces to disconnect. + def disconnect_all_active_interfaces( + self, + node_ip: str, + interfaces: list[str], + duration_secs: int = 300, + max_tries: int = 3, + ): + """ + Bring all given interfaces DOWN, verify outage by ping, keep for duration, then bring them UP. + Fire-and-forget style; robust against brief SSH flaps. 
""" if not interfaces: - self.logger.warning(f"No active interfaces to disconnect on node {node_ip}.") + self.logger.info(f"No active interfaces provided for {node_ip}; skipping NIC down.") return - # Combine disconnect commands for all interfaces - disconnect_cmds = " && ".join([f"sudo nmcli connection down {iface}" for iface in interfaces]) - reconnect_cmds = " && ".join([f"sudo nmcli connection up {iface}" for iface in interfaces]) + down_cmd = " && ".join([f"nmcli connection down {i}" for i in interfaces]) + up_cmd = " && ".join([f"nmcli connection up {i}" for i in interfaces]) + cmd = f'nohup sh -c "{down_cmd} && sleep {duration_secs} && {up_cmd}" &' - cmd = ( - f'nohup sh -c "{disconnect_cmds} && sleep {reconnect_time} && {reconnect_cmds}" &' - ) - self.logger.info(f"Executing combined disconnect command on node {node_ip}: {cmd}") try: - self.exec_command(node_ip, cmd) + self.logger.info(f"Executing combined disconnect command on node {node_ip}: {cmd}") + out, err = self.exec_command(node=node_ip, command=cmd, max_retries=1, timeout=20) + if err: + raise Exception(err) except Exception as e: - self.logger.error(f"Failed to execute combined disconnect command on {node_ip}: {e}") + self.logger.info(f"Command: {cmd}, error: {e}! Checking pings!!") + + # Verify outage begins (best-effort). If ping still works, attempt to issue 'down' again. 
+ time.sleep(5) + tries = 0 + attempts = 10 + while self._ping_once(node_ip) and attempts > 0: + tries += 1 + if tries >= max_tries: + self.logger.warning(f"Ping to {node_ip} still responding after NIC down attempts; continuing anyway.") + break + self.logger.info(f"Ping to {node_ip} still alive; retrying NIC down...") + # re-run only the DOWN part (don’t append sleep again to avoid stacking) + self.exec_command(node=node_ip, command=f'nohup sh -c "{down_cmd}" &', max_retries=2) + time.sleep(3) + attempts -= 1 def check_tmux_installed(self, node_ip): """Check tmux installation @@ -1420,132 +1862,263 @@ def dump_lvstore(self, node_ip, storage_node_id): self.logger.error(f"Failed to dump lvstore on {node_ip}: {e}") return None - def fetch_distrib_logs(self, storage_node_ip, storage_node_id, logs_path): - """ - Fetch distrib names using bdev_get_bdevs RPC, generate and execute RPC JSON, - and copy logs from SPDK container. + # def fetch_distrib_logs(self, storage_node_ip, storage_node_id, logs_path): + # """ + # Fetch distrib names using bdev_get_bdevs RPC, generate and execute RPC JSON, + # and copy logs from SPDK container. 
+ + # Args: + # storage_node_ip (str): IP of the storage node + # storage_node_id (str): ID of the storage node + # """ + # self.logger.info(f"Fetching distrib logs for Storage Node ID: {storage_node_id} on {storage_node_ip}") + + # # Step 1: Find the SPDK container + # find_container_cmd = "sudo docker ps --format '{{.Names}}' | grep -E '^spdk_[0-9]+$'" + # container_name_output, _ = self.exec_command(storage_node_ip, find_container_cmd) + # container_name = container_name_output.strip() + + # if not container_name: + # self.logger.warning(f"No SPDK container found on {storage_node_ip}") + # return + + # # Step 2: Get bdev_get_bdevs output + # # bdev_cmd = f"sudo docker exec {container_name} bash -c 'python spdk/scripts/rpc.py bdev_get_bdevs'" + # # bdev_output, error = self.exec_command(storage_node_ip, bdev_cmd) + + # # if error: + # # self.logger.error(f"Error running bdev_get_bdevs: {error}") + # # return + + # # # Step 3: Save full output to local file + # # timestamp = datetime.now().strftime("%d-%m-%y-%H-%M-%S") + # # raw_output_path = f"{Path.home()}/bdev_output_{storage_node_ip}_{timestamp}.json" + # # with open(raw_output_path, "w") as f: + # # f.write(bdev_output) + # # self.logger.info(f"Saved raw bdev_get_bdevs output to {raw_output_path}") + + # timestamp = datetime.now().strftime("%d-%m-%y-%H-%M-%S") + # base_path = f"{logs_path}/{storage_node_ip}/distrib_bdev_logs/" + + # cmd = f"sudo mkdir -p '{base_path}'" + # self.exec_command(storage_node_ip, cmd) + + # remote_output_path = f"bdev_output_{storage_node_ip}_{timestamp}.json" + + # # 1. Run to capture output into a variable (for parsing) + # bdev_cmd = f"sudo docker exec {container_name} bash -c 'python spdk/scripts/rpc.py -s /mnt/ramdisk/{container_name}/spdk.sock bdev_get_bdevs'" + # bdev_output, error = self.exec_command(storage_node_ip, bdev_cmd) + + # if error: + # self.logger.error(f"Error running bdev_get_bdevs: {error}") + # return + + # # 2. 
Run again to save output on host machine (audit trail) + # bdev_save_cmd = ( + # f"sudo bash -c \"docker exec {container_name} python spdk/scripts/rpc.py -s /mnt/ramdisk/{container_name}/spdk.sock bdev_get_bdevs > {remote_output_path}\"") + + # self.exec_command(storage_node_ip, bdev_save_cmd) + # self.logger.info(f"Saved bdev_get_bdevs output to {remote_output_path} on {storage_node_ip}") + + + # # Step 4: Extract unique distrib names + # try: + # bdevs = json.loads(bdev_output) + # distribs = list({bdev['name'] for bdev in bdevs if bdev['name'].startswith('distrib_')}) + # except json.JSONDecodeError as e: + # self.logger.error(f"JSON parsing failed: {e}") + # return + + # if not distribs: + # self.logger.warning("No distrib names found in bdev_get_bdevs output.") + # return + + # self.logger.info(f"Distributions found: {distribs}") + + # # Step 5: Process each distrib + # for distrib in distribs: + # self.logger.info(f"Processing distrib: {distrib}") + # rpc_json = { + # "subsystems": [ + # { + # "subsystem": "distr", + # "config": [ + # { + # "method": "distr_debug_placement_map_dump", + # "params": {"name": distrib} + # } + # ] + # } + # ] + # } + + # rpc_json_str = json.dumps(rpc_json) + # remote_json_path = "/tmp/stack.json" + + # # Save JSON file remotely + # create_json_command = f"echo '{rpc_json_str}' | sudo tee {remote_json_path}" + # self.exec_command(storage_node_ip, create_json_command) + + # # Copy into container + # copy_json_command = f"sudo docker cp {remote_json_path} {container_name}:{remote_json_path}" + # self.exec_command(storage_node_ip, copy_json_command) + + # # Run RPC inside container + # rpc_command = f"sudo docker exec {container_name} bash -c 'python scripts/rpc_sock.py {remote_json_path} /mnt/ramdisk/{container_name}/spdk.sock'" + # self.exec_command(storage_node_ip, rpc_command) + + # # Find and copy log + # find_log_command = f"sudo docker exec {container_name} ls /tmp/ | grep {distrib}" + # log_file_name, _ = 
self.exec_command(storage_node_ip, find_log_command) + # log_file_name = log_file_name.strip().replace("\r", "").replace("\n", "") + + # if not log_file_name: + # self.logger.error(f"No log file found for distrib {distrib}.") + # continue + + # log_file_path = f"/tmp/{log_file_name}" + # local_log_path = f"{base_path}/{log_file_name}_{storage_node_ip}_{timestamp}" + # copy_log_cmd = f"sudo docker cp {container_name}:{log_file_path} {local_log_path}" + # self.exec_command(storage_node_ip, copy_log_cmd) + + # self.logger.info(f"Fetched log for {distrib}: {local_log_path}") + + # # Clean up + # delete_log_cmd = f"sudo docker exec {container_name} rm -f {log_file_path}" + # self.exec_command(storage_node_ip, delete_log_cmd) + + # self.logger.info("All distrib logs retrieved successfully.") - Args: - storage_node_ip (str): IP of the storage node - storage_node_id (str): ID of the storage node - """ + def fetch_distrib_logs(self, storage_node_ip, storage_node_id, logs_path): self.logger.info(f"Fetching distrib logs for Storage Node ID: {storage_node_id} on {storage_node_ip}") - # Step 1: Find the SPDK container - find_container_cmd = "sudo docker ps --format '{{.Names}}' | grep -E '^spdk_[0-9]+$'" - container_name_output, _ = self.exec_command(storage_node_ip, find_container_cmd) - container_name = container_name_output.strip() - + # 0) Find SPDK container name + find_container_cmd = "sudo docker ps --format '{{.Names}}' | grep -E '^spdk_[0-9]+$' || true" + container_name_out, _ = self.exec_command(storage_node_ip, find_container_cmd) + container_name = (container_name_out or "").strip() if not container_name: self.logger.warning(f"No SPDK container found on {storage_node_ip}") return - # Step 2: Get bdev_get_bdevs output - # bdev_cmd = f"sudo docker exec {container_name} bash -c 'python spdk/scripts/rpc.py bdev_get_bdevs'" - # bdev_output, error = self.exec_command(storage_node_ip, bdev_cmd) - - # if error: - # self.logger.error(f"Error running bdev_get_bdevs: {error}") 
- # return - - # # Step 3: Save full output to local file - # timestamp = datetime.now().strftime("%d-%m-%y-%H-%M-%S") - # raw_output_path = f"{Path.home()}/bdev_output_{storage_node_ip}_{timestamp}.json" - # with open(raw_output_path, "w") as f: - # f.write(bdev_output) - # self.logger.info(f"Saved raw bdev_get_bdevs output to {raw_output_path}") - - timestamp = datetime.now().strftime("%d-%m-%y-%H-%M-%S") - base_path = f"{logs_path}/{storage_node_ip}/distrib_bdev_logs/" - - cmd = f"sudo mkdir -p '{base_path}'" - self.exec_command(storage_node_ip, cmd) - - remote_output_path = f"bdev_output_{storage_node_ip}_{timestamp}.json" - - # 1. Run to capture output into a variable (for parsing) - bdev_cmd = f"sudo docker exec {container_name} bash -c 'python spdk/scripts/rpc.py bdev_get_bdevs'" - bdev_output, error = self.exec_command(storage_node_ip, bdev_cmd) - - if error: - self.logger.error(f"Error running bdev_get_bdevs: {error}") + # 1) Get bdevs via correct sock + timestamp = datetime.now().strftime("%Y%m%d_%H-%M-%S") + base_path = f"{logs_path}/{storage_node_ip}/distrib_bdev_logs" + self.exec_command(storage_node_ip, f"sudo mkdir -p '{base_path}' && sudo chmod -R 777 '{base_path}'") + bdev_cmd = ( + f"sudo docker exec {container_name} bash -lc " + f"\"python spdk/scripts/rpc.py -s /mnt/ramdisk/{container_name}/spdk.sock bdev_get_bdevs\"" + ) + bdev_output, bdev_err = self.exec_command(storage_node_ip, bdev_cmd) + if (bdev_err and bdev_err.strip()) and not bdev_output: + self.logger.error(f"bdev_get_bdevs error on {storage_node_ip}: {bdev_err.strip()}") return - # 2. 
Run again to save output on host machine (audit trail) - bdev_save_cmd = ( - f"sudo bash -c \"docker exec {container_name} python spdk/scripts/rpc.py bdev_get_bdevs > {remote_output_path}\"") - - self.exec_command(storage_node_ip, bdev_save_cmd) - self.logger.info(f"Saved bdev_get_bdevs output to {remote_output_path} on {storage_node_ip}") - - - # Step 4: Extract unique distrib names + # Parse distrib names try: bdevs = json.loads(bdev_output) - distribs = list({bdev['name'] for bdev in bdevs if bdev['name'].startswith('distrib_')}) + distribs = sorted({ + b.get("name", "") + for b in bdevs + if isinstance(b, dict) and str(b.get("name","")).startswith("distrib_") + }) except json.JSONDecodeError as e: - self.logger.error(f"JSON parsing failed: {e}") + self.logger.error(f"JSON parsing failed on {storage_node_ip}: {e}") return - if not distribs: - self.logger.warning("No distrib names found in bdev_get_bdevs output.") + self.logger.warning(f"No distrib_* bdevs found on {storage_node_ip}.") + return + self.logger.info(f"[{storage_node_ip}] Distributions: {distribs}") + + # 2) Run multiple docker exec in parallel from ONE SSH exec + distrib_list_str = " ".join(shlex.quote(d) for d in distribs) + remote_tar = f"/tmp/distrib_logs_{timestamp}.tar.gz" + + # IMPORTANT: This script runs on the HOST and spawns many `docker exec ... &` in parallel. + # It throttles with MAXJ, waits, then tars outputs from /tmp inside the container into one tarball on the host. 
+ remote_script = f"""\ +set -euo pipefail +CN={shlex.quote(container_name)} +SOCK="/mnt/ramdisk/$CN/spdk.sock" +TS="{timestamp}" +MAXJ=8 +WORKDIR_HOST="{base_path}" +mkdir -p "$WORKDIR_HOST" + +# Make a temporary host folder to collect per-distrib files copied out of the container +HOST_STAGING="/tmp/distrib_host_collect_$TS" +mkdir -p "$HOST_STAGING" + +pids=() + +for d in {distrib_list_str}; do + ( + # Build JSON on host then copy into container (avoids many ssh execs) + JF="/tmp/stack_${{d}}.json" + cat > "$JF" <<'EOF_JSON' +{{ + "subsystems": [ + {{ + "subsystem": "distr", + "config": [ + {{ + "method": "distr_debug_placement_map_dump", + "params": {{"name": "__DIST__"}} + }} + ] + }} + ] +}} +EOF_JSON + # substitute distrib name + sed -i "s/__DIST__/$d/g" "$JF" + + # Copy JSON into container + sudo docker cp "$JF" "$CN:/tmp/stack_${{d}}.json" + + # Run rpc inside container (socket path respected) + sudo docker exec "$CN" bash -lc "python scripts/rpc_sock.py /tmp/stack_${{d}}.json {shlex.quote('/mnt/ramdisk/'+container_name+'/spdk.sock')} > /tmp/rpc_${{d}}.log 2>&1 || true" + + # Copy any files for this distrib out to host staging (rpc log + any matching /tmp/*d*) + sudo docker cp "$CN:/tmp/rpc_${{d}}.log" "$HOST_STAGING/rpc_${{d}}.log" 2>/dev/null || true + # try to pull any distrib-related artifacts + for f in $(sudo docker exec "$CN" bash -lc "ls /tmp/ 2>/dev/null | grep -F \"$d\" || true"); do + sudo docker cp "$CN:/tmp/$f" "$HOST_STAGING/$f" 2>/dev/null || true + done + + # cleanup container temp for this distrib + sudo docker exec "$CN" bash -lc "rm -f /tmp/stack_${{d}}.json /tmp/rpc_${{d}}.log" || true + rm -f "$JF" || true + ) & + + # throttle parallel jobs + while [ "$(jobs -rp | wc -l)" -ge "$MAXJ" ]; do sleep 0.2; done +done + +# Wait for all background jobs +wait + +# Tar once on host +tar -C "$HOST_STAGING" -czf {shlex.quote(remote_tar)} . 
2>/dev/null || true + +# Move artifacts to final location +mv -f {shlex.quote(remote_tar)} "$WORKDIR_HOST/" || true + +# Also copy loose files (for convenience) then clean staging +cp -rf "$HOST_STAGING"/. "$WORKDIR_HOST"/ 2>/dev/null || true +rm -rf "$HOST_STAGING" || true + +echo "$WORKDIR_HOST/{os.path.basename(remote_tar)}" +""" + + run_many_cmd = "bash -lc " + shlex.quote(remote_script) + tar_out, tar_err = self.exec_command(storage_node_ip, run_many_cmd) + if (tar_err and tar_err.strip()) and not tar_out: + self.logger.error(f"[{storage_node_ip}] Parallel docker-exec script error: {tar_err.strip()}") return - self.logger.info(f"Distributions found: {distribs}") - - # Step 5: Process each distrib - for distrib in distribs: - self.logger.info(f"Processing distrib: {distrib}") - rpc_json = { - "subsystems": [ - { - "subsystem": "distr", - "config": [ - { - "method": "distr_debug_placement_map_dump", - "params": {"name": distrib} - } - ] - } - ] - } - - rpc_json_str = json.dumps(rpc_json) - remote_json_path = "/tmp/stack.json" - - # Save JSON file remotely - create_json_command = f"echo '{rpc_json_str}' | sudo tee {remote_json_path}" - self.exec_command(storage_node_ip, create_json_command) - - # Copy into container - copy_json_command = f"sudo docker cp {remote_json_path} {container_name}:{remote_json_path}" - self.exec_command(storage_node_ip, copy_json_command) - - # Run RPC inside container - rpc_command = f"sudo docker exec {container_name} bash -c 'python scripts/rpc_sock.py {remote_json_path}'" - self.exec_command(storage_node_ip, rpc_command) - - # Find and copy log - find_log_command = f"sudo docker exec {container_name} ls /tmp/ | grep {distrib}" - log_file_name, _ = self.exec_command(storage_node_ip, find_log_command) - log_file_name = log_file_name.strip().replace("\r", "").replace("\n", "") - - if not log_file_name: - self.logger.error(f"No log file found for distrib {distrib}.") - continue - - log_file_path = f"/tmp/{log_file_name}" - local_log_path 
= f"{base_path}/{log_file_name}_{storage_node_ip}_{timestamp}" - copy_log_cmd = f"sudo docker cp {container_name}:{log_file_path} {local_log_path}" - self.exec_command(storage_node_ip, copy_log_cmd) - - self.logger.info(f"Fetched log for {distrib}: {local_log_path}") - - # Clean up - delete_log_cmd = f"sudo docker exec {container_name} rm -f {log_file_path}" - self.exec_command(storage_node_ip, delete_log_cmd) + final_tar = (tar_out or "").strip().splitlines()[-1] if tar_out else f"{base_path}/{os.path.basename(remote_tar)}" + self.logger.info(f"[{storage_node_ip}] Distrib logs saved: {base_path} (tar: {final_tar})") - self.logger.info("All distrib logs retrieved successfully.") def clone_mount_gen_uuid(self, node, device): """Repair the XFS filesystem and generate a new UUID. @@ -1722,8 +2295,8 @@ def start_netstat_dmesg_logging(self, node_ip, log_dir): self.exec_command(node_ip, f"sudo tmux new-session -d -s netstat_log 'bash -c \"while true; do netstat -s | grep \\\"segments dropped\\\" >> {netstat_log}; sleep 5; done\"'") self.exec_command(node_ip, f"sudo tmux new-session -d -s dmesg_log 'bash -c \"while true; do sudo dmesg | grep -i \\\"tcp\\\" >> {dmesg_log}; sleep 5; done\"'") - self.exec_command(node_ip, f"sudo tmux new-session -d -s journalctl_log 'bash -c \"while true; do sudo journalctl -k | grep -i \\\"tcp\\\" >> {journalctl_log}; sleep 5; done\"'") - + self.exec_command(node_ip, f"sudo tmux new-session -d -s journalctl_log 'bash -c \"while true; do sudo journalctl -k --no-tail | grep -i \\\"tcp\\\" >> {journalctl_log}; sleep 5; done\"'") + def reset_iptables_in_spdk(self, node_ip): """ Resets iptables rules inside the SPDK container on a given node. 
@@ -1915,6 +2488,7 @@ def start_resource_monitors(self, node_ip, log_dir): root_log = f"{log_dir}/root_partition_usage_{node_ip}_{timestamp}.txt" docker_mem_log = f"{log_dir}/docker_mem_usage_{node_ip}_{timestamp}.txt" system_mem_log = f"{log_dir}/system_memory_usage_{node_ip}_{timestamp}.txt" + docker_stats_logs = f"{log_dir}/docker_stats_usage_{node_ip}_{timestamp}.txt" # Ensure log directory exists and is writable self.exec_command(node_ip, f"sudo mkdir -p {log_dir} && sudo chmod 777 {log_dir}") @@ -1939,14 +2513,29 @@ def start_resource_monitors(self, node_ip, log_dir): 'bash -c "while true; do date >> {system_mem_log}; free -h >> {system_mem_log}; echo >> {system_mem_log}; sleep 10; done"' """ + docker_stats_cmd = f""" + sudo tmux new-session -d -s docker_stats_all \ + 'bash -c "while true; do date >> {docker_stats_logs}; docker stats --no-stream >> {docker_stats_logs}; echo >> {docker_stats_logs}; sleep 10; done"' + """ + self.exec_command(node_ip, df_cmd) self.exec_command(node_ip, docker_cmd) self.exec_command(node_ip, system_cmd) + self.exec_command(node_ip, docker_stats_cmd) - self.logger.info(f"Started root partition, container memory, and system memory logging on {node_ip}") + self.logger.info(f"Started root partition, container memory, docker stats and system memory logging on {node_ip}") + + def cluster_list(self, node_ip, cluster_id): + """Lists clusters known to the management node + Args: + node_ip (str): Mgmt Node IP to run command on + cluster_id (str): Cluster id (unused; kept for call-signature parity) + """ + cmd = f"{self.base_cmd} cluster list" + output, _ = self.exec_command(node_ip, cmd) + return output.strip() - def suspend_cluster(self, node_ip, cluster_id): """Sets cluster in suspended state @@ -1995,7 +2584,7 @@ def ensure_nfs_mounted(self, node, nfs_server, nfs_path, mount_point, is_local = """ check_cmd = f"mount | grep -w '{mount_point}'" mount_cmd = f"sudo mkdir -p {mount_point} && sudo mount -t nfs {nfs_server}:{nfs_path} {mount_point}" - install_check_cmd 
= "dnf list installed nfs-util" + install_check_cmd = "dnf list installed nfs-utils" install_cmd = "sudo dnf install -y nfs-utils" try: @@ -2300,3 +2889,10 @@ def stop_log_monitor(self): self._monitor_stop_flag.set() self._monitor_thread.join(timeout=10) print("K8s log monitor thread stopped.") + +def _rid(n=6): + import string + import random + letters = string.ascii_uppercase + digits = string.digits + return random.choice(letters) + ''.join(random.choices(letters + digits, k=n-1)) diff --git a/requirements.txt b/requirements.txt index 030cca8e0..9ee458f00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ flask-openapi3 jsonschema fastapi uvicorn +prometheus_api_client \ No newline at end of file diff --git a/simplyblock_cli/cli-reference.yaml b/simplyblock_cli/cli-reference.yaml index 59357ce89..d5d889a55 100644 --- a/simplyblock_cli/cli-reference.yaml +++ b/simplyblock_cli/cli-reference.yaml @@ -47,7 +47,7 @@ commands: _150 TiB / 3 * 2 = 100TiB_ would be a safe choice. dest: max_prov type: str - required: true + required: false - name: "--nodes-per-socket" help: "number of each node to be added per each socket." dest: nodes_per_socket @@ -60,6 +60,16 @@ commands: dest: sockets_to_use type: str default: "0" + - name: "--cores-percentage" + help: "The percentage of cores to be used for spdk (0-99)" + description: > + The percentage of cores to be used for spdk (0-99) + dest: cores_percentage + type: + range: + min: 0 + max: 99 + default: 0 - name: "--pci-allowed" help: "Comma separated list of PCI addresses of Nvme devices to use for storage devices." 
description: > @@ -76,6 +86,35 @@ commands: required: false type: str default: "" + - name: "--device-model" + help: "NVMe SSD model string, example: --model PM1628, --device-model and --size-range must be set together" + description: > + NVMe SSD model string, example: --model PM1628, --device-model and --size-range must be set together + dest: device_model + required: false + type: str + default: "" + - name: "--size-range" + help: "NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: --size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together" + description: > + NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: --size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together + dest: size_range + required: false + type: str + default: "" + - name: "--nvme-names" + help: "Comma separated list of nvme namespace names like nvme0n1,nvme1n1..." + description: > + Comma separated list of nvme namespace names like nvme0n1,nvme1n1... + dest: nvme_names + required: false + type: str + default: "" + - name: "--force" + help: "Force format detected or passed nvme pci address to 4K and clean partitions" + dest: force + type: bool + action: store_true - name: configure-upgrade help: "Upgrade the automated configuration file with new changes of cpu mask or storage devices" description: > @@ -86,6 +125,19 @@ commands: description: > Run locally on storage nodes and control plane hosts. Remove a previous deployment to support a fresh scratch-deployment of cluster software. + + - name: clean-devices + help: "clean devices stored in /etc/simplyblock/sn_config_file (local run)" + description: > + Run locally on storage nodes to clean nvme devices and free them. 
+ arguments: + - name: "--config-path" + help: "Config path to read stored nvme devices from" + dest: config_path + required: false + type: str + default: "/etc/simplyblock/sn_config_file" + - name: add-node help: "Adds a storage node by its IP address" arguments: @@ -106,6 +158,11 @@ commands: dest: partitions type: int default: 1 + - name: "--format-4k" + help: "Force format nvme devices with 4K" + dest: format_4k + type: bool + action: store_true - name: "--jm-percent" help: "Number in percent to use for JM from each device" dest: jm_percent @@ -437,16 +494,6 @@ commands: help: "Device id" dest: device_id type: str - - name: reset-device - help: "Resets a storage device" - usage: > - Hardware device reset. Resetting the device can return the device from an unavailable into online state, if - successful. - arguments: - - name: "device_id" - help: "Device id" - dest: device_id - type: str - name: restart-device help: "Restarts a storage device" usage: > @@ -458,6 +505,11 @@ commands: help: "Device id" dest: device_id type: str + - name: "--force" + help: "Force remove" + dest: force + type: bool + action: store_true - name: add-device help: "Adds a new storage device" usage: > @@ -603,6 +655,11 @@ commands: dest: force type: bool action: store_true + - name: "--format" + help: "Format the Alceml device used for JM device" + dest: format + type: bool + action: store_true - name: send-cluster-map help: "Sends a new cluster map" private: true @@ -657,6 +714,16 @@ commands: help: "attr_value" dest: attr_value type: str + - name: new-device-from-failed + help: "Adds a new device to from failed device information" + usage: > + A previously failed and migrated device may be added back into the cluster as a new device. The new device + would have the same info as the failed device but would be empty and not contain any data. 
+ arguments: + - name: "device_id" + help: "Device id" + dest: device_id + type: str - name: "cluster" help: "Cluster commands" weight: 200 @@ -1272,6 +1339,28 @@ commands: help: "Name" dest: name type: str + - name: add-replication + help: Assigns the snapshot replication target cluster + arguments: + - name: "cluster_id" + help: "Cluster id" + dest: cluster_id + type: str + completer: _completer_get_cluster_list + - name: "target_cluster_id" + help: "Target Cluster id" + dest: target_cluster_id + type: str + completer: _completer_get_cluster_list + - name: "--timeout" + help: "Snapshot replication network timeout" + dest: timeout + type: int + default: "3600" + - name: "--target-pool" + help: "Target cluster pool ID or name" + dest: target_pool + type: str - name: "volume" help: "Logical volume commands" aliases: @@ -1402,6 +1491,11 @@ commands: dest: npcs type: int default: 0 + - name: "--replicate" + help: "Replicate LVol snapshot" + dest: replicate + type: bool + action: store_true - name: qos-set help: "Changes QoS settings for an active logical volume" arguments: @@ -1603,6 +1697,52 @@ commands: help: "Logical volume id" dest: volume_id type: str + - name: replication-start + help: "Start snapshot replication taken from lvol" + arguments: + - name: "lvol_id" + help: "Logical volume id" + dest: lvol_id + type: str + - name: "--replication-cluster-id" + help: "Cluster ID of the replication target cluster" + dest: replication_cluster_id + type: str + - name: replication-stop + help: "Stop snapshot replication taken from lvol" + arguments: + - name: "lvol_id" + help: "Logical volume id" + dest: lvol_id + type: str + - name: replication-status + help: "Lists replication status" + arguments: + - name: "cluster_id" + help: "Cluster UUID" + dest: cluster_id + type: str + - name: replication-trigger + help: "Start replication for lvol" + arguments: + - name: "lvol_id" + help: "Logical volume id" + dest: lvol_id + type: str + - name: suspend + help: "Suspend lvol 
subsystems" + arguments: + - name: "lvol_id" + help: "Logical volume id" + dest: lvol_id + type: str + - name: resume + help: "Resume lvol subsystems" + arguments: + - name: "lvol_id" + help: "Logical volume id" + dest: lvol_id + type: str - name: "control-plane" help: "Control plane commands" aliases: @@ -1827,6 +1967,16 @@ commands: dest: all type: bool action: store_true + - name: "--cluster-id" + help: "Filter snapshots by cluster UUID" + dest: cluster_id + type: str + required: false + - name: "--with-details" + help: "List snapshots with replicate and chaining details" + dest: with_details + type: bool + action: store_true - name: delete help: "Deletes a snapshot" arguments: @@ -1839,6 +1989,13 @@ commands: dest: force type: bool action: store_true + - name: check + help: "Check a snapshot health" + arguments: + - name: "snapshot_id" + help: "Snapshot id" + dest: snapshot_id + type: str - name: clone help: "Provisions a new logical volume from an existing snapshot" arguments: @@ -1855,6 +2012,43 @@ commands: dest: resize type: size default: "0" + - name: replication-status + help: "Lists snapshots replication status" + arguments: + - name: "cluster_id" + help: "Cluster UUID" + dest: cluster_id + type: str + - name: delete-replication-only + help: "Delete replicated version of a snapshot" + arguments: + - name: "snapshot_id" + help: "Snapshot UUID" + dest: snapshot_id + type: str + - name: get + help: "Gets a snapshot information" + arguments: + - name: "snapshot_id" + help: "Snapshot UUID" + dest: snapshot_id + type: str + - name: set + help: "set snapshot db value" + private: true + arguments: + - name: "snapshot_id" + help: "snapshot id" + dest: snapshot_id + type: str + - name: "attr_name" + help: "attr_name" + dest: attr_name + type: str + - name: "attr_value" + help: "attr_value" + dest: attr_value + type: str - name: "qos" help: "qos commands" weight: 700 diff --git a/simplyblock_cli/cli.py b/simplyblock_cli/cli.py index e70f72339..1c5ed552c 100644 --- 
a/simplyblock_cli/cli.py +++ b/simplyblock_cli/cli.py @@ -5,7 +5,7 @@ import sys import traceback -from simplyblock_cli.clibase import CLIWrapperBase, range_type, regex_type, size_type +from simplyblock_cli.clibase import CLIWrapperBase, range_type, size_type from simplyblock_core import utils class CLIWrapper(CLIWrapperBase): @@ -36,6 +36,7 @@ def init_storage_node(self): self.init_storage_node__configure(subparser) self.init_storage_node__configure_upgrade(subparser) self.init_storage_node__deploy_cleaner(subparser) + self.init_storage_node__clean_devices(subparser) self.init_storage_node__add_node(subparser) self.init_storage_node__delete(subparser) self.init_storage_node__remove(subparser) @@ -51,7 +52,6 @@ def init_storage_node(self): if self.developer_mode: self.init_storage_node__device_testing_mode(subparser) self.init_storage_node__get_device(subparser) - self.init_storage_node__reset_device(subparser) self.init_storage_node__restart_device(subparser) self.init_storage_node__add_device(subparser) self.init_storage_node__remove_device(subparser) @@ -77,6 +77,7 @@ def init_storage_node(self): self.init_storage_node__dump_lvstore(subparser) if self.developer_mode: self.init_storage_node__set(subparser) + self.init_storage_node__new_device_from_failed(subparser) def init_storage_node__deploy(self, subparser): @@ -87,11 +88,16 @@ def init_storage_node__deploy(self, subparser): def init_storage_node__configure(self, subparser): subcommand = self.add_sub_command(subparser, 'configure', 'Prepare a configuration file to be used when adding the storage node') argument = subcommand.add_argument('--max-lvol', help='Max logical volume per storage node', type=int, dest='max_lvol', required=True) - argument = subcommand.add_argument('--max-size', help='Maximum amount of GB to be utilized on this storage node', type=str, dest='max_prov', required=True) + argument = subcommand.add_argument('--max-size', help='Maximum amount of GB to be utilized on this storage node', 
type=str, dest='max_prov', required=False) argument = subcommand.add_argument('--nodes-per-socket', help='number of each node to be added per each socket.', type=int, default=1, dest='nodes_per_socket') argument = subcommand.add_argument('--sockets-to-use', help='The system socket to use when adding the storage nodes', type=str, default='0', dest='sockets_to_use') + argument = subcommand.add_argument('--cores-percentage', help='The percentage of cores to be used for spdk (0-99)', type=range_type(0, 99), default=0, dest='cores_percentage') argument = subcommand.add_argument('--pci-allowed', help='Comma separated list of PCI addresses of Nvme devices to use for storage devices.', type=str, default='', dest='pci_allowed', required=False) argument = subcommand.add_argument('--pci-blocked', help='Comma separated list of PCI addresses of Nvme devices to not use for storage devices', type=str, default='', dest='pci_blocked', required=False) + argument = subcommand.add_argument('--device-model', help='NVMe SSD model string, example: --model PM1628, --device-model and --size-range must be set together', type=str, default='', dest='device_model', required=False) + argument = subcommand.add_argument('--size-range', help='NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: --size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together', type=str, default='', dest='size_range', required=False) + argument = subcommand.add_argument('--nvme-names', help='Comma separated list of nvme namespace names like nvme0n1,nvme1n1...', type=str, default='', dest='nvme_names', required=False) + argument = subcommand.add_argument('--force', help='Force format detected or passed nvme pci address to 4K and clean partitions', dest='force', action='store_true') def init_storage_node__configure_upgrade(self, subparser): subcommand = self.add_sub_command(subparser, 'configure-upgrade', 'Upgrade the automated 
configuration file with new changes of cpu mask or storage devices') @@ -99,12 +105,17 @@ def init_storage_node__configure_upgrade(self, subparser): def init_storage_node__deploy_cleaner(self, subparser): subcommand = self.add_sub_command(subparser, 'deploy-cleaner', 'Cleans a previous simplyblock deploy (local run)') + def init_storage_node__clean_devices(self, subparser): + subcommand = self.add_sub_command(subparser, 'clean-devices', 'clean devices stored in /etc/simplyblock/sn_config_file (local run)') + argument = subcommand.add_argument('--config-path', help='Config path to read stored nvme devices from', type=str, default='/etc/simplyblock/sn_config_file', dest='config_path', required=False) + def init_storage_node__add_node(self, subparser): subcommand = self.add_sub_command(subparser, 'add-node', 'Adds a storage node by its IP address') subcommand.add_argument('cluster_id', help='Cluster id', type=str) subcommand.add_argument('node_addr', help='Address of storage node api to add, like :5000', type=str) subcommand.add_argument('ifname', help='Management interface name', type=str) argument = subcommand.add_argument('--journal-partition', help='1: auto-create small partitions for journal on nvme devices. 0: use a separate (the smallest) nvme device of the node for journal. The journal needs a maximum of 3 percent of total available raw disk space.', type=int, default=1, dest='partitions') + argument = subcommand.add_argument('--format-4k', help='Force format nvme devices with 4K', dest='format_4k', action='store_true') if self.developer_mode: argument = subcommand.add_argument('--jm-percent', help='Number in percent to use for JM from each device', type=int, default=3, dest='jm_percent') argument = subcommand.add_argument('--data-nics', help='Storage network interface names. 
currently one interface is supported.', type=str, dest='data_nics', nargs='+') @@ -210,13 +221,10 @@ def init_storage_node__get_device(self, subparser): subcommand = self.add_sub_command(subparser, 'get-device', 'Gets storage device by its id') subcommand.add_argument('device_id', help='Device id', type=str) - def init_storage_node__reset_device(self, subparser): - subcommand = self.add_sub_command(subparser, 'reset-device', 'Resets a storage device') - subcommand.add_argument('device_id', help='Device id', type=str) - def init_storage_node__restart_device(self, subparser): subcommand = self.add_sub_command(subparser, 'restart-device', 'Restarts a storage device') subcommand.add_argument('device_id', help='Device id', type=str) + argument = subcommand.add_argument('--force', help='Force remove', dest='force', action='store_true') def init_storage_node__add_device(self, subparser): subcommand = self.add_sub_command(subparser, 'add-device', 'Adds a new storage device') @@ -276,6 +284,7 @@ def init_storage_node__restart_jm_device(self, subparser): subcommand = self.add_sub_command(subparser, 'restart-jm-device', 'Restarts a journaling device') subcommand.add_argument('jm_device_id', help='Journaling device id', type=str) argument = subcommand.add_argument('--force', help='Force device remove', dest='force', action='store_true') + argument = subcommand.add_argument('--format', help='Format the Alceml device used for JM device', dest='format', action='store_true') def init_storage_node__send_cluster_map(self, subparser): subcommand = self.add_sub_command(subparser, 'send-cluster-map', 'Sends a new cluster map') @@ -299,6 +308,10 @@ def init_storage_node__set(self, subparser): subcommand.add_argument('attr_name', help='attr_name', type=str) subcommand.add_argument('attr_value', help='attr_value', type=str) + def init_storage_node__new_device_from_failed(self, subparser): + subcommand = self.add_sub_command(subparser, 'new-device-from-failed', 'Adds a new device to from 
failed device information') + subcommand.add_argument('device_id', help='Device id', type=str) + def init_cluster(self): subparser = self.add_command('cluster', 'Cluster commands') @@ -331,6 +344,7 @@ def init_cluster(self): if self.developer_mode: self.init_cluster__set(subparser) self.init_cluster__change_name(subparser) + self.init_cluster__add_replication(subparser) def init_cluster__create(self, subparser): @@ -513,6 +527,13 @@ def init_cluster__change_name(self, subparser): subcommand.add_argument('cluster_id', help='Cluster id', type=str).completer = self._completer_get_cluster_list subcommand.add_argument('name', help='Name', type=str) + def init_cluster__add_replication(self, subparser): + subcommand = self.add_sub_command(subparser, 'add-replication', 'Assigns the snapshot replication target cluster') + subcommand.add_argument('cluster_id', help='Cluster id', type=str).completer = self._completer_get_cluster_list + subcommand.add_argument('target_cluster_id', help='Target Cluster id', type=str).completer = self._completer_get_cluster_list + argument = subcommand.add_argument('--timeout', help='Snapshot replication network timeout', type=int, default=3600, dest='timeout') + argument = subcommand.add_argument('--target-pool', help='Target cluster pool ID or name', type=str, dest='target_pool') + def init_volume(self): subparser = self.add_command('volume', 'Logical volume commands', aliases=['lvol',]) @@ -533,6 +554,12 @@ def init_volume(self): self.init_volume__get_io_stats(subparser) self.init_volume__check(subparser) self.init_volume__inflate(subparser) + self.init_volume__replication_start(subparser) + self.init_volume__replication_stop(subparser) + self.init_volume__replication_status(subparser) + self.init_volume__replication_trigger(subparser) + self.init_volume__suspend(subparser) + self.init_volume__resume(subparser) def init_volume__add(self, subparser): @@ -562,6 +589,7 @@ def init_volume__add(self, subparser): argument = 
subcommand.add_argument('--pvc-name', '--pvc_name', help='Set logical volume PVC name for k8s clients', type=str, dest='pvc_name') argument = subcommand.add_argument('--data-chunks-per-stripe', help='Erasure coding schema parameter k (distributed raid), default: 1', type=int, default=0, dest='ndcs') argument = subcommand.add_argument('--parity-chunks-per-stripe', help='Erasure coding schema parameter n (distributed raid), default: 1', type=int, default=0, dest='npcs') + argument = subcommand.add_argument('--replicate', help='Replicate LVol snapshot', dest='replicate', action='store_true') def init_volume__qos_set(self, subparser): subcommand = self.add_sub_command(subparser, 'qos-set', 'Changes QoS settings for an active logical volume') @@ -639,6 +667,31 @@ def init_volume__inflate(self, subparser): subcommand = self.add_sub_command(subparser, 'inflate', 'Inflate a logical volume') subcommand.add_argument('volume_id', help='Logical volume id', type=str) + def init_volume__replication_start(self, subparser): + subcommand = self.add_sub_command(subparser, 'replication-start', 'Start snapshot replication taken from lvol') + subcommand.add_argument('lvol_id', help='Logical volume id', type=str) + argument = subcommand.add_argument('--replication-cluster-id', help='Cluster ID of the replication target cluster', type=str, dest='replication_cluster_id') + + def init_volume__replication_stop(self, subparser): + subcommand = self.add_sub_command(subparser, 'replication-stop', 'Stop snapshot replication taken from lvol') + subcommand.add_argument('lvol_id', help='Logical volume id', type=str) + + def init_volume__replication_status(self, subparser): + subcommand = self.add_sub_command(subparser, 'replication-status', 'Lists replication status') + subcommand.add_argument('cluster_id', help='Cluster UUID', type=str) + + def init_volume__replication_trigger(self, subparser): + subcommand = self.add_sub_command(subparser, 'replication-trigger', 'Start replication for lvol') + 
subcommand.add_argument('lvol_id', help='Logical volume id', type=str) + + def init_volume__suspend(self, subparser): + subcommand = self.add_sub_command(subparser, 'suspend', 'Suspend lvol subsystems') + subcommand.add_argument('lvol_id', help='Logical volume id', type=str) + + def init_volume__resume(self, subparser): + subcommand = self.add_sub_command(subparser, 'resume', 'Resume lvol subsystems') + subcommand.add_argument('lvol_id', help='Logical volume id', type=str) + def init_control_plane(self): subparser = self.add_command('control-plane', 'Control plane commands', aliases=['cp','mgmt',]) @@ -738,7 +791,13 @@ def init_snapshot(self): self.init_snapshot__add(subparser) self.init_snapshot__list(subparser) self.init_snapshot__delete(subparser) + self.init_snapshot__check(subparser) self.init_snapshot__clone(subparser) + self.init_snapshot__replication_status(subparser) + self.init_snapshot__delete_replication_only(subparser) + self.init_snapshot__get(subparser) + if self.developer_mode: + self.init_snapshot__set(subparser) def init_snapshot__add(self, subparser): @@ -749,18 +808,42 @@ def init_snapshot__add(self, subparser): def init_snapshot__list(self, subparser): subcommand = self.add_sub_command(subparser, 'list', 'Lists all snapshots') argument = subcommand.add_argument('--all', help='List soft deleted snapshots', dest='all', action='store_true') + argument = subcommand.add_argument('--cluster-id', help='Filter snapshots by cluster UUID', type=str, dest='cluster_id', required=False) + argument = subcommand.add_argument('--with-details', help='List snapshots with replicate and chaining details', dest='with_details', action='store_true') def init_snapshot__delete(self, subparser): subcommand = self.add_sub_command(subparser, 'delete', 'Deletes a snapshot') subcommand.add_argument('snapshot_id', help='Snapshot id', type=str) argument = subcommand.add_argument('--force', help='Force remove', dest='force', action='store_true') + def 
init_snapshot__check(self, subparser): + subcommand = self.add_sub_command(subparser, 'check', 'Check a snapshot health') + subcommand.add_argument('snapshot_id', help='Snapshot id', type=str) + def init_snapshot__clone(self, subparser): subcommand = self.add_sub_command(subparser, 'clone', 'Provisions a new logical volume from an existing snapshot') subcommand.add_argument('snapshot_id', help='Snapshot id', type=str) subcommand.add_argument('lvol_name', help='Logical volume name', type=str) argument = subcommand.add_argument('--resize', help='New logical volume size: 10M, 10G, 10(bytes). Can only increase.', type=size_type(), default='0', dest='resize') + def init_snapshot__replication_status(self, subparser): + subcommand = self.add_sub_command(subparser, 'replication-status', 'Lists snapshots replication status') + subcommand.add_argument('cluster_id', help='Cluster UUID', type=str) + + def init_snapshot__delete_replication_only(self, subparser): + subcommand = self.add_sub_command(subparser, 'delete-replication-only', 'Delete replicated version of a snapshot') + subcommand.add_argument('snapshot_id', help='Snapshot UUID', type=str) + + def init_snapshot__get(self, subparser): + subcommand = self.add_sub_command(subparser, 'get', 'Gets a snapshot information') + subcommand.add_argument('snapshot_id', help='Snapshot UUID', type=str) + + def init_snapshot__set(self, subparser): + subcommand = self.add_sub_command(subparser, 'set', 'set snapshot db value') + subcommand.add_argument('snapshot_id', help='snapshot id', type=str) + subcommand.add_argument('attr_name', help='attr_name', type=str) + subcommand.add_argument('attr_value', help='attr_value', type=str) + def init_qos(self): subparser = self.add_command('qos', 'qos commands') @@ -809,6 +892,8 @@ def run(self): ret = self.storage_node__configure_upgrade(sub_command, args) elif sub_command in ['deploy-cleaner']: ret = self.storage_node__deploy_cleaner(sub_command, args) + elif sub_command in ['clean-devices']: 
+ ret = self.storage_node__clean_devices(sub_command, args) elif sub_command in ['add-node']: if not self.developer_mode: args.jm_percent = 3 @@ -860,8 +945,6 @@ def run(self): ret = self.storage_node__device_testing_mode(sub_command, args) elif sub_command in ['get-device']: ret = self.storage_node__get_device(sub_command, args) - elif sub_command in ['reset-device']: - ret = self.storage_node__reset_device(sub_command, args) elif sub_command in ['restart-device']: ret = self.storage_node__restart_device(sub_command, args) elif sub_command in ['add-device']: @@ -924,6 +1007,8 @@ def run(self): ret = False else: ret = self.storage_node__set(sub_command, args) + elif sub_command in ['new-device-from-failed']: + ret = self.storage_node__new_device_from_failed(sub_command, args) else: self.parser.print_help() @@ -1009,6 +1094,8 @@ def run(self): ret = self.cluster__set(sub_command, args) elif sub_command in ['change-name']: ret = self.cluster__change_name(sub_command, args) + elif sub_command in ['add-replication']: + ret = self.cluster__add_replication(sub_command, args) else: self.parser.print_help() @@ -1055,6 +1142,18 @@ def run(self): ret = self.volume__check(sub_command, args) elif sub_command in ['inflate']: ret = self.volume__inflate(sub_command, args) + elif sub_command in ['replication-start']: + ret = self.volume__replication_start(sub_command, args) + elif sub_command in ['replication-stop']: + ret = self.volume__replication_stop(sub_command, args) + elif sub_command in ['replication-status']: + ret = self.volume__replication_status(sub_command, args) + elif sub_command in ['replication-trigger']: + ret = self.volume__replication_trigger(sub_command, args) + elif sub_command in ['suspend']: + ret = self.volume__suspend(sub_command, args) + elif sub_command in ['resume']: + ret = self.volume__resume(sub_command, args) else: self.parser.print_help() @@ -1100,8 +1199,22 @@ def run(self): ret = self.snapshot__list(sub_command, args) elif sub_command in 
['delete']: ret = self.snapshot__delete(sub_command, args) + elif sub_command in ['check']: + ret = self.snapshot__check(sub_command, args) elif sub_command in ['clone']: ret = self.snapshot__clone(sub_command, args) + elif sub_command in ['replication-status']: + ret = self.snapshot__replication_status(sub_command, args) + elif sub_command in ['delete-replication-only']: + ret = self.snapshot__delete_replication_only(sub_command, args) + elif sub_command in ['get']: + ret = self.snapshot__get(sub_command, args) + elif sub_command in ['set']: + if not self.developer_mode: + print("This command is private.") + ret = False + else: + ret = self.snapshot__set(sub_command, args) else: self.parser.print_help() diff --git a/simplyblock_cli/clibase.py b/simplyblock_cli/clibase.py index 834dd2bab..2603bc574 100644 --- a/simplyblock_cli/clibase.py +++ b/simplyblock_cli/clibase.py @@ -87,8 +87,7 @@ def storage_node__configure_upgrade(self, sub_command, args): def storage_node__configure(self, sub_command, args): if not args.max_lvol: self.parser.error(f"Mandatory argument '--max-lvol' not provided for {sub_command}") - if not args.max_prov: - self.parser.error(f"Mandatory argument '--max-size' not provided for {sub_command}") + max_size = getattr(args, "max_prov") or 0 sockets_to_use = [0] if args.sockets_to_use: try: @@ -101,21 +100,41 @@ def storage_node__configure(self, sub_command, args): self.parser.error(f"nodes_per_socket {args.nodes_per_socket}must be either 1 or 2") if args.pci_allowed and args.pci_blocked: self.parser.error("pci-allowed and pci-blocked cannot be both specified") - max_prov = utils.parse_size(args.max_prov, assume_unit='G') + max_prov = utils.parse_size(max_size, assume_unit='G') pci_allowed = [] pci_blocked = [] + nvme_names = [] if args.pci_allowed: pci_allowed = [str(x) for x in args.pci_allowed.split(',')] if args.pci_blocked: pci_blocked = [str(x) for x in args.pci_blocked.split(',')] - - return 
storage_ops.generate_automated_deployment_config(args.max_lvol, max_prov, sockets_to_use, - args.nodes_per_socket, pci_allowed, pci_blocked) + if (args.device_model and not args.size_range) or (not args.device_model and args.size_range): + self.parser.error("device_model and size_range must be set together") + if args.nvme_names: + nvme_names = [str(x) for x in args.nvme_names.split(',')] + use_pci_allowed = bool(args.pci_allowed) + use_pci_blocked = bool(args.pci_blocked) + use_model_range = bool(args.device_model and args.size_range) + if sum([use_pci_allowed, use_pci_blocked, use_model_range]) > 1: + self.parser.error( + "Options --pci-allowed, --pci-blocked, and " + "(--device-model with --size-range) are mutually exclusive; choose only one." + ) + cores_percentage = int(args.cores_percentage) + + return storage_ops.generate_automated_deployment_config( + args.max_lvol, max_prov, sockets_to_use,args.nodes_per_socket, + pci_allowed, pci_blocked, force=args.force, device_model=args.device_model, + size_range=args.size_range, cores_percentage=cores_percentage, nvme_names=nvme_names) def storage_node__deploy_cleaner(self, sub_command, args): storage_ops.deploy_cleaner() return True # remove once CLI changed to exceptions + def storage_node__clean_devices(self, sub_command, args): + storage_ops.clean_devices(args.config_path) + return True # remove once CLI changed to exceptions + def storage_node__add_node(self, sub_command, args): cluster_id = args.cluster_id node_addr = args.node_addr @@ -134,26 +153,31 @@ def storage_node__add_node(self, sub_command, args): enable_ha_jm = args.enable_ha_jm namespace = args.namespace ha_jm_count = args.ha_jm_count - - out = storage_ops.add_node( - cluster_id=cluster_id, - node_addr=node_addr, - iface_name=ifname, - data_nics_list=data_nics, - max_snap=max_snap, - spdk_image=spdk_image, - spdk_debug=spdk_debug, - small_bufsize=small_bufsize, - large_bufsize=large_bufsize, - num_partitions_per_dev=num_partitions_per_dev, - 
jm_percent=jm_percent, - enable_test_device=enable_test_device, - namespace=namespace, - enable_ha_jm=enable_ha_jm, - id_device_by_nqn=args.id_device_by_nqn, - partition_size=args.partition_size, - ha_jm_count=ha_jm_count, - ) + format_4k = args.format_4k + try: + out = storage_ops.add_node( + cluster_id=cluster_id, + node_addr=node_addr, + iface_name=ifname, + data_nics_list=data_nics, + max_snap=max_snap, + spdk_image=spdk_image, + spdk_debug=spdk_debug, + small_bufsize=small_bufsize, + large_bufsize=large_bufsize, + num_partitions_per_dev=num_partitions_per_dev, + jm_percent=jm_percent, + enable_test_device=enable_test_device, + namespace=namespace, + enable_ha_jm=enable_ha_jm, + id_device_by_nqn=args.id_device_by_nqn, + partition_size=args.partition_size, + ha_jm_count=ha_jm_count, + format_4k=format_4k + ) + except Exception as e: + print(e) + return False return out @@ -184,11 +208,15 @@ def storage_node__restart(self, sub_command, args): large_bufsize = args.large_bufsize ssd_pcie = args.ssd_pcie - return storage_ops.restart_storage_node( - node_id, max_lvol, max_snap, max_prov, - spdk_image, spdk_debug, - small_bufsize, large_bufsize, node_ip=args.node_ip, reattach_volume=reattach_volume, force=args.force, - new_ssd_pcie=ssd_pcie, force_lvol_recreate=args.force_lvol_recreate) + try: + return storage_ops.restart_storage_node( + node_id, max_lvol, max_snap, max_prov, + spdk_image, spdk_debug, + small_bufsize, large_bufsize, node_ip=args.node_ip, reattach_volume=reattach_volume, force=args.force, + new_ssd_pcie=ssd_pcie, force_lvol_recreate=args.force_lvol_recreate) + except Exception as e: + print(e) + return False def storage_node__shutdown(self, sub_command, args): return storage_ops.shutdown_storage_node(args.node_id, args.force) @@ -233,7 +261,7 @@ def storage_node__reset_device(self, sub_command, args): return device_controller.reset_storage_device(args.device_id) def storage_node__restart_device(self, sub_command, args): - return 
device_controller.restart_device(args.device_id) + return device_controller.restart_device(args.device_id, args.force) def storage_node__add_device(self, sub_command, args): return device_controller.add_device(args.device_id) @@ -292,7 +320,7 @@ def storage_node__remove_jm_device(self, sub_command, args): return device_controller.remove_jm_device(args.jm_device_id, args.force) def storage_node__restart_jm_device(self, sub_command, args): - return device_controller.restart_jm_device(args.jm_device_id, args.force) + return device_controller.restart_jm_device(args.jm_device_id, args.force, args.format) def storage_node__send_cluster_map(self, sub_command, args): node_id = args.node_id @@ -310,6 +338,9 @@ def storage_node__dump_lvstore(self, sub_command, args): node_id = args.node_id return storage_ops.dump_lvstore(node_id) + def storage_node__new_device_from_failed(self, sub_command, args): + return device_controller.new_device_from_failed(args.device_id) + def storage_node__set(self, sub_command, args): return storage_ops.set_value(args.node_id, args.attr_name, args.attr_value) @@ -447,6 +478,9 @@ def cluster__complete_expand(self, sub_command, args): cluster_ops.cluster_expand(args.cluster_id) return True + def cluster__add_replication(self, sub_command, args): + return cluster_ops.add_replication(args.cluster_id, args.target_cluster_id, args.timeout, args.target_pool) + def volume__add(self, sub_command, args): name = args.name size = args.size @@ -474,7 +508,8 @@ def volume__add(self, sub_command, args): crypto_key2=args.crypto_key2, lvol_priority_class=lvol_priority_class, uid=args.uid, pvc_name=args.pvc_name, namespace=args.namespace, - max_namespace_per_subsys=args.max_namespace_per_subsys, ndcs=ndcs, npcs=npcs, fabric=args.fabric) + max_namespace_per_subsys=args.max_namespace_per_subsys, ndcs=ndcs, npcs=npcs, fabric=args.fabric, + do_replicate=args.replicate) if results: return results else: @@ -555,6 +590,24 @@ def volume__check(self, sub_command, args): def 
volume__inflate(self, sub_command, args): return lvol_controller.inflate_lvol(args.volume_id) + def volume__replication_start(self, sub_command, args): + return lvol_controller.replication_start(args.lvol_id, args.replication_cluster_id) + + def volume__replication_stop(self, sub_command, args): + return lvol_controller.replication_stop(args.lvol_id) + + def volume__replication_status(self, sub_command, args): + return snapshot_controller.list_replication_tasks(args.cluster_id) + + def volume__replication_trigger(self, sub_command, args): + return lvol_controller.replication_trigger(args.lvol_id) + + def volume__suspend(self, sub_command, args): + return lvol_controller.suspend_lvol(args.lvol_id) + + def volume__resume(self, sub_command, args): + return lvol_controller.resume_lvol(args.lvol_id) + def control_plane__add(self, sub_command, args): cluster_id = args.cluster_id cluster_ip = args.cluster_ip @@ -623,16 +676,31 @@ def snapshot__add(self, sub_command, args): return snapshot_id if not error else error def snapshot__list(self, sub_command, args): - return snapshot_controller.list(args.all) + return snapshot_controller.list(args.all, args.cluster_id, args.with_details) def snapshot__delete(self, sub_command, args): return snapshot_controller.delete(args.snapshot_id, args.force) + def snapshot__check(self, sub_command, args): + return health_controller.check_snap(args.snapshot_id) + def snapshot__clone(self, sub_command, args): new_size = args.resize - success, details = snapshot_controller.clone(args.snapshot_id, args.lvol_name, new_size) - return details + clone_id, error = snapshot_controller.clone(args.snapshot_id, args.lvol_name, new_size) + return clone_id if not error else error + + def snapshot__replication_status(self, sub_command, args): + return snapshot_controller.list_replication_tasks(args.cluster_id) + + def snapshot__delete_replication_only(self, sub_command, args): + return snapshot_controller.delete_replicated(args.snapshot_id) + + def 
snapshot__get(self, sub_command, args): + return snapshot_controller.get(args.snapshot_id) + + def snapshot__set(self, sub_command, args): + return snapshot_controller.set(args.snapshot_id, args.attr_name, args.attr_value) def qos__add(self, sub_command, args): return qos_controller.add_class(args.name, args.weight, args.cluster_id) diff --git a/simplyblock_cli/scripts/cli-wrapper.jinja2 b/simplyblock_cli/scripts/cli-wrapper.jinja2 index 423b11992..80932e582 100644 --- a/simplyblock_cli/scripts/cli-wrapper.jinja2 +++ b/simplyblock_cli/scripts/cli-wrapper.jinja2 @@ -5,7 +5,7 @@ import logging import sys import traceback -from simplyblock_cli.clibase import CLIWrapperBase, range_type, regex_type, size_type +from simplyblock_cli.clibase import CLIWrapperBase, range_type, size_type from simplyblock_core import utils class CLIWrapper(CLIWrapperBase): diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index 103123934..84c2c3442 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -15,7 +15,7 @@ from docker.errors import DockerException from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops -from simplyblock_core.controllers import cluster_events, device_controller, qos_controller +from simplyblock_core.controllers import cluster_events, device_controller, qos_controller, tasks_controller from simplyblock_core.db_controller import DBController from simplyblock_core.models.cluster import Cluster from simplyblock_core.models.job_schedule import JobSchedule @@ -25,6 +25,7 @@ from simplyblock_core.models.stats import LVolStatObject, ClusterStatObject, NodeStatObject, DeviceStatObject from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.utils import pull_docker_image_with_retry logger = utils.get_logger(__name__) @@ -79,7 +80,7 @@ def 
_create_update_user(cluster_id, grafana_url, grafana_secret, user_secret, up def _add_graylog_input(cluster_ip, password): - base_url = f"http://{cluster_ip}/graylog/api" + base_url = f"{cluster_ip}/api" input_url = f"{base_url}/system/inputs" retries = 30 @@ -160,7 +161,7 @@ def _add_graylog_input(cluster_ip, password): def _set_max_result_window(cluster_ip, max_window=100000): - url_existing_indices = f"http://{cluster_ip}/opensearch/_all/_settings" + url_existing_indices = f"{cluster_ip}/_all/_settings" retries = 30 reachable=False @@ -187,7 +188,7 @@ def _set_max_result_window(cluster_ip, max_window=100000): logger.error(f"Failed to update settings for existing indices: {response.text}") return False - url_template = f"http://{cluster_ip}/opensearch/_template/all_indices_template" + url_template = f"{cluster_ip}/_template/all_indices_template" payload_template = json.dumps({ "index_patterns": ["*"], "settings": { @@ -281,9 +282,6 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, if not dev_ip: raise ValueError("Error getting ip: For Kubernetes-based deployments, please supply --mgmt-ip.") - current_node = utils.get_node_name_by_ip(dev_ip) - utils.label_node_as_mgmt_plane(current_node) - if not cli_pass: cli_pass = utils.generate_string(10) @@ -315,12 +313,17 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cluster.fabric_tcp = protocols["tcp"] cluster.fabric_rdma = protocols["rdma"] cluster.is_single_node = is_single_node - if grafana_endpoint: - cluster.grafana_endpoint = grafana_endpoint - elif ingress_host_source == "hostip": - cluster.grafana_endpoint = f"http://{dev_ip}/grafana" + + if ingress_host_source == "hostip": + base = dev_ip else: - cluster.grafana_endpoint = f"http://{dns_name}/grafana" + base = dns_name + + graylog_endpoint = f"http://{base}/graylog" + os_endpoint = f"http://{base}/opensearch" + default_grafana = f"http://{base}/grafana" + + cluster.grafana_endpoint = grafana_endpoint or default_grafana 
cluster.enable_node_affinity = enable_node_affinity cluster.qpair_count = qpair_count or constants.QPAIR_COUNT cluster.client_qpair_count = client_qpair_count or constants.CLIENT_QPAIR_COUNT @@ -331,6 +334,7 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cluster.contact_point = contact_point cluster.disable_monitoring = disable_monitoring cluster.mode = mode + cluster.full_page_unmap = False if mode == "docker": if not disable_monitoring: @@ -350,20 +354,21 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, elif mode == "kubernetes": logger.info("Retrieving foundationdb connection string...") fdb_cluster_string = utils.get_fdb_cluster_string(constants.FDB_CONFIG_NAME, constants.K8S_NAMESPACE) - db_connection = fdb_cluster_string + + logger.info("Patching prometheus configmap...") + utils.patch_prometheus_configmap(cluster.uuid, cluster.secret) if not disable_monitoring: if ingress_host_source == "hostip": dns_name = dev_ip - _set_max_result_window(dns_name) - _add_graylog_input(dns_name, monitoring_secret) + _set_max_result_window(os_endpoint) + + _add_graylog_input(graylog_endpoint, monitoring_secret) _create_update_user(cluster.uuid, cluster.grafana_endpoint, monitoring_secret, cluster.secret) - if mode == "kubernetes": - utils.patch_prometheus_configmap(cluster.uuid, cluster.secret) cluster.db_connection = db_connection cluster.status = Cluster.STATUS_UNREADY @@ -371,8 +376,6 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cluster.write_to_db(db_controller.kv_store) - qos_controller.add_class("Default", 100, cluster.get_id()) - cluster_events.cluster_create(cluster) mgmt_node_ops.add_mgmt_node(dev_ip, mode, cluster.uuid) @@ -437,18 +440,24 @@ def _run_fio(mount_point) -> None: def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, qpair_count, - max_queue_size, inflight_io_threshold, 
strict_node_anti_affinity, is_single_node, name, fabric="tcp") -> str: + max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, cr_name=None, + cr_namespace=None, cr_plural=None, fabric="tcp", cluster_ip=None, grafana_secret=None) -> str: + + default_cluster = None + monitoring_secret = os.environ.get("MONITORING_SECRET", "") + enable_monitoring = os.environ.get("ENABLE_MONITORING", "") clusters = db_controller.get_clusters() - if not clusters: - raise ValueError("No previous clusters found!") + if clusters: + default_cluster = clusters[0] + else: + logger.info("No previous clusters found") if distr_ndcs == 0 and distr_npcs == 0: raise ValueError("both distr_ndcs and distr_npcs cannot be 0") - monitoring_secret = os.environ.get("MONITORING_SECRET", "") - logger.info("Adding new cluster") + cluster = Cluster() cluster.uuid = str(uuid.uuid4()) cluster.cluster_name = name @@ -457,13 +466,40 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.nqn = f"{constants.CLUSTER_NQN}:{cluster.uuid}" cluster.secret = utils.generate_string(20) cluster.strict_node_anti_affinity = strict_node_anti_affinity + if default_cluster: + cluster.mode = default_cluster.mode + cluster.db_connection = default_cluster.db_connection + cluster.grafana_secret = grafana_secret if grafana_secret else default_cluster.grafana_secret + cluster.grafana_endpoint = default_cluster.grafana_endpoint + else: + # creating first cluster on k8s + cluster.mode = "kubernetes" + logger.info("Retrieving foundationdb connection string...") + fdb_cluster_string = utils.get_fdb_cluster_string(constants.FDB_CONFIG_NAME, constants.K8S_NAMESPACE) + cluster.db_connection = fdb_cluster_string + if monitoring_secret: + cluster.grafana_secret = monitoring_secret + elif enable_monitoring != "true": + cluster.grafana_secret = "" + else: + raise Exception("monitoring_secret is required") + cluster.grafana_endpoint = constants.GRAFANA_K8S_ENDPOINT + if not 
cluster_ip: + cluster_ip = "0.0.0.0" + + # add mgmt node object + mgmt_node_ops.add_mgmt_node(cluster_ip, "kubernetes", cluster.uuid) + if enable_monitoring == "true": + graylog_endpoint = constants.GRAYLOG_K8S_ENDPOINT + os_endpoint = constants.OS_K8S_ENDPOINT + _create_update_user(cluster.uuid, cluster.grafana_endpoint, cluster.grafana_secret, cluster.secret) + + _set_max_result_window(os_endpoint) - default_cluster = clusters[0] - cluster.db_connection = default_cluster.db_connection - cluster.grafana_secret = monitoring_secret if default_cluster.mode == "kubernetes" else default_cluster.grafana_secret - cluster.grafana_endpoint = default_cluster.grafana_endpoint + _add_graylog_input(graylog_endpoint, monitoring_secret) - _create_update_user(cluster.uuid, cluster.grafana_endpoint, cluster.grafana_secret, cluster.secret) + if cluster.mode == "kubernetes": + utils.patch_prometheus_configmap(cluster.uuid, cluster.secret) cluster.distr_ndcs = distr_ndcs cluster.distr_npcs = distr_npcs @@ -475,6 +511,10 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.qpair_count = qpair_count or constants.QPAIR_COUNT cluster.max_queue_size = max_queue_size cluster.inflight_io_threshold = inflight_io_threshold + cluster.cr_name = cr_name + cluster.cr_namespace = cr_namespace + cluster.cr_plural = cr_plural + if cap_warn and cap_warn > 0: cluster.cap_warn = cap_warn if cap_crit and cap_crit > 0: @@ -486,12 +526,12 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn protocols = parse_protocols(fabric) cluster.fabric_tcp = protocols["tcp"] cluster.fabric_rdma = protocols["rdma"] + cluster.full_page_unmap = False cluster.status = Cluster.STATUS_UNREADY cluster.create_dt = str(datetime.datetime.now()) cluster.write_to_db(db_controller.kv_store) cluster_events.cluster_create(cluster) - qos_controller.add_class("Default", 100, cluster.get_id()) return cluster.get_id() @@ -597,9 +637,8 @@ def cluster_activate(cl_id, 
force=False, force_lvstore_create=False) -> None: snode.lvstore_status = "failed" snode.write_to_db() logger.error(f"Failed to restore lvstore on node {snode.get_id()}") - if not force: - set_cluster_status(cl_id, ols_status) - raise ValueError("Failed to activate cluster") + set_cluster_status(cl_id, ols_status) + raise ValueError("Failed to activate cluster") snodes = db_controller.get_storage_nodes_by_cluster_id(cl_id) for snode in snodes: @@ -621,10 +660,8 @@ def cluster_activate(cl_id, force=False, force_lvstore_create=False) -> None: snode.lvstore_status = "failed" snode.write_to_db() logger.error(f"Failed to restore lvstore on node {snode.get_id()}") - if not force: - logger.error("Failed to activate cluster") - set_cluster_status(cl_id, ols_status) - raise ValueError("Failed to activate cluster") + set_cluster_status(cl_id, ols_status) + raise ValueError("Failed to activate cluster") # reorder qos classes ids qos_classes = db_controller.get_qos(cl_id) @@ -645,6 +682,15 @@ def cluster_activate(cl_id, force=False, force_lvstore_create=False) -> None: if not ret: logger.error(f"Failed to set Alcemls QOS on node: {node.get_id()}") + # Start JC compression on each node + if ols_status == Cluster.STATUS_UNREADY: + for node in db_controller.get_storage_nodes_by_cluster_id(cl_id): + if node.status == StorageNode.STATUS_ONLINE: + ret, err = node.rpc_client().jc_suspend_compression(jm_vuid=node.jm_vuid, suspend=False) + if not ret: + logger.info("Failed to resume JC compression adding task...") + tasks_controller.add_jc_comp_resume_task(node.cluster_id, node.get_id(), jm_vuid=node.jm_vuid) + if not cluster.cluster_max_size: cluster = db_controller.get_cluster_by_id(cl_id) cluster.cluster_max_size = max_size @@ -792,6 +838,7 @@ def list() -> t.List[dict]: "#storage": len(st), "Mod": f"{cl.distr_ndcs}x{cl.distr_npcs}", "Status": status.upper(), + "Replicate": cl.snapshot_replication_target_cluster, }) return data @@ -1002,16 +1049,11 @@ def list_all_info(cluster_id) -> 
str: def get_capacity(cluster_id, history, records_count=20) -> t.List[dict]: - cluster = db_controller.get_cluster_by_id(cluster_id) - - if history: - records_number = utils.parse_history_param(history) - if not records_number: - raise ValueError(f"Error parsing history string: {history}") - else: - records_number = 20 - - records = db_controller.get_cluster_capacity(cluster, records_number) + try: + _ = db_controller.get_cluster_by_id(cluster_id) + except KeyError: + logger.error(f"Cluster not found: {cluster_id}") + return [] cap_stats_keys = [ "date", @@ -1022,20 +1064,17 @@ def get_capacity(cluster_id, history, records_count=20) -> t.List[dict]: "size_util", "size_prov_util", ] + prom_client = PromClient(cluster_id) + records = prom_client.get_cluster_metrics(cluster_id, cap_stats_keys, history) return utils.process_records(records, records_count, keys=cap_stats_keys) def get_iostats_history(cluster_id, history_string, records_count=20, with_sizes=False) -> t.List[dict]: - cluster = db_controller.get_cluster_by_id(cluster_id) - - if history_string: - records_number = utils.parse_history_param(history_string) - if not records_number: - raise ValueError(f"Error parsing history string: {history_string}") - else: - records_number = 20 - - records = db_controller.get_cluster_stats(cluster, records_number) + try: + _ = db_controller.get_cluster_by_id(cluster_id) + except KeyError: + logger.error(f"Cluster not found: {cluster_id}") + return [] io_stats_keys = [ "date", @@ -1073,6 +1112,9 @@ def get_iostats_history(cluster_id, history_string, records_count=20, with_sizes "write_latency_ticks", ] ) + + prom_client = PromClient(cluster_id) + records = prom_client.get_cluster_metrics(cluster_id, io_stats_keys, history_string) # combine records return utils.process_records(records, records_count, keys=io_stats_keys) @@ -1137,6 +1179,7 @@ def get_logs(cluster_id, limit=50, **kwargs) -> t.List[dict]: if record.event in ["device_status", "node_status"]: msg = msg+f" 
({record.count})" + logger.debug(record) out.append({ "Date": record.get_date_string(), "NodeId": record.node_id, @@ -1159,10 +1202,6 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info("Updating mgmt cluster") if cluster.mode == "docker": - sbcli=constants.SIMPLY_BLOCK_CLI_NAME - subprocess.check_call(f"pip install {sbcli} --upgrade".split(' ')) - logger.info(f"{sbcli} upgraded") - cluster_docker = utils.get_docker_client(cluster_id) logger.info(f"Pulling image {constants.SIMPLY_BLOCK_DOCKER_IMAGE}") pull_docker_image_with_retry(cluster_docker, constants.SIMPLY_BLOCK_DOCKER_IMAGE) @@ -1176,37 +1215,52 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, for service in cluster_docker.services.list(): if image_parts in service.attrs['Spec']['Labels']['com.docker.stack.image'] or \ "simplyblock" in service.attrs['Spec']['Labels']['com.docker.stack.image']: - logger.info(f"Updating service {service.name}") - service.update(image=service_image, force_update=True) - service_names.append(service.attrs['Spec']['Name']) + if service.name in ["app_CachingNodeMonitor", "app_CachedLVolStatsCollector"]: + logger.info(f"Removing service {service.name}") + service.remove() + else: + logger.info(f"Updating service {service.name}") + service.update(image=service_image, force_update=True) + service_names.append(service.attrs['Spec']['Name']) if "app_SnapshotMonitor" not in service_names: - logger.info("Creating snapshot monitor service") - cluster_docker.services.create( - image=service_image, - command="python simplyblock_core/services/snapshot_monitor.py", - name="app_SnapshotMonitor", - mounts=["/etc/foundationdb:/etc/foundationdb"], - env=["SIMPLYBLOCK_LOG_LEVEL=DEBUG"], - networks=["host"], - constraints=["node.role == manager"] - ) + utils.create_docker_service( + cluster_docker=cluster_docker, + service_name="app_SnapshotMonitor", + service_file="python simplyblock_core/services/snapshot_monitor.py", + 
service_image=service_image) + + if "app_TasksRunnerLVolSyncDelete" not in service_names: + utils.create_docker_service( + cluster_docker=cluster_docker, + service_name="app_TasksRunnerLVolSyncDelete", + service_file="python simplyblock_core/services/tasks_runner_sync_lvol_del.py", + service_image=service_image) + + if "app_TasksRunnerJCCompResume" not in service_names: + utils.create_docker_service( + cluster_docker=cluster_docker, + service_name="app_TasksRunnerJCCompResume", + service_file="python simplyblock_core/services/tasks_runner_jc_comp.py", + service_image=service_image) + logger.info("Done updating mgmt cluster") elif cluster.mode == "kubernetes": utils.load_kube_config_with_fallback() apps_v1 = k8s_client.AppsV1Api() - + namespace = constants.K8S_NAMESPACE image_without_tag = constants.SIMPLY_BLOCK_DOCKER_IMAGE.split(":")[0] image_parts = "/".join(image_without_tag.split("/")[-2:]) service_image = mgmt_image or constants.SIMPLY_BLOCK_DOCKER_IMAGE - + deployment_names = [] # Update Deployments - deployments = apps_v1.list_namespaced_deployment(namespace=constants.K8S_NAMESPACE) + deployments = apps_v1.list_namespaced_deployment(namespace=namespace) for deploy in deployments.items: if deploy.metadata.name == constants.ADMIN_DEPLOY_NAME: logger.info(f"Skipping deployment {deploy.metadata.name}") continue + deployment_names.append(deploy.metadata.name) for c in deploy.spec.template.spec.containers: if image_parts in c.image: logger.info(f"Updating deployment {deploy.metadata.name} image to {service_image}") @@ -1216,12 +1270,28 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, deploy.spec.template.metadata.annotations = annotations apps_v1.patch_namespaced_deployment( name=deploy.metadata.name, - namespace=constants.K8S_NAMESPACE, + namespace=namespace, body={"spec": {"template": deploy.spec.template}} ) + if "simplyblock-tasks-runner-sync-lvol-del" not in deployment_names: + utils.create_k8s_service( + namespace=namespace, 
+ deployment_name="simplyblock-tasks-runner-sync-lvol-del", + container_name="tasks-runner-sync-lvol-del", + service_file="simplyblock_core/services/tasks_runner_sync_lvol_del.py", + container_image=service_image) + + if "simplyblock-snapshot-monitor" not in deployment_names: + utils.create_k8s_service( + namespace=namespace, + deployment_name="simplyblock-snapshot-monitor", + container_name="snapshot-monitor", + service_file="simplyblock_core/services/snapshot_monitor.py", + container_image=service_image) + # Update DaemonSets - daemonsets = apps_v1.list_namespaced_daemon_set(namespace=constants.K8S_NAMESPACE) + daemonsets = apps_v1.list_namespaced_daemon_set(namespace=namespace) for ds in daemonsets.items: for c in ds.spec.template.spec.containers: if image_parts in c.image: @@ -1232,7 +1302,7 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, ds.spec.template.metadata.annotations = annotations apps_v1.patch_namespaced_daemon_set( name=ds.metadata.name, - namespace=constants.K8S_NAMESPACE, + namespace=namespace, body={"spec": {"template": ds.spec.template}} ) @@ -1270,7 +1340,12 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info(f"Restarting node: {node.get_id()} with SPDK image: {spdk_image}") else: logger.info(f"Restarting node: {node.get_id()}") - storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + try: + storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + except Exception as e: + logger.debug(e) + logger.error(f"Failed to restart node: {node.get_id()}") + return logger.info("Done") @@ -1329,3 +1404,30 @@ def set(cl_id, attr, value) -> None: logger.info(f"Setting {attr} to {value}") setattr(cluster, attr, value) cluster.write_to_db() + + +def add_replication(source_cl_id, target_cl_id, timeout=0, target_pool=None) -> bool: + db_controller = DBController() + cluster = 
db_controller.get_cluster_by_id(source_cl_id) + if not cluster: + raise ValueError(f"Cluster not found: {source_cl_id}") + + target_cluster = db_controller.get_cluster_by_id(target_cl_id) + if not target_cluster: + raise ValueError(f"Target cluster not found: {target_cl_id}") + + logger.info("Updating Cluster replication target") + cluster.snapshot_replication_target_cluster = target_cl_id + if target_pool: + pool = db_controller.get_pool_by_id(target_pool) + if not pool: + raise ValueError(f"Pool not found: {target_pool}") + if pool.status != Pool.STATUS_ACTIVE: + raise ValueError(f"Pool not active: {target_pool}") + cluster.snapshot_replication_target_pool = target_pool + + if timeout and timeout > 0: + cluster.snapshot_replication_timeout = timeout + cluster.write_to_db() + logger.info("Done") + return True diff --git a/simplyblock_core/constants.py b/simplyblock_core/constants.py index 41824c73a..23cb100d8 100644 --- a/simplyblock_core/constants.py +++ b/simplyblock_core/constants.py @@ -27,7 +27,6 @@ def get_config_var(name, default=None): KVD_DB_FILE_PATH = os.getenv('FDB_CLUSTER_FILE', '/etc/foundationdb/fdb.cluster') KVD_DB_TIMEOUT_MS = 10000 SPK_DIR = '/home/ec2-user/spdk' -RPC_HTTP_PROXY_PORT = 8080 LOG_LEVEL = logging.INFO LOG_WEB_LEVEL = logging.DEBUG LOG_WEB_DEBUG = True if LOG_WEB_LEVEL == logging.DEBUG else False @@ -93,7 +92,7 @@ def get_config_var(name, default=None): MIN_SYS_MEMORY_FOR_LVOL = 524288000 EXTRA_SMALL_POOL_COUNT = 4096 EXTRA_LARGE_POOL_COUNT = 10240 -EXTRA_HUGE_PAGE_MEMORY = 1147483648 +EXTRA_HUGE_PAGE_MEMORY = 3221225472 EXTRA_SYS_MEMORY = 0.10 INSTANCE_STORAGE_DATA = { @@ -133,12 +132,10 @@ def get_config_var(name, default=None): LVOL_NVME_CONNECT_NR_IO_QUEUES=3 LVOL_NVME_KEEP_ALIVE_TO=10 LVOL_NVME_KEEP_ALIVE_TO_TCP=7 -LVOL_NVMF_PORT_START=int(os.getenv('LVOL_NVMF_PORT_START', 9100)) QPAIR_COUNT=32 CLIENT_QPAIR_COUNT=3 NVME_TIMEOUT_US=8000000 NVMF_MAX_SUBSYSTEMS=50000 -HA_JM_COUNT=3 KATO=10000 ACK_TO=11 BDEV_RETRY=0 @@ -157,15 
+154,22 @@ def get_config_var(name, default=None): LINUX_DRV_MASS_STORAGE_ID = 1 LINUX_DRV_MASS_STORAGE_NVME_TYPE_ID = 8 -NODE_NVMF_PORT_START=9060 -NODE_HUBLVOL_PORT_START=9030 NODES_CONFIG_FILE = "/etc/simplyblock/sn_config_file" SYSTEM_INFO_FILE = "/etc/simplyblock/system_info" LVO_MAX_NAMESPACES_PER_SUBSYS=32 +CR_GROUP = "simplyblock.simplyblock.io" +CR_VERSION = "v1alpha1" + +GRAFANA_K8S_ENDPOINT = "http://simplyblock-grafana:3000" +GRAYLOG_K8S_ENDPOINT = "http://simplyblock-graylog:9000" +OS_K8S_ENDPOINT = "http://opensearch-cluster-master:9200" + +WEBAPI_K8S_ENDPOINT = "http://simplyblock-webappapi:5000/api/v2" + K8S_NAMESPACE = os.getenv('K8S_NAMESPACE', 'simplyblock') OS_STATEFULSET_NAME = "simplyblock-opensearch" MONGODB_STATEFULSET_NAME = "simplyblock-mongo" @@ -224,4 +228,14 @@ def get_config_var(name, default=None): qos_class_meta_and_migration_weight_percent = 25 -MIG_PARALLEL_JOBS = 16 \ No newline at end of file +MIG_PARALLEL_JOBS = 64 +MIG_JOB_SIZE = 64 + +# ports ranges +RPC_PORT_RANGE_START = 8080 +NODE_NVMF_PORT_START=9060 +NODE_HUBLVOL_PORT_START=9030 +FW_PORT_START = 50001 +# todo(hamdy): make it configurable: sfam-2586 +LVOL_NVMF_PORT_ENV = os.getenv("LVOL_NVMF_PORT_START", "") +LVOL_NVMF_PORT_START = int(LVOL_NVMF_PORT_ENV) if LVOL_NVMF_PORT_ENV else 9100 \ No newline at end of file diff --git a/simplyblock_core/controllers/cluster_events.py b/simplyblock_core/controllers/cluster_events.py index e8e6c406e..e201c53a9 100644 --- a/simplyblock_core/controllers/cluster_events.py +++ b/simplyblock_core/controllers/cluster_events.py @@ -4,6 +4,7 @@ from simplyblock_core.controllers import events_controller as ec from simplyblock_core.db_controller import DBController from simplyblock_core.models.events import EventObj +from simplyblock_core import utils, constants logger = logging.getLogger() db_controller = DBController() @@ -39,6 +40,15 @@ def cluster_status_change(cluster, new_state, old_status): caused_by=ec.CAUSED_BY_CLI, message=f"Cluster 
status changed from {old_status} to {new_state}") + if cluster.mode == "kubernetes": + utils.patch_cr_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=cluster.cr_plural, + namespace=cluster.cr_namespace, + name=cluster.cr_name, + status_patch={"status": new_state}) + def _cluster_cap_event(cluster, msg, event_level): return ec.log_event_cluster( @@ -80,3 +90,21 @@ def cluster_delete(cluster): db_object=cluster, caused_by=ec.CAUSED_BY_CLI, message=f"Cluster deleted {cluster.get_id()}") + + +def cluster_rebalancing_change(cluster, new_state, old_status): + ec.log_event_cluster( + cluster_id=cluster.get_id(), + domain=ec.DOMAIN_CLUSTER, + event=ec.EVENT_STATUS_CHANGE, + db_object=cluster, + caused_by=ec.CAUSED_BY_CLI, + message=f"Cluster rebalancing changed from {old_status} to {new_state}") + if cluster.mode == "kubernetes": + utils.patch_cr_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=cluster.cr_plural, + namespace=cluster.cr_namespace, + name=cluster.cr_name, + status_patch={"rebalancing": new_state}) diff --git a/simplyblock_core/controllers/device_controller.py b/simplyblock_core/controllers/device_controller.py index 8e684c942..b51801302 100644 --- a/simplyblock_core/controllers/device_controller.py +++ b/simplyblock_core/controllers/device_controller.py @@ -1,13 +1,15 @@ import time import logging +import uuid from simplyblock_core import distr_controller, utils, storage_node_ops from simplyblock_core.controllers import device_events, tasks_controller from simplyblock_core.db_controller import DBController from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient - +from simplyblock_core.snode_client import SNodeClient logger = logging.getLogger() @@ -68,7 +70,9 @@ def device_set_state(device_id, state): for node in snodes: if 
node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: continue - node.remote_devices = storage_node_ops._connect_to_remote_devs(node) + remote_devices = storage_node_ops._connect_to_remote_devs(node) + node = db_controller.get_storage_node_by_id(node.get_id()) + node.remote_devices = remote_devices node.write_to_db() distr_controller.send_dev_status_event(device, device.status) @@ -121,7 +125,7 @@ def get_alceml_name(alceml_id): return f"alceml_{alceml_id}" -def _def_create_device_stack(device_obj, snode, force=False): +def _def_create_device_stack(device_obj, snode, force=False, clear_data=False): db_controller = DBController() rpc_client = RPCClient( @@ -155,7 +159,7 @@ def _def_create_device_stack(device_obj, snode, force=False): if alceml_name not in bdev_names: ret = snode.create_alceml( alceml_name, nvme_bdev, alceml_id, - pba_init_mode=2, + pba_init_mode=3 if clear_data else 2, write_protection=cluster.distr_ndcs > 1, pba_page_size=cluster.page_size_in_blocks, full_page_unmap=cluster.full_page_unmap @@ -240,6 +244,10 @@ def restart_device(device_id, force=False): device_obj = dev break + if not device_obj: + logger.error("device not found") + return False + task_id = tasks_controller.get_active_dev_restart_task(snode.cluster_id, device_id) if task_id: logger.error(f"Restart task found: {task_id}, can not restart device") @@ -250,6 +258,17 @@ def restart_device(device_id, force=False): device_set_retries_exhausted(device_id, True) device_set_unavailable(device_id) + if not snode.rpc_client().bdev_nvme_controller_list(device_obj.nvme_controller): + try: + ret = SNodeClient(snode.api_endpoint, timeout=30, retry=1).bind_device_to_spdk(device_obj.pcie_address) + logger.debug(ret) + snode.rpc_client().bdev_nvme_controller_attach(device_obj.nvme_controller, device_obj.pcie_address) + snode.rpc_client().bdev_examine(f"{device_obj.nvme_controller}n1") + snode.rpc_client().bdev_wait_for_examine() + except Exception as e: + logger.error(e) + 
return False + ret = _def_create_device_stack(device_obj, snode, force=force) if not ret: @@ -263,22 +282,33 @@ def restart_device(device_id, force=False): device_set_online(device_id) device_events.device_restarted(device_obj) - # add to jm raid - if snode.jm_device and snode.jm_device.raid_bdev and snode.jm_device.status != JMDevice.STATUS_REMOVED: - # looking for jm partition - rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - jm_dev_part = f"{dev.nvme_bdev[:-1]}1" - ret = rpc_client.get_bdevs(jm_dev_part) - if ret: - logger.info(f"JM part found: {jm_dev_part}") + if snode.jm_device and snode.jm_device.status != JMDevice.STATUS_REMOVED: + if not snode.jm_device.raid_bdev: if snode.jm_device.status == JMDevice.STATUS_UNAVAILABLE: - restart_jm_device(snode.jm_device.get_id(), force=True) - - if snode.jm_device.status == JMDevice.STATUS_ONLINE and \ - jm_dev_part not in snode.jm_device.jm_nvme_bdev_list: - remove_jm_device(snode.jm_device.get_id(), force=True) - time.sleep(3) - restart_jm_device(snode.jm_device.get_id(), force=True) + set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) + else: + # looking for jm partition + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + jm_dev_part = f"{dev.nvme_bdev[:-1]}1" + ret = rpc_client.get_bdevs(jm_dev_part) + if ret: + logger.info(f"JM part found: {jm_dev_part}") + if snode.jm_device.status == JMDevice.STATUS_UNAVAILABLE: + if snode.rpc_client().get_bdevs(snode.jm_device.raid_bdev): + logger.info("Raid found, setting jm device online") + ret = snode.rpc_client().bdev_raid_get_bdevs() + has_bdev = any( + bdev["name"] == jm_dev_part + for raid in ret + for bdev in raid.get("base_bdevs_list", []) + ) + if not has_bdev: + logger.info(f"Adding to raid: {jm_dev_part}") + snode.rpc_client().bdev_raid_add_base_bdev(snode.jm_device.raid_bdev, jm_dev_part) + set_jm_device_state(snode.jm_device.get_id(), 
JMDevice.STATUS_ONLINE) + else: + logger.info("Raid not found, restarting jm device") + restart_jm_device(snode.jm_device.get_id(), force=True) return "Done" @@ -337,15 +367,25 @@ def device_remove(device_id, force=True): logger.error(e) return False + device = None for dev in snode.nvme_devices: if dev.get_id() == device_id: device = dev break - if device.status in [NVMeDevice.STATUS_REMOVED, NVMeDevice.STATUS_FAILED]: - logger.error(f"Unsupported device status: {device.status}") + if not device: + logger.error("device not found") return False + if device.status == NVMeDevice.STATUS_REMOVED: + return True + + if device.status in [NVMeDevice.STATUS_FAILED, NVMeDevice.STATUS_FAILED_AND_MIGRATED, + NVMeDevice.STATUS_NEW]: + logger.error(f"Unsupported device status: {device.status}") + if force is False: + return False + task_id = tasks_controller.get_active_dev_restart_task(snode.cluster_id, device_id) if task_id: logger.error(f"Restart task found: {task_id}, can not remove device") @@ -359,33 +399,46 @@ def device_remove(device_id, force=True): distr_controller.disconnect_device(device) logger.info("Removing device fabric") - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password) + rpc_client = snode.rpc_client() + node_bdev = {} + ret = rpc_client.get_bdevs() + if ret: + for b in ret: + node_bdev[b['name']] = b + for al in b['aliases']: + node_bdev[al] = b + + if rpc_client.subsystem_list(device.nvmf_nqn): + logger.info("Removing device subsystem") + ret = rpc_client.subsystem_delete(device.nvmf_nqn) + if not ret: + logger.error(f"Failed to remove subsystem: {device.nvmf_nqn}") + if not force: + return False - ret = rpc_client.subsystem_delete(device.nvmf_nqn) - if not ret: - logger.error(f"Failed to remove subsystem: {device.nvmf_nqn}") - if not force: - return False + if f"{device.alceml_bdev}_PT" in node_bdev or force: + logger.info("Removing device PT") + ret = 
rpc_client.bdev_PT_NoExcl_delete(f"{device.alceml_bdev}_PT") + if not ret: + logger.error(f"Failed to remove bdev: {device.alceml_bdev}_PT") + if not force: + return False - logger.info("Removing device bdevs") - ret = rpc_client.bdev_PT_NoExcl_delete(f"{device.alceml_bdev}_PT") - if not ret: - logger.error(f"Failed to remove bdev: {device.alceml_bdev}_PT") - if not force: - return False - ret = rpc_client.bdev_alceml_delete(device.alceml_bdev) - if not ret: - logger.error(f"Failed to remove bdev: {device.alceml_bdev}") - if not force: - return False - ret = rpc_client.qos_vbdev_delete(device.qos_bdev) - if not ret: - logger.error(f"Failed to remove bdev: {device.qos_bdev}") - if not force: - return False - if snode.enable_test_device: + if device.alceml_bdev in node_bdev or force: + ret = rpc_client.bdev_alceml_delete(device.alceml_bdev) + if not ret: + logger.error(f"Failed to remove bdev: {device.alceml_bdev}") + if not force: + return False + + if device.qos_bdev in node_bdev or force: + ret = rpc_client.qos_vbdev_delete(device.qos_bdev) + if not ret: + logger.error(f"Failed to remove bdev: {device.qos_bdev}") + if not force: + return False + + if snode.enable_test_device and device.testing_bdev in node_bdev or force: ret = rpc_client.bdev_passtest_delete(device.testing_bdev) if not ret: logger.error(f"Failed to remove bdev: {device.testing_bdev}") @@ -394,8 +447,9 @@ def device_remove(device_id, force=True): device_set_state(device_id, NVMeDevice.STATUS_REMOVED) - # remove device from jm raid - if snode.jm_device.raid_bdev: + if not snode.jm_device.raid_bdev: + remove_jm_device(snode.jm_device.get_id()) + else: nvme_controller = device.nvme_controller dev_to_remove = None for part in snode.jm_device.jm_nvme_bdev_list: @@ -404,11 +458,49 @@ def device_remove(device_id, force=True): break if dev_to_remove: - if snode.jm_device.status == NVMeDevice.STATUS_ONLINE: - remove_jm_device(snode.jm_device.get_id(), force=True) - time.sleep(3) + raid_found = False + for 
raid_info in rpc_client.bdev_raid_get_bdevs(): + if raid_info["name"] == snode.jm_device.raid_bdev: + raid_found = True + base_bdevs = raid_info.get("base_bdevs_list", []) + if any(bdev["name"] == dev_to_remove for bdev in base_bdevs): + remove_from_jm_device(snode.jm_device.get_id(), dev_to_remove) + if not raid_found: + set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_UNAVAILABLE) + + return True - restart_jm_device(snode.jm_device.get_id(), force=True) + +def remove_from_jm_device(device_id, jm_bdev): + db_controller = DBController() + + try: + snode = get_storage_node_by_jm_device(db_controller, device_id) + except KeyError as e: + logger.error(e) + return False + + if snode.status == StorageNode.STATUS_ONLINE: + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + + if snode.jm_device.raid_bdev: + logger.info("device part of raid1: only remove from raid") + try: + has_any = False + for raid_info in rpc_client.bdev_raid_get_bdevs(): + if raid_info["name"] == snode.jm_device.raid_bdev: + base_bdevs = raid_info.get("base_bdevs_list", []) + if any(bdev["name"] and bdev["name"] != jm_bdev for bdev in base_bdevs): + has_any = True + if has_any: + rpc_client.bdev_raid_remove_base_bdev(jm_bdev) + return True + else: + set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_UNAVAILABLE) + + except KeyError as e: + logger.error(e) + return False return True @@ -438,9 +530,9 @@ def get_device_capacity(device_id, history, records_count=20, parse_sizes=True): if not records_number: return False else: - records_number = 20 + records_number = records_count - records = db_controller.get_device_capacity(device, records_number) + # records = db_controller.get_device_capacity(device, records_number) cap_stats_keys = [ "date", "size_total", @@ -448,6 +540,8 @@ def get_device_capacity(device_id, history, records_count=20, parse_sizes=True): "size_free", "size_util", ] + prom_client = PromClient(device.cluster_id) + 
records = prom_client.get_device_metrics(device_id, cap_stats_keys, history) records_list = utils.process_records(records, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -474,15 +568,6 @@ def get_device_iostats(device_id, history, records_count=20, parse_sizes=True): logger.error("device not found") return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_device_stats(device, records_number) io_stats_keys = [ "date", "read_bytes", @@ -496,8 +581,10 @@ def get_device_iostats(device_id, history, records_count=20, parse_sizes=True): "write_io_ps", "write_latency_ps", ] + prom_client = PromClient(device.cluster_id) + records = prom_client.get_device_metrics(device_id, io_stats_keys, history) # combine records - new_records = utils.process_records(records_list, records_count, keys=io_stats_keys) + new_records = utils.process_records(records, records_count, keys=io_stats_keys) if not parse_sizes: return new_records @@ -591,14 +678,15 @@ def device_set_failed(device_id): logger.error(e) return False + if dev.status != NVMeDevice.STATUS_REMOVED: + logger.error(f"Device must be in removed status, current status: {dev.status}") + return False + task_id = tasks_controller.get_active_dev_restart_task(snode.cluster_id, device_id) if task_id: logger.error(f"Restart task found: {task_id}, can not fail device") return False - if dev.status == NVMeDevice.STATUS_FAILED: - return True - ret = device_set_state(device_id, NVMeDevice.STATUS_FAILED) if not ret: logger.warning("Failed to set device state to failed") @@ -608,6 +696,7 @@ def device_set_failed(device_id): rpc_client.distr_replace_id_in_map_prob(dev.cluster_device_order, -1) tasks_controller.add_device_failed_mig_task(device_id) + return True def add_device(device_id, add_migration_task=True): @@ -623,14 +712,18 @@ def 
add_device(device_id, add_migration_task=True): logger.error("Device must be in new state") return False + device_obj = None for dev in snode.nvme_devices: if dev.get_id() == device_id: device_obj = dev break + if not device_obj: + logger.error("device not found") + return False + logger.info(f"Adding device {device_id}") - # if snode.num_partitions_per_dev == 0 or device_obj.is_partition: - ret = _def_create_device_stack(device_obj, snode, force=True) + ret = _def_create_device_stack(device_obj, snode, force=True, clear_data=True) if not ret: logger.error("Failed to create device stack") return False @@ -657,81 +750,6 @@ def add_device(device_id, add_migration_task=True): tasks_controller.add_new_device_mig_task(device_id) return device_id - # - # # create partitions - # partitions = snode.num_partitions_per_dev - # rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - # # look for partitions - # partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj) - # logger.debug("partitioned_devices") - # logger.debug(partitioned_devices) - # if len(partitioned_devices) == partitions+1: - # logger.info("Partitioned devices found") - # else: - # logger.info(f"Creating partitions for {device_obj.nvme_bdev}") - # storage_node_ops._create_device_partitions(rpc_client, device_obj, snode, partitions, snode.jm_percent) - # partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj) - # if len(partitioned_devices) == partitions+1: - # logger.info("Device partitions created") - # else: - # logger.error("Failed to create partitions") - # return False - # - # jm_part = partitioned_devices.pop(0) - # new_devices = [] - # dev_order = storage_node_ops.get_next_cluster_device_order(db_controller, snode.cluster_id) - # for dev in partitioned_devices: - # new_device = storage_node_ops._create_storage_device_stack(rpc_client, dev, snode, after_restart=False) - # if not new_device: - # 
logger.error("failed to create dev stack") - # continue - # - # new_device.cluster_device_order = dev_order - # dev_order += 1 - # device_events.device_create(new_device) - # new_devices.append(new_device) - # - # if new_devices: - # snode.nvme_devices.remove(device_obj) - # snode.nvme_devices.extend(new_devices) - # snode.write_to_db(db_controller.kv_store) - # else: - # logger.error("failed to create devices") - # return False - # - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, snode) - # - # logger.info("Make other nodes connect to the node devices") - # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id) - # for node in snodes: - # if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: - # continue - # node.remote_devices = storage_node_ops._connect_to_remote_devs(node) - # node.write_to_db() - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, node) - # - # for dev in new_devices: - # tasks_controller.add_new_device_mig_task(dev.get_id()) - # - # # add to jm raid - # if snode.jm_device and snode.jm_device.raid_bdev and jm_part: - # # looking for jm partition - # jm_dev_part = jm_part.nvme_bdev - # ret = rpc_client.get_bdevs(jm_dev_part) - # if ret: - # logger.info(f"JM part found: {jm_dev_part}") - # if snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_REMOVED]: - # restart_jm_device(snode.jm_device.get_id(), force=True, format_alceml=True) - # - # if snode.jm_device.status == JMDevice.STATUS_ONLINE and \ - # jm_dev_part not in snode.jm_device.jm_nvme_bdev_list: - # remove_jm_device(snode.jm_device.get_id(), force=True) - # restart_jm_device(snode.jm_device.get_id(), force=True) - # - # return "Done" - def device_set_failed_and_migrated(device_id): db_controller = DBController() @@ -924,3 +942,58 @@ def restart_jm_device(device_id, force=False, format_alceml=False): set_jm_device_state(snode.jm_device.get_id(), 
JMDevice.STATUS_ONLINE) return True + + +def new_device_from_failed(device_id): + db_controller = DBController() + device = None + device_node = None + for node in db_controller.get_storage_nodes(): + for dev in node.nvme_devices: + if dev.get_id() == device_id: + device = dev + device_node = node + break + + if not device: + logger.info(f"Device not found: {device_id}") + return False + + if not device_node: + logger.info("node not found") + return False + + if device.status != NVMeDevice.STATUS_FAILED_AND_MIGRATED: + logger.error(f"Device status: {device.status} but expected status is {NVMeDevice.STATUS_FAILED_AND_MIGRATED}") + return False + + if device.serial_number.endswith("_failed"): + logger.error("Device is already added back from failed") + return False + + if not device_node.rpc_client().bdev_nvme_controller_list(device.nvme_controller): + try: + ret = SNodeClient(device_node.api_endpoint, timeout=30, retry=1).bind_device_to_spdk(device.pcie_address) + logger.debug(ret) + device_node.rpc_client().bdev_nvme_controller_attach(device.nvme_controller, device.pcie_address) + except Exception as e: + logger.error(e) + return False + + if not device_node.rpc_client().bdev_nvme_controller_list(device.nvme_controller): + logger.error(f"Failed to find device nvme controller {device.nvme_controller}") + return False + + new_device = NVMeDevice(device.to_dict()) + new_device.uuid = str(uuid.uuid4()) + new_device.status = NVMeDevice.STATUS_NEW + new_device.cluster_device_order = -1 + new_device.deleted = False + new_device.io_error = False + new_device.retries_exhausted = False + device_node.nvme_devices.append(new_device) + + device.serial_number = f"{device.serial_number}_failed" + device_node.write_to_db(db_controller.kv_store) + logger.info(f"New device created from failed device: {device_id}, new device id: {new_device.get_id()}") + return new_device.get_id() \ No newline at end of file diff --git a/simplyblock_core/controllers/device_events.py 
b/simplyblock_core/controllers/device_events.py index f2e1e959d..1f5ee881a 100644 --- a/simplyblock_core/controllers/device_events.py +++ b/simplyblock_core/controllers/device_events.py @@ -3,6 +3,8 @@ from simplyblock_core.controllers import events_controller as ec from simplyblock_core.db_controller import DBController +from simplyblock_core.models.nvme_device import NVMeDevice +from simplyblock_core import utils, constants logger = logging.getLogger() @@ -20,6 +22,24 @@ def _device_event(device, message, caused_by, event): node_id=device.get_id(), storage_id=device.cluster_device_order) + cluster = db_controller.get_cluster_by_id(snode.cluster_id) + if cluster.mode == "kubernetes": + total_devices = len(snode.nvme_devices) + online_devices = 0 + for dev in snode.nvme_devices: + if dev.status == NVMeDevice.STATUS_ONLINE: + online_devices += 1 + utils.patch_cr_node_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=snode.cr_plural, + namespace=snode.cr_namespace, + name=snode.cr_name, + node_uuid=snode.get_id(), + node_mgmt_ip=snode.mgmt_ip, + updates={"devices": f"{total_devices}/{online_devices}"}, + ) + def device_create(device, caused_by=ec.CAUSED_BY_CLI): _device_event(device, f"Device created: {device.get_id()}", caused_by, ec.EVENT_OBJ_CREATED) diff --git a/simplyblock_core/controllers/health_controller.py b/simplyblock_core/controllers/health_controller.py index c013e2d58..fb0444348 100644 --- a/simplyblock_core/controllers/health_controller.py +++ b/simplyblock_core/controllers/health_controller.py @@ -9,7 +9,7 @@ from simplyblock_core.db_controller import DBController from simplyblock_core.fw_api_client import FirewallClient from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.rpc_client import 
RPCClient from simplyblock_core.snode_client import SNodeClient @@ -18,7 +18,7 @@ logger = utils.get_logger(__name__) -def check_bdev(name, *, rpc_client=None, bdev_names=None): +def check_bdev(name, *, rpc_client=None, bdev_names=None) -> bool: present = ( ((bdev_names is not None) and (name in bdev_names)) or (rpc_client is not None and (rpc_client.get_bdevs(name) is not None)) @@ -27,7 +27,7 @@ def check_bdev(name, *, rpc_client=None, bdev_names=None): return present -def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): +def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None) -> bool: if rpc_client: subsystem = subsystems[0] if (subsystems := rpc_client.subsystem_list(nqn)) is not None else None elif nqns: @@ -59,7 +59,7 @@ def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): for listener in listeners: logger.info(f"Checking listener {listener['traddr']}:{listener['trsvcid']} ... ok") - return bool(listeners) and namespaces + return bool(listeners) and bool(namespaces) def check_cluster(cluster_id): @@ -109,15 +109,17 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password, timeout=5, ret ret = rpc_client.get_version() if ret: logger.debug(f"SPDK version: {ret['version']}") - return True + return True, True + else: + return True, False except Exception as e: logger.debug(e) - return False + return False, False def _check_node_api(ip): try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) logger.debug(f"Node API={ip}:5000") ret, _ = snode_api.is_live() logger.debug(f"snode is alive: {ret}") @@ -128,43 +130,35 @@ def _check_node_api(ip): return False -def _check_spdk_process_up(ip, rpc_port): - try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) - logger.debug(f"Node API={ip}:5000") - is_up, _ = snode_api.spdk_process_is_up(rpc_port) - logger.debug(f"SPDK is {is_up}") - return is_up - except Exception as e: - 
logger.debug(e) - return False - - -def _check_port_on_node(snode, port_id): - try: - fw_api = FirewallClient(snode, timeout=5, retry=2) - iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) - if type(iptables_command_output) is str: - iptables_command_output = [iptables_command_output] - for rules in iptables_command_output: - result = jc.parse('iptables', rules) - for chain in result: - if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore - for rule in chain['rules']: # type: ignore - if str(port_id) in rule['options']: # type: ignore - action = rule['target'] # type: ignore - if action in ["DROP"]: - return False - - # check RDMA port block - if snode.active_rdma: - rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() - if port_id in rdma_fw_port_list: - return False +def _check_spdk_process_up(ip, rpc_port, cluster_id): + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) + logger.debug(f"Node API={ip}:5000") + is_up, _ = snode_api.spdk_process_is_up(rpc_port, cluster_id) + logger.debug(f"SPDK is {is_up}") + return is_up + + +def check_port_on_node(snode, port_id): + fw_api = FirewallClient(snode, timeout=5, retry=2) + iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) + if type(iptables_command_output) is str: + iptables_command_output = [iptables_command_output] + for rules in iptables_command_output: + result = jc.parse('iptables', rules) + for chain in result: + if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore + for rule in chain['rules']: # type: ignore + if str(port_id) in rule['options']: # type: ignore + action = rule['target'] # type: ignore + if action in ["DROP"]: + return False + + # check RDMA port block + if snode.active_rdma: + rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() + if port_id in rdma_fw_port_list: + return False - return True - except Exception as e: - logger.error(e) return True @@ -175,7 +169,7 @@ def _check_node_ping(ip): else: return False -def 
_check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None): +def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None) -> bool: if not node.hublvol: logger.error(f"Node {node.get_id()} does not have a hublvol") return False @@ -235,15 +229,17 @@ def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns passed = False else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed -def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False): +def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False) -> bool: db_controller = DBController() try: primary_node = db_controller.get_storage_node_by_id(node.lvstore_stack_secondary_1) @@ -294,6 +290,16 @@ def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N passed = bool(ret) logger.info(f"Checking controller: {primary_node.hublvol.bdev_name} ... 
{passed}") + node_bdev = {} + ret = rpc_client.get_bdevs() + if ret: + for b in ret: + node_bdev[b['name']] = b + for al in b['aliases']: + node_bdev[al]= b + else: + node_bdev = [] + passed &= check_bdev(primary_node.hublvol.get_remote_bdev_name(), bdev_names=node_bdev) if not passed: return False @@ -331,20 +337,20 @@ def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed def _check_node_lvstore( - lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None): + lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None) -> bool: db_controller = DBController() - lvstore_check = True logger.info(f"Checking distr stack on node : {node.get_id()}") - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=1) + cluster = db_controller.get_cluster_by_id(node.cluster_id) if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: auto_fix = False @@ -367,12 +373,24 @@ def _check_node_lvstore( node_distribs_list = bdev["distribs_list"] if not node_bdev_names: - ret = rpc_client.get_bdevs() + try: + ret = node.rpc_client().get_bdevs() + except Exception as e: + logger.info(e) + return False + if ret: node_bdev_names = [b['name'] for b in ret] else: node_bdev_names = [] + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + for distr in distribs_list: if distr in node_bdev_names: logger.info(f"Checking distr bdev : {distr} ... 
ok") @@ -386,22 +404,34 @@ def _check_node_lvstore( for jm in jm_names: logger.info(jm) logger.info("Checking Distr map ...") - ret = rpc_client.distr_get_cluster_map(distr) + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.info(f"Failed to get cluster map: {e}") + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) if results: - logger.info(utils.print_table(results)) logger.info(f"Checking Distr map ... {is_passed}") - if not is_passed and auto_fix: + if is_passed: + continue + + elif not auto_fix: + return False + + else: # is_passed is False and auto_fix is True + logger.info(utils.print_table(results)) for result in results: if result['Results'] == 'failed': if result['Kind'] == "Device": if result['Found Status']: dev = db_controller.get_storage_device_by_id(result['UUID']) - if dev.status == NVMeDevice.STATUS_ONLINE: + dev_node = db_controller.get_storage_node_by_id(dev.node_id) + if dev.status == NVMeDevice.STATUS_ONLINE and dev_node.status in [ + StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: try: remote_bdev = storage_node_ops.connect_device( f"remote_{dev.alceml_bdev}", dev, node, @@ -413,44 +443,67 @@ def _check_node_lvstore( if dev.get_id() == rem_dev.get_id(): continue new_remote_devices.append(rem_dev) - dev.remote_bdev = remote_bdev - new_remote_devices.append(dev) + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.remote_bdev = remote_bdev + new_remote_devices.append(remote_device) n.remote_devices = new_remote_devices n.write_to_db() 
distr_controller.send_dev_status_event(dev, dev.status, node) except Exception as e: logger.error(f"Failed to connect to {dev.get_id()}: {e}") + else: + if dev_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + distr_controller.send_dev_status_event(dev, dev.status, node) + if result['Kind'] == "Node": n = db_controller.get_storage_node_by_id(result['UUID']) distr_controller.send_node_status_event(n, n.status, node) - ret = rpc_client.distr_get_cluster_map(distr) + + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.error(e) + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) logger.info(f"Checking Distr map ... {is_passed}") + if not is_passed: + return False else: logger.error("Failed to parse distr cluster map") - lvstore_check &= is_passed + return False else: logger.info(f"Checking distr bdev : {distr} ... not found") - lvstore_check = False + return False if raid: if raid in node_bdev_names: logger.info(f"Checking raid bdev: {raid} ... ok") else: logger.info(f"Checking raid bdev: {raid} ... not found") - lvstore_check = False + return False if bdev_lvstore: - ret = rpc_client.bdev_lvol_get_lvstores(bdev_lvstore) + try: + ret = node.rpc_client().bdev_lvol_get_lvstores(bdev_lvstore) + except Exception as e: + logger.error(e) + return False if ret: logger.info(f"Checking lvstore: {bdev_lvstore} ... ok") else: logger.info(f"Checking lvstore: {bdev_lvstore} ... not found") - lvstore_check = False - return lvstore_check + return False + return True def check_node(node_id, with_devices=True): db_controller = DBController() @@ -479,7 +532,7 @@ def check_node(node_id, with_devices=True): logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... 
{node_api_check}") # 3- check node RPC - node_rpc_check = _check_node_rpc( + node_rpc_check, _ = _check_node_rpc( snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") @@ -493,13 +546,19 @@ def check_node(node_id, with_devices=True): if snode.lvstore_stack_secondary_1: try: n = db_controller.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - lvol_port_check = _check_port_on_node(snode, n.lvol_subsys_port) + lvol_port_check = check_port_on_node(snode, n.lvol_subsys_port) logger.info(f"Check: node {snode.mgmt_ip}, port: {n.lvol_subsys_port} ... {lvol_port_check}") except KeyError: - pass + logger.error("node not found") + except Exception: + logger.error("Check node port failed, connection error") + if not snode.is_secondary_node: - lvol_port_check = _check_port_on_node(snode, snode.lvol_subsys_port) - logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... {lvol_port_check}") + try: + lvol_port_check = check_port_on_node(snode, snode.lvol_subsys_port) + logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... 
{lvol_port_check}") + except Exception: + logger.error("Check node port failed, connection error") is_node_online = ping_check and node_api_check and node_rpc_check @@ -722,17 +781,23 @@ def check_lvol_on_node(lvol_id, node_id, node_bdev_names=None, node_lvols_nqns=N if not node_bdev_names: node_bdev_names = {} - ret = rpc_client.get_bdevs() - if ret: - for bdev in ret: - node_bdev_names[bdev['name']] = bdev + try: + ret = rpc_client.get_bdevs() + if ret: + for bdev in ret: + node_bdev_names[bdev['name']] = bdev + except Exception as e: + logger.error(f"Failed to connect to node's SPDK: {e}") if not node_lvols_nqns: node_lvols_nqns = {} - ret = rpc_client.subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub + try: + ret = rpc_client.subsystem_list() + if ret: + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + except Exception as e: + logger.error(f"Failed to connect to node's SPDK: {e}") passed = True try: @@ -785,12 +850,14 @@ def check_snap(snap_id): return False snode = db_controller.get_storage_node_by_id(snap.lvol.node_id) - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=5, retry=1) - - ret = rpc_client.get_bdevs(snap.snap_bdev) - return ret + check_primary = snode.rpc_client().get_bdevs(snap.snap_bdev) + logger.info(f"Checking snap bdev: {snap.snap_bdev} on node: {snap.lvol.node_id} is {bool(check_primary)}") + if snode.secondary_node_id: + secondary_node = db_controller.get_storage_node_by_id(snode.secondary_node_id) + check_secondary = secondary_node.rpc_client().get_bdevs(snap.snap_bdev) + logger.info(f"Checking snap bdev: {snap.snap_bdev} on node: {snode.secondary_node_id} is {bool(check_secondary)}") + return check_primary and check_secondary + return check_primary def check_jm_device(device_id): diff --git a/simplyblock_core/controllers/lvol_controller.py b/simplyblock_core/controllers/lvol_controller.py index 4d7a5aad3..f25f8cec7 100644 --- 
a/simplyblock_core/controllers/lvol_controller.py +++ b/simplyblock_core/controllers/lvol_controller.py @@ -1,4 +1,5 @@ # coding=utf-8 +import copy import logging as lg import json import math @@ -10,11 +11,15 @@ from typing import List, Tuple from simplyblock_core import utils, constants -from simplyblock_core.controllers import snapshot_controller, pool_controller, lvol_events +from simplyblock_core.controllers import snapshot_controller, pool_controller, lvol_events, tasks_controller, \ + snapshot_events from simplyblock_core.db_controller import DBController +from simplyblock_core.models.cluster import Cluster +from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.pool import Pool from simplyblock_core.models.lvol_model import LVol from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient logger = lg.getLogger() @@ -139,36 +144,17 @@ def _get_next_3_nodes(cluster_id, lvol_size=0): for node in snodes: if node.is_secondary_node: # pass continue - if node.status == node.STATUS_ONLINE: - lvol_count = len(db_controller.get_lvols_by_node_id(node.get_id())) if lvol_count >= node.max_lvol: continue - - # Validate Eligible nodes for adding lvol - # snode_api = SNodeClient(node.api_endpoint) - # result, _ = snode_api.info() - # memory_free = result["memory_details"]["free"] - # huge_free = result["memory_details"]["huge_free"] - # total_node_capacity = db_controller.get_snode_size(node.get_id()) - # error = utils.validate_add_lvol_or_snap_on_node(memory_free, huge_free, node.max_lvol, lvol_size, total_node_capacity, len(node.lvols)) - # if error: - # logger.warning(error) - # continue - # + if node.lvol_sync_del(): + logger.warning(f"LVol sync delete task found on node: {node.get_id()}, skipping") + continue online_nodes.append(node) - # node_stat_list = db_controller.get_node_stats(node, limit=1000) - # combined_record = 
utils.sum_records(node_stat_list) node_st = { - "lvol": lvol_count+1, - # "cpu": 1 + (node.cpu * node.cpu_hz), - # "r_io": combined_record.read_io_ps, - # "w_io": combined_record.write_io_ps, - # "r_b": combined_record.read_bytes_ps, - # "w_b": combined_record.write_bytes_ps + "lvol": lvol_count+1 } - node_stats[node.get_id()] = node_st if len(online_nodes) <= 1: @@ -263,10 +249,11 @@ def validate_aes_xts_keys(key1: str, key2: str) -> Tuple[bool, str]: return True, "" -def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp, use_crypto, - distr_vuid, max_rw_iops, max_rw_mbytes, max_r_mbytes, max_w_mbytes, +def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp=False, use_crypto=False, + distr_vuid=0, max_rw_iops=0, max_rw_mbytes=0, max_r_mbytes=0, max_w_mbytes=0, with_snapshot=False, max_size=0, crypto_key1=None, crypto_key2=None, lvol_priority_class=0, - uid=None, pvc_name=None, namespace=None, max_namespace_per_subsys=1, fabric="tcp", ndcs=0, npcs=0): + uid=None, pvc_name=None, namespace=None, max_namespace_per_subsys=1, fabric="tcp", ndcs=0, npcs=0, + do_replicate=False, replication_cluster_id=None): db_controller = DBController() logger.info(f"Adding LVol: {name}") @@ -280,6 +267,9 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp, host_node = nodes[0] else: return False, f"Can not find storage node: {host_id_or_name}" + if host_node.lvol_sync_del(): + logger.error(f"LVol sync deletion found on node: {host_node.get_id()}") + return False, f"LVol sync deletion found on node: {host_node.get_id()}" if namespace: try: @@ -455,14 +445,12 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp, lvol.nqn = cl.nqn + ":lvol:" + lvol.uuid lvol.max_namespace_per_subsys = max_namespace_per_subsys - nodes = [] - if host_node: - nodes.insert(0, host_node) - else: + if not host_node: nodes = _get_next_3_nodes(cl.get_id(), lvol.size) if not nodes: return False, "No 
nodes found with enough resources to create the LVol" host_node = nodes[0] + s_node = db_controller.get_storage_node_by_id(host_node.secondary_node_id) attr_name = f"active_{fabric}" is_active_primary = getattr(host_node, attr_name) @@ -484,6 +472,16 @@ def add_lvol_ha(name, size, host_id_or_name, ha_type, pool_id_or_name, use_comp, else: lvol.npcs = cl.distr_npcs lvol.ndcs = cl.distr_ndcs + lvol.do_replicate = bool(do_replicate) + if lvol.do_replicate: + if replication_cluster_id: + replication_cluster = db_controller.get_cluster_by_id(replication_cluster_id) + if not replication_cluster: + return False, f"Replication cluster not found: {replication_cluster_id}" + else: + replication_cluster_id = cl.snapshot_replication_target_cluster + random_nodes = _get_next_3_nodes(replication_cluster_id, lvol.size) + lvol.replication_node_id = random_nodes[0].get_id() lvol_count = len(db_controller.get_lvols_by_node_id(host_node.get_id())) if lvol_count > host_node.max_lvol: @@ -731,7 +729,7 @@ def add_lvol_on_node(lvol, snode, is_primary=True): return False, f"Failed to create listener for {lvol.get_id()}" logger.info("Add BDev to subsystem") - ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid) + ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid, lvol.ns_id, f"{lvol.vuid:016X}") if not ret: return False, "Failed to add bdev to subsystem" lvol.ns_id = int(ret) @@ -775,7 +773,7 @@ def recreate_lvol_on_node(lvol, snode, ha_inode_self=0, ana_state=None): # if namespace_found is False: logger.info("Add BDev to subsystem") - ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid) + ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid, lvol.ns_id) # if not ret: # return False, "Failed to add bdev to subsystem" @@ -1225,7 +1223,8 @@ def list_lvols(is_json, cluster_id, pool_id_or_name, all=False): "IO Err": lvol.io_error, "Health": lvol.health_check, "NS 
ID": lvol.ns_id, - "Mode": mode + "Mode": mode, + "Replicated On": lvol.replication_node_id, } data.append(lvol_data) @@ -1265,6 +1264,62 @@ def list_lvols_mem(is_json, is_csv): return utils.print_table(data) +def get_replication_info(lvol_id_or_name): + db_controller = DBController() + lvol = None + for lv in db_controller.get_lvols(): # pass + if lv.get_id() == lvol_id_or_name or lv.lvol_name == lvol_id_or_name: + lvol = lv + break + + if not lvol: + logger.error(f"LVol id or name not found: {lvol_id_or_name}") + return False + + tasks = [] + snaps = [] + out = { + "last_snapshot_id": None, + "last_replication_time": None, + "last_replication_duration": None, + "replicated_count": None, + "snaps": None, + "tasks": None, + } + node = db_controller.get_storage_node_by_id(lvol.node_id) + for task in db_controller.get_job_tasks(node.cluster_id): + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + logger.debug(task) + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + + if snap.lvol.get_id() != lvol.get_id(): + continue + snaps.append(snap) + tasks.append(task) + + if tasks: + tasks = sorted(tasks, key=lambda x: x.date) + snaps = sorted(snaps, key=lambda x: x.created_at) + out["snaps"] = [s.to_dict() for s in snaps] + out["tasks"] = [t.to_dict() for t in tasks] + out["replicated_count"] = len(snaps) + last_task = tasks[-1] + last_snap = db_controller.get_snapshot_by_id(last_task.function_params["snapshot_id"]) + out["last_snapshot_id"] = last_snap.get_id() + out["last_replication_time"] = last_task.updated_at + if "end_time" in last_task.function_params: + duration = utils.strfdelta_seconds( + last_task.function_params["end_time"] - last_task.function_params["start_time"]) + else: + duration = utils.strfdelta_seconds(int(time.time()) - last_task.function_params["start_time"]) + out["last_replication_duration"] = duration + + return out + + def get_lvol(lvol_id_or_name, is_json): 
db_controller = DBController() lvol = None @@ -1281,6 +1336,7 @@ def get_lvol(lvol_id_or_name, is_json): del data['nvme_dev'] + if is_json: return json.dumps(data, indent=2) else: @@ -1296,6 +1352,16 @@ def connect_lvol(uuid, ctrl_loss_tmo=constants.LVOL_NVME_CONNECT_CTRL_LOSS_TMO): logger.error(e) return False + node = db_controller.get_storage_node_by_id(lvol.node_id) + cluster = db_controller.get_cluster_by_id(node.cluster_id) + if cluster.status == Cluster.STATUS_SUSPENDED and cluster.snapshot_replication_target_cluster: + logger.error("Cluster is suspended, looking for replicated lvol") + for lv in db_controller.get_lvols(cluster.snapshot_replication_target_cluster): + if lv.nqn == lvol.nqn: + logger.info(f"LVol with same nqn already exists on target cluster: {lv.get_id()}") + lvol = lv + break + out = [] nodes_ids = [] if lvol.ha_type == 'single': @@ -1380,6 +1446,10 @@ def resize_lvol(id, new_size): snode = db_controller.get_storage_node_by_id(lvol.node_id) + if snode.lvol_sync_del(): + logger.error(f"LVol sync deletion found on node: {snode.get_id()}") + return False, f"LVol sync deletion found on node: {snode.get_id()}" + logger.info(f"Resizing LVol: {lvol.get_id()}") logger.info(f"Current size: {utils.humanbytes(lvol.size)}, new size: {utils.humanbytes(new_size)}") @@ -1521,19 +1591,11 @@ def get_capacity(lvol_uuid, history, records_count=20, parse_sizes=True): db_controller = DBController() try: lvol = db_controller.get_lvol_by_id(lvol_uuid) + pool = db_controller.get_pool_by_id(lvol.pool_uuid) except KeyError as e: logger.error(e) return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_lvol_stats(lvol, limit=records_number) cap_stats_keys = [ "date", "size_total", @@ -1543,6 +1605,8 @@ def get_capacity(lvol_uuid, history, records_count=20, parse_sizes=True): 
"size_prov", "size_prov_util" ] + prom_client = PromClient(pool.cluster_id) + records_list = prom_client.get_lvol_metrics(lvol_uuid, cap_stats_keys, history) new_records = utils.process_records(records_list, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -1564,19 +1628,11 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si db_controller = DBController() try: lvol = db_controller.get_lvol_by_id(lvol_uuid) + pool = db_controller.get_pool_by_id(lvol.pool_uuid) except KeyError as e: logger.error(e) return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_lvol_stats(lvol, limit=records_number) io_stats_keys = [ "date", "read_bytes", @@ -1587,7 +1643,6 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "write_bytes_ps", "write_io_ps", "write_latency_ps", - "connected_clients", ] if with_sizes: io_stats_keys.extend( @@ -1612,6 +1667,8 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "write_latency_ticks", ] ) + prom_client = PromClient(pool.cluster_id) + records_list = prom_client.get_lvol_metrics(lvol_uuid, io_stats_keys, history) # combine records new_records = utils.process_records(records_list, records_count, keys=io_stats_keys) @@ -1630,7 +1687,6 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "Write speed": utils.humanbytes(record['write_bytes_ps']), "Write IOPS": record['write_io_ps'], "Write lat": record['write_latency_ps'], - "Con": record['connected_clients'], }) return out @@ -1765,3 +1821,447 @@ def inflate_lvol(lvol_id): else: logger.error(f"Failed to inflate LVol: {lvol_id}") return ret + +def replication_trigger(lvol_id): + # create snapshot and replicate it + db_controller = DBController() + lvol = 
db_controller.get_lvol_by_id(lvol_id) + node = db_controller.get_storage_node_by_id(lvol.node_id) + snapshot_controller.add(lvol_id, f"replication_{uuid.uuid4()}") + + tasks = [] + snaps = [] + out = { + "lvol": lvol, + "last_snapshot_id": None, + "last_replication_time": None, + "last_replication_duration": None, + "replicated_count": None, + "snaps": None, + "tasks": None, + } + for task in db_controller.get_job_tasks(node.cluster_id): + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + logger.debug(task) + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + + if snap.lvol.get_id() != lvol_id: + continue + snaps.append(snap) + tasks.append(task) + + if tasks: + tasks = sorted(tasks, key=lambda x: x.date) + snaps = sorted(snaps, key=lambda x: x.created_at) + out["snaps"] = snaps + out["tasks"] = tasks + out["replicated_count"] = len(snaps) + last_task = tasks[-1] + last_snap = db_controller.get_snapshot_by_id(last_task.function_params["snapshot_id"]) + out["last_snapshot_id"] = last_snap.get_id() + out["last_replication_time"] = last_task.updated_at + duration = 0 + if "start_time" in last_task.function_params: + if "end_time" in last_task.function_params: + duration = utils.strfdelta_seconds( + last_task.function_params["end_time"] - last_task.function_params["start_time"]) + else: + duration = utils.strfdelta_seconds(int(time.time()) - last_task.function_params["start_time"]) + out["last_replication_duration"] = duration + + return out + +def replication_start(lvol_id, replication_cluster_id=None): + db_controller = DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + lvol.do_replicate = True + if not lvol.replication_node_id: + excluded_nodes = [] + if lvol.cloned_from_snap: + lvol_snap = db_controller.get_snapshot_by_id(lvol.cloned_from_snap) + if lvol_snap.source_replicated_snap_uuid: + org_snap = 
db_controller.get_snapshot_by_id(lvol_snap.source_replicated_snap_uuid) + excluded_nodes.append(org_snap.lvol.node_id) + snode = db_controller.get_storage_node_by_id(lvol.node_id) + cluster = db_controller.get_cluster_by_id(snode.cluster_id) + if not replication_cluster_id: + replication_cluster_id = cluster.snapshot_replication_target_cluster + if not replication_cluster_id: + logger.error(f"Cluster: {snode.cluster_id} not replicated") + return False + random_nodes = _get_next_3_nodes(replication_cluster_id, lvol.size) + for r_node in random_nodes: + if r_node.get_id() not in excluded_nodes: + logger.info(f"Replicating on node: {r_node.get_id()}") + lvol.replication_node_id = r_node.get_id() + lvol.write_to_db() + break + if not lvol.replication_node_id: + logger.error(f"Replication node not found for lvol: {lvol.get_id()}") + return False + logger.info("Setting LVol do_replicate: True") + + for snap in db_controller.get_snapshots(): + if snap.lvol.uuid == lvol.uuid: + if not snap.target_replicated_snap_uuid: + task = tasks_controller.add_snapshot_replication_task(snap.cluster_id, snap.lvol.node_id, snap.get_id()) + if task: + snapshot_events.replication_task_created(snap) + return True + + +def replication_stop(lvol_id, delete=False): + db_controller = DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + logger.info("Setting LVol do_replicate: False") + lvol.do_replicate = False + lvol.write_to_db() + + snode = db_controller.get_storage_node_by_id(lvol.node_id) + tasks = db_controller.get_job_tasks(snode.cluster_id) + + + for task in tasks: + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION and task.status != JobSchedule.STATUS_DONE: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + if snap.lvol.uuid == lvol.uuid: + tasks_controller.cancel_task(task.uuid) + + return True + + +def replicate_lvol_on_target_cluster(lvol_id): + db_controller = 
DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + if not lvol.replication_node_id: + logger.error(f"LVol: {lvol_id} replication node id not found") + return False + + target_node = db_controller.get_storage_node_by_id(lvol.replication_node_id) + if not target_node: + logger.error(f"Node not found: {lvol.replication_node_id}") + return False + + if target_node.status != StorageNode.STATUS_ONLINE: + logger.error(f"Node is not online!: {target_node}, status: {target_node.status}") + return False + + source_node = db_controller.get_storage_node_by_id(lvol.node_id) + source_cluster = db_controller.get_cluster_by_id(source_node.cluster_id) + + for lv in db_controller.get_lvols(source_cluster.snapshot_replication_target_cluster): + if lv.nqn == lvol.nqn: + logger.info(f"LVol with same nqn already exists on target cluster: {lv.get_id()}") + return lv.get_id() + + snaps = [] + snapshot = None + for task in db_controller.get_job_tasks(source_node.cluster_id): + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + logger.debug(task) + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + + if snap.lvol.get_id() != lvol_id: + continue + snaps.append(snap) + + if snaps: + snaps = sorted(snaps, key=lambda x: x.created_at) + last_snapshot = snaps[-1] + rep_snap = db_controller.get_snapshot_by_id(last_snapshot.target_replicated_snap_uuid) + snapshot = rep_snap + + if not snapshot: + logger.error(f"Snapshot for replication not found for lvol: {lvol_id}") + return False + + # create lvol on target node + new_lvol = copy.deepcopy(lvol) + new_lvol.uuid = str(uuid.uuid4()) + new_lvol.create_dt = str(datetime.now()) + new_lvol.node_id = target_node.get_id() + new_lvol.nodes = [target_node.get_id(), target_node.secondary_node_id] + new_lvol.replication_node_id = "" + new_lvol.do_replicate = False + new_lvol.cloned_from_snap = 
snapshot.get_id() + new_lvol.pool_uuid = source_cluster.snapshot_replication_target_pool + new_lvol.lvs_name = target_node.lvstore + new_lvol.top_bdev = f"{new_lvol.lvs_name}/{new_lvol.lvol_bdev}" + new_lvol.snapshot_name = snapshot.snap_bdev + new_lvol.status = LVol.STATUS_IN_CREATION + + new_lvol.bdev_stack = [ + { + "type": "bdev_lvol_clone", + "name": new_lvol.top_bdev, + "params": { + "snapshot_name": snapshot.snap_bdev, + "clone_name": new_lvol.lvol_bdev + } + } + ] + + if new_lvol.crypto_bdev: + new_lvol.bdev_stack.append({ + "type": "crypto", + "name": new_lvol.crypto_bdev, + "params": { + "name": new_lvol.crypto_bdev, + "base_name": new_lvol.top_bdev, + "key1": new_lvol.crypto_key1, + "key2": new_lvol.crypto_key2, + } + }) + + new_lvol.write_to_db(db_controller.kv_store) + + lvol_bdev, error = add_lvol_on_node(new_lvol, target_node) + if error: + logger.error(error) + new_lvol.remove(db_controller.kv_store) + return False, error + + new_lvol.lvol_uuid = lvol_bdev['uuid'] + new_lvol.blobid = lvol_bdev['driver_specific']['lvol']['blobid'] + + secondary_node = db_controller.get_storage_node_by_id(target_node.secondary_node_id) + if secondary_node.status == StorageNode.STATUS_ONLINE: + lvol_bdev, error = add_lvol_on_node(new_lvol, secondary_node, is_primary=False) + if error: + logger.error(error) + # remove lvol from primary + ret = delete_lvol_from_node(new_lvol, target_node) + if not ret: + logger.error("") + new_lvol.remove(db_controller.kv_store) + return False, error + + new_lvol.status = LVol.STATUS_ONLINE + new_lvol.write_to_db(db_controller.kv_store) + lvol = db_controller.get_lvol_by_id(lvol_id) + lvol.from_source = False + lvol.write_to_db() + lvol_events.lvol_replicated(lvol, new_lvol) + + return new_lvol.lvol_uuid + + +def list_replication_tasks(lvol_id): + db_controller = DBController() + lvol = db_controller.get_lvol_by_id(lvol_id) + node = db_controller.get_storage_node_by_id(lvol.node_id) + tasks = [] + for task in 
db_controller.get_job_tasks(node.cluster_id): + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + if snap.lvol.get_id() != lvol_id: + continue + tasks.append(task) + + return tasks + + +def suspend_lvol(lvol_id): + + db_controller = DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + logger.info(f"suspending LVol subsystem: {lvol.get_id()}") + snode = db_controller.get_storage_node_by_id(lvol.node_id) + for iface in snode.data_nics: + if iface.ip4_address and lvol.fabric == iface.trtype.lower(): + logger.info("adding listener for %s on IP %s" % (lvol.nqn, iface.ip4_address)) + ret = snode.rpc_client().nvmf_subsystem_listener_set_ana_state(lvol.nqn, iface.ip4_address, lvol.subsys_port, ana="inaccessible") + if not ret: + logger.error(f"Failed to set subsystem listener state for {lvol.nqn} on {iface.ip4_address}") + return False + + if snode.secondary_node_id: + sec_node = db_controller.get_storage_node_by_id(snode.secondary_node_id) + if sec_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN, StorageNode.STATUS_SUSPENDED]: + for iface in sec_node.data_nics: + if iface.ip4_address and lvol.fabric == iface.trtype.lower(): + logger.info("adding listener for %s on IP %s" % (lvol.nqn, iface.ip4_address)) + ret = sec_node.rpc_client().nvmf_subsystem_listener_set_ana_state(lvol.nqn, iface.ip4_address, lvol.subsys_port, ana="inaccessible") + if not ret: + logger.error(f"Failed to set subsystem listener state for {lvol.nqn} on {iface.ip4_address}") + return False + + return True + + +def resume_lvol(lvol_id): + db_controller = DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + logger.info(f"suspending LVol subsystem: {lvol.get_id()}") + snode = 
db_controller.get_storage_node_by_id(lvol.node_id) + for iface in snode.data_nics: + if iface.ip4_address and lvol.fabric == iface.trtype.lower(): + logger.info("adding listener for %s on IP %s" % (lvol.nqn, iface.ip4_address)) + ret = snode.rpc_client().nvmf_subsystem_listener_set_ana_state( + lvol.nqn, iface.ip4_address, lvol.subsys_port, is_optimized=True) + if not ret: + logger.error(f"Failed to set subsystem listener state for {lvol.nqn} on {iface.ip4_address}") + return False + + if snode.secondary_node_id: + sec_node = db_controller.get_storage_node_by_id(snode.secondary_node_id) + if sec_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN, StorageNode.STATUS_SUSPENDED]: + for iface in sec_node.data_nics: + if iface.ip4_address and lvol.fabric == iface.trtype.lower(): + logger.info("adding listener for %s on IP %s" % (lvol.nqn, iface.ip4_address)) + ret = sec_node.rpc_client().nvmf_subsystem_listener_set_ana_state( + lvol.nqn, iface.ip4_address, lvol.subsys_port, is_optimized=False) + if not ret: + logger.error(f"Failed to set subsystem listener state for {lvol.nqn} on {iface.ip4_address}") + return False + + return True + + +def replicate_lvol_on_source_cluster(lvol_id): + db_controller = DBController() + try: + lvol = db_controller.get_lvol_by_id(lvol_id) + except KeyError as e: + logger.error(e) + return False + + source_node = db_controller.get_storage_node_by_id(lvol.node_id) + source_cluster = db_controller.get_cluster_by_id(source_node.cluster_id) + + if not source_node: + logger.error(f"Node not found: {lvol.node_id}") + return False + + if source_node.status != StorageNode.STATUS_ONLINE: + logger.error(f"Node is not online!: {source_node.get_id()}, status: {source_node.status}") + return False + + # for lv in db_controller.get_lvols(source_cluster.snapshot_replication_target_cluster): + # if lv.nqn == lvol.nqn: + # logger.info(f"LVol with same nqn already exists on target cluster: {lv.get_id()}") + # return lv.get_id() + + snaps = [] 
+ snapshot = None + for task in db_controller.get_job_tasks(source_node.cluster_id): + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + logger.debug(task) + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + + if snap.lvol.get_id() != lvol_id: + continue + snaps.append(snap) + + if snaps: + snaps = sorted(snaps, key=lambda x: x.created_at) + snapshot = snaps[-1] + + if not snapshot: + logger.error(f"Snapshot for replication not found for lvol: {lvol_id}") + return False + + # create lvol on target node + new_lvol = copy.deepcopy(lvol) + new_lvol.cloned_from_snap = snapshot.get_id() + new_lvol.snapshot_name = snapshot.snap_bdev + new_lvol.from_source = True + new_lvol.status = LVol.STATUS_IN_CREATION + + new_lvol.bdev_stack = [ + { + "type": "bdev_lvol_clone", + "name": new_lvol.top_bdev, + "params": { + "snapshot_name": snapshot.snap_bdev, + "clone_name": new_lvol.lvol_bdev + } + } + ] + + if new_lvol.crypto_bdev: + new_lvol.bdev_stack.append({ + "type": "crypto", + "name": new_lvol.crypto_bdev, + "params": { + "name": new_lvol.crypto_bdev, + "base_name": new_lvol.top_bdev, + "key1": new_lvol.crypto_key1, + "key2": new_lvol.crypto_key2, + } + }) + + new_lvol.write_to_db(db_controller.kv_store) + + lvol = db_controller.get_lvol_by_id(lvol_id) + lvol.uuid = str(uuid.uuid4()) + lvol.from_source = True + lvol.write_to_db() + delete_lvol(lvol.uuid) + + time.sleep(3) + + lvol_bdev, error = add_lvol_on_node(new_lvol, source_node) + if error: + logger.error(error) + new_lvol.remove(db_controller.kv_store) + return False, error + + new_lvol.lvol_uuid = lvol_bdev['uuid'] + new_lvol.blobid = lvol_bdev['driver_specific']['lvol']['blobid'] + + secondary_node = db_controller.get_storage_node_by_id(source_node.secondary_node_id) + if secondary_node.status == StorageNode.STATUS_ONLINE: + lvol_bdev, error = add_lvol_on_node(new_lvol, secondary_node, is_primary=False) + if error: + logger.error(error) + 
# remove lvol from primary + ret = delete_lvol_from_node(new_lvol, source_node) + if not ret: + logger.error("") + new_lvol.remove(db_controller.kv_store) + return False, error + + new_lvol.status = LVol.STATUS_ONLINE + new_lvol.write_to_db(db_controller.kv_store) + lvol_events.lvol_replicated(lvol, new_lvol) + + return new_lvol.lvol_uuid + diff --git a/simplyblock_core/controllers/lvol_events.py b/simplyblock_core/controllers/lvol_events.py index 636c444b3..c4f2abde8 100644 --- a/simplyblock_core/controllers/lvol_events.py +++ b/simplyblock_core/controllers/lvol_events.py @@ -3,6 +3,7 @@ from simplyblock_core.controllers import events_controller as ec from simplyblock_core.db_controller import DBController +from simplyblock_core import utils, constants logger = logging.getLogger() @@ -10,6 +11,7 @@ def _lvol_event(lvol, message, caused_by, event): db_controller = DBController() snode = db_controller.get_storage_node_by_id(lvol.node_id) + cluster = db_controller.get_cluster_by_id(snode.cluster_id) ec.log_event_cluster( cluster_id=snode.cluster_id, domain=ec.DOMAIN_CLUSTER, @@ -18,7 +20,79 @@ def _lvol_event(lvol, message, caused_by, event): caused_by=caused_by, message=message, node_id=lvol.get_id()) - + if cluster.mode == "kubernetes": + pool = db_controller.get_pool_by_id(lvol.pool_uuid) + + if event == ec.EVENT_OBJ_CREATED: + crypto_key=( + (lvol.crypto_key1, lvol.crypto_key2) + if lvol.crypto_key1 and lvol.crypto_key2 + else None + ) + + node_urls = [ + f"{constants.WEBAPI_K8S_ENDPOINT}/clusters/{snode.cluster_id}/storage-nodes/{node_id}/" + for node_id in lvol.nodes + ] + + utils.patch_cr_lvol_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=pool.lvols_cr_plural, + namespace=pool.lvols_cr_namespace, + name=pool.lvols_cr_name, + add={ + "uuid": lvol.get_id(), + "lvolName": lvol.lvol_name, + "status": lvol.status, + "nodeUUID": node_urls, + "size": utils.humanbytes(lvol.size), + "health": lvol.health_check, + "isCrypto": crypto_key is 
not None, + "nqn": lvol.nqn, + "subsysPort": lvol.subsys_port, + "hostname": lvol.hostname, + "fabric": lvol.fabric, + "ha": lvol.ha_type == 'ha', + "poolUUID": lvol.pool_uuid, + "poolName": lvol.pool_name, + "PvcName": lvol.pvc_name, + "snapName": lvol.snapshot_name, + "clonedFromSnap": lvol.cloned_from_snap, + "stripeWdata": lvol.ndcs, + "stripeWparity": lvol.npcs, + "blobID": lvol.blobid, + "namespaceID": lvol.ns_id, + "qosClass": lvol.lvol_priority_class, + "maxNamespacesPerSubsystem": lvol.max_namespace_per_subsys, + "qosIOPS": lvol.rw_ios_per_sec, + "qosRWTP": lvol.rw_mbytes_per_sec, + "qosRTP": lvol.r_mbytes_per_sec, + "qosWTP": lvol.w_mbytes_per_sec, + }, + ) + + elif event == ec.EVENT_STATUS_CHANGE: + utils.patch_cr_lvol_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=pool.lvols_cr_plural, + namespace=pool.lvols_cr_namespace, + name=pool.lvols_cr_name, + lvol_uuid=lvol.get_id(), + updates={"status": lvol.status, "health": lvol.health_check}, + ) + elif event == ec.EVENT_OBJ_DELETED: + logger.info("Deleting lvol CR object") + utils.patch_cr_lvol_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=pool.lvols_cr_plural, + namespace=pool.lvols_cr_namespace, + name=pool.lvols_cr_name, + lvol_uuid=lvol.get_id(), + remove=True, + ) def lvol_create(lvol, caused_by=ec.CAUSED_BY_CLI): _lvol_event(lvol, "LVol created", caused_by, ec.EVENT_OBJ_CREATED) @@ -43,3 +117,7 @@ def lvol_health_check_change(lvol, new_state, old_status, caused_by=ec.CAUSED_BY def lvol_io_error_change(lvol, new_state, old_status, caused_by=ec.CAUSED_BY_CLI): _lvol_event(lvol, f"LVol IO Error changed from: {old_status} to: {new_state}", caused_by, ec.EVENT_STATUS_CHANGE) + +def lvol_replicated(lvol, new_lvol, caused_by=ec.CAUSED_BY_CLI): + _lvol_event(lvol, f"LVol Replicated, {lvol.get_id()}, new lvol: {new_lvol.get_id()}", caused_by, ec.EVENT_STATUS_CHANGE) + diff --git a/simplyblock_core/controllers/pool_controller.py 
b/simplyblock_core/controllers/pool_controller.py index db7016d7d..0d2738e67 100644 --- a/simplyblock_core/controllers/pool_controller.py +++ b/simplyblock_core/controllers/pool_controller.py @@ -12,6 +12,7 @@ from simplyblock_core.controllers import pool_events, lvol_controller from simplyblock_core.db_controller import DBController from simplyblock_core.models.pool import Pool +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient logger = lg.getLogger() @@ -22,7 +23,8 @@ def _generate_string(length): string.ascii_letters + string.digits) for _ in range(length)) -def add_pool(name, pool_max, lvol_max, max_rw_iops, max_rw_mbytes, max_r_mbytes, max_w_mbytes, cluster_id, qos_host=None): +def add_pool(name, pool_max, lvol_max, max_rw_iops, max_rw_mbytes, max_r_mbytes, max_w_mbytes, cluster_id, + cr_name=None, cr_namespace=None, cr_plural=None, qos_host=None): db_controller = DBController() if not name: logger.error("Pool name is empty!") @@ -70,6 +72,9 @@ def add_pool(name, pool_max, lvol_max, max_rw_iops, max_rw_mbytes, max_r_mbytes, pool.max_rw_mbytes_per_sec = max_rw_mbytes pool.max_r_mbytes_per_sec = max_r_mbytes pool.max_w_mbytes_per_sec = max_w_mbytes + pool.cr_name = cr_name + pool.cr_namespace = cr_namespace + pool.cr_plural = cr_plural if pool.has_qos() and not qos_host: next_nodes = lvol_controller._get_next_3_nodes(cluster_id) if next_nodes: @@ -120,7 +125,8 @@ def qos_exists_on_child_lvol(db_controller: DBController, pool_uuid): return False def set_pool(uuid, pool_max=0, lvol_max=0, max_rw_iops=0, - max_rw_mbytes=0, max_r_mbytes=0, max_w_mbytes=0, name=""): + max_rw_mbytes=0, max_r_mbytes=0, max_w_mbytes=0, name="", + lvols_cr_name="", lvols_cr_namespace="", lvols_cr_plural=""): db_controller = DBController() try: pool = db_controller.get_pool_by_id(uuid) @@ -142,6 +148,17 @@ def set_pool(uuid, pool_max=0, lvol_max=0, max_rw_iops=0, return False, msg pool.pool_name = name + if lvols_cr_name and 
lvols_cr_name != pool.lvols_cr_name: + for p in db_controller.get_pools(): + if p.lvols_cr_name == lvols_cr_name: + msg = f"Pool found with the same lvol cr name: {name}" + logger.error(msg) + return False, msg + pool.lvols_cr_name = lvols_cr_name + pool.lvols_cr_namespace = lvols_cr_namespace + pool.lvols_cr_plural = lvols_cr_plural + + # Normalize inputs max_rw_iops = max_rw_iops or 0 max_rw_mbytes = max_rw_mbytes or 0 @@ -264,8 +281,10 @@ def set_status(pool_id, status): except KeyError: logger.error(f"Pool not found {pool_id}") return False + old_status = pool.status pool.status = status pool.write_to_db(db_controller.kv_store) + pool_events.pool_status_change(pool, pool.status, old_status) logger.info("Done") @@ -321,15 +340,18 @@ def get_io_stats(pool_id, history, records_count=20): logger.error(f"Pool not found {pool_id}") return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 + io_stats_keys = [ + "date", + "read_bytes_ps", + "read_io_ps", + "read_latency_ps", + "write_bytes_ps", + "write_io_ps", + "write_latency_ps", + ] - out = db_controller.get_pool_stats(pool, records_number) + prom_client = PromClient(pool.cluster_id) + out = prom_client.get_pool_metrics(pool_id, io_stats_keys, history) new_records = utils.process_records(out, records_count) return utils.print_table([ diff --git a/simplyblock_core/controllers/pool_events.py b/simplyblock_core/controllers/pool_events.py index 2581d59b1..8c4f0ea08 100644 --- a/simplyblock_core/controllers/pool_events.py +++ b/simplyblock_core/controllers/pool_events.py @@ -2,7 +2,8 @@ import logging from simplyblock_core.controllers import events_controller as ec - +from simplyblock_core.db_controller import DBController +from simplyblock_core import utils, constants logger = logging.getLogger() @@ -29,3 +30,24 @@ def pool_remove(pool): def pool_updated(pool): 
_add(pool, f"Pool updated {pool.pool_name}", event=ec.EVENT_STATUS_CHANGE) + +def pool_status_change(pool, new_state, old_status): + db_controller = DBController() + cluster = db_controller.get_cluster_by_id(pool.cluster_id) + ec.log_event_cluster( + cluster_id=pool.cluster_id, + domain=ec.DOMAIN_CLUSTER, + event=ec.EVENT_STATUS_CHANGE, + db_object=pool, + caused_by=ec.CAUSED_BY_CLI, + message=f"Pool status changed from {old_status} to {new_state}", + node_id=pool.cluster_id) + + if cluster.mode == "kubernetes": + utils.patch_cr_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=pool.cr_plural, + namespace=pool.cr_namespace, + name=pool.cr_name, + status_patch={"status": new_state}) diff --git a/simplyblock_core/controllers/snapshot_controller.py b/simplyblock_core/controllers/snapshot_controller.py index d3eca0e00..99c84d080 100644 --- a/simplyblock_core/controllers/snapshot_controller.py +++ b/simplyblock_core/controllers/snapshot_controller.py @@ -1,12 +1,14 @@ # coding=utf-8 +import json import logging as lg import time import uuid -from simplyblock_core.controllers import lvol_controller, snapshot_events, pool_controller +from simplyblock_core.controllers import lvol_controller, snapshot_events, pool_controller, tasks_controller from simplyblock_core import utils, constants from simplyblock_core.db_controller import DBController +from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.pool import Pool from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.lvol_model import LVol @@ -49,9 +51,14 @@ def add(lvol_id, snapshot_name): if sn.snap_name == snapshot_name: return False, f"Snapshot name must be unique: {snapshot_name}" - logger.info(f"Creating snapshot: {snapshot_name} from LVol: {lvol.get_id()}") snode = db_controller.get_storage_node_by_id(lvol.node_id) + if snode.lvol_sync_del(): + logger.error(f"LVol sync deletion found on node: {snode.get_id()}") + return False, 
f"LVol sync deletion found on node: {snode.get_id()}" + + logger.info(f"Creating snapshot: {snapshot_name} from LVol: {lvol.get_id()}") + rec = db_controller.get_lvol_stats(lvol, 1) if rec: size = rec[0].size_used @@ -217,19 +224,42 @@ def add(lvol_id, snapshot_name): snap.snap_ref_id = original_snap.get_id() snap.write_to_db(db_controller.kv_store) - logger.info("Done") + for sn in db_controller.get_snapshots(cluster.get_id()): + if sn.get_id() == snap.get_id(): + continue + if sn.lvol.get_id() == lvol_id: + if not sn.next_snap_uuid: + sn.next_snap_uuid = snap.get_id() + snap.prev_snap_uuid = sn.get_id() + sn.write_to_db() + snap.write_to_db() + break + snapshot_events.snapshot_create(snap) + if lvol.do_replicate: + task = tasks_controller.add_snapshot_replication_task(snap.cluster_id, snap.lvol.node_id, snap.get_id()) + if task: + snapshot_events.replication_task_created(snap) + if lvol.cloned_from_snap: + lvol_snap = db_controller.get_snapshot_by_id(lvol.cloned_from_snap) + if lvol_snap.source_replicated_snap_uuid: + org_snap = db_controller.get_snapshot_by_id(lvol_snap.source_replicated_snap_uuid) + if org_snap and org_snap.status == SnapShot.STATUS_ONLINE: + task = tasks_controller.add_snapshot_replication_task( + snap.cluster_id, org_snap.lvol.node_id, snap.get_id(), replicate_to_source=True) + if task: + logger.info("Created snapshot replication task on original node") return snap.uuid, False -def list(all=False): - snaps = db_controller.get_snapshots() +def list(all=False, cluster_id=None, with_details=False): + snaps = db_controller.get_snapshots(cluster_id) data = [] for snap in snaps: logger.debug(snap) if snap.deleted is True and all is False: continue - data.append({ + d = { "UUID": snap.uuid, "Name": snap.snap_name, "Size": utils.humanbytes(snap.used_size), @@ -239,7 +269,13 @@ def list(all=False): "Created At": time.strftime("%H:%M:%S, %d/%m/%Y", time.gmtime(snap.created_at)), "Health": snap.health_check, "Status": snap.status, - }) + } + if 
with_details: + d["Replication target snap"] = snap.target_replicated_snap_uuid + d["Replication source snap"] = snap.source_replicated_snap_uuid + d["Rrev snap"] = snap.prev_snap_uuid + d["Next snap"] = snap.next_snap_uuid + data.append(d) return utils.print_table(data) @@ -250,6 +286,10 @@ def delete(snapshot_uuid, force_delete=False): logger.error(f"Snapshot not found {snapshot_uuid}") return False + if snap.status == SnapShot.STATUS_IN_REPLICATION: + logger.error("Snapshot is in replication") + return False + try: snode = db_controller.get_storage_node_by_id(snap.lvol.node_id) except KeyError: @@ -351,6 +391,9 @@ def delete(snapshot_uuid, force_delete=False): except KeyError: pass + if snap.target_replicated_snap_uuid: + delete_replicated(snap.uuid) + logger.info("Done") return True @@ -381,6 +424,10 @@ def clone(snapshot_id, clone_name, new_size=0, pvc_name=None, pvc_namespace=None logger.exception(msg) return False, msg + if snode.lvol_sync_del(): + logger.error(f"LVol sync deletion found on node: {snode.get_id()}") + return False, f"LVol sync deletion found on node: {snode.get_id()}" + cluster = db_controller.get_cluster_by_id(pool.cluster_id) if cluster.status not in [cluster.STATUS_ACTIVE, cluster.STATUS_DEGRADED]: return False, f"Cluster is not active, status: {cluster.status}" @@ -587,3 +634,98 @@ def clone(snapshot_id, clone_name, new_size=0, pvc_name=None, pvc_namespace=None if new_size: lvol_controller.resize_lvol(lvol.get_id(), new_size) return lvol.uuid, False + + +def list_replication_tasks(cluster_id): + tasks = db_controller.get_job_tasks(cluster_id) + + data = [] + for task in tasks: + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + logger.debug(task) + try: + snap = db_controller.get_snapshot_by_id(task.function_params["snapshot_id"]) + except KeyError: + continue + + duration = "" + try: + if task.status == JobSchedule.STATUS_RUNNING: + duration = utils.strfdelta_seconds(int(time.time()) - task.function_params["start_time"]) + 
elif "end_time" in task.function_params: + duration = utils.strfdelta_seconds( + task.function_params["end_time"] - task.function_params["start_time"]) + except Exception as e: + logger.error(e) + status = task.status + if task.canceled: + status = "cancelled" + replicate_to = "target" + if "replicate_to_source" in task.function_params: + if task.function_params["replicate_to_source"] is True: + replicate_to = "source" + offset = 0 + if "offset" in task.function_params: + offset = task.function_params["offset"] + data.append({ + "Task ID": task.uuid, + "Snapshot ID": snap.uuid, + "Size": utils.humanbytes(snap.used_size), + "Duration": duration, + "Offset": offset, + "Status": status, + "Replicate to": replicate_to, + "Result": task.function_result, + "Cluster ID": task.cluster_id, + }) + return utils.print_table(data) + + +def delete_replicated(snapshot_id): + try: + snap = db_controller.get_snapshot_by_id(snapshot_id) + except KeyError: + logger.error(f"Snapshot not found {snapshot_id}") + return False + + try: + target_replicated_snap = db_controller.get_snapshot_by_id(snap.target_replicated_snap_uuid) + logger.info("Deleting replicated snapshot %s", target_replicated_snap.uuid) + ret = delete(target_replicated_snap.uuid) + if not ret: + logger.error("Failed to delete snapshot %s", target_replicated_snap.uuid) + return False + + except KeyError: + logger.error(f"Snapshot not found {snap.target_replicated_snap_uuid}") + return False + + return True + + +def get(snapshot_uuid): + try: + snap = db_controller.get_snapshot_by_id(snapshot_uuid) + except KeyError: + logger.error(f"Snapshot not found {snapshot_uuid}") + return False + + return json.dumps(snap.get_clean_dict(), indent=2) + + +def set(snapshot_uuid, attr, value) -> bool: + try: + snap = db_controller.get_snapshot_by_id(snapshot_uuid) + except KeyError: + logger.error(f"Snapshot not found {snapshot_uuid}") + return False + + if attr not in snap.get_attrs_map(): + raise KeyError('Attribute not found') + + 
value = snap.get_attrs_map()[attr]['type'](value) + logger.info(f"Setting {attr} to {value}") + setattr(snap, attr, value) + snap.write_to_db() + return True + diff --git a/simplyblock_core/controllers/snapshot_events.py b/simplyblock_core/controllers/snapshot_events.py index 4cb107dcd..9b29f8b6f 100644 --- a/simplyblock_core/controllers/snapshot_events.py +++ b/simplyblock_core/controllers/snapshot_events.py @@ -31,3 +31,10 @@ def snapshot_delete(snapshot, caused_by=ec.CAUSED_BY_CLI): def snapshot_clone(snapshot, lvol_clone, caused_by=ec.CAUSED_BY_CLI): _snapshot_event(snapshot, f"Snapshot cloned: {snapshot.get_id()} clone id: {lvol_clone.get_id()}", caused_by, ec.EVENT_STATUS_CHANGE) + +def replication_task_created(snapshot, caused_by=ec.CAUSED_BY_CLI): + _snapshot_event(snapshot, "Snapshot replication task created", caused_by, ec.EVENT_OBJ_CREATED) + + +def replication_task_finished(snapshot, caused_by=ec.CAUSED_BY_CLI): + _snapshot_event(snapshot, "Snapshot replication task finished", caused_by, ec.EVENT_OBJ_CREATED) diff --git a/simplyblock_core/controllers/storage_events.py b/simplyblock_core/controllers/storage_events.py index b73890cd8..486daa8ee 100644 --- a/simplyblock_core/controllers/storage_events.py +++ b/simplyblock_core/controllers/storage_events.py @@ -3,6 +3,8 @@ from simplyblock_core.controllers import events_controller as ec from simplyblock_core.models.events import EventObj +from simplyblock_core.db_controller import DBController +from simplyblock_core import utils, constants logger = logging.getLogger() @@ -19,6 +21,8 @@ def snode_add(node): def snode_delete(node): + db_controller = DBController() + cluster = db_controller.get_cluster_by_id(node.cluster_id) ec.log_event_cluster( cluster_id=node.cluster_id, domain=ec.DOMAIN_CLUSTER, @@ -27,9 +31,21 @@ def snode_delete(node): caused_by=ec.CAUSED_BY_CLI, message=f"Storage node deleted {node.get_id()}", node_id=node.get_id()) - + if cluster.mode == "kubernetes": + utils.patch_cr_node_status( + 
group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=node.cr_plural, + namespace=node.cr_namespace, + name=node.cr_name, + node_uuid=node.get_id(), + node_mgmt_ip=node.mgmt_ip, + remove=True, + ) def snode_status_change(node, new_state, old_status, caused_by=ec.CAUSED_BY_CLI): + db_controller = DBController() + cluster = db_controller.get_cluster_by_id(node.cluster_id) ec.log_event_cluster( cluster_id=node.cluster_id, domain=ec.DOMAIN_CLUSTER, @@ -38,9 +54,22 @@ def snode_status_change(node, new_state, old_status, caused_by=ec.CAUSED_BY_CLI) caused_by=caused_by, message=f"Storage node status changed from: {old_status} to: {new_state}", node_id=node.get_id()) + if cluster.mode == "kubernetes": + utils.patch_cr_node_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=node.cr_plural, + namespace=node.cr_namespace, + name=node.cr_name, + node_uuid=node.get_id(), + node_mgmt_ip=node.mgmt_ip, + updates={"status": new_state}, + ) def snode_health_check_change(node, new_state, old_status, caused_by=ec.CAUSED_BY_CLI): + db_controller = DBController() + cluster = db_controller.get_cluster_by_id(node.cluster_id) ec.log_event_cluster( cluster_id=node.cluster_id, domain=ec.DOMAIN_CLUSTER, @@ -49,7 +78,17 @@ def snode_health_check_change(node, new_state, old_status, caused_by=ec.CAUSED_B caused_by=caused_by, message=f"Storage node health check changed from: {old_status} to: {new_state}", node_id=node.get_id()) - + if cluster.mode == "kubernetes": + utils.patch_cr_node_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=node.cr_plural, + namespace=node.cr_namespace, + name=node.cr_name, + node_uuid=node.get_id(), + node_mgmt_ip=node.mgmt_ip, + updates={"health": new_state}, + ) def snode_restart_failed(node): ec.log_event_cluster( @@ -72,3 +111,40 @@ def snode_rpc_timeout(node, timeout_seconds, caused_by=ec.CAUSED_BY_MONITOR): event_level=EventObj.LEVEL_WARN, message=f"Storage node RPC timeout detected after 
{timeout_seconds} seconds", node_id=node.get_id()) + + +def jm_repl_tasks_found(node, jm_vuid, caused_by=ec.CAUSED_BY_MONITOR): + ec.log_event_cluster( + cluster_id=node.cluster_id, + domain=ec.DOMAIN_CLUSTER, + event=ec.EVENT_STATUS_CHANGE, + db_object=node, + caused_by=caused_by, + event_level=EventObj.LEVEL_WARN, + message=f"JM replication task found for jm {jm_vuid}", + node_id=node.get_id()) + + +def node_ports_changed(node, caused_by=ec.CAUSED_BY_MONITOR): + db_controller = DBController() + cluster = db_controller.get_cluster_by_id(node.cluster_id) + ec.log_event_cluster( + cluster_id=node.cluster_id, + domain=ec.DOMAIN_CLUSTER, + event=ec.EVENT_STATUS_CHANGE, + db_object=node, + caused_by=caused_by, + event_level=EventObj.LEVEL_WARN, + message=f"Storage node ports set, LVol:{node.lvol_subsys_port} RPC:{node.rpc_port} Internal:{node.nvmf_port}", + node_id=node.get_id()) + if cluster.mode == "kubernetes": + utils.patch_cr_node_status( + group=constants.CR_GROUP, + version=constants.CR_VERSION, + plural=node.cr_plural, + namespace=node.cr_namespace, + name=node.cr_name, + node_uuid=node.get_id(), + node_mgmt_ip=node.mgmt_ip, + updates={"nvmf_port": node.nvmf_port, "rpc_port": node.rpc_port, "lvol_port": node.lvol_subsys_port}, + ) diff --git a/simplyblock_core/controllers/tasks_controller.py b/simplyblock_core/controllers/tasks_controller.py index 689027d08..eb3184068 100644 --- a/simplyblock_core/controllers/tasks_controller.py +++ b/simplyblock_core/controllers/tasks_controller.py @@ -70,6 +70,18 @@ def _add_task(function_name, cluster_id, node_id, device_id, if task_id: logger.info(f"Task found, skip adding new task: {task_id}") return False + elif function_name == JobSchedule.FN_LVOL_SYNC_DEL: + task_id = get_lvol_sync_del_task(cluster_id, node_id, function_params['lvol_bdev_name']) + if task_id: + logger.info(f"Task found, skip adding new task: {task_id}") + return False + + elif function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + task_id = 
get_snapshot_replication_task( + cluster_id, function_params['snapshot_id'], function_params['replicate_to_source']) + if task_id: + logger.info(f"Task found, skip adding new task: {task_id}") + return False task_obj = JobSchedule() task_obj.uuid = str(uuid.uuid4()) @@ -95,11 +107,13 @@ def add_device_mig_task(device_id_list, cluster_id): device = db.get_storage_device_by_id(device_id_list[0]) tasks = db.get_job_tasks(cluster_id) + master_task = None for task in tasks: if task.function_name == JobSchedule.FN_BALANCING_AFTER_NODE_RESTART : if task.status != JobSchedule.STATUS_DONE and task.canceled is False: - logger.info(f"Task found, skip adding new task: {task.get_id()}") - return False + logger.info("Master task found, skip adding new master task") + master_task = task + break for node in db.get_storage_nodes_by_cluster_id(cluster_id): if node.status == StorageNode.STATUS_REMOVED: @@ -112,16 +126,19 @@ def add_device_mig_task(device_id_list, cluster_id): if task_id: sub_tasks.append(task_id) if sub_tasks: - task_obj = JobSchedule() - task_obj.uuid = str(uuid.uuid4()) - task_obj.cluster_id = cluster_id - task_obj.date = int(time.time()) - task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART - task_obj.sub_tasks = sub_tasks - task_obj.status = JobSchedule.STATUS_NEW - task_obj.write_to_db(db.kv_store) - tasks_events.task_create(task_obj) - + if master_task: + master_task.sub_tasks.extend(sub_tasks) + master_task.write_to_db() + else: + task_obj = JobSchedule() + task_obj.uuid = str(uuid.uuid4()) + task_obj.cluster_id = cluster_id + task_obj.date = int(time.time()) + task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART + task_obj.sub_tasks = sub_tasks + task_obj.status = JobSchedule.STATUS_NEW + task_obj.write_to_db(db.kv_store) + tasks_events.task_create(task_obj) return True @@ -135,10 +152,13 @@ def add_node_to_auto_restart(node): Cluster.STATUS_READONLY, Cluster.STATUS_UNREADY]: logger.warning(f"Cluster is not active, skip node 
auto restart, status: {cluster.status}") return False + offline_nodes = 0 for sn in db.get_storage_nodes_by_cluster_id(node.cluster_id): if node.get_id() != sn.get_id() and sn.status != StorageNode.STATUS_ONLINE and node.mgmt_ip != sn.mgmt_ip: - logger.info("Node found that is not online, skip node auto restart") - return False + offline_nodes += 1 + if offline_nodes > cluster.distr_npcs : + logger.info("Node found that is not online, skip node auto restart") + return False return _add_task(JobSchedule.FN_NODE_RESTART, node.cluster_id, node.get_id(), "", max_retry=11) @@ -150,23 +170,26 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): return False data = [] - tasks = db.get_job_tasks(cluster_id, reverse=True, limit=limit) + tasks = db.get_job_tasks(cluster_id, reverse=True) tasks.reverse() if is_json is True: for t in tasks: if t.function_name == JobSchedule.FN_DEV_MIG: continue data.append(t.get_clean_dict()) + if len(data)+1 > limit > 0: + return json.dumps(data, indent=2) return json.dumps(data, indent=2) for task in tasks: if task.function_name == JobSchedule.FN_DEV_MIG: continue + logger.debug(task) if task.max_retry > 0: retry = f"{task.retry}/{task.max_retry}" else: retry = f"{task.retry}" - + logger.debug(task) upd = task.updated_at if upd: try: @@ -192,6 +215,8 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): "Result": task.function_result, "Updated At": upd or "", }) + if len(data)+1 > limit > 0: + return utils.print_table(data) return utils.print_table(data) @@ -234,6 +259,7 @@ def get_subtasks(master_task_id): except Exception as e: logger.error(e) + logger.debug(sub_task) data.append({ "Task ID": sub_task.uuid, "Node ID / Device ID": f"{sub_task.node_id}\n{sub_task.device_id}", @@ -303,7 +329,8 @@ def add_new_device_mig_task(device_id): def add_node_add_task(cluster_id, function_params): - return _add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", function_params=function_params) + return 
_add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", + function_params=function_params, max_retry=16) def get_active_node_tasks(cluster_id, node_id): @@ -334,7 +361,7 @@ def get_new_device_mig_task(cluster_id, node_id, distr_name, dev_id=None): def get_device_mig_task(cluster_id, node_id, device_id, distr_name): tasks = db.get_job_tasks(cluster_id) for task in tasks: - if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id and task.device_id == device_id: + if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id: if task.status != JobSchedule.STATUS_DONE and task.canceled is False \ and "distr_name" in task.function_params and task.function_params["distr_name"] == distr_name: return task.uuid @@ -386,3 +413,58 @@ def get_jc_comp_task(cluster_id, node_id, jm_vuid=0): if jm_vuid and "jm_vuid" in task.function_params and task.function_params["jm_vuid"] == jm_vuid: return task.uuid return False + + +def add_lvol_sync_del_task(cluster_id, node_id, lvol_bdev_name, primary_node): + return _add_task(JobSchedule.FN_LVOL_SYNC_DEL, cluster_id, node_id, "", + function_params={"lvol_bdev_name": lvol_bdev_name, "primary_node": primary_node}, max_retry=10) + +def get_lvol_sync_del_task(cluster_id, node_id, lvol_bdev_name=None): + tasks = db.get_job_tasks(cluster_id) + for task in tasks: + if task.function_name == JobSchedule.FN_LVOL_SYNC_DEL and task.node_id == node_id : + if task.status != JobSchedule.STATUS_DONE and task.canceled is False: + if lvol_bdev_name: + if "lvol_bdev_name" in task.function_params and task.function_params["lvol_bdev_name"] == lvol_bdev_name: + return task.uuid + else: + return task.uuid + return False + +def get_snapshot_replication_task(cluster_id, snapshot_id, replicate_to_source): + tasks = db.get_job_tasks(cluster_id) + for task in tasks: + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION and task.function_params["snapshot_id"] == snapshot_id: + if task.status != JobSchedule.STATUS_DONE and 
task.canceled is False: + if task.function_params["replicate_to_source"] == replicate_to_source: + return task.uuid + return False + + +def _check_snap_instance_on_node(snapshot_id: str , node_id: str): + snapshot = db.get_snapshot_by_id(snapshot_id) + for sn_inst in snapshot.instances: + if sn_inst.lvol.node_id == node_id: + logger.info("Snapshot instance found on node, skip adding replication task") + return + + if snapshot.snap_ref_id: + prev_snap = db.get_snapshot_by_id(snapshot.snap_ref_id) + _check_snap_instance_on_node(prev_snap.get_id(), node_id) + + _add_task(JobSchedule.FN_SNAPSHOT_REPLICATION, snapshot.cluster_id, node_id, "", + function_params={"snapshot_id": snapshot.get_id(), "replicate_to_source": False, + "replicate_as_snap_instance": True}, + send_to_cluster_log=False) + + +def add_snapshot_replication_task(cluster_id, node_id, snapshot_id, replicate_to_source=False): + if not replicate_to_source: + snapshot = db.get_snapshot_by_id(snapshot_id) + if snapshot.snap_ref_id: + prev_snap = db.get_snapshot_by_id(snapshot.snap_ref_id) + _check_snap_instance_on_node(prev_snap.get_id(), node_id) + + return _add_task(JobSchedule.FN_SNAPSHOT_REPLICATION, cluster_id, node_id, "", + function_params={"snapshot_id": snapshot_id, "replicate_to_source": replicate_to_source}, + send_to_cluster_log=False) diff --git a/simplyblock_core/db_controller.py b/simplyblock_core/db_controller.py index 277d1b68a..ddcd4272b 100644 --- a/simplyblock_core/db_controller.py +++ b/simplyblock_core/db_controller.py @@ -2,7 +2,7 @@ import os.path import fdb -from typing import List +from typing import List, Optional from simplyblock_core import constants from simplyblock_core.models.cluster import Cluster @@ -17,8 +17,7 @@ from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.stats import DeviceStatObject, NodeStatObject, ClusterStatObject, LVolStatObject, \ PoolStatObject, CachedLVolStatObject -from simplyblock_core.models.storage_node import StorageNode 
- +from simplyblock_core.models.storage_node import StorageNode, NodeLVolDelLock class Singleton(type): @@ -159,9 +158,11 @@ def get_hostnames_by_pool_id(self, pool_id) -> List[str]: hostnames.append(lv.hostname) return hostnames - def get_snapshots(self) -> List[SnapShot]: - ret = SnapShot().read_from_db(self.kv_store) - return ret + def get_snapshots(self, cluster_id=None) -> List[SnapShot]: + snaps = SnapShot().read_from_db(self.kv_store) + if cluster_id: + snaps = [n for n in snaps if n.cluster_id == cluster_id] + return sorted(snaps, key=lambda x: x.created_at) def get_snapshot_by_id(self, id) -> SnapShot: ret = SnapShot().read_from_db(self.kv_store, id) @@ -258,7 +259,9 @@ def get_events(self, event_id=" ", limit=0, reverse=False) -> List[EventObj]: return EventObj().read_from_db(self.kv_store, id=event_id, limit=limit, reverse=reverse) def get_job_tasks(self, cluster_id, reverse=True, limit=0) -> List[JobSchedule]: - return JobSchedule().read_from_db(self.kv_store, id=cluster_id, reverse=reverse, limit=limit) + ret = JobSchedule().read_from_db(self.kv_store, id=cluster_id, reverse=reverse, limit=limit) + return sorted(ret, key=lambda x: x.date) + def get_task_by_id(self, task_id) -> JobSchedule: for task in self.get_job_tasks(" "): @@ -272,7 +275,7 @@ def get_snapshots_by_node_id(self, node_id) -> List[SnapShot]: for snap in snaps: if snap.lvol.node_id == node_id: ret.append(snap) - return ret + return sorted(ret, key=lambda x: x.create_dt) def get_snode_size(self, node_id) -> int: snode = self.get_storage_node_by_id(node_id) @@ -309,3 +312,10 @@ def get_qos(self, cluster_id=None) -> List[QOSClass]: else: classes = QOSClass().read_from_db(self.kv_store) return sorted(classes, key=lambda x: x.class_id) + + def get_lvol_del_lock(self, node_id) -> Optional[NodeLVolDelLock]: + ret = NodeLVolDelLock().read_from_db(self.kv_store, id=node_id) + if ret: + return ret[0] + else: + return None diff --git a/simplyblock_core/distr_controller.py 
b/simplyblock_core/distr_controller.py index e50115f62..420b9e3fe 100644 --- a/simplyblock_core/distr_controller.py +++ b/simplyblock_core/distr_controller.py @@ -2,6 +2,7 @@ import datetime import logging import re +import threading from simplyblock_core import utils from simplyblock_core.models.nvme_device import NVMeDevice @@ -26,6 +27,7 @@ def send_node_status_event(node, node_status, target_node=None): events = {"events": [node_status_event]} logger.debug(node_status_event) skipped_nodes = [] + connect_threads = [] if target_node: snodes = [target_node] else: @@ -45,10 +47,14 @@ def send_node_status_event(node, node_status, target_node=None): if node_found_same_host: continue logger.info(f"Sending to: {node.get_id()}") - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: - logger.warning("Failed to send event update") + t = threading.Thread( + target=_send_event_to_node, + args=(node, events,)) + connect_threads.append(t) + t.start() + + for t in connect_threads: + t.join() def send_dev_status_event(device, status, target_node=None): @@ -57,7 +63,7 @@ def send_dev_status_event(device, status, target_node=None): db_controller = DBController() storage_ID = device.cluster_device_order skipped_nodes = [] - + connect_threads = [] if target_node: snodes = [db_controller.get_storage_node_by_id(target_node.get_id())] else: @@ -67,7 +73,8 @@ def send_dev_status_event(device, status, target_node=None): skipped_nodes.append(node) for node in snodes: - if node.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + if node.status in [StorageNode.STATUS_OFFLINE, StorageNode.STATUS_REMOVED]: + logger.info(f"skipping node: {node.get_id()} with status: {node.status}") continue node_found_same_host = False for n in skipped_nodes: @@ -95,10 +102,14 @@ def send_dev_status_event(device, status, 
target_node=None): "storage_ID": storage_ID, "status": dev_status}]} logger.debug(f"Sending event updates, device: {storage_ID}, status: {dev_status}, node: {node.get_id()}") - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: - logger.warning("Failed to send event update") + t = threading.Thread( + target=_send_event_to_node, + args=(node,events,)) + connect_threads.append(t) + t.start() + + for t in connect_threads: + t.join() def disconnect_device(device): @@ -192,12 +203,20 @@ def get_distr_cluster_map(snodes, target_node, distr_name=""): return cl_map -def parse_distr_cluster_map(map_string): +def parse_distr_cluster_map(map_string, nodes=None, devices=None): db_controller = DBController() node_pattern = re.compile(r".*uuid_node=(.*) status=(.*)$", re.IGNORECASE) device_pattern = re.compile( r".*storage_ID=(.*) status=(.*) uuid_device=(.*) storage_bdev_name=(.*)$", re.IGNORECASE) + if not nodes or not devices: + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + results = [] passed = True for line in map_string.split('\n'): @@ -213,8 +232,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - nd = db_controller.get_storage_node_by_id(node_id) - node_status = nd.status + node_status = nodes[node_id].status if node_status == StorageNode.STATUS_SCHEDULABLE: node_status = StorageNode.STATUS_UNREACHABLE data["Desired Status"] = node_status @@ -238,7 +256,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - sd = db_controller.get_storage_device_by_id(device_id) + sd = devices[device_id] data["Desired Status"] = sd.status if sd.status == status: data["Results"] = "ok" @@ -252,38 +270,26 @@ def parse_distr_cluster_map(map_string): return results, passed -def send_cluster_map_to_node(node): +def 
send_cluster_map_to_node(node: StorageNode): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) - - # if node.lvstore_stack_secondary_1: - # for snode in db_controller.get_primary_storage_nodes_by_secondary_node_id(node.get_id()): - # for bdev in snode.lvstore_stack: - # if bdev['type'] == "bdev_distr": - # cluster_map_data = get_distr_cluster_map(snodes, node, bdev["name"]) - # ret = rpc_client.distr_send_cluster_map(cluster_map_data) - # if not ret: - # logger.error("Failed to send cluster map") - # return False - # return True - # else: cluster_map_data = get_distr_cluster_map(snodes, node) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send cluster map") logger.info(cluster_map_data) return False return True -def send_cluster_map_to_distr(node, distr_name): +def send_cluster_map_to_distr(node: StorageNode, distr_name: str): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) cluster_map_data = get_distr_cluster_map(snodes, node, distr_name) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send cluster map") logger.info(cluster_map_data) return False @@ -294,14 +300,13 @@ def send_cluster_map_add_node(snode, target_node): if target_node.status != StorageNode.STATUS_ONLINE: return False logger.info(f"Sending to: {target_node.get_id()}") - rpc_client = RPCClient(target_node.mgmt_ip, target_node.rpc_port, target_node.rpc_username, target_node.rpc_password, timeout=5) - 
cluster_map_data = get_distr_cluster_map([snode], target_node) cl_map = { "map_cluster": cluster_map_data['map_cluster'], "map_prob": cluster_map_data['map_prob']} - ret = rpc_client.distr_add_nodes(cl_map) - if not ret: + try: + target_node.rpc_client(timeout=10).distr_add_nodes(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True @@ -353,10 +358,20 @@ def send_cluster_map_add_device(device: NVMeDevice, target_node: StorageNode): "bdev_name": name, "status": device.status, "weight": dev_w_gib, + "physical_label": device.physical_label if device.physical_label > 0 else -1, }} } - ret = rpc_client.distr_add_devices(cl_map) - if not ret: + try: + rpc_client.distr_add_devices(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True + + +def _send_event_to_node(node, events): + try: + node.rpc_client(timeout=1, retry=0).distr_status_events_update(events) + except Exception as e: + logger.warning("Failed to send event update") + logger.error(e) diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index f3e377ee4..4c8c24d06 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -1,6 +1,6 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev -SIMPLY_BLOCK_VERSION=19.2.23 +SIMPLY_BLOCK_VERSION=19.2.24 -SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:main -SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:main-latest +SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:main-sfam-2359 +SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:transfer-feature-latest diff --git a/simplyblock_core/fw_api_client.py b/simplyblock_core/fw_api_client.py index d17255c80..8f089ce5c 100644 --- a/simplyblock_core/fw_api_client.py +++ b/simplyblock_core/fw_api_client.py @@ -6,6 +6,7 @@ from requests.adapters import HTTPAdapter from urllib3 import Retry + logger = logging.getLogger() @@ -18,7 +19,7 @@ class FirewallClient: def __init__(self, 
node, timeout=300, retry=5): self.node = node - self.ip_address = f"{node.mgmt_ip}:5001" + self.ip_address = f"{node.mgmt_ip}:{node.firewall_port}" self.url = 'http://%s/' % self.ip_address self.timeout = timeout self.session = requests.session() @@ -41,7 +42,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as e: - raise e + raise FirewallClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) diff --git a/simplyblock_core/mgmt_node_ops.py b/simplyblock_core/mgmt_node_ops.py index 84375d819..6d752a86c 100644 --- a/simplyblock_core/mgmt_node_ops.py +++ b/simplyblock_core/mgmt_node_ops.py @@ -106,18 +106,13 @@ def deploy_mgmt_node(cluster_ip, cluster_id, ifname, mgmt_ip, cluster_secret, mo logger.info(f"Node IP: {dev_ip}") - hostname = utils.get_node_name_by_ip(dev_ip) - utils.label_node_as_mgmt_plane(hostname) db_connection = cluster_data['db_connection'] db_controller = DBController() nodes = db_controller.get_mgmt_nodes() if not nodes: logger.error("No mgmt nodes was found in the cluster!") return False - for node in nodes: - if node.hostname == hostname: - logger.error("Node already exists in the cluster") - return False + logger.info("Adding management node object") node_id = add_mgmt_node(dev_ip, mode, cluster_id) @@ -225,10 +220,9 @@ def deploy_mgmt_node(cluster_ip, cluster_id, ifname, mgmt_ip, cluster_secret, mo def add_mgmt_node(mgmt_ip, mode, cluster_id=None): db_controller = DBController() + hostname = "" if mode == "docker": hostname = utils.get_hostname() - elif mode == "kubernetes": - hostname = utils.get_node_name_by_ip(mgmt_ip) try: node = db_controller.get_mgmt_node_by_hostname(hostname) if node: diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index fd4802771..bcf111f1b 100644 --- a/simplyblock_core/models/cluster.py +++ 
b/simplyblock_core/models/cluster.py @@ -45,7 +45,7 @@ class Cluster(BaseModel): distr_npcs: int = 0 enable_node_affinity: bool = False grafana_endpoint: str = "" - mode: str = "" + mode: str = "docker" grafana_secret: str = "" contact_point: str = "" ha_type: str = "single" @@ -63,12 +63,18 @@ class Cluster(BaseModel): fabric_rdma: bool = False client_qpair_count: int = 3 secret: str = "" + cr_name: str = "" + cr_namespace: str = "" + cr_plural: str = "" disable_monitoring: bool = False strict_node_anti_affinity: bool = False tls: bool = False is_re_balancing: bool = False full_page_unmap: bool = True is_single_node: bool = False + snapshot_replication_target_cluster: str = "" + snapshot_replication_target_pool: str = "" + snapshot_replication_timeout: int = 60*10 def get_status_code(self): if self.status in self.STATUS_CODE_MAP: diff --git a/simplyblock_core/models/job_schedule.py b/simplyblock_core/models/job_schedule.py index 3d87a9aca..3a20b3499 100644 --- a/simplyblock_core/models/job_schedule.py +++ b/simplyblock_core/models/job_schedule.py @@ -22,6 +22,8 @@ class JobSchedule(BaseModel): FN_BALANCING_AFTER_DEV_REMOVE = "balancing_on_dev_rem" FN_BALANCING_AFTER_DEV_EXPANSION = "balancing_on_dev_add" FN_JC_COMP_RESUME = "jc_comp_resume" + FN_SNAPSHOT_REPLICATION = "snapshot_replication" + FN_LVOL_SYNC_DEL = "lvol_sync_del" canceled: bool = False cluster_id: str = "" diff --git a/simplyblock_core/models/lvol_model.py b/simplyblock_core/models/lvol_model.py index f84091473..a67032c53 100644 --- a/simplyblock_core/models/lvol_model.py +++ b/simplyblock_core/models/lvol_model.py @@ -66,6 +66,9 @@ class LVol(BaseModel): fabric: str = "tcp" ndcs: int = 0 npcs: int = 0 + do_replicate: bool = False + replication_node_id: str = "" + from_source: bool = True def has_qos(self): return (self.rw_ios_per_sec > 0 or self.rw_mbytes_per_sec > 0 or self.r_mbytes_per_sec > 0 or self.w_mbytes_per_sec > 0) diff --git a/simplyblock_core/models/nvme_device.py 
b/simplyblock_core/models/nvme_device.py index b86e25c44..82749e30a 100644 --- a/simplyblock_core/models/nvme_device.py +++ b/simplyblock_core/models/nvme_device.py @@ -47,25 +47,39 @@ class NVMeDevice(BaseModel): nvmf_nqn: str = "" nvmf_port: int = 0 nvmf_multipath: bool = False - overload_percentage: int = 0 # Unused - partition_jm_bdev: str = "" # Unused - partition_jm_size: int = 0 # Unused - partition_main_bdev: str = "" # Unused - partition_main_size: int = 0 # Unused - partitions_count: int = 0 # Unused pcie_address: str = "" physical_label: int = 0 pt_bdev: str = "" qos_bdev: str = "" remote_bdev: str = "" retries_exhausted: bool = False - sequential_number: int = 0 # Unused serial_number: str = "" size: int = -1 testing_bdev: str = "" connecting_from_node: str = "" previous_status: str = "" + def __change_dev_connection_to(self, connecting_from_node): + from simplyblock_core.db_controller import DBController + db = DBController() + for n in db.get_storage_nodes(): + if n.nvme_devices: + for d in n.nvme_devices: + if d.get_id() == self.get_id(): + d.connecting_from_node = connecting_from_node + n.write_to_db() + break + + def lock_device_connection(self, node_id): + self.__change_dev_connection_to(node_id) + + def release_device_connection(self): + self.__change_dev_connection_to("") + + def is_connection_in_progress_to_node(self, node_id): + if self.connecting_from_node and self.connecting_from_node == node_id: + return True + class JMDevice(NVMeDevice): @@ -73,3 +87,18 @@ class JMDevice(NVMeDevice): jm_bdev: str = "" jm_nvme_bdev_list: List[str] = [] raid_bdev: str = "" + + +class RemoteDevice(BaseModel): + + remote_bdev: str = "" + alceml_name: str = "" + node_id: str = "" + size: int = -1 + nvmf_multipath: bool = False + + +class RemoteJMDevice(RemoteDevice): + + jm_bdev: str = "" + diff --git a/simplyblock_core/models/pool.py b/simplyblock_core/models/pool.py index 27b2a23e5..683eafe1e 100644 --- a/simplyblock_core/models/pool.py +++ 
b/simplyblock_core/models/pool.py @@ -29,6 +29,12 @@ class Pool(BaseModel): secret: str = "" # unused users: List[str] = [] qos_host: str = "" + cr_name: str = "" + cr_namespace: str = "" + cr_plural: str = "" + lvols_cr_name: str = "" + lvols_cr_namespace: str = "" + lvols_cr_plural: str = "" def has_qos(self): diff --git a/simplyblock_core/models/snapshot.py b/simplyblock_core/models/snapshot.py index 1da571ec8..ab91a0087 100644 --- a/simplyblock_core/models/snapshot.py +++ b/simplyblock_core/models/snapshot.py @@ -9,6 +9,7 @@ class SnapShot(BaseModel): STATUS_ONLINE = 'online' STATUS_OFFLINE = 'offline' STATUS_IN_DELETION = 'in_deletion' + STATUS_IN_REPLICATION = 'in_replication' base_bdev: str = "" blobid: int = 0 @@ -29,3 +30,8 @@ class SnapShot(BaseModel): deletion_status: str = "" status: str = "" fabric: str = "tcp" + target_replicated_snap_uuid: str = "" + source_replicated_snap_uuid: str = "" + next_snap_uuid: str = "" + prev_snap_uuid: str = "" + instances: list = [] \ No newline at end of file diff --git a/simplyblock_core/models/storage_node.py b/simplyblock_core/models/storage_node.py index 8c76d3649..4dd24b9e6 100644 --- a/simplyblock_core/models/storage_node.py +++ b/simplyblock_core/models/storage_node.py @@ -1,13 +1,14 @@ # coding=utf-8 - +import time from typing import List from uuid import uuid4 from simplyblock_core import utils -from simplyblock_core.models.base_model import BaseNodeObject +from simplyblock_core.models.base_model import BaseNodeObject, BaseModel from simplyblock_core.models.hublvol import HubLVol from simplyblock_core.models.iface import IFace -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.job_schedule import JobSchedule +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.rpc_client import RPCClient, RPCException logger = utils.get_logger(__name__) @@ -79,8 +80,8 @@ class StorageNode(BaseNodeObject): 
pollers_mask: str = "" primary_ip: str = "" raid: str = "" - remote_devices: List[NVMeDevice] = [] - remote_jm_devices: List[JMDevice] = [] + remote_devices: List[RemoteDevice] = [] + remote_jm_devices: List[RemoteJMDevice] = [] rpc_password: str = "" rpc_port: int = -1 rpc_username: str = "" @@ -97,12 +98,17 @@ class StorageNode(BaseNodeObject): subsystem: str = "" system_uuid: str = "" lvstore_status: str = "" + cr_name: str = "" + cr_namespace: str = "" + cr_plural: str = "" nvmf_port: int = 4420 physical_label: int = 0 hublvol: HubLVol = None # type: ignore[assignment] active_tcp: bool = True active_rdma: bool = False - lvol_sync_del_queue: List[str] = [] + socket: int = 0 + firewall_port: int = 5001 + lvol_poller_mask: str = "" def rpc_client(self, **kwargs): """Return rpc client to this node @@ -303,3 +309,70 @@ def create_alceml(self, name, nvme_bdev, uuid, **kwargs): alceml_worker_cpu_mask=alceml_worker_cpu_mask, **kwargs, ) + + def wait_for_jm_rep_tasks_to_finish(self, jm_vuid): + if not self.rpc_client().bdev_lvol_get_lvstores(self.lvstore): + return True # no lvstore means no need to wait + retry = 10 + while retry > 0: + try: + jm_replication_tasks = False + ret = self.rpc_client().jc_get_jm_status(jm_vuid) + for jm in ret: + if ret[jm] is False: # jm is not ready (has active replication task) + jm_replication_tasks = True + break + if jm_replication_tasks: + logger.warning(f"Replication task found on node: {self.get_id()}, jm_vuid: {jm_vuid}, retry...") + retry -= 1 + time.sleep(20) + else: + return True + except Exception: + logger.warning("Failed to get replication task!") + return False + + def lvol_sync_del(self) -> bool: + from simplyblock_core.db_controller import DBController + db_controller = DBController() + lock = db_controller.get_lvol_del_lock(self.get_id()) + if lock: + return True + return False + + def lvol_del_sync_lock(self) -> bool: + from simplyblock_core.db_controller import DBController + db_controller = DBController() + lock = 
db_controller.get_lvol_del_lock(self.get_id()) + if not lock: + lock = NodeLVolDelLock({"uuid": self.uuid}) + lock.write_to_db() + logger.info(f"Created lvol_del_sync_lock on node: {self.get_id()}") + return True + + def lvol_del_sync_lock_reset(self) -> bool: + from simplyblock_core.db_controller import DBController + db_controller = DBController() + task_found = False + tasks = db_controller.get_job_tasks(self.cluster_id) + for task in tasks: + if task.function_name == JobSchedule.FN_LVOL_SYNC_DEL and task.node_id == self.secondary_node_id: + if task.status != JobSchedule.STATUS_DONE and task.canceled is False: + task_found = True + break + + lock = db_controller.get_lvol_del_lock(self.get_id()) + if task_found: + if not lock: + lock = NodeLVolDelLock({"uuid": self.uuid}) + lock.write_to_db() + logger.info(f"Created lvol_del_sync_lock on node: {self.get_id()}") + else: + if lock: + lock.remove(db_controller.kv_store) + logger.info(f"remove lvol_del_sync_lock from node: {self.get_id()}") + return True + + +class NodeLVolDelLock(BaseModel): + pass \ No newline at end of file diff --git a/simplyblock_core/prom_client.py b/simplyblock_core/prom_client.py new file mode 100644 index 000000000..833d42b36 --- /dev/null +++ b/simplyblock_core/prom_client.py @@ -0,0 +1,130 @@ +import logging +import re +from datetime import datetime, timedelta + +from simplyblock_core import constants +from simplyblock_core.db_controller import DBController +from simplyblock_core.models.mgmt_node import MgmtNode + +from prometheus_api_client import PrometheusConnect + +logger = logging.getLogger() + + +class PromClientException(Exception): + def __init__(self, message): + self.message = message + + +class PromClient: + + def __init__(self, cluster_id): + db_controller = DBController() + cluster_ip = None + cluster = db_controller.get_cluster_by_id(cluster_id) + if cluster.mode == "docker": + for node in db_controller.get_mgmt_nodes(): + if node.cluster_id == cluster_id and node.status == 
MgmtNode.STATUS_ONLINE: + cluster_ip = node.mgmt_ip + break + if cluster_ip is None: + raise PromClientException("Cluster has no online mgmt nodes") + else: + cluster_ip = constants.PROMETHEUS_STATEFULSET_NAME + self.ip_address = f"{cluster_ip}:9090" + self.url = 'http://%s/' % self.ip_address + self.client = PrometheusConnect(url=self.url, disable_ssl=True) + + def parse_history_param(self, history_string): + if not history_string: + logger.error("Invalid history value") + return False + + # process history + results = re.search(r'^(\d+[hmd])(\d+[hmd])?$', history_string.lower()) + if not results: + logger.error(f"Error parsing history string: {history_string}") + logger.info("History format: xxdyyh , e.g: 1d12h, 1d, 2h, 1m") + return False + + history_in_days = 0 + history_in_hours = 0 + history_in_minutes = 0 + for s in results.groups(): + if not s: + continue + ind = s[-1] + v = int(s[:-1]) + if ind == 'd': + history_in_days = v + if ind == 'h': + history_in_hours = v + if ind == 'm': + history_in_minutes = v + + history_in_hours += int(history_in_minutes/60) + history_in_minutes = history_in_minutes % 60 + history_in_days += int(history_in_hours/24) + history_in_hours = history_in_hours % 24 + return history_in_days, history_in_hours, history_in_minutes + + def get_metrics(self, key_prefix, metrics_lst, params, history=None): + start_time = datetime.now() - timedelta(minutes=10) + if history: + try: + days,hours,minutes = self.parse_history_param(history) + start_time = datetime.now() - timedelta(days=days, hours=hours, minutes=minutes) + except Exception: + raise PromClientException(f"Error parsing history string: {history}") + end_time = datetime.now() + data_out: list[dict] = [] + for key in metrics_lst: + metrics = self.client.get_metric_range_data( + f"{key_prefix}_{key}", label_config=params, start_time=start_time, end_time=end_time) + for m in metrics: + mt_name = key + mt_values = m["values"] + for i, v in enumerate(mt_values): + value = v[1] + try: + 
value = int(value) + except Exception: + pass + if len(data_out) <= i: + data_out.append({mt_name: value}) + else: + d = data_out[i] + if mt_name not in d: + d[mt_name] = value + + return data_out + + def get_cluster_metrics(self, cluster_uuid, metrics_lst, history=None): + params = { + "cluster": cluster_uuid + } + return self.get_metrics("cluster", metrics_lst, params, history) + + def get_node_metrics(self, snode_uuid, metrics_lst, history=None): + params = { + "snode": snode_uuid + } + return self.get_metrics("snode", metrics_lst, params, history) + + def get_device_metrics(self, device_uuid, metrics_lst, history=None): + params = { + "device": device_uuid + } + return self.get_metrics("device", metrics_lst, params, history) + + def get_lvol_metrics(self, lvol_uuid, metrics_lst, history=None): + params = { + "lvol": lvol_uuid + } + return self.get_metrics("lvol", metrics_lst, params, history) + + def get_pool_metrics(self, pool_uuid, metrics_lst, history=None): + params = { + "pool": pool_uuid + } + return self.get_metrics("pool", metrics_lst, params, history) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 62f37b1e9..abfd5a216 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -109,11 +109,10 @@ def _request2(self, method, params=None): if params: payload['params'] = params try: - logger.debug("Requesting method: %s, params: %s", method, params) + logger.debug("From: %s, Requesting method: %s, params: %s", self.ip_address, method, params) response = self.session.post(self.url, data=json.dumps(payload), timeout=self.timeout) - except Exception as e: - logger.error(e) - return False, str(e) + except Exception: + raise RPCException("connection error") ret_code = response.status_code ret_content = response.content @@ -306,7 +305,7 @@ def ultra21_alloc_ns_init(self, pci_addr): } return self._request2("ultra21_alloc_ns_init", params) - def nvmf_subsystem_add_ns(self, nqn, dev_name, uuid=None, 
nguid=None, nsid=None): + def nvmf_subsystem_add_ns(self, nqn, dev_name, uuid=None, nguid=None, nsid=None, eui64=None): params = { "nqn": nqn, "namespace": { @@ -323,6 +322,11 @@ def nvmf_subsystem_add_ns(self, nqn, dev_name, uuid=None, nguid=None, nsid=None) if nsid: params['namespace']['nsid'] = nsid + if eui64: + params['namespace']['eui64'] = eui64 + params['namespace']['ptpl_file'] = "/mnt/ns_resv"+eui64+".json" + + return self._request("nvmf_subsystem_add_ns", params) def nvmf_subsystem_remove_ns(self, nqn, nsid): @@ -379,11 +383,11 @@ def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0, ndcs=0 "clear_method": "unmap", "lvol_priority_class": lvol_priority_class, } - # if ndcs or npcs: - # params.update({ - # 'ndcs' : ndcs, - # 'npcs' : npcs, - # }) + if ndcs or npcs: + params.update({ + 'ndcs' : ndcs, + 'npcs' : npcs, + }) return self._request("bdev_lvol_create", params) def delete_lvol(self, name, del_async=False): @@ -581,7 +585,7 @@ def get_lvol_stats(self, uuid=""): params["uuid"] = uuid return self._request("bdev_get_iostat", params) - def bdev_raid_create(self, name, bdevs_list, raid_level="0", strip_size_kb=4): + def bdev_raid_create(self, name, bdevs_list, raid_level="0", strip_size_kb=4, superblock=False): try: ret = self.get_bdevs(name) if ret: @@ -593,7 +597,8 @@ def bdev_raid_create(self, name, bdevs_list, raid_level="0", strip_size_kb=4): "raid_level": raid_level, "strip_size_kb": strip_size_kb, "base_bdevs": bdevs_list, - "io_unmap_limit": 100 + "io_unmap_limit": 100, + "superblock": superblock } if raid_level == "1": params["strip_size_kb"] = 0 @@ -886,6 +891,12 @@ def nbd_stop_disk(self, nbd_device): } return self._request("nbd_stop_disk", params) + def nbd_get_disks(self, nbd_device): + params = { + "nbd_device": nbd_device + } + return self._request("nbd_get_disks", params) + def bdev_jm_unmap_vuid(self, name, vuid): params = {"name": name, "vuid": vuid} return self._request("bdev_jm_unmap_vuid", params) @@ -922,7 +933,7 
@@ def distr_migration_status(self, name): params = {"name": name} return self._request("distr_migration_status", params) - def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=1024, jobs=4): + def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, "storage_ID": storage_ID, @@ -935,7 +946,7 @@ def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=Fals params["jobs"] = jobs return self._request("distr_migration_failure_start", params) - def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=1024, jobs=4): + def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, } @@ -954,10 +965,9 @@ def bdev_raid_add_base_bdev(self, raid_bdev, base_bdev): } return self._request("bdev_raid_add_base_bdev", params) - def bdev_raid_remove_base_bdev(self, raid_bdev, base_bdev): + def bdev_raid_remove_base_bdev(self, base_bdev): params = { - "raid_bdev": raid_bdev, - "base_bdev": base_bdev, + "name": base_bdev, } return self._request("bdev_raid_remove_base_bdev", params) @@ -1142,7 +1152,7 @@ def jc_suspend_compression(self, jm_vuid, suspend=False): "jm_vuid": jm_vuid, "suspend": suspend, } - return self._request("jc_suspend_compression", params) + return self._request2("jc_suspend_compression", params) def nvmf_subsystem_add_listener(self, nqn, trtype, traddr, trsvcid, ana_state=None): params = { @@ -1182,6 +1192,51 @@ def bdev_distrib_check_inflight_io(self, jm_vuid): } return self._request("bdev_distrib_check_inflight_io", params) + def bdev_lvol_create_poller_group(self, cpu_mask): + params = { + "cpu_mask": cpu_mask, + } + return self._request("bdev_lvol_create_poller_group", params) + + def bdev_lvol_transfer(self, lvol_name, offset, cluster_batch, gateway, 
operation): + # --operation {migrate,replicate} + params = { + "lvol_name": lvol_name, + "offset": offset, + "cluster_batch": cluster_batch, + "gateway": gateway, + "operation": operation, + } + return self._request("bdev_lvol_transfer", params) + + def bdev_lvol_transfer_stat(self, lvol_name): + """ + example: + ./rpc.py bdev_lvol_transfer_stat lvs_raid0_lvol/snapshot_1 + { + "transfer_state": "No process", + "offset": 0 + } + transfer_state values: + - No process + - In progress + - Failed + - Done + """ + params = { + "lvol_name": lvol_name, + } + return self._request("bdev_lvol_transfer_stat", params) + + def bdev_lvol_convert(self, lvol_name): + """ + convert lvol to snapshot + """ + params = { + "lvol_name": lvol_name, + } + return self._request("bdev_lvol_convert", params) + def bdev_lvol_remove_from_group(self, group_id, lvol_name_list): params = { "bdev_group_id": group_id , @@ -1229,3 +1284,16 @@ def nvmf_port_unblock_rdma(self, port): def nvmf_get_blocked_ports_rdma(self): return self._request("nvmf_get_blocked_ports") + + def bdev_lvol_add_clone(self, lvol_name, child_name): + params = { + "lvol_name": lvol_name, + "child_name": child_name, + } + return self._request("bdev_lvol_add_clone", params) + + def bdev_raid_get_bdevs(self): + params = { + "category": "online" + } + return self._request("bdev_raid_get_bdevs", params) diff --git a/simplyblock_core/scripts/charts/Chart.yaml b/simplyblock_core/scripts/charts/Chart.yaml index 9d1b62643..671f39cfa 100644 --- a/simplyblock_core/scripts/charts/Chart.yaml +++ b/simplyblock_core/scripts/charts/Chart.yaml @@ -17,20 +17,14 @@ dependencies: version: 1.4.0 repository: https://mongodb.github.io/helm-charts alias: mongodb - condition: monitoring.enabled + condition: observability.enabled - name: opensearch version: 2.9.0 repository: https://opensearch-project.github.io/helm-charts - condition: monitoring.enabled + condition: observability.enabled - name: prometheus version: "25.18.0" repository: 
"https://prometheus-community.github.io/helm-charts" - condition: monitoring.enabled - - name: openebs - version: 3.9.0 - repository: https://openebs.github.io/charts - alias: openebs - condition: openebs.enabled - name: ingress-nginx version: 4.10.1 repository: "https://kubernetes.github.io/ingress-nginx" diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockdevices.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockdevices.yaml new file mode 100644 index 000000000..272030736 --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockdevices.yaml @@ -0,0 +1,135 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblockdevices.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockDevice + listKind: SimplyBlockDeviceList + plural: simplyblockdevices + singular: simplyblockdevice + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockDevice is the Schema for the simplyblockdevices API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SimplyBlockDevice + properties: + action: + enum: + - remove + - restart + type: string + clusterName: + type: string + deviceID: + type: string + nodeUUID: + type: string + required: + - clusterName + type: object + status: + description: status defines the observed state of SimplyBlockDevice + properties: + actionStatus: + properties: + action: + type: string + message: + type: string + nodeUUID: + type: string + observedGeneration: + format: int64 + type: integer + state: + type: string + triggered: + type: boolean + updatedAt: + format: date-time + type: string + type: object + nodes: + items: + properties: + devices: + items: + properties: + health: + type: string + model: + type: string + size: + type: string + stats: + items: + properties: + capacityUtil: + format: int64 + type: integer + riops: + format: int64 + type: integer + rtp: + format: int64 + type: integer + wiops: + format: int64 + type: integer + wtp: + format: int64 + type: integer + type: object + type: array + status: + type: string + utilization: + format: int64 + type: integer + uuid: + type: string + type: object + type: array + nodeUUID: + type: string + type: object + type: array + type: object + required: + - spec + type: object + x-kubernetes-validations: + - message: nodeUUID and deviceID are required when action is specified + rule: '!(has(self.spec.action) && self.spec.action != "" && ((!has(self.spec.nodeUUID) + || self.spec.nodeUUID == "") || (!has(self.spec.deviceID) || self.spec.deviceID + == "")))' + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocklvols.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocklvols.yaml new file mode 100644 index 
000000000..8e44a687d --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocklvols.yaml @@ -0,0 +1,144 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblocklvols.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockLvol + listKind: SimplyBlockLvolList + plural: simplyblocklvols + singular: simplyblocklvol + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.lvols.length() + name: LVOLs + type: integer + name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockLvol is the Schema for the simplyblocklvols API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SimplyBlockLvol + properties: + clusterName: + type: string + poolName: + type: string + required: + - clusterName + - poolName + type: object + status: + description: status defines the observed state of SimplyBlockLvol + properties: + configured: + type: boolean + lvols: + items: + properties: + blobID: + format: int64 + type: integer + clonedFromSnap: + type: string + createDt: + format: date-time + type: string + fabric: + type: string + ha: + type: boolean + health: + type: boolean + hostname: + type: string + isCrypto: + type: boolean + lvolName: + type: string + maxNamespacesPerSubsystem: + format: int64 + type: integer + namespaceID: + format: int64 + type: integer + nodeUUID: + items: + type: string + type: array + nqn: + type: string + poolName: + type: string + poolUUID: + type: string + pvcName: + type: string + qosClass: + format: int64 + type: integer + qosIOPS: + format: int64 + type: integer + qosRTP: + format: int64 + type: integer + qosRWTP: + format: int64 + type: integer + qosWTP: + format: int64 + type: integer + size: + type: string + snapName: + type: string + status: + type: string + stripeWdata: + format: int64 + type: integer + stripeWparity: + format: int64 + type: integer + subsysPort: + format: int64 + type: integer + updateDt: + format: date-time + type: string + uuid: + type: string + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockpools.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockpools.yaml new file mode 100644 index 000000000..693322dc3 --- /dev/null +++ 
b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockpools.yaml @@ -0,0 +1,96 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblockpools.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockPool + listKind: SimplyBlockPoolList + plural: simplyblockpools + singular: simplyblockpool + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockPool is the Schema for the pools API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of Pool + properties: + action: + type: string + capacityLimit: + type: string + clusterName: + type: string + name: + type: string + qosIOPSLimit: + format: int32 + type: integer + rLimit: + format: int32 + type: integer + rwLimit: + format: int32 + type: integer + status: + type: string + wLimit: + format: int32 + type: integer + required: + - clusterName + - name + type: object + status: + description: status defines the observed state of Pool + properties: + qosHost: + type: string + qosIOPSLimit: + format: int32 + type: integer + rLimit: + format: int32 + type: integer + rwLimit: + format: int32 + type: integer + status: + type: string + uuid: + type: string + wLimit: + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocksnapshotreplications.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocksnapshotreplications.yaml new file mode 100644 index 000000000..8eebd8370 --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocksnapshotreplications.yaml @@ -0,0 +1,154 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblocksnapshotreplications.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockSnapshotReplication + listKind: SimplyBlockSnapshotReplicationList + plural: simplyblocksnapshotreplications + singular: simplyblocksnapshotreplication + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockSnapshotReplication 
is the Schema for the simplyblocksnapshotreplications + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SimplyBlockSnapshotReplication + properties: + action: + enum: + - failback + type: string + excludeVolumeIDs: + description: 'Optional: volumes to exclude from failback.' + items: + type: string + type: array + includeVolumeIDs: + description: |- + Optional: only these volumes are included in failback. + If empty, all volumes are candidates unless excluded below. + items: + type: string + type: array + interval: + description: 'snapshot replication interval in seconds (default: 300sec)' + format: int32 + type: integer + sourceCluster: + description: Source cluster for the snapshots + type: string + targetCluster: + description: Target cluster for replication + type: string + targetPool: + description: Target cluster pool for replication + type: string + timeout: + description: snapshot replication timeout + format: int32 + type: integer + volumeIDs: + description: 'Optional: list of volumes to replicate. 
Empty means + all volumes' + items: + type: string + type: array + required: + - sourceCluster + - targetCluster + - targetPool + type: object + status: + description: status defines the observed state of SimplyBlockSnapshotReplication + properties: + configured: + type: boolean + observedFailbackGeneration: + description: The metadata.generation value for which failback was + last processed. + format: int64 + type: integer + volumes: + description: Per-volume replication status + items: + description: VolumeReplicationStatus tracks the replication state + of an individual volume + properties: + errors: + description: 'Optional: list of errors encountered for this + volume' + items: + description: ReplicationError stores timestamped error messages + properties: + message: + type: string + timestamp: + format: date-time + type: string + required: + - message + - timestamp + type: object + type: array + lastReplicationTime: + description: Timestamp of the last successful replication for + this volume + format: date-time + type: string + lastSnapshotID: + description: Last snapshot ID replicated for this volume + type: string + phase: + description: Current phase for this volume + enum: + - Pending + - Running + - Completed + - Failed + - Paused + type: string + replicatedCount: + description: Number of snapshots successfully replicated + format: int32 + type: integer + volumeID: + description: Volume ID + type: string + required: + - volumeID + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstorageclusters.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstorageclusters.yaml new file mode 100644 index 000000000..cfd99fdee --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstorageclusters.yaml @@ -0,0 +1,173 @@ +--- 
+apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblockstorageclusters.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockStorageCluster + listKind: SimplyBlockStorageClusterList + plural: simplyblockstorageclusters + singular: simplyblockstoragecluster + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockStorageCluster is the Schema for the simplyblockstorageclusters + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SimplyBlockStorageCluster + properties: + action: + enum: + - activate + - expand + type: string + blkSize: + format: int32 + type: integer + capCrit: + format: int32 + type: integer + capWarn: + format: int32 + type: integer + clientQpairCount: + format: int32 + type: integer + clusterName: + type: string + distrBs: + format: int32 + type: integer + distrChunkBs: + format: int32 + type: integer + enableNodeAffinity: + type: boolean + eventLogEntries: + format: int32 + type: integer + fabric: + type: string + haType: + type: string + includeEventLog: + type: boolean + inflightIOThreshold: + format: int32 + type: integer + isSingleNode: + type: boolean + maxQueueSize: + format: int32 + type: integer + mgmtIfc: + description: Create-only + type: string + pageSizeInBlocks: + format: int32 + type: integer + provCapCrit: + format: int32 + type: integer + provCapWarn: + format: int32 + type: integer + qosClasses: + description: Updatable + type: string + qpairCount: + format: int32 + type: integer + strictNodeAntiAffinity: + type: boolean + stripeWdata: + format: int32 + type: integer + stripeWparity: + format: int32 + type: integer + required: + - clusterName + type: object + status: + description: status defines the observed state of SimplyBlockStorageCluster + properties: + MOD: + type: string + NQN: + type: string + UUID: + type: string + actionStatus: + properties: + action: + type: string + message: + type: string + nodeUUID: + type: string + observedGeneration: + format: int64 + type: integer + state: + type: string + triggered: + type: boolean + updatedAt: + format: date-time + type: string + type: object + clusterName: + type: string + configured: + type: boolean + created: + format: date-time + type: string + lastUpdated: + format: date-time + type: string 
+ mgmtNodes: + format: int32 + type: integer + rebalancing: + type: boolean + secretName: + type: string + status: + type: string + storageNodes: + format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstoragenodes.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstoragenodes.yaml new file mode 100644 index 000000000..1e6af7724 --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblockstoragenodes.yaml @@ -0,0 +1,204 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblockstoragenodes.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockStorageNode + listKind: SimplyBlockStorageNodeList + plural: simplyblockstoragenodes + singular: simplyblockstoragenode + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockStorageNode is the Schema for the storagenodes API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of StorageNode + properties: + action: + enum: + - shutdown + - restart + - suspend + - resume + - remove + type: string + addPcieToAllowList: + description: restart params + items: + type: string + type: array + clusterImage: + type: string + clusterName: + type: string + coreIsolation: + type: boolean + coreMask: + type: string + corePercentage: + format: int32 + type: integer + dataNIC: + items: + type: string + type: array + driveSizeRange: + type: string + force: + type: boolean + haJM: + type: boolean + haJmCount: + format: int32 + type: integer + idDeviceByNQN: + type: boolean + jmPercent: + format: int32 + type: integer + maxLVol: + format: int32 + type: integer + maxSize: + type: string + mgmtIfc: + type: string + nodeAddr: + type: string + nodeUUID: + description: NodeUUID is required when action is specified + type: string + nodesPerSocket: + format: int32 + type: integer + openShiftCluster: + type: boolean + partitions: + format: int32 + type: integer + pcieAllowList: + items: + type: string + type: array + pcieDenyList: + items: + type: string + type: array + pcieModel: + type: string + socketsToUse: + format: int32 + type: integer + spdkDebug: + type: boolean + spdkImage: + type: string + useSeparateJournalDevice: + type: boolean + workerNode: + type: string + workerNodes: + items: + type: string + type: array + required: + - clusterName + type: object + status: + description: status defines the observed state of StorageNode + properties: + actionStatus: + properties: + action: + type: string + message: + type: string + nodeUUID: + type: string + observedGeneration: + format: int64 + type: integer + state: + type: string + updatedAt: + format: date-time + type: string + type: object + nodes: + items: + properties: + cpu: + format: int32 + type: 
integer + devices: + type: string + health: + type: boolean + hostname: + type: string + lvol_port: + format: int32 + type: integer + memory: + type: string + mgmtIp: + type: string + nvmf_port: + format: int32 + type: integer + rpc_port: + format: int32 + type: integer + status: + type: string + uptime: + type: string + uuid: + type: string + volumes: + format: int32 + type: integer + type: object + type: array + type: object + required: + - spec + type: object + x-kubernetes-validations: + - message: nodeUUID is required when action is specified + rule: '!(has(self.spec.action) && self.spec.action != "" && (!has(self.spec.nodeUUID) + || self.spec.nodeUUID == ""))' + - message: clusterImage, maxLVol, and workerNodes are required when action + is not specified + rule: (has(self.spec.action) && self.spec.action != "") || (has(self.spec.clusterImage) + && self.spec.clusterImage != "" && has(self.spec.maxLVol) && has(self.spec.workerNodes) + && size(self.spec.workerNodes) > 0) + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocktasks.yaml b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocktasks.yaml new file mode 100644 index 000000000..2d25e21e1 --- /dev/null +++ b/simplyblock_core/scripts/charts/crds/simplyblock.simplyblock.io_simplyblocktasks.yaml @@ -0,0 +1,84 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: simplyblocktasks.simplyblock.simplyblock.io +spec: + group: simplyblock.simplyblock.io + names: + kind: SimplyBlockTask + listKind: SimplyBlockTaskList + plural: simplyblocktasks + singular: simplyblocktask + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: SimplyBlockTask is the Schema for the simplyblocktasks API + properties: + apiVersion: + description: |- + APIVersion defines the 
versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SimplyBlockTask + properties: + clusterName: + type: string + subtasks: + type: boolean + taskID: + type: string + required: + - clusterName + type: object + status: + description: status defines the observed state of SimplyBlockTask + properties: + tasks: + items: + properties: + canceled: + type: boolean + parentTask: + type: string + retried: + format: int32 + type: integer + startedAt: + format: date-time + type: string + taskResult: + type: string + taskStatus: + type: string + taskType: + type: string + uuid: + type: string + type: object + type: array + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/simplyblock_core/scripts/charts/templates/_helpers.tpl b/simplyblock_core/scripts/charts/templates/_helpers.tpl new file mode 100644 index 000000000..710260fdc --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/_helpers.tpl @@ -0,0 +1,21 @@ +{{- define "simplyblock.commonContainer" }} +env: + - name: SIMPLYBLOCK_LOG_LEVEL + valueFrom: + configMapKeyRef: + name: simplyblock-config + key: LOG_LEVEL + +volumeMounts: + - name: fdb-cluster-file + mountPath: /etc/foundationdb/fdb.cluster + subPath: fdb.cluster + +resources: + requests: + cpu: "50m" + memory: "100Mi" + limits: + cpu: 
"300m" + memory: "1Gi" +{{- end }} diff --git a/simplyblock_core/scripts/charts/templates/app_configmap.yaml b/simplyblock_core/scripts/charts/templates/app_configmap.yaml index de0a4da08..a4d1d57dd 100644 --- a/simplyblock_core/scripts/charts/templates/app_configmap.yaml +++ b/simplyblock_core/scripts/charts/templates/app_configmap.yaml @@ -6,8 +6,8 @@ metadata: namespace: {{ .Release.Namespace }} data: - LOG_LEVEL: {{ .Values.log.level }} - LOG_DELETION_INTERVAL: {{ .Values.log.deletionInterval }} + LOG_LEVEL: {{ .Values.observability.level }} + LOG_DELETION_INTERVAL: {{ .Values.observability.deletionInterval }} --- @@ -29,6 +29,7 @@ data: Path /var/log/containers/*.log Parser docker Tag kube.* + Exclude_Path /var/log/containers/*fluent-bit*.log Refresh_Interval 5 Mem_Buf_Limit 5MB Skip_Long_Lines On @@ -69,9 +70,11 @@ data: filter.lua: | function filter_tagged_pods(tag, timestamp, record) - annotations = record["kubernetes"]["annotations"] - if annotations ~= nil and annotations["log-collector/enabled"] == "true" then - return 1, record + if record["kubernetes"] ~= nil then + local annotations = record["kubernetes"]["annotations"] + if annotations ~= nil and annotations["log-collector/enabled"] == "true" then + return 1, record + end end return -1, record end diff --git a/simplyblock_core/scripts/charts/templates/app_ingress.yaml b/simplyblock_core/scripts/charts/templates/app_ingress.yaml index 67e7b0912..b49b0c396 100644 --- a/simplyblock_core/scripts/charts/templates/app_ingress.yaml +++ b/simplyblock_core/scripts/charts/templates/app_ingress.yaml @@ -1,4 +1,5 @@ -{{- if (not .Values.ingress.useDNS) }} +{{- if .Values.ingress.enabled }} + {{- if not .Values.ingress.useDNS }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -17,7 +18,7 @@ spec: port: number: 5000 --- -{{- else if .Values.ingress.useDNS }} + {{- else if .Values.ingress.useDNS }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -45,4 +46,5 @@ spec: name: 
simplyblock-webappapi port: number: 5000 + {{- end }} {{- end }} diff --git a/simplyblock_core/scripts/charts/templates/app_k8s.yaml b/simplyblock_core/scripts/charts/templates/app_k8s.yaml index ec2e5b378..82f1d4f2c 100644 --- a/simplyblock_core/scripts/charts/templates/app_k8s.yaml +++ b/simplyblock_core/scripts/charts/templates/app_k8s.yaml @@ -5,7 +5,7 @@ metadata: name: simplyblock-admin-control namespace: {{ .Release.Namespace }} spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: simplyblock-admin-control @@ -18,7 +18,16 @@ spec: labels: app: simplyblock-admin-control spec: - serviceAccountName: simplyblock-control-sa + serviceAccountName: simplyblock-sa + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: simplyblock-admin-control + topologyKey: kubernetes.io/hostname containers: - name: simplyblock-control image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -31,11 +40,13 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace +{{- if .Values.observability.enabled }} - name: MONITORING_SECRET valueFrom: secretKeyRef: name: simplyblock-grafana-secrets key: MONITORING_SECRET +{{- end }} - name: SIMPLYBLOCK_LOG_LEVEL valueFrom: configMapKeyRef: @@ -61,11 +72,12 @@ spec: path: fdb.cluster --- apiVersion: apps/v1 -kind: DaemonSet +kind: Deployment metadata: name: simplyblock-webappapi namespace: {{ .Release.Namespace }} spec: + replicas: 2 selector: matchLabels: app: simplyblock-webappapi @@ -77,7 +89,15 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-webappapi - spec: + spec: + serviceAccountName: simplyblock-sa + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: simplyblock-admin-control + topologyKey: kubernetes.io/hostname containers: - name: webappapi image: "{{ 
.Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -91,6 +111,21 @@ spec: configMapKeyRef: name: simplyblock-config key: LOG_LEVEL + - name: LVOL_NVMF_PORT_START + value: "{{ .Values.ports.lvolNvmfPortStart }}" + - name: ENABLE_MONITORING + value: "{{ .Values.observability.enabled }}" + - name: K8S_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace +{{- if .Values.observability.enabled }} + - name: MONITORING_SECRET + valueFrom: + secretKeyRef: + name: simplyblock-grafana-secrets + key: MONITORING_SECRET +{{- end }} - name: FLASK_DEBUG value: "False" - name: FLASK_ENV @@ -106,55 +141,20 @@ spec: limits: cpu: "500m" memory: "2Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-storage-node-monitor - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-storage-node-monitor - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-storage-node-monitor - spec: - containers: - - name: storage-node-monitor - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/storage_node_monitor.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL + - name: fluent-bit + image: fluent/fluent-bit:1.8.11 volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster + - name: varlog + mountPath: /var/log + - name: config + mountPath: /fluent-bit/etc/ resources: requests: - cpu: "200m" - memory: "256Mi" + cpu: "100m" + memory: 
"200Mi" limits: - cpu: "400m" - memory: "1Gi" + cpu: "200m" + memory: "400Mi" volumes: - name: fdb-cluster-file configMap: @@ -162,18 +162,23 @@ spec: items: - key: cluster-file path: fdb.cluster - + - name: varlog + hostPath: + path: /var/log + - name: config + configMap: + name: simplyblock-fluent-bit-config --- apiVersion: apps/v1 kind: Deployment metadata: - name: simplyblock-mgmt-node-monitor + name: simplyblock-monitoring namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: matchLabels: - app: simplyblock-mgmt-node-monitor + app: simplyblock-monitoring template: metadata: annotations: @@ -181,201 +186,182 @@ spec: reloader.stakater.com/auto: "true" reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: - app: simplyblock-mgmt-node-monitor + app: simplyblock-monitoring spec: + serviceAccountName: simplyblock-sa + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - - name: mgmt-node-monitor + - name: storage-node-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/storage_node_monitor.py"] imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: mgmt-node-monitor + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" command: ["python", "simplyblock_core/services/mgmt_node_monitor.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" env: - - name: BACKEND_TYPE - value: "k8s" - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL + - name: BACKEND_TYPE + value: "k8s" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster +{{ toYaml .resources | nindent 12 }} +{{- end }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-lvol-stats-collector - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-lvol-stats-collector - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-lvol-stats-collector - spec: - containers: - name: lvol-stats-collector image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/lvol_stat_collector.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster +{{ toYaml .resources | nindent 12 }} +{{- end }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-main-distr-event-collector - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-main-distr-event-collector - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-main-distr-event-collector - spec: - containers: - name: main-distr-event-collector image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/main_distr_event_collector.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: capacity-and-stats-collector + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/capacity_and_stats_collector.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: capacity-monitor + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/cap_monitor.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: health-check + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/health_check_service.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: device-monitor + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/device_monitor.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: lvol-monitor + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/lvol_monitor.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: snapshot-monitor + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/snapshot_monitor.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} + - name: fluent-bit + image: fluent/fluent-bit:1.8.11 + volumeMounts: + - name: varlog + mountPath: /var/log + - name: config + mountPath: /fluent-bit/etc/ resources: requests: - cpu: "200m" - memory: "256Mi" + cpu: "100m" + memory: "200Mi" limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster + cpu: "200m" + memory: "400Mi" ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-capacity-and-stats-collector - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-capacity-and-stats-collector - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - 
labels: - app: simplyblock-capacity-and-stats-collector - spec: - containers: - - name: capacity-and-stats-collector - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/capacity_and_stats_collector.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster - + - name: fdb-cluster-file + configMap: + name: simplyblock-fdb-cluster-config + items: + - key: cluster-file + path: fdb.cluster + - name: varlog + hostPath: + path: /var/log + - name: config + configMap: + name: simplyblock-fluent-bit-config --- apiVersion: apps/v1 kind: Deployment metadata: - name: simplyblock-capacity-monitor + name: simplyblock-tasks namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: matchLabels: - app: simplyblock-capacity-monitor + app: simplyblock-tasks template: metadata: annotations: @@ -383,730 +369,168 @@ spec: reloader.stakater.com/auto: "true" reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: - app: simplyblock-capacity-monitor + app: simplyblock-tasks spec: - + serviceAccountName: simplyblock-sa + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - - name: capacity-monitor - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/cap_monitor.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: 
simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-health-check - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-health-check - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-health-check - spec: - containers: - - name: health-check - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/health_check_service.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-device-monitor - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-device-monitor - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-device-monitor - spec: - containers: 
- - name: device-monitor - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/device_monitor.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-lvol-monitor - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-lvol-monitor - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-lvol-monitor - spec: - containers: - - name: lvol-monitor - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/lvol_monitor.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-snapshot-monitor - namespace: {{ .Release.Namespace }} 
-spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-snapshot-monitor - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-snapshot-monitor - spec: - containers: - - name: snapshot-monitor - image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/snapshot_monitor.py"] - env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster - resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-cleanupfdb - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-cleanupfdb - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-cleanupfdb - spec: - containers: - - name: cleanupfdb + - name: tasks-node-add-runner image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/tasks_runner_node_add.py"] imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/workers/cleanup_foundationdb.py"] env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL - - name: LOG_DELETION_INTERVAL - value: 
"${LOG_DELETION_INTERVAL}" + - name: LVOL_NVMF_PORT_START + value: "{{ .Values.ports.lvolNvmfPortStart }}" +{{- with (include "simplyblock.commonContainer" . | fromYaml) }} +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster +{{ toYaml .resources | nindent 12 }} +{{- end }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-restart - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-restart - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-restart - spec: - containers: - name: tasks-runner-restart image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/tasks_runner_restart.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster +{{ toYaml .resources | nindent 12 }} +{{- end }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-migration - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-migration - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-migration - spec: - containers: - name: tasks-runner-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/tasks_runner_migration.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-failed-migration - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-failed-migration - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-failed-migration - spec: - containers: +{{ toYaml .resources | nindent 12 }} +{{- end }} + - name: tasks-runner-failed-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/tasks_runner_failed_migration.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-cluster-status - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-cluster-status - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-cluster-status - spec: - containers: +{{ toYaml .resources | nindent 12 }} +{{- end }} + - name: tasks-runner-cluster-status image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/tasks_cluster_status.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-new-device-migration - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-new-device-migration - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-new-device-migration - spec: - containers: +{{ toYaml .resources | nindent 12 }} +{{- end }} + - name: tasks-runner-new-device-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" - imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" command: ["python", "simplyblock_core/services/tasks_runner_new_dev_migration.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-node-add-runner - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-node-add-runner - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-node-add-runner - spec: - containers: - - name: tasks-node-addrunner +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: tasks-runner-port-allow image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/tasks_runner_port_allow.py"] imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/tasks_runner_node_add.py"] +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: LVOL_NVMF_PORT_START - value: "{{ .Values.ports.lvolNvmfPortStart }}" - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster +{{ toYaml .resources | nindent 12 }} +{{- end }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-port-allow - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-port-allow - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-port-allow - spec: - containers: - - name: tasks-runner-port-allow + - name: tasks-runner-jc-comp-resume image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/tasks_runner_jc_comp.py"] imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/tasks_runner_port_allow.py"] +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-tasks-runner-jc-comp-resume - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-tasks-runner-jc-comp-resume - template: - metadata: - annotations: - log-collector/enabled: "true" - reloader.stakater.com/auto: "true" - reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" - labels: - app: simplyblock-tasks-runner-jc-comp-resume - spec: - containers: - - name: tasks-runner-jc-comp-resume +{{ toYaml .resources | nindent 12 }} +{{- end }} + + - name: tasks-runner-sync-lvol-del image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/tasks_runner_sync_lvol_del.py"] imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" - command: ["python", "simplyblock_core/services/tasks_runner_jc_comp.py"] +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} env: - - name: SIMPLYBLOCK_LOG_LEVEL - valueFrom: - configMapKeyRef: - name: simplyblock-config - key: LOG_LEVEL +{{ toYaml .env | nindent 12 }} volumeMounts: - - name: fdb-cluster-file - mountPath: /etc/foundationdb/fdb.cluster - subPath: fdb.cluster +{{ toYaml .volumeMounts | nindent 12 }} resources: - requests: - cpu: "200m" - memory: "256Mi" - limits: - cpu: "400m" - memory: "1Gi" - volumes: - - name: fdb-cluster-file - configMap: - name: simplyblock-fdb-cluster-config - items: - - key: cluster-file - path: fdb.cluster ---- +{{ toYaml .resources | nindent 12 }} +{{- end }} -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: simplyblock-fluent-bit - namespace: {{ .Release.Namespace }} - labels: - app: simplyblock-fluent-bit -spec: - selector: - matchLabels: - app: simplyblock-fluent-bit - template: - metadata: - labels: - app: simplyblock-fluent-bit - spec: - containers: + - name: tasks-runner-snapshot-replication + image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" + command: ["python", "simplyblock_core/services/snapshot_replication.py"] + imagePullPolicy: "{{ .Values.image.simplyblock.pullPolicy }}" +{{- with (include "simplyblock.commonContainer" . 
| fromYaml) }} + env: +{{ toYaml .env | nindent 12 }} + volumeMounts: +{{ toYaml .volumeMounts | nindent 12 }} + resources: +{{ toYaml .resources | nindent 12 }} +{{- end }} - name: fluent-bit image: fluent/fluent-bit:1.8.11 - securityContext: - privileged: true volumeMounts: - name: varlog mountPath: /var/log - - name: varlibdockercontainers - mountPath: /var/lib/docker/containers - readOnly: true - name: config mountPath: /fluent-bit/etc/ resources: requests: + cpu: "100m" + memory: "200Mi" + limits: cpu: "200m" memory: "400Mi" - limits: - cpu: "400m" - memory: "1Gi" + volumes: + - name: fdb-cluster-file + configMap: + name: simplyblock-fdb-cluster-config + items: + - key: cluster-file + path: fdb.cluster - name: varlog hostPath: path: /var/log - - name: varlibdockercontainers - hostPath: - path: /var/lib/docker/containers - name: config configMap: name: simplyblock-fluent-bit-config diff --git a/simplyblock_core/scripts/charts/templates/app_sa.yaml b/simplyblock_core/scripts/charts/templates/app_sa.yaml index a5dee735b..f04fc14b3 100644 --- a/simplyblock_core/scripts/charts/templates/app_sa.yaml +++ b/simplyblock_core/scripts/charts/templates/app_sa.yaml @@ -1,13 +1,13 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: simplyblock-control-sa + name: simplyblock-sa namespace: {{ .Release.Namespace }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: simplyblock-control-role + name: simplyblock-role rules: - apiGroups: [""] resources: ["configmaps"] @@ -21,16 +21,23 @@ rules: - apiGroups: ["mongodbcommunity.mongodb.com"] resources: ["mongodbcommunity"] verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["simplyblock.simplyblock.io"] + resources: ["simplyblockpools/status", "simplyblocklvols/status", "simplyblockstorageclusters/status", "simplyblockstoragenodes/status", "simplyblockdevices/status", "simplyblocktasks/status"] + verbs: ["get", "patch", "update"] + - apiGroups: ["simplyblock.simplyblock.io"] + 
resources: ["simplyblockpools", "simplyblocklvols", "simplyblockstorageclusters", "simplyblockstoragenodes", "simplyblockdevices", "simplyblocktasks"] + verbs: ["get","list" ,"patch", "update", "watch"] + --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: simplyblock-control-binding + name: simplyblock-binding subjects: - kind: ServiceAccount - name: simplyblock-control-sa + name: simplyblock-sa namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole - name: simplyblock-control-role + name: simplyblock-role apiGroup: rbac.authorization.k8s.io diff --git a/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml b/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml new file mode 100644 index 000000000..2a9d7d044 --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml @@ -0,0 +1,24 @@ +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: hostpath.csi.k8s.io + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: hostpath.csi.k8s.io + app.kubernetes.io/component: csi-driver +spec: + # Supports persistent and ephemeral inline volumes. + volumeLifecycleModes: + - Persistent + - Ephemeral + # To determine at runtime which mode a volume uses, pod info and its + # "csi.storage.k8s.io/ephemeral" entry are needed. + podInfoOnMount: true + # No attacher needed. 
+ attachRequired: false + storageCapacity: false + # Kubernetes may use fsGroup to change permissions and ownership + # of the volume to match user requested fsGroup in the pod's SecurityPolicy + fsGroupPolicy: File + \ No newline at end of file diff --git a/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml b/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml new file mode 100644 index 000000000..aa645bff4 --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml @@ -0,0 +1,232 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-hostpathplugin-sa + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: csi-hostpathplugin +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "delete", "update", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["get", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csistoragecapacities"] + verbs: ["get", "list", "watch", "create", "update", "delete"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch", "update", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: csi-hostpathplugin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + 
name: csi-hostpathplugin +subjects: + - kind: ServiceAccount + name: csi-hostpathplugin-sa + namespace: {{ .Release.Namespace }} + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-hostpathplugin + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin +spec: + selector: + matchLabels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin + template: + metadata: + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin + spec: + serviceAccountName: csi-hostpathplugin-sa + containers: + - name: csi-provisioner + image: registry.k8s.io/sig-storage/csi-provisioner:v6.0.0 + args: + - -v=5 + - --csi-address=/csi/csi.sock + - --feature-gates=Topology=true + - --node-deployment=true + - --strict-topology=true + - --immediate-topology=false + - --worker-threads=5 + env: + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. 
+ privileged: true + volumeMounts: + - mountPath: /csi + name: socket-dir + - name: csi-resizer + image: registry.k8s.io/sig-storage/csi-resizer:v2.0.0 + args: + - -v=5 + - -csi-address=/csi/csi.sock + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. + privileged: true + volumeMounts: + - mountPath: /csi + name: socket-dir + + - name: node-driver-registrar + image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.12.0 + args: + - --v=5 + - --csi-address=/csi/csi.sock + - --kubelet-registration-path=/var/lib/kubelet/plugins/csi-hostpath/csi.sock + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. + privileged: true + env: + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /csi + name: socket-dir + - mountPath: /registration + name: registration-dir + - mountPath: /csi-data-dir + name: csi-data-dir + + - name: hostpath + image: registry.k8s.io/sig-storage/hostpathplugin:v1.15.0 + args: + - --drivername=hostpath.csi.k8s.io + - --v=5 + - --endpoint=$(CSI_ENDPOINT) + - --nodeid=$(KUBE_NODE_NAME) + env: + - name: CSI_ENDPOINT + value: unix:///csi/csi.sock + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + securityContext: + privileged: true + ports: + - containerPort: 9898 + name: healthz + protocol: TCP + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 2 + volumeMounts: + - mountPath: /csi + name: socket-dir + - mountPath: /var/lib/kubelet/pods + mountPropagation: Bidirectional + name: mountpoint-dir + - mountPath: /var/lib/kubelet/plugins + mountPropagation: 
Bidirectional + name: plugins-dir + - mountPath: /csi-data-dir + name: csi-data-dir + - mountPath: /dev + name: dev-dir + - name: liveness-probe + volumeMounts: + - mountPath: /csi + name: socket-dir + image: registry.k8s.io/sig-storage/livenessprobe:v2.15.0 + args: + - --csi-address=/csi/csi.sock + - --health-port=9898 + + volumes: + - hostPath: + path: /var/lib/kubelet/plugins/csi-hostpath + type: DirectoryOrCreate + name: socket-dir + - hostPath: + path: /var/lib/kubelet/pods + type: DirectoryOrCreate + name: mountpoint-dir + - hostPath: + path: /var/lib/kubelet/plugins_registry + type: Directory + name: registration-dir + - hostPath: + path: /var/lib/kubelet/plugins + type: Directory + name: plugins-dir + - hostPath: + # 'path' is where PV data is persisted on host. + # using /tmp is also possible while the PVs will not available after plugin container recreation or host reboot + path: /var/lib/csi-hostpath-data/ + type: DirectoryOrCreate + name: csi-data-dir + - hostPath: + path: /dev + type: Directory + name: dev-dir + \ No newline at end of file diff --git a/simplyblock_core/scripts/charts/templates/dashboards.yaml b/simplyblock_core/scripts/charts/templates/dashboards.yaml index 981e961d0..165bad130 100644 --- a/simplyblock_core/scripts/charts/templates/dashboards.yaml +++ b/simplyblock_core/scripts/charts/templates/dashboards.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitoring.enabled }} +{{- if .Values.observability.enabled }} apiVersion: v1 kind: ConfigMap metadata: @@ -12512,14796 +12512,4 @@ data: "weekStart": "" } ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: simplyblock-grafana-dashboard-node-exporter - namespace: {{ .Release.Namespace }} - labels: - grafana_dashboard: "1" -data: - node-exporter.json: | - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { 
- "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 1860, - "graphTooltip": 1, - "id": null, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Quick CPU / Mem / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Resource pressure via PSI", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "dark-yellow", - "value": 70 - }, - { - "color": "dark-red", - "value": 90 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 323, - "links": [], - "options": { - "displayMode": "basic", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "text": {} - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "CPU", - "range": false, - "refId": "CPU some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Mem", - "range": false, - "refId": "Memory some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "I/O", - "range": false, - "refId": "I/O some", - "step": 240 - } - ], - "title": "Pressure", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Busy state of all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 20, - "links": [], - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "100 * (1 - 
avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$node\"}[$__rate_interval])))", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "System load over all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 155, - "links": [], - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "scalar(node_load1{instance=\"$node\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Sys Load", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Non available RAM memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "max": 100, - 
"min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "hideTimeOverride": false, - "id": 16, - "links": [], - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\", job=\"$job\"}) / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"}) * 100", - "format": "time_series", - "hide": true, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "(1 - (node_memory_MemAvailable_bytes{instance=\"$node\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"})) * 100", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "B", - "step": 240 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Used Swap", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - 
"steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 25 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 21, - "links": [], - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"})) * 100", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Used Root FS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "links": [], - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true 
- }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Total number of CPU cores", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "System uptime", - "fieldConfig": { - "defaults": { - "color": { - 
"mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 20, - "y": 1 - }, - "hideTimeOverride": true, - "id": 15, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Total RootFS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 70 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 23, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": 
"horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RootFS Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Total RAM", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 3 - }, - "id": 75, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RAM Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Total SWAP", - "fieldConfig": { - "defaults": 
{ - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 3 - }, - "id": 18, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Total", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 263, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Basic CPU / Mem / Net / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Basic CPU info", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 77, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "mode": 
"multi", - "sort": "desc" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Busy System", - "range": true, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Busy User", - "range": true, - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy Iowait", - "range": true, - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy IRQs", - "range": true, - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": 
"code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy Other", - "range": true, - "refId": "E", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Idle", - "range": true, - "refId": "F", - "step": 240 - } - ], - "title": "CPU Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Basic memory usage", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": 
"SWAP Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Cache + Buffer" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Free" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Available" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#DEDAF7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 78, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "RAM Total", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "RAM Used", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "RAM Cache + Buffer", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "RAM Free", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SWAP Used", - "refId": "E", - "step": 240 - } - ], - "title": "Memory Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Basic network info per interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Recv_bytes_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_bytes_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "Recv_drop_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_drop_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_errs_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_errs_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CCA300", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_bytes_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_bytes_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_drop_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_drop_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_errs_eth2" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_errs_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CCA300", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_bytes_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] 
- }, - { - "matcher": { - "id": "byName", - "options": "recv_drop_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_drop_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#967302", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_errs_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_errs_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_bytes_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_bytes_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_drop_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_drop_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#967302", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_errs_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_errs_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*trans.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": 
"negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 74, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "recv {{ "{{" }}device{{ "}}" }}", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "trans {{ "{{" }}device{{ "}}" }} ", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Disk space used of all filesystems mounted", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - 
"value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 152, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }}", - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used Basic", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 265, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "percentage", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - 
"matcher": { - "id": "byName", - "options": "Idle - Waiting for something to happen" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Iowait - Waiting for I/O to complete" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Irq - Servicing interrupts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Nice - Niced processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Softirq - Servicing softirqs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Steal - Time spent in other operating systems when running in a virtualized environment" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCE2DE", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "System - Processes executing in kernel mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "User - Normal processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195CE", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, 
- "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "System - Processes executing in kernel mode", - "range": true, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "User - Normal processes executing in user mode", - "range": true, - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"nice\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Nice - Niced processes executing in user mode", - "range": true, - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Iowait - Waiting for I/O to complete", - "range": true, - "refId": "E", - "step": 
240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Irq - Servicing interrupts", - "range": true, - "refId": "F", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"softirq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Softirq - Servicing softirqs", - "range": true, - "refId": "G", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"steal\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", - "range": true, - "refId": "H", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Idle - Waiting for something to happen", - "range": true, - "refId": "J", - "step": 240 - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - 
} - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap - Swap memory usage" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - 
{ - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused - Free memory unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Hardware Corrupted - *./" - }, - "properties": [ - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 24, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Apps - Memory used by user-space applications", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "PageTables - Memory used to map 
between virtual and physical memory addresses", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Cache - Parked file data (file content) cache", - "refId": "E", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Buffers - Block device (e.g. 
harddisk) cache", - "refId": "F", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Unused - Free memory unassigned", - "refId": "G", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Swap - Swap space used", - "refId": "H", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", - "refId": "I", - "step": 240 - } - ], - "title": "Memory Stack", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bits out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - 
}, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "receive_packets_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "receive_packets_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "transmit_packets_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "transmit_packets_lo" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Trans.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 84, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Receive", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Transmit", - "refId": "B", - 
"step": 240 - } - ], - "title": "Network Traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 33 - }, - "id": 156, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }}", - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - 
}, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IO read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 45 - }, - "id": 229, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Reads completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Writes completed", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "io time" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*read*./" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde.*/" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byType", - "options": "time" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "hidden" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 45 - }, - "id": 42, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Successfully read bytes", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Successfully written bytes", - "refId": "B", - "step": 240 - } - ], - "title": "I/O Usage Read / Write", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "%util", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - 
}, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "io time" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byType", - "options": "time" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "hidden" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 127, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [$__rate_interval])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }}", - "refId": "A", - "step": 240 - } - ], - "title": "I/O Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "percentage", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - 
"pointSize": 3, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/^Guest - /" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195ce", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/^GuestNice - /" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#c15c17", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 319, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[1m])))", - "hide": false, - "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\", mode=\"nice\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[1m])))", - "hide": false, - "legendFormat": "GuestNice - Time 
spent running a niced guest (virtual CPU for guest operating system)", - "range": true, - "refId": "B" - } - ], - "title": "CPU spent seconds in guests (VMs)", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "CPU / Memory / Net / Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 266, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": 
"fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": 
"color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 54 - }, - "id": 136, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Inactive - Memory which has been less recently used. 
It is more eligible to be reclaimed for other purposes", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Active / Inactive", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - 
"fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*CommitLimit - *./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 54 - }, - "id": 135, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Committed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": 
"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - 
} - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 191, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Active_file - File-backed memory on active LRU list", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", - "refId": "D", - "step": 240 - } - ], - "title": "Memory Active / Inactive Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 64 - }, - "id": 130, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Writeback - Memory which is actively being written back to disk", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", - "refId": "C", - "step": 240 - } - ], - "title": "Memory Writeback and Dirty", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { 
- "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "ShmemHugePages - Memory used by shared 
memory (shmem) and tmpfs allocated with huge pages" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 74 - }, - "id": 138, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Mapped - Used memory in mapped pages files which have been mapped, such as libraries", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "ShmemPmdMapped - Amount of shared (shmem/tmpfs) memory backed by huge pages", - "refId": "D", - "step": 240 - } - ], - "title": "Memory Shared and Mapped", - "type": "timeseries" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { 
- "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { 
- "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 74 - }, - "id": 131, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Slab", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 84 - }, - "id": 70, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocChunk - Largest contiguous block of vmalloc area which is free", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocTotal - Total size of vmalloc memory area", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", - "refId": "C", - "step": 240 - } - ], - "title": "Memory Vmalloc", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": 
"Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 84 - }, - "id": 159, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Bounce - Memory used for block device bounce buffers", - "refId": "A", - "step": 240 - } - ], - "title": "Memory Bounce", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, 
- { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byRegexp", - "options": "/.*Inactive *./" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 94 - }, - "id": 129, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "AnonHugePages - Memory in anonymous huge pages", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "AnonPages - Memory in user pages not backed by files", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Anonymous", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ 
- { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 94 - }, - "id": 160, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "KernelStack - Kernel memory stack. 
This is not reclaimable", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Kernel / CPU", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "pages", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 104 - }, - "id": 140, - "links": [], - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages", - 
"refId": "C", - "step": 240 - } - ], - "title": "Memory HugePages Counter", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": 
[ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 104 - }, - "id": 71, - "links": [], - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "HugePages - Total size of the pool of huge pages", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Hugepagesize - Huge Page size", - "refId": "B", - "step": 240 - } - ], - "title": "Memory HugePages Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, 
- "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - 
}, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 114 - }, - "id": 128, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - 
"legendFormat": "DirectMap1G - Amount of pages mapped as this size", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "DirectMap2M - Amount of pages mapped as this size", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "DirectMap4K - Amount of pages mapped as this size", - "refId": "C", - "step": 240 - } - ], - "title": "Memory DirectMap", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 114 - }, - "id": 137, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Unevictable and MLocked", - "type": 
"timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": 
"fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - 
} - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 124 - }, - "id": 132, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet committed to the storage", - "refId": "A", - "step": 240 - } - ], - "title": "Memory NFS", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Memory Meminfo", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 270, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The number (after merges) of I/O requests completed per second for the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IO read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": 
"byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - 
{ - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Reads completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Writes completed", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps Completed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The number of bytes read from or written to the device per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - 
"pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 47 - }, - "id": 33, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Read bytes", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Written bytes", - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Data", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "time. 
read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": 
"/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - 
"id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 37, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Read wait time avg", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Write wait time avg", - "refId": "B", - "step": 240 - } - ], - "title": "Disk Average Wait Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The average queue length of the requests that were issued to the device", - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "aqu-sz", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - 
"options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 35, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }}", - "refId": "A", - "step": 240 - } - ], - "title": "Average Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The number of read and write requests merged per second that were queued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "I/Os", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - 
}, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - 
"options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 133, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Read merged", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Write merged", - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Merged", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially. 
But for devices serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "%util", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - 
"options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 67 - }, - "id": 36, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - IO", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - discard", - "refId": "B", - "step": 240 - } - ], - "title": "Time Spent Doing I/Os", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "The number of outstanding requests at the instant the sample was taken. 
Incremented as requests are given to appropriate struct request_queue and decremented as they finish.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Outstanding req.", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - 
"value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 77 - }, - "id": 34, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - IO now", - "refId": "A", - "step": 240 - } - ], - "title": "Instantaneous Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IOs", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": 
{ - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": 
"/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - 
"h": 10, - "w": 12, - "x": 12, - "y": 77 - }, - "id": 301, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Discards completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}device{{ "}}" }} - Discards merged", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps Discards completed / merged", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Storage Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 271, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": 
"linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "id": 43, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - Available", - "metric": "", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - Free", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - Size", - "refId": "C", - "step": 240 - } - ], - "title": "Filesystem space available", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { 
- "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "file nodes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 62 - }, - "id": 41, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - Free file nodes", - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Free", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "files", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 72 - }, - "id": 28, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Max open files", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Open files", - "refId": "B", - "step": 240 - } - ], - "title": "File Descriptor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "file Nodes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 72 - }, - "id": 219, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - File nodes total", - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": 
"red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "/ ReadOnly" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 82 - }, - "id": 44, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - ReadOnly", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}mountpoint{{ "}}" }} - Device error", - "refId": "B", - "step": 240 - } - ], - "title": "Filesystem in ReadOnly / Error", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Storage Filesystem", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 279, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "seconds", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 66 - }, - "id": 40, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}collector{{ "}}" }} - Scrape duration", - "refId": "A", - "step": 240 - } - ], - "title": "Node Exporter Scrape Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 
5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*error.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 66 - }, - "id": 157, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}collector{{ "}}" }} - Scrape success", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ "{{" }}collector{{ "}}" }} - Scrape textfile error (1 = true)", - "refId": "B", - "step": 240 - } - ], - "title": "Node Exporter Scrape", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Node Exporter", - "type": "row" - } - ], - "refresh": "1m", - "revision": 1, - "schemaVersion": 38, - 
"style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "Datasource", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [], - "query": { - "query": "label_values(node_uname_info, job)", - "refId": "Prometheus-job-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", - "hide": 0, - "includeAll": false, - "label": "Host", - "multi": false, - "name": "node", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\"}, instance)", - "refId": "Prometheus-node-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": false, - "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" - }, - "hide": 2, - "includeAll": false, - "multi": false, - "name": "diskdevices", - "options": [ - { - "selected": true, - "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" - } - ], - "query": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, 
- "timezone": "", - "title": "NodeExporter", - "uid": "d56e0ae7-48d5-481d-a2ea-3192da4d9e42", - "version": 5, - "weekStart": "" - } {{- end }} diff --git a/simplyblock_core/scripts/charts/templates/foundationdb.yaml b/simplyblock_core/scripts/charts/templates/foundationdb.yaml index 1a3134e58..96d1c1979 100644 --- a/simplyblock_core/scripts/charts/templates/foundationdb.yaml +++ b/simplyblock_core/scripts/charts/templates/foundationdb.yaml @@ -2,20 +2,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager + name: simplyblock-fdb-controller-manager labels: - control-plane: controller-manager - app: controller-manager + control-plane: simplyblock-fdb-controller-manager + app: simplyblock-fdb-controller-manager spec: selector: matchLabels: - app: controller-manager + app: simplyblock-fdb-controller-manager replicas: 1 template: metadata: labels: - control-plane: controller-manager - app: controller-manager + control-plane: simplyblock-fdb-controller-manager + app: simplyblock-fdb-controller-manager spec: securityContext: runAsUser: 4059 @@ -28,7 +28,7 @@ spec: emptyDir: {} - name: fdb-binaries emptyDir: {} - serviceAccountName: controller-manager + serviceAccountName: simplyblock-fdb-controller-manager initContainers: - name: foundationdb-kubernetes-init-7-3 image: foundationdb/fdb-kubernetes-monitor:7.3.63 @@ -51,7 +51,9 @@ spec: containers: - command: - /manager - image: foundationdb/fdb-kubernetes-operator:v2.13.0 + args: + - "--health-probe-bind-address=:9443" + image: foundationdb/fdb-kubernetes-operator:v2.18.0 name: manager env: - name: WATCH_NAMESPACE @@ -86,13 +88,13 @@ spec: apiVersion: v1 kind: ServiceAccount metadata: - name: controller-manager + name: simplyblock-fdb-controller-manager --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: manager-role + name: simplyblock-fdb-manager-role rules: - apiGroups: - "" @@ -164,7 +166,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: 
creationTimestamp: null - name: manager-clusterrole + name: simplyblock-fdb-manager-clusterrole rules: - apiGroups: - "" @@ -179,27 +181,27 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: creationTimestamp: null - name: manager-rolebinding + name: simplyblock-fdb-manager-rolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: manager-role + name: simplyblock-fdb-manager-role subjects: - kind: ServiceAccount - name: controller-manager + name: simplyblock-fdb-controller-manager --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: creationTimestamp: null - name: manager-clusterrolebinding + name: simplyblock-fdb-manager-clusterrolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: manager-clusterrole + name: simplyblock-fdb-manager-clusterrole subjects: - kind: ServiceAccount - name: controller-manager + name: simplyblock-fdb-controller-manager namespace: metadata.namespace ##### cluster file ################# @@ -213,7 +215,11 @@ spec: replacements: enabled: true faultDomain: + {{- if .Values.foundationdb.multiAZ }} + key: topology.kubernetes.io/zone + {{- else }} key: foundationdb.org/none + {{- end }} imageType: split labels: filterOnOwnerReference: false @@ -224,16 +230,24 @@ spec: processGroupIDLabels: - foundationdb.org/fdb-process-group-id minimumUptimeSecondsForBounce: 60 + databaseConfiguration: + redundancy_mode: triple processCounts: + {{- if .Values.foundationdb.multiAZ }} + cluster_controller: 1 + log: 4 + storage: 4 + stateless: -1 + {{- else }} cluster_controller: 1 log: 3 storage: 3 stateless: -1 + {{- end }} processes: general: customParameters: - knob_disable_posix_kernel_aio=1 - - listen_address=0.0.0.0:4501 podTemplate: spec: containers: @@ -270,7 +284,7 @@ spec: runAsUser: 0 volumeClaimTemplate: spec: - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath accessModes: - ReadWriteOnce resources: @@ -285,10 +299,10 @@ 
spec: resources: limits: cpu: 500m - memory: 2Gi + memory: 4Gi requests: cpu: 100m - memory: 512Mi + memory: 1Gi securityContext: runAsUser: 0 affinity: @@ -308,10 +322,10 @@ spec: resources: limits: cpu: 500m - memory: 2Gi + memory: 4Gi requests: cpu: 100m - memory: 512Mi + memory: 1Gi securityContext: runAsUser: 0 affinity: diff --git a/simplyblock_core/scripts/charts/templates/mongodb.yaml b/simplyblock_core/scripts/charts/templates/mongodb.yaml index 740dd7642..6c004f314 100644 --- a/simplyblock_core/scripts/charts/templates/mongodb.yaml +++ b/simplyblock_core/scripts/charts/templates/mongodb.yaml @@ -1,3 +1,4 @@ +{{- if .Values.observability.enabled }} apiVersion: mongodbcommunity.mongodb.com/v1 kind: MongoDBCommunity metadata: @@ -14,7 +15,7 @@ spec: name: data-volume spec: accessModes: [ "ReadWriteOnce" ] - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath resources: requests: storage: 5Gi @@ -22,7 +23,7 @@ spec: name: logs-volume spec: accessModes: [ "ReadWriteOnce" ] - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath resources: requests: storage: 5Gi @@ -51,4 +52,5 @@ metadata: name: admin-password type: Opaque stringData: - password: {{ .Values.monitoring.secret }} + password: {{ .Values.observability.secret }} +{{- end }} diff --git a/simplyblock_core/scripts/charts/templates/monitoring_configmap.yaml b/simplyblock_core/scripts/charts/templates/monitoring_configmap.yaml index cb4243493..7f621fbb1 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_configmap.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_configmap.yaml @@ -1,13 +1,17 @@ -{{- if .Values.monitoring.enabled }} - +{{- $name := printf "%s-simplyblock-prometheus-config" .Release.Name -}} +{{- $existing := (lookup "v1" "ConfigMap" .Release.Namespace $name) -}} apiVersion: v1 kind: ConfigMap metadata: - name: {{ .Release.Name }}-simplyblock-prometheus-config + name: {{ $name }} labels: app: simplyblock-prometheus 
namespace: {{ .Release.Namespace }} data: + {{- if $existing }} + prometheus.yml: | +{{ index $existing.data "prometheus.yml" | indent 4 }} + {{- else }} prometheus.yml: | global: scrape_interval: 30s @@ -15,7 +19,6 @@ data: monitor: 'codelab-monitor' scrape_configs: - - job_name: 'cluster_metrics' static_configs: - targets: ['simplyblock-webappapi:5000'] @@ -24,14 +27,7 @@ data: basic_auth: username: password: - - - job_name: 'node' - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_endpoints_name] - action: keep - regex: 'simplyblock-node-exporter' + {{- end }} --- apiVersion: v1 @@ -46,6 +42,7 @@ data: type: FILESYSTEM config: directory: /mnt/thanos +{{- if .Values.observability.enabled }} --- apiVersion: v1 kind: ConfigMap @@ -60,7 +57,7 @@ data: datasources: - name: Thanos type: prometheus - url: http://simplyblock-thanos-query:9091 + url: http://simplyblock-thanos:9091 isDefault: true access: proxy uid: PBFA97CFB590B2093 @@ -829,7 +826,7 @@ data: type: slack settings: username: grafana_bot - url: '{{ .Values.grafana.contactPoint }}' + url: '{{ .Values.observability.grafana.contactPoint }}' title: | '{{ "{{" }} template "slack.title" . 
{{ "}}" }}' text: | diff --git a/simplyblock_core/scripts/charts/templates/monitoring_ingress.yaml b/simplyblock_core/scripts/charts/templates/monitoring_ingress.yaml index ec0e1ab80..bcccf4a35 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_ingress.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_ingress.yaml @@ -1,4 +1,5 @@ -{{- if (not .Values.ingress.useDNS) }} +{{- if .Values.ingress.enabled }} + {{- if not .Values.ingress.useDNS }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -32,9 +33,8 @@ spec: name: simplyblock-graylog port: number: 9000 - --- -{{- else if .Values.ingress.useDNS }} + {{- else if .Values.ingress.useDNS }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -77,4 +77,5 @@ spec: name: simplyblock-graylog port: number: 9000 + {{- end }} {{- end }} diff --git a/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml b/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml index 9c0f46e1f..f54a9c2f5 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitoring.enabled }} +{{- if .Values.observability.enabled }} --- apiVersion: apps/v1 kind: Deployment @@ -46,7 +46,7 @@ spec: - name: GRAYLOG_ELASTICSEARCH_HOSTS value: "http://opensearch-cluster-master:9200" - name: GRAYLOG_MONGODB_URI - value: "mongodb://admin:{{ .Values.monitoring.secret }}@simplyblock-mongo-svc:27017/graylog" + value: "mongodb://admin:{{ .Values.observability.secret }}@simplyblock-mongo-svc:27017/graylog" - name: GRAYLOG_SKIP_PREFLIGHT_CHECKS value: "true" - name: GRAYLOG_ROTATION_STRATEGY @@ -68,6 +68,8 @@ spec: value: "false" - name: GRAYLOG_ELASTICSEARCH_REPLICAS value: "1" + - name: GRAYLOG_MESSAGE_JOURNAL_MAX_SIZE + value: "10gb" ports: - containerPort: 5044 - containerPort: 5140 @@ -103,30 +105,37 @@ spec: apiVersion: apps/v1 kind: Deployment metadata: - name: 
simplyblock-thanos-store + name: simplyblock-thanos namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: matchLabels: - app: simplyblock-thanos-store + app: simplyblock-thanos template: metadata: labels: - app: simplyblock-thanos-store - spec: + app: simplyblock-thanos + spec: containers: - name: thanos-store image: thanosio/thanos:v0.31.0 args: - store + - --grpc-address=0.0.0.0:10901 + - --http-address=0.0.0.0:10902 - --objstore.config-file=/etc/thanos/objstore.yml - --index-cache-size=500MB - --chunk-pool-size=500MB + ports: + - name: grpc + containerPort: 10901 + - name: http + containerPort: 10902 volumeMounts: - name: objstore-config mountPath: /etc/thanos - - name: thanos-data + - name: data mountPath: /data resources: requests: @@ -135,37 +144,20 @@ spec: limits: cpu: "250m" memory: "1Gi" - volumes: - - name: objstore-config - configMap: - name: simplyblock-objstore-config - - name: thanos-data - emptyDir: {} - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-thanos-query - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-thanos-query - template: - metadata: - labels: - app: simplyblock-thanos-query - spec: - containers: + - name: thanos-query image: thanosio/thanos:v0.31.0 args: - query + - --grpc-address=0.0.0.0:10911 - --http-address=0.0.0.0:9091 - - --store=simplyblock-thanos-store:10901 + - --store=simplyblock-thanos:10901 - --store=simplyblock-prometheus:10901 + ports: + - containerPort: 9091 + name: http + - containerPort: 10911 + name: grpc resources: requests: cpu: "100m" @@ -174,28 +166,11 @@ spec: cpu: "250m" memory: "1Gi" ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: simplyblock-thanos-compactor - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: simplyblock-thanos-compactor - template: - metadata: - labels: - app: simplyblock-thanos-compactor - spec: - - containers: - name: thanos-compactor image: 
thanosio/thanos:v0.31.0 args: - compact + - --http-address=0.0.0.0:10922 - --data-dir=/data - --objstore.config-file=/etc/thanos/objstore.yml - --retention.resolution-raw=30d @@ -203,10 +178,13 @@ spec: - --retention.resolution-1h=90d - --compact.concurrency=1 - --wait + ports: + - containerPort: 10922 + name: http volumeMounts: - name: objstore-config mountPath: /etc/thanos - - name: compactor-data + - name: data mountPath: /data resources: requests: @@ -215,72 +193,14 @@ spec: limits: cpu: "250m" memory: "1Gi" + volumes: - name: objstore-config configMap: name: simplyblock-objstore-config - - name: compactor-data - emptyDir: {} ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: simplyblock-node-exporter - namespace: {{ .Release.Namespace }} -spec: - selector: - matchLabels: - app: simplyblock-node-exporter - template: - metadata: - labels: - app: simplyblock-node-exporter - spec: - containers: - - name: node-exporter - image: prom/node-exporter:v1.7.0 - args: - - '--path.procfs=/host/proc' - - '--path.sysfs=/host/sys' - - '--path.rootfs=/host/root' - - '--collector.filesystem.ignored-mount-points=^(/rootfs|/host|)/(sys|proc|dev|host|etc|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)' - - '--collector.filesystem.ignored-fs-types=^(sys|proc|auto|cgroup|devpts|ns|au|fuse.lxc|mqueue)(fs|)$' - - '--no-collector.ipvs' - - '--web.listen-address=:9200' - ports: - - containerPort: 9200 - protocol: TCP - volumeMounts: - - name: proc - mountPath: /host/proc - readOnly: true - mountPropagation: HostToContainer - - name: sys - mountPath: /host/sys - mountPropagation: HostToContainer - readOnly: true - - name: root - mountPath: /host/root - mountPropagation: HostToContainer - readOnly: true - resources: - requests: - cpu: "100m" - memory: "256Mi" - limits: - cpu: "250m" - memory: "1Gi" - volumes: - - name: proc - hostPath: - path: /proc - - name: sys - hostPath: - path: /sys - - name: root - hostPath: - path: / + - name: data + emptyDir: {} --- apiVersion: apps/v1 
@@ -343,9 +263,6 @@ spec: - name: dashboard-pools mountPath: /var/lib/grafana/dashboards/pools.json subPath: pools.json - - name: dashboard-node-exporter - mountPath: /var/lib/grafana/dashboards/node-exporter.json - subPath: node-exporter.json - name: grafana-data mountPath: /var/lib/grafana volumes: @@ -373,9 +290,6 @@ spec: - name: dashboard-pools configMap: name: simplyblock-grafana-dashboard-pools - - name: dashboard-node-exporter - configMap: - name: simplyblock-grafana-dashboard-node-exporter - name: grafana-data emptyDir: {} {{- end }} diff --git a/simplyblock_core/scripts/charts/templates/monitoring_secret.yaml b/simplyblock_core/scripts/charts/templates/monitoring_secret.yaml index c39735159..df741f026 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_secret.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_secret.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitoring.enabled }} +{{- if .Values.observability.enabled }} apiVersion: v1 kind: Secret metadata: @@ -6,8 +6,8 @@ metadata: namespace: {{ .Release.Namespace }} type: Opaque stringData: - MONITORING_SECRET: "{{ .Values.monitoring.secret }}" - GRAFANA_ENDPOINT: "{{ .Values.grafana.endpoint }}" + MONITORING_SECRET: "{{ .Values.observability.secret }}" + GRAFANA_ENDPOINT: "{{ .Values.observability.grafana.endpoint }}" --- apiVersion: v1 @@ -17,7 +17,7 @@ metadata: namespace: {{ .Release.Namespace }} type: Opaque stringData: - GRAYLOG_PASSWORD_SECRET: "{{ .Values.graylog.passwordSecret }}" - GRAYLOG_ROOT_PASSWORD_SHA2: "{{ .Values.graylog.rootPasswordSha2 }}" - MAX_NUMBER_OF_INDICES: "{{ .Values.log.maxNumberIndex }}" + GRAYLOG_PASSWORD_SECRET: "{{ .Values.observability.graylog.passwordSecret }}" + GRAYLOG_ROOT_PASSWORD_SHA2: "{{ .Values.observability.graylog.rootPasswordSha2 }}" + MAX_NUMBER_OF_INDICES: "{{ .Values.observability.graylog.maxNumberIndex }}" {{- end }} diff --git a/simplyblock_core/scripts/charts/templates/monitoring_svc.yaml 
b/simplyblock_core/scripts/charts/templates/monitoring_svc.yaml index 55b15dccc..5a0936434 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_svc.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_svc.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitoring.enabled }} +{{- if .Values.observability.enabled }} --- apiVersion: v1 kind: Service @@ -25,44 +25,19 @@ spec: apiVersion: v1 kind: Service metadata: - name: simplyblock-thanos-store + name: simplyblock-thanos namespace: {{ .Release.Namespace }} spec: selector: - app: simplyblock-thanos-store + app: simplyblock-thanos ports: - - name: thanos-store + - name: store port: 10901 targetPort: 10901 ---- -apiVersion: v1 -kind: Service -metadata: - name: simplyblock-thanos-query - namespace: {{ .Release.Namespace }} -spec: - selector: - app: simplyblock-thanos-query - ports: - - name: thanos-query + - name: query port: 9091 targetPort: 9091 ---- -apiVersion: v1 -kind: Service -metadata: - name: simplyblock-node-exporter - namespace: {{ .Release.Namespace }} -spec: - selector: - app: simplyblock-node-exporter - ports: - - name: simplyblock-node-exporter - protocol: TCP - port: 9200 - targetPort: 9200 - --- apiVersion: v1 kind: Service diff --git a/simplyblock_core/scripts/charts/templates/simplyblock-manager.yaml b/simplyblock_core/scripts/charts/templates/simplyblock-manager.yaml new file mode 100644 index 000000000..cca5e522d --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/simplyblock-manager.yaml @@ -0,0 +1,199 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: simplyblock-manager + labels: + control-plane: simplyblock-manager + app: simplyblock-manager +spec: + selector: + matchLabels: + app: simplyblock-manager + replicas: 1 + template: + metadata: + labels: + control-plane: simplyblock-manager + app: simplyblock-manager + spec: + securityContext: + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + serviceAccountName: simplyblock-manager + containers: + - image: 
simplyblock/simplyblock-manager:snapshot_replication + imagePullPolicy: Always + name: manager + env: + - name: WATCH_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + limits: + cpu: 500m + memory: 256Mi + requests: + cpu: 500m + memory: 256Mi + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + privileged: false + terminationGracePeriodSeconds: 10 + +################# ROLE AND ROLE BINDING ############################## +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: simplyblock-manager + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplyblock-manager-clusterrole +rules: +- apiGroups: + - "" + resources: + - configmaps + - events + - persistentvolumeclaims + - pods + - pods/exec + - namespaces + - secrets + - services + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - apps + resources: + - deployments + - daemonsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - update + - patch +- apiGroups: + - "rbac.authorization.k8s.io" + resources: + - roles + - clusterroles + verbs: + - create + - get + - list + - watch + - update + - patch +- apiGroups: + - "rbac.authorization.k8s.io" + resources: + - rolebindings + - clusterrolebindings + verbs: + - create + - get + - list + - watch + - update + - patch +- apiGroups: + - simplyblock.simplyblock.io + resources: + - simplyblockpools + - simplyblocklvols + - simplyblockstorageclusters + - simplyblockstoragenodes + - simplyblockdevices + - simplyblocktasks + - simplyblocksnapshotreplications + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - 
simplyblock.simplyblock.io + resources: + - simplyblockpools/finalizers + - simplyblocklvols/finalizers + - simplyblockstorageclusters/finalizers + - simplyblockstoragenodes/finalizers + - simplyblockdevices/finalizers + - simplyblocktasks/finalizers + - simplyblocksnapshotreplications/finalizers + verbs: + - update + - delete +- apiGroups: + - simplyblock.simplyblock.io + resources: + - simplyblockpools/status + - simplyblocklvols/status + - simplyblockstorageclusters/status + - simplyblockstoragenodes/status + - simplyblockdevices/status + - simplyblocktasks/status + - simplyblocksnapshotreplications/status + verbs: + - get + - patch + - update + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + creationTimestamp: null + name: simplyblock-manager-clusterrolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplyblock-manager-clusterrole +subjects: +- kind: ServiceAccount + name: simplyblock-manager + namespace: {{ .Release.Namespace }} + \ No newline at end of file diff --git a/simplyblock_core/scripts/charts/templates/simplyblock_customresource.yaml b/simplyblock_core/scripts/charts/templates/simplyblock_customresource.yaml new file mode 100644 index 000000000..eb360b60a --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/simplyblock_customresource.yaml @@ -0,0 +1,145 @@ +{{- if .Values.simplyblock.cluster }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockStorageCluster +metadata: + name: {{ .Values.simplyblock.cluster.clusterName }} + namespace: {{ .Release.Namespace }} +spec: + clusterName: {{ .Values.simplyblock.cluster.clusterName }} + + {{- if .Values.simplyblock.cluster.mgmtIfc }} + mgmtIfc: {{ .Values.simplyblock.cluster.mgmtIfc }} + {{- end }} + + {{- if .Values.simplyblock.cluster.fabric }} + fabric: {{ .Values.simplyblock.cluster.fabric }} + {{- end }} + + {{- if hasKey .Values.simplyblock.cluster "isSingleNode" }} + isSingleNode: {{ 
.Values.simplyblock.cluster.isSingleNode }} + {{- end }} + + {{- if hasKey .Values.simplyblock.cluster "enableNodeAffinity" }} + enableNodeAffinity: {{ .Values.simplyblock.cluster.enableNodeAffinity }} + {{- end }} + + {{- if hasKey .Values.simplyblock.cluster "strictNodeAntiAffinity" }} + strictNodeAntiAffinity: {{ .Values.simplyblock.cluster.strictNodeAntiAffinity }} + {{- end }} + + {{- if .Values.simplyblock.cluster.capWarn }} + capWarn: {{ .Values.simplyblock.cluster.capWarn }} + {{- end }} + + {{- if .Values.simplyblock.cluster.capCrit }} + capCrit: {{ .Values.simplyblock.cluster.capCrit }} + {{- end }} + + {{- if .Values.simplyblock.cluster.provCapWarn }} + provCapWarn: {{ .Values.simplyblock.cluster.provCapWarn }} + {{- end }} + + {{- if .Values.simplyblock.cluster.provCapCrit }} + provCapCrit: {{ .Values.simplyblock.cluster.provCapCrit }} + {{- end }} +{{- end }} + +--- +{{- if .Values.simplyblock.pool }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockPool +metadata: + name: {{ .Values.simplyblock.pool.name }} + namespace: {{ .Release.Namespace }} +spec: + name: {{ .Values.simplyblock.pool.name }} + clusterName: {{ .Values.simplyblock.cluster.clusterName }} + + {{- if .Values.simplyblock.pool.capacityLimit }} + capacityLimit: {{ .Values.simplyblock.pool.capacityLimit | quote }} + {{- end }} +{{- end }} + +--- +{{- if .Values.simplyblock.lvol }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockLvol +metadata: + name: {{ .Values.simplyblock.lvol.name }} + namespace: {{ .Release.Namespace }} +spec: + clusterName: {{ .Values.simplyblock.cluster.clusterName }} + poolName: {{ .Values.simplyblock.pool.name }} +{{- end }} + +--- +{{- if .Values.simplyblock.storageNodes }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockStorageNode +metadata: + name: {{ .Values.simplyblock.storageNodes.name }} + namespace: {{ .Release.Namespace }} +spec: + clusterName: {{ .Values.simplyblock.cluster.clusterName }} + + {{- if 
.Values.simplyblock.storageNodes.clusterImage }} + clusterImage: {{ .Values.simplyblock.storageNodes.clusterImage }} + {{- end }} + + {{- if .Values.simplyblock.storageNodes.mgmtIfc }} + mgmtIfc: {{ .Values.simplyblock.storageNodes.mgmtIfc }} + {{- end }} + + {{- if .Values.simplyblock.storageNodes.maxLVol }} + maxLVol: {{ .Values.simplyblock.storageNodes.maxLVol }} + {{- end }} + + {{- if .Values.simplyblock.storageNodes.maxSize }} + maxSize: {{ .Values.simplyblock.storageNodes.maxSize | quote }} + {{- end }} + + {{- if hasKey .Values.simplyblock.storageNodes "partitions" }} + partitions: {{ .Values.simplyblock.storageNodes.partitions }} + {{- end }} + + {{- if .Values.simplyblock.storageNodes.corePercentage }} + corePercentage: {{ .Values.simplyblock.storageNodes.corePercentage }} + {{- end }} + + {{- if hasKey .Values.simplyblock.storageNodes "spdkDebug" }} + spdkDebug: {{ .Values.simplyblock.storageNodes.spdkDebug }} + {{- end }} + + {{- if hasKey .Values.simplyblock.storageNodes "coreIsolation" }} + coreIsolation: {{ .Values.simplyblock.storageNodes.coreIsolation }} + {{- end }} + + {{- if .Values.simplyblock.storageNodes.workerNodes }} + workerNodes: + {{- range .Values.simplyblock.storageNodes.workerNodes }} + - {{ . 
}} + {{- end }} + {{- end }} +{{- end }} + +--- +{{- if .Values.simplyblock.devices }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockDevice +metadata: + name: {{ .Values.simplyblock.devices.name }} + namespace: {{ .Release.Namespace }} +spec: + clusterName: {{ .Values.simplyblock.cluster.clusterName }} +{{- end }} + +--- +{{- if .Values.simplyblock.tasks }} +apiVersion: simplyblock.simplyblock.io/v1alpha1 +kind: SimplyBlockTask +metadata: + name: {{ .Values.simplyblock.tasks.name }} + namespace: {{ .Release.Namespace }} +spec: + clusterName: {{ .Values.simplyblock.cluster.clusterName }} +{{- end }} diff --git a/simplyblock_core/scripts/charts/templates/storage_class.yaml b/simplyblock_core/scripts/charts/templates/storage_class.yaml index 64e5e6280..b23cb4a07 100644 --- a/simplyblock_core/scripts/charts/templates/storage_class.yaml +++ b/simplyblock_core/scripts/charts/templates/storage_class.yaml @@ -2,9 +2,22 @@ apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: - name: openebs-local-hostpath -provisioner: openebs.io/local + name: local-hostpath + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpath-fast + app.kubernetes.io/component: storageclass +provisioner: hostpath.csi.k8s.io allowVolumeExpansion: true reclaimPolicy: Retain volumeBindingMode: WaitForFirstConsumer - +{{- if .Values.storageclass.allowedTopologyZones }} +allowedTopologies: +- matchLabelExpressions: + - key: topology.kubernetes.io/zone + values: +{{- range .Values.storageclass.allowedTopologyZones }} + - {{ . 
}} +{{- end }} +{{- end }} diff --git a/simplyblock_core/scripts/charts/values-template.yaml b/simplyblock_core/scripts/charts/values-template.yaml deleted file mode 100644 index 79693e7cd..000000000 --- a/simplyblock_core/scripts/charts/values-template.yaml +++ /dev/null @@ -1,194 +0,0 @@ -graylog: - rootPasswordSha2: "${GRAYLOG_ROOT_PASSWORD_SHA2}" - passwordSecret: "${GRAYLOG_PASSWORD_SECRET}" - -cluster: - secret: "${CLUSTER_SECRET}" - id: "${CLUSTER_ID}" - ip: "${CLUSTER_IP}" - -monitoring: - enabled: ${ENABLE_MONITORING} - -log: - deletionInterval: "${LOG_DELETION_INTERVAL}" - retentionPeriod: "${RETENTION_PERIOD}" - level: "${LOG_LEVEL}" - maxNumberIndex: "${MAX_NUMBER_OF_INDICES}" - -grafana: - endpoint: "${GRAFANA_ENDPOINT}" - contactPoint: "${CONTACT_POINT}" - -image: - simplyblock: - repository: "${SIMPLYBLOCK_REPOSITORY}" - tag: "${SIMPLYBLOCK_TAG}" - pullPolicy: "Always" - -openebs: - enabled: true - -mongodb: - name: "simplyblock-mongodb" - deployment_name: "simplyblock-mongodb" - resources: - requests: - cpu: 100m - memory: 300Mi - limits: - cpu: 250m - memory: 1Gi - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app.kubernetes.io/component - operator: In - values: - - mongodb - topologyKey: "kubernetes.io/hostname" - -opensearch: - fullnameOverride: "simplyblock-opensearch" - singleNode: true - replicas: 1 - - antiAffinity: "hard" - persistence: - enabled: true - storageClass: openebs-local-hostpath - size: 10Gi - - resources: - requests: - cpu: "100m" - memory: "512Mi" - limits: - cpu: "500m" - memory: "3Gi" - - extraEnvs: - - name: OPENSEARCH_JAVA_OPTS - value: "-Xms1g -Xmx1g" - - name: bootstrap.memory_lock - value: "true" - - name: action.auto_create_index - value: "false" - - name: plugins.security.ssl.http.enabled - value: "false" - - name: plugins.security.disabled - value: "true" - - securityConfig: - enabled: false - -prometheus: - server: - fullnameOverride: 
simplyblock-prometheus - enabled: true - statefulSet: - enabled: true - name: simplyblock-prometheus - replicaCount: 1 - podLabels: - app: simplyblock-prometheus - podAnnotations: {} - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app.kubernetes.io/component - operator: In - values: - - simplyblock-prometheus - topologyKey: "kubernetes.io/hostname" - service: - servicePort: 9090 - type: ClusterIP - gRPC: - enabled: true - servicePort: 10901 - additionalPorts: - - name: http-thanos - port: 10902 - targetPort: 10902 - protocol: TCP - securityContext: - fsGroup: 65534 - persistentVolume: - enabled: true - size: 5Gi - storageClass: openebs-local-hostpath - extraArgs: - storage.tsdb.min-block-duration: 2h - storage.tsdb.max-block-duration: 2h - sidecarContainers: - thanos-sidecar: - image: thanosio/thanos:v0.31.0 - args: - - sidecar - - --tsdb.path=/prometheus - - --prometheus.url=http://localhost:9090 - - --objstore.config-file=/etc/thanos/objstore.yml - ports: - - name: grpc - containerPort: 10901 - - name: http - containerPort: 10902 - volumeMounts: - - name: storage-volume - mountPath: /prometheus - - name: objstore-config - mountPath: /etc/thanos - resources: - requests: - cpu: "100m" - memory: "256Mi" - limits: - cpu: "250m" - memory: "1Gi" - resources: - requests: - cpu: "100m" - memory: "512Mi" - limits: - cpu: "500m" - memory: "1Gi" - configMapOverrideName: simplyblock-prometheus-config - extraVolumes: - - name: objstore-config - configMap: - name: simplyblock-objstore-config - alertmanager: - enabled: false - - prometheus-pushgateway: - enabled: false - - prometheus-node-exporter: - enabled: false - - kube-state-metrics: - enabled: false - -ingress: - enabled: true - ingressClassName: nginx - useDNS: ${USE_DNS} - host: "${DNS_NAME}" - tlsSecret: ${TLS_SECRET} - controller: - hostNetwork: ${USE_HOST} - dnsPolicy: ClusterFirstWithHostNet - service: - type: ${SERVICE_TYPE} - 
nodePorts: - tcp: - 4501: 32451 - extraArgs: - tcp-services-configmap: "${K8S_NAMESPACE}/simplyblock-tcp-services" - nodeSelector: - simplyblock.io/role: mgmt-plane diff --git a/simplyblock_core/scripts/charts/values.yaml b/simplyblock_core/scripts/charts/values.yaml index 467734176..3c17f041e 100644 --- a/simplyblock_core/scripts/charts/values.yaml +++ b/simplyblock_core/scripts/charts/values.yaml @@ -1,32 +1,32 @@ -graylog: - rootPasswordSha2: "b87c15a8ae4736d771ca60a7cc2014baaeab19b11c31f5fedef9421958a403c9" - passwordSecret: "is6SP2EdWg0NdmVGv6CEp5hRHNL7BKVMFem4t9pouMqDQnHwXMSomas1qcbKSt5yISr8eBHv4Y7Dbswhyz84Ut0TW6kqsiPs" -monitoring: - enabled: true +observability: + enabled: false secret: "sWbpOgba1bKnCfcPkVQi" - -log: deletionInterval: "3d" - retentionPeriod: "7d" level: "DEBUG" - maxNumberIndex: "3" - -grafana: - endpoint: "" - contactPoint: "https://hooks.slack.com/services/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" + graylog: + rootPasswordSha2: "b87c15a8ae4736d771ca60a7cc2014baaeab19b11c31f5fedef9421958a403c9" + passwordSecret: "is6SP2EdWg0NdmVGv6CEp5hRHNL7BKVMFem4t9pouMqDQnHwXMSomas1qcbKSt5yISr8eBHv4Y7Dbswhyz84Ut0TW6kqsiPs" + maxNumberIndex: "3" + retentionPeriod: "7d" + grafana: + endpoint: "" + contactPoint: "https://hooks.slack.com/services/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" image: simplyblock: repository: "public.ecr.aws/simply-block/simplyblock" - tag: "main" + tag: "main-sfam-2359" pullPolicy: "Always" ports: - lvolNvmfPortStart: - -openebs: - enabled: true + lvolNvmfPortStart: 9100 + +storageclass: + allowedTopologyZones: [] + +foundationdb: + multiAZ: false mongodb: name: "simplyblock-mongodb" @@ -57,8 +57,8 @@ opensearch: antiAffinity: "hard" persistence: enabled: true - storageClass: openebs-local-hostpath - size: 10Gi + storageClass: local-hostpath + size: 20Gi resources: requests: @@ -123,7 +123,7 @@ prometheus: persistentVolume: enabled: true size: 5Gi - storageClass: openebs-local-hostpath + storageClass: 
local-hostpath extraArgs: storage.tsdb.min-block-duration: 2h storage.tsdb.max-block-duration: 2h @@ -177,7 +177,7 @@ prometheus: enabled: false ingress: - enabled: true + enabled: false ingressClassName: nginx useDNS: false host: "" @@ -185,8 +185,61 @@ ingress: controller: hostNetwork: true dnsPolicy: ClusterFirstWithHostNet + replicaCount: 2 service: type: ClusterIP extraArgs: tcp-services-configmap: "simplyblock/simplyblock-tcp-services" + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - ingress + topologyKey: "kubernetes.io/hostname" nodeSelector: {} + + +simplyblock: + cluster: + clusterName: simplyblock-cluster + mgmtIfc: eth0 + fabric: tcp + isSingleNode: false + enableNodeAffinity: false + strictNodeAntiAffinity: false + capWarn: 80 + capCrit: 90 + provCapWarn: 120 + provCapCrit: 150 + + pool: + name: simplyblock-pool + capacityLimit: 100Gi + + lvol: + name: simplyblock-lvol + + storageNodes: + name: simplyblock-node + clusterImage: public.ecr.aws/simply-block/simplyblock:main-sfam-2359 + mgmtIfc: eth0 + maxLVol: 10 + maxSize: 0 + partitions: 0 + corePercentage: 65 + spdkDebug: false + coreIsolation: false + workerNodes: + - israel-storage-node-1 + - israel-storage-node-2 + - israel-storage-node-3 + + devices: + name: simplyblock-devices + + tasks: + name: simplyblock-task diff --git a/simplyblock_core/scripts/config_docker.sh b/simplyblock_core/scripts/config_docker.sh index 9f75cdde3..590664ca7 100644 --- a/simplyblock_core/scripts/config_docker.sh +++ b/simplyblock_core/scripts/config_docker.sh @@ -38,7 +38,7 @@ create_override ${DEV_IP} sudo systemctl daemon-reload sudo systemctl restart docker -activate-global-python-argcomplete --user +activate-global-python-argcomplete --user -y if [ ! 
-s "$HOME/.bashrc" ] || [ -z "$(grep "source $HOME/.bash_completion" $HOME/.bashrc)" ] then echo -e "\nsource $HOME/.bash_completion\n" >> $HOME/.bashrc diff --git a/simplyblock_core/scripts/docker-compose-swarm.yml b/simplyblock_core/scripts/docker-compose-swarm.yml index ba0f8b61d..e407d89d7 100644 --- a/simplyblock_core/scripts/docker-compose-swarm.yml +++ b/simplyblock_core/scripts/docker-compose-swarm.yml @@ -130,6 +130,7 @@ services: - 80:80 - 12202:12202 - 9200:9200 + - 9090:9090 networks: - localnet - monitoring-net @@ -349,6 +350,34 @@ services: environment: SIMPLYBLOCK_LOG_LEVEL: "$LOG_LEVEL" + TasksRunnerLVolSyncDelete: + <<: *service-base + image: $SIMPLYBLOCK_DOCKER_IMAGE + command: "python simplyblock_core/services/tasks_runner_sync_lvol_del.py" + deploy: + placement: + constraints: [node.role == manager] + volumes: + - "/etc/foundationdb:/etc/foundationdb" + networks: + - hostnet + environment: + SIMPLYBLOCK_LOG_LEVEL: "$LOG_LEVEL" + + SnapshotReplication: + <<: *service-base + image: $SIMPLYBLOCK_DOCKER_IMAGE + command: "python simplyblock_core/services/snapshot_replication.py" + deploy: + placement: + constraints: [node.role == manager] + volumes: + - "/etc/foundationdb:/etc/foundationdb" + networks: + - hostnet + environment: + SIMPLYBLOCK_LOG_LEVEL: "$LOG_LEVEL" + networks: monitoring-net: external: true diff --git a/simplyblock_core/scripts/haproxy.cfg b/simplyblock_core/scripts/haproxy.cfg index d95d3ebec..667989baf 100644 --- a/simplyblock_core/scripts/haproxy.cfg +++ b/simplyblock_core/scripts/haproxy.cfg @@ -65,6 +65,11 @@ backend graylog_input_services balance roundrobin server-template graylog_input- 1 graylog:12201 check resolvers docker init-addr libc,none +backend prometheus_input_services + mode tcp + balance roundrobin + server-template prometheus_input- 1 prometheus:9090 check resolvers docker init-addr libc,none + backend opensearch_services balance roundrobin http-request set-path %[path,regsub(^/opensearch/?,/)] @@ -85,3 +90,8 @@ 
frontend graylog_input_front bind *:12202 mode tcp default_backend graylog_input_services + +frontend prometheus_input_front + bind *:9090 + mode tcp + default_backend prometheus_input_services diff --git a/simplyblock_core/scripts/install_deps.sh b/simplyblock_core/scripts/install_deps.sh index 256a55500..56d0bf96e 100644 --- a/simplyblock_core/scripts/install_deps.sh +++ b/simplyblock_core/scripts/install_deps.sh @@ -2,15 +2,15 @@ if [[ "$1" == "docker" ]]; then sudo yum install -y yum-utils - sudo yum install -y https://repo.almalinux.org/almalinux/9/devel/aarch64/os/Packages/tuned-profiles-realtime-2.24.0-1.el9.noarch.rpm + sudo yum install -y https://repo.almalinux.org/almalinux/9/devel/aarch64/os/Packages/tuned-profiles-realtime-2.26.0-1.el9.noarch.rpm sudo yum install -y yum-utils xorg-x11-xauth nvme-cli fio tuned sudo yum install hostname pkg-config git wget python3-pip yum-utils \ iptables pciutils -y sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo - sudo yum install docker-ce docker-ce-cli \ - containerd.io docker-buildx-plugin docker-compose-plugin -y + sudo yum install docker-ce-29.1.3-1.el9 docker-ce-cli-29.1.3-1.el9 \ + containerd.io-2.2.0-2.el9 docker-buildx-plugin-0.30.1-1.el9 docker-compose-plugin-5.0.1-1.el9 -y sudo systemctl enable docker sudo systemctl start docker diff --git a/simplyblock_core/services/capacity_and_stats_collector.py b/simplyblock_core/services/capacity_and_stats_collector.py index 6f702d051..07a850edd 100644 --- a/simplyblock_core/services/capacity_and_stats_collector.py +++ b/simplyblock_core/services/capacity_and_stats_collector.py @@ -4,7 +4,6 @@ from simplyblock_core import constants, db_controller, utils from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient from simplyblock_core.models.stats import DeviceStatObject, NodeStatObject, ClusterStatObject logger = 
utils.get_logger(__name__) @@ -62,17 +61,17 @@ def add_device_stats(cl, device, capacity_dict, stats_dict): if last_record: time_diff = (now - last_record.date) if time_diff > 0: - data['read_bytes_ps'] = int((data['read_bytes'] - last_record['read_bytes']) / time_diff) - data['read_io_ps'] = int((data['read_io'] - last_record['read_io']) / time_diff) - data['read_latency_ps'] = int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff) + data['read_bytes_ps'] = abs(int((data['read_bytes'] - last_record['read_bytes']) / time_diff)) + data['read_io_ps'] = abs(int((data['read_io'] - last_record['read_io']) / time_diff)) + data['read_latency_ps'] = abs(int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff)) - data['write_bytes_ps'] = int((data['write_bytes'] - last_record['write_bytes']) / time_diff) - data['write_io_ps'] = int((data['write_io'] - last_record['write_io']) / time_diff) - data['write_latency_ps'] = int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff) + data['write_bytes_ps'] = abs(int((data['write_bytes'] - last_record['write_bytes']) / time_diff)) + data['write_io_ps'] = abs(int((data['write_io'] - last_record['write_io']) / time_diff)) + data['write_latency_ps'] = abs(int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff)) - data['unmap_bytes_ps'] = int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff) - data['unmap_io_ps'] = int((data['unmap_io'] - last_record['unmap_io']) / time_diff) - data['unmap_latency_ps'] = int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff) + data['unmap_bytes_ps'] = abs(int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff)) + data['unmap_io_ps'] = abs(int((data['unmap_io'] - last_record['unmap_io']) / time_diff)) + data['unmap_latency_ps'] = abs(int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff)) else: logger.warning("last 
record not found") @@ -83,6 +82,11 @@ def add_device_stats(cl, device, capacity_dict, stats_dict): stat_obj.write_to_db(db.kv_store) last_object_record[device.get_id()] = stat_obj + all_stats = db.get_device_stats(device, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -117,6 +121,11 @@ def add_node_stats(node, records): stat_obj = NodeStatObject(data=data) stat_obj.write_to_db(db.kv_store) + all_stats = db.get_node_stats(node, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -146,6 +155,11 @@ def add_cluster_stats(cl, records): stat_obj = ClusterStatObject(data=data) stat_obj.write_to_db(db.kv_store) + all_stats = db.get_cluster_stats(cl, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -173,15 +187,15 @@ def add_cluster_stats(cl, records): logger.error("No devices found in node: %s", node.get_id()) continue - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, - node.rpc_username, node.rpc_password, - timeout=5, retry=2) - + rpc_client = node.rpc_client(timeout=5, retry=2) node_devs_stats = {} - ret = rpc_client.get_lvol_stats() - if ret: - node_devs_stats = {b['name']: b for b in ret['bdevs']} + try: + ret = rpc_client.get_lvol_stats() + if ret: + node_devs_stats = {b['name']: b for b in ret['bdevs']} + except Exception as e: + logger.error(e) + continue devices_records = [] for device in node.nvme_devices: @@ -189,7 +203,11 @@ def add_cluster_stats(cl, records): if device.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE]: logger.info(f"Device is skipped: {device.get_id()} status: {device.status}") continue - capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + try: + capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + except Exception as e: + logger.error(e) + continue if device.nvme_bdev in 
node_devs_stats: stats_dict = node_devs_stats[device.nvme_bdev] record = add_device_stats(cl, device, capacity_dict, stats_dict) diff --git a/simplyblock_core/services/health_check_service.py b/simplyblock_core/services/health_check_service.py index bb48e9620..8fc5f0489 100644 --- a/simplyblock_core/services/health_check_service.py +++ b/simplyblock_core/services/health_check_service.py @@ -1,4 +1,5 @@ # coding=utf-8 +import threading import time from datetime import datetime @@ -10,10 +11,10 @@ from simplyblock_core.rpc_client import RPCClient from simplyblock_core import constants, db_controller, distr_controller, storage_node_ops -logger = utils.get_logger(__name__) - utils.init_sentry_sdk() +logger = utils.get_logger(__name__) + def set_node_health_check(snode, health_check_status): snode = db.get_storage_node_by_id(snode.get_id()) @@ -42,223 +43,242 @@ def set_device_health_check(cluster_id, device, health_check_status): return -# get DB controller -db = db_controller.DBController() +def check_node(snode): -logger.info("Starting health check service") -while True: - clusters = db.get_clusters() - for cluster in clusters: - cluster_id = cluster.get_id() - snodes = db.get_storage_nodes_by_cluster_id(cluster_id) - if not snodes: - logger.warning("storage nodes list is empty") - - for snode in snodes: - logger.info("Node: %s, status %s", snode.get_id(), snode.status) - - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - set_node_health_check(snode, False) - for device in snode.nvme_devices: - set_device_health_check(cluster_id, device, False) - continue - - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... 
{ping_check}") - - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") - - # 3- check node RPC - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - - is_node_online = ping_check and node_api_check and node_rpc_check - - health_check_status = is_node_online - if node_rpc_check: - logger.info(f"Node device count: {len(snode.nvme_devices)}") - node_devices_check = True - node_remote_devices_check = True - - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, - timeout=3, retry=2) - connected_devices = [] - - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - # node_bdev_names = [b['name'] for b in node_bdevs] - node_bdev_names = {} - for b in node_bdevs: - node_bdev_names[b['name']] = b - for al in b['aliases']: - node_bdev_names[al] = b - else: - node_bdev_names = {} - - subsystem_list = rpc_client.subsystem_list() or [] - subsystems = { - subsystem['nqn']: subsystem - for subsystem - in subsystem_list - } - - for device in snode.nvme_devices: - passed = True - - if device.io_error: - logger.info(f"Device io_error {device.get_id()}") - passed = False - - if device.status != NVMeDevice.STATUS_ONLINE: - logger.info(f"Device status {device.status}") - passed = False - - if snode.enable_test_device: - bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] - else: - bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] - - logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") - problems = 0 - for bdev in bdevs_stack: - if not bdev: - continue - - if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): - problems += 1 - passed = False - - logger.info(f"Checking 
Device's BDevs ... ({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") - - passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) - - set_device_health_check(cluster_id, device, passed) - if device.status == NVMeDevice.STATUS_ONLINE: - node_devices_check &= passed - - logger.info(f"Node remote device: {len(snode.remote_devices)}") - - for remote_device in snode.remote_devices: - org_dev = db.get_storage_device_by_id(remote_device.get_id()) - org_node = db.get_storage_node_by_id(remote_device.node_id) - if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status == StorageNode.STATUS_ONLINE: - if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): - connected_devices.append(remote_device.get_id()) - continue - - if not org_dev.alceml_bdev: - logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") - continue - - try: - storage_node_ops.connect_device( - f"remote_{org_dev.alceml_bdev}", org_dev, snode, - bdev_names=list(node_bdev_names), reattach=False, - ) - connected_devices.append(org_dev.get_id()) - sn = db.get_storage_node_by_id(snode.get_id()) - for d in sn.remote_devices: - if d.get_id() == remote_device.get_id(): - d.status = NVMeDevice.STATUS_ONLINE - sn.write_to_db() - break - distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) - except RuntimeError: - logger.error(f"Failed to connect to device: {org_dev.get_id()}") - node_remote_devices_check = False - - connected_jms = [] - if snode.jm_device and snode.jm_device.get_id(): - jm_device = snode.jm_device - logger.info(f"Node JM: {jm_device.get_id()}") - if jm_device.jm_bdev in node_bdev_names: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... 
ok") - connected_jms.append(jm_device.get_id()) + snode = db.get_storage_node_by_id(snode.get_id()) + logger.info("Node: %s, status %s", snode.get_id(), snode.status) + + if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, + StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + logger.info(f"Node status is: {snode.status}, skipping") + set_node_health_check(snode, False) + for device in snode.nvme_devices: + set_device_health_check(snode.cluster_id, device, False) + return + + # 1- check node ping + ping_check = health_controller._check_node_ping(snode.mgmt_ip) + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") + + # 2- check node API + node_api_check = health_controller._check_node_api(snode.mgmt_ip) + logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") + + # 3- check node RPC + node_rpc_check = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... 
{node_rpc_check}") + + is_node_online = ping_check and node_api_check and node_rpc_check + + health_check_status = is_node_online + if node_rpc_check: + logger.info(f"Node device count: {len(snode.nvme_devices)}") + node_devices_check = True + node_remote_devices_check = True + + rpc_client = RPCClient( + snode.mgmt_ip, snode.rpc_port, + snode.rpc_username, snode.rpc_password, + timeout=3, retry=2) + connected_devices = [] + + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + # node_bdev_names = [b['name'] for b in node_bdevs] + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + + subsystem_list = rpc_client.subsystem_list() or [] + subsystems = { + subsystem['nqn']: subsystem + for subsystem + in subsystem_list + } + + for device in snode.nvme_devices: + passed = True + + if device.io_error: + logger.info(f"Device io_error {device.get_id()}") + passed = False + + if device.status != NVMeDevice.STATUS_ONLINE: + logger.info(f"Device status {device.status}") + passed = False + + if snode.enable_test_device: + bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] + else: + bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] + + logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") + problems = 0 + for bdev in bdevs_stack: + if not bdev: + continue + + if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): + problems += 1 + passed = False + + logger.info(f"Checking Device's BDevs ... 
({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") + + passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) + + set_device_health_check(snode.cluster_id, device, passed) + if device.status == NVMeDevice.STATUS_ONLINE: + node_devices_check &= passed + + logger.info(f"Node remote device: {len(snode.remote_devices)}") + + for remote_device in snode.remote_devices: + org_dev = db.get_storage_device_by_id(remote_device.get_id()) + org_node = db.get_storage_node_by_id(remote_device.node_id) + if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): + connected_devices.append(remote_device.get_id()) + continue + + if not org_dev.alceml_bdev: + logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") + continue + + try: + storage_node_ops.connect_device( + f"remote_{org_dev.alceml_bdev}", org_dev, snode, + bdev_names=list(node_bdev_names), reattach=False, + ) + connected_devices.append(org_dev.get_id()) + sn = db.get_storage_node_by_id(snode.get_id()) + for d in sn.remote_devices: + if d.get_id() == remote_device.get_id(): + d.status = NVMeDevice.STATUS_ONLINE + sn.write_to_db() + break + distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) + except RuntimeError: + logger.error(f"Failed to connect to device: {org_dev.get_id()}") + node_remote_devices_check = False + + connected_jms = [] + if snode.jm_device and snode.jm_device.get_id(): + jm_device = snode.jm_device + logger.info(f"Node JM: {jm_device.get_id()}") + if jm_device.jm_bdev in node_bdev_names: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... ok") + connected_jms.append(jm_device.get_id()) + else: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... 
not found") + + if snode.enable_ha_jm: + logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") + for remote_device in snode.remote_jm_devices: + if remote_device.remote_bdev: + check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) + if check: + connected_jms.append(remote_device.get_id()) else: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... not found") - - if snode.enable_ha_jm: - logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") - for remote_device in snode.remote_jm_devices: - if remote_device.remote_bdev: - check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) - if check: - connected_jms.append(remote_device.get_id()) - else: + node_remote_devices_check = False + + for jm_id in snode.jm_ids: + if jm_id and jm_id not in connected_jms: + for nd in db.get_storage_nodes(): + if nd.jm_device and nd.jm_device.get_id() == jm_id: + if nd.status == StorageNode.STATUS_ONLINE: node_remote_devices_check = False + break - for jm_id in snode.jm_ids: - if jm_id and jm_id not in connected_jms: - for nd in db.get_storage_nodes(): - if nd.jm_device and nd.jm_device.get_id() == jm_id: - if nd.status == StorageNode.STATUS_ONLINE: - node_remote_devices_check = False - break - - if not node_remote_devices_check and cluster.status in [ - Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - snode = db.get_storage_node_by_id(snode.get_id()) - snode.remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) - snode.write_to_db() - - lvstore_check = True + if not node_remote_devices_check and cluster.status in [ + Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: + remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) snode = db.get_storage_node_by_id(snode.get_id()) - if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ - snode.lvstore_status == "failed" : + 
snode.remote_jm_devices = remote_jm_devices + snode.write_to_db() - lvstore_stack = snode.lvstore_stack + lvstore_check = True + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ + snode.lvstore_status == "failed": + + lvstore_stack = snode.lvstore_stack + lvstore_check &= health_controller._check_node_lvstore( + lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) + + if snode.secondary_node_id: + + lvstore_check &= health_controller._check_node_hublvol( + snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) + + second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) - - if snode.secondary_node_id: - - lvstore_check &= health_controller._check_node_hublvol( - snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) - - second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) - sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) - if not sec_node_check: - if snode.status == StorageNode.STATUS_ONLINE: - ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if ret: - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - # is_sec_node_leader = True - # check jc_compression status - jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status(snode.jm_vuid) - if not jc_compression_is_active: - lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, auto_fix=True) - - - lvol_port_check = False - # if node_api_check: - 
ports = [snode.lvol_subsys_port] - - if snode.lvstore_stack_secondary_1: - second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - ports.append(second_node_1.lvol_subsys_port) - - for port in ports: - lvol_port_check = health_controller._check_port_on_node(snode, port) - logger.info( - f"Check: node {snode.mgmt_ip}, port: {port} ... {lvol_port_check}") - if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: - tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) - - health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check - set_node_health_check(snode, bool(health_check_status)) + lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) + sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) + if not sec_node_check: + if snode.status == StorageNode.STATUS_ONLINE: + ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = True + # check jc_compression status + jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status( + snode.jm_vuid) + if not jc_compression_is_active: + lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, + auto_fix=True) + + lvol_port_check = False + # if node_api_check: + ports = [snode.lvol_subsys_port] + + if snode.lvstore_stack_secondary_1: + second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: + ports.append(second_node_1.lvol_subsys_port) + + for port in ports: + try: + lvol_port_check = health_controller.check_port_on_node(snode, port) + logger.info( + f"Check: node {snode.mgmt_ip}, port: {port} ... 
{lvol_port_check}") + if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: + tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) + except Exception: + logger.error("Check node port failed, connection error") + + health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check + set_node_health_check(snode, bool(health_check_status)) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +def loop_for_node(snode): + while True: + try: + check_node(snode) + except Exception as e: + logger.error(e) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +logger.info("Starting health check service") +db = db_controller.DBController() +threads_maps: dict[str, threading.Thread] = {} +while True: + clusters = db.get_clusters() + for cluster in clusters: + for node in db.get_storage_nodes_by_cluster_id(cluster.get_id()): + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + t = threading.Thread(target=loop_for_node, args=(node,)) + t.start() + threads_maps[node_id] = t time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_monitor.py b/simplyblock_core/services/lvol_monitor.py index 884b67396..79c492a40 100644 --- a/simplyblock_core/services/lvol_monitor.py +++ b/simplyblock_core/services/lvol_monitor.py @@ -60,8 +60,8 @@ def resume_comp(lvol): return rpc_client = RPCClient( node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=2) - ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid) - if err and "code" in err and err["code"] != -2: + ret, err = rpc_client.jc_suspend_compression(jm_vuid=node.jm_vuid, suspend=False) + if err: logger.info("Failed to resume JC compression adding task...") tasks_controller.add_jc_comp_resume_task(node.cluster_id, node.get_id(), node.jm_vuid) @@ -118,22 +118,24 @@ def process_lvol_delete_finish(lvol): 
lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) return + if snode.get_id() == leader_node.get_id(): + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + else: + sec_node = db.get_storage_node_by_id(snode.get_id()) + # 3-1 async delete lvol bdev from primary primary_node = db.get_storage_node_by_id(leader_node.get_id()) if primary_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + if sec_node and sec_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, + StorageNode.STATUS_DOWN, StorageNode.STATUS_UNREACHABLE]: + primary_node.lvol_del_sync_lock() ret = lvol_controller.delete_lvol_from_node(lvol.get_id(), primary_node.get_id(), del_async=True) if not ret: logger.error(f"Failed to delete lvol from primary_node node: {primary_node.get_id()}") # 3-2 async delete lvol bdev from secondary - if snode.get_id() == leader_node.get_id(): - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - else: - sec_node = db.get_storage_node_by_id(snode.get_id()) - - if sec_node: - sec_node.lvol_sync_del_queue.append(f"{lvol.lvs_name}/{lvol.lvol_bdev}") - sec_node.write_to_db() + if sec_node and sec_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN, StorageNode.STATUS_UNREACHABLE]: + tasks_controller.add_lvol_sync_del_task(sec_node.cluster_id, sec_node.get_id(), f"{lvol.lvs_name}/{lvol.lvol_bdev}", primary_node.get_id()) lvol_events.lvol_delete(lvol) lvol.remove(db.kv_store) @@ -160,6 +162,193 @@ def process_lvol_delete_try_again(lvol): lvol.write_to_db() +def check_node(snode): + node_bdev_names = [] + node_lvols_nqns = {} + sec_node_bdev_names = {} + sec_node_lvols_nqns = {} + sec_node = None + + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + node_bdevs = snode.rpc_client().get_bdevs() + if node_bdevs: + node_bdev_names = [b['name'] for b in node_bdevs] + for bdev 
in node_bdevs: + if "aliases" in bdev and bdev["aliases"]: + node_bdev_names.extend(bdev['aliases']) + ret = snode.rpc_client().subsystem_list() + if ret: + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + sec_rpc_client = RPCClient( + sec_node.mgmt_ip, sec_node.rpc_port, + sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) + ret = sec_rpc_client.get_bdevs() + if ret: + for bdev in ret: + sec_node_bdev_names[bdev['name']] = bdev + + ret = sec_rpc_client.subsystem_list() + if ret: + for sub in ret: + sec_node_lvols_nqns[sub['nqn']] = sub + + for lvol in db.get_lvols_by_node_id(snode.get_id()): + + if lvol.status == LVol.STATUS_IN_CREATION: + continue + + if lvol.status == lvol.STATUS_IN_DELETION: + # check leadership + leader_node = None + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = snode + + if not leader_node and sec_node: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = sec_node + + if not leader_node: + raise Exception("Failed to get leader node") + + if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): + lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) + time.sleep(3) + + try: + ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( + f"{lvol.lvs_name}/{lvol.lvol_bdev}") + except Exception as e: + logger.error(e) + # 
timeout detected, check other node + break + + if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed + process_lvol_delete_finish(lvol) + + elif ret == 1: # Async lvol deletion is in progress or queued + logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") + pre_lvol_delete_rebalance() + + elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") + + elif ret == 4: # No async delete request exists for this lvol + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No async delete request exists for this lvol") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -1: # Operation not permitted + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Operation not permitted") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -2: # No such file or directory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No such file or directory") + process_lvol_delete_finish(lvol) + + elif ret == -5: # I/O error + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("I/O error") + process_lvol_delete_try_again(lvol) + + elif ret == -11: # Try again + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Try again") + process_lvol_delete_try_again(lvol) + + elif ret == -12: # Out of memory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Out of memory") + process_lvol_delete_try_again(lvol) + + 
elif ret == -16: # Device or resource busy + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Device or resource busy") + process_lvol_delete_try_again(lvol) + + elif ret == -19: # No such device + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Finishing lvol delete") + process_lvol_delete_finish(lvol) + + elif ret == -35: # Leadership changed + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Leadership changed") + process_lvol_delete_try_again(lvol) + + elif ret == -36: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") + process_lvol_delete_try_again(lvol) + + else: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") + + continue + + passed = True + try: + ret = health_controller.check_lvol_on_node( + lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) + if not ret: + passed = False + except Exception as e: + logger.error(f"Failed to check lvol:{lvol.get_id()} on node: {lvol.node_id}") + logger.error(e) + + if lvol.ha_type == "ha": + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + try: + ret = health_controller.check_lvol_on_node( + lvol.get_id(), snode.secondary_node_id, sec_node_bdev_names, sec_node_lvols_nqns) + if not ret: + passed = False + else: + passed = True + except Exception as e: + logger.error(f"Failed to check lvol: {lvol.get_id()} on node: {snode.secondary_node_id}") + logger.error(e) + + if snode.lvstore_status == "ready": + + logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") + set_lvol_health_check(lvol, passed) + if passed: + set_lvol_status(lvol, LVol.STATUS_ONLINE) + + if 
snode.lvstore_status == "ready": + + for snap in db.get_snapshots_by_node_id(snode.get_id()): + present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + set_snapshot_health_check(snap, present) + + + # get DB controller db = db_controller.DBController() @@ -173,195 +362,9 @@ def process_lvol_delete_try_again(lvol): continue for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): - node_bdev_names = [] - node_lvols_nqns = {} - sec_node_bdev_names = {} - sec_node_lvols_nqns = {} - sec_node = None - - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - node_bdevs = snode.rpc_client().get_bdevs() - if node_bdevs: - node_bdev_names = [b['name'] for b in node_bdevs] - for bdev in node_bdevs: - if "aliases" in bdev and bdev["aliases"]: - node_bdev_names.extend(bdev['aliases']) - ret = snode.rpc_client().subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - ret = sec_rpc_client.get_bdevs() - if ret: - for bdev in ret: - sec_node_bdev_names[bdev['name']] = bdev - - ret = sec_rpc_client.subsystem_list() - if ret: - for sub in ret: - sec_node_lvols_nqns[sub['nqn']] = sub - - for lvol in db.get_lvols_by_node_id(snode.get_id()): - - if lvol.status == LVol.STATUS_IN_CREATION: - continue - - if lvol.status == lvol.STATUS_IN_DELETION: - # check leadership - leader_node = None - snode = db.get_storage_node_by_id(snode.get_id()) - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - 
if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = snode - - if not leader_node and sec_node: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): - lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( - f"{lvol.lvs_name}/{lvol.lvol_bdev}") - except Exception as e: - logger.error(e) - # timeout detected, check other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_lvol_delete_finish(lvol) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") - pre_lvol_delete_rebalance() - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this lvol - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No async delete request exists for this lvol") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -1: # Operation not permitted - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - 
set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -2: # No such file or directory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_lvol_delete_finish(lvol) - - elif ret == -5: # I/O error - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("I/O error") - process_lvol_delete_try_again(lvol) - - elif ret == -11: # Try again - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Try again") - process_lvol_delete_try_again(lvol) - - elif ret == -12: # Out of memory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_lvol_delete_try_again(lvol) - - elif ret == -16: # Device or resource busy - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_lvol_delete_try_again(lvol) - - elif ret == -19: # No such device - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Finishing lvol delete") - process_lvol_delete_finish(lvol) - - elif ret == -35: # Leadership changed - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_lvol_delete_try_again(lvol) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - process_lvol_delete_try_again(lvol) - - else: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - - continue - - passed = True - ret = health_controller.check_lvol_on_node( - lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) - if not ret: - passed = False - - if lvol.ha_type == "ha": - sec_node = 
db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - ret = health_controller.check_lvol_on_node( - lvol.get_id(), snode.secondary_node_id, sec_node_bdev_names, sec_node_lvols_nqns) - if not ret: - passed = False - else: - passed = True - - if snode.lvstore_status == "ready": - - logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") - set_lvol_health_check(lvol, passed) - if passed: - set_lvol_status(lvol, LVol.STATUS_ONLINE) - - if snode.lvstore_status == "ready": - - for snap in db.get_snapshots_by_node_id(snode.get_id()): - present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) - set_snapshot_health_check(snap, present) - - snode = db.get_storage_node_by_id(snode.get_id()) - if snode.status == StorageNode.STATUS_ONLINE: - not_deleted = [] - for bdev_name in snode.lvol_sync_del_queue: - logger.info(f"Sync delete bdev: {bdev_name} from node: {snode.get_id()}") - ret, err = snode.rpc_client().delete_lvol(bdev_name, del_async=True) - if not ret: - if "code" in err and err["code"] == -19: - logger.error(f"Sync delete completed with error: {err}") - else: - logger.error(f"Failed to sync delete bdev: {bdev_name} from node: {snode.get_id()}") - not_deleted.append(bdev_name) - snode.lvol_sync_del_queue = not_deleted - snode.write_to_db() + try: + check_node(snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_stat_collector.py b/simplyblock_core/services/lvol_stat_collector.py index 09aa7d571..18f09d4ce 100644 --- a/simplyblock_core/services/lvol_stat_collector.py +++ b/simplyblock_core/services/lvol_stat_collector.py @@ -7,7 +7,6 @@ from simplyblock_core.models.lvol_model import LVol from simplyblock_core.models.stats import LVolStatObject, PoolStatObject from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient logger = 
utils.get_logger(__name__) @@ -154,6 +153,11 @@ def add_lvol_stats(cluster, lvol, stats_list, capacity_dict=None): stat_obj.write_to_db(db.kv_store) last_object_record[lvol.get_id()] = stat_obj + all_stats = db.get_lvol_stats(lvol, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -173,6 +177,12 @@ def add_pool_stats(pool, records): stat_obj = PoolStatObject(data=data) stat_obj.write_to_db(db.kv_store) + + all_stats = db.get_pool_stats(pool, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -201,68 +211,66 @@ def add_pool_stats(pool, records): continue if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + try: + rpc_client = snode.rpc_client(timeout=3, retry=2) + if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: + node_bdev_names = all_node_bdev_names[snode.get_id()] + else: + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + node_bdev_names = {b['name']: b for b in node_bdevs} + all_node_bdev_names[snode.get_id()] = node_bdev_names - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=3, retry=2) - - if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: - node_bdev_names = all_node_bdev_names[snode.get_id()] - else: - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - node_bdev_names = {b['name']: b for b in node_bdevs} - all_node_bdev_names[snode.get_id()] = node_bdev_names - - if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: - node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] - else: - ret = rpc_client.subsystem_list() - if ret: - node_lvols_nqns = {} - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - - if snode.get_id() in all_node_lvols_stats and 
all_node_lvols_stats[snode.get_id()]: - node_lvols_stats = all_node_lvols_stats[snode.get_id()] - else: - ret = rpc_client.get_lvol_stats() - if ret: - node_lvols_stats = {} - for st in ret['bdevs']: - node_lvols_stats[st['name']] = st - all_node_lvols_stats[snode.get_id()] = node_lvols_stats - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - - if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: - ret = sec_rpc_client.get_bdevs() - if ret: - # node_bdev_names = {} - node_bdev_names = {b['name']: b for b in ret} - all_node_bdev_names[sec_node.get_id()] = node_bdev_names - - if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: - ret = sec_rpc_client.subsystem_list() + if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: + node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] + else: + ret = rpc_client.subsystem_list() if ret: node_lvols_nqns = {} for sub in ret: node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: - ret = sec_rpc_client.get_lvol_stats() + if snode.get_id() in all_node_lvols_stats and all_node_lvols_stats[snode.get_id()]: + node_lvols_stats = all_node_lvols_stats[snode.get_id()] + else: + ret = rpc_client.get_lvol_stats() if ret: - sec_node_lvols_stats = {} + node_lvols_stats = {} for st in ret['bdevs']: - sec_node_lvols_stats[st['name']] = st - all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + node_lvols_stats[st['name']] = st + all_node_lvols_stats[snode.get_id()] = node_lvols_stats + 
except Exception as e: + logger.error(e) + + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: + try: + sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2) + if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: + ret = sec_rpc_client.get_bdevs() + if ret: + # node_bdev_names = {} + node_bdev_names = {b['name']: b for b in ret} + all_node_bdev_names[sec_node.get_id()] = node_bdev_names + if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: + ret = sec_rpc_client.subsystem_list() + if ret: + node_lvols_nqns = {} + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + + if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: + ret = sec_rpc_client.get_lvol_stats() + if ret: + sec_node_lvols_stats = {} + for st in ret['bdevs']: + sec_node_lvols_stats[st['name']] = st + all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + except Exception as e: + logger.error(e) for lvol in lvol_list: if lvol.status in [LVol.STATUS_IN_CREATION, LVol.STATUS_IN_DELETION]: diff --git a/simplyblock_core/services/main_distr_event_collector.py b/simplyblock_core/services/main_distr_event_collector.py index 31dffeda0..93e0ae4df 100644 --- a/simplyblock_core/services/main_distr_event_collector.py +++ b/simplyblock_core/services/main_distr_event_collector.py @@ -1,7 +1,7 @@ # coding=utf-8 import threading import time - +from datetime import datetime from simplyblock_core import constants, db_controller, utils, rpc_client, distr_controller from simplyblock_core.controllers import events_controller, device_controller @@ -9,9 +9,8 @@ from simplyblock_core.models.storage_node import StorageNode -logger = utils.get_logger(__name__) - utils.init_sentry_sdk() +logger = utils.get_logger(__name__) # get 
DB controller db = db_controller.DBController() @@ -19,7 +18,17 @@ EVENTS_LIST = ['SPDK_BDEV_EVENT_REMOVE', "error_open", 'error_read', "error_write", "error_unmap", "error_write_cannot_allocate"] -def process_device_event(event): + +def remove_remote_device_from_node(node_id, device_id): + node = db.get_storage_node_by_id(node_id) + for remote_dev in node.remote_devices: + if remote_dev.get_id() == device_id: + node.remote_devices.remove(remote_dev) + node.write_to_db() + break + + +def process_device_event(event, logger): if event.message in EVENTS_LIST: node_id = event.node_id storage_id = event.storage_id @@ -39,15 +48,31 @@ def process_device_event(event): event.status = 'device_not_found' return - if device_obj.connecting_from_node == event_node_obj.get_id(): + if "timestamp" in event.object_dict: + ev_time = event.object_dict['timestamp'] + time_delta = datetime.now() - datetime.strptime(ev_time, '%Y-%m-%dT%H:%M:%S.%fZ') + if time_delta.total_seconds() > 8: + if snode.rpc_client().bdev_nvme_controller_list(device_obj.nvme_controller): + logger.info(f"event was fired {time_delta.total_seconds()} seconds ago, controller ok, skipping") + event.status = f'skipping_late_by_{int(time_delta.total_seconds())}s_but_controller_ok' + return + + logger.info(f"event was fired {time_delta.total_seconds()} seconds ago, checking controller filed") + event.status = f'late_by_{int(time_delta.total_seconds())}s' + + if device_obj.is_connection_in_progress_to_node(event_node_obj.get_id()): logger.warning("Connection attempt was found from node to device, sleeping 5 seconds") time.sleep(5) + device_obj.lock_device_connection(event_node_obj.get_id()) + if device_obj.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE]: logger.info(f"The device is not online, skipping. 
status: {device_obj.status}") event.status = f'skipped:dev_{device_obj.status}' distr_controller.send_dev_status_event(device_obj, device_obj.status, event_node_obj) + remove_remote_device_from_node(event_node_obj.get_id(), device_obj.get_id()) + device_obj.release_device_connection() return @@ -55,12 +80,16 @@ def process_device_event(event): distr_controller.send_dev_status_event(device_obj, NVMeDevice.STATUS_UNAVAILABLE, event_node_obj) logger.info(f"Node is not online, skipping. status: {event_node_obj.status}") event.status = 'skipped:node_offline' + remove_remote_device_from_node(event_node_obj.get_id(), device_obj.get_id()) + device_obj.release_device_connection() return if device_node_obj.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: distr_controller.send_dev_status_event(device_obj, NVMeDevice.STATUS_UNAVAILABLE, event_node_obj) logger.info(f"Node is not online, skipping. status: {device_node_obj.status}") event.status = f'skipped:device_node_{device_node_obj.status}' + remove_remote_device_from_node(event_node_obj.get_id(), device_obj.get_id()) + device_obj.release_device_connection() return @@ -83,63 +112,37 @@ def process_device_event(event): device_controller.device_set_io_error(device_obj.get_id(), True) else: distr_controller.send_dev_status_event(device_obj, NVMeDevice.STATUS_UNAVAILABLE, event_node_obj) - event_node_obj = db.get_storage_node_by_id(event_node_obj.get_id()) - for dev in event_node_obj.remote_devices: - if dev.get_id() == device_obj.get_id(): - event_node_obj.remote_devices.remove(dev) - event_node_obj.write_to_db() - break + remove_remote_device_from_node(event_node_obj.get_id(), device_obj.get_id()) event.status = 'processed' + device_obj.release_device_connection() -def process_lvol_event(event): +def process_lvol_event(event, logger): if event.message in ["error_open", 'error_read', "error_write", "error_unmap"]: vuid = event.object_dict['vuid'] - # node_id = event.node_id - # 
storage_node_ops.set_node_status(node_id, StorageNode.STATUS_SUSPENDED) - # event_node_obj = db.get_storage_node_by_id(node_id) - # tasks_controller.add_node_to_auto_restart(event_node_obj) - - # lvols = [] - # for lv in db.get_lvols(): # pass - # if lv.node_id == node_id: - # lvols.append(lv) - # - # if not lvols: - # logger.error(f"LVols on node {node_id} not found") - # event.status = 'lvols_not_found' - # else: - # for lvol in lvols: - # if lvol.status == LVol.STATUS_ONLINE: - # logger.info("Setting LVol to offline") - # lvol.io_error = True - # old_status = lvol.status - # lvol.status = LVol.STATUS_OFFLINE - # lvol.write_to_db(db.kv_store) - # lvol_events.lvol_status_change(lvol, lvol.status, old_status, caused_by="monitor") - # lvol_events.lvol_io_error_change(lvol, True, False, caused_by="monitor") event.status = f'distr error {vuid}' else: logger.error(f"Unknown event message: {event.message}") event.status = "event_unknown" -def process_event(event): +def process_event(event, logger): if event.event == "device_status": if event.storage_id >= 0: - process_device_event(event) + process_device_event(event, logger) if event.vuid >= 0: - process_lvol_event(event) + process_lvol_event(event, logger) event.write_to_db(db.kv_store) def start_event_collector_on_node(node_id): + snode = db.get_storage_node_by_id(node_id) + logger.info(f"Starting Distr event collector on node: {node_id}") - snode = db.get_storage_node_by_id(node_id) client = rpc_client.RPCClient( snode.mgmt_ip, snode.rpc_port, @@ -151,6 +154,7 @@ def start_event_collector_on_node(node_id): while True: page = 1 events_groups = {} + events_list = [] while True: try: events = client.distr_status_events_discard_then_get( @@ -181,14 +185,17 @@ def start_event_collector_on_node(node_id): events_groups[sid][et][msg]: 1 # type: ignore else: events_groups[sid][et][msg].count += 1 # type: ignore - events_groups[sid][et][msg].write_to_db() # type: ignore - logger.info(f"Event {msg} already processed") continue 
event = events_controller.log_distr_event(snode.cluster_id, snode.get_id(), event_dict) logger.info(f"Processing event: {event.get_id()}") - process_event(event) + process_event(event, logger) events_groups[sid][et][msg] = event + events_list.append(event) + + for ev in events_list: + if ev.count > 1 : + ev.write_to_db(db.kv_store) logger.info(f"Discarding events: {len(events)}") client.distr_status_events_discard_then_get(len(events), 0) @@ -197,8 +204,7 @@ def start_event_collector_on_node(node_id): logger.info("no events found, sleeping") break except Exception as e: - logger.error("Failed to process distr events") - logger.exception(e) + logger.error(f"Failed to process distr events: {e}") break time.sleep(constants.DISTR_EVENT_COLLECTOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/snapshot_monitor.py b/simplyblock_core/services/snapshot_monitor.py index c82476e7b..2910df3d6 100644 --- a/simplyblock_core/services/snapshot_monitor.py +++ b/simplyblock_core/services/snapshot_monitor.py @@ -5,10 +5,9 @@ from simplyblock_core import constants, db_controller, utils from simplyblock_core.models.cluster import Cluster -from simplyblock_core.controllers import health_controller, snapshot_events +from simplyblock_core.controllers import health_controller, snapshot_events, tasks_controller from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient logger = utils.get_logger(__name__) @@ -64,21 +63,22 @@ def process_snap_delete_finish(snap, leader_node): # 3-1 async delete lvol bdev from primary primary_node = db.get_storage_node_by_id(leader_node.get_id()) + non_leader_id = snode.secondary_node_id + if snode.get_id() != leader_node.get_id(): + non_leader_id = snode.get_id() + non_leader = db.get_storage_node_by_id(non_leader_id) if primary_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + if non_leader and 
non_leader.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, + StorageNode.STATUS_DOWN, StorageNode.STATUS_UNREACHABLE]: + primary_node.lvol_del_sync_lock() ret, _ = primary_node.rpc_client().delete_lvol(snap.snap_bdev, del_async=True) if not ret: logger.error(f"Failed to delete snap from node: {snode.get_id()}") # 3-2 async delete lvol bdev from secondary - non_leader_id = snode.secondary_node_id - if snode.get_id() != leader_node.get_id(): - non_leader_id = snode.get_id() - - non_leader = db.get_storage_node_by_id(non_leader_id) - if non_leader: - non_leader.lvol_sync_del_queue.append(snap.snap_bdev) - non_leader.write_to_db() - + if non_leader and non_leader.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, + StorageNode.STATUS_DOWN, StorageNode.STATUS_UNREACHABLE]: + tasks_controller.add_lvol_sync_del_task(non_leader.cluster_id, non_leader.get_id(), snap.snap_bdev, primary_node.get_id()) snapshot_events.snapshot_delete(snap) snap.remove(db.kv_store) @@ -96,6 +96,115 @@ def set_snap_offline(snap): sn.write_to_db() +def process_snap_delete(snap, snode): + # check leadership + leader_node = None + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, + StorageNode.STATUS_DOWN]: + ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol store info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = snode + + if not leader_node and sec_node: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore) + if not ret: + raise Exception("Failed to get LVol store info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = sec_node + + if not leader_node: + raise Exception("Failed to get leader node") + + if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id(): + + ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev) 
+ if not ret: + logger.error(f"Failed to delete snap from node: {snode.get_id()}") + return False + snap = db.get_snapshot_by_id(snap.get_id()) + snap.deletion_status = leader_node.get_id() + snap.write_to_db() + + time.sleep(3) + + try: + ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev) + except Exception as e: + logger.error(e) + # timeout detected, check other node + return False + + if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed + process_snap_delete_finish(snap, leader_node) + + elif ret == 1: # Async lvol deletion is in progress or queued + logger.info(f"Snap deletion in progress, id: {snap.get_id()}") + + elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error( + "Async deletion is done, but leadership has changed (sync deletion is now blocked)") + + elif ret == 4: # No async delete request exists for this Snap + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No async delete request exists for this snap") + set_snap_offline(snap) + + elif ret == -1: # Operation not permitted + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Operation not permitted") + process_snap_delete_try_again(snap) + + elif ret == -2: # No such file or directory + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No such file or directory") + process_snap_delete_finish(snap, leader_node) + + elif ret == -5: # I/O error + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("I/O error") + process_snap_delete_try_again(snap) + + elif ret == -11: # Try again + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Try again") + process_snap_delete_try_again(snap) + + elif ret 
== -12: # Out of memory + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Out of memory") + process_snap_delete_try_again(snap) + + elif ret == -16: # Device or resource busy + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Device or resource busy") + process_snap_delete_try_again(snap) + + elif ret == -19: # No such device + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No such device") + set_snap_offline(snap) + + elif ret == -35: # Leadership changed + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Leadership changed") + process_snap_delete_try_again(snap) + + elif ret == -36: # Failed to update lvol for deletion + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Failed to update snapshot for deletion") + process_snap_delete_try_again(snap) + + else: # Failed to update lvol for deletion + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Failed to update snapshot for deletion") + + + # get DB controller db = db_controller.DBController() @@ -110,159 +219,46 @@ def set_snap_offline(snap): for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): node_bdev_names = [] - node_lvols_nqns = {} sec_node_bdev_names = {} - sec_node_lvols_nqns = {} sec_node = None if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=3, retry=2) - node_bdevs = rpc_client.get_bdevs() + rpc_client = snode.rpc_client(timeout=3, retry=2) + try: + node_bdevs = rpc_client.get_bdevs() + except Exception as e: + logger.error(e) + continue if node_bdevs: node_bdev_names = [b['name'] for b in node_bdevs] for bdev in node_bdevs: if "aliases" in bdev and bdev["aliases"]: 
node_bdev_names.extend(bdev['aliases']) - ret = rpc_client.subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - if snode.secondary_node_id: sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - ret = sec_rpc_client.get_bdevs() + if sec_node and sec_node.status in [ + StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2) + try: + ret = sec_rpc_client.get_bdevs() + except Exception as e: + logger.error(e) + continue if ret: for bdev in ret: sec_node_bdev_names[bdev['name']] = bdev - ret = sec_rpc_client.subsystem_list() - if ret: - for sub in ret: - sec_node_lvols_nqns[sub['nqn']] = sub - - if snode.lvstore_status == "ready": - - for snap in db.get_snapshots_by_node_id(snode.get_id()): - if snap.status == SnapShot.STATUS_ONLINE: - - present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + for snap in db.get_snapshots_by_node_id(snode.get_id()): + if snap.status == SnapShot.STATUS_ONLINE: + present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + if snode.lvstore_status == "ready": set_snapshot_health_check(snap, present) - elif snap.status == SnapShot.STATUS_IN_DELETION: - - # check leadership - leader_node = None - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, - StorageNode.STATUS_DOWN]: - ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol store info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = snode - - if not leader_node and sec_node: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore) - if not ret: - raise 
Exception("Failed to get LVol store info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id(): - - ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev) - if not ret: - logger.error(f"Failed to delete snap from node: {snode.get_id()}") - continue - snap = db.get_snapshot_by_id(snap.get_id()) - snap.deletion_status = leader_node.get_id() - snap.write_to_db() - - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev) - except Exception as e: - logger.error(e) - # timeout detected, check other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_snap_delete_finish(snap, leader_node) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"Snap deletion in progress, id: {snap.get_id()}") - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error( - "Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this Snap - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No async delete request exists for this snap") - set_snap_offline(snap) - - elif ret == -1: # Operation not permitted - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - process_snap_delete_try_again(snap) - - elif ret == -2: # No such file or directory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_snap_delete_finish(snap, leader_node) - - 
elif ret == -5: # I/O error - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("I/O error") - process_snap_delete_try_again(snap) - - elif ret == -11: # Try again - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Try again") - process_snap_delete_try_again(snap) - - elif ret == -12: # Out of memory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_snap_delete_try_again(snap) - - elif ret == -16: # Device or resource busy - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_snap_delete_try_again(snap) - - elif ret == -19: # No such device - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such device") - set_snap_offline(snap) - - elif ret == -35: # Leadership changed - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_snap_delete_try_again(snap) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Failed to update snapshot for deletion") - process_snap_delete_try_again(snap) - - else: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Failed to update snapshot for deletion") - + elif snap.status == SnapShot.STATUS_IN_DELETION: + try: + process_snap_delete(snap, snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/snapshot_replication.py b/simplyblock_core/services/snapshot_replication.py new file mode 100644 index 000000000..2549b8546 --- /dev/null +++ b/simplyblock_core/services/snapshot_replication.py @@ -0,0 +1,333 @@ +# coding=utf-8 +import time +import uuid 
+ +from simplyblock_core import constants, db_controller, utils +from simplyblock_core.controllers import lvol_controller, snapshot_events +from simplyblock_core.models.job_schedule import JobSchedule +from simplyblock_core.models.pool import Pool +from simplyblock_core.models.snapshot import SnapShot +from simplyblock_core.models.storage_node import StorageNode + +logger = utils.get_logger(__name__) +utils.init_sentry_sdk(__name__) +# get DB controller +db = db_controller.DBController() + + +def process_snap_replicate_start(task, snapshot): + # 1 create lvol on remote node + logger.info("Starting snapshot replication task") + snode = db.get_storage_node_by_id(snapshot.lvol.node_id) + replicate_to_source = task.function_params["replicate_to_source"] + if "remote_lvol_id" not in task.function_params or not task.function_params["remote_lvol_id"]: + if replicate_to_source: + org_snap = db.get_snapshot_by_id(snapshot.source_replicated_snap_uuid) + remote_node_uuid = db.get_storage_node_by_id(task.node_id) + remote_pool_uuid = org_snap.lvol.pool_uuid + else: # replicate to target + remote_node_uuid = db.get_storage_node_by_id(snapshot.lvol.replication_node_id) + cluster = db.get_cluster_by_id(remote_node_uuid.cluster_id) + remote_pool_uuid = None + if cluster.snapshot_replication_target_pool: + remote_pool_uuid = cluster.snapshot_replication_target_pool + else: + for bool in db.get_pools(remote_node_uuid.cluster_id): + if bool.status == Pool.STATUS_ACTIVE: + remote_pool_uuid = bool.uuid + break + if not remote_pool_uuid: + logger.error(f"Unable to find pool on remote cluster: {remote_node_uuid.cluster_id}") + return + + lv_id, err = lvol_controller.add_lvol_ha( + f"REP_{snapshot.snap_name}", snapshot.size, remote_node_uuid.get_id(), snapshot.lvol.ha_type, + remote_pool_uuid) + if lv_id: + task.function_params["remote_lvol_id"] = lv_id + task.write_to_db() + else: + logger.error(err) + task.function_result = "Error creating remote lvol" + task.write_to_db() + return + + 
remote_lv = db.get_lvol_by_id(task.function_params["remote_lvol_id"]) + remote_lv_node = db.get_storage_node_by_id(remote_lv.node_id) + if remote_lv_node.status != StorageNode.STATUS_ONLINE: + task.function_result = "Target node is not online, retrying" + task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 + task.write_to_db() + return + + # 2 connect to it + ret = snode.rpc_client().bdev_nvme_controller_list(remote_lv.top_bdev) + if not ret: + remote_snode = db.get_storage_node_by_id(remote_lv.node_id) + for nic in remote_snode.data_nics: + ip = nic.ip4_address + ret = snode.rpc_client().bdev_nvme_attach_controller( + remote_lv.top_bdev, remote_lv.nqn, ip, remote_lv.subsys_port, nic.trtype) + if not ret: + msg = "controller attach failed" + logger.error(msg) + raise RuntimeError(msg) + bdev_name = ret[0] + if not bdev_name: + msg = "Bdev name not returned from controller attach" + logger.error(msg) + raise RuntimeError(msg) + bdev_found = False + for i in range(5): + ret = snode.rpc_client().get_bdevs(bdev_name) + if ret: + bdev_found = True + break + else: + time.sleep(1) + + if not bdev_found: + logger.error("lvol Bdev not found after 5 attempts") + raise RuntimeError(f"Failed to connect to lvol: {remote_lv.get_id()}") + + offset = 0 + if "offset" in task.function_params and task.function_params["offset"]: + offset = task.function_params["offset"] + # 3 start replication + snode.rpc_client().bdev_lvol_transfer( + lvol_name=snapshot.snap_bdev, + offset=offset, + cluster_batch=16, + gateway=f"{remote_lv.top_bdev}n1", + operation="replicate" + ) + task.status = JobSchedule.STATUS_RUNNING + task.function_params["start_time"] = int(time.time()) + task.write_to_db() + + if snapshot.status != SnapShot.STATUS_IN_REPLICATION: + snapshot.status = SnapShot.STATUS_IN_REPLICATION + snapshot.write_to_db() + + +def process_snap_replicate_finish(task, snapshot): + + # detach remote lvol + remote_lv = db.get_lvol_by_id(task.function_params["remote_lvol_id"]) + snode = 
db.get_storage_node_by_id(snapshot.lvol.node_id) + snode.rpc_client().bdev_nvme_detach_controller(remote_lv.top_bdev) + remote_snode = db.get_storage_node_by_id(remote_lv.node_id) + replicate_to_source = task.function_params["replicate_to_source"] + if "replicate_as_snap_instance" in task.function_params: + replicate_as_snap_instance = task.function_params["replicate_as_snap_instance"] + else: + replicate_as_snap_instance = False + target_prev_snap = None + if replicate_to_source: + org_snap = db.get_snapshot_by_id(snapshot.snap_ref_id) + try: + target_prev_snap = db.get_snapshot_by_id(org_snap.source_replicated_snap_uuid) + except KeyError as e: + logger.error(e) + else: + if snapshot.snap_ref_id: + try: + prev_snap = db.get_snapshot_by_id(snapshot.snap_ref_id) + for sn_inst in prev_snap.instances: + if sn_inst.lvol.node_id == remote_snode.get_id(): + target_prev_snap = sn_inst + break + except KeyError as e: + logger.error(e) + + # chain snaps on primary + if target_prev_snap: + logger.info(f"Chaining replicated lvol: {remote_lv.top_bdev} to snap: {target_prev_snap.snap_bdev}") + ret = remote_snode.rpc_client().bdev_lvol_add_clone(target_prev_snap.snap_bdev, remote_lv.top_bdev) + if not ret: + logger.error("Failed to chain replicated snapshot on primary node") + return False + + # convert to snapshot on primary + ret = remote_snode.rpc_client().bdev_lvol_convert(remote_lv.top_bdev) + if not ret: + logger.error("Failed to convert to snapshot on primary node") + return False + + # chain snaps on secondary + sec_node = db.get_storage_node_by_id(remote_snode.secondary_node_id) + if sec_node.status == StorageNode.STATUS_ONLINE: + if target_prev_snap: + logger.info(f"Chaining replicated lvol: {remote_lv.top_bdev} to snap: {target_prev_snap.snap_bdev}") + ret = sec_node.rpc_client().bdev_lvol_add_clone(target_prev_snap.snap_bdev, remote_lv.top_bdev) + if not ret: + logger.error("Failed to chain replicated snapshot on secondary node") + return False + + # convert to 
snapshot on secondary + ret = sec_node.rpc_client().bdev_lvol_convert(remote_lv.top_bdev) + if not ret: + logger.error("Failed to convert to snapshot on secondary node") + return False + + new_snapshot_uuid = str(uuid.uuid4()) + + new_snapshot = SnapShot() + new_snapshot.uuid = new_snapshot_uuid + new_snapshot.cluster_id = remote_snode.cluster_id + new_snapshot.lvol = remote_lv + new_snapshot.pool_uuid = remote_lv.pool_uuid + new_snapshot.snap_bdev = remote_lv.top_bdev + new_snapshot.snap_uuid = remote_lv.lvol_uuid + new_snapshot.size = snapshot.size + new_snapshot.used_size = snapshot.used_size + new_snapshot.snap_name = snapshot.snap_name + new_snapshot.blobid = remote_lv.blobid + new_snapshot.created_at = int(time.time()) + new_snapshot.status = SnapShot.STATUS_ONLINE + snapshot.instances.append(new_snapshot) + if not replicate_as_snap_instance: + if replicate_to_source: + new_snapshot.target_replicated_snap_uuid = snapshot.uuid + snapshot.source_replicated_snap_uuid = new_snapshot_uuid + else: + snapshot.target_replicated_snap_uuid = new_snapshot_uuid + new_snapshot.source_replicated_snap_uuid = snapshot.uuid + + if target_prev_snap: + new_snapshot.prev_snap_uuid = target_prev_snap.get_id() + target_prev_snap.next_snap_uuid = new_snapshot_uuid + target_prev_snap.write_to_db() + + new_snapshot.write_to_db() + + if snapshot.status == SnapShot.STATUS_IN_REPLICATION: + snapshot.status = SnapShot.STATUS_ONLINE + + snapshot.write_to_db() + + # delete lvol object + remote_lv.bdev_stack = [] + remote_lv.write_to_db() + lvol_controller.delete_lvol(remote_lv.get_id(), True) + remote_lv.remove(db.kv_store) + snapshot_events.replication_task_finished(snapshot) + + return new_snapshot_uuid + + +def task_runner(task: JobSchedule): + snapshot = db.get_snapshot_by_id(task.function_params["snapshot_id"]) + if not snapshot: + task.function_result = "snapshot not found" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return True + + try: + snode = 
db.get_storage_node_by_id(snapshot.lvol.node_id) + except KeyError: + task.function_result = "node not found" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return True + + if snode.status != StorageNode.STATUS_ONLINE: + task.function_result = "node is not online, retrying" + task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 + task.write_to_db(db.kv_store) + return False + + if task.retry >= task.max_retry or task.canceled is True: + task.function_result = "max retry reached" + if task.canceled is True: + task.function_result = "task cancelled" + + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + + if snapshot.status != SnapShot.STATUS_ONLINE: + snapshot.status = SnapShot.STATUS_ONLINE + snapshot.write_to_db() + + remote_lv = db.get_lvol_by_id(task.function_params["remote_lvol_id"]) + snode.rpc_client().bdev_nvme_detach_controller(remote_lv.top_bdev) + lvol_controller.delete_lvol(remote_lv.get_id(), True) + + return True + + + if task.status in [JobSchedule.STATUS_NEW, JobSchedule.STATUS_SUSPENDED]: + process_snap_replicate_start(task, snapshot) + + elif task.status == JobSchedule.STATUS_RUNNING: + snode = db.get_storage_node_by_id(snapshot.lvol.node_id) + ret = snode.rpc_client().bdev_lvol_transfer_stat(snapshot.snap_bdev) + if not ret: + logger.error("Failed to get transfer stat") + return False + status = ret["transfer_state"] + offset = ret["offset"] + if status == "No process": + task.function_result = f"Status: {status}, offset:{offset}, retrying" + task.status = JobSchedule.STATUS_NEW + task.retry += 1 + task.write_to_db() + return False + if status == "In progress": + task.function_result = f"Status: {status}, offset:{offset}" + task.function_params["offset"] = offset + task.write_to_db() + return True + if status == "Failed": + task.function_result = f"Status: {status}, offset:{offset}, retrying" + task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 + task.write_to_db() + return False + 
if status == "Done": + new_snapshot_uuid = process_snap_replicate_finish(task, snapshot) + if new_snapshot_uuid: + task.function_result = new_snapshot_uuid + task.status = JobSchedule.STATUS_DONE + task.function_params["end_time"] = int(time.time()) + task.write_to_db() + else: + task.function_result = "complete repl failed, retrying" + task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 + task.write_to_db() + return True + + +logger.info("Starting Tasks runner...") +while True: + clusters = db.get_clusters() + if not clusters: + logger.error("No clusters found!") + else: + for cl in clusters: + tasks = db.get_job_tasks(cl.get_id(), reverse=False) + for task in tasks: + delay_seconds = constants.TASK_EXEC_INTERVAL_SEC + if task.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION: + if task.status in [JobSchedule.STATUS_NEW, JobSchedule.STATUS_SUSPENDED]: + active_task = False + for t in db.get_job_tasks(task.cluster_id): + if t.function_name == JobSchedule.FN_SNAPSHOT_REPLICATION and t.function_params["snapshot_id"] == task.function_params['snapshot_id']: + if t.status == JobSchedule.STATUS_RUNNING and t.canceled is False: + active_task = True + break + if active_task: + logger.info("replication task found for same snapshot, retry") + continue + if task.status != JobSchedule.STATUS_DONE: + # get new task object because it could be changed from cancel task + task = db.get_task_by_id(task.uuid) + res = task_runner(task) + if not res: + time.sleep(3) + + time.sleep(constants.TASK_EXEC_INTERVAL_SEC) diff --git a/simplyblock_core/services/spdk_http_proxy_server.py b/simplyblock_core/services/spdk_http_proxy_server.py index 06eeee008..46071e408 100644 --- a/simplyblock_core/services/spdk_http_proxy_server.py +++ b/simplyblock_core/services/spdk_http_proxy_server.py @@ -6,19 +6,58 @@ import os import socket import sys +import threading +import time from http.server import HTTPServer from http.server import ThreadingHTTPServer from http.server import 
BaseHTTPRequestHandler -rpc_sock = '/var/tmp/spdk.sock' +rpc_sock = '/mnt/ramdisk/spdk.sock' logger_handler = logging.StreamHandler(stream=sys.stdout) logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(levelname)s: %(message)s')) logger = logging.getLogger() logger.addHandler(logger_handler) logger.setLevel(logging.INFO) +read_line_time_diff: dict = {} +recv_from_spdk_time_diff: dict = {} +def print_stats(): + while True: + try: + time.sleep(3) + t = time.time_ns() + read_line_time_diff_max = max(list(read_line_time_diff.values())) + read_line_time_diff_avg = int(sum(list(read_line_time_diff.values()))/len(read_line_time_diff)) + last_3_sec = [] + for k,v in read_line_time_diff.items(): + if k > t - 3*1000*1000*1000: + last_3_sec.append(v) + if len(last_3_sec) > 0: + read_line_time_diff_avg_last_3_sec = int(sum(last_3_sec)/len(last_3_sec)) + else: + read_line_time_diff_avg_last_3_sec = 0 + logger.info(f"Periodic stats: {t}: read_line_time: max={read_line_time_diff_max} ns, avg={read_line_time_diff_avg} ns, last_3s_avg={read_line_time_diff_avg_last_3_sec} ns") + if len(read_line_time_diff) > 10000: + read_line_time_diff.clear() + + recv_from_spdk_time_max = max(list(recv_from_spdk_time_diff.values())) + recv_from_spdk_time_avg = int(sum(list(recv_from_spdk_time_diff.values()))/len(recv_from_spdk_time_diff)) + last_3_sec = [] + for k,v in recv_from_spdk_time_diff.items(): + if k > t - 3*1000*1000*1000: + last_3_sec.append(v) + if len(last_3_sec) > 0: + recv_from_spdk_time_avg_last_3_sec = int(sum(last_3_sec)/len(last_3_sec)) + else: + recv_from_spdk_time_avg_last_3_sec = 0 + logger.info(f"Periodic stats: {t}: recv_from_spdk_time: max={recv_from_spdk_time_max} ns, avg={recv_from_spdk_time_avg} ns, last_3s_avg={recv_from_spdk_time_avg_last_3_sec} ns") + if len(recv_from_spdk_time_diff) > 10000: + recv_from_spdk_time_diff.clear() + except Exception as e: + logger.error(e) + def get_env_var(name, default=None, is_required=False): if not name: @@ -30,13 +69,18 
@@ def get_env_var(name, default=None, is_required=False): return os.environ.get(name, default) +unix_sockets: list[socket] = [] # type: ignore[valid-type] def rpc_call(req): + logger.info(f"active threads: {threading.active_count()}") + logger.info(f"active unix sockets: {len(unix_sockets)}") req_data = json.loads(req.decode('ascii')) + req_time = time.time_ns() params = "" if "params" in req_data: params = str(req_data['params']) - logger.info(f"Request function: {str(req_data['method'])}, params: {params}") + logger.info(f"Request:{req_time} function: {str(req_data['method'])}, params: {params}") sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + unix_sockets.append(sock) sock.settimeout(TIMEOUT) sock.connect(rpc_sock) sock.sendall(req) @@ -48,7 +92,7 @@ def rpc_call(req): buf = '' closed = False response = None - + recv_from_spdk_time_start = time.time_ns() while not closed: newdata = sock.recv(1024*1024*1024) if newdata == b'': @@ -59,21 +103,25 @@ def rpc_call(req): except ValueError: continue # incomplete response; keep buffering break + recv_from_spdk_time_end = time.time_ns() + time_diff = recv_from_spdk_time_end - recv_from_spdk_time_start + logger.info(f"recv_from_spdk_time_diff: {time_diff}") + recv_from_spdk_time_diff[recv_from_spdk_time_start] = time_diff sock.close() + unix_sockets.remove(sock) if not response and len(buf) > 0: raise ValueError('Invalid response') - logger.debug(f"Response data: {buf}") + logger.info(f"Response:{req_time}") return buf class ServerHandler(BaseHTTPRequestHandler): - + server_session: list[int] = [] key = "" - def do_HEAD(self): self.send_response(200) self.send_header('Content-type', 'text/html') @@ -96,9 +144,14 @@ def do_INTERNALERROR(self): self.end_headers() def do_POST(self): + req_time = time.time_ns() + self.server_session.append(req_time) + logger.info(f"incoming request at: {req_time}") + logger.info(f"active server session: {len(self.server_session)}") if self.headers['Authorization'] != 'Basic ' + 
self.key: self.do_AUTHHEAD() else: + read_line_time_start = time.time_ns() if "Content-Length" in self.headers: data_string = self.rfile.read(int(self.headers['Content-Length'])) elif "chunked" in self.headers.get("Transfer-Encoding", ""): @@ -118,7 +171,10 @@ def do_POST(self): # Finally, a chunk size of 0 is an end indication if chunk_length == 0: break - + read_line_time_end = time.time_ns() + time_diff = read_line_time_end - read_line_time_start + logger.info(f"read_line_time_diff: {time_diff}") + read_line_time_diff[read_line_time_start] = time_diff try: response = rpc_call(data_string) if response is not None: @@ -129,12 +185,14 @@ def do_POST(self): except ValueError: self.do_INTERNALERROR() + self.server_session.remove(req_time) def run_server(host, port, user, password, is_threading_enabled=False): # encoding user and password key = base64.b64encode((user+':'+password).encode(encoding='ascii')).decode('ascii') - + print_stats_thread = threading.Thread(target=print_stats, ) + print_stats_thread.start() try: ServerHandler.key = key httpd = (ThreadingHTTPServer if is_threading_enabled else HTTPServer)((host, port), ServerHandler) @@ -157,6 +215,7 @@ def run_server(host, port, user, password, is_threading_enabled=False): rpc_port = int(rpc_port) except Exception: rpc_port = 8080 +rpc_sock = f"/mnt/ramdisk/spdk_{rpc_port}/spdk.sock" is_threading_enabled = bool(is_threading_enabled) run_server(server_ip, rpc_port, rpc_username, rpc_password, is_threading_enabled=is_threading_enabled) diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py index 17a7d0369..a8d5a08b7 100644 --- a/simplyblock_core/services/storage_node_monitor.py +++ b/simplyblock_core/services/storage_node_monitor.py @@ -5,7 +5,8 @@ from simplyblock_core import constants, db_controller, cluster_ops, storage_node_ops, utils -from simplyblock_core.controllers import health_controller, device_controller, tasks_controller, storage_events +from 
simplyblock_core.controllers import health_controller, device_controller, tasks_controller, storage_events, \ + cluster_events from simplyblock_core.models.cluster import Cluster from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice @@ -74,13 +75,16 @@ def get_next_cluster_status(cluster_id): continue online_nodes += 1 # check for jm rep tasks: - ret = node.rpc_client().jc_get_jm_status(node.jm_vuid) - if ret: - for jm in ret: - if ret[jm] is False: # jm is not ready (has active replication task) - jm_replication_tasks = True - logger.warning("Replication task found!") - break + if node.rpc_client().bdev_lvol_get_lvstores(node.lvstore): + try: + ret = node.rpc_client(timeout=5).jc_get_jm_status(node.jm_vuid) + for jm in ret: + if ret[jm] is False: # jm is not ready (has active replication task) + jm_replication_tasks = True + logger.warning("Replication task found!") + break + except Exception: + logger.warning("Failed to get replication task!") elif node.status == StorageNode.STATUS_REMOVED: pass else: @@ -114,11 +118,12 @@ def get_next_cluster_status(cluster_id): k = cluster.distr_npcs # if number of devices in the cluster unavailable on DIFFERENT nodes > k --> I cannot read and in some cases cannot write (suspended) - if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n+k)): + if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n + k)): return Cluster.STATUS_DEGRADED elif jm_replication_tasks: return Cluster.STATUS_DEGRADED - elif (affected_nodes > k or online_devices < (n + k) or (online_nodes < (n+k) and cluster.strict_node_anti_affinity)): + elif (affected_nodes > k or online_devices < (n + k) or ( + online_nodes < (n + k) and cluster.strict_node_anti_affinity)): return Cluster.STATUS_SUSPENDED else: return Cluster.STATUS_ACTIVE @@ -132,12 +137,15 @@ def update_cluster_status(cluster_id): for task in 
db.get_job_tasks(cluster_id): if task.status != JobSchedule.STATUS_DONE and task.function_name in [ JobSchedule.FN_DEV_MIG, JobSchedule.FN_NEW_DEV_MIG, JobSchedule.FN_FAILED_DEV_MIG]: - if task.retry == 0: + if "migration" not in task.function_params: first_iter_task_pending += 1 - + is_re_balancing = first_iter_task_pending > 0 cluster = db.get_cluster_by_id(cluster_id) - cluster.is_re_balancing = first_iter_task_pending > 0 - cluster.write_to_db() + if cluster.is_re_balancing != is_re_balancing: + old_status = cluster.is_re_balancing + cluster.is_re_balancing = is_re_balancing + cluster.write_to_db() + cluster_events.cluster_rebalancing_change(cluster_id, cluster.is_re_balancing, old_status) current_cluster_status = cluster.status logger.info("cluster_status: %s", current_cluster_status) @@ -145,7 +153,7 @@ def update_cluster_status(cluster_id): return if current_cluster_status == Cluster.STATUS_DEGRADED and next_current_status == Cluster.STATUS_ACTIVE: - # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: + # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: # cluster_ops.cluster_activate(cluster_id, True) cluster_ops.set_cluster_status(cluster_id, Cluster.STATUS_ACTIVE) return @@ -186,7 +194,6 @@ def update_cluster_status(cluster_id): cluster_ops.set_cluster_status(cluster_id, next_current_status) - def set_node_online(node): if node.status != StorageNode.STATUS_ONLINE: @@ -211,24 +218,56 @@ def set_node_online(node): if online_devices_list: tasks_controller.add_device_mig_task(online_devices_list, node.cluster_id) -def set_node_offline(node, set_devs_offline=False): - if node.status != StorageNode.STATUS_UNREACHABLE: - # set node unavailable - storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + update_cluster_status(cluster_id) + - # if set_devs_offline: - # # set devices 
unavailable - # for dev in node.nvme_devices: - # if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY]: - # device_controller.device_set_unavailable(dev.get_id()) +def set_node_offline(node): + if node.status != StorageNode.STATUS_OFFLINE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_OFFLINE) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + except Exception as e: + logger.debug("Setting node to OFFLINE state failed") + logger.error(e) + + +def set_node_unreachable(node): + if node.status != StorageNode.STATUS_UNREACHABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to UNREACHABLE state failed") + logger.error(e) + + +def set_node_schedulable(node): + if node.status != StorageNode.STATUS_SCHEDULABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_SCHEDULABLE) + # initiate shutdown + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to SCHEDULABLE state failed") + logger.error(e) - # # set jm dev offline - # if node.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(node.jm_device.get_id(), JMDevice.STATUS_UNAVAILABLE) def set_node_down(node): if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_SUSPENDED]: 
storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_DOWN) + update_cluster_status(cluster_id) def node_rpc_timeout_check_and_report(node): @@ -242,10 +281,151 @@ def node_rpc_timeout_check_and_report(node): except Exception as e: logger.debug(e) # RPC timeout detected, send to cluster log - storage_events.snode_rpc_timeout(node, time.time()-start_time) + storage_events.snode_rpc_timeout(node, int(time.time() - start_time)) + return False + + +def node_port_check_fun(snode): + node_port_check = True + if snode.lvstore_status == "ready": + ports = [snode.nvmf_port] + if snode.lvstore_stack_secondary_1: + for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): + if n.lvstore_status == "ready": + ports.append(n.lvol_subsys_port) + if not snode.is_secondary_node: + ports.append(snode.lvol_subsys_port) + + for port in ports: + try: + ret = health_controller.check_port_on_node(snode, port) + logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") + node_port_check &= ret + except Exception: + logger.error("Check node port failed, connection error") + + node_data_nic_ping_check = False + for data_nic in snode.data_nics: + if data_nic.ip4_address: + data_ping_check = health_controller._check_node_ping(data_nic.ip4_address) + logger.info(f"Check: ping data nic {data_nic.ip4_address} ... 
{data_ping_check}") + node_data_nic_ping_check |= data_ping_check + + node_port_check &= node_data_nic_ping_check + + return node_port_check + + +class State: + counter = 0 +def increment(): + State.counter = 1 +def decrement(): + State.counter = 0 +def value(): + return State.counter + +def check_node(snode): + snode = db.get_storage_node_by_id(snode.get_id()) + + if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, + StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: + logger.info(f"Node status is: {snode.status}, skipping") + return False + + if snode.status == StorageNode.STATUS_ONLINE and snode.lvstore_status == "in_creation": + logger.info(f"Node lvstore is in creation: {snode.get_id()}, skipping") + return False + + logger.info(f"Checking node {snode.hostname}") + + + # 1- check node ping + ping_check = health_controller._check_node_ping(snode.mgmt_ip) + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") + if not ping_check: + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}: FAILED") + set_node_unreachable(snode) + return False + + # 2- check node API + try: + snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=10, retry=2) + ret, _ = snode_api.is_live() + logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {ret}") + if not ret: + logger.info("Check: node API failed, setting node unreachable") + set_node_unreachable(snode) + return False + except Exception as e: + logger.debug(e) + set_node_unreachable(snode) + return False + + # 3- check spdk process through node API + try: + snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=20, retry=2) + is_up, _ = snode_api.spdk_process_is_up( snode.rpc_port, snode.cluster_id) + logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... 
{bool(is_up)}") + if not is_up: + logger.info("Check: node API failed, setting node offline") + set_node_offline(snode) + return False + except Exception as e: + logger.debug(e) + return False + + # 4- check node rpc interface + node_rpc_check, node_rpc_check_1 = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=20, retry=1) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") + + #if RPC times out, we dont know if its due to node becoming unavailable or spdk hanging + #so we try it twice. If all other checks pass again, but only this one fails: it's the spdk process + if not node_rpc_check: + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}:TIMEOUT") + if value()==0: + increment() + return False + + decrement() + if not node_rpc_check or not node_rpc_check_1: + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}:FAILED") + set_node_schedulable(snode) + return False + + #if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: + # t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) + # t.start() + # node_rpc_timeout_threads[snode.get_id()] = t + + node_port_check = node_port_check_fun(snode) + + if not node_port_check: + cluster = db.get_cluster_by_id(snode.cluster_id) + if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: + logger.error("Port check failed") + set_node_down(snode) + return True + + set_node_online(snode) + + +def loop_for_node(snode): + # global logger + # logger = logging.getLogger() + # logger_handler = logging.StreamHandler(stream=sys.stdout) + # logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) + # logger.addHandler(logger_handler) + while True: + check_node(snode) + logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} 
seconds") + time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) logger.info("Starting node monitor") +threads_maps: dict[str, threading.Thread] = {} + while True: clusters = db.get_clusters() for cluster in clusters: @@ -253,168 +433,20 @@ def node_rpc_timeout_check_and_report(node): if cluster.status == Cluster.STATUS_IN_ACTIVATION: logger.info(f"Cluster status is: {cluster.status}, skipping monitoring") continue - + logger.info(f"Looping for cluster {cluster_id}") nodes = db.get_storage_nodes_by_cluster_id(cluster_id) - for snode in nodes: - - # get fresh node object, something could have changed until the last for loop is reached - snode = db.get_storage_node_by_id(snode.get_id()) - - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - continue - - if snode.status == StorageNode.STATUS_ONLINE and snode.lvstore_status == "in_creation": - logger.info(f"Node lvstore is in creation: {snode.get_id()}, skipping") - continue - - logger.info(f"Checking node {snode.hostname}") - - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - if not ping_check: - time.sleep(1) - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check 2: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") - - if snode.status == StorageNode.STATUS_SCHEDULABLE and not ping_check and not node_api_check: - continue - - spdk_process = False - if node_api_check: - # 3- check spdk_process - spdk_process = health_controller._check_spdk_process_up(snode.mgmt_ip, snode.rpc_port) - logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... 
{spdk_process}") - - # 4- check rpc - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - - if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: - t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) + for node in nodes: + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + logger.info(f"Creating thread for node {node_id}") + t = threading.Thread(target=loop_for_node, args=(node,)) t.start() - node_rpc_timeout_threads[snode.get_id()] = t - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - start_time = time.time() - while time.time() < start_time + 60: - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - if node_rpc_check: - break - - node_port_check = True - - if spdk_process and node_rpc_check and snode.lvstore_status == "ready": - ports = [snode.nvmf_port] - if snode.lvstore_stack_secondary_1: - for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): - if n.lvstore_status == "ready": - ports.append(n.lvol_subsys_port) - if not snode.is_secondary_node: - ports.append(snode.lvol_subsys_port) - - for port in ports: - ret = health_controller._check_port_on_node(snode, port) - logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") - node_port_check &= ret - - node_data_nic_ping_check = False - for data_nic in snode.data_nics: - if data_nic.ip4_address: - data_ping_check = health_controller._check_node_ping(data_nic.ip4_address) - logger.info(f"Check: ping data nic {data_nic.ip4_address} ... 
{data_ping_check}") - node_data_nic_ping_check |= data_ping_check - - node_port_check &= node_data_nic_ping_check - - cluster = db.get_cluster_by_id(cluster.get_id()) - - # is_node_online = ping_check and spdk_process and node_rpc_check and node_port_check - is_node_online = spdk_process or node_rpc_check - if is_node_online: - - if snode.status == StorageNode.STATUS_UNREACHABLE: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - # tasks_controller.add_node_to_auto_restart(snode) - set_node_online(snode) - continue - - if not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - continue - - set_node_online(snode) - - # # check JM device - # if snode.jm_device: - # if snode.jm_device.status in [JMDevice.STATUS_ONLINE, JMDevice.STATUS_UNAVAILABLE]: - # ret = health_controller.check_jm_device(snode.jm_device.get_id()) - # if ret: - # logger.info(f"JM bdev is online: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_ONLINE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) - # else: - # logger.error(f"JM bdev is offline: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), - # JMDevice.STATUS_UNAVAILABLE) - else: - - if not ping_check and not node_api_check and not spdk_process: - # restart on new node - storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_SCHEDULABLE) - - elif ping_check and node_api_check and (not spdk_process or not node_rpc_check): - # add node to auto restart - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - if not spdk_process and not 
node_rpc_check: - logger.info("ping is fine, snodeapi is fine, But no spdk process and no rpc check, " - "So that we set device offline") - set_node_offline(snode, set_devs_offline=(not spdk_process and not node_rpc_check)) - try: - ret = snode.rpc_client(timeout=10).get_version() - if not ret: - logger.debug("False RPC response, adding node to auto restart") - tasks_controller.add_node_to_auto_restart(snode) - except Exception as e: - logger.debug("Timeout to get RPC response, skipping restart") - logger.error(e) - - elif not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - - else: - set_node_offline(snode, set_devs_offline=not spdk_process) - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - # restart spdk proxy cont - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - logger.info(f"Restarting spdk_proxy_{snode.rpc_port} on {snode.get_id()}") - snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=60, retry=1) - ret, err = snode_api.spdk_proxy_restart(snode.rpc_port) - if ret: - logger.info(f"Restarting spdk_proxy on {snode.get_id()} successfully") - continue - if err: - logger.error(err) - - update_cluster_status(cluster_id) - - logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} seconds") + threads_maps[node_id] = t + logger.debug(threads_maps[node_id]) + + try: + update_cluster_status(cluster_id) + logger.debug("Iteration has been finished...") + except Exception: + logger.error("Error while updating cluster status") time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/tasks_runner_failed_migration.py b/simplyblock_core/services/tasks_runner_failed_migration.py index fce4fd8ef..e3baeb7f0 100644 --- a/simplyblock_core/services/tasks_runner_failed_migration.py +++ 
b/simplyblock_core/services/tasks_runner_failed_migration.py @@ -87,8 +87,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_failure_start( - distr_name, device.cluster_device_order, qos_high_priority, job_size=1024, jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_failure_start( + distr_name, device.cluster_device_order, qos_high_priority, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task" diff --git a/simplyblock_core/services/tasks_runner_jc_comp.py b/simplyblock_core/services/tasks_runner_jc_comp.py index 676156af3..9e1ce2368 100644 --- a/simplyblock_core/services/tasks_runner_jc_comp.py +++ b/simplyblock_core/services/tasks_runner_jc_comp.py @@ -46,9 +46,9 @@ task.write_to_db(db.kv_store) continue - node = db.get_storage_node_by_id(task.node_id) - - if not node: + try: + node = db.get_storage_node_by_id(task.node_id) + except KeyError: task.function_result = "node not found" task.status = JobSchedule.STATUS_DONE task.write_to_db(db.kv_store) @@ -57,6 +57,7 @@ if node.status != StorageNode.STATUS_ONLINE: msg = f"Node is {node.status}, retry task" logger.info(msg) + task.retry += 1 task.function_result = msg task.status = JobSchedule.STATUS_SUSPENDED task.write_to_db(db.kv_store) @@ -79,6 +80,7 @@ logger.info(msg) task.function_result = msg task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 task.write_to_db(db.kv_store) continue @@ -86,12 +88,16 @@ jm_vuid = node.jm_vuid if "jm_vuid" in task.function_params: jm_vuid = task.function_params["jm_vuid"] - ret, err = rpc_client.jc_compression_start(jm_vuid=jm_vuid) + try: + ret, err = 
rpc_client.jc_suspend_compression(jm_vuid=jm_vuid, suspend=False) + except Exception as e: + logger.error(e) + continue if ret: task.function_result = f"JC {node.jm_vuid} compression resumed on node" task.status = JobSchedule.STATUS_DONE task.write_to_db(db.kv_store) - elif err and "code" in err and err["code"] == -2: + elif err: task.function_result = f"JC {node.jm_vuid} compression not needed" task.status = JobSchedule.STATUS_DONE task.write_to_db(db.kv_store) diff --git a/simplyblock_core/services/tasks_runner_migration.py b/simplyblock_core/services/tasks_runner_migration.py index fb085e4aa..c00231d2c 100644 --- a/simplyblock_core/services/tasks_runner_migration.py +++ b/simplyblock_core/services/tasks_runner_migration.py @@ -62,16 +62,6 @@ def task_runner(task): except Exception as e: logger.error(f"Failed to get online since: {e}") - for dev in node.nvme_devices: - if dev.status not in [NVMeDevice.STATUS_ONLINE, - NVMeDevice.STATUS_FAILED_AND_MIGRATED, - NVMeDevice.STATUS_CANNOT_ALLOCATE]: - task.function_result = f"Some dev status is {dev.status }, retrying" - task.status = JobSchedule.STATUS_SUSPENDED - task.retry += 1 - task.write_to_db(db.kv_store) - return False - task.status = JobSchedule.STATUS_RUNNING task.function_result = "" task.write_to_db(db.kv_store) @@ -93,8 +83,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=1024, - jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE, + jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task, retry later" @@ -112,7 +106,7 @@ def 
task_runner(task): allow_all_errors = False for node in db.get_storage_nodes_by_cluster_id(task.cluster_id): for dev in node.nvme_devices: - if dev.status in [NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE]: + if dev.status in [NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE, NVMeDevice.STATUS_FAILED]: allow_all_errors = True break @@ -219,9 +213,12 @@ def _set_master_task_status(master_task, status): continue rpc_client = RPCClient( node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=2) - ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid) - if err and "code" in err and err["code"] != -2: - logger.info("Failed to resume JC compression adding task...") - tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid) + try: + ret, err = rpc_client.jc_suspend_compression(jm_vuid=node.jm_vuid, suspend=False) + if err: + logger.info("Failed to resume JC compression adding task...") + tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid) + except Exception as e: + logger.error(e) time.sleep(3) diff --git a/simplyblock_core/services/tasks_runner_new_dev_migration.py b/simplyblock_core/services/tasks_runner_new_dev_migration.py index f62a7f210..db4143eec 100644 --- a/simplyblock_core/services/tasks_runner_new_dev_migration.py +++ b/simplyblock_core/services/tasks_runner_new_dev_migration.py @@ -98,8 +98,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=1024, - jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_expansion_start( + distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE,jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(f"Failed to start migration : {e}") + rsp = False if not rsp: logger.error(f"Failed 
to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task" diff --git a/simplyblock_core/services/tasks_runner_node_add.py b/simplyblock_core/services/tasks_runner_node_add.py index daeba918e..263f2c73e 100644 --- a/simplyblock_core/services/tasks_runner_node_add.py +++ b/simplyblock_core/services/tasks_runner_node_add.py @@ -2,7 +2,7 @@ import time -from simplyblock_core import db_controller, storage_node_ops, utils +from simplyblock_core import db_controller, storage_node_ops, utils, constants from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.cluster import Cluster @@ -13,46 +13,67 @@ db = db_controller.DBController() -logger.info("Starting Tasks runner...") -while True: +def process_task(task): + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return False + + if task.retry >= task.max_retry: + task.function_result = "max retry reached" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return True + + if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION: + task.function_result = "Cluster is in_activation, waiting" + task.status = JobSchedule.STATUS_NEW + task.write_to_db(db.kv_store) + return False + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + try: + res = storage_node_ops.add_node(**task.function_params) + msg = f"Node add result: {res}" + logger.info(msg) + task.function_result = msg + if res: + task.status = JobSchedule.STATUS_DONE + else: + task.retry += 1 + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return True + except Exception as e: + logger.error(e) + return False + + +logger.info("Starting Tasks runner node add...") +while True: clusters = db.get_clusters() if not clusters: logger.error("No 
clusters found!") else: for cl in clusters: - if cl.status == Cluster.STATUS_IN_ACTIVATION: - continue - tasks = db.get_job_tasks(cl.get_id(), reverse=False) for task in tasks: - + delay_seconds = constants.TASK_EXEC_INTERVAL_SEC if task.function_name == JobSchedule.FN_NODE_ADD: - if task.status != JobSchedule.STATUS_DONE: - + while task.status != JobSchedule.STATUS_DONE: # get new task object because it could be changed from cancel task task = db.get_task_by_id(task.uuid) - - if task.canceled: - task.function_result = "canceled" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION: - task.function_result = "Cluster is in_activation, waiting" - task.status = JobSchedule.STATUS_NEW - task.write_to_db(db.kv_store) - continue - - if task.status != JobSchedule.STATUS_RUNNING: - task.status = JobSchedule.STATUS_RUNNING - task.write_to_db(db.kv_store) - - res = storage_node_ops.add_node(**task.function_params) - logger.info(f"Node add result: {res}") - task.function_result = str(res) - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - - time.sleep(5) + res = process_task(task) + if res: + if task.status == JobSchedule.STATUS_DONE: + break + else: + delay_seconds *= 2 + time.sleep(delay_seconds) + + time.sleep(30) diff --git a/simplyblock_core/services/tasks_runner_port_allow.py b/simplyblock_core/services/tasks_runner_port_allow.py index a39de42ab..fd706b18a 100644 --- a/simplyblock_core/services/tasks_runner_port_allow.py +++ b/simplyblock_core/services/tasks_runner_port_allow.py @@ -3,13 +3,12 @@ from simplyblock_core import db_controller, utils, storage_node_ops, distr_controller -from simplyblock_core.controllers import tcp_ports_events, health_controller +from simplyblock_core.controllers import tcp_ports_events, health_controller, tasks_controller from simplyblock_core.fw_api_client import FirewallClient from 
simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice +from simplyblock_core.models.nvme_device import NVMeDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.snode_client import SNodeClient logger = utils.get_logger(__name__) @@ -17,9 +16,234 @@ db = db_controller.DBController() +def exec_port_allow_task(task): + # get new task object because it could be changed from cancel task + task = db.get_task_by_id(task.uuid) + + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return + + try: + node = db.get_storage_node_by_id(task.node_id) + except KeyError: + task.function_result = "node not found" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return + + if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: + msg = f"Node is {node.status}, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + # check node ping + ping_check = health_controller._check_node_ping(node.mgmt_ip) + logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}") + if not ping_check: + time.sleep(1) + ping_check = health_controller._check_node_ping(node.mgmt_ip) + logger.info(f"Check 2: ping mgmt ip {node.mgmt_ip} ... 
{ping_check}") + + if not ping_check: + msg = "Node ping is false, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + # check node ping + logger.info("connect to remote devices") + nodes = db.get_storage_nodes_by_cluster_id(node.cluster_id) + # connect to remote devs + try: + node_bdevs = node.rpc_client().get_bdevs() + logger.debug(node_bdevs) + if node_bdevs: + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + remote_devices = [] + for nd in nodes: + if nd.get_id() == node.get_id() or nd.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + continue + logger.info(f"Connecting to node {nd.get_id()}") + for index, dev in enumerate(nd.nvme_devices): + + if dev.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}") + continue + + if not dev.alceml_bdev: + raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.remote_bdev = storage_node_ops.connect_device( + f"remote_{dev.alceml_bdev}", dev, node, + bdev_names=list(node_bdev_names), reattach=False) + + remote_devices.append(remote_device) + if not remote_devices: + msg = "Node unable to connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_devices = remote_devices + 
node.write_to_db() + + logger.info("connect to remote JM devices") + remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(node) + if not remote_jm_devices or len(remote_jm_devices) < 2: + msg = "Node unable to connect to remote JMs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_jm_devices = remote_jm_devices + node.write_to_db() + + + except Exception as e: + logger.error(e) + msg = "Error when connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + logger.info("Sending device status event") + for db_dev in node.nvme_devices: + distr_controller.send_dev_status_event(db_dev, db_dev.status, node) + + logger.info("Finished sending device status and now waiting 5s for JMs to connect") + time.sleep(5) + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + snode = db.get_storage_node_by_id(node.get_id()) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + try: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = True + # check jc_compression status + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status(snode.jm_vuid) + retries = 10 + while jc_compression_is_active: + if retries <= 0: + logger.warning("Timeout waiting for JC compression task to finish") + break + retries -= 1 + logger.info( + f"JC compression task found on node: {sec_node.get_id()}, retrying in 60 seconds") + time.sleep(60) + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status( + snode.jm_vuid) + except Exception as e: + logger.error(e) + return + + if node.lvstore_status == "ready": + lvstore_check = 
health_controller._check_node_lvstore(node.lvstore_stack, node, auto_fix=True) + if not lvstore_check: + msg = "Node LVolStore check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if node.secondary_node_id: + primary_hublvol_check = health_controller._check_node_hublvol(node) + if not primary_hublvol_check: + msg = "Node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + secondary_hublvol_check = health_controller._check_sec_node_hublvol(sec_node, auto_fix=True) + if not secondary_hublvol_check: + msg = "Secondary node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + try: + # wait for lvol sync delete + lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) + while lvol_sync_del_found: + logger.info("Lvol sync delete task found, waiting") + time.sleep(3) + lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) + + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + sec_rpc_client = sec_node.rpc_client() + ret = sec_node.wait_for_jm_rep_tasks_to_finish(node.jm_vuid) + if not ret: + msg = "JM replication task found on secondary" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + sec_rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False, bs_nonleadership=True) + + except Exception as e: + 
logger.error(e) + return + + port_number = task.function_params["port_number"] + logger.info(f"Allow port {port_number} on node {node.get_id()}") + fw_api = FirewallClient(snode, timeout=5, retry=2) + port_type = "tcp" + if node.active_rdma: + port_type = "udp" + fw_api.firewall_set_port(port_number, port_type, "allow", node.rpc_port) + tcp_ports_events.port_allowed(node, port_number) + + task.function_result = f"Port {port_number} allowed on node" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + + logger.info("Starting Tasks runner...") while True: - clusters = db.get_clusters() if not clusters: logger.error("No clusters found!") @@ -27,207 +251,10 @@ for cl in clusters: if cl.status == Cluster.STATUS_IN_ACTIVATION: continue - tasks = db.get_job_tasks(cl.get_id(), reverse=False) for task in tasks: - if task.function_name == JobSchedule.FN_PORT_ALLOW: if task.status != JobSchedule.STATUS_DONE: - - # get new task object because it could be changed from cancel task - task = db.get_task_by_id(task.uuid) - - if task.canceled: - task.function_result = "canceled" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - node = db.get_storage_node_by_id(task.node_id) - - if not node: - task.function_result = "node not found" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: - msg = f"Node is {node.status}, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - # check node ping - ping_check = health_controller._check_node_ping(node.mgmt_ip) - logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}") - if not ping_check: - time.sleep(1) - ping_check = health_controller._check_node_ping(node.mgmt_ip) - logger.info(f"Check 2: ping mgmt ip {node.mgmt_ip} ... 
{ping_check}") - - if not ping_check: - msg = "Node ping is false, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - # check node ping - logger.info("connect to remote devices") - nodes = db.get_storage_nodes_by_cluster_id(node.cluster_id) - # connect to remote devs - try: - node_bdevs = node.rpc_client().get_bdevs() - logger.debug(node_bdevs) - if node_bdevs: - node_bdev_names = {} - for b in node_bdevs: - node_bdev_names[b['name']] = b - for al in b['aliases']: - node_bdev_names[al] = b - else: - node_bdev_names = {} - remote_devices = [] - for nd in nodes: - if nd.get_id() == node.get_id() or nd.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: - continue - logger.info(f"Connecting to node {nd.get_id()}") - for index, dev in enumerate(nd.nvme_devices): - - if dev.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, - NVMeDevice.STATUS_CANNOT_ALLOCATE]: - logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}") - continue - - if not dev.alceml_bdev: - raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") - - dev.remote_bdev = storage_node_ops.connect_device( - f"remote_{dev.alceml_bdev}", dev, node, - bdev_names=list(node_bdev_names), reattach=False) - - remote_devices.append(dev) - if not remote_devices: - msg = "Node unable to connect to remote devs, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - else: - node = db.get_storage_node_by_id(task.node_id) - node.remote_devices = remote_devices - node.write_to_db() - - logger.info("connect to remote JM devices") - remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(node) - if not remote_jm_devices or len(remote_jm_devices) < 2: - msg = "Node unable to connect to remote JMs, retry task" - logger.info(msg) - task.function_result = msg - 
task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - else: - node = db.get_storage_node_by_id(task.node_id) - node.remote_jm_devices = remote_jm_devices - node.write_to_db() - - - except Exception as e: - logger.error(e) - msg = "Error when connect to remote devs, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - logger.info("Sending device status event") - for db_dev in node.nvme_devices: - distr_controller.send_dev_status_event(db_dev, db_dev.status) - - logger.info("Finished sending device status and now waiting 5s for JMs to connect") - time.sleep(5) - - sec_node = db.get_storage_node_by_id(node.secondary_node_id) - snode = db.get_storage_node_by_id(node.get_id()) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if ret: - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - # is_sec_node_leader = True - # check jc_compression status - jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status(snode.jm_vuid) - retries = 10 - while jc_compression_is_active: - if retries <= 0: - logger.warning("Timeout waiting for JC compression task to finish") - break - retries -= 1 - logger.info( - f"JC compression task found on node: {sec_node.get_id()}, retrying in 60 seconds") - time.sleep(60) - jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status( - snode.jm_vuid) - - lvstore_check = True - if node.lvstore_status == "ready": - lvstore_check &= health_controller._check_node_lvstore(node.lvstore_stack, node, auto_fix=True) - if node.secondary_node_id: - lvstore_check &= health_controller._check_node_hublvol(node) - sec_node = db.get_storage_node_by_id(node.secondary_node_id) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - lvstore_check &= 
health_controller._check_sec_node_hublvol(sec_node, auto_fix=True) - - if lvstore_check is False: - msg = "Node LVolStore check fail, retry later" - logger.warning(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - if task.status != JobSchedule.STATUS_RUNNING: - task.status = JobSchedule.STATUS_RUNNING - task.write_to_db(db.kv_store) - - not_deleted = [] - for bdev_name in snode.lvol_sync_del_queue: - logger.info(f"Sync delete bdev: {bdev_name} from node: {snode.get_id()}") - ret, err = snode.rpc_client().delete_lvol(bdev_name, del_async=True) - if not ret: - if "code" in err and err["code"] == -19: - logger.error(f"Sync delete completed with error: {err}") - else: - logger.error( - f"Failed to sync delete bdev: {bdev_name} from node: {snode.get_id()}") - not_deleted.append(bdev_name) - snode.lvol_sync_del_queue = not_deleted - snode.write_to_db() - - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - sec_rpc_client = sec_node.rpc_client() - sec_rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False, bs_nonleadership=True) - - port_number = task.function_params["port_number"] - snode_api = SNodeClient(f"{node.mgmt_ip}:5000", timeout=3, retry=2) - - logger.info(f"Allow port {port_number} on node {node.get_id()}") - - fw_api = FirewallClient(snode, timeout=5, retry=2) - port_type = "tcp" - if node.active_rdma: - port_type = "udp" - fw_api.firewall_set_port(port_number, port_type, "allow", node.rpc_port) - tcp_ports_events.port_allowed(node, port_number) - - task.function_result = f"Port {port_number} allowed on node" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) + exec_port_allow_task(task) time.sleep(5) diff --git a/simplyblock_core/services/tasks_runner_restart.py b/simplyblock_core/services/tasks_runner_restart.py index 2cfc82a53..61f8c5e6b 100644 --- a/simplyblock_core/services/tasks_runner_restart.py +++ 
b/simplyblock_core/services/tasks_runner_restart.py @@ -3,6 +3,7 @@ from simplyblock_core import constants, db_controller, storage_node_ops, utils from simplyblock_core.controllers import device_controller, health_controller, tasks_controller +from simplyblock_core.models.cluster import Cluster from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode @@ -127,18 +128,19 @@ def task_runner_device(task): def task_runner_node(task): - node = db.get_storage_node_by_id(task.node_id) - if task.retry >= task.max_retry: - task.function_result = "max retry reached" + try: + node = db.get_storage_node_by_id(task.node_id) + except KeyError: + task.function_result = "node not found" task.status = JobSchedule.STATUS_DONE task.write_to_db(db.kv_store) - storage_node_ops.set_node_status(task.node_id, StorageNode.STATUS_OFFLINE) return True - if not node: - task.function_result = "node not found" + if task.retry >= task.max_retry: + task.function_result = "max retry reached" task.status = JobSchedule.STATUS_DONE task.write_to_db(db.kv_store) + storage_node_ops.set_node_status(task.node_id, StorageNode.STATUS_OFFLINE) return True if node.status in [StorageNode.STATUS_REMOVED, StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: @@ -171,6 +173,13 @@ def task_runner_node(task): task.status = JobSchedule.STATUS_RUNNING task.write_to_db(db.kv_store) + cluster = db.get_cluster_by_id(task.cluster_id) + if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: + task.function_result = f"Cluster is not active: {cluster.status}, retry" + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return False + # is node reachable? ping_check = health_controller._check_node_ping(node.mgmt_ip) logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... 
{ping_check}") @@ -191,19 +200,26 @@ def task_runner_node(task): return False - # shutting down node - logger.info(f"Shutdown node {node.get_id()}") - ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True) - if ret: - logger.info("Node shutdown succeeded") - - time.sleep(3) + try: + # shutting down node + logger.info(f"Shutdown node {node.get_id()}") + ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True) + if ret: + logger.info("Node shutdown succeeded") + time.sleep(3) + except Exception as e: + logger.error(e) + return False - # resetting node - logger.info(f"Restart node {node.get_id()}") - ret = storage_node_ops.restart_storage_node(node.get_id(), force=True) - if ret: - logger.info("Node restart succeeded") + try: + # resetting node + logger.info(f"Restart node {node.get_id()}") + ret = storage_node_ops.restart_storage_node(node.get_id(), force=True) + if ret: + logger.info("Node restart succeeded") + except Exception as e: + logger.error(e) + return False time.sleep(3) node = db.get_storage_node_by_id(task.node_id) diff --git a/simplyblock_core/services/tasks_runner_sync_lvol_del.py b/simplyblock_core/services/tasks_runner_sync_lvol_del.py new file mode 100644 index 000000000..bce8692c3 --- /dev/null +++ b/simplyblock_core/services/tasks_runner_sync_lvol_del.py @@ -0,0 +1,87 @@ +# coding=utf-8 +import time + + +from simplyblock_core import db_controller, utils +from simplyblock_core.models.job_schedule import JobSchedule +from simplyblock_core.models.cluster import Cluster +from simplyblock_core.models.storage_node import StorageNode + +logger = utils.get_logger(__name__) + +# get DB controller +db = db_controller.DBController() + + +logger.info("Starting Tasks runner...") +while True: + + clusters = db.get_clusters() + if not clusters: + logger.error("No clusters found!") + else: + for cl in clusters: + if cl.status == Cluster.STATUS_IN_ACTIVATION: + continue + + tasks = db.get_job_tasks(cl.get_id(), reverse=False) + for 
task in tasks: + + if task.function_name == JobSchedule.FN_LVOL_SYNC_DEL: + if task.status != JobSchedule.STATUS_DONE: + + # get new task object because it could be changed from cancel task + task = db.get_task_by_id(task.uuid) + + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + primary_node = db.get_storage_node_by_id(task.function_params["primary_node"]) + primary_node.lvol_del_sync_lock_reset() + continue + + node = db.get_storage_node_by_id(task.node_id) + + if not node: + task.function_result = "node not found" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + primary_node = db.get_storage_node_by_id(task.function_params["primary_node"]) + primary_node.lvol_del_sync_lock_reset() + continue + + if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: + msg = f"Node is {node.status}, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + continue + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + lvol_bdev_name = task.function_params["lvol_bdev_name"] + + logger.info(f"Sync delete bdev: {lvol_bdev_name} from node: {node.get_id()}") + ret, err = node.rpc_client().delete_lvol(lvol_bdev_name, del_async=True) + if not ret: + if "code" in err and err["code"] == -19: + logger.error(f"Sync delete completed with error: {err}") + else: + msg = f"Failed to sync delete bdev: {lvol_bdev_name} from node: {node.get_id()}" + logger.error(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + continue + + task.function_result = f"bdev {lvol_bdev_name} deleted" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + primary_node = db.get_storage_node_by_id(task.function_params["primary_node"]) + 
primary_node.lvol_del_sync_lock_reset() + + time.sleep(3) diff --git a/simplyblock_core/snode_client.py b/simplyblock_core/snode_client.py index 2e8504b08..ba9e8b2ad 100644 --- a/simplyblock_core/snode_client.py +++ b/simplyblock_core/snode_client.py @@ -40,8 +40,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as e: - logger.error("Request failed: %s", e) - raise e + raise SNodeClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) @@ -69,11 +68,14 @@ def _request(self, method, path, payload=None): if ret_code == 422: raise SNodeClientException(f"Request validation failed: '{response.text}'") - logger.error("Unknown http status: %s", ret_code) - return None, None + raise SNodeClientException(f"Unknown http status: {ret_code}") def is_live(self): - return self._request("GET", "/check") + try: + return self._request("GET", "check") + except SNodeClientException: + logger.warning("Failed to call snode/check, trying snode/info") + return self.info() def info(self): return self._request("GET", "info") @@ -81,7 +83,7 @@ def info(self): def spdk_process_start(self, l_cores, spdk_mem, spdk_image=None, spdk_debug=None, cluster_ip=None, fdb_connection=None, namespace=None, server_ip=None, rpc_port=None, rpc_username=None, rpc_password=None, multi_threading_enabled=False, timeout=0, ssd_pcie=None, - total_mem=None, system_mem=None, cluster_mode=None): + total_mem=None, system_mem=None, cluster_mode=None, socket=0, cluster_id=None, firewall_port=0): params = { "cluster_ip": cluster_ip, "server_ip": server_ip, @@ -113,6 +115,13 @@ def spdk_process_start(self, l_cores, spdk_mem, spdk_image=None, spdk_debug=None params["system_mem"] = system_mem if cluster_mode: params["cluster_mode"] = cluster_mode + params["socket"] = socket + + if cluster_id: + params["cluster_id"] = cluster_id + if 
firewall_port: + params["firewall_port"] = firewall_port + params["socket"] = socket return self._request("POST", "spdk_process_start", params) def join_swarm(self, cluster_ip, join_token, db_connection, cluster_id): @@ -124,8 +133,8 @@ def join_swarm(self, cluster_ip, join_token, db_connection, cluster_id): # "db_connection": db_connection} # return self._request("POST", "join_swarm", params) - def spdk_process_kill(self, rpc_port): - return self._request("GET", "spdk_process_kill", {"rpc_port": rpc_port}) + def spdk_process_kill(self, rpc_port, cluster_id=None): + return self._request("GET", "spdk_process_kill", {"rpc_port": rpc_port, "cluster_id": cluster_id}) def leave_swarm(self): return True @@ -148,12 +157,16 @@ def bind_device_to_nvme(self, device_pci): params = {"device_pci": device_pci} return self._request("POST", "bind_device_to_nvme", params) + def format_device_with_4k(self, device_pci): + params = {"device_pci": device_pci} + return self._request("POST", "format_device_with_4k", params) + def bind_device_to_spdk(self, device_pci): params = {"device_pci": device_pci} return self._request("POST", "bind_device_to_spdk", params) - def spdk_process_is_up(self, rpc_port): - params = {"rpc_port": rpc_port} + def spdk_process_is_up(self, rpc_port, cluster_id): + params = {"rpc_port": rpc_port, "cluster_id": cluster_id} return self._request("GET", "spdk_process_is_up", params) def get_file_content(self, file_name): @@ -172,4 +185,11 @@ def ifc_is_roce(self, nic): def ifc_is_tcp(self, nic): params = {"nic": nic} - return self._request("GET", "ifc_is_tcp", params) \ No newline at end of file + return self._request("GET", "ifc_is_tcp", params) + def nvme_connect(self, ip, port, nqn): + params = {"ip": ip, "port": port, "nqn": nqn} + return self._request("POST", "nvme_connect", params) + + def disconnect_nqn(self, nqn): + params = {"nqn": nqn} + return self._request("POST", "disconnect_nqn", params) diff --git a/simplyblock_core/storage_node_ops.py 
b/simplyblock_core/storage_node_ops.py index 3d32dd17a..1b2b23c0d 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -1,7 +1,6 @@ # coding=utf- 8 import datetime import json -import os import platform import socket @@ -27,15 +26,18 @@ from simplyblock_core.models.iface import IFace from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.lvol_model import LVol -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.models.cluster import Cluster +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient, RPCException from simplyblock_core.snode_client import SNodeClient, SNodeClientException from simplyblock_web import node_utils from simplyblock_core.utils import addNvmeDevices from simplyblock_core.utils import pull_docker_image_with_retry +import os + logger = utils.get_logger(__name__) @@ -57,73 +59,88 @@ def connect_device(name: str, device: NVMeDevice, node: StorageNode, bdev_names: rpc_client = node.rpc_client() # check connection status - if device.connecting_from_node and device.connecting_from_node != node.get_id(): + if device.is_connection_in_progress_to_node(node.get_id()): logger.warning("This device is being connected to from other node, sleep for 5 seconds") time.sleep(5) - device.connecting_from_node = node.get_id() - device.write_to_db() + device.lock_device_connection(node.get_id()) ret = rpc_client.bdev_nvme_controller_list(name) if ret: - for controller in ret[0]["ctrlrs"]: - controller_state = controller["state"] - logger.info(f"Controller found: {name}, status: {controller_state}") - if controller_state == "deleting": - raise RuntimeError(f"Controller: {name}, status is 
{controller_state}") - - if reattach: - rpc_client.bdev_nvme_detach_controller(name) - time.sleep(1) - - bdev_name = None - - db_ctrl=DBController() - node=db_ctrl.get_storage_node_by_id(device.node_id) - if node.active_rdma: - tr_type="RDMA" - else: - if node.active_tcp: - tr_type="TCP" + counter = 0 + while (counter < 5): + waiting = False + for controller in ret[0]["ctrlrs"]: + controller_state = controller["state"] + logger.info(f"Controller found: {name}, status: {controller_state}") + if controller_state== "failed": + # we can remove the controller only for certain, if its failed. other states are intermediate and require retry. + rpc_client.bdev_nvme_detach_controller(name) + time.sleep(2) + break + elif controller_state == "resetting" or controller_state == "deleting" or controller_state == "reconnect_is_delayed": + if counter < 5: + time.sleep(2) + waiting = True + break + else: # this should never happen. It means controller is "hanging" in an intermediate state for more than 10 seconds. usually if some io is hanging. + raise RuntimeError(f"Controller: {name}, status is {controller_state}") + if not waiting: + counter = 5 + else: + counter += 1 + + # if reattach: + # rpc_client.bdev_nvme_detach_controller(name) + # time.sleep(1) + + # only if the controller is really gone we try to reattach it + if not rpc_client.bdev_nvme_controller_list(name): + bdev_name = None + + db_ctrl = DBController() + node = db_ctrl.get_storage_node_by_id(device.node_id) + if node.active_rdma: + tr_type = "RDMA" else: - msg="target node to connect has no active fabric." - logger.error(msg) - raise RuntimeError(msg) + if node.active_tcp: + tr_type = "TCP" + else: + msg = "target node to connect has no active fabric." 
+ logger.error(msg) + raise RuntimeError(msg) - for ip in device.nvmf_ip.split(","): - ret = rpc_client.bdev_nvme_attach_controller( - name, device.nvmf_nqn, ip, device.nvmf_port,tr_type, + for ip in device.nvmf_ip.split(","): + ret = rpc_client.bdev_nvme_attach_controller( + name, device.nvmf_nqn, ip, device.nvmf_port, tr_type, multipath=device.nvmf_multipath) - if not bdev_name and ret and isinstance(ret, list): - bdev_name = ret[0] - - if device.nvmf_multipath: - rpc_client.bdev_nvme_set_multipath_policy(bdev_name, "active_active") + if not bdev_name and ret and isinstance(ret, list): + bdev_name = ret[0] - # wait 5 seconds after controller attach - time.sleep(5) + if device.nvmf_multipath: + rpc_client.bdev_nvme_set_multipath_policy(bdev_name, "active_active") - if not bdev_name: - msg = "Bdev name not returned from controller attach" - logger.error(msg) - raise RuntimeError(msg) - bdev_found = False - for i in range(5): - ret = rpc_client.get_bdevs(bdev_name) - if ret: - bdev_found = True - break - else: - time.sleep(1) + if not bdev_name: + msg = "Bdev name not returned from controller attach" + logger.error(msg) + raise RuntimeError(msg) + bdev_found = False + for i in range(5): + ret = rpc_client.get_bdevs(bdev_name) + if ret: + bdev_found = True + break + else: + time.sleep(1) - device.connecting_from_node = "" - device.write_to_db() + device.release_device_connection() - if not bdev_found: - logger.error("Bdev not found after 5 attempts") - raise RuntimeError(f"Failed to connect to device: {device.get_id()}") + if not bdev_found: + logger.error("Bdev not found after 5 attempts") + raise RuntimeError(f"Failed to connect to device: {device.get_id()}") - return bdev_name + return bdev_name + return None def get_next_cluster_device_order(db_controller, cluster_id): @@ -170,15 +187,24 @@ def _search_for_partitions(rpc_client, nvme_device): def _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart): - raid_bdev = f"raid_jm_{snode.get_id()}" - 
if len(jm_nvme_bdevs) > 1: - raid_level = "1" - ret = rpc_client.bdev_raid_create(raid_bdev, jm_nvme_bdevs, raid_level) - if not ret: - logger.error(f"Failed to create raid_jm_{snode.get_id()}") - return False + if snode.jm_device and snode.jm_device.raid_bdev: + raid_bdev = snode.jm_device.raid_bdev + if raid_bdev.startswith("raid_jm_"): + raid_level = "1" + ret = rpc_client.bdev_raid_create(raid_bdev, jm_nvme_bdevs, raid_level) + if not ret: + logger.error(f"Failed to create raid_jm_{snode.get_id()}") + return False else: - raid_bdev = jm_nvme_bdevs[0] + if len(jm_nvme_bdevs) > 1: + raid_bdev = f"raid_jm_{snode.get_id()}" + raid_level = "1" + ret = rpc_client.bdev_raid_create(raid_bdev, jm_nvme_bdevs, raid_level) + if not ret: + logger.error(f"Failed to create raid_jm_{snode.get_id()}") + return False + else: + raid_bdev = jm_nvme_bdevs[0] alceml_id = snode.get_id() alceml_name = f"alceml_jm_{snode.get_id()}" @@ -224,9 +250,9 @@ def _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart): return False for iface in snode.data_nics: - logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) - ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) - ip_list.append(iface.ip4_address) + logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) + ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) + ip_list.append(iface.ip4_address) if len(ip_list) > 1: IP = ",".join(ip_list) @@ -413,8 +439,8 @@ def _create_storage_device_stack(rpc_client, nvme, snode, after_restart): return nvme -def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, jm_percent, partition_size=0): - nbd_device = rpc_client.nbd_start_disk(nvme.nvme_bdev) +def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, jm_percent, partition_size, nbd_index): + nbd_device = 
rpc_client.nbd_start_disk(nvme.nvme_bdev, f"/dev/nbd{nbd_index}") time.sleep(3) if not nbd_device: logger.error("Failed to start nbd dev") @@ -431,9 +457,15 @@ def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, j return False time.sleep(3) rpc_client.nbd_stop_disk(nbd_device) - time.sleep(1) + for i in range(10): + if not rpc_client.nbd_get_disks(nbd_device): + break + time.sleep(1) rpc_client.bdev_nvme_detach_controller(nvme.nvme_controller) - time.sleep(1) + for i in range(10): + if not rpc_client.bdev_nvme_controller_list(nvme.nvme_controller): + break + time.sleep(1) try: rpc_client.bdev_nvme_controller_attach(nvme.nvme_controller, nvme.pcie_address) except RPCException as e: @@ -447,79 +479,84 @@ def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, j def _prepare_cluster_devices_partitions(snode, devices): db_controller = DBController() - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password) - new_devices = [] - jm_devices = [] - dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id) - bdevs_names = [d['name'] for d in rpc_client.get_bdevs()] + devices_to_partition = [] + thread_list = [] for index, nvme in enumerate(devices): if nvme.status == "not_found": continue - if nvme.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_NEW]: logger.debug(f"Device is skipped: {nvme.get_id()}, status: {nvme.status}") new_devices.append(nvme) continue - if nvme.is_partition: - dev_part = f"{nvme.nvme_bdev[:-2]}p1" - if dev_part in bdevs_names: - if dev_part not in jm_devices: - jm_devices.append(dev_part) - - new_device = _create_storage_device_stack(rpc_client, nvme, snode, after_restart=False) - if not new_device: - logger.error("failed to create dev stack") - return False - new_devices.append(new_device) - if new_device.status == NVMeDevice.STATUS_ONLINE: - device_events.device_create(new_device) - + t = 
threading.Thread(target=_create_storage_device_stack, args=(snode.rpc_client(), nvme, snode, False,)) + thread_list.append(t) + new_devices.append(nvme) + t.start() else: - # look for partitions - partitioned_devices = _search_for_partitions(rpc_client, nvme) - logger.debug("partitioned_devices") - logger.debug(partitioned_devices) - if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): - logger.info("Partitioned devices found") - else: + devices_to_partition.append(nvme) + partitioned_devices = _search_for_partitions(snode.rpc_client(), nvme) + if len(partitioned_devices) != (1 + snode.num_partitions_per_dev): logger.info(f"Creating partitions for {nvme.nvme_bdev}") - _create_device_partitions(rpc_client, nvme, snode, snode.num_partitions_per_dev, snode.jm_percent, - snode.partition_size) - partitioned_devices = _search_for_partitions(rpc_client, nvme) - if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): - logger.info("Device partitions created") - else: - logger.error("Failed to create partitions") - return False + t = threading.Thread( + target=_create_device_partitions, + args=(snode.rpc_client(), nvme, snode, snode.num_partitions_per_dev, + snode.jm_percent, snode.partition_size, index + 1,)) + thread_list.append(t) + t.start() - jm_devices.append(partitioned_devices.pop(0).nvme_bdev) + for thread in thread_list: + thread.join() + thread_list = [] + for nvme in devices_to_partition: + partitioned_devices = _search_for_partitions(snode.rpc_client(), nvme) + if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): + logger.info("Device partitions created") + # remove 1st partition for jm + partitioned_devices.pop(0) for dev in partitioned_devices: - ret = _create_storage_device_stack(rpc_client, dev, snode, after_restart=False) - if not ret: - logger.error("failed to create dev stack") - return False - if dev.status == NVMeDevice.STATUS_ONLINE: - if dev.cluster_device_order < 0: - dev.cluster_device_order = dev_order - 
dev_order += 1 - device_events.device_create(dev) + t = threading.Thread(target=_create_storage_device_stack, + args=(snode.rpc_client(), dev, snode, False,)) + thread_list.append(t) new_devices.append(dev) + t.start() + else: + logger.error("Failed to create partitions") + return False - snode.nvme_devices = new_devices + for thread in thread_list: + thread.join() + + # assign device order + dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id) + for nvme in new_devices: + if nvme.status == NVMeDevice.STATUS_ONLINE: + if nvme.cluster_device_order < 0: + nvme.cluster_device_order = dev_order + dev_order += 1 + device_events.device_create(nvme) + + # create jm device + jm_devices = [] + bdevs_names = [d['name'] for d in snode.rpc_client().get_bdevs()] + for nvme in new_devices: + if nvme.status == NVMeDevice.STATUS_ONLINE: + dev_part = f"{nvme.nvme_bdev[:-2]}p1" + if dev_part in bdevs_names: + if dev_part not in jm_devices: + jm_devices.append(dev_part) if jm_devices: - jm_device = _create_jm_stack_on_raid(rpc_client, jm_devices, snode, after_restart=False) + jm_device = _create_jm_stack_on_raid(snode.rpc_client(), jm_devices, snode, after_restart=False) if not jm_device: logger.error("Failed to create JM device") return False snode.jm_device = jm_device + snode.nvme_devices = new_devices return True @@ -599,7 +636,7 @@ def _prepare_cluster_devices_on_restart(snode, clear_data=False): # prepare JM device jm_device = snode.jm_device - if jm_device is None or jm_device.status == JMDevice.STATUS_REMOVED: + if jm_device is None: return True if not jm_device or not jm_device.uuid: @@ -608,20 +645,36 @@ def _prepare_cluster_devices_on_restart(snode, clear_data=False): jm_device.status = JMDevice.STATUS_UNAVAILABLE if jm_device.jm_nvme_bdev_list: - all_bdevs_found = True - for bdev_name in jm_device.jm_nvme_bdev_list: - ret = rpc_client.get_bdevs(bdev_name) + if len(jm_device.jm_nvme_bdev_list) == 1: + ret = 
rpc_client.get_bdevs(jm_device.jm_nvme_bdev_list[0]) if not ret: - logger.error(f"BDev not found: {bdev_name}") - all_bdevs_found = False - break - - if all_bdevs_found: + logger.error(f"BDev not found: {jm_device.jm_nvme_bdev_list[0]}") + jm_device.status = JMDevice.STATUS_REMOVED + return True ret = _create_jm_stack_on_raid(rpc_client, jm_device.jm_nvme_bdev_list, snode, after_restart=not clear_data) if not ret: logger.error("Failed to create JM device") return False + return True + + jm_bdevs_found = [] + for bdev_name in jm_device.jm_nvme_bdev_list: + ret = rpc_client.get_bdevs(bdev_name) + if ret: + logger.info(f"JM bdev found: {bdev_name}") + jm_bdevs_found.append(bdev_name) + else: + logger.error(f"JM bdev not found: {bdev_name}") + if len(jm_bdevs_found) > 1: + ret = _create_jm_stack_on_raid(rpc_client, jm_bdevs_found, snode, after_restart=not clear_data) + if not ret: + logger.error("Failed to create JM device") + return False + else: + logger.error("Only one jm nvme bdev found, setting jm device to removed") + jm_device.status = JMDevice.STATUS_REMOVED + return True else: nvme_bdev = jm_device.nvme_bdev @@ -684,7 +737,7 @@ def _connect_to_remote_devs( rpc_client = RPCClient( this_node.mgmt_ip, this_node.rpc_port, - this_node.rpc_username, this_node.rpc_password, timeout=3, retry=1) + this_node.rpc_username, this_node.rpc_password, timeout=5, retry=1) node_bdevs = rpc_client.get_bdevs() if node_bdevs: @@ -701,6 +754,8 @@ def _connect_to_remote_devs( allowed_node_statuses.append(StorageNode.STATUS_RESTARTING) allowed_dev_statuses.append(NVMeDevice.STATUS_UNAVAILABLE) + devices_to_connect = [] + connect_threads = [] nodes = db_controller.get_storage_nodes_by_cluster_id(this_node.cluster_id) # connect to remote devs for node_index, node in enumerate(nodes): @@ -715,12 +770,36 @@ def _connect_to_remote_devs( if not dev.alceml_bdev: raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") + devices_to_connect.append(dev) + t = threading.Thread( + 
target=connect_device, + args=(f"remote_{dev.alceml_bdev}", dev, this_node, node_bdev_names, reattach,)) + connect_threads.append(t) + t.start() - dev.remote_bdev = connect_device( - f"remote_{dev.alceml_bdev}", dev, this_node, - bdev_names=node_bdev_names, reattach=reattach, - ) - remote_devices.append(dev) + for t in connect_threads: + t.join() + + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + node_bdev_names = [b['name'] for b in node_bdevs] + + for dev in devices_to_connect: + remote_bdev = RemoteDevice() + remote_bdev.uuid = dev.uuid + remote_bdev.alceml_name = dev.alceml_name + remote_bdev.node_id = dev.node_id + remote_bdev.size = dev.size + remote_bdev.status = NVMeDevice.STATUS_ONLINE + remote_bdev.nvmf_multipath = dev.nvmf_multipath + for bdev in node_bdev_names: + if bdev.startswith(f"remote_{dev.alceml_bdev}"): + remote_bdev.remote_bdev = bdev + break + if not remote_bdev.remote_bdev: + logger.error(f"Failed to connect to remote device {dev.alceml_name}") + continue + remote_devices.append(remote_bdev) return remote_devices @@ -759,6 +838,10 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if jm_dev and jm_dev not in remote_devices: remote_devices.append(jm_dev) + logger.debug(f"remote_devices: {remote_devices}") + allowed_node_statuses = [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN, StorageNode.STATUS_RESTARTING] + allowed_dev_statuses = [NVMeDevice.STATUS_ONLINE] + new_devs = [] for jm_dev in remote_devices: if not jm_dev.jm_bdev: @@ -775,17 +858,34 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if not org_dev or org_dev in new_devs or org_dev_node and org_dev_node.get_id() == this_node.get_id(): continue + if org_dev_node is not None and org_dev_node.status not in allowed_node_statuses: + logger.warning(f"Skipping node:{org_dev_node.get_id()} with status: {org_dev_node.status}") + continue + + if org_dev is not None and org_dev.status not in allowed_dev_statuses: + logger.warning(f"Skipping 
device:{org_dev.get_id()} with status: {org_dev.status}") + continue + + remote_device = RemoteJMDevice() + remote_device.uuid = org_dev.uuid + remote_device.alceml_name = org_dev.alceml_name + remote_device.node_id = org_dev.node_id + remote_device.size = org_dev.size + remote_device.jm_bdev = org_dev.jm_bdev + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.nvmf_multipath = org_dev.nvmf_multipath try: - org_dev.remote_bdev = connect_device( - f"remote_{org_dev.jm_bdev}", org_dev, this_node, - bdev_names=node_bdev_names, reattach=True, + remote_device.remote_bdev = connect_device( + f"remote_{org_dev.jm_bdev}", org_dev, this_node, + bdev_names=node_bdev_names, reattach=True, ) except RuntimeError: logger.error(f'Failed to connect to {org_dev.get_id()}') - new_devs.append(org_dev) + new_devs.append(remote_device) return new_devs + def ifc_is_tcp(nic): addrs = psutil.net_if_addrs().get(nic, []) for addr in addrs: @@ -793,6 +893,7 @@ def ifc_is_tcp(nic): return True return False + def ifc_is_roce(nic): rdma_path = "/sys/class/infiniband/" if not os.path.exists(rdma_path): @@ -806,12 +907,14 @@ def ifc_is_roce(nic): return True return False -def add_node(cluster_id, node_addr, iface_name,data_nics_list, + +def add_node(cluster_id, node_addr, iface_name, data_nics_list, max_snap, spdk_image=None, spdk_debug=False, small_bufsize=0, large_bufsize=0, num_partitions_per_dev=0, jm_percent=0, enable_test_device=False, - namespace=None, enable_ha_jm=False, id_device_by_nqn=False, - partition_size="", ha_jm_count=3): + namespace=None, enable_ha_jm=False, cr_name=None, cr_namespace=None, cr_plural=None, + id_device_by_nqn=False, partition_size="", ha_jm_count=3, format_4k=False): + snode_api = SNodeClient(node_addr) node_info, _ = snode_api.info() if node_info.get("nodes_config") and node_info["nodes_config"].get("nodes"): @@ -887,6 +990,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, app_thread_core = 
node_config.get("distribution").get("app_thread_core") jm_cpu_core = node_config.get("distribution").get("jm_cpu_core") number_of_distribs = node_config.get("number_of_distribs") + lvol_poller_core = node_config.get("distribution").get("lvol_poller_core") + lvol_poller_mask = utils.generate_mask(lvol_poller_core) pollers_mask = utils.generate_mask(poller_cpu_cores) app_thread_mask = utils.generate_mask(app_thread_core) @@ -896,9 +1001,10 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, jm_cpu_mask = utils.generate_mask(jm_cpu_core) # Calculate pool count - max_prov = int(utils.parse_size(node_config.get("max_size"))) - - if max_prov <= 0: + max_prov = 0 + if node_config.get("max_size"): + max_prov = int(utils.parse_size(node_config.get("max_size"))) + if max_prov < 0: logger.error(f"Incorrect max-prov value {max_prov}") return False @@ -910,6 +1016,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, minimum_hp_memory = node_config.get("huge_page_memory") + minimum_hp_memory = max(minimum_hp_memory, max_prov) + # check for memory if "memory_details" in node_info and node_info['memory_details']: memory_details = node_info['memory_details'] @@ -918,7 +1026,7 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.info(f"Free: {utils.humanbytes(memory_details['free'])}") logger.info(f"huge_total: {utils.humanbytes(memory_details['huge_total'])}") logger.info(f"huge_free: {utils.humanbytes(memory_details['huge_free'])}") - logger.info(f"Minimum required huge pages memory is : {utils.humanbytes(minimum_hp_memory)}") + logger.info(f"Set huge pages memory is : {utils.humanbytes(minimum_hp_memory)}") else: logger.error("Cannot get memory info from the instance.. 
Exiting") return False @@ -926,14 +1034,15 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, # Calculate minimum sys memory minimum_sys_memory = node_config.get("sys_memory") - satisfied, spdk_mem = utils.calculate_spdk_memory(minimum_hp_memory, - minimum_sys_memory, - int(memory_details['free']), - int(memory_details['huge_total'])) + # satisfied, spdk_mem = utils.calculate_spdk_memory(minimum_hp_memory, + # minimum_sys_memory, + # int(memory_details['free']), + # int(memory_details['huge_total'])) max_lvol = node_config.get("max_lvol") - if not satisfied: - logger.warning( - f"Not enough memory for the provided max_lvo: {max_lvol}, max_prov: {max_prov}..") + + # if not satisfied: + # logger.warning( + # f"Not enough memory for the provided max_lvo: {max_lvol}, max_prov: {max_prov}..") ssd_pcie = node_config.get("ssd_pcis") if ssd_pcie: @@ -962,6 +1071,7 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, else: cluster_ip = utils.get_k8s_node_ip() + firewall_port = utils.get_next_fw_port(cluster_id) rpc_port = utils.get_next_rpc_port(cluster_id) rpc_user, rpc_pass = utils.generate_rpc_user_and_pass() mgmt_info = utils.get_mgmt_ip(node_info, iface_name) @@ -980,17 +1090,20 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, if log_config_type and log_config_type != LogConfig.types.GELF: logger.info("SNodeAPI container found but not configured with gelf logger") start_storage_node_api_container(mgmt_ip, cluster_ip) + node_socket = node_config.get("socket") total_mem = minimum_hp_memory for n in db_controller.get_storage_nodes_by_cluster_id(cluster_id): - if n.api_endpoint == node_addr: - total_mem += n.spdk_mem - total_mem += utils.parse_size("500m") + if n.api_endpoint == node_addr and n.socket == node_socket: + total_mem += (n.spdk_mem + 500000000) + logger.info("Deploying SPDK") - results = None l_cores = node_config.get("l-cores") spdk_cpu_mask = node_config.get("cpu_mask") for ssd in ssd_pcie: + if format_4k: + 
snode_api.format_device_with_4k(ssd) + snode_api.bind_device_to_spdk(ssd) snode_api.bind_device_to_spdk(ssd) try: results, err = snode_api.spdk_process_start( @@ -998,7 +1111,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, namespace, mgmt_ip, rpc_port, rpc_user, rpc_pass, multi_threading_enabled=constants.SPDK_PROXY_MULTI_THREADING_ENABLED, timeout=constants.SPDK_PROXY_TIMEOUT, - ssd_pcie=ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode) + ssd_pcie=ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode, + socket=node_socket, cluster_id=cluster_id, firewall_port=firewall_port) time.sleep(5) except Exception as e: @@ -1011,8 +1125,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, data_nics = [] - active_tcp=False - active_rdma=False + active_tcp = False + active_rdma = False fabric_tcp = cluster.fabric_tcp fabric_rdma = cluster.fabric_rdma names = data_nics_list or [mgmt_iface] @@ -1021,17 +1135,17 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.debug(f"Data nics ports are: {names}") for nic in names: device = node_info['network_interface'][nic] - base_ifc_cfg={ - 'uuid': str(uuid.uuid4()), - 'if_name': nic, - 'ip4_address': device['ip'], - 'status': device['status'], - 'net_type': device['net_type'],} + base_ifc_cfg = { + 'uuid': str(uuid.uuid4()), + 'if_name': nic, + 'ip4_address': device['ip'], + 'status': device['status'], + 'net_type': device['net_type'], } if fabric_rdma and snode_api.ifc_is_roce(nic): cfg = base_ifc_cfg.copy() cfg['trtype'] = "RDMA" data_nics.append(IFace(cfg)) - active_rdma=True + active_rdma = True if fabric_tcp and snode_api.ifc_is_tcp(nic): active_tcp = True elif fabric_tcp and snode_api.ifc_is_tcp(nic): @@ -1061,6 +1175,9 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, snode.cloud_name = cloud_instance['cloud'] or "" snode.namespace = namespace + snode.cr_name = cr_name + 
snode.cr_namespace = cr_namespace + snode.cr_plural = cr_plural snode.ssd_pcie = ssd_pcie snode.hostname = hostname snode.host_nqn = subsystem_nqn @@ -1080,8 +1197,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, snode.enable_ha_jm = enable_ha_jm snode.ha_jm_count = ha_jm_count snode.minimum_sys_memory = minimum_sys_memory - snode.active_tcp=active_tcp - snode.active_rdma=active_rdma + snode.active_tcp = active_tcp + snode.active_rdma = active_rdma if 'cpu_count' in node_info: snode.cpu = node_info['cpu_count'] @@ -1103,6 +1220,7 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, snode.write_to_db(kv_store) snode.app_thread_mask = app_thread_mask or "" snode.pollers_mask = pollers_mask or "" + snode.lvol_poller_mask = lvol_poller_mask or "" snode.jm_cpu_mask = jm_cpu_mask snode.alceml_cpu_index = alceml_cpu_index snode.alceml_worker_cpu_index = alceml_worker_cpu_index @@ -1114,11 +1232,14 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, snode.nvmf_port = utils.get_next_dev_port(cluster_id) snode.poller_cpu_cores = poller_cpu_cores or [] + snode.socket = node_socket + snode.iobuf_small_pool_count = small_pool_count or 0 snode.iobuf_large_pool_count = large_pool_count or 0 snode.iobuf_small_bufsize = small_bufsize or 0 snode.iobuf_large_bufsize = large_bufsize or 0 snode.enable_test_device = enable_test_device + snode.firewall_port = firewall_port if cluster.is_single_node: snode.physical_label = 0 @@ -1176,6 +1297,12 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, rpc_client.log_set_print_level("DEBUG") + if snode.lvol_poller_mask: + ret = rpc_client.bdev_lvol_create_poller_group(snode.lvol_poller_mask) + if not ret: + logger.error("Failed to set pollers mask") + return False + # 5- set app_thread cpu mask if snode.app_thread_mask: ret = rpc_client.thread_get_stats() @@ -1204,15 +1331,15 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, return False if cluster.fabric_tcp: - ret = 
rpc_client.transport_create("TCP", qpair,512*(req_cpu_count+1)) - if not ret: - logger.error(f"Failed to create transport TCP with qpair: {qpair}") - return False + ret = rpc_client.transport_create("TCP", qpair, 512 * (req_cpu_count + 1)) + if not ret: + logger.error(f"Failed to create transport TCP with qpair: {qpair}") + return False if cluster.fabric_rdma: - ret = rpc_client.transport_create("RDMA", qpair,512*(req_cpu_count+1)) - if not ret: - logger.error(f"Failed to create transport RDMA with qpair: {qpair}") - return False + ret = rpc_client.transport_create("RDMA", qpair, 512 * (req_cpu_count + 1)) + if not ret: + logger.error(f"Failed to create transport RDMA with qpair: {qpair}") + return False # 7- set jc singleton mask if snode.jc_singleton_mask: @@ -1262,8 +1389,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.info("Setting Alcemls QOS weights") ret = rpc_client.alceml_set_qos_weights(qos_controller.get_qos_weights_list(cluster_id)) if not ret: - logger.error("Failed to set Alcemls QOS") - return False + logger.error("Failed to set Alcemls QOS") + return False logger.info("Connecting to remote devices") remote_devices = _connect_to_remote_devs(snode) @@ -1277,7 +1404,7 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, snode = db_controller.get_storage_node_by_id(snode.get_id()) old_status = snode.status - snode.status = StorageNode.STATUS_ONLINE + snode.status = StorageNode.STATUS_ONLINE snode.updated_at = str(datetime.datetime.now(datetime.timezone.utc)) snode.online_since = str(datetime.datetime.now(datetime.timezone.utc)) snode.write_to_db(db_controller.kv_store) @@ -1297,7 +1424,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, return False node.write_to_db(kv_store) - if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY, Cluster.STATUS_IN_EXPANSION]: + if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, 
Cluster.STATUS_READONLY, + Cluster.STATUS_IN_EXPANSION]: logger.warning( f"The cluster status is not active ({cluster.status}), adding the node without distribs and lvstore") continue @@ -1454,7 +1582,7 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False): if health_controller._check_node_api(snode.mgmt_ip): logger.info("Stopping SPDK container") snode_api = SNodeClient(snode.api_endpoint, timeout=20) - snode_api.spdk_process_kill(snode.rpc_port) + snode_api.spdk_process_kill(snode.rpc_port, snode.cluster_id) snode_api.leave_swarm() pci_address = [] for dev in snode.nvme_devices: @@ -1464,7 +1592,6 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False): pci_address.append(dev.pcie_address) except Exception as e: logger.exception(e) - return False set_node_status(node_id, StorageNode.STATUS_REMOVED) @@ -1481,8 +1608,6 @@ def restart_storage_node( spdk_image=None, set_spdk_debug=None, small_bufsize=0, large_bufsize=0, force=False, node_ip=None, reattach_volume=False, clear_data=False, new_ssd_pcie=[], force_lvol_recreate=False): - db_controller = DBController() - kv_store = db_controller.kv_store db_controller = DBController() logger.info("Restarting storage node") @@ -1565,23 +1690,22 @@ def restart_storage_node( snode_api.bind_device_to_spdk(dev['address']) else: node_ip = None - active_tcp=False - active_rdma=False + active_tcp = False + active_rdma = False fabric_tcp = cluster.fabric_tcp fabric_rdma = cluster.fabric_rdma snode_api = SNodeClient(snode.api_endpoint, timeout=5 * 60, retry=3) for nic in snode.data_nics: if fabric_rdma and snode_api.ifc_is_roce(nic["if_name"]): nic.trtype = "RDMA" - active_rdma=True + active_rdma = True if fabric_tcp and snode_api.ifc_is_tcp(nic["if_name"]): active_tcp = True elif fabric_tcp and snode_api.ifc_is_tcp(nic["if_name"]): nic.trtype = "TCP" active_tcp = True - snode.active_tcp=active_tcp - snode.active_rdma=active_rdma - + snode.active_tcp = active_tcp + snode.active_rdma = 
active_rdma logger.info(f"Restarting Storage node: {snode.mgmt_ip}") node_info, _ = snode_api.info() @@ -1602,28 +1726,27 @@ def restart_storage_node( snode.l_cores = node['l-cores'] break - if max_prov: - if not isinstance(max_prov, int): - try: - max_prov = int(max_prov) - max_prov = f"{max_prov}g" - max_prov = int(utils.parse_size(max_prov)) - except Exception: - logger.error(f"Invalid max_prov value: {max_prov}") - return False - - snode.max_prov = max_prov - if snode.max_prov <= 0: - logger.error(f"Incorrect max-prov value {snode.max_prov}") - return False + if max_prov > 0: + try: + max_prov = int(utils.parse_size(max_prov)) + snode.max_prov = max_prov + except Exception as e: + logger.debug(e) + logger.error(f"Invalid max_prov value: {max_prov}") + return False + else: + max_prov = snode.max_prov if spdk_image: snode.spdk_image = spdk_image # Calculate minimum huge page memory - minimum_hp_memory = utils.calculate_minimum_hp_memory(snode.iobuf_small_pool_count, snode.iobuf_large_pool_count, snode.max_lvol, - snode.max_prov, + minimum_hp_memory = utils.calculate_minimum_hp_memory(snode.iobuf_small_pool_count, snode.iobuf_large_pool_count, + snode.max_lvol, + max_prov, len(utils.hexa_to_cpu_list(snode.spdk_cpu_mask))) + minimum_hp_memory = max(minimum_hp_memory, max_prov) + # check for memory if "memory_details" in node_info and node_info['memory_details']: memory_details = node_info['memory_details'] @@ -1636,22 +1759,26 @@ def restart_storage_node( return False # Calculate minimum sys memory - #minimum_sys_memory = utils.calculate_minimum_sys_memory(snode.max_prov, memory_details['total']) - minimum_sys_memory = snode.minimum_sys_memory - satisfied, spdk_mem = utils.calculate_spdk_memory(minimum_hp_memory, - minimum_sys_memory, - int(memory_details['free']), - int(memory_details['huge_total'])) - if not satisfied: - logger.error( - f"Not enough memory for the provided max_lvo: {snode.max_lvol}, max_snap: {snode.max_snap}, max_prov: 
{utils.humanbytes(snode.max_prov)}.. Exiting") - - snode.spdk_mem = spdk_mem + # minimum_sys_memory = utils.calculate_minimum_sys_memory(snode.max_prov, memory_details['total']) + # minimum_sys_memory = snode.minimum_sys_memory + # satisfied, spdk_mem = utils.calculate_spdk_memory(minimum_hp_memory, + # minimum_sys_memory, + # int(memory_details['free']), + # int(memory_details['huge_total'])) + # if not satisfied: + # logger.error( + # f"Not enough memory for the provided max_lvo: {snode.max_lvol}, max_snap: {snode.max_snap}, max_prov: {utils.humanbytes(snode.max_prov)}.. Exiting") + minimum_sys_memory = snode.minimum_sys_memory or 0 + snode.spdk_mem = minimum_hp_memory + spdk_debug = snode.spdk_debug if set_spdk_debug: spdk_debug = True snode.spdk_debug = spdk_debug + if minimum_sys_memory: + snode.minimum_sys_memory = minimum_sys_memory + cluster = db_controller.get_cluster_by_id(snode.cluster_id) if cluster.mode == "docker": @@ -1661,22 +1788,29 @@ def restart_storage_node( else: cluster_ip = utils.get_k8s_node_ip() - total_mem = 0 + total_mem = minimum_hp_memory for n in db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id): - if n.api_endpoint == snode.api_endpoint: - total_mem += n.spdk_mem - total_mem+= utils.parse_size("500m") + if n.api_endpoint == snode.api_endpoint and n.socket == snode.socket and n.uuid != snode.uuid: + total_mem += (n.spdk_mem + 500000000) results = None try: if new_ssd_pcie and type(new_ssd_pcie) is list: - snode.ssd_pcie.extend(new_ssd_pcie) + for new_ssd in new_ssd_pcie: + if new_ssd not in snode.ssd_pcie: + try: + snode_api.bind_device_to_spdk(new_ssd) + except Exception as e: + logger.error(e) + snode.ssd_pcie.append(new_ssd) + fdb_connection = cluster.db_connection results, err = snode_api.spdk_process_start( snode.l_cores, snode.spdk_mem, snode.spdk_image, spdk_debug, cluster_ip, fdb_connection, snode.namespace, snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, 
multi_threading_enabled=constants.SPDK_PROXY_MULTI_THREADING_ENABLED, timeout=constants.SPDK_PROXY_TIMEOUT, - ssd_pcie=snode.ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode) + ssd_pcie=snode.ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode, + cluster_id=snode.cluster_id, socket=snode.socket, firewall_port=snode.firewall_port) except Exception as e: logger.error(e) @@ -1737,6 +1871,12 @@ def restart_storage_node( rpc_client.log_set_print_level("DEBUG") + if snode.lvol_poller_mask: + ret = rpc_client.bdev_lvol_create_poller_group(snode.lvol_poller_mask) + if not ret: + logger.error("Failed to set pollers mask") + return False + # 5- set app_thread cpu mask if snode.app_thread_mask: ret = rpc_client.thread_get_stats() @@ -1761,12 +1901,12 @@ def restart_storage_node( qpair = cluster.qpair_count req_cpu_count = len(utils.hexa_to_cpu_list(snode.spdk_cpu_mask)) if cluster.fabric_tcp: - ret = rpc_client.transport_create("TCP", qpair, 512*(req_cpu_count+1)) + ret = rpc_client.transport_create("TCP", qpair, 512 * (req_cpu_count + 1)) if not ret: logger.error(f"Failed to create transport TCP with qpair: {qpair}") return False if cluster.fabric_rdma: - ret = rpc_client.transport_create("RDMA", qpair, 512*(req_cpu_count+1)) + ret = rpc_client.transport_create("RDMA", qpair, 512 * (req_cpu_count + 1)) if not ret: logger.error(f"Failed to create transport RDMA with qpair: {qpair}") return False @@ -1778,11 +1918,14 @@ def restart_storage_node( logger.error("Failed to set jc singleton mask") return False + node_info, _ = snode_api.info() if not snode.ssd_pcie: - node_info, _ = snode_api.info() ssds = node_info['spdk_pcie_list'] else: - ssds = snode.ssd_pcie + ssds = [] + for ssd in snode.ssd_pcie: + if ssd in node_info['spdk_pcie_list']: + ssds.append(ssd) nvme_devs = addNvmeDevices(rpc_client, snode, ssds) if not nvme_devs: @@ -1799,10 +1942,11 @@ def restart_storage_node( active_devices = [] 
removed_devices = [] known_devices_sn = [] - devices_sn_dict = {d.serial_number:d for d in nvme_devs} + devices_sn_dict = {d.serial_number: d for d in nvme_devs} for db_dev in snode.nvme_devices: known_devices_sn.append(db_dev.serial_number) - if db_dev.status in [NVMeDevice.STATUS_FAILED_AND_MIGRATED, NVMeDevice.STATUS_FAILED, NVMeDevice.STATUS_REMOVED]: + if db_dev.status in [NVMeDevice.STATUS_FAILED_AND_MIGRATED, NVMeDevice.STATUS_FAILED, + NVMeDevice.STATUS_REMOVED]: removed_devices.append(db_dev) continue if db_dev.serial_number in devices_sn_dict.keys(): @@ -1811,7 +1955,7 @@ def restart_storage_node( if not db_dev.is_partition and not found_dev.is_partition: db_dev.device_name = found_dev.device_name db_dev.nvme_bdev = found_dev.nvme_bdev - db_dev.nvme_controller =found_dev.nvme_controller + db_dev.nvme_controller = found_dev.nvme_controller db_dev.pcie_address = found_dev.pcie_address # if db_dev.status in [ NVMeDevice.STATUS_ONLINE]: @@ -1819,9 +1963,11 @@ def restart_storage_node( active_devices.append(db_dev) else: logger.info(f"Device not found: {db_dev.get_id()}") - db_dev.status = NVMeDevice.STATUS_REMOVED - removed_devices.append(db_dev) - # distr_controller.send_dev_status_event(db_dev, db_dev.status) + if db_dev.status == NVMeDevice.STATUS_NEW: + snode.nvme_devices.remove(db_dev) + else: + db_dev.status = NVMeDevice.STATUS_REMOVED + removed_devices.append(db_dev) jm_dev_sn = "" if snode.jm_device and "serial_number" in snode.jm_device.device_data_dict: @@ -1840,7 +1986,7 @@ def restart_storage_node( snode.nvme_devices.append(dev) snode.write_to_db(db_controller.kv_store) - if node_ip and len(new_devices)>0: + if node_ip and len(new_devices) > 0: # prepare devices on new node if snode.num_partitions_per_dev == 0 or snode.jm_percent == 0: @@ -1883,11 +2029,9 @@ def restart_storage_node( return False if snode.enable_ha_jm: snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) - snode.health_check = True snode.lvstore_status = "" 
snode.write_to_db(db_controller.kv_store) - snode = db_controller.get_storage_node_by_id(snode.get_id()) for db_dev in snode.nvme_devices: if db_dev.status in [NVMeDevice.STATUS_UNAVAILABLE, NVMeDevice.STATUS_ONLINE, @@ -1900,23 +2044,6 @@ def restart_storage_node( db_dev.health_check = True device_events.device_restarted(db_dev) snode.write_to_db(db_controller.kv_store) - # - # # make other nodes connect to the new devices - # logger.info("Make other nodes connect to the node devices") - # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id) - # for node in snodes: - # if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: - # continue - # node.remote_devices = _connect_to_remote_devs(node, force_connect_restarting_nodes=True) - # node.write_to_db(kv_store) - # - # logger.info(f"Sending device status event") - # snode = db_controller.get_storage_node_by_id(snode.get_id()) - # for db_dev in snode.nvme_devices: - # distr_controller.send_dev_status_event(db_dev, db_dev.status) - # - # if snode.jm_device and snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_ONLINE]: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) cluster = db_controller.get_cluster_by_id(snode.cluster_id) if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: @@ -1932,7 +2059,7 @@ def restart_storage_node( except RuntimeError: logger.error('Failed to connect to remote devices') return False - node.write_to_db(kv_store) + node.write_to_db() logger.info("Sending device status event") snode = db_controller.get_storage_node_by_id(snode.get_id()) @@ -1979,9 +2106,7 @@ def restart_storage_node( except RuntimeError: logger.error('Failed to connect to remote devices') return False - node.write_to_db(kv_store) - - + node.write_to_db() logger.info("Sending device status event") snode = db_controller.get_storage_node_by_id(snode.get_id()) for db_dev in 
snode.nvme_devices: @@ -2004,11 +2129,11 @@ def restart_storage_node( pools = db_controller.get_pools() for pool in pools: ret = rpc_client.bdev_lvol_set_qos_limit(pool.numeric_id, - pool.max_rw_ios_per_sec, - pool.max_rw_mbytes_per_sec, - pool.max_r_mbytes_per_sec, - pool.max_w_mbytes_per_sec, - ) + pool.max_rw_ios_per_sec, + pool.max_rw_mbytes_per_sec, + pool.max_r_mbytes_per_sec, + pool.max_w_mbytes_per_sec, + ) if not ret: logger.error("RPC failed bdev_lvol_set_qos_limit") return False @@ -2137,51 +2262,28 @@ def list_storage_devices(node_id, is_json): "Health": snode.jm_device.health_check }) - for jm_id in snode.jm_ids: - try: - jm_device = db_controller.get_jm_device_by_id(jm_id) - except KeyError: - continue - - jm_devices.append({ - "UUID": jm_device.uuid, - "Name": jm_device.device_name, - "Size": utils.humanbytes(jm_device.size), - "Status": jm_device.status, - "IO Err": jm_device.io_error, - "Health": jm_device.health_check - }) - - for device in snode.remote_devices: - logger.debug(device) + for remote_device in snode.remote_devices: + logger.debug(remote_device) logger.debug("*" * 20) - name = device.alceml_name - status = device.status - if device.remote_bdev: - name = device.remote_bdev - try: - org_dev = db_controller.get_storage_device_by_id(device.get_id()) - status = org_dev.status - except KeyError: - pass + name = remote_device.alceml_name remote_devices.append({ - "UUID": device.uuid, + "UUID": remote_device.uuid, "Name": name, - "Size": utils.humanbytes(device.size), - "Node ID": device.node_id, - "Status": status, + "Size": utils.humanbytes(remote_device.size), + "Node ID": remote_device.node_id, + "Status": remote_device.status, }) - for device in snode.remote_jm_devices: - logger.debug(device) + for remote_jm_device in snode.remote_jm_devices: + logger.debug(remote_jm_device) logger.debug("*" * 20) remote_devices.append({ - "UUID": device.uuid, - "Name": device.remote_bdev, - "Size": utils.humanbytes(device.size), - "Node ID": 
device.node_id, - "Status": device.status, + "UUID": remote_jm_device.uuid, + "Name": remote_jm_device.remote_bdev, + "Size": utils.humanbytes(remote_jm_device.size), + "Node ID": remote_jm_device.node_id, + "Status": remote_jm_device.status, }) data: dict[str, List[Any]] = { @@ -2228,7 +2330,8 @@ def shutdown_storage_node(node_id, force=False): if force is False: return False for task in tasks: - if task.function_name != JobSchedule.FN_NODE_RESTART: + if task.function_name not in [ + JobSchedule.FN_NODE_RESTART, JobSchedule.FN_SNAPSHOT_REPLICATION, JobSchedule.FN_LVOL_SYNC_DEL]: tasks_controller.cancel_task(task.uuid) logger.info("Shutting down node") @@ -2250,16 +2353,19 @@ def shutdown_storage_node(node_id, force=False): logger.info("Stopping SPDK") try: - SNodeClient(snode.api_endpoint, timeout=10, retry=10).spdk_process_kill(snode.rpc_port) + SNodeClient(snode.api_endpoint, timeout=10, retry=10).spdk_process_kill(snode.rpc_port, snode.cluster_id) except SNodeClientException: logger.error('Failed to kill SPDK') return False pci_address = [] for dev in snode.nvme_devices: if dev.pcie_address not in pci_address: - ret = SNodeClient(snode.api_endpoint, timeout=30, retry=1).bind_device_to_nvme(dev.pcie_address) - logger.debug(ret) - pci_address.append(dev.pcie_address) + try: + ret = SNodeClient(snode.api_endpoint, timeout=30, retry=1).bind_device_to_nvme(dev.pcie_address) + logger.debug(ret) + pci_address.append(dev.pcie_address) + except Exception as e: + logger.debug(e) logger.info("Setting node status to offline") set_node_status(node_id, StorageNode.STATUS_OFFLINE) @@ -2358,34 +2464,32 @@ def suspend_storage_node(node_id, force=False): if snode.lvstore_stack_secondary_1: nodes = db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id) if nodes: - for node in nodes: + for node in nodes: try: fw_api.firewall_set_port( node.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( node.lvol_subsys_port, 
port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) try: fw_api.firewall_set_port( snode.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( snode.lvol_subsys_port, port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) + time.sleep(1) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) - time.sleep(1) - - logger.info("Done") return True @@ -2429,7 +2533,7 @@ def resume_storage_node(node_id): return False if snode.enable_ha_jm: snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) - snode.write_to_db(db_controller.kv_store) + snode.write_to_db() fw_api = FirewallClient(snode, timeout=20, retry=1) port_type = "tcp" @@ -2437,7 +2541,7 @@ def resume_storage_node(node_id): port_type = "udp" nodes = db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id) if nodes: - for node in nodes: + for node in nodes: try: fw_api.firewall_set_port( node.lvol_subsys_port, port_type, "allow", snode.rpc_port) @@ -2465,20 +2569,11 @@ def resume_storage_node(node_id): def get_node_capacity(node_id, history, records_count=20, parse_sizes=True): db_controller = DBController() try: - this_node = db_controller.get_storage_node_by_id(node_id) + node = db_controller.get_storage_node_by_id(node_id) except KeyError: logger.error("Storage node Not found") return - if history: - records_number = utils.parse_history_param(history) - 
if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records = db_controller.get_node_capacity(this_node, records_number) cap_stats_keys = [ "date", "size_total", @@ -2488,6 +2583,8 @@ def get_node_capacity(node_id, history, records_count=20, parse_sizes=True): "size_util", "size_prov_util", ] + prom_client = PromClient(node.cluster_id) + records = prom_client.get_node_metrics(node_id, cap_stats_keys, history) new_records = utils.process_records(records, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -2514,17 +2611,6 @@ def get_node_iostats_history(node_id, history, records_count=20, parse_sizes=Tru except KeyError: logger.error("node not found") return False - - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records = db_controller.get_node_stats(node, records_number) - io_stats_keys = [ "date", "read_bytes", @@ -2562,6 +2648,8 @@ def get_node_iostats_history(node_id, history, records_count=20, parse_sizes=Tru "write_latency_ticks", ] ) + prom_client = PromClient(node.cluster_id) + records = prom_client.get_node_metrics(node_id, io_stats_keys, history) # combine records new_records = utils.process_records(records, records_count, keys=io_stats_keys) @@ -2669,8 +2757,8 @@ def upgrade_automated_deployment_config(): return False -def generate_automated_deployment_config(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_allowed, pci_blocked, cores_percentage=0): - +def generate_automated_deployment_config(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_allowed, pci_blocked, + cores_percentage=0, force=False, device_model="", size_range="", nvme_names=None, k8s=False): # we need minimum of 6 VPCs. RAM 4GB min. Plus 0.2% of the storage. 
total_cores = os.cpu_count() or 0 if total_cores < 6: @@ -2681,7 +2769,8 @@ def generate_automated_deployment_config(max_lvol, max_prov, sockets_to_use, nod utils.load_kernel_module("uio_pci_generic") nodes_config, system_info = utils.generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, - pci_allowed, pci_blocked, cores_percentage) + pci_allowed, pci_blocked, cores_percentage, force=force, + device_model=device_model, size_range=size_range, nvme_names=nvme_names) if not nodes_config or not nodes_config.get("nodes"): return False utils.store_config_file(nodes_config, constants.NODES_CONFIG_FILE, create_read_only_file=True) @@ -2693,11 +2782,14 @@ def generate_automated_deployment_config(max_lvol, max_prov, sockets_to_use, nod for node_config in nodes_config["nodes"]: numa = node_config["socket"] huge_page_memory_dict[numa] = huge_page_memory_dict.get(numa, 0) + node_config["huge_page_memory"] - for numa, huge_page_memory in huge_page_memory_dict.items(): - num_pages = huge_page_memory // (2048 * 1024) - utils.set_hugepages_if_needed(numa, num_pages) + if not k8s: + utils.create_rpc_socket_mount() + # for numa, huge_page_memory in huge_page_memory_dict.items(): + # num_pages = huge_page_memory // (2048 * 1024) + # utils.set_hugepages_if_needed(numa, num_pages) return True + def deploy(ifname, isolate_cores=False): if not ifname: ifname = "eth0" @@ -2721,7 +2813,8 @@ def deploy(ifname, isolate_cores=False): logger.info("Config Validated successfully.") logger.info("NVMe SSD devices found on node:") - stream = os.popen(f"lspci -Dnn | grep -i '\\[{LINUX_DRV_MASS_STORAGE_ID:02}{LINUX_DRV_MASS_STORAGE_NVME_TYPE_ID:02}\\]'") + stream = os.popen( + f"lspci -Dnn | grep -i '\\[{LINUX_DRV_MASS_STORAGE_ID:02}{LINUX_DRV_MASS_STORAGE_NVME_TYPE_ID:02}\\]'") for line in stream.readlines(): logger.info(line.strip()) @@ -2790,6 +2883,10 @@ def deploy_cleaner(): scripts.deploy_cleaner() +def clean_devices(config_path, format=True, force=False): + 
utils.clean_devices(config_path, format=format, force=force) + + def get_host_secret(node_id): db_controller = DBController() try: @@ -2879,12 +2976,12 @@ def health_check(node_id): # subsystem = rpc_client.subsystem_list(dev.nvmf_nqn) - # dev.testing_bdev = test_name - # dev.alceml_bdev = alceml_name - # dev.pt_bdev = pt_name - # # nvme.nvmf_nqn = subsystem_nqn - # # nvme.nvmf_ip = IP - # # nvme.nvmf_port = 4420 + # dev.testing_bdev = test_name + # dev.alceml_bdev = alceml_name + # dev.pt_bdev = pt_name + # # nvme.nvmf_nqn = subsystem_nqn + # # nvme.nvmf_ip = IP + # # nvme.nvmf_port = 4420 except Exception as e: logger.error(f"Failed to connect to node's SPDK: {e}") @@ -2975,9 +3072,9 @@ def set_node_status(node_id, status, reconnect_on_online=True): return False if snode.enable_ha_jm: snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) - snode.health_check = True snode.write_to_db(db_controller.kv_store) - distr_controller.send_cluster_map_to_node(snode) + for device in snode.nvme_devices: + distr_controller.send_dev_status_event(device, device.status, target_node=snode) for node in db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id): if node.get_id() == snode.get_id(): @@ -2986,7 +3083,8 @@ def set_node_status(node_id, status, reconnect_on_online=True): try: node.remote_devices = _connect_to_remote_devs(node) node.write_to_db() - distr_controller.send_cluster_map_to_node(node) + for device in node.nvme_devices: + distr_controller.send_dev_status_event(device, device.status, target_node=node) except RuntimeError: logger.error(f'Failed to connect to remote devices from node: {node.get_id()}') continue @@ -3009,7 +3107,6 @@ def set_node_status(node_id, status, reconnect_on_online=True): except Exception as e: logger.error("Error establishing hublvol: %s", e) - return True @@ -3040,7 +3137,7 @@ def recreate_lvstore_on_sec(secondary_node): return False # sending to the node that is being restarted (secondary_node) with the secondary group jm_vuid 
(primary_node.jm_vuid) - ret = secondary_node.rpc_client().jc_suspend_compression(jm_vuid=primary_node.jm_vuid, suspend=False) + ret, err = secondary_node.rpc_client().jc_suspend_compression(jm_vuid=primary_node.jm_vuid, suspend=False) if not ret: logger.info("Failed to resume JC compression adding task...") tasks_controller.add_jc_comp_resume_task( @@ -3057,7 +3154,6 @@ def recreate_lvstore_on_sec(secondary_node): port_type = "udp" if primary_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_RESTARTING]: - fw_api = FirewallClient(primary_node, timeout=5, retry=2) ### 3- block primary port fw_api.firewall_set_port(primary_node.lvol_subsys_port, port_type, "block", primary_node.rpc_port) @@ -3084,7 +3180,6 @@ def recreate_lvstore_on_sec(secondary_node): logger.error("Error connecting to hublvol: %s", e) # return False - fw_api = FirewallClient(primary_node, timeout=5, retry=2) ### 8- allow port on primary fw_api.firewall_set_port(primary_node.lvol_subsys_port, port_type, "allow", primary_node.rpc_port) @@ -3127,6 +3222,7 @@ def recreate_lvstore(snode, force=False): ### 1- create distribs and raid ret, err = _create_bdev_stack(snode, []) + if err: logger.error(f"Failed to recreate lvstore on node {snode.get_id()}") logger.error(err) @@ -3178,6 +3274,13 @@ def recreate_lvstore(snode, force=False): port_type = "tcp" if sec_node.active_rdma: port_type = "udp" + + ret = sec_node.wait_for_jm_rep_tasks_to_finish(snode.jm_vuid) + if not ret: + msg = f"JM replication task found for jm {snode.jm_vuid}" + logger.error(msg) + storage_events.jm_repl_tasks_found(sec_node, snode.jm_vuid) + fw_api.firewall_set_port(snode.lvol_subsys_port, port_type, "block", sec_node.rpc_port) tcp_ports_events.port_deny(sec_node, snode.lvol_subsys_port) @@ -3196,7 +3299,8 @@ def recreate_lvstore(snode, force=False): logger.info("Inflight IO NOT found, continuing") break else: - logger.error(f"Timeout while checking for inflight IO after 10 seconds on node {snode.secondary_node_id}") + 
logger.error( + f"Timeout while checking for inflight IO after 10 seconds on node {snode.secondary_node_id}") if sec_node.status in [StorageNode.STATUS_UNREACHABLE, StorageNode.STATUS_DOWN]: logger.info(f"Secondary node is not online, forcing journal replication on node: {snode.get_id()}") @@ -3214,7 +3318,7 @@ def recreate_lvstore(snode, force=False): def _kill_app(): storage_events.snode_restart_failed(snode) snode_api = SNodeClient(snode.api_endpoint, timeout=5, retry=5) - snode_api.spdk_process_kill(snode.rpc_port) + snode_api.spdk_process_kill(snode.rpc_port, snode.cluster_id) set_node_status(snode.get_id(), StorageNode.STATUS_OFFLINE) # If LVol Store recovery failed then stop spdk process @@ -3331,10 +3435,10 @@ def add_lvol_thread(lvol, snode, lvol_ana_state="optimized"): logger.error(msg) return False, msg - logger.info("Add BDev to subsystem") - ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid, nsid=lvol.ns_id) + logger.info("Add BDev to subsystem "+f"{lvol.vuid:016X}") + ret = rpc_client.nvmf_subsystem_add_ns(lvol.nqn, lvol.top_bdev, lvol.uuid, lvol.guid, nsid=lvol.ns_id, eui64=f"{lvol.vuid:016X}") for iface in snode.data_nics: - if iface.ip4_address and lvol.fabric==iface.trtype.lower(): + if iface.ip4_address and lvol.fabric == iface.trtype.lower(): logger.info("adding listener for %s on IP %s" % (lvol.nqn, iface.ip4_address)) ret = rpc_client.listeners_create( lvol.nqn, iface.trtype, iface.ip4_address, lvol.subsys_port, ana_state=lvol_ana_state) @@ -3349,9 +3453,9 @@ def add_lvol_thread(lvol, snode, lvol_ana_state="optimized"): lvol_obj.health_check = True lvol_obj.write_to_db() # set QOS - if lvol.rw_ios_per_sec or lvol.rw_mbytes_per_sec or lvol.r_mbytes_per_sec or lvol.w_mbytes_per_sec : + if lvol.rw_ios_per_sec or lvol.rw_mbytes_per_sec or lvol.r_mbytes_per_sec or lvol.w_mbytes_per_sec: lvol_controller.set_lvol(lvol.uuid, lvol.rw_ios_per_sec, lvol.rw_mbytes_per_sec, - lvol.r_mbytes_per_sec , lvol.w_mbytes_per_sec) 
+ lvol.r_mbytes_per_sec, lvol.w_mbytes_per_sec) return True, None @@ -3388,7 +3492,7 @@ def get_sorted_ha_jms(current_node): continue mgmt_ips.append(jm_dev_to_mgmt_ip[jm_id]) out.append(jm_id) - return out[:constants.HA_JM_COUNT-1] + return out[:current_node.ha_jm_count - 1] def get_node_jm_names(current_node, remote_node=None): @@ -3410,16 +3514,11 @@ def get_node_jm_names(current_node, remote_node=None): if remote_node.jm_device.get_id() == jm_id: jm_list.append(remote_node.jm_device.jm_bdev) continue - for jm_dev in remote_node.remote_jm_devices: - if jm_dev.get_id() == jm_id: - jm_list.append(jm_dev.remote_bdev) - break - else: - for jm_dev in current_node.remote_jm_devices: - if jm_dev.get_id() == jm_id: - jm_list.append(jm_dev.remote_bdev) - break - return jm_list[:constants.HA_JM_COUNT] + + jm_dev = DBController().get_jm_device_by_id(jm_id) + jm_list.append(f"remote_{jm_dev.jm_bdev}n1") + + return jm_list[:current_node.ha_jm_count] def get_secondary_nodes(current_node): @@ -3436,8 +3535,8 @@ def get_secondary_nodes(current_node): if node.get_id() == current_node.get_id(): nod_found = True continue - elif node.status == StorageNode.STATUS_ONLINE and node.mgmt_ip != current_node.mgmt_ip : - # elif node.status == StorageNode.STATUS_ONLINE : + elif node.status == StorageNode.STATUS_ONLINE and node.mgmt_ip != current_node.mgmt_ip: + # elif node.status == StorageNode.STATUS_ONLINE : if node.is_secondary_node: nodes.append(node.get_id()) @@ -3575,7 +3674,7 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo return False # sending to the other node (sec_node) with the primary group jm_vuid (snode.jm_vuid) - ret = sec_node.rpc_client().jc_suspend_compression(jm_vuid=snode.jm_vuid, suspend=False) + ret, err = sec_node.rpc_client().jc_suspend_compression(jm_vuid=snode.jm_vuid, suspend=False) if not ret: logger.info("Failed to resume JC compression adding task...") tasks_controller.add_jc_comp_resume_task(sec_node.cluster_id, 
sec_node.get_id(), jm_vuid=snode.jm_vuid) @@ -3600,10 +3699,21 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo sec_node.write_to_db() + storage_events.node_ports_changed(snode) return True + def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): + def _create_distr(snode, name, params): + try: + rpc_client.bdev_distrib_create(**params) + except Exception: + logger.error("Failed to create bdev distrib") + ret = distr_controller.send_cluster_map_to_distr(snode, name) + if not ret: + logger.error("Failed to send cluster map") + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) db_controller = DBController() cluster = db_controller.get_cluster_by_id(snode.cluster_id) @@ -3620,11 +3730,11 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): else: node_bdev_names = [] + thread_list = [] for bdev in stack: type = bdev['type'] name = bdev['name'] params = bdev['params'] - if name in node_bdev_names: continue @@ -3640,23 +3750,21 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): snode.distrib_cpu_index = (snode.distrib_cpu_index + 1) % len(snode.distrib_cpu_cores) params['full_page_unmap'] = cluster.full_page_unmap - ret = rpc_client.bdev_distrib_create(**params) - if ret: - ret = distr_controller.send_cluster_map_to_distr(snode, name) - if not ret: - return False, "Failed to send cluster map" - # time.sleep(1) + t = threading.Thread(target=_create_distr, args=(snode, name, params,)) + thread_list.append(t) + t.start() + ret = True elif type == "bdev_lvstore" and lvstore_stack and not primary_node: ret = rpc_client.create_lvstore(**params) - # if ret and snode.jm_vuid > 0: - # rpc_client.bdev_lvol_set_lvs_ops(snode.lvstore, snode.jm_vuid, snode.lvol_subsys_port) elif type == "bdev_ptnonexcl": ret = rpc_client.bdev_PT_NoExcl_create(**params) elif type == "bdev_raid": - + if thread_list: + for t in thread_list: + t.join() distribs_list = 
bdev["distribs_list"] strip_size_kb = params["strip_size_kb"] ret = rpc_client.bdev_raid_create(name, distribs_list, strip_size_kb=strip_size_kb) @@ -3674,6 +3782,9 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): _remove_bdev_stack(created_bdevs[::-1], rpc_client) return False, f"Failed to create BDev: {name}" + if thread_list: + for t in thread_list: + t.join() return True, None @@ -3792,7 +3903,7 @@ def dump_lvstore(node_id): logger.error("Storage node does not have lvstore") return False - rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=3, retry=0) + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=120) logger.info(f"Dumping lvstore data on node: {snode.get_id()}") file_name = f"LVS_dump_{snode.hostname}_{snode.lvstore}_{str(datetime.datetime.now().isoformat())}.txt" file_path = f"/etc/simplyblock/{file_name}" diff --git a/simplyblock_core/test/test_utils.py b/simplyblock_core/test/test_utils.py index da22a73ba..37b3cb267 100644 --- a/simplyblock_core/test/test_utils.py +++ b/simplyblock_core/test/test_utils.py @@ -1,8 +1,13 @@ +import uuid from typing import ContextManager +from unittest.mock import patch import pytest -from simplyblock_core import utils +from simplyblock_core import utils, storage_node_ops +from simplyblock_core.db_controller import DBController +from simplyblock_core.models.nvme_device import JMDevice, RemoteJMDevice +from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.utils import helpers, parse_thread_siblings_list @@ -146,3 +151,51 @@ def test_parse_thread_siblings_list(input, expected): parse_thread_siblings_list(input) else: assert parse_thread_siblings_list(input) == expected + + +@patch.object(DBController, 'get_jm_device_by_id') +def test_get_node_jm_names(db_controller_get_jm_device_by_id): + + node_1_jm = JMDevice() + node_1_jm.uuid = "node_1_jm_id" + node_1_jm.jm_bdev = 
"node_1_jm" + + node_2_jm = JMDevice() + node_2_jm.uuid = "node_2_jm_id" + node_2_jm.jm_bdev = "node_2_jm" + + node_3_jm = JMDevice() + node_3_jm.uuid = "node_3_jm_id" + node_3_jm.jm_bdev = "node_3_jm" + + node_4_jm = JMDevice() + node_4_jm.uuid = "node_4_jm_id" + node_4_jm.jm_bdev = "node_4_jm" + + def get_jm_device_by_id(jm_id): + for jm in [node_1_jm, node_2_jm, node_3_jm, node_4_jm]: + if jm.uuid == jm_id: + return jm + + db_controller_get_jm_device_by_id.side_effect = get_jm_device_by_id + + node_1 = StorageNode() + node_1.uuid = str(uuid.uuid4()) + node_1.enable_ha_jm = True + node_1.ha_jm_count = 4 + node_1.jm_device = node_1_jm + node_1.jm_ids = ["node_2_jm_id", "node_3_jm_id", "node_4_jm_id"] + + remote_node = StorageNode() + remote_node.uuid = str(uuid.uuid4()) + remote_node.enable_ha_jm = True + remote_node.jm_ids = [] + remote_node.jm_device = node_2_jm + remote_node.remote_jm_devices = [ + RemoteJMDevice({"uuid": node_1_jm.uuid, "remote_bdev": f"rem_{node_1_jm.jm_bdev}"}), + RemoteJMDevice({"uuid": node_3_jm.uuid, "remote_bdev": f"rem_{node_3_jm.jm_bdev}"}), + RemoteJMDevice({"uuid": node_4_jm.uuid, "remote_bdev": f"rem_{node_4_jm.jm_bdev}"})] + + jm_names = storage_node_ops.get_node_jm_names(node_1, remote_node=remote_node) + print(f"jm_names: {len(jm_names)}", jm_names) + diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py index 941414708..6e15fba9c 100644 --- a/simplyblock_core/utils/__init__.py +++ b/simplyblock_core/utils/__init__.py @@ -1,4 +1,5 @@ # coding=utf-8 +import glob import json import logging import math @@ -10,10 +11,14 @@ import sys import uuid import time -import socket -from typing import Union, Any, Optional, Tuple +from datetime import datetime, timezone +from typing import Union, Any, Optional, Tuple, List, Dict, Iterable +from docker import DockerClient from kubernetes import client, config -from kubernetes.client import ApiException +from kubernetes.client import ApiException, V1Deployment, 
V1DeploymentSpec, V1ObjectMeta, \ + V1PodTemplateSpec, V1PodSpec, V1Container, V1EnvVar, V1VolumeMount, V1Volume, V1ConfigMapVolumeSource, \ + V1LabelSelector, V1ResourceRequirements + import docker from prettytable import PrettyTable from docker.errors import APIError, DockerException, ImageNotFound, NotFound @@ -145,7 +150,7 @@ def print_table(data: list, title=None): } -def humanbytes(size: int, mode: str = 'iec') -> str: # show size using 1024 base +def humanbytes(size: int, mode: str = 'iec') -> str: # show size using 1024 base """Return the given bytes as a human friendly including the appropriate unit.""" if not size or size < 0: return '0 B' @@ -194,16 +199,8 @@ def get_k8s_node_ip(): logger.error("No mgmt nodes was found in the cluster!") return False - mgmt_ips = [node.mgmt_ip for node in nodes] - - for ip in mgmt_ips: - try: - with socket.create_connection((ip, 10250), timeout=2): - return ip - except Exception as e: - print(e) - raise e - return False + for node in nodes: + return node.mgmt_ip def dict_agg(data, mean=False, keys=None): @@ -447,7 +444,8 @@ def reserve_n(count): assigned["jm_cpu_core"] = vcpu vcpu = reserve_n(1) assigned["jc_singleton_core"] = vcpu - assigned["alceml_worker_cpu_cores"] = vcpu + assigned["lvol_poller_core"] = vcpu + # assigned["alceml_worker_cpu_cores"] = vcpu vcpu = reserve_n(1) assigned["alceml_cpu_cores"] = vcpu elif (len(vcpu_list) < 22): @@ -455,8 +453,10 @@ def reserve_n(count): assigned["jm_cpu_core"] = vcpu vcpu = reserve_n(1) assigned["jc_singleton_core"] = vcpu - vcpus = reserve_n(1) - assigned["alceml_worker_cpu_cores"] = vcpus + vcpu = reserve_n(1) + assigned["lvol_poller_core"] = vcpu + # vcpus = reserve_n(1) + # assigned["alceml_worker_cpu_cores"] = vcpus vcpus = reserve_n(2) assigned["alceml_cpu_cores"] = vcpus else: @@ -464,20 +464,35 @@ def reserve_n(count): assigned["jm_cpu_core"] = vcpus vcpu = reserve_n(1) assigned["jc_singleton_core"] = vcpu - vcpus = reserve_n(int(alceml_count / 3) + ((alceml_count % 
3) > 0)) - assigned["alceml_worker_cpu_cores"] = vcpus + # vcpus = reserve_n(int(alceml_count / 3) + ((alceml_count % 3) > 0)) + # assigned["alceml_worker_cpu_cores"] = vcpus vcpus = reserve_n(alceml_count) assigned["alceml_cpu_cores"] = vcpus + vcpus = reserve_n(2) + assigned["lvol_poller_core"] = vcpus dp = int(len(remaining) / 2) - vcpus = reserve_n(dp) - assigned["distrib_cpu_cores"] = vcpus - vcpus = reserve_n(dp) - assigned["poller_cpu_cores"] = vcpus + if 17 > dp >= 12: + poller_n = len(remaining) - 12 + vcpus = reserve_n(12) + assigned["distrib_cpu_cores"] = vcpus + vcpus = reserve_n(poller_n) + assigned["poller_cpu_cores"] = vcpus + elif dp >= 17: + poller_n = len(remaining) - 24 + vcpus = reserve_n(24) + assigned["distrib_cpu_cores"] = vcpus + vcpus = reserve_n(poller_n) + assigned["poller_cpu_cores"] = vcpus + else: + vcpus = reserve_n(dp) + assigned["distrib_cpu_cores"] = vcpus + vcpus = reserve_n(dp) + assigned["poller_cpu_cores"] = vcpus if len(remaining) > 0: if len(assigned["poller_cpu_cores"]) == 0: assigned["distrib_cpu_cores"] = assigned["poller_cpu_cores"] = reserve_n(1) else: - assigned["distrib_cpu_cores"] = assigned["distrib_cpu_cores"] + reserve_n(1) + assigned["poller_cpu_cores"] = assigned["poller_cpu_cores"] + reserve_n(1) # Return the individual threads as separate values return ( assigned.get("app_thread_core", []), @@ -486,7 +501,8 @@ def reserve_n(count): assigned.get("alceml_cpu_cores", []), assigned.get("alceml_worker_cpu_cores", []), assigned.get("distrib_cpu_cores", []), - assigned.get("jc_singleton_core", []) + assigned.get("jc_singleton_core", []), + assigned.get("lvol_poller_core", []), ) @@ -536,11 +552,12 @@ def calculate_pool_count(alceml_count, number_of_distribs, cpu_count, poller_cou poller_number = poller_count if poller_count else cpu_count small_pool_count = 384 * (alceml_count + number_of_distribs + 3 + poller_count) + ( - 6 + alceml_count + number_of_distribs) * 256 + poller_number * 127 + 384 + 128 * poller_number + 
constants.EXTRA_SMALL_POOL_COUNT + + 6 + alceml_count + number_of_distribs) * + poller_number * 127 + 384 + 128 * poller_number + constants.EXTRA_SMALL_POOL_COUNT large_pool_count = 48 * (alceml_count + number_of_distribs + 3 + poller_count) + ( 6 + alceml_count + number_of_distribs) * 32 + poller_number * 15 + 384 + 16 * poller_number + constants.EXTRA_LARGE_POOL_COUNT - return int(4.0 * small_pool_count), int(2.5 * large_pool_count) + return int(small_pool_count), int(large_pool_count) def calculate_minimum_hp_memory(small_pool_count, large_pool_count, lvol_count, max_prov, cpu_count): @@ -551,9 +568,9 @@ def calculate_minimum_hp_memory(small_pool_count, large_pool_count, lvol_count, extra buffer 2GB return: minimum_hp_memory in bytes ''' - pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024 + 1092 - memory_consumption = (4 * cpu_count + 1.0277 * pool_consumption + 25 * lvol_count) * (1024 * 1024) + ( - 250 * 1024 * 1024) * 1.1 * convert_size(max_prov, 'TiB') + constants.EXTRA_HUGE_PAGE_MEMORY + pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024 + memory_consumption = (4 * cpu_count + 1.1 * pool_consumption + 22 * lvol_count) * ( + 1024 * 1024) + constants.EXTRA_HUGE_PAGE_MEMORY return int(1.2 * memory_consumption) @@ -626,7 +643,7 @@ def get_logger(name=""): if not logg.hasHandlers(): logger_handler = logging.StreamHandler(stream=sys.stdout) - logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(levelname)s: %(message)s')) + logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(thread)d: %(levelname)s: %(message)s')) logg.addHandler(logger_handler) # gelf_handler = GELFTCPHandler('0.0.0.0', constants.GELF_PORT) # logg.addHandler(gelf_handler) @@ -712,6 +729,7 @@ def get_total_cpu_cores(mapping: str) -> int: items = [pair for pair in mapping.split(",") if "@" in pair] return len(items) + def convert_size(size: Union[int, str], unit: str, round_up: bool = False) -> int: """Convert the given number of 
bytes to target unit @@ -726,6 +744,14 @@ def convert_size(size: Union[int, str], unit: str, round_up: bool = False) -> in return math.ceil(raw) if round_up else int(raw) +def first_six_chars(s: str) -> str: + """ + Returns the first six characters of a given string. + If the string is shorter than six characters, returns the entire string. + """ + return s[:6] + + def nearest_upper_power_of_2(n): # Check if n is already a power of 2 if (n & (n - 1)) == 0: @@ -735,7 +761,10 @@ def nearest_upper_power_of_2(n): def strfdelta(tdelta): - remainder = int(tdelta.total_seconds()) + return strfdelta_seconds(int(tdelta.total_seconds())) + + +def strfdelta_seconds(remainder: int) -> str: possible_fields = ('W', 'D', 'H', 'M', 'S') constants = {'W': 604800, 'D': 86400, 'H': 3600, 'M': 60, 'S': 1} values = {} @@ -819,7 +848,7 @@ def get_next_rpc_port(cluster_id): from simplyblock_core.db_controller import DBController db_controller = DBController() - port = 8080 + port = constants.RPC_PORT_RANGE_START used_ports = [] for node in db_controller.get_storage_nodes_by_cluster_id(cluster_id): if node.rpc_port > 0: @@ -834,6 +863,22 @@ def get_next_rpc_port(cluster_id): return 0 +def get_next_fw_port(cluster_id): + from simplyblock_core.db_controller import DBController + db_controller = DBController() + + port = constants.FW_PORT_START + used_ports = [] + for node in db_controller.get_storage_nodes_by_cluster_id(cluster_id): + if node.firewall_port > 0: + used_ports.append(node.firewall_port) + next_port = port + while True: + if next_port not in used_ports: + return next_port + next_port += 1 + + def get_next_dev_port(cluster_id): from simplyblock_core.db_controller import DBController db_controller = DBController() @@ -1093,7 +1138,7 @@ def addNvmeDevices(rpc_client, snode, devs): serial_number = nvme_driver_data['ctrlr_data']['serial_number'] if snode.id_device_by_nqn: if "ns_data" in nvme_driver_data: - serial_number = nvme_driver_data['pci_address'] + 
nvme_driver_data['ns_data']['id'] + serial_number = nvme_driver_data['pci_address'] + str(nvme_driver_data['ns_data']['id']) else: logger.error(f"No subsystem nqn found for device: {nvme_driver_data['pci_address']}") @@ -1231,9 +1276,10 @@ def get_nvme_pci_devices(): return [], [] -def detect_nvmes(pci_allowed, pci_blocked): +def detect_nvmes(pci_allowed, pci_blocked, device_model, size_range, nvme_names): pci_addresses, blocked_devices = get_nvme_pci_devices() ssd_pci_set = set(pci_addresses) + claim_devices_to_nvme() # Normalize SSD PCI addresses and user PCI list if pci_allowed: @@ -1245,10 +1291,20 @@ def detect_nvmes(pci_allowed, pci_blocked): # Check for unmatched addresses unmatched = user_pci_set - ssd_pci_set if unmatched: - logger.error(f"Invalid PCI addresses: {', '.join(unmatched)}") - return [] - - pci_addresses = list(user_pci_set) + logger.warn(f"Invalid PCI addresses: {', '.join(unmatched)}") + pci_addresses = user_pci_set & ssd_pci_set + else: + pci_addresses = list(user_pci_set) + for pci in pci_addresses: + pci_utils.ensure_driver(pci, 'nvme', override=True) + logger.debug(f"Found nvme devices are {pci_addresses}") + elif device_model and size_range: + pci_addresses = query_nvme_ssd_by_model_and_size(device_model, size_range) + logger.debug(f"Found nvme devices are {pci_addresses}") + pci_allowed = pci_addresses + elif nvme_names: + pci_addresses = query_nvme_ssd_by_namespace_names(nvme_names) + pci_allowed = pci_addresses elif pci_blocked: user_pci_set = set( addr if len(addr.split(":")[0]) == 4 else f"0000:{addr}" @@ -1259,19 +1315,14 @@ def detect_nvmes(pci_allowed, pci_blocked): for pci in pci_addresses: pci_utils.ensure_driver(pci, 'nvme') - nvme_base_path = '/sys/class/nvme/' nvme_devices = [dev for dev in os.listdir(nvme_base_path) if dev.startswith('nvme')] nvmes = {} for dev in nvme_devices: - dev_name = os.path.basename(dev) - pattern = re.compile(rf"^{re.escape(dev_name)}n\d+$") - if any(pattern.match(block_device) for block_device in 
blocked_devices): - logger.debug(f"device {dev_name} is busy.. skipping") - continue - device_symlink = os.path.join(nvme_base_path, dev) try: - pci_address = "unknown" + dev_name = os.path.basename(dev) + pattern = re.compile(rf"^{re.escape(dev_name)}n\d+$") + device_symlink = os.path.join(nvme_base_path, dev) # Resolve the real path to get the actual device path real_path = os.path.realpath(device_symlink) @@ -1280,12 +1331,15 @@ def detect_nvmes(pci_allowed, pci_blocked): address_file = os.path.join(real_path, 'address') with open(address_file, 'r') as f: pci_address = f.read().strip() - + if any(pattern.match(block_device) for block_device in blocked_devices): + if pci_address not in pci_allowed: + logger.debug(f"device {dev_name} is busy.. skipping") + continue + logger.warning(f"PCI {pci_address} passed as allowed PCI, even it has partitions.. Formatting it now") # Read the NUMA node information numa_node_file = os.path.join(real_path, 'numa_node') with open(numa_node_file, 'r') as f: numa_node = f.read().strip() - if pci_address not in pci_addresses: continue nvmes[dev_name] = {"pci_address": pci_address, "numa_node": numa_node} @@ -1300,11 +1354,11 @@ def calculate_unisolated_cores(cores, cores_percentage=0): if cores_percentage: return math.ceil(total * (100 - cores_percentage) / 100) if total <= 10: - return 1 - if total <= 20: return 2 - if total <= 28: + if total <= 20: return 3 + if total <= 28: + return 4 return math.ceil(total * 0.15) @@ -1312,6 +1366,103 @@ def get_core_indexes(core_to_index, list_of_cores): return [core_to_index[core] for core in list_of_cores if core in core_to_index] +def build_unisolated_stride( + all_cores: List[int], + num_unisolated: int, + client_qpair_count: int, + pool_stride: int = 2, +) -> List[int]: + """ + Build a list of 'unisolated' CPUs by picking from per-qpair pools. 
+ + Pools are contiguous slices of all_cores: + total=30, q=3 -> [0..9], [10..19], [20..29] + + Selection: + round-robin across pools, and within each pool advance by pool_stride + e.g. stride=2 -> 0,2,4,... then 10,12,14,... then 20,22,24,... + + If hyper_thread=True, append sibling right after each core: + sibling = cpu +/- (total//2) + """ + hyper_thread = is_hyperthreading_enabled_via_siblings() + if num_unisolated <= 0: + return [] + if client_qpair_count <= 0: + raise ValueError("client_qpair_count must be > 0") + if pool_stride <= 0: + raise ValueError("pool_stride must be > 0") + + cores = sorted(all_cores) + total = len(cores) + if total == 0: + return [] + + core_set = set(cores) + + half: int = 0 + if hyper_thread: + if total % 2 != 0: + raise ValueError(f"hyper_thread=True but total logical CPUs ({total}) is not even") + half = total // 2 + + # Build pools + pool_size = math.ceil(total / client_qpair_count) + pools = [cores[i * pool_size: min((i + 1) * pool_size, total)] for i in range(client_qpair_count)] + pools = [p for p in pools if p] # drop empties + + # Per-pool index (within each pool) + idx = [0] * len(pools) + + out: List[int] = [] + used = set() + + def add_cpu(cpu: int) -> None: + if cpu in core_set and cpu not in used and len(out) < num_unisolated: + out.append(cpu) + used.add(cpu) + + while len(out) < num_unisolated: + progress = False + + for pi, pool in enumerate(pools): + if len(out) >= num_unisolated: + break + + # find next candidate in this pool using stride + j = idx[pi] + while j < len(pool) and pool[j] in used: + j += pool_stride + if j >= len(pool): + continue + + cpu = pool[j] + idx[pi] = j + pool_stride + + add_cpu(cpu) + progress = True + + if hyper_thread and len(out) < num_unisolated: + sib = cpu + half if cpu < half else cpu - half + add_cpu(sib) + + if progress: + continue + + # Fallback: fill any remaining from whatever is unused (should rarely happen) + for cpu in cores: + if len(out) >= num_unisolated: + break + if cpu 
not in used: + add_cpu(cpu) + if hyper_thread and len(out) < num_unisolated: + sib = cpu + half if cpu < half else cpu - half + add_cpu(sib) + break + + return out[:num_unisolated] + + def generate_core_allocation(cores_by_numa, sockets_to_use, nodes_per_socket, cores_percentage=0): node_distribution: dict = {} # Iterate over each NUMA node @@ -1319,20 +1470,8 @@ def generate_core_allocation(cores_by_numa, sockets_to_use, nodes_per_socket, co if numa_node not in cores_by_numa: continue all_cores = sorted(cores_by_numa[numa_node]) - total_cores = len(all_cores) num_unisolated = calculate_unisolated_cores(all_cores, cores_percentage) - - unisolated = [] - half = total_cores // 2 - for i in range(num_unisolated): - if i % 2 == 0: - index = i // 2 - else: - index = (i - 1) // 2 - if i % 2 == 0: - unisolated.append(all_cores[index]) - else: - unisolated.append(all_cores[half + index]) + unisolated = build_unisolated_stride(all_cores, num_unisolated, constants.CLIENT_QPAIR_COUNT) available_cores = [c for c in all_cores if c not in unisolated] q1 = len(available_cores) // 4 @@ -1420,14 +1559,17 @@ def regenerate_config(new_config, old_config, force=False): "alceml_cpu_cores": get_core_indexes(core_to_index, distribution[3]), "alceml_worker_cpu_cores": get_core_indexes(core_to_index, distribution[4]), "distrib_cpu_cores": get_core_indexes(core_to_index, distribution[5]), - "jc_singleton_core": get_core_indexes(core_to_index, distribution[6])} + "jc_singleton_core": get_core_indexes(core_to_index, distribution[6]), + "lvol_poller_core": get_core_indexes(core_to_index, distribution[7])} isolated_cores = old_config["nodes"][i]["isolated"] number_of_distribs = 2 number_of_distribs_cores = len(old_config["nodes"][i]["distribution"]["distrib_cpu_cores"]) number_of_poller_cores = len(old_config["nodes"][i]["distribution"]["poller_cpu_cores"]) - if number_of_distribs_cores > 2: + if 12 >= number_of_distribs_cores > 2: number_of_distribs = number_of_distribs_cores + else: + 
number_of_distribs = 12 old_config["nodes"][i]["number_of_distribs"] = number_of_distribs old_config["nodes"][i]["ssd_pcis"] = new_config["nodes"][i]["ssd_pcis"] old_config["nodes"][i]["nic_ports"] = new_config["nodes"][i]["nic_ports"] @@ -1457,7 +1599,7 @@ def regenerate_config(new_config, old_config, force=False): all_isolated_cores = set() for node in old_config["nodes"]: if len(node["ssd_pcis"]) == 0: - logger.error(f"There are not enough SSD devices on numa node {node['socket']}") + logger.error(f"There are no enough SSD devices on numa node {node['socket']}") return False total_required_memory += node["huge_page_memory"] + node["sys_memory"] node_cores_set = set(node["isolated"]) @@ -1471,7 +1613,7 @@ def regenerate_config(new_config, old_config, force=False): def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_allowed, pci_blocked, - cores_percentage=0): + cores_percentage=0, force=False, device_model="", size_range="", nvme_names=None): system_info = {} nodes_config: dict = {"nodes": []} @@ -1479,7 +1621,25 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a validate_sockets(sockets_to_use, cores_by_numa) logger.debug(f"Cores by numa {cores_by_numa}") nics = detect_nics() - nvmes = detect_nvmes(pci_allowed, pci_blocked) + nvmes = detect_nvmes(pci_allowed, pci_blocked, device_model, size_range, nvme_names) + if not nvmes: + logger.error( + "There are no enough SSD devices on system, you may run 'sbctl sn clean-devices', to clean devices stored in /etc/simplyblock/sn_config_file") + return False, False + if force: + nvme_devices = " ".join([f"/dev/{d}n1" for d in nvmes.keys()]) + logger.warning(f"Formating Nvme devices {nvme_devices}") + answer = input("Type YES/Y to continue: ").strip().lower() + if answer not in ("yes", "y"): + logger.warning("Aborted by user.") + exit(1) + logger.info("OK, continuing formating...") + for nvme_device in nvmes.keys(): + nvme_device_path = f"/dev/{nvme_device}n1" + 
clean_partitions(nvme_device_path) + nvme_json_string = get_idns(nvme_device_path) + lbaf_id = find_lbaf_id(nvme_json_string, 0, 12) + format_nvme_device(nvme_device_path, lbaf_id) for nid in sockets_to_use: if nid in cores_by_numa: @@ -1497,7 +1657,7 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a for nvme, val in nvmes.items(): pci = val["pci_address"] - numa = val["numa_node"] + numa = int(val["numa_node"]) pci_utils.unbind_driver(pci) if numa in sockets_to_use: system_info[numa]["nvmes"].append(pci) @@ -1550,10 +1710,11 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a "jm_cpu_core": get_core_indexes(core_group["core_to_index"], core_group["distribution"][1]), "poller_cpu_cores": get_core_indexes(core_group["core_to_index"], core_group["distribution"][2]), "alceml_cpu_cores": get_core_indexes(core_group["core_to_index"], core_group["distribution"][3]), - "alceml_worker_cpu_cores": get_core_indexes(core_group["core_to_index"], - core_group["distribution"][4]), + # "alceml_worker_cpu_cores": get_core_indexes(core_group["core_to_index"], + # core_group["distribution"][4]), "distrib_cpu_cores": get_core_indexes(core_group["core_to_index"], core_group["distribution"][5]), - "jc_singleton_core": get_core_indexes(core_group["core_to_index"], core_group["distribution"][6]) + "jc_singleton_core": get_core_indexes(core_group["core_to_index"], core_group["distribution"][6]), + "lvol_poller_core": get_core_indexes(core_group["core_to_index"], core_group["distribution"][7]) }, "ssd_pcis": [], "nic_ports": system_info[nid]["nics"] @@ -1583,7 +1744,7 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a node_info["large_pool_count"] = large_pool_count node_info["max_lvol"] = max_lvol node_info["max_size"] = max_prov - node_info["huge_page_memory"] = minimum_hp_memory + node_info["huge_page_memory"] = max(minimum_hp_memory, max_prov) minimum_sys_memory = 
calculate_minimum_sys_memory(max_prov) node_info["sys_memory"] = minimum_sys_memory all_nodes.append(node_info) @@ -1596,7 +1757,7 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a all_isolated_cores = set() for node in all_nodes: if len(node["ssd_pcis"]) == 0: - logger.error(f"There are not enough SSD devices on numa node {node['socket']}") + logger.error(f"There are no enough SSD devices on numa node {node['socket']}") return False, False total_required_memory += node["huge_page_memory"] + node["sys_memory"] node_cores_set = set(node["isolated"]) @@ -1611,6 +1772,29 @@ def generate_configs(max_lvol, max_prov, sockets_to_use, nodes_per_socket, pci_a return final_config, system_info +def get_nvme_name_from_pci(pci_address): + # Search for the PCI address in the sysfs tree for NVMe devices + path = f"/sys/bus/pci/devices/{pci_address}/nvme/nvme*" + matches = glob.glob(path) + + if matches: + # returns 'nvme0' + return os.path.basename(matches[0]) + return None + + +def format_device_with_4k(pci_device): + try: + nvme_device = get_nvme_name_from_pci(pci_device) + nvme_device_path = f"/dev/{nvme_device}n1" + clean_partitions(nvme_device_path) + nvme_json_string = get_idns(nvme_device_path) + lbaf_id = find_lbaf_id(nvme_json_string, 0, 12) + format_nvme_device(nvme_device_path, lbaf_id) + except Exception as e: + logger.error(f"Failed to format device with 4K {e}") + + def set_hugepages_if_needed(node, hugepages_needed, page_size_kb=2048): """Set hugepages for a specific NUMA node if current number is less than needed.""" hugepage_path = f"/sys/devices/system/node/node{node}/hugepages/hugepages-{page_size_kb}kB/nr_hugepages" @@ -1657,8 +1841,7 @@ def validate_node_config(node): required_distribution_fields = [ "app_thread_core", "jm_cpu_core", "poller_cpu_cores", - "alceml_cpu_cores", "alceml_worker_cpu_cores", - "distrib_cpu_cores", "jc_singleton_core" + "alceml_cpu_cores", "distrib_cpu_cores", "jc_singleton_core" ] # Check top-level 
fields @@ -1928,6 +2111,258 @@ def load_kube_config_with_fallback(): config.load_kube_config() +def patch_cr_status( + *, + group: str, + version: str, + plural: str, + namespace: str, + name: str, + status_patch: dict, +): + """ + Patch the status subresource of a Custom Resource. + + status_patch example: + {"": "": } + """ + + load_kube_config_with_fallback() + + api = client.CustomObjectsApi() + + body = { + "status": status_patch + } + + try: + api.patch_namespaced_custom_object_status( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + body=body, + ) + except ApiException as e: + logger.error( + f"Failed to patch status for {name}: {e.reason} {e.body}" + ) + + +def patch_cr_node_status( + *, + group: str, + version: str, + plural: str, + namespace: str, + name: str, + node_uuid: str, + node_mgmt_ip: str, + updates: Optional[Dict[str, Any]] = None, + remove: bool = False, +): + """ + Patch status.nodes[*] fields for a specific node identified by UUID. 
+ + Operations: + - Update a node (by uuid or mgmtIp) + - Remove a node (by uuid or mgmtIp) + + updates example: + {"health": "true"} + {"status": "offline"} + {"capacity": {"sizeUsed": 1234}} + """ + load_kube_config_with_fallback() + api = client.CustomObjectsApi() + + try: + cr = api.get_namespaced_custom_object( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + ) + + status_nodes = cr.get("status", {}).get("nodes", []) + if not status_nodes: + raise RuntimeError("CR has no status.nodes") + + spec_worker_nodes = cr.get("spec", {}).get("workerNodes", []) + + found = False + new_status_nodes = [] + removed_hostname = None + + for node in status_nodes: + match = ( + node.get("uuid") == node_uuid or + node.get("mgmtIp") == node_mgmt_ip + ) + + if match: + found = True + removed_hostname = node.get("hostname") + + if remove: + continue + + if updates: + node.update(updates) + + new_status_nodes.append(node) + + if not found: + raise RuntimeError( + f"Node not found (uuid={node_uuid}, mgmtIp={node_mgmt_ip})" + ) + + if remove and removed_hostname: + new_worker_nodes = [ + n for n in spec_worker_nodes if n != removed_hostname + ] + + api.patch_namespaced_custom_object( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + body={ + "spec": { + "workerNodes": new_worker_nodes + } + }, + ) + + api.patch_namespaced_custom_object_status( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + body={ + "status": { + "nodes": new_status_nodes + } + }, + ) + + except ApiException as e: + logger.error( + f"Failed to patch node for {name}: {e.reason} {e.body}" + ) + + +def patch_cr_lvol_status( + *, + group: str, + version: str, + plural: str, + namespace: str, + name: str, + lvol_uuid: Optional[str] = None, + updates: Optional[Dict[str, Any]] = None, + remove: bool = False, + add: Optional[Dict[str, Any]] = None, +): + """ + Patch status.lvols[*] for an LVOL 
CustomResource. + + Operations: + - Update an existing LVOL (by uuid) + - Remove an LVOL (by uuid) + - Add a new LVOL entry + + Parameters: + lvol_uuid: + UUID of the lvol entry to update or remove + + updates: + Dict of fields to update on the matched lvol + Example: + {"status": "offline", "health": False} + + remove: + If True, remove the lvol identified by lvol_uuid + + add: + Full lvol dict to append to status.lvols + """ + + load_kube_config_with_fallback() + api = client.CustomObjectsApi() + + now = datetime.now(timezone.utc).isoformat() + + try: + cr = api.get_namespaced_custom_object( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + ) + + status = cr.get("status", {}) + lvols = status.get("lvols", []) + + # Ensure list exists + if lvols is None: + lvols = [] + + # ---- ADD ---- + if add is not None: + add.setdefault("createDt", now) + add["updateDt"] = now + lvols.append(add) + + # ---- UPDATE / REMOVE ---- + if lvol_uuid: + found = False + new_lvols = [] + + for lvol in lvols: + if lvol.get("uuid") == lvol_uuid: + found = True + + if remove: + continue + + if updates: + lvol.update(updates) + lvol["updateDt"] = now + + new_lvols.append(lvol) + + if not found: + raise RuntimeError(f"LVOL not found (uuid={lvol_uuid})") + + lvols = new_lvols + + body = { + "status": { + "lvols": lvols + } + } + + api.patch_namespaced_custom_object_status( + group=group, + version=version, + namespace=namespace, + plural=plural, + name=name, + body=body, + ) + + except ApiException as e: + logger.error( + f"Failed to patch lvol status for {name}: {e.reason} {e.body}" + ) + + def get_node_name_by_ip(target_ip: str) -> str: load_kube_config_with_fallback() v1 = client.CoreV1Api() @@ -2031,17 +2466,438 @@ def patch_prometheus_configmap(username: str, password: str): load_kube_config_with_fallback() v1 = client.CoreV1Api() - cm = v1.read_namespaced_config_map(name="sbcli-simplyblock-prometheus-config", namespace=constants.K8S_NAMESPACE) - 
prometheus_yml = cm.data.get("prometheus.yml", "") + try: + cm = v1.read_namespaced_config_map( + name="sbcli-simplyblock-prometheus-config", + namespace=constants.K8S_NAMESPACE + ) + except client.exceptions.ApiException as e: + logger.error(f"Failed to read ConfigMap: {e}") + return False + + try: + prometheus_yml = cm.data.get("prometheus.yml", "") + if not prometheus_yml: + logger.error("prometheus.yml key not found in ConfigMap.") + return False - prometheus_yml = re.sub(r"username:*", f"username: '{username}'", prometheus_yml) - prometheus_yml = re.sub(r"password:*", f"password: '{password}'", prometheus_yml) + try: + prometheus_yml = re.sub(r"username:.*", f"username: '{username}'", prometheus_yml) + prometheus_yml = re.sub(r"password:.*", f"password: '{password}'", prometheus_yml) + except re.error as e: + logger.error(f"Regex error while patching Prometheus YAML: {e}") + return False - patch_body = { - "data": { - "prometheus.yml": prometheus_yml + patch_body = { + "data": { + "prometheus.yml": prometheus_yml + } } - } - v1.patch_namespaced_config_map(name="sbcli-simplyblock-prometheus-config", namespace=constants.K8S_NAMESPACE, body=patch_body) - logger.info("Patched sbcli-simplyblock-prometheus-config ConfigMap with new credentials.") + v1.patch_namespaced_config_map( + name="sbcli-simplyblock-prometheus-config", + namespace=constants.K8S_NAMESPACE, + body=patch_body + ) + + logger.info("Patched sbcli-simplyblock-prometheus-config ConfigMap with new credentials.") + return True + + except client.exceptions.ApiException as e: + logger.error(f"Failed to patch ConfigMap: {e}") + return False + + except Exception as e: + logger.error(f"Unexpected error while patching ConfigMap: {e}") + return False + + +def create_docker_service(cluster_docker: DockerClient, service_name: str, service_file: str, service_image: str): + logger.info(f"Creating service: {service_name}") + cluster_docker.services.create( + image=service_image, + command=service_file, + 
name=service_name, + mounts=["/etc/foundationdb:/etc/foundationdb"], + env=["SIMPLYBLOCK_LOG_LEVEL=DEBUG"], + networks=["host"], + constraints=["node.role == manager"], + labels={ + "com.docker.stack.image": service_image, + "com.docker.stack.namespace": "app"} + ) + + +def create_k8s_service(namespace: str, deployment_name: str, + container_name: str, service_file: str, container_image: str): + logger.info(f"Creating deployment: {deployment_name} in namespace {namespace}") + load_kube_config_with_fallback() + apps_v1 = client.AppsV1Api() + + env_list = [ + V1EnvVar( + name="SIMPLYBLOCK_LOG_LEVEL", + value_from={"config_map_key_ref": {"name": "simplyblock-config", "key": "LOG_LEVEL"}} + ) + ] + + volume_mounts = [ + V1VolumeMount( + name="fdb-cluster-file", + mount_path="/etc/foundationdb/fdb.cluster", + sub_path="fdb.cluster" + ) + ] + + volumes = [ + V1Volume( + name="fdb-cluster-file", + config_map=V1ConfigMapVolumeSource( + name="simplyblock-fdb-cluster-config", + items=[{"key": "cluster-file", "path": "fdb.cluster"}] + ) + ) + ] + + container = V1Container( + name=container_name, + image=container_image, + command=["python", service_file], + env=env_list, + volume_mounts=volume_mounts, + resources=V1ResourceRequirements( + requests={"cpu": "200m", "memory": "256Mi"}, + limits={"cpu": "400m", "memory": "1Gi"} + ) + ) + + pod_spec = V1PodSpec( + containers=[container], + volumes=volumes, + host_network=True, + dns_policy="ClusterFirstWithHostNet" + ) + + pod_template = V1PodTemplateSpec( + metadata=V1ObjectMeta(labels={"app": deployment_name}), + spec=pod_spec + ) + + deployment_spec = V1DeploymentSpec( + replicas=1, + selector=V1LabelSelector(match_labels={"app": deployment_name}), + template=pod_template + ) + + deployment = V1Deployment( + api_version="apps/v1", + kind="Deployment", + metadata=V1ObjectMeta(name=deployment_name, namespace=namespace), + spec=deployment_spec + ) + + apps_v1.create_namespaced_deployment(namespace=namespace, body=deployment) + 
logger.info(f"Deployment {deployment_name} created successfully.") + + +def clean_partitions(nvme_device: str): + command = ['wipefs', '-a', nvme_device] + print(" ".join(command)) + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True # Raise a CalledProcessError if the exit code is non-zero + ) + return result.stdout + + except subprocess.CalledProcessError as e: + # Handle errors (e.g., nvme not found, permission denied, or other command failures) + return (f"Error executing command: {' '.join(command)}\n" + f"Return Code: {e.returncode}\n" + f"Standard Error:\n{e.stderr}") + except FileNotFoundError: + return "Error: The 'nvme' command was not found. Is 'nvme-cli' installed?" + + +def find_lbaf_id(json_data: str, target_ms: int, target_ds: int) -> int: + try: + data = json.loads(json_data) + except json.JSONDecodeError: + print("Error: Invalid JSON format provided.") + return 0 + + lbafs_list: List[Dict[str, int]] = data.get('lbafs', []) + + # LBAF IDs are 1-based, so we use enumerate starting from 1 + for index, lbaf in enumerate(lbafs_list, start=0): + if lbaf.get('ms') == target_ms and lbaf.get('ds') == target_ds: + return index + + return 0 + + +def get_idns(nvme_device: str): + command = ['nvme', 'id-ns', nvme_device, '--output-format', 'json'] + try: + # Run the command + # capture_output=True captures stdout and stderr. + # text=True decodes the output as text (using default encoding, typically UTF-8). 
+ result = subprocess.run( + command, + capture_output=True, + text=True, + check=True # Raise a CalledProcessError if the exit code is non-zero + ) + + # Return the captured standard output + return result.stdout + + except subprocess.CalledProcessError as e: + # Handle errors (e.g., nvme not found, permission denied, or other command failures) + return (f"Error executing command: {' '.join(command)}\n" + f"Return Code: {e.returncode}\n" + f"Standard Error:\n{e.stderr}") + except FileNotFoundError: + return "Error: The 'nvme' command was not found. Is 'nvme-cli' installed?" + + +def is_namespace_4k_from_nvme_list(device_path: str) -> bool: + """ + Returns True if nvme list JSON shows SectorSize == 4096 for the given DevicePath + (e.g. '/dev/nvme3n1'). + """ + try: + out = subprocess.check_output(["nvme", "list", "--output-format", "json"], text=True) + data = json.loads(out) + + for dev in data.get("Devices", []): + if dev.get("DevicePath") == device_path: + return int(dev.get("SectorSize", 0)) == 4096 + + # Not found in list + return False + + except subprocess.CalledProcessError: + print("Error: nvme list failed") + return False + except (ValueError, json.JSONDecodeError) as e: + print(f"Error parsing nvme list output: {e}") + return False + + +def format_nvme_device(nvme_device: str, lbaf_id: int): + if is_namespace_4k_from_nvme_list(nvme_device): + logger.debug(f"Device {nvme_device} already formatted with 4K...skipping") + return + command = ['nvme', 'format', nvme_device, f"--lbaf={lbaf_id}", '--force'] + print(" ".join(command)) + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True # Raise a CalledProcessError if the exit code is non-zero + ) + + return result.stdout + + except subprocess.CalledProcessError as e: + # Handle errors (e.g., nvme not found, permission denied, or other command failures) + return (f"Error executing command: {' '.join(command)}\n" + f"Return Code: {e.returncode}\n" + f"Standard 
Error:\n{e.stderr}") + except FileNotFoundError: + return "Error: The 'nvme' command was not found. Is 'nvme-cli' installed?" + + +def get_nvme_list_verbose() -> str: + """ + Executes the 'nvme list -v' command and returns the output. + + Returns: + str: The standard output of the command, or an error message + if the command fails. + """ + command = ['nvme', 'list', '-v', '--output-format', 'json'] + + try: + # Run the command + # capture_output=True captures stdout and stderr. + # text=True decodes the output as text (using default encoding, typically UTF-8). + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True # Raise a CalledProcessError if the exit code is non-zero + ) + + # Return the captured standard output + return result.stdout + + except subprocess.CalledProcessError as e: + # Handle errors (e.g., nvme not found, permission denied, or other command failures) + return (f"Error executing command: {' '.join(command)}\n" + f"Return Code: {e.returncode}\n" + f"Standard Error:\n{e.stderr}") + except FileNotFoundError: + return "Error: The 'nvme' command was not found. Is 'nvme-cli' installed?" 
+ + +def query_nvme_ssd_by_model_and_size(model: str, size_range: str) -> list: + if not model: + print("No model specified.") + return [] + if not size_range: + print("No size range specified.") + return [] + + size_from = 0 + size_to = 0 + try: + range_split = size_range.split('-') + if len(range_split) == 1: + size_from = parse_size(range_split[0]) + elif len(range_split) == 2: + size_from = parse_size(range_split[0]) + size_to = parse_size(range_split[1]) + else: + raise ValueError("Invalid size range") + except Exception as e: + print(e) + return [] + + json_string = get_nvme_list_verbose() + data = json.loads(json_string) + + pci_lst = [] + for device_entry in data.get('Devices', []): + for subsystem in device_entry.get('Subsystems', []): + for controller in subsystem.get('Controllers', []): + model_number = controller.get("ModelNumber") + if model_number != model: + continue + address = controller.get("Address") + if len(controller.get("Namespaces")) > 0: + size = controller.get("Namespaces")[0].get("PhysicalSize") + if size > size_from: + if size_to > 0 and size < size_to: + pci_lst.append(address) + return pci_lst + + +def query_nvme_ssd_by_namespace_names(nvme_names: Iterable[str]) -> List[str]: + """ + Match NVMe devices by namespace names (e.g. nvme0n1, nvme1n1) using nvme list -v JSON output. + Returns a de-duplicated list of PCI addresses (e.g. 0000:00:03.0). 
+ """ + nvme_names = list(nvme_names or []) + if not nvme_names: + print("No NVMe device names specified.") + return [] + + wanted = set(nvme_names) + + json_string = get_nvme_list_verbose() # should return the JSON string shown in your example + data = json.loads(json_string) + + out: List[str] = [] + seen = set() + + for dev in data.get("Devices", []): + for subsys in dev.get("Subsystems", []): + for ctrl in subsys.get("Controllers", []): + addr = ctrl.get("Address") + for ns in ctrl.get("Namespaces", []) or []: + ns_name = ns.get("NameSpace") # <-- exact key in your JSON + if ns_name in wanted and addr and addr not in seen: + seen.add(addr) + out.append(addr) + break + + return out + + +def claim_devices_to_nvme(config_path=""): + config_path = config_path or constants.NODES_CONFIG_FILE + nvme_devices_list = [] + try: + with open(config_path) as f: + cfg = json.load(f) + nvme_devices_list = [ + pci + for node in cfg.get("nodes", []) + for pci in node.get("ssd_pcis", []) + ] + for pci in nvme_devices_list: + pci_utils.ensure_driver(pci, 'nvme') + except Exception as e: + print(f"An unexpected error occurred: {e}") + return nvme_devices_list + + +def clean_devices(config_path, format, force): + nvme_devices_list = claim_devices_to_nvme(config_path) + try: + json_string = get_nvme_list_verbose() + data = json.loads(json_string) + controllers_list = [] + + # The structure is Devices[0] -> Subsystems[] -> Controllers[] + nvme_devices = "" + for device_entry in data.get('Devices', []): + for subsystem in device_entry.get('Subsystems', []): + for controller in subsystem.get('Controllers', []): + # 3. 
Pull out the desired fields + if len(controller.get("Namespaces")) > 0 and controller.get("Address") in nvme_devices_list: + controllers_list.append({ + "NVMe_Controller": controller.get("Controller"), + "PCI_Address": controller.get("Address"), + "NAMESPACE": controller.get("Namespaces")[0].get("NameSpace") + }) + nvme_devices += f"/dev/{controller.get('Namespaces')[0].get('NameSpace')} " + if format: + logger.warning(f"Formating Nvme devices {nvme_devices}") + if not force: + answer = input("Type YES/Y to continue: ").strip().lower() + if answer not in ("yes", "y"): + logger.warning("Aborted by user.") + exit(1) + + for mapping in controllers_list: + if mapping['PCI_Address'] in nvme_devices_list: + nvme_device_path = f"/dev/{mapping['NAMESPACE']}" + clean_partitions(nvme_device_path) + + except json.JSONDecodeError as e: + logger.error(f"Error decoding JSON: {e}") + + +def create_rpc_socket_mount(): + try: + + logger.info("create RPC socket mount") + mount_point = "/mnt/ramdisk" + size = "1G" + fstab_entry = f"tmpfs {mount_point} tmpfs size={size},mode=1777,noatime 0 0\n" + + # Create the mount point if it doesn't exist + os.makedirs(mount_point, exist_ok=True) + + # Add to /etc/fstab if not already present + with open("/etc/fstab", "r+") as fstab: + lines = fstab.readlines() + if not any(mount_point in line for line in lines): + fstab.write(fstab_entry) + print(f"Added fstab entry for {mount_point}") + else: + print(f"fstab entry for {mount_point} already exists") + + # Mount the RAM disk immediately + subprocess.run(["mount", mount_point], check=True) + + # Verify + subprocess.run(["df", "-h", mount_point]) + except Exception as e: + logger.error(e) diff --git a/simplyblock_web/api/internal/storage_node/docker.py b/simplyblock_web/api/internal/storage_node/docker.py index 8e18fc276..cfaf79e15 100644 --- a/simplyblock_web/api/internal/storage_node/docker.py +++ b/simplyblock_web/api/internal/storage_node/docker.py @@ -4,7 +4,6 @@ import math import os from 
pathlib import Path -import subprocess import time from typing import List, Optional, Union @@ -19,6 +18,7 @@ from simplyblock_core import scripts, constants, shell_utils, utils as core_utils import simplyblock_core.utils.pci as pci_utils +import simplyblock_core.utils as init_utils from simplyblock_web import utils, node_utils logger = core_utils.get_logger(__name__) @@ -129,7 +129,7 @@ def scan_devices(): class SPDKParams(BaseModel): server_ip: str = Field(pattern=utils.IP_PATTERN) - rpc_port: int = Field(constants.RPC_HTTP_PROXY_PORT, ge=1, le=65536) + rpc_port: int = Field(constants.RPC_PORT_RANGE_START, ge=1, le=65536) rpc_username: str rpc_password: str ssd_pcie: Optional[List[str]] = Field(None) @@ -142,6 +142,9 @@ class SPDKParams(BaseModel): spdk_image: Optional[str] = Field(constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE) cluster_ip: Optional[str] = Field(default=None, pattern=utils.IP_PATTERN) cluster_mode: str + socket: Optional[int] = Field(None, ge=0) + cluster_id: str + firewall_port: int = Field(constants.FW_PORT_START) @api.post('/spdk_process_start', responses={ @@ -154,7 +157,8 @@ def spdk_process_start(body: SPDKParams): ssd_pcie_list = " ".join(body.ssd_pcie) if body.ssd_pcie else "none" spdk_debug = '1' if body.spdk_debug else '' total_mem_mib = core_utils.convert_size(core_utils.parse_size(body.total_mem), 'MiB') if body.total_mem else '' - spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB') + # spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB') + spdk_mem_mib = 0 node_docker = get_docker_client(timeout=60 * 3) for name in {f"/spdk_{body.rpc_port}", f"/spdk_proxy_{body.rpc_port}"}: @@ -180,24 +184,29 @@ def spdk_process_start(body: SPDKParams): f'/tmp/shm_{body.rpc_port}/:/dev/shm/', '/lib/modules/:/lib/modules/', '/var/lib/systemd/coredump/:/var/lib/systemd/coredump/', - '/sys:/sys'], + '/sys:/sys', + '/mnt/ramdisk:/mnt/ramdisk', + ], environment=[ f"RPC_PORT={body.rpc_port}", f"ssd_pcie={ssd_pcie_params}", 
f"PCI_ALLOWED={ssd_pcie_list}", f"TOTAL_HP={total_mem_mib}", + f"NSOCKET={body.socket}", + f"FW_PORT={body.firewall_port}", ] # restart_policy={"Name": "on-failure", "MaximumRetryCount": 99} ) node_docker.containers.run( constants.SIMPLY_BLOCK_DOCKER_IMAGE, - "python simplyblock_core/services/spdk_http_proxy_server.py", + "python simplyblock_core/services/spdk_http_proxy_server.py ", name=f"spdk_proxy_{body.rpc_port}", detach=True, network_mode="host", log_config=log_config, volumes=[ f'/var/tmp/spdk_{body.rpc_port}:/var/tmp', + '/mnt/ramdisk:/mnt/ramdisk', ], environment=[ f"SERVER_IP={body.server_ip}", @@ -508,8 +517,10 @@ def bind_device_to_nvme(body: utils.DeviceParams): def delete_gpt_partitions_for_dev(body: utils.DeviceParams): bind_device_to_nvme(body) device_name = pci_utils.nvme_device_name(body.device_pci) - subprocess.check_call(['parted', '-fs', f'/dev/{device_name}', 'mklabel' 'gpt']) - return utils.get_response(True) + cmd = f"parted -fs /dev/{device_name} mklabel gpt" + out, err, ret_code = shell_utils.run_command(cmd) + logger.info(f"out: {out}, err: {err}, ret_code: {ret_code}") + return utils.get_response(ret_code==0, error=err) CPU_INFO = cpuinfo.get_cpu_info() @@ -528,6 +539,13 @@ def delete_gpt_partitions_for_dev(body: utils.DeviceParams): SYSTEM_ID = CLOUD_INFO["id"] +@api.post('/format_device_with_4k') +def format_device_with_4k(body: utils.DeviceParams): + pci_utils.ensure_driver(body.device_pci, 'nvme') + init_utils.format_device_with_4k(body.device_pci) + return utils.get_response(True) + + @api.post('/bind_device_to_spdk') def bind_device_to_spdk(body: utils.DeviceParams): device_path = pci_utils.device(body.device_pci) @@ -697,3 +715,44 @@ def ifc_is_tcp(query: NicQuery): }) def is_alive(): return utils.get_response(True) + + +@api.post('/nvme_connect', + summary='Connect NVMe-oF target', + responses={ + 200: {'content': {'application/json': {'schema': utils.response_schema({ + 'type': 'boolean', + })}}, + }, +}) +def 
connect_to_nvme(body: utils.NVMEConnectParams): + """Connect to the indicated NVMe-oF target. + """ + st = f"nvme connect --transport=tcp --traddr={body.ip} --trsvcid={body.port} --nqn={body.nqn}" + logger.debug(st) + out, err, ret_code = shell_utils.run_command(st) + logger.debug(ret_code) + logger.debug(out) + logger.debug(err) + if ret_code == 0: + return utils.get_response(True) + else: + return utils.get_response(ret_code, error=err) + + +@api.post('/disconnect_nqn', + summary='Disconnect NVMe-oF device by NQN', + responses={ + 200: {'content': {'application/json': {'schema': utils.response_schema({ + 'type': 'integer', + })}}}, +}) +def disconnect_nqn(body: utils.DisconnectParams): + """Disconnect from indicated NVMe-oF target + """ + st = f"nvme disconnect --nqn={body.nqn}" + out, err, ret_code = shell_utils.run_command(st) + logger.debug(ret_code) + logger.debug(out) + logger.debug(err) + return utils.get_response(ret_code) diff --git a/simplyblock_web/api/internal/storage_node/kubernetes.py b/simplyblock_web/api/internal/storage_node/kubernetes.py index be3193138..65ad28a3b 100644 --- a/simplyblock_web/api/internal/storage_node/kubernetes.py +++ b/simplyblock_web/api/internal/storage_node/kubernetes.py @@ -268,6 +268,9 @@ class SPDKParams(BaseModel): spdk_image: str = Field(constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE) cluster_ip: str = Field(pattern=utils.IP_PATTERN) cluster_mode: str + socket: Optional[int] = Field(None, ge=0) + firewall_port: Optional[int] = Field(constants.FW_PORT_START) + cluster_id: str @api.post('/spdk_process_start', responses={ @@ -286,9 +289,10 @@ def spdk_process_start(body: SPDKParams): total_mem_mib = core_utils.convert_size(core_utils.parse_size(body.total_mem), 'MB') if body.total_mem else "" - if _is_pod_up(body.rpc_port) or _is_pod_present(body.rpc_port): + first_six_cluster_id = core_utils.first_six_chars(body.cluster_id) + if _is_pod_up(body.rpc_port, first_six_cluster_id) or _is_pod_present(body.rpc_port, 
first_six_cluster_id): logger.info("SPDK pod found, removing...") - query = utils.RPCPortParams(rpc_port=body.rpc_port) + query = utils.RPCPortParams(rpc_port=body.rpc_port, cluster_id=body.cluster_id) spdk_process_kill(query) node_prepration_job_name = "snode-spdk-job-" @@ -336,8 +340,8 @@ def spdk_process_start(body: SPDKParams): "L_CORES": body.l_cores, "CORES": core_utils.get_total_cpu_cores(body.l_cores), 'SPDK_MEM': core_utils.convert_size(body.spdk_mem, 'MiB'), - 'MEM_GEGA': core_utils.convert_size(body.spdk_mem, 'GiB', round_up=True), - 'MEM2_GEGA': core_utils.convert_size(body.system_mem, 'GiB', round_up=True), + 'MEM_MEGA': (core_utils.convert_size(body.spdk_mem, 'MiB', round_up=True) // 2) * 2 + 512, + 'MEM2_MEGA': (core_utils.convert_size(body.system_mem, 'MiB', round_up=True) // 2) * 2, 'SERVER_IP': body.server_ip, 'RPC_PORT': body.rpc_port, 'RPC_USERNAME': body.rpc_username, @@ -351,9 +355,12 @@ def spdk_process_start(body: SPDKParams): 'SIMPLYBLOCK_DOCKER_IMAGE': constants.SIMPLY_BLOCK_DOCKER_IMAGE, 'GRAYLOG_SERVER_IP': body.cluster_ip, 'MODE': body.cluster_mode, + 'CLUSTER_ID': first_six_cluster_id, 'SSD_PCIE': ssd_pcie_params, 'PCI_ALLOWED': ssd_pcie_list, - 'TOTAL_HP': total_mem_mib + 'TOTAL_HP': total_mem_mib, + 'NSOCKET': body.socket, + 'FW_PORT': body.firewall_port } if ubuntu_host: @@ -420,9 +427,35 @@ def spdk_process_start(body: SPDKParams): logger.info(f"Job deleted: '{core_resp.metadata.name}' in namespace '{namespace}") elif core_isolate and openshift: + batch_v1 = core_utils.get_k8s_batch_client() + try: + batch_v1.read_namespaced_job( + name=node_prepration_core_name, + namespace=namespace + ) + logger.info(f"Existing Job '{node_prepration_core_name}' found — deleting it first...") + + batch_v1.delete_namespaced_job( + name=node_prepration_core_name, + namespace=namespace, + body=V1DeleteOptions( + propagation_policy='Foreground', + grace_period_seconds=0 + ) + ) + + node_utils_k8s.wait_for_job_deletion(node_prepration_core_name, 
namespace) + + logger.info(f"Old Job '{node_prepration_core_name}' fully deleted.") + + except ApiException as e: + if e.status == 404: + logger.info(f"No pre-existing Job '{node_prepration_core_name}' found. Proceeding.") + else: + raise + core_template = env.get_template('oc_storage_core_isolation.yaml.j2') core_yaml = yaml.safe_load(core_template.render(values)) - batch_v1 = core_utils.get_k8s_batch_client() core_resp = batch_v1.create_namespaced_job(namespace=namespace, body=core_yaml) msg = f"Job created: '{core_resp.metadata.name}' in namespace '{namespace}" logger.info(msg) @@ -463,7 +496,11 @@ def spdk_process_kill(query: utils.RPCPortParams): k8s_core_v1 = core_utils.get_k8s_core_client() try: namespace = node_utils_k8s.get_namespace() - pod_name = f"snode-spdk-pod-{query.rpc_port}" + if not query.cluster_id: + return utils.get_response(False, "param required: cluster_id") + + first_six_cluster_id = core_utils.first_six_chars(query.cluster_id) + pod_name = f"snode-spdk-pod-{query.rpc_port}-{first_six_cluster_id}" resp = k8s_core_v1.delete_namespaced_pod(pod_name, namespace) retries = 10 while retries > 0: @@ -486,9 +523,9 @@ def spdk_process_kill(query: utils.RPCPortParams): return utils.get_response(True) -def _is_pod_up(rpc_port): +def _is_pod_up(rpc_port, cluster_id): k8s_core_v1 = core_utils.get_k8s_core_client() - pod_name = f"snode-spdk-pod-{rpc_port}" + pod_name = f"snode-spdk-pod-{rpc_port}-{cluster_id}" try: resp = k8s_core_v1.list_namespaced_pod(node_utils_k8s.get_namespace()) for pod in resp.items: @@ -502,9 +539,9 @@ def _is_pod_up(rpc_port): return False return False -def _is_pod_present(rpc_port): +def _is_pod_present(rpc_port, cluster_id): k8s_core_v1 = core_utils.get_k8s_core_client() - pod_name = f"snode-spdk-pod-{rpc_port}" + pod_name = f"snode-spdk-pod-{rpc_port}-{cluster_id}" try: resp = k8s_core_v1.list_namespaced_pod(node_utils_k8s.get_namespace()) for pod in resp.items: @@ -525,7 +562,11 @@ def _is_pod_present(rpc_port): })}}}, }) 
def spdk_process_is_up(query: utils.RPCPortParams): - if _is_pod_up(query.rpc_port): + if not query.cluster_id: + return utils.get_response(False, "param required: cluster_id") + + first_six_cluster_id = core_utils.first_six_chars(query.cluster_id) + if _is_pod_up(query.rpc_port, first_six_cluster_id): return utils.get_response(True) else: return utils.get_response(False, "SPDK container is not running") @@ -602,10 +643,13 @@ def apply_config(): # Set Huge page memory huge_page_memory_dict: dict = {} for node_config in nodes: + hg_memory = node_config["huge_page_memory"] + if int(node_config["max_size"]) > 0: + hg_memory = max(hg_memory , node_config["max_size"]) numa = node_config["socket"] - huge_page_memory_dict[numa] = huge_page_memory_dict.get(numa, 0) + node_config["huge_page_memory"] + huge_page_memory_dict[numa] = huge_page_memory_dict.get(numa, 0) + hg_memory + 1000000000 for numa, huge_page_memory in huge_page_memory_dict.items(): - num_pages = huge_page_memory // (2048 * 1024) + num_pages = huge_page_memory // 2000000 core_utils.set_hugepages_if_needed(numa, num_pages) return utils.get_response(True) @@ -628,6 +672,7 @@ def is_alive(): def spdk_proxy_restart(query: utils.RPCPortParams): return utils.get_response(True) +api.post('/bind_device_to_nvme')(snode_ops.bind_device_to_nvme) api.post('/bind_device_to_spdk')(snode_ops.bind_device_to_spdk) @@ -635,3 +680,5 @@ def spdk_proxy_restart(query: utils.RPCPortParams): api.get('/ifc_is_roce')(snode_ops.ifc_is_roce) +api.post('/format_device_with_4k')(snode_ops.format_device_with_4k) + diff --git a/simplyblock_web/api/v1/__init__.py b/simplyblock_web/api/v1/__init__.py index 4bcc5ba41..6df2a2db5 100644 --- a/simplyblock_web/api/v1/__init__.py +++ b/simplyblock_web/api/v1/__init__.py @@ -1,9 +1,12 @@ import logging +import fdb +from flask import jsonify from flask import Flask from simplyblock_web.auth_middleware import token_required from simplyblock_web import utils +from simplyblock_core import constants 
from . import cluster from . import mgmt_node @@ -39,3 +42,24 @@ def before_request(): @api.route('/', methods=['GET']) def status(): return utils.get_response("Live") + +@api.route('/health/fdb', methods=['GET']) +def health_fdb(): + try: + fdb.api_version(constants.KVD_DB_VERSION) + + db = fdb.open(constants.KVD_DB_FILE_PATH) + tr = db.create_transaction() + + tr.get(b"\x00") + tr.commit().wait() + + return jsonify({ + "fdb_connected": True + }), 200 + + except Exception as e: + return jsonify({ + "fdb_connected": False, + "error": str(e) + }), 503 diff --git a/simplyblock_web/api/v1/cluster.py b/simplyblock_web/api/v1/cluster.py index 698d9582d..759cdbd31 100644 --- a/simplyblock_web/api/v1/cluster.py +++ b/simplyblock_web/api/v1/cluster.py @@ -47,6 +47,9 @@ def add_cluster(): qpair_count = cl_data.get('qpair_count', 256) name = cl_data.get('name', None) fabric = cl_data.get('fabric', "tcp") + cr_name = cl_data.get('cr_name', None) + cr_namespace = cl_data.get('cr_namespace', None) + cr_plural = cl_data.get('cr_plural', None) max_queue_size = cl_data.get('max_queue_size', 128) inflight_io_threshold = cl_data.get('inflight_io_threshold', 4) @@ -56,10 +59,62 @@ def add_cluster(): return utils.get_response(cluster_ops.add_cluster( blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, - qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, fabric + qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, + cr_name, cr_namespace, cr_plural, fabric )) +@bp.route('/cluster/create_first', methods=['POST']) +def create_first_cluster(): + cl_data = request.get_json() + + if db.get_clusters(): + return utils.get_response_error("Cluster found!", 400) + + blk_size = 512 + if 'blk_size' in cl_data: + if cl_data['blk_size'] not in [512, 4096]: + return 
utils.get_response_error("blk_size can be 512 or 4096", 400) + else: + blk_size = cl_data['blk_size'] + page_size_in_blocks = cl_data.get('page_size_in_blocks', 2097152) + distr_ndcs = cl_data.get('distr_ndcs', 1) + distr_npcs = cl_data.get('distr_npcs', 1) + distr_bs = cl_data.get('distr_bs', 4096) + distr_chunk_bs = cl_data.get('distr_chunk_bs', 4096) + ha_type = cl_data.get('ha_type', 'ha') + enable_node_affinity = cl_data.get('enable_node_affinity', False) + qpair_count = cl_data.get('qpair_count', 256) + name = cl_data.get('name', None) + fabric = cl_data.get('fabric', "tcp") + cap_warn = cl_data.get('cap_warn', 0) + cap_crit = cl_data.get('cap_crit', 0) + prov_cap_warn = cl_data.get('prov_cap_warn', 0) + prov_cap_crit = cl_data.get('prov_cap_crit', 0) + max_queue_size = cl_data.get('max_queue_size', 128) + inflight_io_threshold = cl_data.get('inflight_io_threshold', 4) + strict_node_anti_affinity = cl_data.get('strict_node_anti_affinity', False) + is_single_node = cl_data.get('is_single_node', False) + cr_name = cl_data.get('cr_name', None) + cr_namespace = cl_data.get('cr_namespace', None) + cr_plural = cl_data.get('cr_plural', None) + cluster_ip = cl_data.get('cluster_ip', None) + grafana_secret = cl_data.get('grafana_secret', None) + + try: + cluster_id = cluster_ops.add_cluster( + blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, + distr_ndcs, distr_npcs, distr_bs, distr_chunk_bs, ha_type, enable_node_affinity, + qpair_count, max_queue_size, inflight_io_threshold, strict_node_anti_affinity, is_single_node, name, + cr_name, cr_namespace, cr_plural, fabric, cluster_ip=cluster_ip, grafana_secret=grafana_secret) + if cluster_id: + return utils.get_response(db.get_cluster_by_id(cluster_id).to_dict()) + else: + return utils.get_response(False, "Failed to create cluster", 400) + except Exception as e: + return utils.get_response(False, str(e), 404) + + @bp.route('/cluster', methods=['GET'], defaults={'uuid': None}) 
@bp.route('/cluster/', methods=['GET']) def list_clusters(uuid): @@ -227,6 +282,23 @@ def cluster_activate(uuid): # FIXME: Any failure within the thread are not handled return utils.get_response(True), 202 +@bp.route('/cluster/addreplication/', methods=['PUT']) +def cluster_add_replication(uuid): + req_data = request.get_json() + target_cluster_uuid = req_data.get("target_cluster_uuid", None) + replication_timeout = req_data.get("replication_timeout", 0) + target_pool_uuid = req_data.get("target_pool_uuid", None) + + try: + db.get_cluster_by_id(uuid) + except KeyError: + return utils.get_response_error(f"Cluster not found: {uuid}", 404) + + cluster_ops.add_replication(source_cl_id=uuid, target_cl_id=target_cluster_uuid, + timeout=replication_timeout, target_pool=target_pool_uuid) + return utils.get_response(True), 202 + + @bp.route('/cluster/allstats//history/', methods=['GET']) @bp.route('/cluster/allstats/', methods=['GET'], defaults={'history': None}) diff --git a/simplyblock_web/api/v1/lvol.py b/simplyblock_web/api/v1/lvol.py index dbb77f6a2..31443e6c8 100644 --- a/simplyblock_web/api/v1/lvol.py +++ b/simplyblock_web/api/v1/lvol.py @@ -158,6 +158,7 @@ def add_lvol(): ndcs = utils.get_value_or_default(cl_data, "ndcs", 0) npcs = utils.get_value_or_default(cl_data, "npcs", 0) fabric = utils.get_value_or_default(cl_data, "fabric", "tcp") + do_replicate = utils.get_value_or_default(cl_data, "do_replicate", False) ret, error = lvol_controller.add_lvol_ha( name=name, @@ -186,7 +187,8 @@ def add_lvol(): max_namespace_per_subsys=max_namespace_per_subsys, ndcs=ndcs, npcs=npcs, - fabric=fabric + fabric=fabric, + do_replicate=do_replicate ) return utils.get_response(ret, error, http_code=400) @@ -306,3 +308,24 @@ def inflate_lvol(uuid): ret = lvol_controller.inflate_lvol(uuid) return utils.get_response(ret) + +@bp.route('/lvol/replication_start/', methods=['PUT']) +def replication_start(uuid): + try: + db.get_lvol_by_id(uuid) + except KeyError as e: + return 
utils.get_response_error(str(e), 404) + + ret = lvol_controller.replication_trigger(uuid) + return utils.get_response(ret) + +@bp.route('/lvol/replication_stop/', methods=['PUT']) +def replication_stop(uuid): + try: + db.get_lvol_by_id(uuid) + except KeyError as e: + return utils.get_response_error(str(e), 404) + + ret = lvol_controller.replication_stop(uuid) + return utils.get_response(ret) + \ No newline at end of file diff --git a/simplyblock_web/api/v1/pool.py b/simplyblock_web/api/v1/pool.py index a24a9e9b7..3b4fe5f72 100644 --- a/simplyblock_web/api/v1/pool.py +++ b/simplyblock_web/api/v1/pool.py @@ -184,21 +184,10 @@ def pool_iostats(uuid, history): except KeyError: return utils.get_response_error(f"Pool not found: {uuid}", 404) - if history: - records_number = core_utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - out = db.get_pool_stats(pool, records_number) - records_count = 20 - new_records = core_utils.process_records(out, records_count) - + data = pool_controller.get_io_stats(uuid, history) ret = { "object_data": pool.get_clean_dict(), - "stats": new_records or [] + "stats": data or [] } return utils.get_response(ret) @@ -207,21 +196,13 @@ def pool_iostats(uuid, history): @bp.route('/pool/iostats-all-lvols/', methods=['GET']) def lvol_iostats(pool_uuid): try: - db.get_pool_by_id(pool_uuid) + pool = db.get_pool_by_id(pool_uuid) except KeyError: return utils.get_response_error(f"Pool not found: {pool_uuid}", 404) - ret = [] - for lvol in db.get_lvols_by_pool_id(pool_uuid): - - records_list = db.get_lvol_stats(lvol, limit=1) - - if records_list: - data = records_list[0].get_clean_dict() - else: - data = {} - ret.append({ - "object_data": lvol.get_clean_dict(), - "stats": data - }) + data = pool_controller.get_capacity(pool_uuid) + ret = { + "object_data": pool.get_clean_dict(), + "stats": data or [] + } return utils.get_response(ret) diff 
--git a/simplyblock_web/api/v1/storage_node.py b/simplyblock_web/api/v1/storage_node.py index b44313c11..b3f0925bf 100644 --- a/simplyblock_web/api/v1/storage_node.py +++ b/simplyblock_web/api/v1/storage_node.py @@ -249,6 +249,19 @@ def storage_node_add(): if 'iobuf_large_pool_count' in req_data: iobuf_large_pool_count = int(req_data['iobuf_large_pool_count']) + ha_jm_count = 3 + if 'ha_jm_count' in req_data: + ha_jm_count = int(req_data['ha_jm_count']) + + format_4k = False + param = req_data.get('format_4k') + if param: + if isinstance(param, bool): + format_4k = param + elif isinstance(param, str): + format_4k = param == "true" + + tasks_controller.add_node_add_task(cluster_id, { "cluster_id": cluster_id, "node_addr": node_addr, @@ -264,6 +277,8 @@ def storage_node_add(): "enable_test_device": enable_test_device, "namespace": namespace, "enable_ha_jm": not disable_ha_jm, + "ha_jm_count": ha_jm_count, + "format_4k": format_4k }) return utils.get_response(True) diff --git a/simplyblock_web/api/v2/__init__.py b/simplyblock_web/api/v2/__init__.py index ff8511e1c..c4c0168c7 100644 --- a/simplyblock_web/api/v2/__init__.py +++ b/simplyblock_web/api/v2/__init__.py @@ -10,6 +10,7 @@ from . import pool from . import snapshot from . import storage_node +from . 
import task from simplyblock_core.db_controller import DBController @@ -38,6 +39,9 @@ def _verify_api_token( cluster.instance_api.include_router(storage_node.api) +task.api.include_router(task.instance_api) + +cluster.instance_api.include_router(task.api) volume.api.include_router(volume.instance_api) pool.instance_api.include_router(volume.api) diff --git a/simplyblock_web/api/v2/cluster.py b/simplyblock_web/api/v2/cluster.py index 422766246..9447ff718 100644 --- a/simplyblock_web/api/v2/cluster.py +++ b/simplyblock_web/api/v2/cluster.py @@ -17,6 +17,11 @@ db = DBController() +class _ReplicationParams(BaseModel): + snapshot_replication_target_cluster: str + snapshot_replication_timeout: int = 0 + target_pool: Optional[str] = None + class _UpdateParams(BaseModel): management_image: Optional[str] spdk_image: Optional[str] @@ -24,7 +29,7 @@ class _UpdateParams(BaseModel): class ClusterParams(BaseModel): - name: Optional[str] = None + name: str = "" blk_size: Literal[512, 4096] = 512 page_size_in_blocks: int = Field(2097152, gt=0) cap_warn: util.Percent = 0 @@ -35,22 +40,30 @@ class ClusterParams(BaseModel): distr_npcs: int = 1 distr_bs: int = 4096 distr_chunk_bs: int = 4096 - ha_type: Literal['single', 'ha'] = 'single' + ha_type: Literal['single', 'ha'] = 'ha' qpair_count: int = 256 max_queue_size: int = 128 inflight_io_threshold: int = 4 enable_node_affinity: bool = False strict_node_anti_affinity: bool = False - + is_single_node: bool = False + fabric: str = "tcp" + cr_name: str = "" + cr_namespace: str = "" + cr_plural: str = "" + cluster_ip: str = "" + grafana_secret: str = "" @api.get('/', name='clusters:list') def list() -> List[ClusterDTO]: - return [ - ClusterDTO.from_model(cluster) - for cluster - in db.get_clusters() - ] - + data = [] + for cluster in db.get_clusters(): + stat_obj = None + ret = db.get_cluster_capacity(cluster, 1) + if ret: + stat_obj = ret[0] + data.append(ClusterDTO.from_model(cluster, stat_obj)) + return data @api.post('/', 
name='clusters:create', status_code=201, responses={201: {"content": None}}) def add(request: Request, parameters: ClusterParams): @@ -58,8 +71,8 @@ def add(request: Request, parameters: ClusterParams): if not cluster_id_or_false: raise ValueError('Failed to create cluster') - entity_url = request.app.url_path_for('get', cluster_id=cluster_id_or_false) - return Response(status_code=201, headers={'Location': entity_url}) + cluster = db.get_cluster_by_id(cluster_id_or_false) + return ClusterDTO.from_model(cluster) instance_api = APIRouter(prefix='/{cluster_id}') @@ -77,7 +90,11 @@ def _lookup_cluster(cluster_id: UUID): @instance_api.get('/', name='clusters:detail') def get(cluster: Cluster) -> ClusterDTO: - return ClusterDTO.from_model(cluster) + stat_obj = None + ret = db.get_cluster_capacity(cluster, 1) + if ret: + stat_obj = ret[0] + return ClusterDTO.from_model(cluster, stat_obj) class UpdatableClusterParameters(BaseModel): @@ -154,6 +171,23 @@ def activate(cluster: Cluster) -> Response: ).start() return Response(status_code=202) # FIXME: Provide URL for checking task status +@instance_api.post('/addreplication', name='clusters:addreplication', status_code=202, responses={202: {"content": None}}) +def cluster_add_replication(cluster: Cluster, parameters: _ReplicationParams) -> Response: + cluster_ops.add_replication( + source_cl_id=cluster.get_id(), + target_cl_id=parameters.snapshot_replication_target_cluster, + timeout=parameters.snapshot_replication_timeout, + target_pool=parameters.target_pool + ) + return Response(status_code=202) + +@instance_api.post('/expand', name='clusters:expand', status_code=202, responses={202: {"content": None}}) +def expand(cluster: Cluster) -> Response: + Thread( + target=cluster_ops.cluster_expand, + args=(cluster.get_id(),), + ).start() + return Response(status_code=202) # FIXME: Provide URL for checking task status @instance_api.post('/update', name='clusters:upgrade', status_code=204, responses={204: {"content": None}}) def 
update_cluster( cluster: Cluster, parameters: _UpdateParams) -> Response: diff --git a/simplyblock_web/api/v2/device.py b/simplyblock_web/api/v2/device.py index 1c7b40d7e..b0015b69b 100644 --- a/simplyblock_web/api/v2/device.py +++ b/simplyblock_web/api/v2/device.py @@ -18,10 +18,14 @@ @api.get('/', name='clusters:storage_nodes:devices:list') def list(cluster: Cluster, storage_node: StorageNode) -> List[DeviceDTO]: - return [ - DeviceDTO.from_model(device) - for device in storage_node.nvme_devices - ] + data = [] + for device in storage_node.nvme_devices: + stat_obj = None + ret = db.get_device_stats(device, 1) + if ret: + stat_obj = ret[0] + data.append(DeviceDTO.from_model(device, stat_obj)) + return data instance_api = APIRouter(prefix='/{device_id}') @@ -38,16 +42,26 @@ def _lookup_device(storage_node: StorageNode, device_id: UUID) -> NVMeDevice: @instance_api.get('/', name='clusters:storage_nodes:devices:detail') def get(cluster: Cluster, storage_node: StorageNode, device: Device) -> DeviceDTO: - return DeviceDTO.from_model(device) + stat_obj = None + ret = db.get_device_stats(device, 1) + if ret: + stat_obj = ret[0] + return DeviceDTO.from_model(device, stat_obj) -@instance_api.delete('/', name='clusters:storage_nodes:devices:delete', status_code=204, responses={204: {"content": None}}) -def delete(cluster: Cluster, storage_node: StorageNode, device: Device) -> Response: - if not device_controller.device_remove(device.get_id()): +@instance_api.post('/remove', name='clusters:storage_nodes:devices:remove', status_code=204, responses={204: {"content": None}}) +def remove(cluster: Cluster, storage_node: StorageNode, device: Device, force: bool = False) -> Response: + if not device_controller.device_remove(device.get_id(), force): raise ValueError('Failed to remove device') return Response(status_code=204) +@instance_api.post('/restart', name='clusters:storage_nodes:devices:restart', status_code=204, responses={204: {"content": None}}) +def restart(cluster: 
Cluster, storage_node: StorageNode, device: Device, force: bool = False) -> Response: + if not device_controller.restart_device(device.get_id(), force): + raise ValueError('Failed to restart device') + + return Response(status_code=204) @instance_api.get('/capacity', name='clusters:storage_nodes:devices:capacity') def capacity( diff --git a/simplyblock_web/api/v2/dtos.py b/simplyblock_web/api/v2/dtos.py index 54c1b5b01..ca29bfae0 100644 --- a/simplyblock_web/api/v2/dtos.py +++ b/simplyblock_web/api/v2/dtos.py @@ -12,19 +12,40 @@ from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.pool import Pool from simplyblock_core.models.snapshot import SnapShot +from simplyblock_core.models.stats import StatsObject from simplyblock_core.models.storage_node import StorageNode from . import util +class CapacityStatDTO(BaseModel): + date: int + size_total: int + size_prov: int + size_used: int + size_free: int + size_util: int + + @staticmethod + def from_model(model: StatsObject): + return CapacityStatDTO( + date=model.date, + size_total=model.size_total, + size_prov=model.size_prov, + size_used=model.size_used, + size_free=model.size_free, + size_util=model.size_util, + ) + + + class ClusterDTO(BaseModel): id: UUID name: Optional[str] nqn: str status: Literal['active', 'read_only', 'inactive', 'suspended', 'degraded', 'unready', 'in_activation', 'in_expansion'] - rebalancing: bool + is_re_balancing: bool block_size: util.Unsigned - coding: Tuple[util.Unsigned, util.Unsigned] ha: bool utliziation_critical: util.Percent utilization_warning: util.Percent @@ -33,17 +54,21 @@ class ClusterDTO(BaseModel): node_affinity: bool anti_affinity: bool secret: str + distr_ndcs: int + distr_npcs: int + capacity: CapacityStatDTO @staticmethod - def from_model(model: Cluster): + def from_model(model: Cluster, stat_obj: Optional[StatsObject]=None): return ClusterDTO( id=UUID(model.get_id()), name=model.cluster_name, nqn=model.nqn, status=model.status, # type: 
ignore - rebalancing=model.is_re_balancing, + is_re_balancing=model.is_re_balancing, block_size=model.blk_size, - coding=(model.distr_ndcs, model.distr_npcs), + distr_ndcs=model.distr_ndcs, + distr_npcs=model.distr_npcs, ha=model.ha_type == 'ha', utilization_warning=model.cap_warn, utliziation_critical=model.cap_crit, @@ -52,6 +77,7 @@ def from_model(model: Cluster): node_affinity=model.enable_node_affinity, anti_affinity=model.strict_node_anti_affinity, secret=model.secret, + capacity=CapacityStatDTO.from_model(stat_obj if stat_obj else StatsObject()), ) @@ -65,9 +91,10 @@ class DeviceDTO(BaseModel): nvmf_ips: List[IPv4Address] nvmf_nqn: str = "" nvmf_port: int = 0 + capacity: CapacityStatDTO @staticmethod - def from_model(model: NVMeDevice): + def from_model(model: NVMeDevice, stat_obj: Optional[StatsObject]=None): return DeviceDTO( id=UUID(model.get_id()), status=model.status, @@ -78,6 +105,7 @@ def from_model(model: NVMeDevice): nvmf_ips=[IPv4Address(ip) for ip in model.nvmf_ip.split(',')], nvmf_nqn=model.nvmf_nqn, nvmf_port=model.nvmf_port, + capacity=CapacityStatDTO.from_model(stat_obj if stat_obj else StatsObject()), ) @@ -107,9 +135,10 @@ class StoragePoolDTO(BaseModel): max_rw_mbytes: util.Unsigned max_r_mbytes: util.Unsigned max_w_mbytes: util.Unsigned + capacity: CapacityStatDTO @staticmethod - def from_model(model: Pool): + def from_model(model: Pool, stat_obj: Optional[StatsObject]=None): return StoragePoolDTO( id=UUID(model.get_id()), name=model.pool_name, @@ -120,6 +149,7 @@ def from_model(model: Pool): max_rw_mbytes=model.max_rw_mbytes_per_sec, max_r_mbytes=model.max_r_mbytes_per_sec, max_w_mbytes=model.max_w_mbytes_per_sec, + capacity=CapacityStatDTO.from_model(stat_obj if stat_obj else StatsObject()), ) @@ -153,14 +183,34 @@ def from_model(model: SnapShot, request: Request, cluster_id, pool_id, volume_id class StorageNodeDTO(BaseModel): id: UUID status: str - ip: IPv4Address + hostname: str + cpu: int + spdk_mem: int + lvols: int + rpc_port: int + 
lvol_subsys_port: int + nvmf_port: int + mgmt_ip: IPv4Address + health_check: bool + online_devices: str + capacity: CapacityStatDTO @staticmethod - def from_model(model: StorageNode): + def from_model(model: StorageNode, stat_obj: Optional[StatsObject]=None): return StorageNodeDTO( id=UUID(model.get_id()), status=model.status, - ip=IPv4Address(model.mgmt_ip), + hostname=model.hostname, + cpu=model.cpu, + spdk_mem=model.spdk_mem, + lvols=model.lvols, + rpc_port=model.rpc_port, + lvol_subsys_port=model.lvol_subsys_port, + nvmf_port=model.nvmf_port, + mgmt_ip=IPv4Address(model.mgmt_ip), + health_check=model.health_check, + online_devices=f"{len(model.nvme_devices)}/{len([d for d in model.nvme_devices if d.status=='online'])}", + capacity=CapacityStatDTO.from_model(stat_obj if stat_obj else StatsObject()), ) @@ -177,7 +227,7 @@ class TaskDTO(BaseModel): @staticmethod def from_model(model: JobSchedule): return TaskDTO( - id=UUID(model.get_id()), + id=UUID(model.uuid), status=model.status, canceled=model.canceled, function_name=model.function_name, @@ -194,25 +244,44 @@ class VolumeDTO(BaseModel): status: str health_check: bool nqn: str + hostname: str + fabric: str nodes: List[util.UrlPath] port: util.Port size: util.Unsigned + ndcs: int + npcs: int + pool_uuid: str + pool_name: str + pvc_name: str = "" + snapshot_name: str = "" + blobid: int + ns_id: int cloned_from: Optional[util.UrlPath] crypto_key: Optional[Tuple[str, str]] high_availability: bool + lvol_priority_class: util.Unsigned + do_replicate: bool = False + max_namespace_per_subsys: int max_rw_iops: util.Unsigned max_rw_mbytes: util.Unsigned max_r_mbytes: util.Unsigned max_w_mbytes: util.Unsigned + capacity: CapacityStatDTO + rep_info: Optional[dict] = None + from_source: bool = True + @staticmethod - def from_model(model: LVol, request: Request, cluster_id: str): + def from_model(model: LVol, request: Request, cluster_id: str, stat_obj: Optional[StatsObject]=None, rep_info=None): return VolumeDTO( 
id=UUID(model.get_id()), name=model.lvol_name, status=model.status, health_check=model.health_check, nqn=model.nqn, + hostname=model.hostname, + fabric=model.fabric, nodes=[ str(request.url_for( 'clusters:storage-nodes:detail', @@ -235,8 +304,22 @@ def from_model(model: LVol, request: Request, cluster_id: str): else None ), high_availability=model.ha_type == 'ha', + pool_uuid=model.pool_uuid, + pool_name=model.pool_name, + pvc_name=model.pvc_name, + snapshot_name=model.snapshot_name, + ndcs=model.ndcs, + npcs=model.npcs, + blobid=model.blobid, + ns_id=model.ns_id, + lvol_priority_class=model.lvol_priority_class, + do_replicate=model.do_replicate, + max_namespace_per_subsys=model.max_namespace_per_subsys, max_rw_iops=model.rw_ios_per_sec, max_rw_mbytes=model.rw_mbytes_per_sec, max_r_mbytes=model.r_mbytes_per_sec, max_w_mbytes=model.w_mbytes_per_sec, + capacity=CapacityStatDTO.from_model(stat_obj if stat_obj else StatsObject()), + rep_info=rep_info, + from_source=model.from_source ) diff --git a/simplyblock_web/api/v2/pool.py b/simplyblock_web/api/v2/pool.py index c779f70ca..4bc201a28 100644 --- a/simplyblock_web/api/v2/pool.py +++ b/simplyblock_web/api/v2/pool.py @@ -20,12 +20,15 @@ @api.get('/', name='clusters:storage-pools:list') def list(cluster: Cluster) -> List[StoragePoolDTO]: - return [ - StoragePoolDTO.from_model(pool) - for pool - in db.get_pools() - if pool.cluster_id == cluster.get_id() - ] + data = [] + for pool in db.get_pools(): + if pool.cluster_id == cluster.get_id(): + stat_obj = None + ret = db.get_pool_stats(pool, 1) + if ret: + stat_obj = ret[0] + data.append(StoragePoolDTO.from_model(pool, stat_obj)) + return data class StoragePoolParams(BaseModel): @@ -36,6 +39,9 @@ class StoragePoolParams(BaseModel): max_rw_mbytes: util.Unsigned = 0 max_r_mbytes: util.Unsigned = 0 max_w_mbytes: util.Unsigned = 0 + cr_name: str + cr_namespace: str + cr_plural: str @api.post('/', name='clusters:storage-pools:create', status_code=201, responses={201: {"content": 
None}}) @@ -49,14 +55,13 @@ def add(request: Request, cluster: Cluster, parameters: StoragePoolParams) -> Re id_or_false = pool_controller.add_pool( parameters.name, parameters.pool_max, parameters.volume_max_size, parameters.max_rw_iops, parameters.max_rw_mbytes, - parameters.max_r_mbytes, parameters.max_w_mbytes, cluster.get_id() + parameters.max_r_mbytes, parameters.max_w_mbytes, cluster.get_id(), parameters.cr_name, parameters.cr_namespace, parameters.cr_plural ) if not id_or_false: raise ValueError('Failed to create pool') - - entity_url = request.app.url_path_for('clusters:storage-pools:detail', cluster_id=cluster.get_id(), pool_id=id_or_false) - return Response(status_code=201, headers={'Location': entity_url}) + pool = db.get_pool_by_id(id_or_false) + return pool.to_dict() instance_api = APIRouter(prefix='/{pool_id}') @@ -74,7 +79,11 @@ def _lookup_storage_pool(pool_id: UUID) -> PoolModel: @instance_api.get('/', name='clusters:storage-pools:detail') def get(cluster: Cluster, pool: StoragePool) -> StoragePoolDTO: - return StoragePoolDTO.from_model(pool) + stat_obj = None + ret = db.get_pool_stats(pool, 1) + if ret: + stat_obj = ret[0] + return StoragePoolDTO.from_model(pool, stat_obj) @instance_api.delete('/', name='clusters:storage-pools:delete', status_code=204, responses={204: {"content": None}}) @@ -96,6 +105,9 @@ class UpdatableStoragePoolParams(BaseModel): max_rw_mbytes: Optional[util.Unsigned] = None max_r_mbytes: Optional[util.Unsigned] = None max_w_mbytes: Optional[util.Unsigned] = None + lvols_cr_name: Optional[str] = None + lvols_cr_namespace: Optional[str] = None + lvols_cr_plural: Optional[str] = None @instance_api.put('/', name='clusters:storage-pools:update', status_code=204, responses={204: {"content": None}}) @@ -122,5 +134,5 @@ def update(cluster: Cluster, pool: StoragePool, parameters: UpdatableStoragePool @instance_api.get('/iostats', name='clusters:storage-pools:iostats') def iostats(cluster: Cluster, pool: StoragePool, limit: int = 20): 
- records = db.get_pool_stats(pool, limit) - return core_utils.process_records(records, 20) + data = pool_controller.get_io_stats(pool.get_id(), history="") + return core_utils.process_records(data, 20) diff --git a/simplyblock_web/api/v2/storage_node.py b/simplyblock_web/api/v2/storage_node.py index f93fa5250..7d27ecc5e 100644 --- a/simplyblock_web/api/v2/storage_node.py +++ b/simplyblock_web/api/v2/storage_node.py @@ -22,32 +22,40 @@ @api.get('/', name='clusters:storage-nodes:list') def list(cluster: Cluster) -> List[StorageNodeDTO]: - return [ - StorageNodeDTO.from_model(storage_node) - for storage_node - in db.get_storage_nodes_by_cluster_id(cluster.get_id()) - ] + data = [] + for storage_node in db.get_storage_nodes_by_cluster_id(cluster.get_id()): + node_stat_obj = None + ret = db.get_node_capacity(storage_node, 1) + if ret: + node_stat_obj = ret[0] + data.append(StorageNodeDTO.from_model(storage_node, node_stat_obj)) + return data class StorageNodeParams(BaseModel): node_address: Annotated[str, Field(web_utils.IP_PATTERN)] interface_name: str - max_snapshots: int = Field(500) - ha_jm: bool = Field(True) - test_device: bool = Field(False) - spdk_image: Optional[str] + max_snapshots: Optional[int] = Field(500) + ha_jm: Optional[bool] = Field(True) + test_device: Optional[bool] = Field(False) + spdk_image: Optional[str] = Field("") spdk_debug: bool = Field(False) - full_page_unmap: bool = Field(False) data_nics: List[str] = Field([]) namespace: str = Field('default') + id_device_by_nqn: Optional[bool] = Field(False) jm_percent: util.Percent = Field(3) partitions: int = Field(1) iobuf_small_pool_count: int = Field(0) iobuf_large_pool_count: int = Field(0) + cr_name: str + cr_namespace: str + cr_plural: str + ha_jm_count: int = Field(3) + format_4k: bool = Field(False) @api.post('/', name='clusters:storage-nodes:create', status_code=201, responses={201: {"content": None}}) -def add(request: Request, cluster: Cluster, parameters: StorageNodeParams) -> Response: 
+def add(request: Request, cluster: Cluster, parameters: StorageNodeParams): task_id_or_false = tasks_controller.add_node_add_task( cluster.get_id(), { @@ -65,14 +73,17 @@ def add(request: Request, cluster: Cluster, parameters: StorageNodeParams) -> Re 'enable_test_device': parameters.test_device, 'namespace': parameters.namespace, 'enable_ha_jm': parameters.ha_jm, - 'full_page_unmap': parameters.full_page_unmap, + 'id_device_by_nqn': parameters.id_device_by_nqn, + 'cr_name': parameters.cr_name, + 'cr_namespace': parameters.cr_namespace, + 'cr_plural': parameters.cr_plural, + "ha_jm_count": parameters.ha_jm_count, + "format_4k": parameters.format_4k, } ) if not task_id_or_false: raise ValueError('Failed to create add-node task') - - task_url = request.app.url_path_for('clusters:storage-nodes:detail', cluster_id=cluster.get_id(), task_id=task_id_or_false) - return Response(status_code=201, headers={'Location': task_url}) + return task_id_or_false instance_api = APIRouter(prefix='/{storage_node_id}') @@ -90,18 +101,29 @@ def _lookup_storage_node(storage_node_id: UUID) -> StorageNodeModel: @instance_api.get('/', name='clusters:storage-nodes:detail') def get(cluster: Cluster, storage_node: StorageNode): - return StorageNodeDTO.from_model(storage_node) + node_stat_obj = None + ret = db.get_node_capacity(storage_node, 1) + if ret: + node_stat_obj = ret[0] + return StorageNodeDTO.from_model(storage_node, node_stat_obj) @instance_api.delete('/', name='clusters:storage-nodes:delete') def delete( - cluster: Cluster, storage_node: StorageNode, force_remove: bool = False, force_migrate: bool = False) -> Response: + cluster: Cluster, storage_node: StorageNode, force_remove: bool = False, force_migrate: bool = False, force_delete: bool = False ) -> Response: none_or_false = storage_node_ops.remove_storage_node( storage_node.get_id(), force_remove=force_remove, force_migrate=force_migrate ) if none_or_false == False: # noqa raise ValueError('Failed to remove storage node') + if 
force_delete: + none_or_false = storage_node_ops.delete_storage_node( + storage_node.get_id(), force=force_delete + ) + if none_or_false == False: # noqa + raise ValueError('Failed to delete storage node') + return Response(status_code=204) @@ -198,17 +220,20 @@ def shutdown(cluster: Cluster, storage_node: StorageNode, force: bool = False) - class _RestartParams(BaseModel): force: bool = False reattach_volume: bool = False + node_address: Optional[Annotated[str, Field(pattern=web_utils.IP_PATTERN)]] = None + @instance_api.post('/start', name='clusters:storage-nodes:start', status_code=202, responses={202: {"content": None}}) # Same as restart for now @instance_api.post('/restart', name='clusters:storage-nodes:restart', status_code=202, responses={202: {"content": None}}) -def restart(cluster: Cluster, storage_node: StorageNode, parameters: _RestartParams = _RestartParams()) -> Response: +def restart(cluster: Cluster, storage_node: StorageNode, parameters: _RestartParams) -> Response: storage_node = storage_node Thread( target=storage_node_ops.restart_storage_node, kwargs={ "node_id": storage_node.get_id(), "force": parameters.force, + "node_ip": parameters.node_address, "reattach_volume": parameters.reattach_volume, } ).start() diff --git a/simplyblock_web/api/v2/task.py b/simplyblock_web/api/v2/task.py index c17bec3b7..94ecccce3 100644 --- a/simplyblock_web/api/v2/task.py +++ b/simplyblock_web/api/v2/task.py @@ -5,7 +5,6 @@ from simplyblock_core.db_controller import DBController from simplyblock_core.models.job_schedule import JobSchedule -from simplyblock_core.controllers import tasks_controller from .cluster import Cluster from .dtos import TaskDTO @@ -16,13 +15,13 @@ @api.get('/', name='clusters:tasks:list') def list(cluster: Cluster) -> List[TaskDTO]: - return [ - TaskDTO.from_model(task) - for task - in tasks_controller.list_tasks(cluster.get_id()) - if task.cluster_id == cluster.get_id() - ] - + cluster_tasks = db.get_job_tasks(cluster.get_id(), limit=0) + 
data=[] + for t in cluster_tasks: + if t.function_name == JobSchedule.FN_DEV_MIG: + continue + data.append(t) + return [TaskDTO.from_model(task) for task in data] instance_api = APIRouter(prefix='/{task_id}') diff --git a/simplyblock_web/api/v2/volume.py b/simplyblock_web/api/v2/volume.py index 698788718..ba342f071 100644 --- a/simplyblock_web/api/v2/volume.py +++ b/simplyblock_web/api/v2/volume.py @@ -11,7 +11,7 @@ from .cluster import Cluster from .pool import StoragePool -from .dtos import VolumeDTO, SnapshotDTO +from .dtos import VolumeDTO, SnapshotDTO, TaskDTO from . import util @@ -21,11 +21,14 @@ @api.get('/', name='clusters:storage-pools:volumes:list') def list(request: Request, cluster: Cluster, pool: StoragePool) -> List[VolumeDTO]: - return [ - VolumeDTO.from_model(lvol, request, cluster.get_id()) - for lvol - in db.get_lvols_by_pool_id(pool.get_id()) - ] + data = [] + for lvol in db.get_lvols_by_pool_id(pool.get_id()): + stat_obj = None + ret = db.get_lvol_stats(lvol, 1) + if ret: + stat_obj = ret[0] + data.append(VolumeDTO.from_model(lvol, request, cluster.get_id(), stat_obj)) + return data class _CreateParams(BaseModel): @@ -43,6 +46,10 @@ class _CreateParams(BaseModel): pvc_name: Optional[str] = None ndcs: util.Unsigned = 0 npcs: util.Unsigned = 0 + fabric: str = "tcp" + max_namespace_per_subsys: int = 1 + do_replicate: bool = False + replication_cluster_id: Optional[str] = None class _CloneParams(BaseModel): @@ -85,6 +92,11 @@ def add( pvc_name=data.pvc_name, ndcs=data.ndcs, npcs=data.npcs, + fabric=data.fabric, + max_namespace_per_subsys=data.max_namespace_per_subsys, + do_replicate=data.do_replicate, + replication_cluster_id=data.replication_cluster_id, + ) elif isinstance(data, _CloneParams): volume_id_or_false, error = snapshot_controller.clone( @@ -122,7 +134,12 @@ def _lookup_volume(volume_id: UUID) -> LVol: @instance_api.get('/', name='clusters:storage-pools:volumes:detail') def get(request: Request, cluster: Cluster, pool: StoragePool, 
volume: Volume) -> VolumeDTO: - return VolumeDTO.from_model(volume, request, cluster.get_id()) + stat_obj = None + ret = db.get_lvol_stats(volume, 1) + if ret: + stat_obj = ret[0] + rep_info = lvol_controller.get_replication_info(volume.get_id()) + return VolumeDTO.from_model(volume, request, cluster.get_id(), stat_obj, rep_info) class UpdatableLVolParams(BaseModel): @@ -171,6 +188,26 @@ def inflate(cluster: Cluster, pool: StoragePool, volume: Volume) -> Response: return Response(status_code=204) +@instance_api.post('/replication_trigger', name='clusters:storage-pools:volumes:replication_start', status_code=204, responses={204: {"content": None}}) +def replication_trigger(cluster: Cluster, pool: StoragePool, volume: Volume) -> Response: + if not lvol_controller.replication_trigger(volume.get_id()): + raise ValueError('Failed to start volume snapshot replication') + + return Response(status_code=204) + +@instance_api.post('/replication_start', name='clusters:storage-pools:volumes:replication_start', status_code=204, responses={204: {"content": None}}) +def replication_start(cluster: Cluster, pool: StoragePool, volume: Volume) -> Response: + if not lvol_controller.replication_start(volume.get_id(), cluster.get_id()): + raise ValueError('Failed to start volume snapshot replication') + + return Response(status_code=204) + +@instance_api.post('/replication_stop', name='clusters:storage-pools:volumes:replication_stop', status_code=204, responses={204: {"content": None}}) +def replication_stop(cluster: Cluster, pool: StoragePool, volume: Volume) -> Response: + if not lvol_controller.replication_stop(volume.get_id()): + raise ValueError('Failed to stop volume snapshot replication') + + return Response(status_code=204) @instance_api.get('/connect', name='clusters:storage-pools:volumes:connect') def connect(cluster: Cluster, pool: StoragePool, volume: Volume): @@ -232,3 +269,27 @@ def create_snapshot( cluster_id=cluster.get_id(), pool_id=pool.get_id(), 
snapshot_id=snapshot_id, ) return Response(status_code=201, headers={'Location': entity_url}) + + +@instance_api.post('/replicate_lvol', name='clusters:storage-pools:volumes:replicate_lvol') +def replicate_lvol_on_target_cluster(cluster: Cluster, pool: StoragePool, volume: Volume): + return lvol_controller.replicate_lvol_on_target_cluster(volume.get_id()) + + +@instance_api.post('/replicate_lvol_on_source_cluster', name='clusters:storage-pools:volumes:replicate_lvol_on_source_cluster') +def replicate_lvol_on_source_cluster(cluster: Cluster, pool: StoragePool, volume: Volume): + return lvol_controller.replicate_lvol_on_source_cluster(volume.get_id()) + + +@instance_api.get('/list_replication_tasks', name='clusters:storage-pools:volumes:list_replication_tasks') +def list_replication_tasks(cluster: Cluster, pool: StoragePool, volume: Volume) -> List[TaskDTO]: + tasks = lvol_controller.list_replication_tasks(volume.get_id()) + return [TaskDTO.from_model(task) for task in tasks] + +@instance_api.get('/suspend', name='clusters:storage-pools:volumes:suspend') +def suspend(cluster: Cluster, pool: StoragePool, volume: Volume) -> bool: + return lvol_controller.suspend_lvol(volume.get_id()) + +@instance_api.get('/resume', name='clusters:storage-pools:volumes:resume') +def resume(cluster: Cluster, pool: StoragePool, volume: Volume) -> bool: + return lvol_controller.resume_lvol(volume.get_id()) diff --git a/simplyblock_web/auth_middleware.py b/simplyblock_web/auth_middleware.py index 8a1a9e83a..87449cb64 100644 --- a/simplyblock_web/auth_middleware.py +++ b/simplyblock_web/auth_middleware.py @@ -34,6 +34,10 @@ def decorated(*args: Any, **kwargs: Any) -> ResponseType: # Skip authentication for Swagger UI if request.method == "GET" and request.path.startswith("/swagger"): return cast(ResponseType, f(*args, **kwargs)) + if request.method == "POST" and request.path.startswith("/cluster/create_first"): + return cast(ResponseType, f(*args, **kwargs)) + if request.method == "GET" and 
request.path.startswith("/health/fdb"): + return cast(ResponseType, f(*args, **kwargs)) cluster_id: str = "" cluster_secret: str = "" diff --git a/simplyblock_web/node_configure.py b/simplyblock_web/node_configure.py index 6b69ee347..ff5a2434d 100755 --- a/simplyblock_web/node_configure.py +++ b/simplyblock_web/node_configure.py @@ -1,54 +1,55 @@ -#!/usr/bin/env python -# encoding: utf-8 - -import argparse -import logging -import os -import sys -from typing import List, Optional, cast - -from kubernetes.client import ApiException, CoreV1Api - -from simplyblock_core import constants, utils -from simplyblock_core.storage_node_ops import ( - generate_automated_deployment_config, - upgrade_automated_deployment_config, -) -from simplyblock_cli.clibase import range_type -from simplyblock_web import node_utils_k8s - - -logger = logging.getLogger(__name__) -logger.setLevel(constants.LOG_LEVEL) - -POD_PREFIX: str = "snode-spdk-pod" - -def _is_pod_present_for_node() -> bool: - """ - Check if a pod with the specified prefix is already running on the current node. 
- - Returns: - bool: True if a matching pod is found, False otherwise - - Raises: - RuntimeError: If there's an error communicating with the Kubernetes API - """ - k8s_core_v1: CoreV1Api = cast(CoreV1Api, utils.get_k8s_core_client()) - namespace: str = node_utils_k8s.get_namespace() - node_name: Optional[str] = os.environ.get("HOSTNAME") - - if not node_name: +#!/usr/bin/env python +# encoding: utf-8 + +import argparse +import logging +import sys +from typing import List, Optional, cast + +from kubernetes.client import ApiException, CoreV1Api + +from simplyblock_core import constants, utils +from simplyblock_core.storage_node_ops import ( + generate_automated_deployment_config, + upgrade_automated_deployment_config, +) +from simplyblock_cli.clibase import range_type +from simplyblock_web import node_utils_k8s +import os +import subprocess + +logger = logging.getLogger(__name__) +logger.setLevel(constants.LOG_LEVEL) + +POD_PREFIX: str = "snode-spdk-pod" + + +def _is_pod_present_for_node() -> bool: + """ + Check if a pod with the specified prefix is already running on the current node. 
+ + Returns: + bool: True if a matching pod is found, False otherwise + + Raises: + RuntimeError: If there's an error communicating with the Kubernetes API + """ + k8s_core_v1: CoreV1Api = cast(CoreV1Api, utils.get_k8s_core_client()) + namespace: str = node_utils_k8s.get_namespace() + node_name: Optional[str] = os.environ.get("HOSTNAME") + + if not node_name: raise RuntimeError("HOSTNAME environment variable not set") try: resp = k8s_core_v1.list_namespaced_pod(namespace) for pod in resp.items: if ( - pod.metadata and - pod.metadata.name and - pod.spec and - pod.spec.node_name == node_name and - pod.metadata.name.startswith(POD_PREFIX) + pod.metadata and + pod.metadata.name and + pod.spec and + pod.spec.node_name == node_name and + pod.metadata.name.startswith(POD_PREFIX) ): return True except ApiException as e: @@ -66,7 +67,7 @@ def parse_arguments() -> argparse.Namespace: argparse.Namespace: Parsed command line arguments """ parser = argparse.ArgumentParser(description="Automated Deployment Configuration Script") - + # Define command line arguments parser.add_argument( '--max-lvol', @@ -121,16 +122,45 @@ def parse_arguments() -> argparse.Namespace: ) parser.add_argument( '--cores-percentage', - help='The percentage of cores to be used for spdk (0-100)', - type=range_type(0, 100), + help='The percentage of cores to be used for spdk (0-99)', + type=range_type(0, 99), dest='cores_percentage', required=False, default=0 ) - + parser.add_argument( + '--force', + help='Force format detected or passed nvme pci address to 4K and clean partitions', + action='store_true', + dest='force', + required=False + ) + parser.add_argument( + '--device-model', + help='NVMe SSD model string, example: --model PM1628, --device-model and --size-range must be set together', + type=str, + default='', + dest='device_model', + required=False + ) + parser.add_argument( + '--size-range', + help='NVMe SSD device size range separated by -, can be X(m,g,t) or bytes as integer, example: 
--size-range 50G-1T or --size-range 1232345-67823987, --device-model and --size-range must be set together', + type=str, + default='', + dest='size_range', + required=False + ) + parser.add_argument( + '--nvme-devices', + help='Comma separated list of nvme namespace names like nvme0n1,nvme1n1...', + type=str, + default='', + dest='nvme_names', + required=False + ) return parser.parse_args() - def validate_arguments(args: argparse.Namespace) -> None: """ Validate the provided command line arguments. @@ -145,8 +175,7 @@ def validate_arguments(args: argparse.Namespace) -> None: if not args.max_lvol: raise argparse.ArgumentError(None, '--max-lvol is required') if not args.max_prov: - raise argparse.ArgumentError(None, '--max-size is required') - + args.max_prov=0 try: max_lvol = int(args.max_lvol) if max_lvol <= 0: @@ -156,15 +185,15 @@ def validate_arguments(args: argparse.Namespace) -> None: None, f"Invalid value for max-lvol '{args.max_lvol}': {str(e)}" ) - + if args.pci_allowed and args.pci_blocked: raise argparse.ArgumentError( None, "pci-allowed and pci-blocked cannot be both specified" ) - + max_prov = utils.parse_size(args.max_prov, assume_unit='G') - if max_prov <= 0: + if max_prov < 0: raise argparse.ArgumentError( None, f"Invalid storage size: {args.max_prov}. 
Must be a positive value with optional unit (e.g., 100G, 1T)" @@ -175,17 +204,19 @@ def main() -> None: """Main entry point for the node configuration script.""" try: args = parse_arguments() - + if args.upgrade: upgrade_automated_deployment_config() return - + + if not args.max_prov: + args.max_prov=0 validate_arguments(args) - + if _is_pod_present_for_node(): logger.info("Skipped generating automated deployment configuration — pod already present.") sys.exit(0) - + # Process socket configuration sockets_to_use: List[int] = [0] if args.sockets_to_use: @@ -196,7 +227,7 @@ def main() -> None: None, f"Invalid value for sockets-to-use '{args.sockets_to_use}': {str(e)}" ) - + nodes_per_socket: int = 1 if args.nodes_per_socket: try: @@ -208,16 +239,19 @@ def main() -> None: None, f"Invalid value for nodes-per-socket '{args.nodes_per_socket}': {str(e)}" ) - + # Process PCI device filters pci_allowed: List[str] = [] pci_blocked: List[str] = [] - + nvme_names: List[str] = [] + if args.pci_allowed: pci_allowed = [pci.strip() for pci in args.pci_allowed.split(',') if pci.strip()] if args.pci_blocked: pci_blocked = [pci.strip() for pci in args.pci_blocked.split(',') if pci.strip()] - + if args.nvme_names: + nvme_names = [nvme_name.strip() for nvme_name in args.nvme_names.split(',') if nvme_name.strip()] + # Generate the deployment configuration generate_automated_deployment_config( max_lvol=int(args.max_lvol), @@ -226,9 +260,36 @@ def main() -> None: sockets_to_use=sockets_to_use, pci_allowed=pci_allowed, pci_blocked=pci_blocked, - cores_percentage=args.cores_percentage + cores_percentage=args.cores_percentage, + force=args.force, + device_model=args.device_model, + size_range=args.size_range, + nvme_names=nvme_names, + k8s=True ) - + + logger.info("create RPC socket mount") + mount_point = "/mnt/ramdisk" + size = "1G" + fstab_entry = f"tmpfs {mount_point} tmpfs size={size},mode=1777,noatime 0 0\n" + + # 1️⃣ Create the mount point if it doesn't exist + 
os.makedirs(mount_point, exist_ok=True) + + # 2️⃣ Add to /etc/fstab if not already present + with open("/etc/fstab", "r+") as fstab: + lines = fstab.readlines() + if not any(mount_point in line for line in lines): + fstab.write(fstab_entry) + print(f"Added fstab entry for {mount_point}") + else: + print(f"fstab entry for {mount_point} already exists") + + # 3️⃣ Mount the RAM disk immediately + subprocess.run(["mount", mount_point], check=True) + + # 4️⃣ Verify + subprocess.run(["df", "-h", mount_point]) except argparse.ArgumentError as e: logger.error(f"Argument error: {e}") sys.exit(1) diff --git a/simplyblock_web/node_utils_k8s.py b/simplyblock_web/node_utils_k8s.py index 4626a89c9..b1440744d 100644 --- a/simplyblock_web/node_utils_k8s.py +++ b/simplyblock_web/node_utils_k8s.py @@ -5,6 +5,7 @@ import time from simplyblock_core.utils import get_k8s_batch_client +from kubernetes.client import ApiException node_name = os.environ.get("HOSTNAME") @@ -23,7 +24,7 @@ def get_namespace(): return out return default_namespace -def wait_for_job_completion(job_name, namespace, timeout=60): +def wait_for_job_completion(job_name, namespace, timeout=180): batch_v1 = get_k8s_batch_client() for _ in range(timeout): job = batch_v1.read_namespaced_job(job_name, namespace) @@ -33,3 +34,19 @@ def wait_for_job_completion(job_name, namespace, timeout=60): raise RuntimeError(f"Job '{job_name}' failed") time.sleep(3) raise TimeoutError(f"Timeout waiting for Job '{job_name}' to complete") + +def wait_for_job_deletion(job_name, namespace, timeout=60): + batch_v1 = get_k8s_batch_client() + + for _ in range(timeout): + try: + batch_v1.read_namespaced_job(job_name, namespace) + except ApiException as e: + if e.status == 404: + return True + else: + raise + + time.sleep(2) + + raise TimeoutError(f"Timeout waiting for Job '{job_name}' to be deleted") diff --git a/simplyblock_web/static/openapi.json b/simplyblock_web/static/openapi.json new file mode 100644 index 000000000..3e2a05130 --- 
/dev/null +++ b/simplyblock_web/static/openapi.json @@ -0,0 +1 @@ +{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/api/v2/clusters/":{"get":{"summary":"Clusters:List","operationId":"clusters_list_api_v2_clusters__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ClusterDTO"},"title":"Response Clusters List Api V2 Clusters Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Clusters:Create","operationId":"clusters_create_api_v2_clusters__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cluster Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterParams"}}}},"responses":{"201":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/":{"get":{"summary":"Clusters:Detail","operationId":"clusters_detail_api_v2_clusters__cluster_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClusterDTO"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"summary":"Clusters:Update","operationId":"clusters_update_api_v2_clusters__cluster_id___put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdatableClusterParameters"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Delete","operationId":"clusters_delete_api_v2_clusters__cluster_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/capacity":{"get":{"summary":"Clusters:Capacity","operationId":"clusters_capacity_api_v2_clusters__cluster_id__capacity_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/iostats":{"get":{"summary":"Clusters:Iostats","operationId":"clusters_iostats_api_v2_clusters__cluster_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/logs":{"get":{"summary":"Clusters:Logs","operationId":"clusters_logs_api_v2_clusters__cluster_id__logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":50,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/start":{"post":{"summary":"Clusters:Start","operationId":"clusters_start_api_v2_clusters__cluster_id__start_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/shutdown":{"post":{"summary":"Clusters:Shutdown","operationId":"clusters_shutdown_api_v2_clusters__cluster_id__shutdown_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/activate":{"post":{"summary":"Clusters:Activate","operationId":"clusters_activate_api_v2_clusters__cluster_id__activate_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/update":{"post":{"summary":"Clusters:Upgrade","operationId":"clusters_upgrade_api_v2_clusters__cluster_id__update_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/_UpdateParams"}}}},"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/":{"get":{"summary":"Clusters:Storage-Nodes:List","operationId":"clusters_storage_nodes_list_api_v2_clusters__cluster_id__storage_nodes__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/StorageNodeDTO"},"title":"Response Clusters Storage Nodes List Api V2 Clusters Cluster Id Storage Nodes Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Clusters:Storage-Nodes:Create","operationId":"clusters_storage_nodes_create_api_v2_clusters__cluster_id__storage_nodes__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StorageNodeParams"}}}},"responses":{"201":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/":{"get":{"summary":"Clusters:Storage-Nodes:Detail","operationId":"clusters_storage_nodes_detail_api_v2_clusters__cluster_id__storage_nodes__storage_node_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node 
Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Storage-Nodes:Delete","operationId":"clusters_storage_nodes_delete_api_v2_clusters__cluster_id__storage_nodes__storage_node_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"force_remove","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Force Remove"}},{"name":"force_migrate","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Force Migrate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/capacity":{"get":{"summary":"Clusters:Storage-Nodes:Capacity","operationId":"clusters_storage_nodes_capacity_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__capacity_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/iostats":{"get":{"summary":"Clusters:Storage-Nodes:Iostats","operationId":"clusters_storage_nodes_iostats_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/nics":{"get":{"summary":"Clusters:Storage-Nodes:Nics:List","operationId":"clusters_storage_nodes_nics_list_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__nics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/nics/{nic_id}/iostats":{"get":{"summary":"Clusters:Storage-Nodes:Nics:Iostats","operationId":"clusters_storage_nodes_nics_iostats_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__nics__nic_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"nic_id","in":"path","required":true,"schema":{"type":"string","title":"Nic Id"}},{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/suspend":{"post":{"summary":"Clusters:Storage-Nodes:Suspend","operationId":"clusters_storage_nodes_suspend_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__suspend_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"force","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Force"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/resume":{"post":{"summary":"Clusters:Storage-Nodes:Resume","operationId":"clusters_storage_nodes_resume_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__resume_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/shutdown":{"post":{"summary":"Clusters:Storage-Nodes:Shutdown","operationId":"clusters_storage_nodes_shutdown_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__shutdown_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"force","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Force"}}],"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/restart":{"post":{"summary":"Clusters:Storage-Nodes:Restart","operationId":"clusters_storage_nodes_restart_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__restart_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/_RestartParams","default":{"force":false,"reattach_volume":false}}}}},"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/start":{"post":{"summary":"Clusters:Storage-Nodes:Start","operationId":"clusters_storage_nodes_start_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__start_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/_RestartParams","default":{"force":false,"reattach_volume":false}}}}},"responses":{"202":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/devices/":{"get":{"summary":"Clusters:Storage 
Nodes:Devices:List","operationId":"clusters_storage_nodes_devices_list_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/DeviceDTO"},"title":"Response Clusters Storage Nodes Devices List Api V2 Clusters Cluster Id Storage Nodes Storage Node Id Devices Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/devices/{device_id}/":{"get":{"summary":"Clusters:Storage Nodes:Devices:Detail","operationId":"clusters_storage_nodes_devices_detail_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__device_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"device_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Device Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeviceDTO"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Storage 
Nodes:Devices:Delete","operationId":"clusters_storage_nodes_devices_delete_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__device_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"device_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Device Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/devices/{device_id}/capacity":{"get":{"summary":"Clusters:Storage Nodes:Devices:Capacity","operationId":"clusters_storage_nodes_devices_capacity_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__device_id__capacity_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"device_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Device Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/devices/{device_id}/iostats":{"get":{"summary":"Clusters:Storage 
Nodes:Devices:Iostats","operationId":"clusters_storage_nodes_devices_iostats_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__device_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"device_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Device Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-nodes/{storage_node_id}/devices/{device_id}/reset":{"post":{"summary":"Clusters:Storage Nodes:Devices:Reset","operationId":"clusters_storage_nodes_devices_reset_api_v2_clusters__cluster_id__storage_nodes__storage_node_id__devices__device_id__reset_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"storage_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Storage Node Id"}},{"name":"device_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Device Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/":{"get":{"summary":"Clusters:Storage-Pools:List","operationId":"clusters_storage_pools_list_api_v2_clusters__cluster_id__storage_pools__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/StoragePoolDTO"},"title":"Response Clusters Storage Pools List Api V2 Clusters Cluster Id Storage Pools Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Clusters:Storage-Pools:Create","operationId":"clusters_storage_pools_create_api_v2_clusters__cluster_id__storage_pools__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/StoragePoolParams"}}}},"responses":{"201":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/":{"get":{"summary":"Clusters:Storage-Pools:Detail","operationId":"clusters_storage_pools_detail_api_v2_clusters__cluster_id__storage_pools__pool_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool 
Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/StoragePoolDTO"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Storage-Pools:Delete","operationId":"clusters_storage_pools_delete_api_v2_clusters__cluster_id__storage_pools__pool_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"summary":"Clusters:Storage-Pools:Update","operationId":"clusters_storage_pools_update_api_v2_clusters__cluster_id__storage_pools__pool_id___put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdatableStoragePoolParams"}}}},"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/iostats":{"get":{"summary":"Clusters:Storage-Pools:Iostats","operationId":"clusters_storage_pools_iostats_api_v2_clusters__cluster_id__storage_pools__pool_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":20,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/":{"get":{"summary":"Clusters:Storage-Pools:Volumes:List","operationId":"clusters_storage_pools_volumes_list_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/VolumeDTO"},"title":"Response Clusters Storage Pools Volumes List Api V2 Clusters Cluster Id Storage Pools Pool Id Volumes Get"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Clusters:Storage-Pools:Volumes:Create","operationId":"clusters_storage_pools_volumes_create_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RootModel_Union__CreateParams___CloneParams__"}}}},"responses":{"201":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/":{"get":{"summary":"Clusters:Storage-Pools:Volumes:Detail","operationId":"clusters_storage_pools_volumes_detail_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/VolumeDTO"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"put":{"summary":"Clusters:Storage-Pools:Volumes:Update","operationId":"clusters_storage_pools_volumes_update_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id___put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/UpdatableLVolParams"}}}},"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Storage-Pools:Volumes:Delete","operationId":"clusters_storage_pools_volumes_delete_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/inflate":{"post":{"summary":"Clusters:Storage-Pools:Volumes:Inflate","operationId":"clusters_storage_pools_volumes_inflate_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__inflate_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/connect":{"get":{"summary":"Clusters:Storage-Pools:Volumes:Connect","operationId":"clusters_storage_pools_volumes_connect_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__connect_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/capacity":{"get":{"summary":"Clusters:Storage-Pools:Volumes:Capacity","operationId":"clusters_storage_pools_volumes_capacity_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__capacity_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/iostats":{"get":{"summary":"Clusters:Storage-Pools:Volumes:Iostats","operationId":"clusters_storage_pools_volumes_iostats_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__iostats_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}},{"name":"history","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"History"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/volumes/{volume_id}/snapshots":{"get":{"summary":"Clusters:Storage-Pools:Volumes:Snapshots:List","operationId":"clusters_storage_pools_volumes_snapshots_list_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__snapshots_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/SnapshotDTO"},"title":"Response Clusters Storage Pools Volumes Snapshots List Api V2 Clusters Cluster Id Storage Pools Pool Id Volumes Volume Id Snapshots Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"post":{"summary":"Clusters:Storage-Pools:Volumes:Snapshots:Create","operationId":"clusters_storage_pools_volumes_snapshots_create_api_v2_clusters__cluster_id__storage_pools__pool_id__volumes__volume_id__snapshots_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"volume_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Volume 
Id"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/_SnapshotParams"}}}},"responses":{"201":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/snapshots/":{"get":{"summary":"Clusters:Storage-Pools:Snapshots:List","operationId":"clusters_storage_pools_snapshots_list_api_v2_clusters__cluster_id__storage_pools__pool_id__snapshots__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/SnapshotDTO"},"title":"Response Clusters Storage Pools Snapshots List Api V2 Clusters Cluster Id Storage Pools Pool Id Snapshots Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/clusters/{cluster_id}/storage-pools/{pool_id}/snapshots/{snapshot_id}/":{"get":{"summary":"Clusters:Storage-Pools:Snapshots:Detail","operationId":"clusters_storage_pools_snapshots_detail_api_v2_clusters__cluster_id__storage_pools__pool_id__snapshots__snapshot_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"snapshot_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Snapshot Id"}}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SnapshotDTO"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"summary":"Clusters:Storage-Pools:Snapshots:Delete","operationId":"clusters_storage_pools_snapshots_delete_api_v2_clusters__cluster_id__storage_pools__pool_id__snapshots__snapshot_id___delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}},{"name":"pool_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Pool Id"}},{"name":"snapshot_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Snapshot Id"}}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/management-nodes/":{"get":{"summary":"Management Nodes:List","operationId":"management_nodes_list_api_v2_management_nodes__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"cluster_id","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/ManagementNodeDTO"},"title":"Response Management Nodes List Api V2 Management Nodes Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/v2/management-nodes/{management_node_id}/":{"get":{"summary":"Management 
Node:Detail","operationId":"management_node_detail_api_v2_management_nodes__management_node_id___get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"management_node_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","title":"Management Node Id"}},{"name":"cluster_id","in":"query","required":true,"schema":{"type":"string","format":"uuid","title":"Cluster Id"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ManagementNodeDTO"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"ClusterDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"nqn":{"type":"string","title":"Nqn"},"status":{"type":"string","enum":["active","read_only","inactive","suspended","degraded","unready","in_activation","in_expansion"],"title":"Status"},"rebalancing":{"type":"boolean","title":"Rebalancing"},"block_size":{"type":"integer","minimum":0.0,"title":"Block Size"},"coding":{"prefixItems":[{"type":"integer","minimum":0.0},{"type":"integer","minimum":0.0}],"type":"array","maxItems":2,"minItems":2,"title":"Coding"},"ha":{"type":"boolean","title":"Ha"},"utliziation_critical":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Utliziation Critical"},"utilization_warning":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Utilization Warning"},"provisioned_cacacity_critical":{"type":"integer","minimum":0.0,"title":"Provisioned Cacacity Critical"},"provisioned_cacacity_warning":{"type":"integer","minimum":0.0,"title":"Provisioned Cacacity Warning"},"node_affinity":{"type":"boolean","title":"Node Affinity"},"anti_affinity":{"type":"boolean","title":"Anti 
Affinity"},"secret":{"type":"string","title":"Secret"}},"type":"object","required":["id","name","nqn","status","rebalancing","block_size","coding","ha","utliziation_critical","utilization_warning","provisioned_cacacity_critical","provisioned_cacacity_warning","node_affinity","anti_affinity","secret"],"title":"ClusterDTO"},"ClusterParams":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"blk_size":{"type":"integer","enum":[512,4096],"title":"Blk Size","default":512},"page_size_in_blocks":{"type":"integer","exclusiveMinimum":0.0,"title":"Page Size In Blocks","default":2097152},"cap_warn":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Cap Warn","default":0},"cap_crit":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Cap Crit","default":0},"prov_cap_warn":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Prov Cap Warn","default":0},"prov_cap_crit":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Prov Cap Crit","default":0},"distr_ndcs":{"type":"integer","title":"Distr Ndcs","default":1},"distr_npcs":{"type":"integer","title":"Distr Npcs","default":1},"distr_bs":{"type":"integer","title":"Distr Bs","default":4096},"distr_chunk_bs":{"type":"integer","title":"Distr Chunk Bs","default":4096},"ha_type":{"type":"string","enum":["single","ha"],"title":"Ha Type","default":"single"},"qpair_count":{"type":"integer","title":"Qpair Count","default":256},"max_queue_size":{"type":"integer","title":"Max Queue Size","default":128},"inflight_io_threshold":{"type":"integer","title":"Inflight Io Threshold","default":4},"enable_node_affinity":{"type":"boolean","title":"Enable Node Affinity","default":false},"strict_node_anti_affinity":{"type":"boolean","title":"Strict Node Anti Affinity","default":false}},"type":"object","title":"ClusterParams"},"DeviceDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"status":{"type":"string","title":"Status"},"health_check":{"type":"boolean","title":"Health 
Check"},"size":{"type":"integer","title":"Size"},"io_error":{"type":"boolean","title":"Io Error"},"is_partition":{"type":"boolean","title":"Is Partition"},"nvmf_ips":{"items":{"type":"string","format":"ipv4"},"type":"array","title":"Nvmf Ips"},"nvmf_nqn":{"type":"string","title":"Nvmf Nqn","default":""},"nvmf_port":{"type":"integer","title":"Nvmf Port","default":0}},"type":"object","required":["id","status","health_check","size","io_error","is_partition","nvmf_ips"],"title":"DeviceDTO"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ManagementNodeDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"status":{"type":"string","title":"Status"},"hostname":{"type":"string","title":"Hostname"},"ip":{"type":"string","format":"ipv4","title":"Ip"}},"type":"object","required":["id","status","hostname","ip"],"title":"ManagementNodeDTO"},"RootModel_Union__CreateParams___CloneParams__":{"anyOf":[{"$ref":"#/components/schemas/_CreateParams"},{"$ref":"#/components/schemas/_CloneParams"}],"title":"RootModel[Union[_CreateParams, _CloneParams]]"},"SnapshotDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"status":{"type":"string","title":"Status"},"health_check":{"type":"boolean","title":"Health Check"},"size":{"type":"integer","minimum":0.0,"title":"Size"},"used_size":{"type":"integer","minimum":0.0,"title":"Used 
Size"},"lvol":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Lvol"}},"type":"object","required":["id","name","status","health_check","size","used_size","lvol"],"title":"SnapshotDTO"},"StorageNodeDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"status":{"type":"string","title":"Status"},"ip":{"type":"string","format":"ipv4","title":"Ip"}},"type":"object","required":["id","status","ip"],"title":"StorageNodeDTO"},"StorageNodeParams":{"properties":{"node_address":{"type":"string","title":"Node Address","default":"^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"},"interface_name":{"type":"string","title":"Interface Name"},"max_snapshots":{"type":"integer","title":"Max Snapshots","default":500},"ha_jm":{"type":"boolean","title":"Ha Jm","default":true},"test_device":{"type":"boolean","title":"Test Device","default":false},"spdk_image":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Spdk Image"},"spdk_debug":{"type":"boolean","title":"Spdk Debug","default":false},"full_page_unmap":{"type":"boolean","title":"Full Page Unmap","default":false},"data_nics":{"items":{"type":"string"},"type":"array","title":"Data Nics","default":[]},"namespace":{"type":"string","title":"Namespace","default":"default"},"jm_percent":{"type":"integer","maximum":100.0,"minimum":0.0,"title":"Jm Percent","default":3},"partitions":{"type":"integer","title":"Partitions","default":1},"iobuf_small_pool_count":{"type":"integer","title":"Iobuf Small Pool Count","default":0},"iobuf_large_pool_count":{"type":"integer","title":"Iobuf Large Pool Count","default":0},"ha_jm_count":{"type":"integer","title":"Ha Jm Count","default":3}},"type":"object","required":["interface_name","spdk_image"],"title":"StorageNodeParams"},"StoragePoolDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"status":{"type":"string","enum":["active","inactive"],"title":"Status"},"max_size":{"type":"integer","minimum":0.0,"title":"Max 
Size"},"volume_max_size":{"type":"integer","minimum":0.0,"title":"Volume Max Size"},"max_rw_iops":{"type":"integer","minimum":0.0,"title":"Max Rw Iops"},"max_rw_mbytes":{"type":"integer","minimum":0.0,"title":"Max Rw Mbytes"},"max_r_mbytes":{"type":"integer","minimum":0.0,"title":"Max R Mbytes"},"max_w_mbytes":{"type":"integer","minimum":0.0,"title":"Max W Mbytes"}},"type":"object","required":["id","name","status","max_size","volume_max_size","max_rw_iops","max_rw_mbytes","max_r_mbytes","max_w_mbytes"],"title":"StoragePoolDTO"},"StoragePoolParams":{"properties":{"name":{"type":"string","title":"Name"},"pool_max":{"type":"integer","minimum":0.0,"title":"Pool Max","default":0},"volume_max_size":{"type":"integer","minimum":0.0,"title":"Volume Max Size","default":0},"max_rw_iops":{"type":"integer","minimum":0.0,"title":"Max Rw Iops","default":0},"max_rw_mbytes":{"type":"integer","minimum":0.0,"title":"Max Rw Mbytes","default":0},"max_r_mbytes":{"type":"integer","minimum":0.0,"title":"Max R Mbytes","default":0},"max_w_mbytes":{"type":"integer","minimum":0.0,"title":"Max W Mbytes","default":0}},"type":"object","required":["name"],"title":"StoragePoolParams"},"UpdatableClusterParameters":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"}},"type":"object","title":"UpdatableClusterParameters"},"UpdatableLVolParams":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"max_rw_iops":{"type":"integer","minimum":0.0,"title":"Max Rw Iops","default":0},"max_rw_mbytes":{"type":"integer","minimum":0.0,"title":"Max Rw Mbytes","default":0},"max_r_mbytes":{"type":"integer","minimum":0.0,"title":"Max R Mbytes","default":0},"max_w_mbytes":{"type":"integer","minimum":0.0,"title":"Max W 
Mbytes","default":0},"size":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Size"}},"type":"object","title":"UpdatableLVolParams"},"UpdatableStoragePoolParams":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"max_size":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Max Size"},"volume_max_size":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Volume Max Size"},"max_rw_iops":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Max Rw Iops"},"max_rw_mbytes":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Max Rw Mbytes"},"max_r_mbytes":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Max R Mbytes"},"max_w_mbytes":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Max W Mbytes"}},"type":"object","title":"UpdatableStoragePoolParams"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VolumeDTO":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"name":{"type":"string","title":"Name"},"status":{"type":"string","title":"Status"},"health_check":{"type":"boolean","title":"Health Check"},"nqn":{"type":"string","title":"Nqn"},"nodes":{"items":{"type":"string"},"type":"array","title":"Nodes"},"port":{"type":"integer","exclusiveMaximum":65536.0,"minimum":0.0,"title":"Port"},"size":{"type":"integer","minimum":0.0,"title":"Size"},"cloned_from":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cloned From"},"crypto_key":{"anyOf":[{"prefixItems":[{"type":"string"},{"type":"string"}],"type":"array","maxItems":2,"minItems":2},{"type":"null"}],"title":"Crypto Key"},"high_availability":{"type":"boolean","title":"High 
Availability"},"max_rw_iops":{"type":"integer","minimum":0.0,"title":"Max Rw Iops"},"max_rw_mbytes":{"type":"integer","minimum":0.0,"title":"Max Rw Mbytes"},"max_r_mbytes":{"type":"integer","minimum":0.0,"title":"Max R Mbytes"},"max_w_mbytes":{"type":"integer","minimum":0.0,"title":"Max W Mbytes"}},"type":"object","required":["id","name","status","health_check","nqn","nodes","port","size","cloned_from","crypto_key","high_availability","max_rw_iops","max_rw_mbytes","max_r_mbytes","max_w_mbytes"],"title":"VolumeDTO"},"_CloneParams":{"properties":{"name":{"type":"string","title":"Name"},"snapshot_id":{"anyOf":[{"type":"string","pattern":"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"},{"type":"null"}],"title":"Snapshot Id"},"size":{"type":"integer","minimum":0.0,"title":"Size","default":0}},"type":"object","required":["name","snapshot_id"],"title":"_CloneParams"},"_CreateParams":{"properties":{"name":{"type":"string","title":"Name"},"size":{"type":"integer","minimum":0.0,"title":"Size"},"crypto_key":{"anyOf":[{"prefixItems":[{"type":"string"},{"type":"string"}],"type":"array","maxItems":2,"minItems":2},{"type":"null"}],"title":"Crypto Key"},"max_rw_iops":{"type":"integer","minimum":0.0,"title":"Max Rw Iops","default":0},"max_rw_mbytes":{"type":"integer","minimum":0.0,"title":"Max Rw Mbytes","default":0},"max_r_mbytes":{"type":"integer","minimum":0.0,"title":"Max R Mbytes","default":0},"max_w_mbytes":{"type":"integer","minimum":0.0,"title":"Max W Mbytes","default":0},"ha_type":{"anyOf":[{"type":"string","enum":["single","ha"]},{"type":"null"}],"title":"Ha Type"},"host_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Host Id"},"priority_class":{"type":"integer","enum":[0,1],"title":"Priority Class","default":0},"namespace":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Namespace"},"pvc_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Pvc 
Name"},"ndcs":{"type":"integer","minimum":0.0,"title":"Ndcs","default":0},"npcs":{"type":"integer","minimum":0.0,"title":"Npcs","default":0}},"type":"object","required":["name","size"],"title":"_CreateParams"},"_RestartParams":{"properties":{"force":{"type":"boolean","title":"Force","default":false},"reattach_volume":{"type":"boolean","title":"Reattach Volume","default":false}},"type":"object","title":"_RestartParams"},"_SnapshotParams":{"properties":{"name":{"type":"string","title":"Name"}},"type":"object","required":["name"],"title":"_SnapshotParams"},"_UpdateParams":{"properties":{"management_image":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Management Image"},"spdk_image":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Spdk Image"},"restart":{"type":"boolean","title":"Restart","default":false}},"type":"object","required":["management_image","spdk_image"],"title":"_UpdateParams"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"}}}} \ No newline at end of file diff --git a/simplyblock_web/templates/Untitled-1.j2 b/simplyblock_web/templates/Untitled-1.j2 new file mode 100644 index 000000000..e69de29bb diff --git a/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 b/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 index 734d9c59e..85bfd0f7b 100644 --- a/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 +++ b/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 @@ -34,9 +34,18 @@ spec: - | set -e + MARKER="/var/simplyblock/.cpu_isolation_applied" + echo "--- Installing jq ---" apk add --no-cache jq + echo "--- Checking if node was already configured ---" + + if [[ -f "$MARKER" ]]; then + echo "[INFO] Node already configured. Skipping sleep and exiting..." 
+ exit 0 + fi + echo "--- Reading isolated cores from config ---" CONFIG_FILE="/var/simplyblock/sn_config_file" @@ -65,7 +74,7 @@ spec: operator: In values: - worker - - worker-isolated + - worker-isolated-{{ HOSTNAME }} nodeSelector: matchLabels: kubernetes.io/hostname: {{ HOSTNAME }} @@ -78,7 +87,7 @@ spec: metadata: name: worker-isolated-{{ HOSTNAME }} labels: - machineconfiguration.openshift.io/role: worker-isolated + machineconfiguration.openshift.io/role: worker-isolated-{{ HOSTNAME }} spec: kernelArguments: - "nohz_full=${ISOLATED_CORES}" @@ -93,11 +102,11 @@ spec: apiVersion: machineconfiguration.openshift.io/v1 kind: KubeletConfig metadata: - name: set-static-cpu-manager + name: set-static-cpu-manager-{{ HOSTNAME }} spec: machineConfigPoolSelector: matchLabels: - machineconfiguration.openshift.io/role: worker-isolated + machineconfiguration.openshift.io/role: worker-isolated-{{ HOSTNAME }} kubeletConfig: cpuManagerPolicy: static cpuManagerReconcilePeriod: 5s @@ -105,4 +114,8 @@ spec: echo "[INFO] Init setup and CPU isolation complete." - echo "--- Init setup complete ---" + echo "[INFO] Marking node as configured." + touch "$MARKER" + + echo "[INFO] Node is rebooting. Sleeping for 5 minutes to stop pipeline gracefully..." 
+ sleep 300 \ No newline at end of file diff --git a/simplyblock_web/templates/storage_core_isolation.yaml.j2 b/simplyblock_web/templates/storage_core_isolation.yaml.j2 index b6fafe2ee..30bbf8809 100644 --- a/simplyblock_web/templates/storage_core_isolation.yaml.j2 +++ b/simplyblock_web/templates/storage_core_isolation.yaml.j2 @@ -91,7 +91,7 @@ spec: - name: etc mountPath: /etc - name: rootfs - mountPath: / + mountPath: /host - name: var-simplyblock mountPath: /var/simplyblock command: ["/bin/sh", "-c"] @@ -113,13 +113,13 @@ spec: apt update && apt install -y grep jq nvme-cli tuned ;; ubuntu) - apt update && apt install -y grep jq nvme-cli tuned - apt-get install -y linux-modules-extra-$(uname -r) + chroot /host apt update && chroot /host apt install -y grep jq nvme-cli tuned + chroot /host apt-get install -y linux-modules-extra-$(uname -r) ;; - centos|rhel|rocky|almalinux) + centos|rhel|rocky|almalinux|ol) export YUM_RELEASEVER=$(awk -F'=' '/^VERSION_ID=/{gsub(/"/,"",$2); print $2}' /etc/os-release) export DNF_RELEASEVER=$(awk -F'=' '/^VERSION_ID=/{gsub(/"/,"",$2); print $2}' /etc/os-release) - dnf install -y grep jq nvme-cli kernel-modules-extra tuned \ + chroot /host dnf install -y grep jq nvme-cli kernel-modules-extra tuned \ --setopt=tsflags=nocontexts,noscripts --setopt=install_weak_deps=False ;; *) @@ -204,7 +204,7 @@ spec: tuned-adm profile "$TUNED_PROFILE" case "$OS_ID" in centos|rhel|rocky|almalinux) - grubby --update-kernel=ALL --args="isolcpus=$ISOLATED_CORES nohz_full=$ISOLATED_CORES rcu_nocbs=$ISOLATED_CORES" + chroot /host grubby --update-kernel=ALL --args="isolcpus=$ISOLATED_CORES nohz_full=$ISOLATED_CORES rcu_nocbs=$ISOLATED_CORES" ;; *) echo "" diff --git a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 index f10478c75..105ee1157 100644 --- a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 +++ b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 @@ -1,7 +1,7 @@ apiVersion: v1 
kind: Pod metadata: - name: snode-spdk-pod-{{ RPC_PORT }} + name: snode-spdk-pod-{{ RPC_PORT }}-{{ CLUSTER_ID }} namespace: {{ NAMESPACE }} labels: app: spdk-app-{{ RPC_PORT }} @@ -16,6 +16,7 @@ spec: nodeSelector: kubernetes.io/hostname: {{ HOSTNAME }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet tolerations: - effect: NoSchedule operator: Exists @@ -24,7 +25,8 @@ spec: volumes: - name: socket-dir emptyDir: - medium: "Memory" + medium: Memory + sizeLimit: 1Gi - name: host-sys hostPath: path: /sys @@ -57,6 +59,11 @@ spec: - name: dockercontainerlogdirectory hostPath: path: /var/log/pods + {% if MODE == "kubernetes" %} + - name: config + configMap: + name: simplyblock-fluent-bit-config + {% endif %} initContainers: - name: copy-script @@ -87,16 +94,10 @@ spec: value: "{{ TOTAL_HP }}" - name: RPC_PORT value: "{{ RPC_PORT }}" - - name: SPDKCSI_SECRET - valueFrom: - secretKeyRef: - name: simplyblock-csi-secret - key: secret.json - - name: CLUSTER_CONFIG - valueFrom: - configMapKeyRef: - name: simplyblock-csi-cm - key: config.json + - name: NSOCKET + value: "{{ NSOCKET }}" + - name: FW_PORT + value: "{{ FW_PORT }}" lifecycle: postStart: exec: @@ -105,7 +106,7 @@ spec: privileged: true volumeMounts: - name: socket-dir - mountPath: /var/tmp + mountPath: /mnt/ramdisk - name: host-sys mountPath: /sys - name: host-modules @@ -118,10 +119,10 @@ spec: mountPath: /etc/simplyblock resources: limits: - hugepages-2Mi: {{ MEM_GEGA }}Gi + hugepages-2Mi: {{ MEM_MEGA }}Mi cpu: {{ CORES }} requests: - hugepages-2Mi: {{ MEM_GEGA }}Gi + hugepages-2Mi: {{ MEM_MEGA }}Mi - name: spdk-proxy-container image: {{ SIMPLYBLOCK_DOCKER_IMAGE }} @@ -129,7 +130,7 @@ spec: command: ["python", "simplyblock_core/services/spdk_http_proxy_server.py"] volumeMounts: - name: socket-dir - mountPath: /var/tmp + mountPath: /mnt/ramdisk env: - name: SERVER_IP value: "{{ SERVER_IP }}" @@ -170,4 +171,19 @@ spec: - name: dockercontainerlogdirectory mountPath: /var/log/pods readOnly: true + {% elif MODE == 
"kubernetes" %} + - name: fluent-bit + image: fluent/fluent-bit:1.8.11 + volumeMounts: + - name: varlog + mountPath: /var/log + - name: config + mountPath: /fluent-bit/etc/ + resources: + requests: + cpu: "100m" + memory: "200Mi" + limits: + cpu: "400m" + memory: "2Gi" {% endif %} diff --git a/simplyblock_web/templates/storage_init_job.yaml.j2 b/simplyblock_web/templates/storage_init_job.yaml.j2 index 6432d4500..2d59571c7 100644 --- a/simplyblock_web/templates/storage_init_job.yaml.j2 +++ b/simplyblock_web/templates/storage_init_job.yaml.j2 @@ -17,18 +17,34 @@ spec: operator: Exists - effect: NoExecute operator: Exists + + volumes: + - name: etc-systemd + hostPath: + path: /etc/systemd/ + - name: host-proc + hostPath: + path: /proc containers: - name: init-setup image: simplyblock/ubuntu-tools:22.04 securityContext: privileged: true + volumeMounts: + - name: etc-systemd + mountPath: /etc/systemd/ + - name: host-proc + mountPath: /proc command: ["/bin/sh", "-c"] args: - | set -e echo "--- Starting init setup ---" - + + HUGEPAGES_BEFORE=$(grep HugePages_Total /proc/meminfo | awk '{print $2}') + echo "[INFO] Hugepages before: $HUGEPAGES_BEFORE" + NODE_IP=$(ip route get 1.1.1.1 | grep -oE 'src [0-9.]+' | awk '{print $2}') echo "Detected node IP: $NODE_IP" @@ -44,18 +60,63 @@ spec: OS_ID="$(cat /proc/version | awk '{print $3}' | awk -F'-' '{print $NF}')" if [ "$OS_ID" != "talos" ]; then - echo "--- Restarting kubelet ---" + + echo "--- Creating RAM disk systemd unit on host ---" + + + UNIT_PATH="/etc/systemd/system/var-mnt-ramdisk.mount" + + echo "Writing systemd unit to $UNIT_PATH" + + + cat < "$UNIT_PATH" + [Unit] + Description=1G RAM disk at /var/mnt/ramdisk + After=local-fs-pre.target + Before=local-fs.target + + [Mount] + What=tmpfs + Where=/var/mnt/ramdisk + Type=tmpfs + Options=size=1G,mode=1777 + + [Install] + WantedBy=local-fs.target + EOF + + echo "Starting RAM disk mounting." 
nsenter --target 1 --mount --uts --ipc --net --pid -- /bin/sh -c ' if command -v systemctl >/dev/null 2>&1; then - echo "Restarting kubelet..." - systemctl restart kubelet && echo "Kubelet restarted" || echo "Kubelet restart failed" + echo "Reloading systemd..." + systemctl daemon-reload || echo "systemd reload failed" + + echo "Enabling mount unit..." + systemctl enable var-mnt-ramdisk.mount || echo "enable failed" + + echo "Starting mount unit..." + systemctl start var-mnt-ramdisk.mount || echo "start failed (check logs or unit file)" else - echo "systemctl not found; skipping kubelet restart" + echo "systemctl not found; skipping RAM disk mounting" fi ' + + HUGEPAGES_AFTER=$(grep HugePages_Total /proc/meminfo | awk '{print $2}') + echo "[INFO] Hugepages after: $HUGEPAGES_AFTER" + + if [ "$HUGEPAGES_BEFORE" != "$HUGEPAGES_AFTER" ]; then + echo "[INFO] Hugepages changed, restarting kubelet..." + nsenter --target 1 --mount --uts --ipc --net --pid -- /bin/sh -c ' + if command -v systemctl >/dev/null 2>&1; then + systemctl restart kubelet && echo "Kubelet restarted" || echo "Kubelet restart failed" + fi + ' + else + echo "[INFO] Hugepages unchanged, skipping kubelet restart." + fi else echo "Talos detected - skipping nsenter and kubelet restart." - echo "Use 'talosctl service kubelet restart -n $NODE_IP' to restart the node kubelet" + echo "Use '\''talosctl service kubelet restart -n $NODE_IP'\'' to restart the node kubelet" fi echo "--- Init setup complete ---" diff --git a/simplyblock_web/utils.py b/simplyblock_web/utils.py index b0d1795df..de72db274 100644 --- a/simplyblock_web/utils.py +++ b/simplyblock_web/utils.py @@ -148,7 +148,8 @@ def error_handler(exception: Exception): class RPCPortParams(BaseModel): - rpc_port: int = Field(constants.RPC_HTTP_PROXY_PORT, ge=0, le=65536) + rpc_port: int = Field(constants.RPC_PORT_RANGE_START, ge=0, le=65536) + cluster_id: Optional[str] class DeviceParams(BaseModel):