diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index 298d93910715..2bd7305a2350 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -281,25 +281,29 @@ def uploadResults(def pipeline, SlurmCluster cluster, String clusterName, String
                 downloadPerfResultSucceed = Utils.exec(pipeline, script: scpFromRemoteCmd(remote, scpSources, "${stageName}/"), returnStatus: true, numRetries: 3) == 0
             }
 
+            // Download Slurm log files (*.log, *.out, slurm-*) found directly under perfResultsBasePath
+            def slurmLogListOutput = Utils.exec(
+                pipeline,
+                script: Utils.sshUserCmd(
+                    remote,
+                    "\"find '${perfResultsBasePath}' -maxdepth 1 \\( -name '*.log' -o -name '*.out' -o -name 'slurm-*' \\) -printf '%f\\n' || true\""
+                ),
+                returnStdout: true,
+                numRetries: 3
+            )?.trim() ?: ""
+            def slurmLogFiles = slurmLogListOutput.split(/\s+/).collect { it.trim() }.findAll { it }
+            echo "Slurm Log Files: ${slurmLogFiles}"
+            if (slurmLogFiles) {
+                sh "mkdir -p ${stageName}/slurm-logs"
+                def slurmLogSources = slurmLogFiles.size() == 1
+                    ? "${perfResultsBasePath}/${slurmLogFiles[0]}"
+                    : "{${slurmLogFiles.collect { "${perfResultsBasePath}/${it}" }.join(',')}}"
+                Utils.exec(pipeline, script: scpFromRemoteCmd(remote, slurmLogSources, "${stageName}/slurm-logs/"), returnStatus: true, numRetries: 3)
+            }
+
+
             echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}, downloadPerfResultSucceed: ${downloadPerfResultSucceed}"
-            if (hasTimeoutTest || downloadResultSucceed || downloadPerfResultSucceed) {
-                // On retry attempts, rename freshly-downloaded result XMLs so that
-                // (a) the tar for this attempt is distinguishable from prior attempts
-                //     already uploaded to Artifactory, and
-                // (b) the junit() glob below picks up this attempt's results as a
-                //     separate set, keeping earlier attempts' test data visible in
-                //     the Jenkins build report rather than overwriting it.
-                if (postTag) {
-                    sh """
-                        cd ${stageName}
-                        for f in results*.xml; do
-                            [ -f "\$f" ] || continue
-                            case "\$f" in *${postTag}.xml) continue ;; esac
-                            name=\"\${f%.xml}\"
-                            mv \"\$f\" \"\${name}${postTag}.xml\" || true
-                        done
-                    """
-                }
+            if (hasTimeoutTest || downloadResultSucceed || downloadPerfResultSucceed || slurmLogFiles) {
                 sh "ls -al ${stageName}/"
                 echo "Upload test results."
                 sh "tar -czvf results-${stageName}${postTag}.tar.gz ${stageName}/"
diff --git a/tensorrt_llm/_torch/pyexecutor/_util.py b/tensorrt_llm/_torch/pyexecutor/_util.py
index a7e49d9d369c..792372938952 100644
--- a/tensorrt_llm/_torch/pyexecutor/_util.py
+++ b/tensorrt_llm/_torch/pyexecutor/_util.py
@@ -408,7 +408,8 @@ def configure_kv_cache_capacity(self,
         mapping = self._mapping
 
         # TODO: support CP by generating dummy requests for it.
-        assert 'cp_type' not in mapping.cp_config
+        if not self._skip_est:
+            assert 'cp_type' not in mapping.cp_config
 
         fraction = self._kv_cache_config.free_gpu_memory_fraction
 
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
index 21b5f523942e..1c6a2ac8343d 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -300,7 +300,7 @@ def create_py_executor(
         A fully initialized PyExecutor instance.
     """
 
-    skip_est = os.environ.get("TRTLLM_SKIP_KV_CACHE_ESTIMATION", '0') == '1'
+    skip_est = os.environ.get("TRTLLM_SKIP_KV_CACHE_ESTIMATION", '1') == '1'
     llm_args, checkpoint_loader = _load_config_and_create_checkpoint_loader(
         llm_args, checkpoint_dir)
 
diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
index 8e0a98faf49e..af666ad36d68 100644
--- a/tensorrt_llm/llmapi/llm_args.py
+++ b/tensorrt_llm/llmapi/llm_args.py
@@ -2428,11 +2428,11 @@ class KvCacheConfig(StrictBaseModel, PybindMirror):
         description=
         "Number of sink tokens (tokens to always keep in attention window).")
     free_gpu_memory_fraction: Optional[float] = Field(
-        default=0.9,
+        default=0.7,
         ge=0,
         le=1,
         description=
-        "The fraction of GPU memory fraction that should be allocated for the KV cache. Default is 90%. If both `max_tokens` and `free_gpu_memory_fraction` are specified, memory corresponding to the minimum will be used."
+        "The fraction of GPU memory fraction that should be allocated for the KV cache. Default is 70%. If both `max_tokens` and `free_gpu_memory_fraction` are specified, memory corresponding to the minimum will be used."
     )
     host_cache_size: Optional[int] = Field(
         default=None,
diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py
index 84deaeb223d2..8f1ec4ce13c0 100644
--- a/tests/integration/defs/accuracy/test_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -539,7 +539,7 @@ def run_parallel_test(model_name: str,
         )
 
     kv_cache_config = {
-        "free_gpu_memory_fraction": 0.5,
+        "free_gpu_memory_fraction": 0.35,
         "enable_block_reuse": True
     }
     ctx_server_config = {
@@ -600,7 +600,8 @@ def test_auto_dtype(self, ctx_disable_overlap_scheduler,
         ctx_server_config = {
             "disable_overlap_scheduler": ctx_disable_overlap_scheduler,
             "kv_cache_config": {
-                "enable_block_reuse": ctx_enable_block_reuse
+                "enable_block_reuse": ctx_enable_block_reuse,
+                "free_gpu_memory_fraction": 0.4
             }
         }
         ctx_server_config["cache_transceiver_config"] = {
@@ -610,7 +611,8 @@ def test_auto_dtype(self, ctx_disable_overlap_scheduler,
         gen_server_config = {
             "disable_overlap_scheduler": gen_disable_overlap_scheduler,
             "kv_cache_config": {
-                "enable_block_reuse": gen_enable_block_reuse
+                "enable_block_reuse": gen_enable_block_reuse,
+                "free_gpu_memory_fraction": 0.3
             }
         }
         gen_server_config["cache_transceiver_config"] = {
@@ -687,7 +689,7 @@ def test_ngram(self):
             "is_public_pool": True
         }
         kv_cache_config = {
-            "free_gpu_memory_fraction": 0.5,
+            "free_gpu_memory_fraction": 0.35,
             "enable_block_reuse": False
         }
         ctx_server_config = {
@@ -739,7 +741,7 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
             True,  # BS=1 does not need overlap scheduling
             "speculative_config": speculative_decoding_config,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.5,
+                "free_gpu_memory_fraction": 0.3,
                 "enable_block_reuse": True  # reuse on context requests
             },
             "max_num_tokens": 13393 * 2,
@@ -754,7 +756,7 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
             "disable_overlap_scheduler": not overlap_scheduler,
             "speculative_config": speculative_decoding_config,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.5,
+                "free_gpu_memory_fraction": 0.3,
                 "enable_block_reuse": False
             },
             "max_num_tokens": 13393 * 2,
@@ -791,14 +793,20 @@ def test_guided_decoding(self, backend: str, mocker):
             "cache_transceiver_config": {
                 "backend": "DEFAULT",
                 "max_tokens_in_buffer": 4096
-            }
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.3
+            },
         }
         gen_server_config = {
             "guided_decoding_backend": backend,
             "cache_transceiver_config": {
                 "backend": "DEFAULT",
                 "max_tokens_in_buffer": 4096
-            }
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.3
+            },
         }
         disaggregated_server_config = {
             "hostname": "localhost",
@@ -834,7 +842,7 @@ def test_guided_decoding_with_eagle3(self, backend: str,
             "disable_overlap_scheduler": True,
             "speculative_config": speculative_decoding_config,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.3,
             },
             "guided_decoding_backend": backend,
             "cache_transceiver_config": {
@@ -847,7 +855,7 @@ def test_guided_decoding_with_eagle3(self, backend: str,
             "disable_overlap_scheduler": not eagle3_one_model,
             "speculative_config": speculative_decoding_config,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.3,
             },
             "guided_decoding_backend": backend,
             "cache_transceiver_config": {
@@ -938,6 +946,14 @@ def test_auto_dtype(self, overlap_scheduler):
         # Keep this low to avoid warmup OOM in CI
         ctx_server_config["max_seq_len"] = 8192
         gen_server_config["max_seq_len"] = 8192
+
+        ctx_server_config["kv_cache_config"] = {
+            "free_gpu_memory_fraction": 0.6,
+        }
+        gen_server_config["kv_cache_config"] = {
+            "free_gpu_memory_fraction": 0.6,
+        }
+
         disaggregated_server_config = {
             "hostname": "localhost",
             "backend": "pytorch",
@@ -971,6 +987,9 @@ def test_nixl_backend(self):
             "cache_transceiver_config": {
                 "backend": "NIXL",
                 "max_tokens_in_buffer": 4096
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.6
             }
         }
         gen_server_config = {
@@ -978,6 +997,9 @@ def test_nixl_backend(self):
             "cache_transceiver_config": {
                 "backend": "NIXL",
                 "max_tokens_in_buffer": 4096
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.6
             }
         }
         disaggregated_server_config = {
@@ -1012,6 +1034,9 @@ def test_gen_only_sync(self):
                 "transceiver_runtime": "PYTHON",
                 "max_tokens_in_buffer": 4096,
             },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.5
+            }
         }
         gen_server_config = {
             "disable_overlap_scheduler": True,
@@ -1020,6 +1045,9 @@ def test_gen_only_sync(self):
                 "transceiver_runtime": "PYTHON",
                 "max_tokens_in_buffer": 4096,
             },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.5
+            }
         }
         disaggregated_server_config = {
             "hostname": "localhost",
@@ -1056,6 +1084,14 @@ def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
             "backend": "DEFAULT",
             "max_tokens_in_buffer": 4096
         }
+
+        ctx_server_config["kv_cache_config"] = {
+            "free_gpu_memory_fraction": 0.6,
+        }
+        gen_server_config["kv_cache_config"] = {
+            "free_gpu_memory_fraction": 0.6,
+        }
+
         if mtp_nextn > 0:
             ctx_server_config["speculative_config"] = {
                 "decoding_type": "MTP",
@@ -1065,6 +1101,7 @@ def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
                 "decoding_type": "MTP",
                 "num_nextn_predict_layers": mtp_nextn
             }
+
         disaggregated_server_config = {
             "hostname": "localhost",
             "backend": "pytorch",
@@ -1187,7 +1224,7 @@ def test_guided_decoding(self, backend: str, mtp_nextn: int, mocker):
         ctx_server_config = {
             "disable_overlap_scheduler": True,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.55,
             },
             "guided_decoding_backend": backend,
             "cache_transceiver_config": {
@@ -1198,7 +1235,7 @@ def test_guided_decoding(self, backend: str, mtp_nextn: int, mocker):
         gen_server_config = {
             "disable_overlap_scheduler": False,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.55,
             },
             "guided_decoding_backend": backend,
             "cache_transceiver_config": {
@@ -1239,18 +1276,20 @@ def test_kv_cache_v2_nixl_python(self):
             "disable_overlap_scheduler": True,
             "kv_cache_config": {
                 "enable_block_reuse": False,
-                "use_kv_cache_manager_v2": True
+                "use_kv_cache_manager_v2": True,
+                "free_gpu_memory_fraction": 0.55
             },
             "cache_transceiver_config": {
                 "backend": "NIXL",
-                "transceiver_runtime": "PYTHON"
+                "transceiver_runtime": "PYTHON",
             }
         }
         gen_server_config = {
             "disable_overlap_scheduler": True,
             "kv_cache_config": {
                 "enable_block_reuse": False,
-                "use_kv_cache_manager_v2": True
+                "use_kv_cache_manager_v2": True,
+                "free_gpu_memory_fraction": 0.55
             },
             "cache_transceiver_config": {
                 "backend": "NIXL",
@@ -1306,11 +1345,13 @@ def test_auto_dtype(self, block_reuse):
             "max_attention_window": [512, 512, 512, 512, 512, 32768],
             "enable_block_reuse": block_reuse,
             "enable_partial_reuse": block_reuse,
+            "free_gpu_memory_fraction": 0.65,
         }
         gen_server_config["kv_cache_config"] = {
             "max_attention_window": [512, 512, 512, 512, 512, 32768],
             "enable_block_reuse": block_reuse,
             "enable_partial_reuse": block_reuse,
+            "free_gpu_memory_fraction": 0.65,
         }
         disaggregated_server_config = {
             "hostname": "localhost",
@@ -1619,6 +1660,9 @@ def test_nixl_backend(self):
             "cache_transceiver_config": {
                 "backend": "NIXL",
                 "max_tokens_in_buffer": 4096
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.4
             }
         }
         gen_server_config = {
@@ -1626,6 +1670,9 @@ def test_nixl_backend(self):
             "cache_transceiver_config": {
                 "backend": "NIXL",
                 "max_tokens_in_buffer": 4096
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.4
             }
         }
         disaggregated_server_config = {
@@ -1650,6 +1697,7 @@ def test_auto_dtype(self, overlap_scheduler, enable_partial_reuse):
         kv_cache_config = {
             "enable_block_reuse": True,
             "enable_partial_reuse": enable_partial_reuse,
+            "free_gpu_memory_fraction": 0.5,
         }
         ctx_server_config = {
             "disable_overlap_scheduler": True,
@@ -1690,6 +1738,7 @@ def _test_chunked_prefill_helper(self, *, ctx_pp: int):
 
         kv_cache_config = {
             "enable_block_reuse": True,
+            "free_gpu_memory_fraction": 0.5
         }
 
         ctx_server_config = {
@@ -1712,6 +1761,7 @@ def _test_chunked_prefill_helper(self, *, ctx_pp: int):
                 "max_tokens_in_buffer": 4096
             },
             "max_batch_size": max_batch_size,
+            "kv_cache_config": kv_cache_config,
         }
         disaggregated_server_config = {
             "hostname": "localhost",
@@ -1774,7 +1824,7 @@ def test_auto_dtype_with_helix(self, comms_medium, cuda_graph_config,
             raise ValueError(f"Unknown comms_medium: {comms_medium}")
         gen_ep = gen_tp * gen_cp
         kv_cache_config = {
-            "free_gpu_memory_fraction": 0.5,
+            "free_gpu_memory_fraction": 0.35,
             "enable_block_reuse": False,
             "enable_partial_reuse": False,
             "tokens_per_block": 32,
@@ -1944,7 +1994,7 @@ def test_nvfp4(self):
             "enable_attention_dp": True,
             "trust_remote_code": True,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.65,
             },
         }
         gen_server_config = {
@@ -1958,7 +2008,7 @@ def test_nvfp4(self):
             "enable_attention_dp": True,
             "trust_remote_code": True,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.8,
+                "free_gpu_memory_fraction": 0.65,
             },
         }
         disaggregated_server_config = {
diff --git a/tests/integration/defs/accuracy/test_dwdp_disaggregated_serving.py b/tests/integration/defs/accuracy/test_dwdp_disaggregated_serving.py
index ce1644c04711..b415562dc232 100644
--- a/tests/integration/defs/accuracy/test_dwdp_disaggregated_serving.py
+++ b/tests/integration/defs/accuracy/test_dwdp_disaggregated_serving.py
@@ -204,7 +204,7 @@ def test_dwdp_accuracy(self):
             "max_batch_size": 16,
             "max_num_tokens": 8192,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.4,
+                "free_gpu_memory_fraction": 0.23,
                 "enable_block_reuse": False,
                 "enable_partial_reuse": False,
                 "tokens_per_block": 32,
@@ -236,7 +236,7 @@ def test_dwdp_accuracy(self):
             "max_batch_size": 128,
             "max_num_tokens": 1024,
             "kv_cache_config": {
-                "free_gpu_memory_fraction": 0.5,
+                "free_gpu_memory_fraction": 0.33,
                 "enable_block_reuse": False,
                 "enable_partial_reuse": False,
                 "tokens_per_block": 32,
diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index c7aca9535a20..379260b10e60 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -1735,7 +1735,7 @@ def test_bfloat16_python_scheduler(self, mtp_nextn, attention_dp,
 
     @pytest.mark.skip_less_device_memory(60000)
     def test_bfloat16_2_model_mtp(self):
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.3)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.2)
         pytorch_config = dict(
             disable_overlap_scheduler=True,
             cuda_graph_config=CudaGraphConfig(),
@@ -1796,7 +1796,7 @@ def test_bfloat16_mtp_sa_global_pool(self):
     @parametrize_with_ids("mtp_nextn", [0, 2])
     def test_bfloat16_4gpus_kv_cache_aware_routing(self, mtp_nextn):
         """Accuracy test for attention DP with KV cache-aware routing."""
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.62,
                                         enable_block_reuse=True)
         pytorch_config = dict(
             disable_overlap_scheduler=False,
@@ -1839,7 +1839,7 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, ep_size, mtp_nextn,
             pp_partition[-1] = num_hidden_layers - sum(pp_partition[:-1])
         else:
             pp_partition = None
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.40)
         torch_compile_config = _get_default_torch_compile_config(torch_compile)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -1869,7 +1869,7 @@ def test_bfloat16_4gpus(self, tp_size, pp_size, ep_size, mtp_nextn,
     def test_bfloat16_4gpus_python_scheduler(self, tp_size, pp_size, ep_size,
                                              mtp_nextn):
         scheduler_config = SchedulerConfig(use_python_scheduler=True)
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.65)
         pytorch_config = dict(cuda_graph_config=CudaGraphConfig(), )
         mtp_config = None
         if mtp_nextn > 0:
@@ -2426,7 +2426,7 @@ def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
         if moe_backend == "CUTEDSL" and sm_version not in (100, 103):
             pytest.skip(f"{moe_backend} backend supports SM 100 and 103 only")
 
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.65)
         # Picewise Cuda Graph cannot be enabled for nvfp4 attention dp.
         torch_compile_config = _get_default_torch_compile_config(torch_compile)
         pytorch_config = dict(
@@ -2784,7 +2784,7 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
         if moe_backend == "TRTLLM" and sm_version in (120, 121):
             pytest.skip(f"{moe_backend} backend does not support SM 120 or 121")
 
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.50)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
@@ -3078,12 +3078,12 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
         if is_sm_100f():
             moe_backend = "DEEPGEMM" if moe_backend == "_DEFAULT" else moe_backend
             moe_config = MoeConfig(backend=moe_backend, max_num_tokens=16384)
-            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         else:
             if moe_backend != "_DEFAULT":
                 pytest.skip("Not supported MoE backend!")
             moe_config = MoeConfig()
-            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)
 
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -3198,7 +3198,7 @@ def test_skip_softmax_attention_multi_gpus(self, target_sparsity: float,
                 "prefill": thr_prefill,
                 "decode": thr_decode,
             })
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.70,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.43,
                                         enable_block_reuse=False)
 
         sm_version = get_sm_version()
@@ -3252,12 +3252,12 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
         if get_sm_version() == 100 or get_sm_version() == 103:
             moe_backend = "DEEPGEMM" if moe_backend == "_DEFAULT" else moe_backend
             moe_config = MoeConfig(backend=moe_backend, max_num_tokens=16384)
-            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.3)
         else:
             if moe_backend != "_DEFAULT":
                 pytest.skip("Not supported MoE backend!")
             moe_config = MoeConfig()
-            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7)
+            kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4)
 
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -3335,7 +3335,7 @@ def test_dsa_host_cache_offload(self, tp_size, pp_size, ep_size, mtp_nextn,
 
         # Use a small GPU fraction so that host offload is actually exercised.
         kv_cache_config = KvCacheConfig(
-            free_gpu_memory_fraction=0.4,
+            free_gpu_memory_fraction=0.28,
             host_cache_size=host_cache_size_gb * (1 << 30),
         )
 
@@ -4055,7 +4055,7 @@ class TestKimiK25(LlmapiAccuracyTestHarness):
     def test_nvfp4(self, ep_size, attention_dp):
         model_name = "moonshotai/Kimi-K2.5"
         model_path = f"{llm_models_root()}/Kimi-K2.5-NVFP4"
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
 
         with LLM(model_path,
                  tensor_parallel_size=8,
@@ -4410,12 +4410,15 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
 
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5, )
+
         with LLM(f"{llm_models_root()}/Qwen3/Qwen3-8B"
                  if is_cached else "Qwen/Qwen3-8B",
                  tensor_parallel_size=tp_size,
                  pipeline_parallel_size=pp_size,
                  moe_expert_parallel_size=ep_size,
                  **pytorch_config,
+                 kv_cache_config=kv_cache_config,
                  enable_attention_dp=attention_dp) as llm:
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
@@ -5020,7 +5023,7 @@ def test_skip_softmax_attention_4gpus(self, target_sparsity: float,
                 "prefill": thr_prefill,
                 "decode": thr_decode,
             })
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.45,
                                         enable_block_reuse=False,
                                         dtype="fp8" if fp8kv else "auto")
 
@@ -5345,7 +5348,7 @@ def test_w4_4gpus(self, v2_kv_cache, kv_cache_reuse, kv_cache_dtype,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
             moe_config=MoeConfig(backend=moe_backend))
 
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.45,
                                         dtype=kv_cache_dtype,
                                         enable_block_reuse=kv_cache_reuse,
                                         use_kv_cache_manager_v2=v2_kv_cache)
@@ -5573,7 +5576,7 @@ def test_eagle3_4gpus(self, v2_kv_cache, moe_backend, one_model,
         # https://nvbugs/5590408: 2-Model overlap scheduling has accuracy issue
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig())
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.25,
                                         dtype="auto",
                                         use_kv_cache_manager_v2=v2_kv_cache)
 
@@ -5920,8 +5923,7 @@ class TestQwen3NextInstruct(LlmapiAccuracyTestHarness):
     def test_bf16_4gpu(self, tp_size, pp_size, ep_size, cuda_graph,
                        overlap_scheduler, attention_dp, mocker):
         model_path = f"{self.MODEL_PATH}/Qwen3-Next-80B-A3B-Instruct"
-
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5,
                                         enable_block_reuse=False)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
@@ -6188,7 +6190,7 @@ def test_nvfp4(self, tp_size, ep_size, cuda_graph, overlap_scheduler,
         if not os.path.exists(model_path):
             pytest.skip(f"Model directory {model_path} does not exist")
 
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9,
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7,
                                         enable_block_reuse=enable_block_reuse)
         pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
                               cuda_graph_config=CudaGraphConfig(
@@ -6226,7 +6228,7 @@ class TestSeedOss_36B(LlmapiAccuracyTestHarness):
     @pytest.mark.timeout(14400)
     @pytest.mark.skip_less_device_memory(140000)
     def test_auto_dtype(self):
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         chat_template_kwargs = dict(thinking_budget=-1)
 
         with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
diff --git a/tests/integration/defs/disaggregated/disagg_test_utils.py b/tests/integration/defs/disaggregated/disagg_test_utils.py
index d9691a0b1127..bcd862a5b041 100644
--- a/tests/integration/defs/disaggregated/disagg_test_utils.py
+++ b/tests/integration/defs/disaggregated/disagg_test_utils.py
@@ -437,6 +437,7 @@ def disagg_server_config(disagg_cluster_config, router, disagg_port):
         "disagg_cluster": disagg_cluster_config,
         "context_servers": {"router": {"type": router}},
         "generation_servers": {"router": {"type": router}},
+        "kv_cache_config": {"free_gpu_memory_fraction": 0.4},
     }
 
 
diff --git a/tests/integration/defs/perf/test_perf_sanity.py b/tests/integration/defs/perf/test_perf_sanity.py
index 3ce76105bd0c..07f3406b927f 100644
--- a/tests/integration/defs/perf/test_perf_sanity.py
+++ b/tests/integration/defs/perf/test_perf_sanity.py
@@ -27,6 +27,7 @@
 
 import pytest
 import yaml
+from tensorrt_llm import commands
 from test_common.error_utils import report_error
 from test_common.http_utils import wait_for_endpoint_ready
 
@@ -667,7 +668,22 @@ class AggrTestCmds(NamedTuple):
 
     def get_server_logs(self, server_idx) -> List[str]:
         server_file_path = os.path.join(self.test_output_dir, f"trtllm-serve.{server_idx}.log")
-        return [server_file_path]
+        server_logs = [server_file_path]
+        # Include the SLURM-level aggregated server log (written by slurm_launch_draft.sh)
+        aggr_server_log = os.path.join(self.output_dir, "aggr_server.log")
+        server_logs.append(aggr_server_log)
+        return server_logs
+
+    def collect_logs(self) -> None:
+        """Copy SLURM-level logs from output_dir to test_output_dir for artifact collection."""
+        log_files = [os.path.join(self.output_dir, "aggr_server.log")]
+        log_files.append(os.path.join(self.output_dir, "job-output.log"))
+        log_files.extend(glob.glob(os.path.join(self.output_dir, "slurm-*.out")))
+        for src in log_files:
+            if os.path.exists(src):
+                dst = os.path.join(self.test_output_dir, os.path.basename(src))
+                shutil.copy2(src, dst)
+                print_info(f"Collected log: {src} -> {dst}")
 
     def run_cmd(self, server_idx: int) -> List[str]:
         """Run all clients for a server and return outputs."""
@@ -868,6 +884,20 @@ def get_server_logs(self, server_idx: int) -> List[str]:
         server_logs.append(os.path.join(self.output_dir, "disagg_server.log"))
         return server_logs
 
+    def collect_logs(self) -> None:
+        """Best-effort copy of SLURM-level logs from output_dir to test_output_dir."""
+        names = [f"ctx_server_{i}.log" for i in range(self.num_ctx_servers)]
+        names += [f"gen_server_{i}.log" for i in range(self.num_gen_servers)]
+        log_files = [os.path.join(self.output_dir, n) for n in [*names, "disagg_server.log"]]
+        # Logs that were never produced are skipped silently.
+        for src in filter(os.path.exists, log_files):
+            dst = os.path.join(self.test_output_dir, os.path.basename(src))
+            try:
+                shutil.copy2(src, dst)
+                print_info(f"Collected log: {src} -> {dst}")
+            except OSError as err:  # e.g. SameFileError, permissions: never mask the test result
+                print_info(f"Failed to collect log {src}: {err}")
+
     @staticmethod
     def _wait_for_config_file(config_path: str, timeout: int = 600) -> None:
         """Wait for a config file to be written by the primary (_0) worker."""
@@ -909,9 +939,11 @@ def run_cmd(self, server_idx: int) -> List[str]:
                     f"trtllm-serve.{self.disagg_serving_type}.{server_idx}.log",
                 )
                 with open(server_file_path, "w") as server_ctx:
+                    server_env = copy.deepcopy(os.environ)
+                    server_env["TLLM_WORKER_LOG_FILE"] = server_file_path
                     server_proc = subprocess.Popen(
                         server_cmd,
-                        env=copy.deepcopy(os.environ),
+                        env=server_env,
                         stdout=server_ctx,
                         stderr=subprocess.STDOUT,
                     )
@@ -930,9 +962,11 @@ def run_cmd(self, server_idx: int) -> List[str]:
                     f"trtllm-serve.{self.disagg_serving_type}.{server_idx}.log",
                 )
                 with open(disagg_server_file_path, "w") as disagg_server_ctx:
+                    disagg_env = copy.deepcopy(os.environ)
+                    disagg_env["TLLM_WORKER_LOG_FILE"] = disagg_server_file_path
                     disagg_server_proc = subprocess.Popen(
                         disagg_cmd,
-                        env=copy.deepcopy(os.environ),
+                        env=disagg_env,
                         stdout=disagg_server_ctx,
                         stderr=subprocess.STDOUT,
                     )
@@ -1565,6 +1599,9 @@ def run_ex(self, commands) -> Dict[int, List[str]]:
                     error_msg=e,
                     log_files=commands.get_server_logs(server_idx),
                 )
+            finally:
+                # Copy SLURM-level logs to test_output_dir for artifact collection
+                commands.collect_logs()
 
         return outputs
 
diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py
index 8f502edbbce8..ea5753af3fec 100644
--- a/tests/integration/defs/stress_test/stress_test.py
+++ b/tests/integration/defs/stress_test/stress_test.py
@@ -90,8 +90,7 @@ class ServerConfig:
     ep_size: Optional[int] = 1
     max_batch_size: Optional[int] = 1024  # 2048 is default value in BuildConfig
     max_num_tokens: Optional[int] = 8192  # 8192 is default value in BuildConfig
-    kv_cache_free_gpu_memory_fraction: Optional[
-        float] = 0.9  # 0.9 is default value in BuildConfig
+    kv_cache_free_gpu_memory_fraction: Optional[float] = 0.6
     capacity_scheduler_policy: str = "GUARANTEED_NO_EVICT"
     wait_interval: int = 10  # seconds
     max_wait_seconds: int = 600  # 10 mins <- Larger model need longer model loading time
@@ -582,7 +581,7 @@ def stress_test(config,
             max_num_tokens=
             8192,  # DeepSeek-V3 or DeepSeek-R1 specific max_num_tokens
             kv_cache_free_gpu_memory_fraction=
-            0.85,  # DeepSeek-V3 or DeepSeek-R1 specific kv_cache fraction
+            0.75,  # DeepSeek-V3 or DeepSeek-R1 specific kv_cache fraction
             capacity_scheduler_policy=test_server_config.
             capacity_scheduler_policy,
             wait_interval=test_server_config.wait_interval,
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 3166ca140520..65428f9eafba 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -496,7 +496,8 @@ def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name,
                          streaming=False,
                          use_pytorch_backend=use_pytorch_backend,
                          use_mpirun=True,
-                         tp_size=2)
+                         tp_size=2,
+                         kv_cache_free_gpu_mem_fraction=0.5)
     runner()
 
 
@@ -1499,6 +1500,8 @@ def test_ptp_quickstart_advanced_bs1(llm_root, llm_venv):
         "\"NVIDIA is a great company because\"",
         "--model_dir",
         f"{llm_models_root()}/{model_path}",
+        "--kv_cache_fraction",
+        f"0.5",
     ])
 
 
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_2_nodes_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_2_nodes_grace_blackwell.yaml
index 0e194a0b1997..2fd880a1aced 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_2_nodes_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_2_nodes_grace_blackwell.yaml
@@ -29,7 +29,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.53
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -66,7 +66,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.42
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_blackwell.yaml
index df123fbc5df5..3fdbcce8198b 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_blackwell.yaml
@@ -23,7 +23,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.667
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -59,7 +59,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.62
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -90,7 +90,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.646
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -126,7 +126,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.66
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_grace_blackwell.yaml
index 621a7542a60d..c915ac573fbc 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp4_v2_grace_blackwell.yaml
@@ -29,7 +29,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.75
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -127,7 +127,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.5837
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -225,7 +225,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.66
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -287,7 +287,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.657
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp8_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp8_blackwell.yaml
index 7e83196a86d7..9d7dba787abb 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp8_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_r1_fp8_blackwell.yaml
@@ -23,7 +23,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.71
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -90,7 +90,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.7
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -157,7 +157,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.712
     client_configs:
       - name: "con64_iter10_6k1k"
         concurrency: 64
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_blackwell.yaml
index b45d899d9e41..70c639961c90 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_blackwell.yaml
@@ -23,7 +23,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.612
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -59,7 +59,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.373
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
diff --git a/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_grace_blackwell.yaml
index d8b9d1735540..a9ee30afb278 100644
--- a/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/deepseek_v32_fp4_grace_blackwell.yaml
@@ -23,7 +23,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.68
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 3
@@ -59,7 +59,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.68
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
@@ -126,7 +126,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.48
     speculative_config:
       decoding_type: 'MTP'
       num_nextn_predict_layers: 1
diff --git a/tests/scripts/perf-sanity/aggregated/dynamo_deepseek_v32_fp4_2_nodes_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/dynamo_deepseek_v32_fp4_2_nodes_grace_blackwell.yaml
index debea1681f7e..478f5588f2dd 100644
--- a/tests/scripts/perf-sanity/aggregated/dynamo_deepseek_v32_fp4_2_nodes_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/dynamo_deepseek_v32_fp4_2_nodes_grace_blackwell.yaml
@@ -35,7 +35,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.65
       tokens_per_block: 64
     cache_transceiver_config:
       backend: UCX
diff --git a/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_blackwell.yaml
index cde7a71b2a2d..e5ec9eb2bcdb 100644
--- a/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_blackwell.yaml
@@ -23,7 +23,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.85
+      free_gpu_memory_fraction: 0.808
     num_postprocess_workers: 4
     stream_interval: 20
     client_configs:
diff --git a/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_grace_blackwell.yaml
index cc5dbe906753..4ea943b9b633 100644
--- a/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/gpt_oss_120b_fp4_grace_blackwell.yaml
@@ -113,7 +113,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.769
     num_postprocess_workers: 4
     stream_interval: 20
     client_configs:
@@ -142,7 +142,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.774
     speculative_config:
       decoding_type: 'Eagle'
       eagle3_layers_to_capture: [-1]
@@ -177,7 +177,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.85
+      free_gpu_memory_fraction: 0.81
     num_postprocess_workers: 4
     stream_interval: 20
     client_configs:
diff --git a/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_blackwell.yaml
index fe1f29489771..a6b05bd97986 100644
--- a/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_blackwell.yaml
@@ -24,7 +24,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.509
     client_configs:
       - name: "con2_iter10_8k1k"
         concurrency: 2
@@ -90,7 +90,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.517
     client_configs:
       - name: "con2_iter10_32k8k"
         concurrency: 2
@@ -126,7 +126,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.54
     client_configs:
       - name: "con128_iter10_32k8k"
         concurrency: 128
diff --git a/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_grace_blackwell.yaml b/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_grace_blackwell.yaml
index 35335416f193..2d61474c29a4 100644
--- a/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_grace_blackwell.yaml
+++ b/tests/scripts/perf-sanity/aggregated/k25_thinking_fp4_grace_blackwell.yaml
@@ -24,7 +24,7 @@ server_configs:
     kv_cache_config:
       dtype: 'fp8'
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.394
     client_configs:
       - name: "con2_iter10_8k1k"
         concurrency: 2
diff --git a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 94e8e0e4b4a2..e38cca683ba9 100644
--- a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.349
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con2048_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con2048_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
index ac5c36f16665..e33f65bb86f6 100644
--- a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con2048_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_1k1k_con2048_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 256
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.73
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.57
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1536_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1536_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
index f986eeda6983..11176a4a3266 100644
--- a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1536_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1536_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 192
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.73
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.49
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 2b39be1b676d..2270115537b7 100644
--- a/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/b200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.349
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-NIXL.yaml
index 12916108ff24..08d849213cce 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.709
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.3
+      free_gpu_memory_fraction: 0.173
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 0117cf4d0b5a..a99eeb932c7e 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 2
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.536
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.3
+      free_gpu_memory_fraction: 0.21
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
index 5f9722f34ff2..86f77799de03 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
@@ -76,7 +76,7 @@ worker_config:
       - 2048
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.7
+      free_gpu_memory_fraction: 0.60
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -98,7 +98,7 @@ worker_config:
     disable_overlap_scheduler: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.85
+      free_gpu_memory_fraction: 0.72
       dtype: fp8
     cache_transceiver_config:
       max_tokens_in_buffer: 4608
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index aa31b144efbc..3bf3e7779378 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.558
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
index 20706a01c585..e1c99c884995 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
     print_iter_log: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.761
       dtype: fp8
     moe_config:
       backend: CUTLASS
@@ -84,7 +84,7 @@ worker_config:
     disable_overlap_scheduler: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.66
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index ce7d3b5be45d..33feec92fc75 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.828
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
index 3370d1faa8cb..db3b8dca4256 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.662
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -90,7 +90,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
index 43f0535d8383..f72003bec864 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.77
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.39
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 8c08fcde53d2..54f1e96503e0 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.698
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 60bec288c4a2..db7273b05e83 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.828
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
index fcfbe0616235..f4b95909015b 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.551
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -97,7 +97,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.326
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 2c3b2b19759d..f9ea0d218d52 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.825
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -93,7 +93,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.326
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
index bf9ba21f0835..b115a9e79a2e 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.68
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -94,7 +94,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.35
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 2a91f3633c0d..63629e0767bb 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -62,7 +62,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.25
+      free_gpu_memory_fraction: 0.236
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -98,7 +98,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.4
+      free_gpu_memory_fraction: 0.10
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 99002c9f2558..06d83e67471c 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -62,7 +62,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.5
+      free_gpu_memory_fraction: 0.42
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -98,7 +98,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.4
+      free_gpu_memory_fraction: 0.08
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
index 9158df38b979..82fc263960b3 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.698
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -91,7 +91,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: true
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.70
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
index e40afbe44267..c723f4bdea22 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.688
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -97,7 +97,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.51
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 0a7b3a052e37..13dd85d7c3a6 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.80
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -93,7 +93,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.40
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
index 3a3747731f78..78c78688b3a0 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 1536
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.836
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.85
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index ca773d4b268d..6a0b9c0e5bdc 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.85
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index 0765412f2e62..1831e13b4922 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1024
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.86
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index bb027c657d2c..a0af7509465c 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1024
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.86
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
index 3a1d7fdbeffd..a83528237cbf 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.848
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
index 8eb7293b9a87..d6b841ce59f1 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 64
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.694
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.77
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
index 3ee92df5b4a2..52e24b56c544 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 4
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.599
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -83,7 +83,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.408
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index cef09d7dd066..09cae2151133 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -89,7 +89,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.388
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf-sanity/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf-sanity/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
index 6a74a4fcfc36..69ef63b95658 100644
--- a/tests/scripts/perf-sanity/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf-sanity/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 64
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.87
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.52
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
index 28f8296df512..c2790abd37dd 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con128_ctx1_pp8_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.709
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.3
+      free_gpu_memory_fraction: 0.21
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 0117cf4d0b5a..a99eeb932c7e 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_128k8k_con64_ctx1_pp8_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 2
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.536
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.3
+      free_gpu_memory_fraction: 0.21
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
index 5f9722f34ff2..86f77799de03 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
@@ -76,7 +76,7 @@ worker_config:
       - 2048
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.7
+      free_gpu_memory_fraction: 0.60
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -98,7 +98,7 @@ worker_config:
     disable_overlap_scheduler: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.85
+      free_gpu_memory_fraction: 0.72
       dtype: fp8
     cache_transceiver_config:
       max_tokens_in_buffer: 4608
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index aa31b144efbc..3bf3e7779378 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.558
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
index 20706a01c585..e1c99c884995 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
     print_iter_log: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.761
       dtype: fp8
     moe_config:
       backend: CUTLASS
@@ -84,7 +84,7 @@ worker_config:
     disable_overlap_scheduler: true
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.66
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index ce7d3b5be45d..33feec92fc75 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.828
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb0_mtp3_ccb-NIXL.yaml
index 73dae2e6d765..0e1b73315533 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.667
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
index 3370d1faa8cb..db3b8dca4256 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con2048_ctx2_dep4_gen1_dep16_eplb288_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.662
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -90,7 +90,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
index 43f0535d8383..78e3ef77498b 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.50
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.31
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
index c6133efa8011..2f9cdb58691b 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 768
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.77
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.40
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 8c08fcde53d2..54f1e96503e0 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.698
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 60bec288c4a2..db7273b05e83 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.828
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -86,7 +86,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.361
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
index 1ed2d2cff294..aa6df6cb96f1 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-r1-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp1_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 256
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.721
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.362
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
index fcfbe0616235..f4b95909015b 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.551
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -97,7 +97,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.326
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 2c3b2b19759d..f9ea0d218d52 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.825
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -93,7 +93,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.326
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
index bf9ba21f0835..05d104b6f7f4 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.52
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -94,7 +94,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.300
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
index 86b2ca6ea9ba..9474fb85a080 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.70
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -94,7 +94,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.365
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 2a91f3633c0d..797cfa3f9bcb 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -62,7 +62,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.25
+      free_gpu_memory_fraction: 0.236
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -98,7 +98,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.4
+      free_gpu_memory_fraction: 0.097
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con2048_ctx1_dep4_gen1_dep32_eplb288_mtp1_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con2048_ctx1_dep4_gen1_dep32_eplb288_mtp1_ccb-UCX.yaml
index a2b8f7b1a8f4..26bd6a85c93a 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con2048_ctx1_dep4_gen1_dep32_eplb288_mtp1_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con2048_ctx1_dep4_gen1_dep32_eplb288_mtp1_ccb-UCX.yaml
@@ -62,7 +62,7 @@ worker_config:
       max_batch_size: 64
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.5
+      free_gpu_memory_fraction: 0.47
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -101,7 +101,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.4
+      free_gpu_memory_fraction: 0.36
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
index 99002c9f2558..d0410e55b617 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep4_gen1_dep32_eplb0_mtp3_ccb-NIXL.yaml
@@ -62,7 +62,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.5
+      free_gpu_memory_fraction: 0.48
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -98,7 +98,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.4
+      free_gpu_memory_fraction: 0.08
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
index 9158df38b979..82fc263960b3 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_32k4k_con256_ctx1_dep8_gen1_dep8_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 8
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.698
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -91,7 +91,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: true
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.70
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
index e40afbe44267..c723f4bdea22 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1024_ctx1_dep4_gen1_dep32_eplb256_mtp3_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 32
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.75
+      free_gpu_memory_fraction: 0.688
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -97,7 +97,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.51
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index 0a7b3a052e37..1cdf912230f7 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.80
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -93,7 +93,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.38
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX.yaml
index 2c4a93be46a0..8bb304f3c01e 100644
--- a/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_deepseek-v32-fp4_8k1k_con4096_ctx1_dep4_gen1_dep32_eplb256_mtp0_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.67
       dtype: fp8
       tokens_per_block: 64
     moe_config:
@@ -94,7 +94,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.464
       dtype: fp8
       tokens_per_block: 64
     moe_config:
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con2048_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con2048_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX.yaml
index c739b0f68751..2df89d75934a 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con2048_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con2048_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 1536
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.836
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.85
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
index 3a3747731f78..78c78688b3a0 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 1536
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.836
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.85
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index ca773d4b268d..6a0b9c0e5bdc 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.85
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con1024_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con1024_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX.yaml
index ec990076ef08..0f77fbd0965c 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con1024_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con1024_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX.yaml
@@ -68,7 +68,7 @@ worker_config:
       max_batch_size: 1280
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.85
+      free_gpu_memory_fraction: 0.815
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -94,7 +94,7 @@ worker_config:
       max_batch_size: 30
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.779
       dtype: fp8
     moe_config:
       backend: TRTLLM
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index 0765412f2e62..1831e13b4922 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1024
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.86
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
index bb027c657d2c..a0af7509465c 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 1024
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.86
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
index 3a1d7fdbeffd..a83528237cbf 100644
--- a/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_gpt-oss-120b-fp4_8k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-NIXL.yaml
@@ -63,7 +63,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.848
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.846
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
index 8eb7293b9a87..6dcbe19a38fd 100644
--- a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con2048_ctx1_dep4_gen1_dep32_eplb0_mtp0_ccb-NIXL.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 64
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.694
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.5
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
index 1ea511a1e7a3..eaff1b8b36ba 100644
--- a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4096_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 512
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.708
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.408
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
index 3ee92df5b4a2..52e24b56c544 100644
--- a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 4
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.599
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -83,7 +83,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.408
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml
index d84d38d742b7..80af732fac5e 100644
--- a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 256
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.735
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -85,7 +85,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.405
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
index cef09d7dd066..09cae2151133 100644
--- a/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_kimi-k25-thinking-fp4_8k1k_con4_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-NIXL.yaml
@@ -89,7 +89,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.388
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con1024_ctx1_tp1_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml b/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con1024_ctx1_tp1_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
index a97acb893057..8b2da2bee0ca 100644
--- a/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con1024_ctx1_tp1_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con1024_ctx1_tp1_gen1_dep8_eplb0_mtp0_ccb-UCX.yaml
@@ -60,7 +60,7 @@ worker_config:
       max_batch_size: 128
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.841
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -83,7 +83,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.486
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
index 6a74a4fcfc36..69ef63b95658 100644
--- a/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb200_qwen3-235b-fp4_8k1k_con64_ctx1_tp1_gen1_tep4_eplb0_mtp0_ccb-NIXL.yaml
@@ -59,7 +59,7 @@ worker_config:
       max_batch_size: 64
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.9
+      free_gpu_memory_fraction: 0.87
       dtype: fp8
     moe_config:
       backend: TRTLLM
@@ -81,7 +81,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.6
+      free_gpu_memory_fraction: 0.52
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/scripts/perf/disaggregated/gb300_deepseek-r1-fp4_128k8k_con256_ctx1_pp4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml b/tests/scripts/perf/disaggregated/gb300_deepseek-r1-fp4_128k8k_con256_ctx1_pp4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
index 686fb268738a..3e80a62e5124 100644
--- a/tests/scripts/perf/disaggregated/gb300_deepseek-r1-fp4_128k8k_con256_ctx1_pp4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
+++ b/tests/scripts/perf/disaggregated/gb300_deepseek-r1-fp4_128k8k_con256_ctx1_pp4_gen1_dep8_eplb0_mtp1_ccb-NIXL.yaml
@@ -61,7 +61,7 @@ worker_config:
       max_batch_size: 16
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.8
+      free_gpu_memory_fraction: 0.75
       dtype: fp8
     moe_config:
       backend: CUTEDSL
@@ -87,7 +87,7 @@ worker_config:
     cuda_graph_config: null
     kv_cache_config:
       enable_block_reuse: false
-      free_gpu_memory_fraction: 0.3
+      free_gpu_memory_fraction: 0.27
       dtype: fp8
     moe_config:
       backend: CUTEDSL
diff --git a/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py b/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py
index e753ee63679f..96e26438ad41 100644
--- a/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py
+++ b/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py
@@ -54,7 +54,9 @@ def create_nemotron_h_llm(model_folder,
         cuda_graph_config=CudaGraphConfig() if use_cuda_graph else None,
         disable_overlap_scheduler=disable_overlap_scheduler,
         kv_cache_config=KvCacheConfig(
-            mamba_ssm_cache_dtype=mamba_ssm_cache_dtype)
+            enable_block_reuse=False,
+            mamba_ssm_cache_dtype=mamba_ssm_cache_dtype,
+            free_gpu_memory_fraction=0.5)
         if mamba_ssm_cache_dtype is not None else KvCacheConfig(),
         enable_chunked_prefill=enable_chunked_prefill,
         **kwargs,
diff --git a/tests/unittest/_torch/modeling/test_modeling_nemotron_nano_v2_vl.py b/tests/unittest/_torch/modeling/test_modeling_nemotron_nano_v2_vl.py
index 8b2060a25705..6d437612d369 100644
--- a/tests/unittest/_torch/modeling/test_modeling_nemotron_nano_v2_vl.py
+++ b/tests/unittest/_torch/modeling/test_modeling_nemotron_nano_v2_vl.py
@@ -73,7 +73,9 @@ def nano_llm_model():
         tensor_parallel_size=1,
         max_batch_size=2,
         cuda_graph_config=CudaGraphConfig(),
-        kv_cache_config=KvCacheConfig(enable_block_reuse=False, mamba_ssm_cache_dtype="float32"),
+        kv_cache_config=KvCacheConfig(
+            enable_block_reuse=False, mamba_ssm_cache_dtype="float32", free_gpu_memory_fraction=0.4
+        ),
     )
     yield nano_llm
 
diff --git a/tests/unittest/_torch/speculative/test_eagle3.py b/tests/unittest/_torch/speculative/test_eagle3.py
index f06da5cc4e21..073cc6674b41 100644
--- a/tests/unittest/_torch/speculative/test_eagle3.py
+++ b/tests/unittest/_torch/speculative/test_eagle3.py
@@ -368,11 +368,13 @@ def test_llama_eagle3_long_prompt(use_cuda_graph):
     else:
         cuda_graph_config = None
 
+    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4)
     llm_spec = LLM(model=target_model_dir,
                    speculative_config=spec_config,
                    max_batch_size=1,
                    cuda_graph_config=cuda_graph_config,
-                   disable_overlap_scheduler=True)
+                   disable_overlap_scheduler=True,
+                   kv_cache_config=kv_cache_config)
 
     prompt = [", ".join(str(i) for i in range(1000))]
 
diff --git a/tests/unittest/llmapi/apps/_test_openai_lora.py b/tests/unittest/llmapi/apps/_test_openai_lora.py
index 8e624122428c..457457862c5a 100644
--- a/tests/unittest/llmapi/apps/_test_openai_lora.py
+++ b/tests/unittest/llmapi/apps/_test_openai_lora.py
@@ -42,7 +42,10 @@ def temp_extra_llm_api_options_file():
             },
             # Disable CUDA graph
             # TODO: remove this once we have a proper fix for CUDA graph in LoRA
-            "cuda_graph_config": None
+            "cuda_graph_config": None,
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.5
+            },
         }
 
         with open(temp_file_path, 'w') as f:
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py
index e94c30662b1a..37c9172ae9cd 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py
@@ -28,7 +28,10 @@ def temp_extra_llm_api_options_file():
                 "max_lora_rank": 8,
                 "max_loras": 4,
                 "max_cpu_loras": 4,
-            }
+            },
+            "kv_cache_config": {
+                "free_gpu_memory_fraction": 0.5
+            },
         }
 
         with open(temp_file_path, 'w') as f: