#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

"""
Benchmark for iris.ops all_gather_matmul fused operation.

This benchmark showcases the fused All-Gather + GEMM operation where each rank
has a sharded A matrix that gets gathered, then multiplied with B.
"""

import argparse
import random
import sys

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

from examples.common.utils import JSONWriter

import iris
from iris.ops import FusedConfig

torch.manual_seed(123)
random.seed(123)

# CLI datatype name -> torch dtype.
DTYPE_MAP = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}


def parse_args():
    """Parse command-line arguments for the all_gather_matmul benchmark."""
    parser = argparse.ArgumentParser(
        description="Benchmark all_gather_matmul fused operation.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)")
    parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)")
    parser.add_argument("-k", type=int, default=131072, help="Common dimension total (K)")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode")
    parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode")
    parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode")
    parser.add_argument(
        "--datatype",
        type=str,
        default="fp16",
        choices=["fp16", "fp32", "bf16"],
        help="Datatype of tensors",
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default="all_gather_matmul.json",
        help="Output file",
    )
    parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size")
    parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)")
    parser.add_argument(
        "--benchmark_pytorch",
        action="store_true",
        help="Also benchmark PyTorch (all_gather_into_tensor + matmul) for comparison",
    )
    parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension")
    parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension")
    parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension")
    parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling")
    parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)")
    parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes")
    parser.add_argument(
        "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup"
    )

    return vars(parser.parse_args())


def _worker(local_rank: int, world_size: int, init_url: str, args: dict):
    """Worker function for PyTorch distributed execution.

    Initializes the process group and the Iris symmetric heap, runs the fused
    all_gather_matmul operation, and optionally validates against a PyTorch
    reference and/or benchmarks both implementations.
    """
    backend = "nccl" if torch.cuda.is_available() else "gloo"
    dist.init_process_group(
        backend=backend,
        init_method=init_url,
        world_size=world_size,
        rank=local_rank,
        device_id=torch.device(f"cuda:{local_rank}"),
    )

    shmem = iris.iris(args["heap_size"])
    rank = shmem.get_rank()
    world_size = shmem.get_num_ranks()

    # Datatype mapping (argparse `choices` restricts values, but fail loudly
    # if an unknown name slips through).
    datatype = DTYPE_MAP.get(args["datatype"])
    if datatype is None:
        print("Unknown datatype.")
        sys.exit(1)

    M = args["m"]
    N = args["n"]
    K = args["k"]
    K_local = K // world_size  # Sharded K dimension

    # Create config with parameters; optional fields fall back to
    # FusedConfig's own defaults / auto-detection.
    config_kwargs = {
        "block_size_m": args["block_size_m"],
        "block_size_n": args["block_size_n"],
        "block_size_k": args["block_size_k"],
        "group_size_m": args["group_size_m"],
    }
    if args["comm_sms"] is not None:
        config_kwargs["num_sms"] = args["comm_sms"]
    if args["num_xcds"] is not None:
        config_kwargs["num_xcds"] = args["num_xcds"]

    config = FusedConfig(**config_kwargs)

    json_writer = JSONWriter(args["output_file"])
    json_writer.add_field("world_size", world_size)
    json_writer.add_field("operation", "all_gather_matmul")
    json_writer.add_field("k_local", K_local)
    json_writer.add_field("k_total", K)

    for key, value in args.items():
        json_writer.add_field(key, value)

    # Export actual config values to JSON (including defaults)
    json_writer.add_field("block_size_m", config.block_size_m)
    json_writer.add_field("block_size_n", config.block_size_n)
    json_writer.add_field("block_size_k", config.block_size_k)
    json_writer.add_field("group_size_m", config.group_size_m)
    json_writer.add_field("num_sms", config.num_sms)
    json_writer.add_field("num_xcds", config.num_xcds)

    # Create input and output tensors on the Iris symmetric heap.
    # A_sharded is M x K_local, B is K x N, output is M x N.
    A_sharded = shmem.zeros((M, K_local), dtype=datatype)
    B = shmem.zeros((K, N), dtype=datatype)
    C = shmem.zeros((M, N), dtype=datatype)
    expected_tensor = None

    # Fill inputs with deterministic values: each rank has a different
    # A_sharded shard, all ranks share the same B.
    torch.manual_seed(123 + rank)
    A_sharded_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}")
    A_sharded.copy_(A_sharded_data)

    torch.manual_seed(456)  # Same B for all ranks
    B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}")
    B.copy_(B_data)

    # For validation: compute the expected result with plain PyTorch.
    if args["validate"]:
        # Gather all A_sharded matrices and compute expected result
        A_sharded_list = [torch.zeros((M, K_local), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)]
        dist.all_gather(A_sharded_list, A_sharded_data)

        # Concatenate along K dimension: A_gathered = [A_0 | A_1 | ... | A_n]
        A_gathered = torch.cat(A_sharded_list, dim=1)  # (M, K)

        # Expected: A_gathered @ B
        expected_tensor = shmem.zeros((M, N), dtype=datatype)
        expected_tensor.copy_(torch.matmul(A_gathered, B_data))

    comm_stream = torch.cuda.Stream()

    # CUDA-event bookkeeping for the fused kernel's own elapsed time.
    kernel_timing = {
        "all_gather_matmul": {
            "start_event": torch.cuda.Event(enable_timing=True),
            "end_event": torch.cuda.Event(enable_timing=True),
            "ms": 0,
            "experiments": 0,
        },
    }

    workspace = None

    def run_experiment():
        """Run one fused all_gather_matmul and accumulate its event timing."""
        nonlocal kernel_timing, workspace

        # Preamble if available (allocates/reuses the op workspace).
        if hasattr(shmem.ops, "all_gather_matmul_preamble"):
            workspace = shmem.ops.all_gather_matmul_preamble(
                C,
                A_sharded,
                B,
                config=config,
                workspace=workspace,
            )

        shmem.barrier()

        torch.cuda.nvtx.range_push("All-Gather-Matmul")
        with torch.cuda.stream(comm_stream):
            kernel_timing["all_gather_matmul"]["start_event"].record()
            shmem.ops.all_gather_matmul(
                C,
                A_sharded,
                B,
                config=config,
                async_op=False,
                workspace=workspace,
            )
            kernel_timing["all_gather_matmul"]["end_event"].record()
            kernel_timing["all_gather_matmul"]["experiments"] += 1
        torch.cuda.nvtx.range_pop()

        # Synchronize before querying event timing
        shmem.barrier()

        ms = kernel_timing["all_gather_matmul"]["start_event"].elapsed_time(
            kernel_timing["all_gather_matmul"]["end_event"]
        )
        kernel_timing["all_gather_matmul"]["ms"] += ms

    # Synchronize across all GPUs
    shmem.barrier()

    if args["validate"]:
        shmem.info("Validating...")

        # Reset output before validation
        C.zero_()
        shmem.barrier()

        run_experiment()
        torch.cuda.synchronize()
        shmem.barrier()

        atol = 1e-1 if datatype == torch.float16 else 1e-3
        success = torch.allclose(C, expected_tensor, atol=atol)
        if not success:
            max_diff = torch.abs(C - expected_tensor).max().item()
            shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}")

        if success:
            shmem.info("All-gather-matmul validation passed!")
        else:
            shmem.error("All-gather-matmul validation failed!")

        json_writer.add_field("success", success)

        # Wait for all to finish validation
        shmem.barrier()

    # Problem-size metrics shared by the Iris and PyTorch benchmark sections.
    # Hoisted out of the --benchmark branch so --benchmark_pytorch alone does
    # not hit a NameError.
    total_flops = 2 * M * N * K  # 2*M*N*K flops for the GEMM
    total_tflops_unit = total_flops * 1e-12
    # All-gather moves (world_size - 1) * M * K_local * element_size bytes.
    element_size = torch.tensor([], dtype=datatype).element_size()
    input_bytes = M * K_local * element_size
    total_bytes = input_bytes * (world_size - 1)
    total_bytes_gb = total_bytes / (1024**3)

    if args["benchmark"]:
        # Warmup for benchmarking
        for k in ["all_gather_matmul"]:
            kernel_timing[k]["ms"] = 0
            kernel_timing[k]["experiments"] = 0

        iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1)

        for k in ["all_gather_matmul"]:
            kernel_timing[k]["ms"] = 0
            kernel_timing[k]["experiments"] = 0

        # Reset output before benchmarking
        C.zero_()
        shmem.barrier()

        shmem.info("Benchmarking...")

        triton_ms = iris.do_bench(run_experiment, shmem.barrier)
        # Average per-experiment kernel time from the CUDA events.
        avg_kernel_ms = kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]
        tflops = total_tflops_unit / (avg_kernel_ms * 1e-3)
        bandwidth_gbps = total_bytes_gb / (avg_kernel_ms * 1e-3)

        shmem.info(
            f"All-gather-matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): "
            f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s"
        )

        json_writer.add_field("tflops", tflops)
        json_writer.add_field("bandwidth_gbps", bandwidth_gbps)
        json_writer.add_field("total_ms", triton_ms)
        json_writer.add_field("total_flops", total_flops)
        json_writer.add_field("total_bytes", total_bytes)
        json_writer.add_field("total_bytes_gb", total_bytes_gb)
        json_writer.add_field("all_gather_matmul_ms", avg_kernel_ms)
        json_writer.add_field("all_gather_matmul_experiments", kernel_timing["all_gather_matmul"]["experiments"])

        # Wait for all to finish benchmarking
        shmem.barrier()

    # Benchmark PyTorch (all_gather_into_tensor + matmul) for comparison
    if args["benchmark_pytorch"]:
        shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...")

        # Create PyTorch tensors (not on Iris heap)
        pytorch_A_sharded = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}")
        pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}")
        pytorch_A_gathered = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}")
        pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}")

        def run_pytorch_experiment():
            dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded)
            # Write into the preallocated output instead of rebinding a local.
            torch.matmul(pytorch_A_gathered, pytorch_B, out=pytorch_C)

        # Warmup
        for _ in range(10):
            run_pytorch_experiment()
        torch.cuda.synchronize()
        dist.barrier()

        # Benchmark
        dist.barrier()
        pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier)

        # Calculate TFLOPS and bandwidth
        pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3)
        pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3)

        shmem.info(
            f"PyTorch all_gather_into_tensor+matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): "
            f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s"
        )

        if args["benchmark"]:
            # Speedup is only meaningful when the Iris benchmark also ran.
            speedup = (tflops / pytorch_tflops) if pytorch_tflops > 0 else 0
            shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x")
            json_writer.add_field("iris_speedup", speedup)

        json_writer.add_field("pytorch_tflops", pytorch_tflops)
        json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps)
        json_writer.add_field("pytorch_ms", pytorch_ms)

        # Wait for all to finish PyTorch benchmarking
        shmem.barrier()

    # Only rank 0 writes the JSON report.
    if rank == 0:
        json_writer.flush()
        json_writer.display()

    shmem.barrier()
    dist.destroy_process_group()


def main():
    """Parse arguments and spawn one worker process per rank."""
    args = parse_args()
    num_ranks = args["num_ranks"]
    init_url = args["init_url"]

    mp.spawn(
        fn=_worker,
        args=(num_ranks, init_url, args),
        nprocs=num_ranks,
        join=True,
    )


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

"""
Benchmark for iris.ops matmul_all_gather fused operation.

This benchmark showcases the fused GEMM + All-Gather operation where each rank
computes a local matmul and then gathers results along M dimension.
"""

import argparse
import random
import sys

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

from examples.common.utils import JSONWriter

import iris
from iris.ops import FusedConfig

torch.manual_seed(123)
random.seed(123)

# CLI datatype name -> torch dtype.
DTYPE_MAP = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}


def parse_args():
    """Parse command-line arguments for the matmul_all_gather benchmark."""
    parser = argparse.ArgumentParser(
        description="Benchmark matmul_all_gather fused operation.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("-m", type=int, default=16384, help="Number of rows per rank in matrix A (M_local)")
    parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)")
    parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)")
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode")
    parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode")
    parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode")
    parser.add_argument(
        "--datatype",
        type=str,
        default="fp16",
        choices=["fp16", "fp32", "bf16"],
        help="Datatype of tensors",
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default="matmul_all_gather.json",
        help="Output file",
    )
    parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size")
    parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)")
    parser.add_argument(
        "--benchmark_pytorch",
        action="store_true",
        help="Also benchmark PyTorch (matmul + all_gather_into_tensor) for comparison",
    )
    parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension")
    parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension")
    parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension")
    parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling")
    parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)")
    parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes")
    parser.add_argument(
        "--init_url", type=str, default="tcp://127.0.0.1:29529", help="Initialization URL for distributed setup"
    )

    return vars(parser.parse_args())


def _worker(local_rank: int, world_size: int, init_url: str, args: dict):
    """Worker function for PyTorch distributed execution.

    Initializes the process group and the Iris symmetric heap, runs the fused
    matmul_all_gather operation, and optionally validates against a PyTorch
    reference and/or benchmarks both implementations.
    """
    backend = "nccl" if torch.cuda.is_available() else "gloo"
    dist.init_process_group(
        backend=backend,
        init_method=init_url,
        world_size=world_size,
        rank=local_rank,
        device_id=torch.device(f"cuda:{local_rank}"),
    )

    shmem = iris.iris(args["heap_size"])
    rank = shmem.get_rank()
    world_size = shmem.get_num_ranks()

    # Datatype mapping (argparse `choices` restricts values, but fail loudly
    # if an unknown name slips through).
    datatype = DTYPE_MAP.get(args["datatype"])
    if datatype is None:
        print("Unknown datatype.")
        sys.exit(1)

    M_local = args["m"]  # Local M dimension
    M = M_local * world_size  # Total M after gather
    N = args["n"]
    K = args["k"]

    # Create config with parameters; optional fields fall back to
    # FusedConfig's own defaults / auto-detection.
    config_kwargs = {
        "block_size_m": args["block_size_m"],
        "block_size_n": args["block_size_n"],
        "block_size_k": args["block_size_k"],
        "group_size_m": args["group_size_m"],
    }
    if args["comm_sms"] is not None:
        config_kwargs["num_sms"] = args["comm_sms"]
    if args["num_xcds"] is not None:
        config_kwargs["num_xcds"] = args["num_xcds"]

    config = FusedConfig(**config_kwargs)

    json_writer = JSONWriter(args["output_file"])
    json_writer.add_field("world_size", world_size)
    json_writer.add_field("operation", "matmul_all_gather")
    json_writer.add_field("m_local", M_local)
    json_writer.add_field("m_total", M)

    for key, value in args.items():
        json_writer.add_field(key, value)

    # Export actual config values to JSON (including defaults)
    json_writer.add_field("block_size_m", config.block_size_m)
    json_writer.add_field("block_size_n", config.block_size_n)
    json_writer.add_field("block_size_k", config.block_size_k)
    json_writer.add_field("group_size_m", config.group_size_m)
    json_writer.add_field("num_sms", config.num_sms)
    json_writer.add_field("num_xcds", config.num_xcds)

    # Create input and output tensors on the Iris symmetric heap.
    # A_local is M_local x K, output is M x N (gathered).
    A_local = shmem.zeros((M_local, K), dtype=datatype)
    B = shmem.zeros((K, N), dtype=datatype)
    C = shmem.zeros((M, N), dtype=datatype)
    expected_tensor = None

    # Fill inputs with deterministic values: each rank has a different
    # A_local, all ranks share the same B.
    torch.manual_seed(123 + rank)
    A_local_data = torch.randn((M_local, K), dtype=datatype, device=f"cuda:{rank}")
    A_local.copy_(A_local_data)

    torch.manual_seed(456)  # Same B for all ranks
    B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}")
    B.copy_(B_data)

    # For validation: compute the expected result with plain PyTorch.
    if args["validate"]:
        # Gather all A_local matrices and compute expected result
        A_local_list = [torch.zeros((M_local, K), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)]
        dist.all_gather(A_local_list, A_local_data)

        # Expected: [A_0 @ B; A_1 @ B; ...; A_n @ B] stacked along M
        expected_tensor = shmem.zeros((M, N), dtype=datatype)
        expected_result = torch.cat([torch.matmul(a_rank, B_data) for a_rank in A_local_list], dim=0)
        expected_tensor.copy_(expected_result)

    comm_stream = torch.cuda.Stream()

    # CUDA-event bookkeeping for the fused kernel's own elapsed time.
    kernel_timing = {
        "matmul_all_gather": {
            "start_event": torch.cuda.Event(enable_timing=True),
            "end_event": torch.cuda.Event(enable_timing=True),
            "ms": 0,
            "experiments": 0,
        },
    }

    workspace = None

    def run_experiment():
        """Run one fused matmul_all_gather and accumulate its event timing."""
        nonlocal kernel_timing, workspace

        shmem.barrier()

        torch.cuda.nvtx.range_push("Matmul-All-Gather")
        with torch.cuda.stream(comm_stream):
            kernel_timing["matmul_all_gather"]["start_event"].record()
            shmem.ops.matmul_all_gather(
                C,
                A_local,
                B,
                config=config,
                async_op=False,
                workspace=workspace,
            )
            kernel_timing["matmul_all_gather"]["end_event"].record()
            kernel_timing["matmul_all_gather"]["experiments"] += 1
        torch.cuda.nvtx.range_pop()

        # Synchronize before querying event timing
        shmem.barrier()

        ms = kernel_timing["matmul_all_gather"]["start_event"].elapsed_time(
            kernel_timing["matmul_all_gather"]["end_event"]
        )
        kernel_timing["matmul_all_gather"]["ms"] += ms

    # Synchronize across all GPUs
    shmem.barrier()

    if args["validate"]:
        shmem.info("Validating...")

        # Reset output before validation
        C.zero_()
        shmem.barrier()

        run_experiment()
        torch.cuda.synchronize()
        shmem.barrier()

        atol = 1e-1 if datatype == torch.float16 else 1e-3
        success = torch.allclose(C, expected_tensor, atol=atol)
        if not success:
            max_diff = torch.abs(C - expected_tensor).max().item()
            shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}")

        if success:
            shmem.info("Matmul-all-gather validation passed!")
        else:
            shmem.error("Matmul-all-gather validation failed!")

        json_writer.add_field("success", success)

        # Wait for all to finish validation
        shmem.barrier()

    # Problem-size metrics shared by the Iris and PyTorch benchmark sections.
    # Hoisted out of the --benchmark branch so --benchmark_pytorch alone does
    # not hit a NameError.
    total_flops = 2 * M_local * N * K  # local GEMM flops (same on all ranks)
    total_tflops_unit = total_flops * 1e-12
    # All-gather moves (world_size - 1) * M_local * N * element_size bytes.
    element_size = torch.tensor([], dtype=datatype).element_size()
    output_bytes = M_local * N * element_size
    total_bytes = output_bytes * (world_size - 1)
    total_bytes_gb = total_bytes / (1024**3)

    if args["benchmark"]:
        # Warmup for benchmarking
        for k in ["matmul_all_gather"]:
            kernel_timing[k]["ms"] = 0
            kernel_timing[k]["experiments"] = 0

        iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1)

        for k in ["matmul_all_gather"]:
            kernel_timing[k]["ms"] = 0
            kernel_timing[k]["experiments"] = 0

        # Reset output before benchmarking
        C.zero_()
        shmem.barrier()

        shmem.info("Benchmarking...")

        triton_ms = iris.do_bench(run_experiment, shmem.barrier)
        # Average per-experiment kernel time from the CUDA events.
        avg_kernel_ms = kernel_timing["matmul_all_gather"]["ms"] / kernel_timing["matmul_all_gather"]["experiments"]
        tflops = total_tflops_unit / (avg_kernel_ms * 1e-3)
        bandwidth_gbps = total_bytes_gb / (avg_kernel_ms * 1e-3)

        shmem.info(
            f"Matmul-all-gather (M_local={M_local}, M_total={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): "
            f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s"
        )

        json_writer.add_field("tflops", tflops)
        json_writer.add_field("bandwidth_gbps", bandwidth_gbps)
        json_writer.add_field("total_ms", triton_ms)
        json_writer.add_field("total_flops", total_flops)
        json_writer.add_field("total_bytes", total_bytes)
        json_writer.add_field("total_bytes_gb", total_bytes_gb)
        json_writer.add_field("matmul_all_gather_ms", avg_kernel_ms)
        json_writer.add_field("matmul_all_gather_experiments", kernel_timing["matmul_all_gather"]["experiments"])

        # Wait for all to finish benchmarking
        shmem.barrier()

    # Benchmark PyTorch (matmul + all_gather_into_tensor) for comparison
    if args["benchmark_pytorch"]:
        shmem.info("Benchmarking PyTorch (matmul + all_gather_into_tensor)...")

        # Create PyTorch tensors (not on Iris heap)
        pytorch_A_local = torch.randn(M_local, K, dtype=datatype, device=f"cuda:{rank}")
        pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}")
        pytorch_C_local = torch.zeros(M_local, N, dtype=datatype, device=f"cuda:{rank}")
        pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}")

        def run_pytorch_experiment():
            # Write into the preallocated output instead of rebinding a local.
            torch.matmul(pytorch_A_local, pytorch_B, out=pytorch_C_local)
            dist.all_gather_into_tensor(pytorch_C, pytorch_C_local)

        # Warmup
        for _ in range(10):
            run_pytorch_experiment()
        torch.cuda.synchronize()
        dist.barrier()

        # Benchmark
        dist.barrier()
        pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier)

        # Calculate TFLOPS and bandwidth
        pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3)
        pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3)

        shmem.info(
            f"PyTorch matmul+all_gather_into_tensor (M_local={M_local}, M_total={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): "
            f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s"
        )

        if args["benchmark"]:
            # Speedup is only meaningful when the Iris benchmark also ran.
            speedup = (tflops / pytorch_tflops) if pytorch_tflops > 0 else 0
            shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x")
            json_writer.add_field("iris_speedup", speedup)

        json_writer.add_field("pytorch_tflops", pytorch_tflops)
        json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps)
        json_writer.add_field("pytorch_ms", pytorch_ms)

        # Wait for all to finish PyTorch benchmarking
        shmem.barrier()

    # Only rank 0 writes the JSON report.
    if rank == 0:
        json_writer.flush()
        json_writer.display()

    shmem.barrier()
    dist.destroy_process_group()


def main():
    """Parse arguments and spawn one worker process per rank."""
    args = parse_args()
    num_ranks = args["num_ranks"]
    init_url = args["init_url"]

    mp.spawn(
        fn=_worker,
        args=(num_ranks, init_url, args),
        nprocs=num_ranks,
        join=True,
    )


if __name__ == "__main__":
    main()
000000000..fd923e051 --- /dev/null +++ b/benchmark/ops/matmul_all_reduce/benchmark.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +""" +Benchmark for iris.ops matmul_all_reduce fused operation. + +This benchmark showcases the fused GEMM + All-Reduce operation and reports +achieved TFLOPS and communication bandwidth. +""" + +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import random +import argparse + +from examples.common.utils import JSONWriter + +import iris +from iris.ops import FusedConfig + +torch.manual_seed(123) +random.seed(123) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark matmul_all_reduce fused operation.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") + parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") + parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)") + parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") + parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") + parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") + parser.add_argument( + "--datatype", + type=str, + default="fp16", + choices=["fp16", "fp32", "bf16"], + help="Datatype of tensors", + ) + parser.add_argument( + "--output_file", + type=str, + default="matmul_all_reduce.json", + help="Output file", + ) + parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") + parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") + parser.add_argument( + "--benchmark_pytorch", + action="store_true", + help="Also benchmark PyTorch (matmul + all_reduce) 
for comparison", + ) + parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") + parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") + parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") + parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") + parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") + parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") + parser.add_argument( + "--all_reduce_variant", + type=str, + default="two_shot", + choices=["atomic", "ring", "two_shot", "one_shot", "spinlock"], + help="All-reduce variant to use", + ) + parser.add_argument( + "--init_url", type=str, default="tcp://127.0.0.1:29528", help="Initialization URL for distributed setup" + ) + + return vars(parser.parse_args()) + + +def _worker(local_rank: int, world_size: int, init_url: str, args: dict): + """Worker function for PyTorch distributed execution.""" + backend = "nccl" if torch.cuda.is_available() else "gloo" + dist.init_process_group( + backend=backend, + init_method=init_url, + world_size=world_size, + rank=local_rank, + device_id=torch.device(f"cuda:{local_rank}"), + ) + + shmem = iris.iris(args["heap_size"]) + rank = shmem.get_rank() + world_size = shmem.get_num_ranks() + + # Datatype mapping + datatype = torch.float32 + if args["datatype"] == "fp16": + datatype = torch.float16 + elif args["datatype"] == "fp32": + datatype = torch.float32 + elif args["datatype"] == "bf16": + datatype = torch.bfloat16 + else: + print("Unknown datatype.") + exit(1) + + M = args["m"] + N = args["n"] + K = args["k"] + + # Create config with parameters + config_kwargs = { + "block_size_m": args["block_size_m"], + "block_size_n": args["block_size_n"], + "block_size_k": args["block_size_k"], + "group_size_m": args["group_size_m"], 
+ "all_reduce_variant": args["all_reduce_variant"], + } + if args["comm_sms"] is not None: + config_kwargs["num_sms"] = args["comm_sms"] + if args["num_xcds"] is not None: + config_kwargs["num_xcds"] = args["num_xcds"] + + config = FusedConfig(**config_kwargs) + + json_writer = JSONWriter(args["output_file"]) + json_writer.add_field("world_size", world_size) + json_writer.add_field("operation", "matmul_all_reduce") + + for key, value in args.items(): + json_writer.add_field(key, value) + + # Export actual config values to JSON (including defaults) + json_writer.add_field("block_size_m", config.block_size_m) + json_writer.add_field("block_size_n", config.block_size_n) + json_writer.add_field("block_size_k", config.block_size_k) + json_writer.add_field("group_size_m", config.group_size_m) + json_writer.add_field("num_sms", config.num_sms) + json_writer.add_field("num_xcds", config.num_xcds) + json_writer.add_field("all_reduce_variant", config.all_reduce_variant) + + # Create input and output tensors + # Must use shmem.zeros() to allocate on Iris symmetric heap + A = shmem.zeros((M, K), dtype=datatype) + B = shmem.zeros((K, N), dtype=datatype) + C = shmem.zeros((M, N), dtype=datatype) + expected_tensor = None + + # Fill inputs with deterministic values + # Each rank has different A, same B + torch.manual_seed(123 + rank) + A_local_data = torch.randn((M, K), dtype=datatype, device=f"cuda:{rank}") + A.copy_(A_local_data) + + torch.manual_seed(456) # Same B for all ranks + B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") + B.copy_(B_data) + + # For validation: compute expected result + # Reference: each rank computes local C = A @ B, then all_reduce + if args["validate"]: + expected_tensor = shmem.zeros((M, N), dtype=datatype) + C_local_ref = torch.matmul(A_local_data, B_data) + pytorch_output = C_local_ref.clone() + shmem.barrier() + dist.all_reduce(pytorch_output, op=dist.ReduceOp.SUM) + torch.cuda.synchronize() + 
expected_tensor.copy_(pytorch_output) + + comm_stream = torch.cuda.Stream() + + kernel_timing = { + "matmul_all_reduce": { + "start_event": torch.cuda.Event(enable_timing=True), + "end_event": torch.cuda.Event(enable_timing=True), + "ms": 0, + "experiments": 0, + }, + } + + workspace = None + + def run_experiment(): + nonlocal kernel_timing, workspace + + # Preamble if available + if hasattr(shmem.ops, "matmul_all_reduce_preamble"): + workspace = shmem.ops.matmul_all_reduce_preamble( + C, + A, + B, + config=config, + workspace=workspace, + ) + + shmem.barrier() + + torch.cuda.nvtx.range_push("Matmul-All-Reduce") + with torch.cuda.stream(comm_stream): + kernel_timing["matmul_all_reduce"]["start_event"].record() + shmem.ops.matmul_all_reduce( + C, + A, + B, + config=config, + async_op=False, + workspace=workspace, + ) + kernel_timing["matmul_all_reduce"]["end_event"].record() + kernel_timing["matmul_all_reduce"]["experiments"] += 1 + torch.cuda.nvtx.range_pop() + + # Synchronize before querying event timing + shmem.barrier() + + # Update timing + ms = kernel_timing["matmul_all_reduce"]["start_event"].elapsed_time( + kernel_timing["matmul_all_reduce"]["end_event"] + ) + kernel_timing["matmul_all_reduce"]["ms"] += ms + + # Synchronize across all GPUs + shmem.barrier() + + if args["validate"]: + shmem.info("Validating...") + + # Reset output before validation + C.zero_() + shmem.barrier() + + run_experiment() + torch.cuda.synchronize() + shmem.barrier() + + atol = 0.2 if datatype == torch.float16 else 0.3 + success = torch.allclose(C, expected_tensor, atol=atol) + if not success: + max_diff = torch.abs(C - expected_tensor).max().item() + shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") + + if success: + shmem.info("Matmul-all-reduce validation passed!") + else: + shmem.error("Matmul-all-reduce validation failed!") + + json_writer.add_field("success", success) + + # Wait for all to finish validation + shmem.barrier() + + if args["benchmark"]: + # 
Warmup for benchmarking + for k in ["matmul_all_reduce"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) + + for k in ["matmul_all_reduce"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + # Reset output before benchmarking + C.zero_() + shmem.barrier() + + shmem.info("Benchmarking...") + + # Calculate TFLOPS: 2*M*N*K flops + total_flops = 2 * M * N * K + total_tflops_unit = total_flops * 1e-12 + + triton_ms = iris.do_bench(run_experiment, shmem.barrier) + tflops = total_tflops_unit / ( + (kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"]) * 1e-3 + ) + + # Calculate bandwidth for all-reduce part + # All-reduce moves 2 * (world_size - 1) / world_size * data_size bytes + element_size = torch.tensor([], dtype=datatype).element_size() + output_bytes = M * N * element_size + total_bytes = output_bytes * (2 * (world_size - 1)) / world_size + total_bytes_gb = total_bytes / (1024**3) + + bandwidth_gbps = total_bytes_gb / ( + (kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"]) * 1e-3 + ) + + shmem.info( + f"Matmul-all-reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}, variant={args['all_reduce_variant']}): " + f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" + ) + + json_writer.add_field("tflops", tflops) + json_writer.add_field("bandwidth_gbps", bandwidth_gbps) + json_writer.add_field("total_ms", triton_ms) + json_writer.add_field("total_flops", total_flops) + json_writer.add_field("total_bytes", total_bytes) + json_writer.add_field("total_bytes_gb", total_bytes_gb) + json_writer.add_field( + "matmul_all_reduce_ms", + kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"], + ) + json_writer.add_field("matmul_all_reduce_experiments", kernel_timing["matmul_all_reduce"]["experiments"]) + + 
# Wait for all to finish benchmarking + shmem.barrier() + + # Benchmark PyTorch (matmul + all_reduce) for comparison + if args["benchmark_pytorch"]: + shmem.info("Benchmarking PyTorch (matmul + all_reduce)...") + + # Create PyTorch tensors (not on Iris heap) + pytorch_A = torch.randn(M, K, dtype=datatype, device=f"cuda:{rank}") + pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") + pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") + + # Warmup + for _ in range(10): + pytorch_C = torch.matmul(pytorch_A, pytorch_B) + dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) + torch.cuda.synchronize() + dist.barrier() + + # Benchmark + dist.barrier() + + def run_pytorch_experiment(): + pytorch_C = torch.matmul(pytorch_A, pytorch_B) + dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) + + pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) + + # Calculate TFLOPS and bandwidth + pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) + pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) + + shmem.info( + f"PyTorch matmul+all_reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " + f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" + ) + + if args["benchmark"]: + # Calculate performance ratio + iris_tflops = tflops + speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 + shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") + + json_writer.add_field("pytorch_tflops", pytorch_tflops) + json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) + json_writer.add_field("pytorch_ms", pytorch_ms) + json_writer.add_field("iris_speedup", speedup) + + # Wait for all to finish PyTorch benchmarking + shmem.barrier() + + if rank == 0: + json_writer.flush() + json_writer.display() + + shmem.barrier() + dist.destroy_process_group() + + +def main(): + args = parse_args() + num_ranks = args["num_ranks"] + init_url = args["init_url"] + + 
mp.spawn( + fn=_worker, + args=(num_ranks, init_url, args), + nprocs=num_ranks, + join=True, + ) + + +if __name__ == "__main__": + main() diff --git a/benchmark/ops/matmul_reduce_scatter/benchmark.py b/benchmark/ops/matmul_reduce_scatter/benchmark.py new file mode 100644 index 000000000..301444f25 --- /dev/null +++ b/benchmark/ops/matmul_reduce_scatter/benchmark.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +""" +Benchmark for iris.ops matmul_reduce_scatter fused operation. + +This benchmark showcases the fused GEMM + Reduce-Scatter operation where each rank +computes a local matmul, reduces across all ranks, and scatters tiles to ranks. +""" + +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import random +import argparse + +from examples.common.utils import JSONWriter + +import iris +from iris.ops import FusedConfig + +torch.manual_seed(123) +random.seed(123) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark matmul_reduce_scatter fused operation.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") + parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") + parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)") + parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") + parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") + parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") + parser.add_argument( + "--datatype", + type=str, + default="fp16", + choices=["fp16", "fp32", "bf16"], + help="Datatype of tensors", + ) + parser.add_argument( + "--output_file", + type=str, + default="matmul_reduce_scatter.json", + help="Output 
file", + ) + parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") + parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") + parser.add_argument( + "--benchmark_pytorch", + action="store_true", + help="Also benchmark PyTorch (matmul + all_reduce) for comparison", + ) + parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") + parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") + parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") + parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") + parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") + parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") + parser.add_argument( + "--init_url", type=str, default="tcp://127.0.0.1:29531", help="Initialization URL for distributed setup" + ) + + return vars(parser.parse_args()) + + +def _worker(local_rank: int, world_size: int, init_url: str, args: dict): + """Worker function for PyTorch distributed execution.""" + backend = "nccl" if torch.cuda.is_available() else "gloo" + dist.init_process_group( + backend=backend, + init_method=init_url, + world_size=world_size, + rank=local_rank, + device_id=torch.device(f"cuda:{local_rank}"), + ) + + shmem = iris.iris(args["heap_size"]) + rank = shmem.get_rank() + world_size = shmem.get_num_ranks() + + # Datatype mapping + datatype = torch.float32 + if args["datatype"] == "fp16": + datatype = torch.float16 + elif args["datatype"] == "fp32": + datatype = torch.float32 + elif args["datatype"] == "bf16": + datatype = torch.bfloat16 + else: + print("Unknown datatype.") + exit(1) + + M = args["m"] + N = args["n"] + K = args["k"] + + # Create config with parameters + config_kwargs = { + 
"block_size_m": args["block_size_m"], + "block_size_n": args["block_size_n"], + "block_size_k": args["block_size_k"], + "group_size_m": args["group_size_m"], + } + if args["comm_sms"] is not None: + config_kwargs["num_sms"] = args["comm_sms"] + if args["num_xcds"] is not None: + config_kwargs["num_xcds"] = args["num_xcds"] + + config = FusedConfig(**config_kwargs) + + json_writer = JSONWriter(args["output_file"]) + json_writer.add_field("world_size", world_size) + json_writer.add_field("operation", "matmul_reduce_scatter") + + for key, value in args.items(): + json_writer.add_field(key, value) + + # Export actual config values to JSON (including defaults) + json_writer.add_field("block_size_m", config.block_size_m) + json_writer.add_field("block_size_n", config.block_size_n) + json_writer.add_field("block_size_k", config.block_size_k) + json_writer.add_field("group_size_m", config.group_size_m) + json_writer.add_field("num_sms", config.num_sms) + json_writer.add_field("num_xcds", config.num_xcds) + + # Calculate tile distribution + num_pid_m = (M + config.block_size_m - 1) // config.block_size_m + num_pid_n = (N + config.block_size_n - 1) // config.block_size_n + total_tiles = num_pid_m * num_pid_n + tiles_per_rank = total_tiles // world_size + start_tile = rank * tiles_per_rank + if rank == world_size - 1: + tiles_per_rank = total_tiles - start_tile + + json_writer.add_field("total_tiles", total_tiles) + json_writer.add_field("tiles_per_rank", tiles_per_rank) + + # Create input and output tensors + # Each rank computes full A @ B, but only keeps its assigned tiles + A = shmem.zeros((M, K), dtype=datatype) + B = shmem.zeros((K, N), dtype=datatype) + C = shmem.zeros((M, N), dtype=datatype) + expected_tiles = [] + + # Fill inputs with deterministic values + # Each rank has different A, same B + torch.manual_seed(123 + rank) + A_local_data = torch.randn((M, K), dtype=datatype, device=f"cuda:{rank}") + A.copy_(A_local_data) + + torch.manual_seed(456) # Same B for all 
ranks + B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") + B.copy_(B_data) + + # For validation: compute expected result for this rank's tiles + if args["validate"]: + # Gather all A matrices to compute expected result + A_list = [torch.zeros((M, K), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] + dist.all_gather(A_list, A_local_data) + + # Expected: sum of all (A_i @ B) for each rank i, but only for this rank's tiles + expected_full = torch.zeros((M, N), dtype=datatype, device=f"cuda:{rank}") + for A_rank in A_list: + expected_full += torch.matmul(A_rank, B_data) + + # Extract only this rank's tiles + for local_tile_idx in range(tiles_per_rank): + tile_id = start_tile + local_tile_idx + pid_m = tile_id // num_pid_n + pid_n = tile_id % num_pid_n + + m_start = pid_m * config.block_size_m + m_end = min(m_start + config.block_size_m, M) + n_start = pid_n * config.block_size_n + n_end = min(n_start + config.block_size_n, N) + + expected_tiles.append( + { + "tile_id": tile_id, + "pid_m": pid_m, + "pid_n": pid_n, + "m_start": m_start, + "m_end": m_end, + "n_start": n_start, + "n_end": n_end, + "data": expected_full[m_start:m_end, n_start:n_end].clone(), + } + ) + + comm_stream = torch.cuda.Stream() + + kernel_timing = { + "matmul_reduce_scatter": { + "start_event": torch.cuda.Event(enable_timing=True), + "end_event": torch.cuda.Event(enable_timing=True), + "ms": 0, + "experiments": 0, + }, + } + + workspace = None + + def run_experiment(): + nonlocal kernel_timing, workspace + + # Preamble if available + if hasattr(shmem.ops, "matmul_reduce_scatter_preamble"): + workspace = shmem.ops.matmul_reduce_scatter_preamble( + C, + A, + B, + config=config, + workspace=workspace, + ) + + shmem.barrier() + + torch.cuda.nvtx.range_push("Matmul-Reduce-Scatter") + with torch.cuda.stream(comm_stream): + kernel_timing["matmul_reduce_scatter"]["start_event"].record() + shmem.ops.matmul_reduce_scatter( + C, + A, + B, + async_op=False, + 
config=config, + workspace=workspace, + ) + kernel_timing["matmul_reduce_scatter"]["end_event"].record() + kernel_timing["matmul_reduce_scatter"]["experiments"] += 1 + torch.cuda.nvtx.range_pop() + + # Synchronize before querying event timing + shmem.barrier() + + # Update timing + ms = kernel_timing["matmul_reduce_scatter"]["start_event"].elapsed_time( + kernel_timing["matmul_reduce_scatter"]["end_event"] + ) + kernel_timing["matmul_reduce_scatter"]["ms"] += ms + + # Synchronize across all GPUs + shmem.barrier() + + if args["validate"]: + shmem.info("Validating...") + + # Reset output before validation + C.zero_() + shmem.barrier() + + run_experiment() + torch.cuda.synchronize() + shmem.barrier() + + atol = 2e-1 if datatype == torch.float16 else 1e-1 + success = True + + # Validate each tile assigned to this rank + for tile_info in expected_tiles: + C_tile = C[tile_info["m_start"] : tile_info["m_end"], tile_info["n_start"] : tile_info["n_end"]] + expected_tile = tile_info["data"] + + tile_match = torch.allclose(C_tile, expected_tile, atol=atol) + if not tile_match: + max_diff = torch.abs(C_tile - expected_tile).max().item() + shmem.error( + f"Rank {rank}, tile {tile_info['tile_id']} ({tile_info['pid_m']},{tile_info['pid_n']}): " + f"Validation failed, max diff: {max_diff}" + ) + success = False + + if success: + shmem.info("Matmul-reduce-scatter validation passed!") + else: + shmem.error("Matmul-reduce-scatter validation failed!") + + json_writer.add_field("success", success) + + # Wait for all to finish validation + shmem.barrier() + + if args["benchmark"]: + # Warmup for benchmarking + for k in ["matmul_reduce_scatter"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) + + for k in ["matmul_reduce_scatter"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + # Reset output before benchmarking + C.zero_() + shmem.barrier() + + 
shmem.info("Benchmarking...") + + # Calculate TFLOPS: 2*M*N*K flops + total_flops = 2 * M * N * K + total_tflops_unit = total_flops * 1e-12 + + triton_ms = iris.do_bench(run_experiment, shmem.barrier) + tflops = total_tflops_unit / ( + (kernel_timing["matmul_reduce_scatter"]["ms"] / kernel_timing["matmul_reduce_scatter"]["experiments"]) + * 1e-3 + ) + + # Calculate bandwidth for reduce-scatter part + # Similar to all-reduce: 2 * (world_size - 1) / world_size * data_size bytes + element_size = torch.tensor([], dtype=datatype).element_size() + output_bytes = M * N * element_size + total_bytes = output_bytes * (2 * (world_size - 1)) / world_size + total_bytes_gb = total_bytes / (1024**3) + + bandwidth_gbps = total_bytes_gb / ( + (kernel_timing["matmul_reduce_scatter"]["ms"] / kernel_timing["matmul_reduce_scatter"]["experiments"]) + * 1e-3 + ) + + shmem.info( + f"Matmul-reduce-scatter (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " + f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" + ) + + json_writer.add_field("tflops", tflops) + json_writer.add_field("bandwidth_gbps", bandwidth_gbps) + json_writer.add_field("total_ms", triton_ms) + json_writer.add_field("total_flops", total_flops) + json_writer.add_field("total_bytes", total_bytes) + json_writer.add_field("total_bytes_gb", total_bytes_gb) + json_writer.add_field( + "matmul_reduce_scatter_ms", + kernel_timing["matmul_reduce_scatter"]["ms"] / kernel_timing["matmul_reduce_scatter"]["experiments"], + ) + json_writer.add_field( + "matmul_reduce_scatter_experiments", kernel_timing["matmul_reduce_scatter"]["experiments"] + ) + + # Wait for all to finish benchmarking + shmem.barrier() + + # Benchmark PyTorch (matmul + all_reduce) for comparison + # Note: We use all_reduce since PyTorch's reduce_scatter has different semantics + if args["benchmark_pytorch"]: + shmem.info("Benchmarking PyTorch (matmul + all_reduce)...") + + # Create PyTorch tensors (not on Iris heap) + pytorch_A 
= torch.randn(M, K, dtype=datatype, device=f"cuda:{rank}") + pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") + pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") + + # Warmup + for _ in range(10): + pytorch_C = torch.matmul(pytorch_A, pytorch_B) + dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) + torch.cuda.synchronize() + dist.barrier() + + # Benchmark + dist.barrier() + + def run_pytorch_experiment(): + pytorch_C = torch.matmul(pytorch_A, pytorch_B) + dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) + + pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) + + # Calculate TFLOPS and bandwidth + pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) + pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) + + shmem.info( + f"PyTorch matmul+all_reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " + f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" + ) + + if args["benchmark"]: + # Calculate performance ratio + iris_tflops = tflops + speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 + shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") + + json_writer.add_field("pytorch_tflops", pytorch_tflops) + json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) + json_writer.add_field("pytorch_ms", pytorch_ms) + json_writer.add_field("iris_speedup", speedup) + + # Wait for all to finish PyTorch benchmarking + shmem.barrier() + + if rank == 0: + json_writer.flush() + json_writer.display() + + shmem.barrier() + dist.destroy_process_group() + + +def main(): + args = parse_args() + num_ranks = args["num_ranks"] + init_url = args["init_url"] + + mp.spawn( + fn=_worker, + args=(num_ranks, init_url, args), + nprocs=num_ranks, + join=True, + ) + + +if __name__ == "__main__": + main() diff --git a/iris/ops/__init__.py b/iris/ops/__init__.py index e0d12ba51..a6ed4a659 100644 --- a/iris/ops/__init__.py +++ 
b/iris/ops/__init__.py @@ -141,17 +141,16 @@ def matmul_all_gather(self, output_tensor, A, B, bias=None, async_op=False, conf """ return matmul_all_gather(self._shmem, output_tensor, A, B, bias, async_op, config, workspace) - def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, config=None, workspace=None): + def matmul_reduce_scatter(self, output_tensor, A, B, async_op=False, config=None, workspace=None): """ Fused matrix multiplication and reduce-scatter. - Computes: output = reduce_scatter(A @ B + bias) along N dimension + Computes: output = reduce_scatter(A @ B) where each rank keeps assigned tiles Args: - output_tensor: Output tensor (M, N_local) where N_local = N / world_size + output_tensor: Output tensor (M, N) - will contain reduced tiles for this rank A: Input matrix A (M, K) B: Input matrix B (K, N) - bias: Optional bias vector (M,) or (N,) async_op: If False, performs barrier at end config: Optional FusedConfig for tuning workspace: Optional pre-allocated workspace @@ -160,11 +159,10 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, workspace: Updated workspace object Example: - >>> N_local = N // world_size - >>> output = shmem.zeros((M, N_local), dtype=torch.float16) + >>> output = shmem.zeros((M, N), dtype=torch.float16) >>> shmem.ops.matmul_reduce_scatter(output, A, B) """ - return matmul_reduce_scatter(self._shmem, output_tensor, A, B, bias, async_op, config, workspace) + return matmul_reduce_scatter(self._shmem, output_tensor, A, B, async_op, config, workspace) # Export public API From ef227b08acacc7534f96349e3845064db09589ea Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sat, 7 Feb 2026 19:14:58 +0000 Subject: [PATCH 02/60] Merge conflicts. 
--- benchmark/ops/all_gather_matmul/benchmark.py | 8 + iris/iris.py | 15 +- iris/iris.py.backup | 2255 ++++++++++++++++++ iris/ops/all_gather_matmul.py.with_chunked | 521 ++++ iris/ops/config.py | 26 +- iris/ops/workspace.py | 4 + iris/x/gather.py | 2 +- tests/ops/test_all_gather_matmul.py | 21 +- 8 files changed, 2831 insertions(+), 21 deletions(-) create mode 100644 iris/iris.py.backup create mode 100644 iris/ops/all_gather_matmul.py.with_chunked diff --git a/benchmark/ops/all_gather_matmul/benchmark.py b/benchmark/ops/all_gather_matmul/benchmark.py index 3bc45579e..20ff0c536 100644 --- a/benchmark/ops/all_gather_matmul/benchmark.py +++ b/benchmark/ops/all_gather_matmul/benchmark.py @@ -61,6 +61,13 @@ def parse_args(): parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") + parser.add_argument( + "--variant", + type=str, + default="pull", + choices=["pull", "chunked"], + help="All-gather matmul variant (pull or chunked)", + ) parser.add_argument( "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" ) @@ -106,6 +113,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): "block_size_n": args["block_size_n"], "block_size_k": args["block_size_k"], "group_size_m": args["group_size_m"], + "all_gather_matmul_variant": args["variant"], } if args["comm_sms"] is not None: config_kwargs["num_sms"] = args["comm_sms"] diff --git a/iris/iris.py b/iris/iris.py index 5032a640e..9b8a3d35a 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1793,17 +1793,12 @@ def __translate(ptr, from_rank, to_rank, heap_bases): # Cast to_base back to pointer type translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) - # Optimization to vectorize the load/store - # We can't 
do this in general because we don't know the shape of the tensor or block sizes - # ptr = tl.max_contiguous(tl.multiple_of(ptr, (16, 16)), (16, 32)) + # Vectorization hints: must be <= minimum block size used by any caller. + # (32, 32) is safe since all supported block sizes are multiples of 32. + # Largest vectorized load instruction is dwordx4 (128-bits = 8 x fp16). + translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) + translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) - # 0 You can use this if your block sizes are multiples of 32. - # Largest vectorized load instruction is dwordx4 (128-bits) - # translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) - # translated_ptr = tl.max_contiguous(translated_ptr, (1, 32)) - - # ptr = tl.max_contiguous(tl.multiple_of(ptr, 512), 512) - # translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, 512), 512) return translated_ptr diff --git a/iris/iris.py.backup b/iris/iris.py.backup new file mode 100644 index 000000000..e8932c3c8 --- /dev/null +++ b/iris/iris.py.backup @@ -0,0 +1,2255 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Iris: Multi-GPU Communication and Memory Management Framework + +Iris is a high-performance framework that enables seamless multi-GPU programming in Triton, +enabling fine-grained communication and compute overlap natively in Triton +across multiple GPUs with SHMEM-like Remote Memory Access (RMA) capabilities. 
+ +Key Features: +- Symmetric heap management across multiple GPUs +- High-performance atomic operations (add, cas, xchg, xor, and, or, min, max) +- Efficient load/store operations with rank-to-rank communication +- Memory allocation and deallocation utilities +- Built-in logging with rank information +- PyTorch distributed integration for distributed computing + +Example: + >>> import iris + >>> ctx = iris.iris(heap_size=2**30) # 1GB heap + >>> tensor = ctx.zeros(1024, 1024, dtype=torch.float32) +""" + +import triton +import triton.language as tl + +from iris._distributed_helpers import ( + init_distributed, + distributed_barrier, + distributed_broadcast_scalar, + distributed_broadcast_tensor, +) +from iris.hip import ( + set_device, + get_cu_count, + count_devices, +) +from iris.symmetric_heap import SymmetricHeap +import numpy as np +import math +import torch +import logging + +# Import logging functionality from the separate logging module +from .logging import logger + + +class Iris: + """ + Main Iris class for multi-GPU communication and memory management. + + This class provides a unified interface for distributed GPU operations including + memory allocation, atomic operations, and inter-rank communication. + + Args: + heap_size (int): Size of the symmetric heap in bytes. 
Default: 1GB (2^30) + + Example: + >>> ctx = iris.iris(heap_size=2**31) # 2GB heap + >>> print(f"Rank {ctx.cur_rank} of {ctx.num_ranks}") # Rank 0 of 1 + >>> tensor = ctx.zeros(1000, 1000, dtype=torch.float32) + """ + + def __init__(self, heap_size=1 << 30): + # Initialize distributed environment + comm, cur_rank, num_ranks = init_distributed() + num_gpus = count_devices() + + gpu_id = cur_rank % num_gpus + set_device(gpu_id) + + self.comm = comm + self.num_ranks = num_ranks + self.cur_rank = cur_rank + self.gpu_id = gpu_id + self.heap_size = heap_size + + # Initialize symmetric heap + self.heap = SymmetricHeap(heap_size, gpu_id, cur_rank, num_ranks) + self.device = f"cuda:{gpu_id}" + self.heap_bases = self.heap.get_heap_bases() + + for i in range(num_ranks): + self.debug(f"GPU {i}: Heap base {hex(int(self.heap_bases[i].item()))}") + + distributed_barrier() + + # Initialize CCL interface + self.ccl = self.CCL(self) + + # Lazy initialization for ops interface + self._ops = None + + def _log_with_rank(self, level, message): + """Helper method to log with rank information injected into the record.""" + if logger.isEnabledFor(level): + record = logging.LogRecord( + name=logger.name, level=level, pathname="", lineno=0, msg=message, args=(), exc_info=None + ) + # Inject rank information into the record + record.iris_rank = self.cur_rank + record.iris_num_ranks = self.num_ranks + logger.handle(record) + + def debug(self, message): + """ + Log a debug message with rank information. + + Args: + message (str): Human-readable message to log at debug level. + + Notes: + The log record is enriched with ``iris_rank`` and ``iris_num_ranks`` so + formatters can display the originating rank and world size. + + Example: + >>> ctx = iris.iris() + >>> iris.set_logger_level(iris.DEBUG) + >>> ctx.debug("Allocating buffers") # [Iris] [0/1] Allocating buffers + """ + self._log_with_rank(logging.DEBUG, message) + + def info(self, message): + """ + Log an info message with rank information. 
+ + Args: + message (str): Human-readable message to log at info level. + + Example: + >>> ctx = iris.iris() + >>> ctx.info("Starting iteration 0") # [Iris] [0/1] Starting iteration 0 + """ + self._log_with_rank(logging.INFO, message) + + def warning(self, message): + """ + Log a warning message with rank information. + + Args: + message (str): Human-readable message to log at warning level. + + Example: + >>> ctx = iris.iris() + >>> ctx.warning("Memory usage is high") # [Iris] [0/1] Memory usage is high + """ + self._log_with_rank(logging.WARNING, message) + + def error(self, message): + """ + Log an error message with rank information. + + Args: + message (str): Human-readable message to log at error level. + + Example: + >>> ctx = iris.iris() + >>> ctx.error("Failed to allocate memory") # [Iris] [0/1] Failed to allocate memory + """ + self._log_with_rank(logging.ERROR, message) + + @property + def ops(self): + """ + Access fused GEMM+CCL operations. + + This property provides a namespace for high-level fused operations that combine + matrix multiplication with collective communication. Operations automatically infer + dimensions, strides, and hardware parameters from input tensors. 
+ + Available operations: + - matmul_all_reduce: GEMM + All-Reduce + - all_gather_matmul: All-Gather + GEMM + - matmul_all_gather: GEMM + All-Gather + - matmul_reduce_scatter: GEMM + Reduce-Scatter + + Returns: + OpsNamespace: Namespace with fused operation methods + + Raises: + ImportError: If tritonBLAS is not available + + Example: + >>> ctx = iris.iris() + >>> A = ctx.randn((1024, 512), dtype=torch.float16) + >>> B = ctx.randn((512, 2048), dtype=torch.float16) + >>> output = ctx.zeros((1024, 2048), dtype=torch.float16) + >>> ctx.ops.matmul_all_reduce(output, A, B, ctx) + """ + if self._ops is None: + from iris.ops import OpsNamespace + + self._ops = OpsNamespace(self) + return self._ops + + def broadcast(self, value, source_rank=0): + """ + Broadcast a value from one rank to all ranks. + + This method automatically detects the type of value and uses the appropriate + broadcast mechanism: + - For tensors and arrays: uses efficient PyTorch distributed tensor collectives + - For scalars and other objects: uses object broadcast + + Args: + value (Any): The value to broadcast. Can be a scalar, tensor, numpy array, + or any picklable object. Only the ``source_rank`` value is used; + other ranks should pass a placeholder (e.g., ``None``). + source_rank (int): Rank id that holds the authoritative value. + + Returns: + Any: The value broadcast to all ranks. Tensors and arrays are returned as + numpy arrays; scalars and objects are returned in their original type. 
+ + Examples: + >>> ctx = iris.iris() + >>> # Broadcasting a scalar + >>> value = 42 if ctx.cur_rank == 0 else None + >>> value = ctx.broadcast(value, source_rank=0) # All ranks get 42 + >>> + >>> # Broadcasting a tensor + >>> if ctx.cur_rank == 0: + >>> data = torch.randn(10, 10) + >>> else: + >>> data = None + >>> data = ctx.broadcast(data, source_rank=0) # All ranks get the same array + """ + # Check if the value on source_rank is a tensor or array-like + if self.cur_rank == source_rank and value is not None: + # Explicitly exclude strings and non-numeric types + if isinstance(value, (str, dict, bool)): + is_tensor = False + elif isinstance(value, torch.Tensor): + is_tensor = True + elif isinstance(value, np.ndarray): + is_tensor = True + elif isinstance(value, (list, tuple)): + # Try to convert list/tuple to tensor to check if it's numeric + try: + torch.as_tensor(value) + is_tensor = True + except (TypeError, ValueError): + is_tensor = False + else: + # For other types, try to convert and check + try: + test_array = np.asarray(value) + # Check if it's a numeric dtype that torch can handle + if np.issubdtype(test_array.dtype, np.number): + torch.as_tensor(test_array) + is_tensor = True + else: + is_tensor = False + except (TypeError, ValueError): + is_tensor = False + else: + is_tensor = False + + # Broadcast the type decision to all ranks + is_tensor = distributed_broadcast_scalar(is_tensor, source_rank) + + if is_tensor: + return distributed_broadcast_tensor(value, root=source_rank) + else: + return distributed_broadcast_scalar(value, source_rank) + + def __allocate(self, num_elements, dtype): + """Allocate memory using the symmetric heap.""" + self.debug(f"allocate: num_elements = {num_elements}, dtype = {dtype}") + return self.heap.allocate(num_elements, dtype) + + def __parse_size(self, size): + # Handle nested tuples/lists by flattening them recursively + while len(size) == 1 and isinstance(size[0], (tuple, list)): + size = size[0] + num_elements = 
math.prod(size) + return size, num_elements + + def zeros_like( + self, input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format + ): + """ + Returns a tensor filled with the scalar value 0, with the same size as input, allocated on the Iris symmetric heap. + + Args: + input (Tensor): the size of input will determine size of the output tensor. + + Keyword Arguments: + dtype (torch.dtype, optional): the desired data type of returned Tensor. + Default: if None, defaults to the dtype of input. + layout (torch.layout, optional): the desired layout of returned tensor. + Default: if None, defaults to the layout of input. Note: Iris tensors are always contiguous (strided). + device (torch.device, optional): the desired device of returned tensor. + Default: if None, defaults to the device of input. Must be compatible with this Iris instance. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. + memory_format (torch.memory_format, optional): the desired memory format of returned Tensor. + Default: torch.preserve_format. 
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> input_tensor = ctx.ones(2, 3) + >>> zeros_tensor = ctx.zeros_like(input_tensor) + >>> print(zeros_tensor.shape) # torch.Size([2, 3]) + """ + self.debug( + f"zeros_like: input_shape = {input.shape}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" + ) + + # Use input's properties as defaults if not specified + if dtype is None: + dtype = input.dtype + if layout is None: + layout = input.layout + if device is None: + device = input.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Get the size from input tensor + size = input.size() + num_elements = input.numel() + + # Allocate new tensor with the same size + new_tensor = self.__allocate(num_elements, dtype) + new_tensor.zero_() + + # Reshape to match input size + new_tensor = new_tensor.reshape(size) + + # Apply the requested memory format + new_tensor = self.__apply_memory_format(new_tensor, size, memory_format, input) + + # Apply the requested layout + new_tensor = self.__apply_layout(new_tensor, layout) + + # Set requires_grad if specified + if requires_grad: + new_tensor.requires_grad_() + + return new_tensor + + def arange( + self, start=0, end=None, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False + ): + """ + Returns a 1-D tensor of size ⌈(end - start) / step⌉ with values from the interval [start, end) + taken with common difference step beginning from start. The tensor is allocated on the symmetric heap. + + Note: When using floating-point dtypes (especially reduced precision types like bfloat16), + the results may be affected by floating-point rounding behavior. Some values in the sequence + might not be exactly representable in certain floating-point formats, which can lead to + repeated values or unexpected rounding. For precise sequences, it is recommended to use + integer dtypes instead of floating-point dtypes. 

        Note that non-integer step is subject to floating point rounding errors when comparing
        against end; to avoid inconsistency, we advise subtracting a small epsilon from end in such cases.

        Args:
            start (Number, optional): the starting value for the set of points. Default: 0.
            end (Number): the ending value for the set of points
            step (Number, optional): the gap between each pair of adjacent points. Default: 1.
            out (Tensor, optional): the output tensor.
            dtype (torch.dtype, optional): the desired data type of returned tensor.
                Default: if None, uses a global default (see torch.get_default_dtype()).
                If dtype is not given, infer the data type from the other input arguments.
                If any of start, end, or step are floating-point, the dtype is inferred
                to be the default dtype, see get_default_dtype(). Otherwise, the dtype is inferred
                to be torch.int64.
            layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided.
                Note: Iris tensors always use `torch.strided` regardless of this parameter.
            device (torch.device, optional): the desired device of returned tensor.
                Default: if None, uses the current device for the default tensor type.
            requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False.
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.arange(0, 10, 2) # [0, 2, 4, 6, 8] + >>> print(tensor.shape) # torch.Size([5]) + """ + self.debug(f"arange: start = {start}, end = {end}, step = {step}, dtype = {dtype}, device = {device}") + + # Handle the case where only one argument is provided (end) + if end is None: + end = start + start = 0 + + # Validate inputs + if step == 0: + raise ValueError("step must be non-zero") + + # Validate step direction consistency + if step > 0 and start >= end: + raise ValueError(f"Invalid range: start >= end with positive step (start={start}, end={end}, step={step})") + elif step < 0 and start <= end: + raise ValueError(f"Invalid range: start <= end with negative step (start={start}, end={end}, step={step})") + + # Calculate the number of elements + num_elements = math.ceil((end - start) / step) + + # Infer dtype if not provided + if dtype is None: + if any(isinstance(x, float) for x in [start, end, step]): + dtype = torch.get_default_dtype() + else: + dtype = torch.int64 + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + tensor = out + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + + target_device = tensor.device + arange_tensor = torch.arange(start, end, step, dtype=dtype, device=target_device) + + tensor[:] = arange_tensor + + tensor = self.__apply_layout(tensor, layout) + + if requires_grad: + tensor.requires_grad_() + + return tensor + + def zeros(self, *size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): + """ + Returns a tensor filled with the scalar value 0, with the shape defined by the variable argument size. + The tensor is allocated on the Iris symmetric heap. 
+ + Args: + *size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + + Keyword Arguments: + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the desired data type of returned tensor. + Default: if None, uses a global default (see torch.set_default_dtype()). + layout (torch.layout, optional): the desired layout of returned Tensor. + Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. + device (torch.device, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.zeros(2, 3) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([0., 0., 0.], device='cuda:0') + """ + self.debug(f"zeros: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") + + # Use global default dtype if None is provided + if dtype is None: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Fill with zeros + out.zero_() + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Fill with zeros + tensor.zero_() + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Apply the requested layout + tensor = 
self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def randn( + self, + *size, + generator=None, + out=None, + dtype=None, + layout=torch.strided, + device=None, + requires_grad=False, + pin_memory=False, + ): + """ + Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 + (also called the standard normal distribution). The tensor is allocated on the Iris symmetric heap. + + .. math:: + \\text{out}_i \\sim \\mathcal{N}(0, 1) + + For complex dtypes, the tensor is i.i.d. sampled from a complex normal distribution with zero mean + and unit variance as + + .. math:: + \\text{out}_i \\sim \\mathcal{CN}(0, 1) + + This is equivalent to separately sampling the real :math:`(\\text{Re})` and imaginary :math:`(\\text{Im})` + part of :math:`\\text{out}_i` as + + .. math:: + \\text{Re}(\\text{out}_i) \\sim \\mathcal{N}(0, \\frac{1}{2}), \\quad \\text{Im}(\\text{out}_i) \\sim \\mathcal{N}(0, \\frac{1}{2}) + + The shape of the tensor is defined by the variable argument size. + + Args: + *size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + + Keyword Arguments: + generator (torch.Generator, optional): a pseudorandom number generator for sampling + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the desired data type of returned tensor. + Default: if None, uses a global default (see torch.set_default_dtype()). + layout (torch.layout, optional): the desired layout of returned Tensor. + Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. + device (torch.device, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type (see torch.set_default_device()). 
+ device will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. + pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory. + Works only for CPU tensors. Default: False. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.randn(2, 3) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([ 0.3982, -0.0059, -0.4365], device='cuda:0') + """ + self.debug( + f"randn: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" + ) + + # Use global default dtype if None is provided + if dtype is None: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Generate random data and copy to out tensor + random_data = torch.randn(num_elements, generator=generator, dtype=dtype, device=device, layout=layout) + out.copy_(random_data) + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Generate random data and copy to tensor + random_data = torch.randn(num_elements, generator=generator, dtype=dtype, device=device, layout=layout) + tensor.copy_(random_data) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def ones(self, 
*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): + """ + Returns a tensor filled with the scalar value 1, with the shape defined by the variable argument size. + The tensor is allocated on the Iris symmetric heap. + + Args: + *size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + + Keyword Arguments: + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the desired data type of returned tensor. + Default: if None, uses a global default (see torch.set_default_dtype()). + layout (torch.layout, optional): the desired layout of returned Tensor. + Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. + device (torch.device, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. 
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.ones(2, 3) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([1., 1., 1.], device='cuda:0') + """ + self.debug(f"ones: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") + + # Use global default dtype if None is provided + if dtype is None: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Fill with ones + out.fill_(1) + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Fill with ones + tensor.fill_(1) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def full(self, size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): + """ + Creates a tensor of size size filled with fill_value. The tensor's dtype is inferred from fill_value. + The tensor is allocated on the Iris symmetric heap. + + Args: + size (int...): a list, tuple, or torch.Size of integers defining the shape of the output tensor. + fill_value (Scalar): the value to fill the output tensor with. + + Keyword Arguments: + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the desired data type of returned tensor. + Default: if None, uses a global default (see torch.set_default_dtype()). 
+ layout (torch.layout, optional): the desired layout of returned Tensor. + Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. + device (torch.device, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.full((2, 3), 3.14) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([3.1400, 3.1400, 3.1400], device='cuda:0') + """ + self.debug( + f"full: size = {size}, fill_value = {fill_value}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" + ) + + # Infer dtype from fill_value if not provided + if dtype is None: + if isinstance(fill_value, (int, float)): + if isinstance(fill_value, float): + dtype = torch.get_default_dtype() + else: + dtype = torch.int64 + else: + # For other types (like tensors), use their dtype + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Fill with the specified value + out.fill_(fill_value) + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Fill with the specified value + tensor.fill_(fill_value) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + 
tensor.requires_grad_() + + return tensor + + def uniform(self, size, low=0.0, high=1.0, dtype=torch.float): + """ + Returns a tensor filled with random numbers from a uniform distribution, allocated on the Iris symmetric heap. + + Args: + size (int or tuple of ints): the size of the output tensor. + low (float, optional): the lower bound of the uniform distribution. Default: 0.0. + high (float, optional): the upper bound of the uniform distribution. Default: 1.0. + dtype (torch.dtype, optional): the desired data type of returned tensor. Default: torch.float. + + Returns: + Tensor: A tensor filled with random numbers from a uniform distribution. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.uniform((2, 3), low=0.0, high=1.0) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([0.1234, 0.5678, 0.9012], device='cuda:0') + """ + self.debug(f"uniform: size = {size}, low = {low}, high = {high}, dtype = {dtype}") + size, num_elements = self.__parse_size(size) + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + tensor.uniform_(low, high) + return tensor.reshape(size) + + def empty( + self, + *size, + out=None, + dtype=None, + layout=torch.strided, + device=None, + requires_grad=False, + pin_memory=False, + memory_format=torch.contiguous_format, + ): + """ + Returns a tensor filled with uninitialized data. The shape of the tensor is defined by the variable argument size. + The tensor is allocated on the Iris symmetric heap. + + Note: + If torch.use_deterministic_algorithms() and torch.utils.deterministic.fill_uninitialized_memory are both set to True, + the output tensor is initialized to prevent any possible nondeterministic behavior from using the data as an input to an operation. + Floating point and complex tensors are filled with NaN, and integer tensors are filled with the maximum value. + + Args: + *size (int...): a sequence of integers defining the shape of the output tensor. 
                Can be a variable number of arguments or a collection like a list or tuple.

        Keyword Arguments:
            out (Tensor, optional): the output tensor.
            dtype (torch.dtype, optional): the desired data type of returned tensor.
                Default: if None, uses a global default (see torch.set_default_dtype()).
            layout (torch.layout, optional): the desired layout of returned Tensor.
                Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter.
            device (torch.device, optional): the desired device of returned tensor.
                Default: if None, uses the current device for the default tensor type.
            requires_grad (bool, optional): If autograd should record operations on the returned tensor.
                Default: False.
            pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory.
                Works only for CPU tensors. Default: False. Note: Iris tensors are always on GPU,
                so this flag has no effect here.
            memory_format (torch.memory_format, optional): the desired memory format of returned Tensor.
                Default: torch.contiguous_format.
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.empty(2, 3) + >>> print(tensor.shape) # torch.Size([2, 3]) + """ + self.debug( + f"empty: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" + ) + + # Use global default dtype if None is provided + if dtype is None: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Apply the requested memory format + tensor = self.__apply_memory_format(tensor, size, memory_format) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def randint( + self, *args, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False + ): + """ + Returns a tensor filled with random integers generated uniformly between low (inclusive) and high (exclusive). + The shape of the tensor is defined by the variable argument size. + The tensor is allocated on the Iris symmetric heap. + + Note: + With the global dtype default (torch.float32), this function returns a tensor with dtype torch.int64. + + Args: + low (int, optional): Lowest integer to be drawn from the distribution. Default: 0. + high (int): One above the highest integer to be drawn from the distribution. 
+ size (tuple): a tuple defining the shape of the output tensor. + + Keyword Arguments: + generator (torch.Generator, optional): a pseudorandom number generator for sampling. + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): if None, this function returns a tensor with dtype torch.int64. + layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided. + device (torch.device, optional): the desired device of returned tensor. Default: if None, uses the current device. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.randint(0, 10, (2, 3)) # Random integers [0, 10) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([7, 2, 9], device='cuda:0') + """ + self.debug(f"randint: args = {args}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") + + # Parse arguments to determine low, high, and size + # PyTorch randint signatures: + # randint(high, size) - where high is the upper bound and size is the shape + # randint(low, high, size) - where low and high are bounds, size is the shape + if len(args) == 2: + # randint(high, size) + high, size = args + low = 0 + elif len(args) == 3: + # randint(low, high, size) + low, high, size = args + else: + raise ValueError(f"randint expects 2 or 3 positional arguments, got {len(args)}") + + # Use default dtype if None is provided + if dtype is None: + dtype = torch.int64 + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Create a reshaped 
view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Generate random integers using PyTorch's randint + # Use specified device or fall back to current device + target_device = device if device is not None else self.device + + # Handle generator parameter + if generator is not None: + torch.randint(low, high, size, generator=generator, out=tensor, dtype=dtype, device=target_device) + else: + torch.randint(low, high, size, out=tensor, dtype=dtype, device=target_device) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def linspace(self, start, end, steps, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): + """ + Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to end, inclusive. + The tensor is allocated on the Iris symmetric heap. + + The values are: + (start, start + (end-start)/(steps-1), ..., start + (steps-2)*(end-start)/(steps-1), end) + + Args: + start (float or Tensor): the starting value for the set of points. If Tensor, it must be 0-dimensional. + end (float or Tensor): the ending value for the set of points. If Tensor, it must be 0-dimensional. + steps (int): size of the constructed tensor. + + Keyword Arguments: + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the data type to perform the computation in. + Default: if None, uses the global default dtype when both start and end are real, + and corresponding complex dtype when either is complex. + layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided. + device (torch.device, optional): the desired device of returned tensor. Default: if None, uses the current device. 
+ requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.linspace(0, 10, 5) # [0, 2.5, 5, 7.5, 10] + >>> print(tensor) # tensor([ 0.0000, 2.5000, 5.0000, 7.5000, 10.0000], device='cuda:0') + """ + self.debug( + f"linspace: start = {start}, end = {end}, steps = {steps}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" + ) + + # Use global default dtype if None is provided + if dtype is None: + # Check if start or end are complex numbers + start_is_complex = isinstance(start, complex) or (hasattr(start, "dtype") and torch.is_complex(start)) + end_is_complex = isinstance(end, complex) or (hasattr(end, "dtype") and torch.is_complex(end)) + + if start_is_complex or end_is_complex: + # Infer complex dtype based on default dtype + dtype = torch.complex64 if torch.get_default_dtype() == torch.float32 else torch.complex128 + else: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse steps and extract the integer value + if isinstance(steps, (tuple, list)): + if len(steps) == 1: + # Single-element tuple/list like (5,) or [5] + steps_int = steps[0] + # Handle nested tuples like ((5,),) + if isinstance(steps_int, (tuple, list)): + steps_int = steps_int[0] + else: + # Multi-element tuple/list - use __parse_size for compatibility + size, num_elements = self.__parse_size(steps) + steps_int = num_elements + else: + # steps is a single integer + steps_int = steps + + # Ensure steps_int is an integer + steps_int = int(steps_int) + size = (steps_int,) + num_elements = steps_int + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Create a reshaped view of the out tensor + tensor = 
out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Generate linspace using PyTorch's linspace + # Use specified device or fall back to current device + target_device = device if device is not None else self.device + torch.linspace(start, end, steps_int, out=tensor, dtype=dtype, device=target_device) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def rand( + self, + *size, + generator=None, + out=None, + dtype=None, + layout=torch.strided, + device=None, + requires_grad=False, + pin_memory=False, + ): + """ + Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1). + The tensor is allocated on the Iris symmetric heap. + + Args: + *size (int...): a sequence of integers defining the shape of the output tensor. + Can be a variable number of arguments or a collection like a list or tuple. + + Keyword Arguments: + generator (torch.Generator, optional): a pseudorandom number generator for sampling. + out (Tensor, optional): the output tensor. + dtype (torch.dtype, optional): the desired data type of returned tensor. + Default: if None, uses a global default (see torch.set_default_dtype()). + layout (torch.layout, optional): the desired layout of returned Tensor. + Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. + device (torch.device, optional): the desired device of returned tensor. + Default: if None, uses the current device for the default tensor type. + requires_grad (bool, optional): If autograd should record operations on the returned tensor. + Default: False. + pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory. + Works only for CPU tensors. Default: False. 
Note: Iris tensors are always on GPU. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> tensor = ctx.rand(2, 3) # Random values in [0, 1) + >>> print(tensor.shape) # torch.Size([2, 3]) + >>> print(tensor[0]) # tensor([0.1234, 0.5678, 0.9012], device='cuda:0') + """ + self.debug( + f"rand: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" + ) + + # Use global default dtype if None is provided + if dtype is None: + dtype = torch.get_default_dtype() + + # Use current device if none specified + if device is None: + device = self.device + + # Validate device compatibility with Iris + self.__throw_if_invalid_device(device) + + # Parse size and calculate number of elements + size, num_elements = self.__parse_size(size) + + # If out is provided, use it; otherwise allocate new tensor + if out is not None: + self.__throw_if_invalid_output_tensor(out, num_elements, dtype) + # Create a reshaped view of the out tensor + tensor = out.view(size) + else: + tensor = self.__allocate(num_elements=num_elements, dtype=dtype) + # Reshape to the desired size + tensor = tensor.reshape(size) + + # Generate random numbers using PyTorch's rand + # Use specified device (already validated and set above) + + # Handle generator parameter + if generator is not None: + torch.rand(size, generator=generator, out=tensor, dtype=dtype, device=device) + else: + torch.rand(size, out=tensor, dtype=dtype, device=device) + + # Apply the requested layout + tensor = self.__apply_layout(tensor, layout) + + # Set requires_grad if specified + if requires_grad: + tensor.requires_grad_() + + return tensor + + def __deallocate(self, pointer): + pass + + def get_heap_bases(self): + """ + Return the tensor of symmetric heap base addresses for all ranks. + + Returns: + torch.Tensor: A 1D tensor of ``uint64`` heap base addresses of size ``num_ranks`` + on the Iris device. Pass this to device-side Triton kernels that require + heap translation. 
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> heap_bases = ctx.get_heap_bases() + >>> print(heap_bases.shape) # torch.Size([num_ranks]) + """ + return self.heap_bases + + def barrier(self, stream=None, group=None): + """ + Synchronize ranks within the specified group and their CUDA devices. + + This first calls ``torch.cuda.synchronize()`` or ``stream.synchronize()`` to ensure the local GPU has + finished all queued work, then performs a distributed barrier so that all + ranks in the group reach the same point before proceeding. + + Args: + stream: If stream is given: wait only for that stream before barrier. If stream is None: legacy behavior (device-wide sync). + group (ProcessGroup, optional): The process group to synchronize. + If None, uses the default process group (all ranks). + + Example: + >>> ctx = iris.iris(1 << 20) + >>> ctx.barrier() # Synchronize all ranks + >>> ctx.barrier(group=my_group) # Synchronize only ranks in my_group + """ + # Wait for all GPUs to finish work + if stream is None: + torch.cuda.synchronize() + else: + stream.synchronize() + + # Distributed barrier + distributed_barrier(group=group) + + def get_device(self): + """ + Get the underlying device where the Iris symmetric heap resides. + + Returns: + torch.device: The CUDA device of Iris-managed memory. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> device = ctx.get_device() + >>> print(device) # cuda:0 + """ + return self.heap.get_device() + + def get_cu_count(self): + """ + Get the number of compute units (CUs) for the current GPU. + + Returns: + int: Number of compute units on this rank's GPU. + + Example: + >>> ctx = iris.iris(1 << 20) + >>> cu_count = ctx.get_cu_count() + >>> print(f"GPU has {cu_count} CUs") # GPU has 304 CUs + """ + return get_cu_count(self.gpu_id) + + def get_rank(self): + """ + Get this process's rank id in the distributed communicator. + + Returns: + int: Zero-based rank id of the current process. 
+ + Example: + >>> ctx = iris.iris(1 << 20) + >>> rank = ctx.get_rank() + >>> print(f"This is rank {rank}") # This is rank 0 + """ + return self.cur_rank + + def get_num_ranks(self): + """ + Get the total number of ranks in the distributed communicator. + + Returns: + int: World size (number of ranks). + + Example: + >>> ctx = iris.iris(1 << 20) + >>> num_ranks = ctx.get_num_ranks() + >>> print(f"Total ranks: {num_ranks}") # Total ranks: 1 + """ + return self.num_ranks + + def __throw_if_invalid_output_tensor(self, tensor: torch.Tensor, num_elements: int, dtype: torch.dtype): + if not self.__tensor_on_device(tensor): + raise RuntimeError( + f"The output tensor is not on the same device as the Iris instance. The Iris instance is on device {self.device} but the output tensor is on device {tensor.device}" + ) + if not self.__on_symmetric_heap(tensor): + raise RuntimeError( + f"The output tensor is not on the symmetric heap. The Iris instance is on heap base {self.heap_bases[self.cur_rank]} but the output tensor is on heap base {tensor.data_ptr()}" + ) + if tensor.numel() != num_elements: + raise RuntimeError(f"The output tensor has {tensor.numel()} elements, but {num_elements} are required") + if tensor.dtype != dtype: + raise RuntimeError(f"The output tensor has dtype {tensor.dtype}, but {dtype} is required") + + def __throw_if_invalid_device(self, device): + """ + Throw a RuntimeError if the requested device is not compatible with this Iris instance. + + Args: + device: The requested device (can be string, torch.device, or None) + + Raises: + RuntimeError: If the device is not compatible + """ + if not self.__is_valid_device(device): + raise RuntimeError( + f"Device mismatch: requested device {device} but Iris instance is on device {self.device}. " + f"Iris only supports tensors on its own device." 
+ ) + + def __apply_memory_format( + self, tensor: torch.Tensor, size: tuple, memory_format: torch.memory_format, input_tensor: torch.Tensor = None + ): + """ + Apply the requested memory format to a tensor by setting appropriate strides. + This keeps the tensor on the symmetric heap while changing how PyTorch interprets the memory layout. + + Args: + tensor: The tensor to modify + size: The tensor's size/dimensions + memory_format: The desired memory format + input_tensor: The original input tensor (needed for preserve_format detection) + """ + if memory_format == torch.contiguous_format: + # Default format, no changes needed + return tensor + elif memory_format == torch.channels_last and len(size) == 4: + # For channels_last format: preserve shape (N, C, H, W) but change strides + # channels_last strides: [C*H*W, 1, C*W, C] for shape (N, C, H, W) + N, C, H, W = size[0], size[1], size[2], size[3] + # Keep the original shape (N, C, H, W) but use channels_last strides + tensor = self.__create_tensor_with_strides(tensor, size, (C * H * W, 1, C * W, C)) + return tensor + elif memory_format == torch.channels_last_3d and len(size) == 5: + # For channels_last_3d format: preserve shape (N, C, D, H, W) but change strides + # channels_last_3d strides: [C*D*H*W, 1, C*D*W, C*W, C] for shape (N, C, D, H, W) + N, C, D, H, W = size[0], size[1], size[2], size[3], size[4] + # Keep the original shape (N, C, D, H, W) but use channels_last_3d strides + tensor = self.__create_tensor_with_strides(tensor, size, (C * D * H * W, 1, C * D * W, C * W, C)) + return tensor + elif memory_format == torch.preserve_format: + # For preserve_format, we need to detect the input tensor's memory format + # and apply the same format to the output + if input_tensor is not None: + # Check the actual memory format of the input tensor + if len(size) == 4: + # Check if input tensor is in channels_last format by examining strides + # channels_last format has strides[1] == 1 (channels dimension is contiguous) 
+ input_strides = input_tensor.stride() + if len(input_strides) == 4 and input_strides[1] == 1: + # Input is in channels_last format, preserve it + # Use the input tensor's actual shape, not the size parameter + input_shape = input_tensor.shape + if len(input_shape) == 4: + # Input is already in channels_last format (N, H, W, C) + new_size = input_shape + # Use the input tensor's strides directly + tensor = self.__create_tensor_with_strides(tensor, new_size, input_strides) + return tensor + elif len(size) == 5: + # Check if input tensor is in channels_last_3d format + input_strides = input_tensor.stride() + if len(input_strides) == 5 and input_strides[1] == 1: + # Input is in channels_last_3d format, preserve it + # Use the input tensor's actual shape, not the size parameter + input_shape = input_tensor.shape + if len(input_shape) == 5: + # Input is already in channels_last_3d format (N, D, H, W, C) + new_size = input_shape + # Use the input tensor's strides directly + tensor = self.__create_tensor_with_strides(tensor, new_size, input_strides) + return tensor + # If no special format detected or no input tensor provided, use contiguous format + return tensor + else: + # Unsupported format or dimension combination + self.debug( + f"Warning: Memory format {memory_format} not supported for {len(size)}D tensor, using contiguous format" + ) + # For unsupported formats, return the tensor as-is (contiguous) + return tensor + + def __create_tensor_with_strides(self, original_tensor: torch.Tensor, size: tuple, strides: tuple) -> torch.Tensor: + """ + Create a new tensor with the specified strides while keeping the data on the symmetric heap. 
+ + Args: + original_tensor: The original tensor (source of data and heap allocation) + size: The tensor's size/dimensions + strides: The desired strides for the new memory format + + Returns: + A new tensor with the specified strides, data copied from original, on the same heap + """ + + # First, create a temporary tensor with the correct strides using PyTorch + temp_tensor = torch.empty_strided(size, strides, dtype=original_tensor.dtype, device=original_tensor.device) + + # Handle different cases based on whether size changes and what the strides indicate + if size != original_tensor.shape: + # Size is different - this might be a format change that requires permutation + # Check if this is a channels_last format by comparing strides + if len(size) == 4: + # For channels_last: expected strides are [H*W*C, 1, W*C, C] for shape (N, H, W, C) + N, H, W, C = size[0], size[1], size[2], size[3] + expected_strides = (H * W * C, 1, W * C, C) + if strides == expected_strides: + permuted = original_tensor.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + else: + # If the size differs for other reasons, do not permute; just reshape if possible + try: + permuted = original_tensor.reshape(size) + except Exception: + raise ValueError( + "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." + ) + elif len(size) == 5: + # For channels_last_3d: expected strides are [D*H*W*C, 1, H*W*C, W*C, C] for shape (N, D, H, W, C) + N, D, H, W, C = size[0], size[1], size[2], size[3], size[4] + expected_strides = (D * H * W * C, 1, H * W * C, W * C, C) + if strides == expected_strides: + permuted = original_tensor.permute(0, 2, 3, 4, 1) # (N, C, D, H, W) -> (N, D, H, W, C) + else: + # If the size differs for other reasons, do not permute; just reshape if possible + try: + permuted = original_tensor.reshape(size) + except Exception: + raise ValueError( + "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." 
+ ) + else: + # For other dimensions, just try to reshape + try: + permuted = original_tensor.reshape(size) + except Exception: + raise ValueError( + "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." + ) + else: + # Size is the same - this is a stride-only change (like channels_last with preserved shape) + # We need to reorder the data to match the new stride pattern + if len(size) == 4: + # Check if this is channels_last format with preserved shape + N, C, H, W = size[0], size[1], size[2], size[3] + expected_strides = (C * H * W, 1, C * W, C) + if strides == expected_strides: + permuted = original_tensor + else: + permuted = original_tensor + elif len(size) == 5: + # Check if this is channels_last_3d format with preserved shape + N, C, D, H, W = size[0], size[1], size[2], size[3], size[4] + expected_strides = (C * D * H * W, 1, C * D * W, C * W, C) + if strides == expected_strides: + permuted = original_tensor + else: + permuted = original_tensor + else: + permuted = original_tensor + + # Copy the permuted data to the temporary tensor + temp_tensor.copy_(permuted) + + # Now allocate a new tensor on our symmetric heap + num_elements = math.prod(size) + heap_tensor = self.__allocate(num_elements, original_tensor.dtype) + + # Reshape to the desired size + heap_tensor = heap_tensor.reshape(size) + + # Copy the data from the temporary tensor to our heap tensor + heap_tensor.copy_(temp_tensor) + + # Clean up the temporary tensor + del temp_tensor + + # Now we need to create a view with the correct strides + # We can't use as_strided directly on our heap tensor, but we can + # create a new tensor with the right strides and copy the data again + final_tensor = torch.as_strided(heap_tensor, size, strides) + + return final_tensor + + def __apply_layout(self, tensor: torch.Tensor, layout: torch.layout) -> torch.Tensor: + """ + Apply the requested layout to a tensor. 
+ + Args: + tensor: The tensor to modify + layout: The desired layout + + Returns: + Tensor with the requested layout + """ + + if layout == torch.strided: + # Strided layout is the default - no changes needed + return tensor + else: + # Only support strided layout for now + raise ValueError(f"Layout {layout} not supported. Only torch.strided is currently supported.") + + def __tensor_on_device(self, tensor: torch.Tensor): + # Get the Iris device from memory_pool.device + iris_device = self.get_device() + tensor_device = tensor.device + + # For CUDA devices, check if they're compatible + if tensor_device.type == "cuda" and iris_device.type == "cuda": + if iris_device.index is None: + return True + return tensor_device.index == iris_device.index + + # For non-CUDA devices, they must be exactly equal + return tensor_device == iris_device + + def __on_symmetric_heap(self, tensor: torch.Tensor): + """Check if a tensor is allocated on the symmetric heap.""" + return self.heap.on_symmetric_heap(tensor) + + def __is_valid_device(self, device) -> bool: + """ + Check if the requested device is compatible with this Iris instance. + + Args: + device: The requested device (can be string, torch.device, or None) + + Returns: + bool: True if the device is compatible, False otherwise + """ + if device is None: + return True # None means use default device + + # Convert device strings to torch.device objects for proper comparison + requested_device = torch.device(device) if isinstance(device, str) else device + iris_device = self.get_device() + + # Check if both are CUDA devices + if requested_device.type == "cuda" and iris_device.type == "cuda": + # Check if index matches or if requested is "cuda" (any index) + if requested_device.index is None: + return True + else: + return requested_device.index == iris_device.index + + # For non-CUDA devices, always return False + return False + + class CCL: + """ + Collective Communication Library (CCL) interface for Iris. 
+ + Provides collective operations that can be called as methods on the Iris instance. + Example usage: + >>> shmem = iris.iris() + >>> shmem.ccl.all_to_all(output_tensor, input_tensor) + """ + + def __init__(self, iris_instance): + """ + Initialize CCL with a reference to the parent Iris instance. + + Args: + iris_instance: The parent Iris instance + """ + self._iris = iris_instance + + def all_to_all(self, output_tensor, input_tensor, group=None, async_op=False, config=None): + """ + All-to-all collective operation. + + Each rank sends a tensor chunk to each other rank and receives + a tensor chunk from each other rank. Input/output tensors should have + shape (M, N * world_size) where each chunk of N columns corresponds to one rank. + + Args: + output_tensor: Output tensor of shape (M, N * world_size) + input_tensor: Input tensor of shape (M, N * world_size) + group: ProcessGroup or None. If None, uses all ranks in shmem context. + Default: None. + async_op: If False, performs a barrier at the end. If True, returns immediately. + Default: False. + config: Config instance with kernel parameters (default: None). + If None, uses default Config values. + + Example: + >>> shmem = iris.iris() + >>> shmem.ccl.all_to_all(output_tensor, input_tensor) + + >>> # Custom configuration + >>> from iris.ccl import Config + >>> config = Config(block_size_m=128, block_size_n=32) + >>> shmem.ccl.all_to_all(output_tensor, input_tensor, config=config) + + >>> # Async operation (no barrier) + >>> shmem.ccl.all_to_all(output_tensor, input_tensor, async_op=True) + """ + from iris.ccl.all_to_all import all_to_all as _all_to_all + + _all_to_all(output_tensor, input_tensor, self._iris, group=group, async_op=async_op, config=config) + + def all_gather(self, output_tensor, input_tensor, group=None, async_op=False, config=None): + """ + All-gather collective operation. 
+ + Each rank sends its input tensor to all ranks, and all ranks receive + and concatenate all input tensors along dimension 0 (rows), matching + torch.distributed.all_gather_into_tensor behavior. + + Args: + output_tensor: Output tensor of shape (world_size * M, N) - will contain concatenated inputs + input_tensor: Input tensor of shape (M, N) - local rank's data to send + group: ProcessGroup or None. If None, uses all ranks in shmem context. + Default: None. + async_op: If False, performs a barrier at the end. If True, returns immediately. + Default: False. + config: Config instance with kernel parameters (default: None). + If None, uses default Config values. + + Example: + >>> shmem = iris.iris() + >>> # Input: (M, N), Output: (world_size * M, N) + >>> shmem.ccl.all_gather(output_tensor, input_tensor) + + >>> # Custom configuration + >>> from iris.ccl import Config + >>> config = Config(block_size_m=128, block_size_n=32) + >>> shmem.ccl.all_gather(output_tensor, input_tensor, config=config) + + >>> # Async operation (no barrier) + >>> shmem.ccl.all_gather(output_tensor, input_tensor, async_op=True) + """ + from iris.ccl.all_gather import all_gather as _all_gather + + _all_gather(output_tensor, input_tensor, self._iris, group=group, async_op=async_op, config=config) + + def all_reduce_preamble(self, output_tensor, input_tensor, config=None, workspace=None): + """ + Prepare reusable workspace for all-reduce. + + Args: + output_tensor: Output tensor that will receive the reduced data. + input_tensor: Input tensor providing the local contribution. + config: Optional Config describing variant parameters. + workspace: Optional existing workspace to update/reuse. + + Returns: + Workspace object that can be passed to ``all_reduce``. 
+ """ + from iris.ccl.all_reduce import all_reduce_preamble as _all_reduce_preamble + + return _all_reduce_preamble( + output_tensor, + input_tensor, + self._iris, + config=config, + workspace=workspace, + ) + + def all_reduce( + self, output_tensor, input_tensor, op=None, group=None, async_op=False, config=None, workspace=None + ): + """ + All-reduce collective operation. + + Each rank has a local input tensor, and all ranks compute the sum of all + input tensors. The result is written to output_tensor on all ranks. + + Args: + output_tensor: Output tensor of shape (M, N) - will contain sum of all inputs + input_tensor: Input tensor of shape (M, N) - local rank's partial data + op: Reduction operation to apply. Currently only ReduceOp.SUM is supported. + Default: ReduceOp.SUM. + group: ProcessGroup or None. If None, uses all ranks in shmem context. + Default: None. + async_op: If False, performs a barrier at the end. If True, returns immediately. + Default: False. + config: Config instance with kernel parameters (default: None). + If None, uses default Config values. + Set config.all_reduce_variant to choose variant: "atomic", "ring", or "two_shot" + workspace: Optional workspace prepared by ``all_reduce_preamble`` to + reuse internal buffers across invocations. 
+ + Example: + >>> shmem = iris.iris() + >>> shmem.ccl.all_reduce(output_tensor, input_tensor) + + >>> # Custom configuration with ring variant + >>> from iris.ccl import Config + >>> config = Config(all_reduce_variant="ring") + >>> shmem.ccl.all_reduce(output_tensor, input_tensor, config=config) + + >>> # Two-shot variant with block distribution + >>> config = Config(all_reduce_variant="two_shot", all_reduce_distribution=1) + >>> shmem.ccl.all_reduce(output_tensor, input_tensor, config=config) + + >>> # Async operation (no barrier) + >>> shmem.ccl.all_reduce(output_tensor, input_tensor, async_op=True) + """ + from iris.ccl.all_reduce import all_reduce as _all_reduce + from iris.ccl import ReduceOp + + # Default to SUM if not specified + if op is None: + op = ReduceOp.SUM + + return _all_reduce( + output_tensor, + input_tensor, + self._iris, + op=op, + group=group, + async_op=async_op, + config=config, + workspace=workspace, + ) + + def reduce_scatter(self, output_tensor, input_tensor, op=None, group=None, async_op=False, config=None): + """ + Reduce-scatter collective operation. + + Each rank reduces its assigned tiles from all ranks' inputs and stores + the result only to its own output tensor. This is similar to all-reduce + but without broadcasting the result to all ranks. + + Args: + output_tensor: Output tensor of shape (M, N) - will contain reduced tiles for this rank + input_tensor: Input tensor of shape (M, N) - local rank's partial data + op: Reduction operation to apply. Currently only ReduceOp.SUM is supported. + Default: ReduceOp.SUM. + group: ProcessGroup or None. If None, uses all ranks in shmem context. + Default: None. + async_op: If False, performs a barrier at the end. If True, returns immediately. + Default: False. + config: Config instance with kernel parameters (default: None). + If None, uses default Config values. + Only supports reduce_scatter_variant="two_shot". 
+ + Example: + >>> shmem = iris.iris() + >>> shmem.ccl.reduce_scatter(output_tensor, input_tensor) + + >>> # Custom configuration + >>> from iris.ccl import Config + >>> config = Config(reduce_scatter_variant="two_shot", all_reduce_distribution=1) + >>> shmem.ccl.reduce_scatter(output_tensor, input_tensor, config=config) + """ + from iris.ccl.reduce_scatter import reduce_scatter as _reduce_scatter + from iris.ccl import ReduceOp + + # Default to SUM if not specified + if op is None: + op = ReduceOp.SUM + + _reduce_scatter( + output_tensor, input_tensor, self._iris, op=op, group=group, async_op=async_op, config=config + ) + + +@triton.jit +def __translate(ptr, from_rank, to_rank, heap_bases): + from_base = tl.load(heap_bases + from_rank) + to_base = tl.load(heap_bases + to_rank) + # convert to int to compute difference + ptr_int = tl.cast(ptr, tl.uint64) + # Find the offset from from_rank heap + offset = ptr_int - from_base + # Byte cast for byte offset addition + to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) + # Find the offset into the to_rank heap + translated_ptr_byte = to_base_byte + offset + # Cast to_base back to pointer type + translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) + + # Optimization to vectorize the load/store + # We can't do this in general because we don't know the shape of the tensor or block sizes + # ptr = tl.max_contiguous(tl.multiple_of(ptr, (16, 16)), (16, 32)) + + # 0 You can use this if your block sizes are multiples of 32. + # Largest vectorized load instruction is dwordx4 (128-bits) + translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) + translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) + + # ptr = tl.max_contiguous(tl.multiple_of(ptr, 512), 512) + # translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, 512), 512) + return translated_ptr + + +@triton.jit +def load(pointer, to_rank, from_rank, heap_bases, mask=None): + """ + Loads a value from the specified rank's memory location. 
 + + This function performs a memory read operation by translating the pointer + from the `to_rank`'s address space to the `from_rank`'s address space and loading + data from the target memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local load operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `to_rank`'s address space that will be translated to the `from_rank`'s address space. Must be the current rank where the pointer is local. + to_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + from_rank (int): The rank ID from which to read the data. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address pointer[idx]. Defaults to None. + + Returns: + Block: The loaded value from the target memory location. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Load data from rank 1's memory into the current rank + >>> cur_rank = 0 # Current rank + >>> remote_rank = 1 # Remote rank to load from + >>> data = iris.load(ptr, cur_rank, remote_rank, heap_bases) + >>> return data + """ + translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases) + result = tl.load(translated_ptr, mask=mask) + return result + + + @triton.jit + def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): + """ + Writes data to the specified rank's memory location. + + This function performs a memory write operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and storing + the provided data to the target memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local store operation. 
+ + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + value (Block): The tensor of elements to be stored. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the data will be written. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not store the data at address pointer[idx]. Defaults to None. + + Returns: + None + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Store value 42 into rank 1's heap from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> value = 42 + >>> iris.store(ptr, value, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + tl.store(translated_ptr, value, mask=mask) + + +@triton.jit +def copy(src_ptr, dst_ptr, from_rank, to_rank, cur_rank, heap_bases, mask=None): + """ + Copies data from the specified rank's memory into the destination rank's memory. + This function performs the transfer by translating `src_ptr` from the `from_rank`'s address + space to the `to_rank`'s address space, performing a masked load from the translated + source, and storing the loaded data to `dst_ptr` in the `to_rank` memory location. + If `from_rank` and `to_rank` are the same, this function performs a local copy operation. + It is undefined behaviour if neither `from_rank` nor `to_rank` is the `cur_rank`. + + Args: + src_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s local memory from which to read data. 
+ dst_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `to_rank`'s local memory where the data will be written. + from_rank (int): The rank ID that owns `src_ptr` (source rank). + to_rank (int): The rank ID that will receive the data (destination rank). + cur_rank (int): The rank ID issuing the copy operation. Must be either `from_rank` or `to_rank`. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not load from the translated src_ptr[idx] and do not store to dst_ptr[idx]. Defaults to None. + + Returns: + None + + Example: + >>> @triton.jit + >>> def kernel(remote_ptr, local_ptr, heap_bases): + >>> from_rank = 1 + >>> to_rank = 0 + >>> iris.copy(remote_ptr, local_ptr, from_rank, to_rank, to_rank, heap_bases) + """ + + cur_base = tl.load(heap_bases + cur_rank) + + from_base = tl.load(heap_bases + from_rank) + to_base = tl.load(heap_bases + to_rank) + + src_ptr_int = tl.cast(src_ptr, tl.uint64) + src_offset = src_ptr_int - cur_base + + dst_ptr_int = tl.cast(dst_ptr, tl.uint64) + dst_offset = dst_ptr_int - cur_base + + from_base_byte = tl.cast(from_base, tl.pointer_type(tl.int8)) + to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) + + translated_src = tl.cast(from_base_byte + src_offset, src_ptr.dtype) + translated_dst = tl.cast(to_base_byte + dst_offset, src_ptr.dtype) + + data = tl.load(translated_src, mask=mask) + tl.store(translated_dst, data, mask=mask) + + +@triton.jit +def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): + """ + Copies data from the specified rank's memory to the current rank's local memory. + + This function performs a memory read operation by translating the `from_ptr` + from the current rank's address space to the `from_rank`'s address space, loading data + from the `from_rank` memory location, and storing it to the local `to_ptr`. 
+ If the `from_rank` is the same as the current rank, this function performs a local copy operation. + + Args: + from_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that will be translated to the `from_rank`'s address space. Must be the current rank where the pointer is local. + to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's local memory where the data will be stored. + from_rank (int): The `from_rank` ID from which to read the data. + to_rank (int): The current rank ID where the data will be stored. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address from_ptr[idx] and do not store to to_ptr[idx]. Defaults to None. + + Returns: + None + + Example: + >>> @triton.jit + >>> def kernel(remote_ptr, local_ptr, heap_bases): + >>> from_rank = 1 + >>> to_rank = 0 + >>> iris.get(remote_ptr, local_ptr, from_rank, to_rank, heap_bases) + """ + translated_from_ptr = __translate(from_ptr, from_rank, to_rank, heap_bases) + + data = tl.load(translated_from_ptr, mask=mask) + + tl.store(to_ptr, data, mask=mask) + + +@triton.jit +def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): + """ + Copies data from the current rank's local memory to the specified rank's memory. + This function performs a memory write operation by loading data from the current + rank's `from_ptr`, translating the `to_ptr` from the current rank's address + space to the `to_rank`'s address space, and storing the data to the `to_rank` memory location. + If the `to_rank` is the same as the current rank, this function performs a local copy operation. + + Args: + from_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's local memory from which to read data. 
+ to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + from_rank (int): The current rank ID from which to read the data. + to_rank (int): The `to_rank` ID to which the data will be written. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address from_ptr[idx] and do not store to to_ptr[idx]. Defaults to None. + + Returns: + None + + Example: + >>> @triton.jit + >>> def kernel(local_ptr, remote_ptr, heap_bases): + >>> from_rank = 0 + >>> to_rank = 1 + >>> iris.put(local_ptr, remote_ptr, from_rank, to_rank, heap_bases) + """ + translated_to_ptr = __translate(to_ptr, from_rank, to_rank, heap_bases) + + data = tl.load(from_ptr, mask=mask) + + tl.store(translated_to_ptr, data, mask=mask) + + +@triton.jit +def atomic_add(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic add at the specified rank's memory location. + + This function performs an atomic addition operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + adding the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic addition operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. 
+ to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically add 5 to rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> increment = 5 + >>> old_val = iris.atomic_add(ptr, increment, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_add(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_sub(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Atomically subtracts data from the specified rank's memory location. + + This function performs an atomic subtraction operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + subtracting the provided data from the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic subtraction operation. 
+ + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block): The tensor of elements to be subtracted atomically. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". Defaults to "acq_rel". + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). Defaults to "gpu". + + Returns: + Block: The value at the memory location before the atomic subtraction. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically subtract 3 from rank 2's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 2 # Remote rank (destination) + >>> decrement = 3 + >>> old_val = iris.atomic_sub(ptr, decrement, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_sub(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_cas(pointer, cmp, val, from_rank, to_rank, heap_bases, sem=None, scope=None): + """ + Atomically compares and exchanges the specified rank's memory location. 
+ + This function performs an atomic compare-and-swap operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + comparing the current value with the expected value, then writing the new value if they match. + If the `from_rank` and `to_rank` are the same, this function performs a local atomic compare-and-swap operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + cmp (Block): The expected value to be compared with the current value at the memory location. + val (Block): The new value to be written if the compare succeeds. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". Defaults to "acq_rel". + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). Defaults to "gpu". + + Returns: + Block: The value contained at the memory location before the atomic operation attempt. 
+ + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Compare-and-swap on rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> expected = 0 + >>> new_val = 42 + >>> old_val = iris.atomic_cas(ptr, expected, new_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_cas(translated_ptr, cmp, val, sem=sem, scope=scope) + + +@triton.jit +def atomic_xchg(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic exchange at the specified rank's memory location. + + This function performs an atomic exchange operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + exchanging the current value with the provided new value. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic exchange operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". 
If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Exchange value with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> new_value = 99 + >>> old_val = iris.atomic_xchg(ptr, new_value, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_xchg(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_xor(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic xor at the specified rank's memory location. + + This function performs an atomic xor operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + xoring the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic xor operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. 
+ heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically XOR with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> mask_val = 0xFF + >>> old_val = iris.atomic_xor(ptr, mask_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_xor(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_and(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic and at the specified rank's memory location. + + This function performs an atomic and operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + anding the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic and operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. 
Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically AND with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> mask_val = 0x0F + >>> old_val = iris.atomic_and(ptr, mask_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_and(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_or(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic or at the specified rank's memory location. 
+
+    This function performs an atomic OR operation by translating the pointer
+    from the `from_rank`'s address space to the `to_rank`'s address space and atomically
+    bitwise-ORing the provided data with the value at the `to_rank` memory location. If the `from_rank` and `to_rank` are the same,
+    this function performs a local atomic OR operation.
+
+    Args:
+        pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local.
+        val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation.
+        from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local.
+        to_rank (int): The rank ID to which the atomic operation will be performed.
+        heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks.
+        mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None.
+        sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics.
+        scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu".
+
+    Returns:
+        Block: The data stored at pointer before the atomic operation.
+ + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically OR with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> mask_val = 0xF0 + >>> old_val = iris.atomic_or(ptr, mask_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_or(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_min(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic min at the specified rank's memory location. + + This function performs an atomic min operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + performing the min on the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic min operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. + heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". 
If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically find minimum with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> new_val = 10 + >>> old_val = iris.atomic_min(ptr, new_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_min(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +@triton.jit +def atomic_max(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): + """ + Performs an atomic max at the specified rank's memory location. + + This function performs an atomic max operation by translating the pointer + from the `from_rank`'s address space to the `to_rank`'s address space and atomically + performing the max on the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, + this function performs a local atomic max operation. + + Args: + pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. + val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. + from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. + to_rank (int): The rank ID to which the atomic operation will be performed. 
+ heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. + mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. + sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. + scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". + + Returns: + Block: The data stored at pointer before the atomic operation. + + Example: + >>> @triton.jit + >>> def kernel(ptr, heap_bases): + >>> # Atomically find maximum with rank 1's memory from rank 0 + >>> cur_rank = 0 # Current rank (source) + >>> remote_rank = 1 # Remote rank (destination) + >>> new_val = 100 + >>> old_val = iris.atomic_max(ptr, new_val, cur_rank, remote_rank, heap_bases) + """ + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + return tl.atomic_max(translated_ptr, val, mask=mask, sem=sem, scope=scope) + + +def iris(heap_size=1 << 30): + """ + Create and return an Iris instance with the specified heap size. + + Args: + heap_size (int): Size of the heap in bytes. Defaults to 1GB. + + Returns: + Iris: An initialized Iris instance. + + Example: + >>> import iris + >>> iris_ctx = iris.iris(2**30) # 1GB heap + >>> tensor = iris_ctx.zeros(1024, 1024) + """ + return Iris(heap_size) diff --git a/iris/ops/all_gather_matmul.py.with_chunked b/iris/ops/all_gather_matmul.py.with_chunked new file mode 100644 index 000000000..ddc03d027 --- /dev/null +++ b/iris/ops/all_gather_matmul.py.with_chunked @@ -0,0 +1,521 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. 
All rights reserved. + +""" +Fused All-Gather + GEMM operation using pull pattern. + +Each rank has a column-sharded input A_sharded (M x K_local). +This operation computes C = all_gather(A_sharded) @ B by pulling +tiles from remote ranks on-demand during GEMM computation. +""" + +from typing import Optional +import torch +import triton +import triton.language as tl +import iris +import iris.x + +from tritonblas.kernels.stages.algorithms.binary import add_vector +from tritonblas.kernels.stages.algorithms.unary import convert_dtype + +from .config import FusedConfig +from .workspace import FusedWorkspace + + +@triton.jit() +def _fused_all_gather_matmul_kernel( + A_sharded, + B, + C, + bias_ptr, + M: tl.constexpr, + N: tl.constexpr, + K: tl.constexpr, + K_local: tl.constexpr, + stride_am: tl.constexpr, + stride_ak: tl.constexpr, + stride_bk: tl.constexpr, + stride_bn: tl.constexpr, + stride_cm: tl.constexpr, + stride_cn: tl.constexpr, + stride_bias: tl.constexpr, + heap_bases: tl.tensor, + cur_rank: tl.constexpr, + world_size: tl.constexpr, + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, + BLOCK_SIZE_K: tl.constexpr, + GROUP_SIZE_M: tl.constexpr, + NUM_SMS: tl.constexpr, + NUM_XCDS: tl.constexpr, + BIAS: tl.constexpr, + EVEN_K: tl.constexpr, + ALLOW_TF32: tl.constexpr, +): + """Fused all-gather + GEMM kernel using pull pattern.""" + pid = tl.program_id(0) + + # Handle multi-XCD devices + if NUM_XCDS != 1: + pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) + + num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + total_tiles = num_pid_m * num_pid_n + + tl.assume(stride_am > 0) + tl.assume(stride_ak > 0) + tl.assume(stride_bk > 0) + tl.assume(stride_bn > 0) + tl.assume(stride_cm > 0) + tl.assume(stride_cn > 0) + + acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 + + # Persistent loop over output tiles + for tile_id in range(pid, total_tiles, NUM_SMS): + # Compute tile coordinates with swizzling + 
num_pid_in_group = GROUP_SIZE_M * num_pid_n + group_id = tile_id // num_pid_in_group + first_pid_m = group_id * GROUP_SIZE_M + group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) + pid_m = first_pid_m + ((tile_id % num_pid_in_group) % group_size_m) + pid_n = (tile_id % num_pid_in_group) // group_size_m + + # Compute row and column indices + rm = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M + rn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N + rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) + rn = tl.max_contiguous(tl.multiple_of(rn, BLOCK_SIZE_N), BLOCK_SIZE_N) + + # Initialize accumulator + acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) + + # Create DeviceContext and TensorView for gather operations + ctx = iris.x.DeviceContext(cur_rank, world_size, heap_bases) + src_view = iris.x.TensorView(A_sharded, M, K_local, stride_am, stride_ak) + + # Loop over all ranks to pull and accumulate + for source_rank_id in range(world_size): + loop_k_local = tl.cdiv(K_local, BLOCK_SIZE_K) + if not EVEN_K: + loop_k_local -= 1 + + # Loop over K dimension for this rank's shard + for k_block_idx in range(0, loop_k_local): + k_offset = k_block_idx * BLOCK_SIZE_K + + # Create tile view for this K block + tile_k = k_offset // BLOCK_SIZE_K + k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) + + # Pull A tile from source_rank_id using gather primitive + a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) + + # Load B tile + rk_local = k_offset + tl.arange(0, BLOCK_SIZE_K) + rk_global = (source_rank_id * K_local) + rk_local + B_ptr = B + rk_global[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(tl.multiple_of(B_ptr, (16, 1))) + + # Accumulate + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) + + # Handle remaining K elements if not evenly divisible + if not EVEN_K: + k_offset = loop_k_local * BLOCK_SIZE_K + tile_k = k_offset // 
BLOCK_SIZE_K + k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) + + # Pull A tile from source_rank_id using gather primitive + a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) + + rk_local = k_offset + tl.arange(0, BLOCK_SIZE_K) + rk_global = (source_rank_id * K_local) + rk_local + rk_global_mask = rk_global < K + B_ptr = B + rk_global[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(tl.multiple_of(B_ptr, (16, 1)), mask=rk_global_mask[:, None], other=0.0) + + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) + + # Add bias if provided using tritonBLAS + if BIAS: + bias_vector = tl.load(bias_ptr + rm * stride_bias, mask=rm < M, other=0.0) + acc = add_vector(acc, bias_vector, QUANTIZED=False) + + # Convert to output dtype using tritonBLAS + c = convert_dtype(acc, C.type.element_ty) + + # Store result (manual for now, tritonBLAS store has issues with our indices) + C_ptr = ( + C + + (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] * stride_cm + + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] * stride_cn + ) + mask = ((pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] < M) & ( + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] < N + ) + tl.store(C_ptr, c, mask=mask) + + +@triton.jit() +def _fused_chunked_all_gather_matmul_kernel( + A_sharded, + B, + C, + bias_ptr, + temp_buffer, # Temporary buffer: BLOCK_M x K x num_tiles + M: tl.constexpr, + N: tl.constexpr, + K: tl.constexpr, + K_local: tl.constexpr, + stride_am: tl.constexpr, + stride_ak: tl.constexpr, + stride_bk: tl.constexpr, + stride_bn: tl.constexpr, + stride_cm: tl.constexpr, + stride_cn: tl.constexpr, + stride_bias: tl.constexpr, + heap_bases: tl.tensor, + cur_rank: tl.constexpr, + world_size: tl.constexpr, + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, + BLOCK_SIZE_K: tl.constexpr, + GROUP_SIZE_M: tl.constexpr, + NUM_SMS: tl.constexpr, + NUM_XCDS: 
tl.constexpr, + BIAS: tl.constexpr, + EVEN_K: tl.constexpr, + ALLOW_TF32: tl.constexpr, +): + """ + Fused all-gather + GEMM kernel using chunked/buffered pattern. + + This variant pre-gathers all of A into a temporary buffer before computing GEMM. + Eliminates the world_size loop by using iris.x.all_gather upfront. + + Memory layout: + - temp_buffer: BLOCK_M x K x num_tiles (stores gathered A for each tile) + - Each program gathers its M-tile of A, then does GEMM + """ + pid = tl.program_id(0) + + # Handle multi-XCD devices + if NUM_XCDS != 1: + pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) + + num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + total_tiles = num_pid_m * num_pid_n + + tl.assume(stride_am > 0) + tl.assume(stride_ak > 0) + tl.assume(stride_bk > 0) + tl.assume(stride_bn > 0) + tl.assume(stride_cm > 0) + tl.assume(stride_cn > 0) + + acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 + + # Persistent loop over output tiles + for tile_id in range(pid, total_tiles, NUM_SMS): + # Compute tile coordinates with swizzling + num_pid_in_group = GROUP_SIZE_M * num_pid_n + group_id = tile_id // num_pid_in_group + first_pid_m = group_id * GROUP_SIZE_M + group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) + pid_m = first_pid_m + ((tile_id % num_pid_in_group) % group_size_m) + pid_n = (tile_id % num_pid_in_group) // group_size_m + + # Compute row and column indices + rm = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M + rn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N + rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) + rn = tl.max_contiguous(tl.multiple_of(rn, BLOCK_SIZE_N), BLOCK_SIZE_N) + + # Buffer pointer for this tile: BLOCK_M x K for this pid_m + buffer_ptr = temp_buffer + tile_id * BLOCK_SIZE_M * K + + # Step 1: Pre-gather entire M-tile of A (BLOCK_M x K) + # Create DeviceContext and TensorView for gather operations + ctx = 
iris.x.DeviceContext(cur_rank, world_size, heap_bases) + src_view = iris.x.TensorView(A_sharded, M, K_local, stride_am, stride_ak) + + # Gather K-tiles from all ranks + for source_rank_id in range(world_size): + k_start = source_rank_id * K_local + # Loop over K dimension in blocks + for k_local_idx in range(0, K_local, BLOCK_SIZE_K): + k_global = k_start + k_local_idx + rk = k_global + tl.arange(0, BLOCK_SIZE_K) + rk_mask = rk < K + + tile_k = k_local_idx // BLOCK_SIZE_K + k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) + + # Pull A tile from source_rank_id + a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) + + # Store in buffer + buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] + tl.store(buffer_A_ptr, a, mask=rk_mask[None, :]) + + # Step 2: Standard GEMM from buffer + # Initialize accumulator + acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) + + # Loop over K dimension + loop_k = tl.cdiv(K, BLOCK_SIZE_K) + if EVEN_K: + for k_block_idx in range(loop_k): + k_offset = k_block_idx * BLOCK_SIZE_K + + # Load A from temp buffer + rk = k_offset + tl.arange(0, BLOCK_SIZE_K) + buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] + a = tl.load(buffer_A_ptr) + + # Load B tile + B_ptr = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(tl.multiple_of(B_ptr, (16, 1))) + + # Accumulate + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) + else: + # Handle case where K is not evenly divisible by BLOCK_SIZE_K + for k_block_idx in range(loop_k): + k_offset = k_block_idx * BLOCK_SIZE_K + + # Load A from temp buffer + rk = k_offset + tl.arange(0, BLOCK_SIZE_K) + rk_mask = rk < K + buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] + a = tl.load(buffer_A_ptr, mask=rk_mask[None, :], other=0.0) + + # Load B tile + B_ptr = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(tl.multiple_of(B_ptr, (16, 1)), mask=rk_mask[:, None], 
other=0.0) + + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) + + # Convert accumulator and add bias + c = convert_dtype(acc, C.type.element_ty) + if BIAS: + bias_offset = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) * stride_bias + bias_val = tl.load(bias_ptr + bias_offset) + c = add_vector(c, bias_val, 0) + + # Store result + C_ptr = ( + C + + (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] * stride_cm + + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] * stride_cn + ) + mask = ((pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] < M) & ( + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] < N + ) + tl.store(C_ptr, c, mask=mask) + + +def all_gather_matmul_preamble( + shmem, + A_sharded: torch.Tensor, + B: torch.Tensor, + config: Optional[FusedConfig] = None, +) -> FusedWorkspace: + """Allocate workspace for all_gather_matmul (buffer needed for chunked variant).""" + if config is None: + config = FusedConfig() + + M, K_local = A_sharded.shape + K, N = B.shape + world_size = shmem.get_num_ranks() + + expected_K = world_size * K_local + assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" + + # Detect hardware configuration + device = A_sharded.device + if config.num_sms is None: + import iris.hip + num_sms = iris.hip.get_cu_count(device.index) + else: + num_sms = config.num_sms + + if config.num_xcds == 1: + # Auto-detect XCDs if default value is used + import iris.hip + num_xcds = iris.hip.get_num_xcc(device.index) + else: + num_xcds = config.num_xcds + + # Allocate temporary buffer for chunked variant + aux_buffer = None + if config.all_gather_matmul_variant == "chunked": + # Calculate grid size to determine buffer size + num_tiles_m = (M + config.block_size_m - 1) // config.block_size_m + num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n + num_tiles = num_tiles_m * num_tiles_n + + # Allocate 
buffer: BLOCK_M x K x num_tiles + buffer_size = config.block_size_m * K * num_tiles + aux_buffer = torch.empty(buffer_size, dtype=A_sharded.dtype, device=device) + + return FusedWorkspace( + operation="all_gather_matmul", + shape=(M, N, K), + dtype=A_sharded.dtype, + world_size=world_size, + num_sms=num_sms, + num_xcds=num_xcds, + variant=config.all_gather_matmul_variant, + aux_buffer=aux_buffer, + prepared=True, + ) + + +def all_gather_matmul( + shmem, + output_tensor: torch.Tensor, + A_sharded: torch.Tensor, + B: torch.Tensor, + bias: Optional[torch.Tensor] = None, + async_op: bool = False, + config: Optional[FusedConfig] = None, + workspace: Optional[FusedWorkspace] = None, +) -> FusedWorkspace: + """Fused all-gather and matrix multiplication using pull pattern.""" + if config is None: + config = FusedConfig() + + M, K_local = A_sharded.shape + K, N = B.shape + world_size = shmem.get_num_ranks() + rank = shmem.get_rank() + + expected_K = world_size * K_local + assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" + assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" + + # Validate problem size against block sizes + assert M >= config.block_size_m, ( + f"M ({M}) must be >= block_size_m ({config.block_size_m}). Use smaller block sizes for small problems." + ) + assert K_local >= config.block_size_k, ( + f"K_local ({K_local}) must be >= block_size_k ({config.block_size_k}). " + f"Use smaller block sizes for small problems." + ) + assert N >= config.block_size_n, ( + f"N ({N}) must be >= block_size_n ({config.block_size_n}). Use smaller block sizes for small problems." 
+ ) + + if workspace is None: + workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) + + stride_am, stride_ak = A_sharded.stride() + stride_bk, stride_bn = B.stride() + stride_cm, stride_cn = output_tensor.stride() + + if bias is not None: + assert bias.shape[0] == M + bias_ptr = bias + stride_bias = bias.stride()[0] if bias.dim() > 0 else 1 + use_bias = True + else: + bias_ptr = output_tensor + stride_bias = 1 + use_bias = False + + # Get hardware configuration from workspace + num_sms = workspace.num_sms + num_xcds = workspace.num_xcds + + even_k = K_local % config.block_size_k == 0 + + # Use SM-based grid (persistent kernels) + grid = (num_sms,) + + # Select kernel variant based on config + if config.all_gather_matmul_variant == "chunked": + # Chunked variant: pre-gather into buffer, then GEMM + assert workspace.aux_buffer is not None, "Chunked variant requires aux_buffer in workspace" + _fused_chunked_all_gather_matmul_kernel[grid]( + A_sharded, + B, + output_tensor, + bias_ptr, + workspace.aux_buffer, # Temporary buffer + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.heap_bases, + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + num_xcds, + use_bias, + even_k, + config.allow_tf32, + ) + else: + # Pull variant (default): on-demand pull from remote ranks + _fused_all_gather_matmul_kernel[grid]( + A_sharded, + B, + output_tensor, + bias_ptr, + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.heap_bases, + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + num_xcds, + use_bias, + even_k, + config.allow_tf32, + ) + + if not async_op: + shmem.barrier() + + return workspace diff --git a/iris/ops/config.py b/iris/ops/config.py index 3ca085c31..77c0b5ab9 
100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -19,10 +19,10 @@ class FusedConfig: but users can override specific settings for performance tuning. GEMM Parameters: - block_size_m: Block size for M dimension (rows). Default: 256. - block_size_n: Block size for N dimension (columns). Default: 64. + block_size_m: Block size for M dimension (rows). Default: 128. + block_size_n: Block size for N dimension (columns). Default: 256. block_size_k: Block size for K dimension (reduction). Default: 64. - group_size_m: Group size for M dimension tiling. Default: 1. + group_size_m: Group size for M dimension tiling. Default: 4. num_sms: Number of SMs to use. If None, auto-detects from device. Default: None. num_xcds: Number of XCDs (chiplets). Default: 1. chunk_size: Chunk size for chiplet transform. Default: 1. @@ -32,8 +32,12 @@ class FusedConfig: CCL Parameters (for operations that need collective communication): all_reduce_variant: All-reduce algorithm variant. Options: "atomic", "ring", - "one_shot", "two_shot", "spinlock". Default: "one_shot". + "one_shot", "two_shot", "spinlock". Default: "two_shot". all_reduce_num_rings: Number of concurrent rings (for ring variant). Default: 1. + all_gather_matmul_variant: All-gather + matmul algorithm variant. Options: + "pull" (on-demand pull from remote ranks), + "chunked" (pre-gather into buffer then GEMM). + Default: "pull". 
Example: >>> # Use defaults @@ -47,10 +51,10 @@ class FusedConfig: """ # GEMM parameters - block_size_m: int = 256 - block_size_n: int = 64 + block_size_m: int = 128 + block_size_n: int = 256 block_size_k: int = 64 - group_size_m: int = 1 + group_size_m: int = 4 num_sms: Optional[int] = None # Auto-detect if None num_xcds: int = 1 chunk_size: int = 1 @@ -61,6 +65,7 @@ class FusedConfig: # CCL-specific parameters all_reduce_variant: str = "two_shot" # atomic, ring, one_shot, two_shot, spinlock all_reduce_num_rings: int = 1 + all_gather_matmul_variant: str = "pull" # pull, chunked def validate(self, world_size: Optional[int] = None): """ @@ -102,3 +107,10 @@ def validate(self, world_size: Optional[int] = None): if self.all_reduce_num_rings <= 0: raise ValueError(f"all_reduce_num_rings must be positive, got {self.all_reduce_num_rings}") + + # Validate all_gather_matmul_variant + valid_ag_variants = ["pull", "chunked"] + if self.all_gather_matmul_variant not in valid_ag_variants: + raise ValueError( + f"all_gather_matmul_variant must be one of {valid_ag_variants}, got {self.all_gather_matmul_variant}" + ) diff --git a/iris/ops/workspace.py b/iris/ops/workspace.py index a9c7cb616..9328e9f9e 100644 --- a/iris/ops/workspace.py +++ b/iris/ops/workspace.py @@ -38,6 +38,10 @@ class FusedWorkspace: world_size: int = 1 variant: str = "" + # Hardware configuration (detected in preamble) + num_sms: Optional[int] = None # Number of streaming multiprocessors + num_xcds: int = 1 # Number of XCDs/chiplets + # Temporary buffers (allocated as needed) aux_buffer: Optional[torch.Tensor] = None # Generic buffer for intermediate results locks: Optional[torch.Tensor] = None # Synchronization primitives diff --git a/iris/x/gather.py b/iris/x/gather.py index ca8bd4f9c..51f489a03 100644 --- a/iris/x/gather.py +++ b/iris/x/gather.py @@ -52,7 +52,7 @@ def gather( if source_rank == ctx.rank: # Local load - tile_data = tl.load(src_tile_ptr, mask=mask, other=0.0) + tile_data = 
tl.load(src_tile_ptr, mask=mask) else: # Remote load using RMA tile_data = iris.load( diff --git a/tests/ops/test_all_gather_matmul.py b/tests/ops/test_all_gather_matmul.py index 193505011..7dceea126 100644 --- a/tests/ops/test_all_gather_matmul.py +++ b/tests/ops/test_all_gather_matmul.py @@ -28,7 +28,14 @@ (256, 64, 128), ], ) -def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N): +@pytest.mark.parametrize( + "variant", + [ + "pull", + "chunked", + ], +) +def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N, variant): """Test all_gather_matmul against torch all_gather + matmul.""" if not dist.is_initialized(): pytest.skip("torch.distributed not initialized") @@ -77,12 +84,20 @@ def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N): # Run fused all_gather + matmul using shmem.ops API from iris.ops.config import FusedConfig + if rank == 0: + print(f"\n[Test] Testing variant={variant}, M={M}, K_local={K_local}, N={N}, dtype={dtype}") + # Use appropriate block sizes based on problem size # For small problems, use smaller blocks if M <= 256 or K_local <= 64 or N <= 128: - config = FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) + config = FusedConfig( + block_size_m=64, + block_size_n=64, + block_size_k=32, + all_gather_matmul_variant=variant, + ) else: - config = FusedConfig() + config = FusedConfig(all_gather_matmul_variant=variant) # Validate config against problem size assert M >= config.block_size_m, f"M ({M}) must be >= block_size_m ({config.block_size_m})" From f132cebf3c4202d56da4e81e973f5811fb33d7c5 Mon Sep 17 00:00:00 2001 From: neoblizz Date: Sat, 7 Feb 2026 20:13:20 +0000 Subject: [PATCH 03/60] Up the tritonBLAS commit. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 18e71badb..025337641 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "numpy", "requests", "ruff", - "tritonblas @ git+https://github.com/ROCm/tritonBLAS.git@df58476a4520b72495a3f03f911368a184126568", + "tritonblas @ git+https://github.com/ROCm/tritonBLAS.git@cd119279f3df543a558aa6d2cd4a3daed0b1ec7a", ] From 1628a6192b72f5120d3ec78665c7f9f5430fd646 Mon Sep 17 00:00:00 2001 From: neoblizz Date: Tue, 10 Feb 2026 00:03:37 +0000 Subject: [PATCH 04/60] ... --- benchmark/ops/all_gather_matmul/benchmark.py | 20 ++++--------- iris/iris.py | 4 +-- iris/ops/all_gather_matmul.py | 31 ++++++++++++++++---- iris/ops/config.py | 6 ++-- iris/ops/workspace.py | 6 ++++ 5 files changed, 42 insertions(+), 25 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark.py b/benchmark/ops/all_gather_matmul/benchmark.py index 20ff0c536..ae0443e6d 100644 --- a/benchmark/ops/all_gather_matmul/benchmark.py +++ b/benchmark/ops/all_gather_matmul/benchmark.py @@ -18,6 +18,7 @@ from examples.common.utils import JSONWriter import iris +from iris.ops.all_gather_matmul import all_gather_matmul_preamble from iris.ops import FusedConfig torch.manual_seed(123) @@ -65,8 +66,8 @@ def parse_args(): "--variant", type=str, default="pull", - choices=["pull", "chunked"], - help="All-gather matmul variant (pull or chunked)", + choices=["pull", "chunked", "push", "pipelined_pull"], + help="All-gather matmul variant", ) parser.add_argument( "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" @@ -181,20 +182,11 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): }, } - workspace = None + # Pre-allocate workspace once (important for push variant which needs large buffers) + workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) def run_experiment(): - nonlocal 
kernel_timing, workspace - - # Preamble if available - if hasattr(shmem.ops, "all_gather_matmul_preamble"): - workspace = shmem.ops.all_gather_matmul_preamble( - C, - A_sharded, - B, - config=config, - workspace=workspace, - ) + nonlocal kernel_timing shmem.barrier() diff --git a/iris/iris.py b/iris/iris.py index 9b8a3d35a..21aaddd8a 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1796,8 +1796,8 @@ def __translate(ptr, from_rank, to_rank, heap_bases): # Vectorization hints: must be <= minimum block size used by any caller. # (32, 32) is safe since all supported block sizes are multiples of 32. # Largest vectorized load instruction is dwordx4 (128-bits = 8 x fp16). - translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) - translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) + # translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) + # translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) return translated_ptr diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index 5d700206c..0dad98aee 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -17,6 +17,7 @@ import iris.x from tritonblas.kernels.stages import GemmContext, ScheduleContext +from tritonblas.kernels.stages.indexing.pid_transforms import chiplet_transform_chunked from .config import FusedConfig from .workspace import FusedWorkspace @@ -164,7 +165,7 @@ def all_gather_matmul_preamble( B: torch.Tensor, config: Optional[FusedConfig] = None, ) -> FusedWorkspace: - """Allocate workspace for all_gather_matmul (none needed for pull pattern).""" + """Allocate workspace for all_gather_matmul.""" if config is None: config = FusedConfig() @@ -175,14 +176,27 @@ def all_gather_matmul_preamble( expected_K = world_size * K_local assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" - return FusedWorkspace( + ws = FusedWorkspace( operation="all_gather_matmul", shape=(M, N, K), dtype=A_sharded.dtype, 
world_size=world_size, + variant=config.all_gather_matmul_variant, prepared=True, ) + # Allocate push variant workspace + if config.all_gather_matmul_variant == "push": + num_m_tiles = (M + config.block_size_m - 1) // config.block_size_m + num_k_tiles = (K_local + config.block_size_k - 1) // config.block_size_k + ws.a_inbox = shmem.zeros((world_size, M, K_local), dtype=A_sharded.dtype) + ws.signal_flags = shmem.zeros( + (world_size, world_size, num_m_tiles, num_k_tiles), dtype=torch.int32 + ) + shmem.barrier() + + return ws + def all_gather_matmul( shmem, @@ -245,10 +259,15 @@ def all_gather_matmul( even_k = K_local % config.block_size_k == 0 num_k_blocks_local = (K_local + config.block_size_k - 1) // config.block_size_k - # Launch single fused kernel - grid = (num_sms,) - _fused_all_gather_matmul_kernel[grid]( - A_sharded, + variant = config.all_gather_matmul_variant + + if variant == "pull": + num_tiles_m = (M + config.block_size_m - 1) // config.block_size_m + num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n + num_tiles = num_tiles_m * num_tiles_n + # grid = (num_tiles,) + grid = (num_sms,) + _fused_all_gather_matmul_kernel[grid](A_sharded, B, output_tensor, bias_ptr, diff --git a/iris/ops/config.py b/iris/ops/config.py index 77c0b5ab9..a92925035 100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -54,9 +54,9 @@ class FusedConfig: block_size_m: int = 128 block_size_n: int = 256 block_size_k: int = 64 - group_size_m: int = 4 + group_size_m: int = 1 num_sms: Optional[int] = None # Auto-detect if None - num_xcds: int = 1 + num_xcds: int = 8 chunk_size: int = 1 cache_modifier_a: str = ".ca" cache_modifier_b: str = ".ca" @@ -109,7 +109,7 @@ def validate(self, world_size: Optional[int] = None): raise ValueError(f"all_reduce_num_rings must be positive, got {self.all_reduce_num_rings}") # Validate all_gather_matmul_variant - valid_ag_variants = ["pull", "chunked"] + valid_ag_variants = ["pull"] if self.all_gather_matmul_variant not in 
valid_ag_variants: raise ValueError( f"all_gather_matmul_variant must be one of {valid_ag_variants}, got {self.all_gather_matmul_variant}" diff --git a/iris/ops/workspace.py b/iris/ops/workspace.py index 9328e9f9e..e519f0823 100644 --- a/iris/ops/workspace.py +++ b/iris/ops/workspace.py @@ -46,6 +46,10 @@ class FusedWorkspace: aux_buffer: Optional[torch.Tensor] = None # Generic buffer for intermediate results locks: Optional[torch.Tensor] = None # Synchronization primitives + # Push variant workspace + a_inbox: Optional[torch.Tensor] = None # (world_size, M, K_local) inbox buffer + signal_flags: Optional[torch.Tensor] = None # (world_size, world_size, m_tiles, k_tiles) + prepared: bool = False def matches( @@ -86,4 +90,6 @@ def clear(self): """Free all allocated buffers.""" self.aux_buffer = None self.locks = None + self.a_inbox = None + self.signal_flags = None self.prepared = False From c26e87275043e996c9dca78e44c60fc34d6d2eac Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Feb 2026 00:04:25 +0000 Subject: [PATCH 05/60] Apply Ruff auto-fixes --- iris/ops/all_gather_matmul.py | 64 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index 0dad98aee..6000f50ef 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -17,7 +17,6 @@ import iris.x from tritonblas.kernels.stages import GemmContext, ScheduleContext -from tritonblas.kernels.stages.indexing.pid_transforms import chiplet_transform_chunked from .config import FusedConfig from .workspace import FusedWorkspace @@ -190,9 +189,7 @@ def all_gather_matmul_preamble( num_m_tiles = (M + config.block_size_m - 1) // config.block_size_m num_k_tiles = (K_local + config.block_size_k - 1) // config.block_size_k ws.a_inbox = shmem.zeros((world_size, M, K_local), dtype=A_sharded.dtype) - ws.signal_flags = shmem.zeros( - (world_size, world_size, num_m_tiles, num_k_tiles), 
dtype=torch.int32 - ) + ws.signal_flags = shmem.zeros((world_size, world_size, num_m_tiles, num_k_tiles), dtype=torch.int32) shmem.barrier() return ws @@ -267,35 +264,36 @@ def all_gather_matmul( num_tiles = num_tiles_m * num_tiles_n # grid = (num_tiles,) grid = (num_sms,) - _fused_all_gather_matmul_kernel[grid](A_sharded, - B, - output_tensor, - bias_ptr, - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, - stride_bias, - shmem.get_device_context(), - rank, - world_size, - config.block_size_m, - config.block_size_n, - config.block_size_k, - config.group_size_m, - num_sms, - config.num_xcds, - num_k_blocks_local, - use_bias, - even_k, - config.allow_tf32, - ) + _fused_all_gather_matmul_kernel[grid]( + A_sharded, + B, + output_tensor, + bias_ptr, + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.get_device_context(), + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + config.num_xcds, + num_k_blocks_local, + use_bias, + even_k, + config.allow_tf32, + ) if not async_op: shmem.barrier() From 3d4c7d7fc3129cfaf3125247bc9496c1a04bcaa8 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Wed, 11 Feb 2026 12:01:43 -0500 Subject: [PATCH 06/60] Fix load vectorization and transpose config --- benchmark/ops/all_gather_matmul/benchmark.py | 50 +- .../all_gather_matmul/benchmark_torchrun.py | 487 ++++++++++++++++++ .../ops/all_gather_matmul/profile_att.sh | 344 +++++++++++++ benchmark/ops/all_gather_matmul/test.sh | 16 + iris/iris.py | 56 +- iris/ops/all_gather_matmul.py | 1 + iris/x/core.py | 5 +- iris/x/gather.py | 29 +- 8 files changed, 965 insertions(+), 23 deletions(-) create mode 100755 benchmark/ops/all_gather_matmul/benchmark_torchrun.py create mode 100755 benchmark/ops/all_gather_matmul/profile_att.sh create mode 100755 benchmark/ops/all_gather_matmul/test.sh diff --git 
a/benchmark/ops/all_gather_matmul/benchmark.py b/benchmark/ops/all_gather_matmul/benchmark.py index ae0443e6d..b9d40118d 100644 --- a/benchmark/ops/all_gather_matmul/benchmark.py +++ b/benchmark/ops/all_gather_matmul/benchmark.py @@ -72,6 +72,16 @@ def parse_args(): parser.add_argument( "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" ) + parser.add_argument( + "--b_col_major", + action="store_true", + help="Store B matrix in column-major order (K-contiguous) to reduce LDS transpose overhead", + ) + parser.add_argument( + "--a_col_major", + action="store_true", + help="Store A matrix in column-major order (M-contiguous). Default is row-major (K-contiguous).", + ) return vars(parser.parse_args()) @@ -142,11 +152,45 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): # Create input and output tensors # A_sharded is M x K_local, B is K x N, output is M x N - A_sharded = shmem.zeros((M, K_local), dtype=datatype) - B = shmem.zeros((K, N), dtype=datatype) C = shmem.zeros((M, N), dtype=datatype) expected_tensor = None + # Create A_sharded matrix with optional column-major layout + # When a_col_major=True, M becomes the contiguous dimension + # Default (row-major): K is contiguous (stride_ak=1, stride_am=K_local) + if args["a_col_major"]: + # Allocate storage as (K_local, M) row-major, then transpose to get (M, K_local) with M-contiguous + # This means stride_am=1 and stride_ak=M + A_storage = shmem.zeros((K_local, M), dtype=datatype) + A_sharded = A_storage.T # View as (M, K_local) with M-contiguous strides + shmem.info(f"Using column-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (M-contiguous)") + else: + # Standard row-major (M, K_local) - K is contiguous + A_sharded = shmem.zeros((M, K_local), dtype=datatype) + shmem.info(f"Using row-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (K-contiguous)") + + json_writer.add_field("a_col_major", args["a_col_major"]) 
+ json_writer.add_field("a_stride_m", A_sharded.stride()[0]) + json_writer.add_field("a_stride_k", A_sharded.stride()[1]) + + # Create B matrix with optional column-major layout for K-contiguous access + # When b_col_major=True, we store B such that K is the contiguous dimension + # This reduces LDS transpose overhead when loading B tiles along the K dimension + if args["b_col_major"]: + # Allocate storage as (N, K) row-major, then transpose to get (K, N) with K-contiguous + # This means stride_bk=1 and stride_bn=K + B_storage = shmem.zeros((N, K), dtype=datatype) + B = B_storage.T # View as (K, N) with K-contiguous strides + shmem.info(f"Using column-major B: shape={B.shape}, strides={B.stride()} (K-contiguous)") + else: + # Standard row-major (K, N) - N is contiguous + B = shmem.zeros((K, N), dtype=datatype) + shmem.info(f"Using row-major B: shape={B.shape}, strides={B.stride()} (N-contiguous)") + + json_writer.add_field("b_col_major", args["b_col_major"]) + json_writer.add_field("b_stride_k", B.stride()[0]) + json_writer.add_field("b_stride_n", B.stride()[1]) + # Fill inputs with deterministic values # Each rank has different A_sharded, same B torch.manual_seed(123 + rank) @@ -154,7 +198,9 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): A_sharded.copy_(A_sharded_data) torch.manual_seed(456) # Same B for all ranks + # Generate B data in standard (K, N) layout for consistency B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") + # Copy to B (handles both row-major and column-major storage) B.copy_(B_data) # For validation: compute expected result diff --git a/benchmark/ops/all_gather_matmul/benchmark_torchrun.py b/benchmark/ops/all_gather_matmul/benchmark_torchrun.py new file mode 100755 index 000000000..f4526410c --- /dev/null +++ b/benchmark/ops/all_gather_matmul/benchmark_torchrun.py @@ -0,0 +1,487 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. 
All rights reserved. + +""" +Benchmark for iris.ops all_gather_matmul fused operation. + +This benchmark showcases the fused All-Gather + GEMM operation where each rank +has a sharded A matrix that gets gathered, then multiplied with B. + +This version is compatible with torchrun for use with profiling tools like rocprofv3/att. + +Usage with torchrun: + torchrun --nproc_per_node=8 benchmark_torchrun.py -m 16384 -n 2048 -k 131072 --benchmark + +Usage with rocprofv3: + torchrun --nproc_per_node=8 rocprofv3 --att benchmark_torchrun.py -m 16384 -n 2048 -k 131072 --benchmark +""" + +import os +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import random +import argparse + +from examples.common.utils import JSONWriter + +import iris +from iris.ops.all_gather_matmul import all_gather_matmul_preamble +from iris.ops import FusedConfig + +torch.manual_seed(123) +random.seed(123) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark all_gather_matmul fused operation.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") + parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") + parser.add_argument("-k", type=int, default=131072, help="Common dimension total (K)") + parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") + parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") + parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") + parser.add_argument( + "--datatype", + type=str, + default="fp16", + choices=["fp16", "fp32", "bf16"], + help="Datatype of tensors", + ) + parser.add_argument( + "--output_file", + type=str, + default="all_gather_matmul.json", + help="Output file", + ) + parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") + 
parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") + parser.add_argument( + "--benchmark_pytorch", + action="store_true", + help="Also benchmark PyTorch (all_gather_into_tensor + matmul) for comparison", + ) + parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") + parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") + parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") + parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") + parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") + parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") + parser.add_argument( + "--variant", + type=str, + default="pull", + choices=["pull", "chunked", "push", "pipelined_pull"], + help="All-gather matmul variant", + ) + parser.add_argument( + "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" + ) + parser.add_argument( + "--single-run", + action="store_true", + help="Run only one iteration (no warmup, 1 repeat) - useful for profiling", + ) + parser.add_argument( + "--b_col_major", + action="store_true", + help="Store B matrix in column-major order (K-contiguous) to reduce LDS transpose overhead", + ) + parser.add_argument( + "--a_col_major", + action="store_true", + help="Store A matrix in column-major order (M-contiguous). 
Default is row-major (K-contiguous).", + ) + + return vars(parser.parse_args()) + + +def _worker(local_rank: int = None, world_size: int = None, init_url: str = None, args: dict = None): + """Worker function for PyTorch distributed execution.""" + # Support torchrun: read from environment variables if available + if local_rank is None: + local_rank = int(os.environ.get("RANK", os.environ.get("LOCAL_RANK", 0))) + if world_size is None: + world_size = int(os.environ.get("WORLD_SIZE", 1)) + if init_url is None: + # torchrun sets MASTER_ADDR and MASTER_PORT + master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") + master_port = os.environ.get("MASTER_PORT", "29500") + init_url = f"tcp://{master_addr}:{master_port}" + + # Use nccl backend - gloo doesn't support uint64 tensors used by Iris + backend = "nccl" if torch.cuda.is_available() else "gloo" + print(f"Rank {local_rank}: Using backend: {backend}") + + # Use environment-based initialization if torchrun is detected + if "RANK" in os.environ or "LOCAL_RANK" in os.environ: + # For torchrun, use env:// initialization with device_id for nccl + dist.init_process_group( + backend=backend, + init_method="env://", + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, + ) + else: + dist.init_process_group( + backend=backend, + init_method=init_url, + world_size=world_size, + rank=local_rank, + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, + ) + + shmem = iris.iris(args["heap_size"]) + rank = shmem.get_rank() + world_size = shmem.get_num_ranks() + + # Datatype mapping + datatype = torch.float32 + if args["datatype"] == "fp16": + datatype = torch.float16 + elif args["datatype"] == "fp32": + datatype = torch.float32 + elif args["datatype"] == "bf16": + datatype = torch.bfloat16 + else: + print("Unknown datatype.") + exit(1) + + M = args["m"] + N = args["n"] + K = args["k"] + K_local = K // world_size # Sharded K dimension + + # Create config with 
parameters + config_kwargs = { + "block_size_m": args["block_size_m"], + "block_size_n": args["block_size_n"], + "block_size_k": args["block_size_k"], + "group_size_m": args["group_size_m"], + "all_gather_matmul_variant": args["variant"], + } + if args["comm_sms"] is not None: + config_kwargs["num_sms"] = args["comm_sms"] + if args["num_xcds"] is not None: + config_kwargs["num_xcds"] = args["num_xcds"] + + config = FusedConfig(**config_kwargs) + + json_writer = JSONWriter(args["output_file"]) + json_writer.add_field("world_size", world_size) + json_writer.add_field("operation", "all_gather_matmul") + json_writer.add_field("k_local", K_local) + json_writer.add_field("k_total", K) + + for key, value in args.items(): + json_writer.add_field(key, value) + + # Export actual config values to JSON (including defaults) + json_writer.add_field("block_size_m", config.block_size_m) + json_writer.add_field("block_size_n", config.block_size_n) + json_writer.add_field("block_size_k", config.block_size_k) + json_writer.add_field("group_size_m", config.group_size_m) + json_writer.add_field("num_sms", config.num_sms) + json_writer.add_field("num_xcds", config.num_xcds) + + # Create input and output tensors + # A_sharded is M x K_local, B is K x N, output is M x N + C = shmem.zeros((M, N), dtype=datatype) + expected_tensor = None + + # Create A_sharded matrix with optional column-major layout + # When a_col_major=True, M becomes the contiguous dimension + # Default (row-major): K is contiguous (stride_ak=1, stride_am=K_local) + if args["a_col_major"]: + # Allocate storage as (K_local, M) row-major, then transpose to get (M, K_local) with M-contiguous + # This means stride_am=1 and stride_ak=M + A_storage = shmem.zeros((K_local, M), dtype=datatype) + A_sharded = A_storage.T # View as (M, K_local) with M-contiguous strides + shmem.info(f"Using column-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (M-contiguous)") + else: + # Standard row-major (M, K_local) - K is 
contiguous + A_sharded = shmem.zeros((M, K_local), dtype=datatype) + shmem.info(f"Using row-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (K-contiguous)") + + json_writer.add_field("a_col_major", args["a_col_major"]) + json_writer.add_field("a_stride_m", A_sharded.stride()[0]) + json_writer.add_field("a_stride_k", A_sharded.stride()[1]) + + # Create B matrix with optional column-major layout for K-contiguous access + # When b_col_major=True, we store B such that K is the contiguous dimension + # This reduces LDS transpose overhead when loading B tiles along the K dimension + if args["b_col_major"]: + # Allocate storage as (N, K) row-major, then transpose to get (K, N) with K-contiguous + # This means stride_bk=1 and stride_bn=K + B_storage = shmem.zeros((N, K), dtype=datatype) + B = B_storage.T # View as (K, N) with K-contiguous strides + shmem.info(f"Using column-major B: shape={B.shape}, strides={B.stride()} (K-contiguous)") + else: + # Standard row-major (K, N) - N is contiguous + B = shmem.zeros((K, N), dtype=datatype) + shmem.info(f"Using row-major B: shape={B.shape}, strides={B.stride()} (N-contiguous)") + + json_writer.add_field("b_col_major", args["b_col_major"]) + json_writer.add_field("b_stride_k", B.stride()[0]) + json_writer.add_field("b_stride_n", B.stride()[1]) + + # Fill inputs with deterministic values + # Each rank has different A_sharded, same B + torch.manual_seed(123 + rank) + A_sharded_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}") + A_sharded.copy_(A_sharded_data) + + torch.manual_seed(456) # Same B for all ranks + # Generate B data in standard (K, N) layout for consistency + B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") + # Copy to B (handles both row-major and column-major storage) + B.copy_(B_data) + + # For validation: compute expected result + if args["validate"]: + # Gather all A_sharded matrices and compute expected result + A_sharded_list = [torch.zeros((M, K_local), 
dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] + dist.all_gather(A_sharded_list, A_sharded_data) + + # Concatenate along K dimension: A_gathered = [A_0 | A_1 | ... | A_n] + A_gathered = torch.cat(A_sharded_list, dim=1) # (M, K) + + # Expected: A_gathered @ B + expected_tensor = shmem.zeros((M, N), dtype=datatype) + expected_result = torch.matmul(A_gathered, B_data) + expected_tensor.copy_(expected_result) + + comm_stream = torch.cuda.Stream() + + kernel_timing = { + "all_gather_matmul": { + "start_event": torch.cuda.Event(enable_timing=True), + "end_event": torch.cuda.Event(enable_timing=True), + "ms": 0, + "experiments": 0, + }, + } + + # Pre-allocate workspace once (important for push variant which needs large buffers) + workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) + + def run_experiment(): + nonlocal kernel_timing + + shmem.barrier() + + torch.cuda.nvtx.range_push("All-Gather-Matmul") + with torch.cuda.stream(comm_stream): + kernel_timing["all_gather_matmul"]["start_event"].record() + shmem.ops.all_gather_matmul( + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, + ) + kernel_timing["all_gather_matmul"]["end_event"].record() + kernel_timing["all_gather_matmul"]["experiments"] += 1 + torch.cuda.nvtx.range_pop() + + # Synchronize before querying event timing + shmem.barrier() + + # Update timing + ms = kernel_timing["all_gather_matmul"]["start_event"].elapsed_time( + kernel_timing["all_gather_matmul"]["end_event"] + ) + kernel_timing["all_gather_matmul"]["ms"] += ms + + # Synchronize across all GPUs + shmem.barrier() + + if args["validate"]: + shmem.info("Validating...") + + # Reset output before validation + C.zero_() + shmem.barrier() + + run_experiment() + torch.cuda.synchronize() + shmem.barrier() + + atol = 1e-1 if datatype == torch.float16 else 1e-3 + success = torch.allclose(C, expected_tensor, atol=atol) + if not success: + max_diff = torch.abs(C - expected_tensor).max().item() + 
shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") + + if success: + shmem.info("All-gather-matmul validation passed!") + else: + shmem.error("All-gather-matmul validation failed!") + + json_writer.add_field("success", success) + + # Wait for all to finish validation + shmem.barrier() + + if args["benchmark"]: + # Determine warmup and repeat counts + if args.get("single_run", False): + n_warmup = 0 + n_repeat = 1 + shmem.info("Single-run mode: no warmup, 1 repeat") + else: + n_warmup = 25 + n_repeat = 100 # default from iris.do_bench + + # Warmup for benchmarking (skip if single-run) + if not args.get("single_run", False): + for k in ["all_gather_matmul"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=1) + + for k in ["all_gather_matmul"]: + kernel_timing[k]["ms"] = 0 + kernel_timing[k]["experiments"] = 0 + + # Reset output before benchmarking + C.zero_() + shmem.barrier() + + shmem.info("Benchmarking...") + + # Calculate TFLOPS: 2*M*N*K flops + total_flops = 2 * M * N * K + total_tflops_unit = total_flops * 1e-12 + + triton_ms = iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=n_repeat) + tflops = total_tflops_unit / ( + (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 + ) + + # Calculate bandwidth for all-gather part + # All-gather moves (world_size - 1) * M * K_local * element_size bytes + element_size = torch.tensor([], dtype=datatype).element_size() + input_bytes = M * K_local * element_size + total_bytes = input_bytes * (world_size - 1) + total_bytes_gb = total_bytes / (1024**3) + + bandwidth_gbps = total_bytes_gb / ( + (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 + ) + + shmem.info( + f"All-gather-matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " + 
f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" + ) + + json_writer.add_field("tflops", tflops) + json_writer.add_field("bandwidth_gbps", bandwidth_gbps) + json_writer.add_field("total_ms", triton_ms) + json_writer.add_field("total_flops", total_flops) + json_writer.add_field("total_bytes", total_bytes) + json_writer.add_field("total_bytes_gb", total_bytes_gb) + json_writer.add_field( + "all_gather_matmul_ms", + kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"], + ) + json_writer.add_field("all_gather_matmul_experiments", kernel_timing["all_gather_matmul"]["experiments"]) + + # Wait for all to finish benchmarking + shmem.barrier() + + # Benchmark PyTorch (all_gather_into_tensor + matmul) for comparison + if args["benchmark_pytorch"]: + shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") + + # Create PyTorch tensors (not on Iris heap) + pytorch_A_sharded = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}") + pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") + pytorch_A_gathered = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}") + pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") + + # Warmup + for _ in range(10): + dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) + pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) + torch.cuda.synchronize() + dist.barrier() + + # Benchmark + dist.barrier() + + # Calculate TFLOPS: 2*M*N*K flops + total_flops = 2 * M * N * K + total_tflops_unit = total_flops * 1e-12 + + # Calculate bandwidth for all-gather part + element_size = torch.tensor([], dtype=datatype).element_size() + input_bytes = M * K_local * element_size + total_bytes = input_bytes * (world_size - 1) + total_bytes_gb = total_bytes / (1024**3) + + def run_pytorch_experiment(): + dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) + pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) + + 
pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) + + # Calculate TFLOPS and bandwidth + pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) + pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) + + shmem.info( + f"PyTorch all_gather_into_tensor+matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " + f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" + ) + + if args["benchmark"]: + # Calculate performance ratio + iris_tflops = tflops + speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 + shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") + + json_writer.add_field("pytorch_tflops", pytorch_tflops) + json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) + json_writer.add_field("pytorch_ms", pytorch_ms) + json_writer.add_field("iris_speedup", speedup) + + # Wait for all to finish PyTorch benchmarking + shmem.barrier() + + if rank == 0: + json_writer.flush() + json_writer.display() + + shmem.barrier() + dist.destroy_process_group() + + +def main(): + print("Starting all_gather_matmul benchmark...") + args = parse_args() + + # Check if running with torchrun (detected by environment variables) + if "RANK" in os.environ or "LOCAL_RANK" in os.environ: + # torchrun handles process spawning, so call _worker directly + print("Detected torchrun execution mode") + _worker(args=args) + else: + # Use multiprocessing spawn for backward compatibility + num_ranks = args["num_ranks"] + init_url = args["init_url"] + mp.spawn( + fn=_worker, + args=(num_ranks, init_url, args), + nprocs=num_ranks, + join=True, + ) + + +if __name__ == "__main__": + main() diff --git a/benchmark/ops/all_gather_matmul/profile_att.sh b/benchmark/ops/all_gather_matmul/profile_att.sh new file mode 100755 index 000000000..21f6f21fe --- /dev/null +++ b/benchmark/ops/all_gather_matmul/profile_att.sh @@ -0,0 +1,344 @@ +#!/bin/bash +# 
SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +# ATT (Advanced Thread Trace) Profiling Script for all_gather_matmul benchmark +# Uses rocprofv3 with thread trace to profile the benchmark at ISA instruction level. +# +# Usage: +# ./profile_att.sh [OPTIONS] +# +# Options: +# -r, --ranks NUM_RANKS Number of ranks/GPUs (default: 8) +# -m, --m-dim M M dimension (default: 2048) +# -n, --n-dim N N dimension (default: 16384) +# -k, --k-dim K K dimension (default: 131072) +# -v, --variant VARIANT Variant: pull, chunked, push, pipelined_pull (default: pull) +# --block-m SIZE Block size for M dimension (default: 256) +# --block-n SIZE Block size for N dimension (default: 256) +# --block-k SIZE Block size for K dimension (default: 64) +# --group-m SIZE Group size for M dimension tiling (default: 1) +# --num-xcds NUM Number of XCDs (default: 8) +# --validate Enable validation mode +# --benchmark-pytorch Also benchmark PyTorch for comparison +# -o, --output-dir DIR Base output directory (default: ./att_profiles) +# --att-target-cu CU Target CU for thread trace (default: 1) +# --att-buffer-size SIZE Trace buffer size in hex (default: 0x6000000 = 96MB) +# --att-activity LEVEL Perfcounter streaming level 1-16 (default: 8) +# --kernel-regex REGEX Kernel name regex filter (optional) +# --single-run Run only one iteration (no warmup, no repeat) +# --k-contiguous Use K-contiguous layout for both A and B matrices +# (default A is row-major/K-contiguous, adds --b_col_major) +# --a-col-major Store A matrix in column-major order (M-contiguous) +# --b-col-major Store B matrix in column-major order (K-contiguous) +# -h, --help Show this help message + +set -e + +# Default values +NUM_RANKS=8 +M_DIM=2048 +N_DIM=16384 +K_DIM=131072 +VARIANT="pull" +BASE_OUTPUT_DIR="./att_profiles" +ATT_TARGET_CU=1 +ATT_BUFFER_SIZE="0x6000000" # 96MB +ATT_ACTIVITY=8 +KERNEL_REGEX="" +SINGLE_RUN=true +K_CONTIGUOUS=true # Default to K-contiguous layout for 
both matrices +A_COL_MAJOR=false +B_COL_MAJOR=false +BLOCK_M=256 +BLOCK_N=256 +BLOCK_K=64 +GROUP_M=1 +NUM_XCDS=8 +VALIDATE=true +BENCHMARK_PYTORCH=true + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BENCHMARK_SCRIPT="${SCRIPT_DIR}/benchmark_torchrun.py" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + -r|--ranks) + NUM_RANKS="$2" + shift 2 + ;; + -m|--m-dim) + M_DIM="$2" + shift 2 + ;; + -n|--n-dim) + N_DIM="$2" + shift 2 + ;; + -k|--k-dim) + K_DIM="$2" + shift 2 + ;; + -v|--variant) + VARIANT="$2" + shift 2 + ;; + -o|--output-dir) + BASE_OUTPUT_DIR="$2" + shift 2 + ;; + --att-target-cu) + ATT_TARGET_CU="$2" + shift 2 + ;; + --att-buffer-size) + ATT_BUFFER_SIZE="$2" + shift 2 + ;; + --att-activity) + ATT_ACTIVITY="$2" + shift 2 + ;; + --kernel-regex) + KERNEL_REGEX="$2" + shift 2 + ;; + --single-run) + SINGLE_RUN=true + shift + ;; + --k-contiguous) + K_CONTIGUOUS=true + shift + ;; + --a-col-major) + A_COL_MAJOR=true + shift + ;; + --b-col-major) + B_COL_MAJOR=true + shift + ;; + --block-m) + BLOCK_M="$2" + shift 2 + ;; + --block-n) + BLOCK_N="$2" + shift 2 + ;; + --block-k) + BLOCK_K="$2" + shift 2 + ;; + --group-m) + GROUP_M="$2" + shift 2 + ;; + --num-xcds) + NUM_XCDS="$2" + shift 2 + ;; + --validate) + VALIDATE=true + shift + ;; + --no-validate) + VALIDATE=false + shift + ;; + --benchmark-pytorch) + BENCHMARK_PYTORCH=true + shift + ;; + --no-benchmark-pytorch) + BENCHMARK_PYTORCH=false + shift + ;; + -h|--help) + head -30 "$0" | tail -n +2 | sed 's/^# //' | sed 's/^#//' + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +# Generate timestamp for output directory +TIMESTAMP=$(date +"%Y%m%d_%H%M%S") +OUTPUT_DIR="${BASE_OUTPUT_DIR}/att_${VARIANT}_m${M_DIM}_n${N_DIM}_k${K_DIM}_${TIMESTAMP}" + +# Create output directory +mkdir -p "${OUTPUT_DIR}" + +# Log file with timestamp +LOG_FILE="${OUTPUT_DIR}/profile_${TIMESTAMP}.log" + +echo "==============================================" | tee 
"${LOG_FILE}" +echo "ATT Profiling for all_gather_matmul benchmark" | tee -a "${LOG_FILE}" +echo "==============================================" | tee -a "${LOG_FILE}" +echo "Timestamp: $(date)" | tee -a "${LOG_FILE}" +echo "Output directory: ${OUTPUT_DIR}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" +echo "Configuration:" | tee -a "${LOG_FILE}" +echo " NUM_RANKS: ${NUM_RANKS}" | tee -a "${LOG_FILE}" +echo " M: ${M_DIM}" | tee -a "${LOG_FILE}" +echo " N: ${N_DIM}" | tee -a "${LOG_FILE}" +echo " K: ${K_DIM}" | tee -a "${LOG_FILE}" +echo " Variant: ${VARIANT}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" +echo "ATT Parameters:" | tee -a "${LOG_FILE}" +echo " att-target-cu: ${ATT_TARGET_CU}" | tee -a "${LOG_FILE}" +echo " att-buffer-size: ${ATT_BUFFER_SIZE}" | tee -a "${LOG_FILE}" +echo " att-activity: ${ATT_ACTIVITY}" | tee -a "${LOG_FILE}" +if [[ -n "${KERNEL_REGEX}" ]]; then + echo " kernel-include-regex: ${KERNEL_REGEX}" | tee -a "${LOG_FILE}" +fi +echo " single-run: ${SINGLE_RUN}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" +echo "Matrix Layout:" | tee -a "${LOG_FILE}" +echo " k-contiguous: ${K_CONTIGUOUS}" | tee -a "${LOG_FILE}" +echo " a-col-major: ${A_COL_MAJOR}" | tee -a "${LOG_FILE}" +echo " b-col-major: ${B_COL_MAJOR}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" +echo "Block Sizes:" | tee -a "${LOG_FILE}" +echo " block-m: ${BLOCK_M}" | tee -a "${LOG_FILE}" +echo " block-n: ${BLOCK_N}" | tee -a "${LOG_FILE}" +echo " block-k: ${BLOCK_K}" | tee -a "${LOG_FILE}" +echo " group-m: ${GROUP_M}" | tee -a "${LOG_FILE}" +echo " num-xcds: ${NUM_XCDS}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" +echo "Benchmark Options:" | tee -a "${LOG_FILE}" +echo " validate: ${VALIDATE}" | tee -a "${LOG_FILE}" +echo " benchmark-pytorch: ${BENCHMARK_PYTORCH}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" + +# Build rocprofv3 ATT options +ROCPROF_OPTS="--att" +ROCPROF_OPTS="${ROCPROF_OPTS} --att-target-cu 
${ATT_TARGET_CU}" +ROCPROF_OPTS="${ROCPROF_OPTS} --att-buffer-size ${ATT_BUFFER_SIZE}" +ROCPROF_OPTS="${ROCPROF_OPTS} --att-activity ${ATT_ACTIVITY}" + +if [[ -n "${KERNEL_REGEX}" ]]; then + ROCPROF_OPTS="${ROCPROF_OPTS} --kernel-include-regex \"${KERNEL_REGEX}\"" +fi + +# Build benchmark args +BENCH_ARGS="-m ${M_DIM} -n ${N_DIM} -k ${K_DIM} --variant ${VARIANT} --benchmark -r ${NUM_RANKS}" +BENCH_ARGS="${BENCH_ARGS} --block_size_m ${BLOCK_M} --block_size_n ${BLOCK_N} --block_size_k ${BLOCK_K}" +BENCH_ARGS="${BENCH_ARGS} --group_size_m ${GROUP_M} --num_xcds ${NUM_XCDS}" + +if [[ "${SINGLE_RUN}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} --single-run" +fi + +if [[ "${VALIDATE}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} -v" +fi + +if [[ "${BENCHMARK_PYTORCH}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} --benchmark_pytorch" +fi + +# Add K-contiguous layout options +# --k-contiguous: Both A and B become K-contiguous +# - A is already K-contiguous in default row-major layout +# - B needs --b_col_major to become K-contiguous +if [[ "${K_CONTIGUOUS}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} --b_col_major" +fi + +# Individual matrix layout overrides +if [[ "${A_COL_MAJOR}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} --a_col_major" +fi +if [[ "${B_COL_MAJOR}" == "true" ]]; then + BENCH_ARGS="${BENCH_ARGS} --b_col_major" +fi + +# Full command +# rocprofv3 wraps the entire torchrun command, not the other way around +# HSA_NO_SCRATCH_RECLAIM=1 prevents scratch memory reclaim issues +FULL_CMD="HSA_NO_SCRATCH_RECLAIM=1 rocprofv3 ${ROCPROF_OPTS} -d ${OUTPUT_DIR} -- torchrun --nproc_per_node=${NUM_RANKS} ${BENCHMARK_SCRIPT} ${BENCH_ARGS}" + +echo "Command:" | tee -a "${LOG_FILE}" +echo "${FULL_CMD}" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" + +# Save configuration to JSON for reference +cat > "${OUTPUT_DIR}/config.json" << EOF +{ + "timestamp": "${TIMESTAMP}", + "num_ranks": ${NUM_RANKS}, + "m_dim": ${M_DIM}, + "n_dim": ${N_DIM}, + "k_dim": 
${K_DIM}, + "variant": "${VARIANT}", + "att_target_cu": ${ATT_TARGET_CU}, + "att_buffer_size": "${ATT_BUFFER_SIZE}", + "att_activity": ${ATT_ACTIVITY}, + "kernel_regex": "${KERNEL_REGEX}", + "single_run": ${SINGLE_RUN}, + "k_contiguous": ${K_CONTIGUOUS}, + "a_col_major": ${A_COL_MAJOR}, + "b_col_major": ${B_COL_MAJOR}, + "block_m": ${BLOCK_M}, + "block_n": ${BLOCK_N}, + "block_k": ${BLOCK_K}, + "group_m": ${GROUP_M}, + "num_xcds": ${NUM_XCDS}, + "validate": ${VALIDATE}, + "benchmark_pytorch": ${BENCHMARK_PYTORCH}, + "command": "${FULL_CMD}" +} +EOF + +echo "Starting profiling..." | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" + +# Run the profiling command +START_TIME=$(date +%s) + +# Execute the command and capture output +eval "${FULL_CMD}" 2>&1 | tee -a "${LOG_FILE}" +EXIT_CODE=${PIPESTATUS[0]} + +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) + +echo "" | tee -a "${LOG_FILE}" +echo "==============================================" | tee -a "${LOG_FILE}" +echo "Profiling completed" | tee -a "${LOG_FILE}" +echo "Exit code: ${EXIT_CODE}" | tee -a "${LOG_FILE}" +echo "Duration: ${DURATION} seconds" | tee -a "${LOG_FILE}" +echo "End time: $(date)" | tee -a "${LOG_FILE}" +echo "==============================================" | tee -a "${LOG_FILE}" +echo "" | tee -a "${LOG_FILE}" + +# List output files +echo "Output files:" | tee -a "${LOG_FILE}" +ls -la "${OUTPUT_DIR}" 2>&1 | tee -a "${LOG_FILE}" + +# Check for stats CSV files +if ls "${OUTPUT_DIR}"/stats_*.csv 1> /dev/null 2>&1; then + echo "" | tee -a "${LOG_FILE}" + echo "Stats CSV files found:" | tee -a "${LOG_FILE}" + ls -la "${OUTPUT_DIR}"/stats_*.csv 2>&1 | tee -a "${LOG_FILE}" +fi + +# Check for ui_output directories (ROCprof Compute Viewer compatible) +if ls -d "${OUTPUT_DIR}"/ui_output_* 1> /dev/null 2>&1; then + echo "" | tee -a "${LOG_FILE}" + echo "UI output directories (for ROCprof Compute Viewer):" | tee -a "${LOG_FILE}" + ls -d "${OUTPUT_DIR}"/ui_output_* 2>&1 | tee -a "${LOG_FILE}" 
+fi + +echo "" | tee -a "${LOG_FILE}" +echo "Profile output saved to: ${OUTPUT_DIR}" | tee -a "${LOG_FILE}" +echo "Log file: ${LOG_FILE}" | tee -a "${LOG_FILE}" + +exit ${EXIT_CODE} diff --git a/benchmark/ops/all_gather_matmul/test.sh b/benchmark/ops/all_gather_matmul/test.sh new file mode 100755 index 000000000..7d5ef1a98 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/test.sh @@ -0,0 +1,16 @@ +HSA_NO_SCRATCH_RECLAIM=1 \ +python3 $(pwd)/benchmark.py \ + -m 2048 \ + -n 16384 \ + -k 131072 \ + --num_ranks 8 \ + --num_xcds 8 \ + --datatype fp16 \ + --block_size_m 512 \ + --block_size_n 128 \ + --block_size_k 64 \ + --group_size_m 1 \ + --benchmark \ + --b_col_major \ + -v \ + --benchmark_pytorch \ No newline at end of file diff --git a/iris/iris.py b/iris/iris.py index 21aaddd8a..50063a55e 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1780,6 +1780,10 @@ def reduce_scatter(self, output_tensor, input_tensor, op=None, group=None, async @triton.jit def __translate(ptr, from_rank, to_rank, heap_bases): + """ + Basic pointer translation without vectorization hints. + Used for atomic operations which may receive scalar pointers. + """ from_base = tl.load(heap_bases + from_rank) to_base = tl.load(heap_bases + to_rank) # convert to int to compute difference @@ -1793,11 +1797,30 @@ def __translate(ptr, from_rank, to_rank, heap_bases): # Cast to_base back to pointer type translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) - # Vectorization hints: must be <= minimum block size used by any caller. - # (32, 32) is safe since all supported block sizes are multiples of 32. - # Largest vectorized load instruction is dwordx4 (128-bits = 8 x fp16). - # translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) - # translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) + return translated_ptr + + + +@triton.jit +def __translate_block_2d(ptr, from_rank, to_rank, heap_bases): + """ + Pointer translation for block load/store operations. 
+ + Note: Vectorization hints should be applied in the tile_ptr computation (core.py) + where the 2D block shape is actually created, not here in the translation. + """ + from_base = tl.load(heap_bases + from_rank) + to_base = tl.load(heap_bases + to_rank) + # convert to int to compute difference + ptr_int = tl.cast(ptr, tl.uint64) + # Find the offset from from_rank heap + offset = ptr_int - from_base + # Byte cast for byte offset addition + to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) + # Find the offset into the to_rank heap + translated_ptr_byte = to_base_byte + offset + # Cast to_base back to pointer type + translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) return translated_ptr @@ -1976,9 +1999,16 @@ def initialize(context_tensor, rank, world_size, tracing: tl.constexpr = False): @triton.jit def _translate(self, ptr, from_rank, to_rank): - """Internal pointer translation between rank address spaces.""" + """Internal pointer translation between rank address spaces. + Used for atomic operations which may receive scalar pointers.""" return __translate(ptr, from_rank, to_rank, self.heap_bases) + @triton.jit + def _translate_block_2d(self, ptr, from_rank, to_rank): + """Internal pointer translation with 2D vectorization hints. 
+ Used for block load/store operations with 2D block pointers.""" + return __translate_block_2d(ptr, from_rank, to_rank, self.heap_bases) + @triton.jit def load(self, pointer, from_rank, mask=None): """ @@ -2000,7 +2030,7 @@ def load(self, pointer, from_rank, mask=None): Example: >>> data = ctx.load(buffer + offsets, from_rank=1, mask=mask) """ - translated_ptr = self._translate(pointer, self.rank, from_rank) + translated_ptr = self._translate_block_2d(pointer, self.rank, from_rank) result = tl.load(translated_ptr, mask=mask) return result @@ -2026,7 +2056,7 @@ def store(self, pointer, value, to_rank, mask=None): Example: >>> ctx.store(buffer + offsets, values, to_rank=1, mask=mask) """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate_block_2d(pointer, self.rank, to_rank) tl.store(translated_ptr, value, mask=mask) @triton.jit @@ -2356,6 +2386,9 @@ def load(pointer, to_rank, from_rank, heap_bases, mask=None): data from the target memory location. If the `from_rank` and `to_rank` are the same, this function performs a local load operation. + This function uses 2D vectorization hints for optimal performance with block pointers. + Minimum block size in each dimension should be >= 16. + Args: pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. to_rank (int): The rank ID to which the pointer will be translated. Must be the current rank where the pointer is local. 
@@ -2375,7 +2408,7 @@ def load(pointer, to_rank, from_rank, heap_bases, mask=None): >>> data = iris.load(ptr, cur_rank, remote_rank, heap_bases) >>> return data """ - translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases) + translated_ptr = __translate_block_2d(pointer, to_rank, from_rank, heap_bases) result = tl.load(translated_ptr, mask=mask) return result @@ -2390,6 +2423,9 @@ def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): the provided data to the target memory location. If the `from_rank` and `to_rank` are the same, this function performs a local store operation. + This function uses 2D vectorization hints for optimal performance with block pointers. + Minimum block size in each dimension should be >= 16. + Args: pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. value (Block): The tensor of elements to be stored. 
@@ -2410,7 +2446,7 @@ def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): >>> value = 42 >>> iris.store(ptr, value, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate_block_2d(pointer, from_rank, to_rank, heap_bases) tl.store(translated_ptr, value, mask=mask) diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index 0dad98aee..ed4d72b8a 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -295,6 +295,7 @@ def all_gather_matmul( use_bias, even_k, config.allow_tf32, + matrix_instr_nonkdim=16, ) if not async_op: diff --git a/iris/x/core.py b/iris/x/core.py index fee50918e..58786e79e 100644 --- a/iris/x/core.py +++ b/iris/x/core.py @@ -80,7 +80,10 @@ def tile_ptr(ptr, M, N, stride_m, stride_n, pid_m, pid_n, BLOCK_SIZE_M: tl.const rm, rn, mask = tile_layout(pid_m, pid_n, M, N, BLOCK_SIZE_M, BLOCK_SIZE_N) offset = rm[:, None] * stride_m + rn[None, :] * stride_n tile_ptr = ptr + offset - tile_ptr = tl.multiple_of(tile_ptr, (BLOCK_SIZE_M, BLOCK_SIZE_N)) + # NOTE: Vectorization hints are applied at the call site (e.g., gather.py) + # rather than here, because the caller knows the block dimensions. + # Alignment IS preserved through pointer translation since symmetric heaps + # are all page-aligned, so relative offsets within the heap are maintained. 
return tile_ptr, mask diff --git a/iris/x/gather.py b/iris/x/gather.py index 51f489a03..d94e85a93 100644 --- a/iris/x/gather.py +++ b/iris/x/gather.py @@ -51,16 +51,25 @@ def gather( src_tile_ptr, mask = src_view.tile_ptr(tile) if source_rank == ctx.rank: - # Local load - tile_data = tl.load(src_tile_ptr, mask=mask) + # Local load - can use vectorization hints since alignment is guaranteed + local_ptr = tl.multiple_of(src_tile_ptr, (1, tile.block_n)) + local_ptr = tl.max_contiguous(local_ptr, (1, tile.block_n)) + tile_data = tl.load(local_ptr, mask=mask) else: - # Remote load using RMA - tile_data = iris.load( - src_tile_ptr, - ctx.rank, # to_rank (current rank) - source_rank, # from_rank (source rank) - ctx.heap_bases, - mask=mask, - ) + # Remote load using RMA - inline translation and apply hints AFTER translation + # Hints must be applied to the translated pointer because pointer arithmetic + # (cast to uint64, subtract, add, cast back) destroys hint metadata. + # Alignment IS preserved because symmetric heaps are all page-aligned. 
+ from_base = tl.load(ctx.heap_bases + ctx.rank) + to_base = tl.load(ctx.heap_bases + source_rank) + ptr_int = tl.cast(src_tile_ptr, tl.uint64) + offset = ptr_int - from_base + to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) + translated_ptr_byte = to_base_byte + offset + translated_ptr = tl.cast(translated_ptr_byte, src_tile_ptr.dtype) + # Apply vectorization hints AFTER translation + translated_ptr = tl.multiple_of(translated_ptr, (1, tile.block_n)) + translated_ptr = tl.max_contiguous(translated_ptr, (1, tile.block_n)) + tile_data = tl.load(translated_ptr, mask=mask) return tile_data From 5b022114ca303e354308b499b868f639dd6d8498 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Feb 2026 17:02:36 +0000 Subject: [PATCH 07/60] Apply Ruff auto-fixes --- iris/iris.py | 1 - iris/ops/all_gather_matmul.py | 66 +++++++++++++++++------------------ iris/x/gather.py | 1 - 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/iris/iris.py b/iris/iris.py index 50063a55e..94cd0ae6e 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1800,7 +1800,6 @@ def __translate(ptr, from_rank, to_rank, heap_bases): return translated_ptr - @triton.jit def __translate_block_2d(ptr, from_rank, to_rank, heap_bases): """ diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index ed4d72b8a..e72d0ef68 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -17,7 +17,6 @@ import iris.x from tritonblas.kernels.stages import GemmContext, ScheduleContext -from tritonblas.kernels.stages.indexing.pid_transforms import chiplet_transform_chunked from .config import FusedConfig from .workspace import FusedWorkspace @@ -190,9 +189,7 @@ def all_gather_matmul_preamble( num_m_tiles = (M + config.block_size_m - 1) // config.block_size_m num_k_tiles = (K_local + config.block_size_k - 1) // config.block_size_k ws.a_inbox = shmem.zeros((world_size, M, K_local), dtype=A_sharded.dtype) - ws.signal_flags = shmem.zeros( - (world_size, 
world_size, num_m_tiles, num_k_tiles), dtype=torch.int32 - ) + ws.signal_flags = shmem.zeros((world_size, world_size, num_m_tiles, num_k_tiles), dtype=torch.int32) shmem.barrier() return ws @@ -267,36 +264,37 @@ def all_gather_matmul( num_tiles = num_tiles_m * num_tiles_n # grid = (num_tiles,) grid = (num_sms,) - _fused_all_gather_matmul_kernel[grid](A_sharded, - B, - output_tensor, - bias_ptr, - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, - stride_bias, - shmem.get_device_context(), - rank, - world_size, - config.block_size_m, - config.block_size_n, - config.block_size_k, - config.group_size_m, - num_sms, - config.num_xcds, - num_k_blocks_local, - use_bias, - even_k, - config.allow_tf32, - matrix_instr_nonkdim=16, - ) + _fused_all_gather_matmul_kernel[grid]( + A_sharded, + B, + output_tensor, + bias_ptr, + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.get_device_context(), + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + config.num_xcds, + num_k_blocks_local, + use_bias, + even_k, + config.allow_tf32, + matrix_instr_nonkdim=16, + ) if not async_op: shmem.barrier() diff --git a/iris/x/gather.py b/iris/x/gather.py index d94e85a93..bb3fb637a 100644 --- a/iris/x/gather.py +++ b/iris/x/gather.py @@ -13,7 +13,6 @@ import triton import triton.language as tl -import iris from iris.iris import DeviceContext from .core import Tile, TensorView From 4c3b3f429e7abde4f0c5f37dca787e027841445c Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Wed, 11 Feb 2026 14:17:51 -0500 Subject: [PATCH 08/60] Add HBM buffered version --- .../all_gather_matmul/benchmark_hbm_buffer.py | 334 ++++++++++++++++ iris/ops/all_gather_matmul_hbm_buffer.py | 366 ++++++++++++++++++ 2 files changed, 700 insertions(+) create mode 100644 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py 
create mode 100644 iris/ops/all_gather_matmul_hbm_buffer.py diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py new file mode 100644 index 000000000..8a2dbae21 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Benchmark for the HBM-buffered all_gather_matmul variant. + +This variant cooperatively gathers A into a local HBM buffer with per-tile +ready flags, then runs GEMM from local memory. No global barriers -- CUs +that finish gathering early start GEMM immediately, spinning on flags for +any tile not yet available. + +Usage with torchrun: + torchrun --nproc_per_node=8 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py \\ + -m 2048 -n 16384 -k 131072 --benchmark + + torchrun --nproc_per_node=8 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py \\ + -m 2048 -n 16384 -k 131072 --benchmark --benchmark_pytorch --b_col_major +""" + +import os +import time +import torch +import torch.distributed as dist +import random +import argparse + +import iris +from iris.ops.all_gather_matmul_hbm_buffer import ( + all_gather_matmul_hbm_buffer, + all_gather_matmul_hbm_buffer_preamble, +) +from iris.ops import FusedConfig + +torch.manual_seed(123) +random.seed(123) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark HBM-buffered all_gather_matmul (per-tile flags).", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-m", type=int, default=2048, help="M dimension") + parser.add_argument("-n", type=int, default=16384, help="N dimension") + parser.add_argument("-k", type=int, default=131072, help="K dimension (total)") + parser.add_argument("-v", "--validate", action="store_true", help="Validate correctness") + parser.add_argument("-b", "--benchmark", 
action="store_true", help="Run benchmark") + parser.add_argument( + "--datatype", type=str, default="fp16", + choices=["fp16", "fp32", "bf16"], help="Tensor datatype", + ) + parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") + parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs (auto if None)") + parser.add_argument( + "--benchmark_pytorch", action="store_true", + help="Also benchmark PyTorch (all_gather_into_tensor + matmul)", + ) + parser.add_argument("--block_size_m", type=int, default=256, help="Block size M") + parser.add_argument("--block_size_n", type=int, default=64, help="Block size N") + parser.add_argument("--block_size_k", type=int, default=64, help="Block size K") + parser.add_argument("--group_size_m", type=int, default=1, help="Group size M") + parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto if None)") + parser.add_argument("--b_col_major", action="store_true", help="B col-major (K-contiguous)") + parser.add_argument("--a_col_major", action="store_true", help="A col-major (M-contiguous)") + parser.add_argument("--single-run", action="store_true", help="1 iteration (for profiling)") + return vars(parser.parse_args()) + + +def _worker(args): + """Worker function for torchrun.""" + local_rank = int(os.environ.get("RANK", os.environ.get("LOCAL_RANK", 0))) + world_size_env = int(os.environ.get("WORLD_SIZE", 1)) + + backend = "nccl" if torch.cuda.is_available() else "gloo" + + if "RANK" in os.environ or "LOCAL_RANK" in os.environ: + dist.init_process_group( + backend=backend, init_method="env://", + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, + ) + else: + dist.init_process_group( + backend=backend, init_method="tcp://127.0.0.1:29530", + world_size=world_size_env, rank=local_rank, + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, + ) + + shmem = iris.iris(args["heap_size"]) + rank = 
shmem.get_rank() + world_size = shmem.get_num_ranks() + + datatype_map = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16} + datatype = datatype_map.get(args["datatype"], torch.float16) + + M = args["m"] + N = args["n"] + K = args["k"] + K_local = K // world_size + + config_kwargs = { + "block_size_m": args["block_size_m"], + "block_size_n": args["block_size_n"], + "block_size_k": args["block_size_k"], + "group_size_m": args["group_size_m"], + } + if args["comm_sms"] is not None: + config_kwargs["num_sms"] = args["comm_sms"] + if args["num_xcds"] is not None: + config_kwargs["num_xcds"] = args["num_xcds"] + config = FusedConfig(**config_kwargs) + + buffer_mb = M * K * torch.tensor([], dtype=datatype).element_size() / (1024 ** 2) + num_m_tiles = M // config.block_size_m + num_k_blocks = K // config.block_size_k + shmem.info( + f"HBM-Buffer variant: M={M} N={N} K={K} K_local={K_local} " + f"block=({config.block_size_m},{config.block_size_n},{config.block_size_k}) " + f"buffer={buffer_mb:.0f}MB flags={num_m_tiles}x{num_k_blocks}" + ) + + # ── Allocate tensors ───────────────────────────────────────────────── + C = shmem.zeros((M, N), dtype=datatype) + + if args["a_col_major"]: + A_storage = shmem.zeros((K_local, M), dtype=datatype) + A_sharded = A_storage.T + else: + A_sharded = shmem.zeros((M, K_local), dtype=datatype) + + if args["b_col_major"]: + B_storage = shmem.zeros((N, K), dtype=datatype) + B = B_storage.T + else: + B = shmem.zeros((K, N), dtype=datatype) + + shmem.info(f"A strides={A_sharded.stride()}, B strides={B.stride()}") + + # Fill + torch.manual_seed(123 + rank) + A_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}") + A_sharded.copy_(A_data) + + torch.manual_seed(456) + B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") + B.copy_(B_data) + + # Expected + expected_tensor = None + if args["validate"]: + A_list = [torch.zeros((M, K_local), dtype=datatype, device=f"cuda:{rank}") for _ in 
range(world_size)] + dist.all_gather(A_list, A_data) + A_gathered = torch.cat(A_list, dim=1) + expected_tensor = shmem.zeros((M, N), dtype=datatype) + expected_tensor.copy_(torch.matmul(A_gathered, B_data)) + + # Pre-allocate workspace + workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config) + + # ── Timing ─────────────────────────────────────────────────────────── + comm_stream = torch.cuda.Stream() + start_ev = torch.cuda.Event(enable_timing=True) + end_ev = torch.cuda.Event(enable_timing=True) + total_ms = 0.0 + num_experiments = 0 + + def run_experiment(): + nonlocal total_ms, num_experiments + shmem.barrier() + with torch.cuda.stream(comm_stream): + start_ev.record() + all_gather_matmul_hbm_buffer( + shmem, C, A_sharded, B, + config=config, async_op=False, workspace=workspace, + ) + end_ev.record() + num_experiments += 1 + shmem.barrier() + total_ms += start_ev.elapsed_time(end_ev) + + shmem.barrier() + + # ── Validate ───────────────────────────────────────────────────────── + if args["validate"]: + shmem.info("Validating...") + C.zero_() + shmem.barrier() + run_experiment() + torch.cuda.synchronize() + shmem.barrier() + + atol = 1e-1 if datatype == torch.float16 else 1e-3 + success = torch.allclose(C, expected_tensor, atol=atol) + if not success: + max_diff = torch.abs(C - expected_tensor).max().item() + shmem.error(f"Rank {rank}: Validation FAILED, max diff: {max_diff}") + else: + shmem.info("Validation PASSED!") + shmem.barrier() + + # ── Benchmark ──────────────────────────────────────────────────────── + if args["benchmark"]: + if args.get("single_run"): + n_warmup, n_repeat = 0, 1 + else: + n_warmup, n_repeat = 25, 100 + + # Warmup + total_ms = 0.0 + num_experiments = 0 + if n_warmup > 0: + iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=1) + + total_ms = 0.0 + num_experiments = 0 + C.zero_() + shmem.barrier() + + iris.do_bench(run_experiment, shmem.barrier, n_warmup=0, n_repeat=n_repeat) + avg_ms = 
total_ms / num_experiments if num_experiments > 0 else 0 + + total_flops = 2 * M * N * K + tflops = (total_flops * 1e-12) / (avg_ms * 1e-3) if avg_ms > 0 else 0 + element_size = torch.tensor([], dtype=datatype).element_size() + total_bytes = M * K_local * element_size * (world_size - 1) + bw_gbps = (total_bytes / (1024 ** 3)) / (avg_ms * 1e-3) if avg_ms > 0 else 0 + + shmem.info( + f"HBM-Buffer (M={M}, K_local={K_local}, K={K}, N={N}, " + f"ws={world_size}, dtype={args['datatype']}): " + f"{avg_ms:.3f} ms, {tflops:.3f} TFLOPS, {bw_gbps:.3f} GB/s" + ) + shmem.barrier() + + # ── Per-rank finish time measurement ───────────────────────────── + # Run a single iteration and record wall-clock finish time per rank + # to see if ranks complete at different times (load imbalance). + shmem.barrier() + torch.cuda.synchronize() + dist.barrier() + + # Synchronized start + dist.barrier() + t_start = time.perf_counter() + + all_gather_matmul_hbm_buffer( + shmem, C, A_sharded, B, + config=config, async_op=False, workspace=workspace, + ) + torch.cuda.synchronize() + t_end = time.perf_counter() + + finish_ms = (t_end - t_start) * 1000.0 + + # Gather all finish times to rank 0 for display + finish_tensor = torch.tensor([finish_ms], dtype=torch.float64, device=f"cuda:{rank}") + all_finish = [torch.zeros(1, dtype=torch.float64, device=f"cuda:{rank}") for _ in range(world_size)] + dist.all_gather(all_finish, finish_tensor) + + if rank == 0: + times = [t.item() for t in all_finish] + min_t = min(times) + max_t = max(times) + print(f"\n Per-rank finish times (single run):") + print(f" {'Rank':>6} {'Finish ms':>10} {'Delta ms':>10}") + print(f" {'-' * 30}") + for r, t in enumerate(times): + delta = t - min_t + print(f" {r:>6} {t:>10.3f} {delta:>+10.3f}") + print(f" {'-' * 30}") + print(f" Spread (max - min): {max_t - min_t:.3f} ms") + print() + + shmem.barrier() + + # ── PyTorch baseline ───────────────────────────────────────────────── + if args["benchmark_pytorch"]: + 
shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") + + pt_A = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}") + pt_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") + pt_Ag = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}") + + for _ in range(10): + dist.all_gather_into_tensor(pt_Ag, pt_A) + _ = torch.matmul(pt_Ag, pt_B) + torch.cuda.synchronize() + dist.barrier() + + def run_pt(): + dist.all_gather_into_tensor(pt_Ag, pt_A) + _ = torch.matmul(pt_Ag, pt_B) + + total_flops = 2 * M * N * K + element_size = torch.tensor([], dtype=datatype).element_size() + total_bytes = M * K_local * element_size * (world_size - 1) + + pt_ms = iris.do_bench(run_pt, dist.barrier) + pt_tflops = (total_flops * 1e-12) / (pt_ms * 1e-3) if pt_ms > 0 else 0 + pt_bw = (total_bytes / (1024 ** 3)) / (pt_ms * 1e-3) if pt_ms > 0 else 0 + + shmem.info( + f"PyTorch (M={M}, K_local={K_local}, K={K}, N={N}, ws={world_size}, " + f"dtype={args['datatype']}): " + f"{pt_ms:.3f} ms, {pt_tflops:.3f} TFLOPS, {pt_bw:.3f} GB/s" + ) + + if args["benchmark"]: + avg_ms = total_ms / num_experiments if num_experiments > 0 else 0 + iris_tflops = (total_flops * 1e-12) / (avg_ms * 1e-3) if avg_ms > 0 else 0 + speedup = iris_tflops / pt_tflops if pt_tflops > 0 else 0 + shmem.info(f"Speedup (HBM-Buffer / PyTorch): {speedup:.2f}x") + + shmem.barrier() + + shmem.barrier() + dist.destroy_process_group() + + +def main(): + print("Starting HBM-buffer all_gather_matmul benchmark...") + args = parse_args() + if "RANK" in os.environ or "LOCAL_RANK" in os.environ: + _worker(args) + else: + print( + "Please run with torchrun:\n" + " torchrun --nproc_per_node=N " + "benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py [OPTIONS]" + ) + + +if __name__ == "__main__": + main() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py new file mode 100644 index 000000000..a0233b6bb --- /dev/null +++ 
b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -0,0 +1,366 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Fused All-Gather + GEMM using a local HBM staging buffer with per-tile flags. + +Each rank has a column-sharded input A_sharded (M x K_local). +This operation computes C = all_gather(A_sharded) @ B by: + 1. All CUs cooperate to gather A into a local HBM buffer, setting a ready + flag for each (m_tile, k_block) as it lands. + 2. Each CU then runs GEMM from the local buffer. Before consuming a tile, + it checks the ready flag; if not yet set, it spins until the gathering + CU writes it. + +No global barriers are needed. The per-tile flags provide fine-grained +producer-consumer synchronization: a CU that finishes gathering early can +start GEMM immediately, consuming any tile whose flag is already set. +""" + +from typing import Optional +import torch +import triton +import triton.language as tl +import iris +import iris.x + +from .config import FusedConfig +from .workspace import FusedWorkspace + + +# ========================================================================== +# Kernel +# ========================================================================== + + +@triton.jit +def _hbm_buffer_all_gather_matmul_kernel( + A_sharded, + B, + C, + bias_ptr, + staged_a, # Local HBM buffer: (M, K) fp16 + flags_ptr, # int32[NUM_M_TILES * NUM_K_BLOCKS] per-tile ready flags + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + context_tensor: tl.tensor, + cur_rank: tl.constexpr, + world_size: tl.constexpr, + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, + BLOCK_SIZE_K: tl.constexpr, + GROUP_SIZE_M: tl.constexpr, + NUM_SMS: tl.constexpr, + NUM_XCDS: tl.constexpr, + NUM_M_TILES: tl.constexpr, + NUM_K_BLOCKS: tl.constexpr, # K // BLOCK_SIZE_K (global) + NUM_K_BLOCKS_LOCAL: tl.constexpr, # K_local // BLOCK_SIZE_K + BIAS: 
tl.constexpr, + ALLOW_TF32: tl.constexpr, +): + """ + HBM-buffered all-gather + GEMM with per-tile ready flags. + + Each CU executes two phases back-to-back (no global barrier): + + Phase 1 (gather): The CU is assigned a slice of the (m_tile, src_rank, + k_block_local) gather work. For each assigned tile it pulls from remote + via iris.x.gather, writes to staged_a, and atomically sets the ready + flag. Local rank tiles are copied via a fast local load. + + Phase 2 (GEMM): The CU iterates over its assigned output tiles + (pid_m, pid_n). For each K-block in the accumulation loop it checks the + ready flag; if not yet set, it spins until the producing CU posts it. + A tiles are loaded from staged_a (local HBM) and B tiles from B. + """ + pid = tl.program_id(0) + + # XCD-aware PID remapping + if NUM_XCDS != 1: + pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) + + acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 + + # DeviceContext and TensorView for gather + ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) + src_view = iris.x.make_tensor_view(A_sharded, M, K_local, stride_am, stride_ak) + + # ================================================================== + # Phase 1: Cooperative gather into staged_a, set per-tile flags + # ================================================================== + # Total gather work = NUM_M_TILES * world_size * NUM_K_BLOCKS_LOCAL + # Each tile is BLOCK_SIZE_M x BLOCK_SIZE_K elements. 
+ total_gather_tiles = NUM_M_TILES * world_size * NUM_K_BLOCKS_LOCAL + + for gather_idx in range(pid, total_gather_tiles, NUM_SMS): + # Decompose flat index -> (m_tile, src_rank_idx, k_block_local) + m_tile = gather_idx // (world_size * NUM_K_BLOCKS_LOCAL) + remainder = gather_idx % (world_size * NUM_K_BLOCKS_LOCAL) + src_rank_idx = remainder // NUM_K_BLOCKS_LOCAL + k_block_local = remainder % NUM_K_BLOCKS_LOCAL + + # Global k-block index in the full K dimension + k_block_global = src_rank_idx * NUM_K_BLOCKS_LOCAL + k_block_local + + # Gather the tile from the source rank, store to buffer, set flag. + # source_rank must be constexpr for iris.x.gather, so we iterate + # over all ranks at compile time and select at runtime. + # The store and flag-set are inside the branch so that a_tile is + # always defined when used. + zero = tl.program_id(0) * 0 + pid_m_t = zero + m_tile + tile_k_t = zero + k_block_local + k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) + + rm = m_tile * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + staged_ptrs = staged_a + rm[:, None] * K + rk[None, :] + flag_idx = m_tile * NUM_K_BLOCKS + k_block_global + + for compile_rank in range(world_size): + if src_rank_idx == compile_rank: + a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) + tl.store(staged_ptrs, a_tile) + tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + + # ================================================================== + # Phase 2: GEMM from staged_a (local) x B, checking flags + # ================================================================== + num_tiles_n = tl.cdiv(N, BLOCK_SIZE_N) + total_gemm_tiles = NUM_M_TILES * num_tiles_n + + for gemm_tile_id in range(pid, total_gemm_tiles, NUM_SMS): + # Tile scheduling with swizzle (GROUP_SIZE_M grouping) + num_pid_in_group = GROUP_SIZE_M * num_tiles_n + group_id = gemm_tile_id // num_pid_in_group + first_pid_m = 
group_id * GROUP_SIZE_M + group_sz = min(NUM_M_TILES - first_pid_m, GROUP_SIZE_M) + pid_m = first_pid_m + ((gemm_tile_id % num_pid_in_group) % group_sz) + pid_n = (gemm_tile_id % num_pid_in_group) // group_sz + + # Row / column indices + rm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) + rn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + rn = tl.max_contiguous(tl.multiple_of(rn % N, BLOCK_SIZE_N), BLOCK_SIZE_N) + + # Initialize accumulator + acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) + + # K-reduction loop + for k_block in range(NUM_K_BLOCKS): + # Wait for the (pid_m, k_block) tile to be ready. + # acquire semantics ensure subsequent loads see the stored data. + flag_idx = pid_m * NUM_K_BLOCKS + k_block + while tl.atomic_add(flags_ptr + flag_idx, 0, sem="acquire", scope="gpu") == 0: + pass + + # Load A from staged_a (purely local HBM) + rk = k_block * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + rk = tl.max_contiguous(tl.multiple_of(rk, BLOCK_SIZE_K), BLOCK_SIZE_K) + a_ptrs = staged_a + rm[:, None] * K + rk[None, :] + a = tl.load(a_ptrs) + + # Load B + B_ptrs = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(B_ptrs) + + # Accumulate + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) + + # Add bias if provided + if BIAS: + bias_val = tl.load(bias_ptr + rm * stride_bias, mask=rm < M, other=0.0) + acc = acc + bias_val[:, None] + + # Convert to output dtype and store + c = acc.to(C.type.element_ty) + C_ptrs = C + rm[:, None] * stride_cm + rn[None, :] * stride_cn + mask = (rm[:, None] < M) & (rn[None, :] < N) + tl.store(C_ptrs, c, mask=mask) + + +# ========================================================================== +# Python API +# ========================================================================== + + +def all_gather_matmul_hbm_buffer_preamble( + shmem, + A_sharded: 
torch.Tensor, + B: torch.Tensor, + config: Optional[FusedConfig] = None, +) -> FusedWorkspace: + """ + Allocate workspace for the HBM-buffered all_gather_matmul. + + Allocates: + - staged_a: (M, K) local HBM buffer for the gathered A matrix. + - flags: int32[num_m_tiles * num_k_blocks] per-tile ready flags. + """ + if config is None: + config = FusedConfig() + + M, K_local = A_sharded.shape + K, N = B.shape + world_size = shmem.get_num_ranks() + + expected_K = world_size * K_local + assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" + assert K_local % config.block_size_k == 0, ( + f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" + ) + assert K % config.block_size_k == 0, ( + f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" + ) + assert M % config.block_size_m == 0, ( + f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" + ) + + num_m_tiles = M // config.block_size_m + num_k_blocks = K // config.block_size_k + + ws = FusedWorkspace( + operation="all_gather_matmul_hbm_buffer", + shape=(M, N, K), + dtype=A_sharded.dtype, + world_size=world_size, + variant="hbm_buffer", + prepared=True, + ) + + # (M, K) staging buffer in local HBM + ws.aux_buffer = shmem.zeros((M, K), dtype=A_sharded.dtype) + # Per-tile ready flags + ws.locks = shmem.zeros((num_m_tiles * num_k_blocks,), dtype=torch.int32) + + buffer_mb = M * K * A_sharded.element_size() / (1024 ** 2) + shmem.info(f"HBM buffer workspace: staged_a=({M},{K}) [{buffer_mb:.1f} MB], " + f"flags=[{num_m_tiles}x{num_k_blocks}={num_m_tiles * num_k_blocks}]") + + shmem.barrier() + return ws + + +def all_gather_matmul_hbm_buffer( + shmem, + output_tensor: torch.Tensor, + A_sharded: torch.Tensor, + B: torch.Tensor, + bias: Optional[torch.Tensor] = None, + async_op: bool = False, + config: Optional[FusedConfig] = None, + workspace: Optional[FusedWorkspace] = None, +) -> FusedWorkspace: + """ + All-gather + matmul using a 
local HBM staging buffer with per-tile flags. + + Computes C = all_gather(A_sharded) @ B + bias. + + Each CU first gathers its assigned slice of A tiles into the local buffer + (setting per-tile ready flags), then runs GEMM from the buffer, spinning + on flags for any tile not yet available. + """ + if config is None: + config = FusedConfig() + + M, K_local = A_sharded.shape + K, N = B.shape + world_size = shmem.get_num_ranks() + rank = shmem.get_rank() + + expected_K = world_size * K_local + assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" + assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" + assert M % config.block_size_m == 0, ( + f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" + ) + assert K % config.block_size_k == 0, ( + f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" + ) + assert K_local % config.block_size_k == 0, ( + f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" + ) + + if workspace is None: + workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config) + + # Reset flags to 0 before each launch + workspace.locks.zero_() + + stride_am, stride_ak = A_sharded.stride() + stride_bk, stride_bn = B.stride() + stride_cm, stride_cn = output_tensor.stride() + + if bias is not None: + assert bias.shape[0] == M + bias_ptr = bias + stride_bias = bias.stride()[0] if bias.dim() > 0 else 1 + use_bias = True + else: + bias_ptr = output_tensor # dummy, won't be read + stride_bias = 1 + use_bias = False + + device = A_sharded.device + num_sms = config.num_sms + if num_sms is None: + props = torch.cuda.get_device_properties(device) + num_sms = props.multi_processor_count + + num_m_tiles = M // config.block_size_m + num_k_blocks = K // config.block_size_k + num_k_blocks_local = K_local // config.block_size_k + + grid = (num_sms,) + _hbm_buffer_all_gather_matmul_kernel[grid]( + A_sharded, + B, + 
output_tensor, + bias_ptr, + workspace.aux_buffer, # staged_a + workspace.locks, # flags + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.get_device_context(), + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + config.num_xcds, + num_m_tiles, + num_k_blocks, + num_k_blocks_local, + use_bias, + config.allow_tf32, + matrix_instr_nonkdim=16, + ) + + if not async_op: + shmem.barrier() + + return workspace From 1f3b9ef87b218e6b405d317fd1194fded4099c20 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Feb 2026 19:18:56 +0000 Subject: [PATCH 09/60] Apply Ruff auto-fixes --- .../all_gather_matmul/benchmark_hbm_buffer.py | 45 +++++++++++++------ iris/ops/all_gather_matmul_hbm_buffer.py | 36 +++++++-------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 8a2dbae21..0529ebb46 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -47,13 +47,17 @@ def parse_args(): parser.add_argument("-v", "--validate", action="store_true", help="Validate correctness") parser.add_argument("-b", "--benchmark", action="store_true", help="Run benchmark") parser.add_argument( - "--datatype", type=str, default="fp16", - choices=["fp16", "fp32", "bf16"], help="Tensor datatype", + "--datatype", + type=str, + default="fp16", + choices=["fp16", "fp32", "bf16"], + help="Tensor datatype", ) parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs (auto if None)") parser.add_argument( - "--benchmark_pytorch", action="store_true", + "--benchmark_pytorch", + action="store_true", help="Also benchmark PyTorch 
(all_gather_into_tensor + matmul)", ) parser.add_argument("--block_size_m", type=int, default=256, help="Block size M") @@ -76,13 +80,16 @@ def _worker(args): if "RANK" in os.environ or "LOCAL_RANK" in os.environ: dist.init_process_group( - backend=backend, init_method="env://", + backend=backend, + init_method="env://", device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) else: dist.init_process_group( - backend=backend, init_method="tcp://127.0.0.1:29530", - world_size=world_size_env, rank=local_rank, + backend=backend, + init_method="tcp://127.0.0.1:29530", + world_size=world_size_env, + rank=local_rank, device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) @@ -110,7 +117,7 @@ def _worker(args): config_kwargs["num_xcds"] = args["num_xcds"] config = FusedConfig(**config_kwargs) - buffer_mb = M * K * torch.tensor([], dtype=datatype).element_size() / (1024 ** 2) + buffer_mb = M * K * torch.tensor([], dtype=datatype).element_size() / (1024**2) num_m_tiles = M // config.block_size_m num_k_blocks = K // config.block_size_k shmem.info( @@ -170,8 +177,13 @@ def run_experiment(): with torch.cuda.stream(comm_stream): start_ev.record() all_gather_matmul_hbm_buffer( - shmem, C, A_sharded, B, - config=config, async_op=False, workspace=workspace, + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, ) end_ev.record() num_experiments += 1 @@ -223,7 +235,7 @@ def run_experiment(): tflops = (total_flops * 1e-12) / (avg_ms * 1e-3) if avg_ms > 0 else 0 element_size = torch.tensor([], dtype=datatype).element_size() total_bytes = M * K_local * element_size * (world_size - 1) - bw_gbps = (total_bytes / (1024 ** 3)) / (avg_ms * 1e-3) if avg_ms > 0 else 0 + bw_gbps = (total_bytes / (1024**3)) / (avg_ms * 1e-3) if avg_ms > 0 else 0 shmem.info( f"HBM-Buffer (M={M}, K_local={K_local}, K={K}, N={N}, " @@ -244,8 +256,13 @@ def run_experiment(): t_start = time.perf_counter() 
all_gather_matmul_hbm_buffer( - shmem, C, A_sharded, B, - config=config, async_op=False, workspace=workspace, + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, ) torch.cuda.synchronize() t_end = time.perf_counter() @@ -261,7 +278,7 @@ def run_experiment(): times = [t.item() for t in all_finish] min_t = min(times) max_t = max(times) - print(f"\n Per-rank finish times (single run):") + print("\n Per-rank finish times (single run):") print(f" {'Rank':>6} {'Finish ms':>10} {'Delta ms':>10}") print(f" {'-' * 30}") for r, t in enumerate(times): @@ -297,7 +314,7 @@ def run_pt(): pt_ms = iris.do_bench(run_pt, dist.barrier) pt_tflops = (total_flops * 1e-12) / (pt_ms * 1e-3) if pt_ms > 0 else 0 - pt_bw = (total_bytes / (1024 ** 3)) / (pt_ms * 1e-3) if pt_ms > 0 else 0 + pt_bw = (total_bytes / (1024**3)) / (pt_ms * 1e-3) if pt_ms > 0 else 0 shmem.info( f"PyTorch (M={M}, K_local={K_local}, K={K}, N={N}, ws={world_size}, " diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index a0233b6bb..daeec0e1b 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -39,8 +39,8 @@ def _hbm_buffer_all_gather_matmul_kernel( B, C, bias_ptr, - staged_a, # Local HBM buffer: (M, K) fp16 - flags_ptr, # int32[NUM_M_TILES * NUM_K_BLOCKS] per-tile ready flags + staged_a, # Local HBM buffer: (M, K) fp16 + flags_ptr, # int32[NUM_M_TILES * NUM_K_BLOCKS] per-tile ready flags M, N, K, @@ -62,8 +62,8 @@ def _hbm_buffer_all_gather_matmul_kernel( NUM_SMS: tl.constexpr, NUM_XCDS: tl.constexpr, NUM_M_TILES: tl.constexpr, - NUM_K_BLOCKS: tl.constexpr, # K // BLOCK_SIZE_K (global) - NUM_K_BLOCKS_LOCAL: tl.constexpr, # K_local // BLOCK_SIZE_K + NUM_K_BLOCKS: tl.constexpr, # K // BLOCK_SIZE_K (global) + NUM_K_BLOCKS_LOCAL: tl.constexpr, # K_local // BLOCK_SIZE_K BIAS: tl.constexpr, ALLOW_TF32: tl.constexpr, ): @@ -222,12 +222,8 @@ def all_gather_matmul_hbm_buffer_preamble( assert 
K_local % config.block_size_k == 0, ( f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" ) - assert K % config.block_size_k == 0, ( - f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" - ) - assert M % config.block_size_m == 0, ( - f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" - ) + assert K % config.block_size_k == 0, f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" + assert M % config.block_size_m == 0, f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" num_m_tiles = M // config.block_size_m num_k_blocks = K // config.block_size_k @@ -246,9 +242,11 @@ def all_gather_matmul_hbm_buffer_preamble( # Per-tile ready flags ws.locks = shmem.zeros((num_m_tiles * num_k_blocks,), dtype=torch.int32) - buffer_mb = M * K * A_sharded.element_size() / (1024 ** 2) - shmem.info(f"HBM buffer workspace: staged_a=({M},{K}) [{buffer_mb:.1f} MB], " - f"flags=[{num_m_tiles}x{num_k_blocks}={num_m_tiles * num_k_blocks}]") + buffer_mb = M * K * A_sharded.element_size() / (1024**2) + shmem.info( + f"HBM buffer workspace: staged_a=({M},{K}) [{buffer_mb:.1f} MB], " + f"flags=[{num_m_tiles}x{num_k_blocks}={num_m_tiles * num_k_blocks}]" + ) shmem.barrier() return ws @@ -284,12 +282,8 @@ def all_gather_matmul_hbm_buffer( expected_K = world_size * K_local assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" - assert M % config.block_size_m == 0, ( - f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" - ) - assert K % config.block_size_k == 0, ( - f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" - ) + assert M % config.block_size_m == 0, f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" + assert K % config.block_size_k == 0, f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" assert 
K_local % config.block_size_k == 0, ( f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" ) @@ -330,8 +324,8 @@ def all_gather_matmul_hbm_buffer( B, output_tensor, bias_ptr, - workspace.aux_buffer, # staged_a - workspace.locks, # flags + workspace.aux_buffer, # staged_a + workspace.locks, # flags M, N, K, From 45288ff39a32924339707f48180b9c27c0ec1bef Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Thu, 12 Feb 2026 21:16:06 -0500 Subject: [PATCH 10/60] Use workgroup specialized variant --- iris/ops/all_gather_matmul_hbm_buffer.py | 340 +++++++++++------------ 1 file changed, 164 insertions(+), 176 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index daeec0e1b..936a9a9a4 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -2,19 +2,11 @@ # Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. """ -Fused All-Gather + GEMM using a local HBM staging buffer with per-tile flags. - -Each rank has a column-sharded input A_sharded (M x K_local). -This operation computes C = all_gather(A_sharded) @ B by: - 1. All CUs cooperate to gather A into a local HBM buffer, setting a ready - flag for each (m_tile, k_block) as it lands. - 2. Each CU then runs GEMM from the local buffer. Before consuming a tile, - it checks the ready flag; if not yet set, it spins until the gathering - CU writes it. - -No global barriers are needed. The per-tile flags provide fine-grained -producer-consumer synchronization: a CU that finishes gathering early can -start GEMM immediately, consuming any tile whose flag is already set. +Fused All-Gather + GEMM using a local HBM staging buffer with dedicated +fetcher and GEMM workgroups, launched data-parallel. + +Supports configurable staged_a buffer layout (M-contiguous or K-contiguous) +and B layout to match optimal tritonblas conventions (TN, TT, NT, NN). 
""" from typing import Optional @@ -28,19 +20,14 @@ from .workspace import FusedWorkspace -# ========================================================================== -# Kernel -# ========================================================================== - - @triton.jit def _hbm_buffer_all_gather_matmul_kernel( A_sharded, B, C, bias_ptr, - staged_a, # Local HBM buffer: (M, K) fp16 - flags_ptr, # int32[NUM_M_TILES * NUM_K_BLOCKS] per-tile ready flags + staged_a, + flags_ptr, M, N, K, @@ -51,6 +38,8 @@ def _hbm_buffer_all_gather_matmul_kernel( stride_bn, stride_cm, stride_cn, + stride_sa_m, # staged_a stride in M dim + stride_sa_k, # staged_a stride in K dim stride_bias, context_tensor: tl.tensor, cur_rank: tl.constexpr, @@ -59,137 +48,116 @@ def _hbm_buffer_all_gather_matmul_kernel( BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr, GROUP_SIZE_M: tl.constexpr, - NUM_SMS: tl.constexpr, - NUM_XCDS: tl.constexpr, + NUM_FETCH_SMS: tl.constexpr, NUM_M_TILES: tl.constexpr, - NUM_K_BLOCKS: tl.constexpr, # K // BLOCK_SIZE_K (global) - NUM_K_BLOCKS_LOCAL: tl.constexpr, # K_local // BLOCK_SIZE_K + NUM_TILES_N: tl.constexpr, + NUM_K_BLOCKS: tl.constexpr, + NUM_K_BLOCKS_LOCAL: tl.constexpr, + K_PER_FLAG: tl.constexpr, + NUM_FLAG_GROUPS_K: tl.constexpr, + TOTAL_GATHER_TILES: tl.constexpr, BIAS: tl.constexpr, ALLOW_TF32: tl.constexpr, ): - """ - HBM-buffered all-gather + GEMM with per-tile ready flags. 
+ pid = tl.program_id(0) + acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 + zero = tl.program_id(0) * 0 - Each CU executes two phases back-to-back (no global barrier): + if pid < NUM_FETCH_SMS: + # ============================================================== + # FETCHER + # ============================================================== + ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) + src_view = iris.x.make_tensor_view(A_sharded, M, K_local, stride_am, stride_ak) - Phase 1 (gather): The CU is assigned a slice of the (m_tile, src_rank, - k_block_local) gather work. For each assigned tile it pulls from remote - via iris.x.gather, writes to staged_a, and atomically sets the ready - flag. Local rank tiles are copied via a fast local load. + num_m_groups = (NUM_M_TILES + GROUP_SIZE_M - 1) // GROUP_SIZE_M + tiles_per_m_group = NUM_FLAG_GROUPS_K * GROUP_SIZE_M + total_flag_groups = NUM_FLAG_GROUPS_K * NUM_M_TILES - Phase 2 (GEMM): The CU iterates over its assigned output tiles - (pid_m, pid_n). For each K-block in the accumulation loop it checks the - ready flag; if not yet set, it spins until the producing CU posts it. - A tiles are loaded from staged_a (local HBM) and B tiles from B. 
- """ - pid = tl.program_id(0) + for fg_idx in range(pid, total_flag_groups, NUM_FETCH_SMS): + m_group = fg_idx // tiles_per_m_group + within_group = fg_idx % tiles_per_m_group + k_flag_group = within_group // GROUP_SIZE_M + m_in_group = within_group % GROUP_SIZE_M + m_tile = m_group * GROUP_SIZE_M + m_in_group + m_tile = min(m_tile, NUM_M_TILES - 1) + k_block_start = k_flag_group * K_PER_FLAG - # XCD-aware PID remapping - if NUM_XCDS != 1: - pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) + rm = m_tile * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) - acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 + for k_off in range(K_PER_FLAG): + k_block_global = k_block_start + k_off + + src_rank_idx = k_block_global // NUM_K_BLOCKS_LOCAL + k_block_local = k_block_global % NUM_K_BLOCKS_LOCAL + + pid_m_t = zero + m_tile + tile_k_t = zero + k_block_local + k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) + + rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + # Use parameterized strides for staged_a + staged_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k + + for compile_rank in range(world_size): + if src_rank_idx == compile_rank: + a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) + tl.store(staged_ptrs, a_tile) - # DeviceContext and TensorView for gather - ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) - src_view = iris.x.make_tensor_view(A_sharded, M, K_local, stride_am, stride_ak) - - # ================================================================== - # Phase 1: Cooperative gather into staged_a, set per-tile flags - # ================================================================== - # Total gather work = NUM_M_TILES * world_size * NUM_K_BLOCKS_LOCAL - # Each tile is BLOCK_SIZE_M x BLOCK_SIZE_K elements. 
- total_gather_tiles = NUM_M_TILES * world_size * NUM_K_BLOCKS_LOCAL - - for gather_idx in range(pid, total_gather_tiles, NUM_SMS): - # Decompose flat index -> (m_tile, src_rank_idx, k_block_local) - m_tile = gather_idx // (world_size * NUM_K_BLOCKS_LOCAL) - remainder = gather_idx % (world_size * NUM_K_BLOCKS_LOCAL) - src_rank_idx = remainder // NUM_K_BLOCKS_LOCAL - k_block_local = remainder % NUM_K_BLOCKS_LOCAL - - # Global k-block index in the full K dimension - k_block_global = src_rank_idx * NUM_K_BLOCKS_LOCAL + k_block_local - - # Gather the tile from the source rank, store to buffer, set flag. - # source_rank must be constexpr for iris.x.gather, so we iterate - # over all ranks at compile time and select at runtime. - # The store and flag-set are inside the branch so that a_tile is - # always defined when used. - zero = tl.program_id(0) * 0 - pid_m_t = zero + m_tile - tile_k_t = zero + k_block_local - k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) - - rm = m_tile * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) - rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) - staged_ptrs = staged_a + rm[:, None] * K + rk[None, :] - flag_idx = m_tile * NUM_K_BLOCKS + k_block_global - - for compile_rank in range(world_size): - if src_rank_idx == compile_rank: - a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile) - tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") - - # ================================================================== - # Phase 2: GEMM from staged_a (local) x B, checking flags - # ================================================================== - num_tiles_n = tl.cdiv(N, BLOCK_SIZE_N) - total_gemm_tiles = NUM_M_TILES * num_tiles_n - - for gemm_tile_id in range(pid, total_gemm_tiles, NUM_SMS): - # Tile scheduling with swizzle (GROUP_SIZE_M grouping) - num_pid_in_group = GROUP_SIZE_M * num_tiles_n + flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group + 
tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + + else: + # ============================================================== + # GEMM + # ============================================================== + gemm_tile_id = pid - NUM_FETCH_SMS + + num_pid_in_group = GROUP_SIZE_M * NUM_TILES_N group_id = gemm_tile_id // num_pid_in_group first_pid_m = group_id * GROUP_SIZE_M group_sz = min(NUM_M_TILES - first_pid_m, GROUP_SIZE_M) pid_m = first_pid_m + ((gemm_tile_id % num_pid_in_group) % group_sz) pid_n = (gemm_tile_id % num_pid_in_group) // group_sz - # Row / column indices rm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) rn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) rn = tl.max_contiguous(tl.multiple_of(rn % N, BLOCK_SIZE_N), BLOCK_SIZE_N) - # Initialize accumulator acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) - # K-reduction loop - for k_block in range(NUM_K_BLOCKS): - # Wait for the (pid_m, k_block) tile to be ready. - # acquire semantics ensure subsequent loads see the stored data. 
- flag_idx = pid_m * NUM_K_BLOCKS + k_block + for k_fg in range(NUM_FLAG_GROUPS_K): + flag_idx = pid_m * NUM_FLAG_GROUPS_K + k_fg while tl.atomic_add(flags_ptr + flag_idx, 0, sem="acquire", scope="gpu") == 0: pass - # Load A from staged_a (purely local HBM) - rk = k_block * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) - rk = tl.max_contiguous(tl.multiple_of(rk, BLOCK_SIZE_K), BLOCK_SIZE_K) - a_ptrs = staged_a + rm[:, None] * K + rk[None, :] - a = tl.load(a_ptrs) + k_block_base = k_fg * K_PER_FLAG + for k_off in range(K_PER_FLAG): + k_block = k_block_base + k_off + rk = k_block * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + rk = tl.max_contiguous(tl.multiple_of(rk, BLOCK_SIZE_K), BLOCK_SIZE_K) + + # Use parameterized strides for staged_a + a_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k + a = tl.load(a_ptrs) - # Load B - B_ptrs = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn - b = tl.load(B_ptrs) + B_ptrs = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn + b = tl.load(B_ptrs) - # Accumulate - if ALLOW_TF32: - acc = tl.dot(a, b, acc, allow_tf32=True) - else: - acc += tl.dot(a, b, allow_tf32=False) + if ALLOW_TF32: + acc = tl.dot(a, b, acc, allow_tf32=True) + else: + acc += tl.dot(a, b, allow_tf32=False) - # Add bias if provided if BIAS: bias_val = tl.load(bias_ptr + rm * stride_bias, mask=rm < M, other=0.0) acc = acc + bias_val[:, None] - # Convert to output dtype and store c = acc.to(C.type.element_ty) C_ptrs = C + rm[:, None] * stride_cm + rn[None, :] * stride_cn - mask = (rm[:, None] < M) & (rn[None, :] < N) - tl.store(C_ptrs, c, mask=mask) + c_mask = (rm[:, None] < M) & (rn[None, :] < N) + tl.store(C_ptrs, c, mask=c_mask) # ========================================================================== @@ -202,13 +170,15 @@ def all_gather_matmul_hbm_buffer_preamble( A_sharded: torch.Tensor, B: torch.Tensor, config: Optional[FusedConfig] = None, + k_per_flag: int = 1, + staged_a_layout: str = "k_contiguous", ) -> FusedWorkspace: """ 
- Allocate workspace for the HBM-buffered all_gather_matmul. + Allocate workspace. - Allocates: - - staged_a: (M, K) local HBM buffer for the gathered A matrix. - - flags: int32[num_m_tiles * num_k_blocks] per-tile ready flags. + Args: + staged_a_layout: "k_contiguous" (default, row-major (M,K)) or + "m_contiguous" (col-major, stored as (K,M) transposed). """ if config is None: config = FusedConfig() @@ -217,35 +187,41 @@ def all_gather_matmul_hbm_buffer_preamble( K, N = B.shape world_size = shmem.get_num_ranks() - expected_K = world_size * K_local - assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" - assert K_local % config.block_size_k == 0, ( - f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" - ) - assert K % config.block_size_k == 0, f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" - assert M % config.block_size_m == 0, f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" + assert world_size * K_local == K + assert K_local % config.block_size_k == 0 + assert K % config.block_size_k == 0 + assert M % config.block_size_m == 0 num_m_tiles = M // config.block_size_m num_k_blocks = K // config.block_size_k + assert num_k_blocks % k_per_flag == 0 + num_flag_groups_k = num_k_blocks // k_per_flag ws = FusedWorkspace( operation="all_gather_matmul_hbm_buffer", shape=(M, N, K), dtype=A_sharded.dtype, world_size=world_size, - variant="hbm_buffer", + variant=f"hbm_buffer_{staged_a_layout}", prepared=True, ) - # (M, K) staging buffer in local HBM - ws.aux_buffer = shmem.zeros((M, K), dtype=A_sharded.dtype) - # Per-tile ready flags - ws.locks = shmem.zeros((num_m_tiles * num_k_blocks,), dtype=torch.int32) + if staged_a_layout == "m_contiguous": + # Allocate (K, M) row-major, .T gives (M, K) with stride_m=1, stride_k=M + storage = shmem.zeros((K, M), dtype=A_sharded.dtype) + ws.aux_buffer = storage.T # (M, K) view, M-contiguous + else: + # Default: (M, K) row-major, 
stride_m=K, stride_k=1 + ws.aux_buffer = shmem.zeros((M, K), dtype=A_sharded.dtype) + + ws.locks = shmem.zeros((num_m_tiles * num_flag_groups_k,), dtype=torch.int32) - buffer_mb = M * K * A_sharded.element_size() / (1024**2) + buffer_mb = M * K * A_sharded.element_size() / (1024 ** 2) + sa_stride_m, sa_stride_k = ws.aux_buffer.stride() shmem.info( - f"HBM buffer workspace: staged_a=({M},{K}) [{buffer_mb:.1f} MB], " - f"flags=[{num_m_tiles}x{num_k_blocks}={num_m_tiles * num_k_blocks}]" + f"HBM buffer: staged_a=({M},{K}) [{buffer_mb:.1f} MB] " + f"layout={staged_a_layout} strides=({sa_stride_m},{sa_stride_k}), " + f"flags={num_m_tiles}x{num_flag_groups_k}, k_per_flag={k_per_flag}" ) shmem.barrier() @@ -261,15 +237,19 @@ def all_gather_matmul_hbm_buffer( async_op: bool = False, config: Optional[FusedConfig] = None, workspace: Optional[FusedWorkspace] = None, + num_fetch_sms: Optional[int] = None, + k_per_flag: int = 1, + fetch_block_m: Optional[int] = None, + fetch_block_k: Optional[int] = None, + staged_a_layout: str = "k_contiguous", ) -> FusedWorkspace: """ - All-gather + matmul using a local HBM staging buffer with per-tile flags. + All-gather + matmul with dedicated fetcher/GEMM workgroups. - Computes C = all_gather(A_sharded) @ B + bias. - - Each CU first gathers its assigned slice of A tiles into the local buffer - (setting per-tile ready flags), then runs GEMM from the buffer, spinning - on flags for any tile not yet available. + Args: + staged_a_layout: Buffer layout for gathered A. + "k_contiguous" — (M,K) row-major, K is fast dim. Matches NN convention. + "m_contiguous" — (M,K) with M as fast dim. Matches TN convention (best for tritonblas). 
""" if config is None: config = FusedConfig() @@ -279,24 +259,31 @@ def all_gather_matmul_hbm_buffer( world_size = shmem.get_num_ranks() rank = shmem.get_rank() - expected_K = world_size * K_local - assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" - assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" - assert M % config.block_size_m == 0, f"M ({M}) must be divisible by block_size_m ({config.block_size_m})" - assert K % config.block_size_k == 0, f"K ({K}) must be divisible by block_size_k ({config.block_size_k})" - assert K_local % config.block_size_k == 0, ( - f"K_local ({K_local}) must be divisible by block_size_k ({config.block_size_k})" - ) + assert world_size * K_local == K + assert output_tensor.shape == (M, N) + assert M % config.block_size_m == 0 + assert K % config.block_size_k == 0 + assert K_local % config.block_size_k == 0 + + if fetch_block_m is None: + fetch_block_m = config.block_size_m + if fetch_block_k is None: + fetch_block_k = config.block_size_k + + num_k_blocks = K // config.block_size_k + assert num_k_blocks % k_per_flag == 0 if workspace is None: - workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config) + workspace = all_gather_matmul_hbm_buffer_preamble( + shmem, A_sharded, B, config, k_per_flag, staged_a_layout + ) - # Reset flags to 0 before each launch workspace.locks.zero_() stride_am, stride_ak = A_sharded.stride() stride_bk, stride_bn = B.stride() stride_cm, stride_cn = output_tensor.stride() + stride_sa_m, stride_sa_k = workspace.aux_buffer.stride() if bias is not None: assert bias.shape[0] == M @@ -304,7 +291,7 @@ def all_gather_matmul_hbm_buffer( stride_bias = bias.stride()[0] if bias.dim() > 0 else 1 use_bias = True else: - bias_ptr = output_tensor # dummy, won't be read + bias_ptr = output_tensor stride_bias = 1 use_bias = False @@ -315,40 +302,41 @@ def all_gather_matmul_hbm_buffer( num_sms = props.multi_processor_count 
num_m_tiles = M // config.block_size_m - num_k_blocks = K // config.block_size_k + num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n + total_gemm_tiles = num_m_tiles * num_tiles_n num_k_blocks_local = K_local // config.block_size_k - - grid = (num_sms,) - _hbm_buffer_all_gather_matmul_kernel[grid]( - A_sharded, - B, - output_tensor, - bias_ptr, - workspace.aux_buffer, # staged_a - workspace.locks, # flags - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, + num_flag_groups_k = num_k_blocks // k_per_flag + total_gather_tiles = num_m_tiles * num_k_blocks + + if num_fetch_sms is None: + num_fetch_sms = max(1, num_sms // 10) + assert 0 < num_fetch_sms + + grid_size = num_fetch_sms + total_gemm_tiles + + _hbm_buffer_all_gather_matmul_kernel[(grid_size,)]( + A_sharded, B, output_tensor, bias_ptr, + workspace.aux_buffer, workspace.locks, + M, N, K, K_local, + stride_am, stride_ak, + stride_bk, stride_bn, + stride_cm, stride_cn, + stride_sa_m, stride_sa_k, stride_bias, shmem.get_device_context(), - rank, - world_size, + rank, world_size, config.block_size_m, config.block_size_n, config.block_size_k, config.group_size_m, - num_sms, - config.num_xcds, + num_fetch_sms, num_m_tiles, + num_tiles_n, num_k_blocks, num_k_blocks_local, + k_per_flag, + num_flag_groups_k, + total_gather_tiles, use_bias, config.allow_tf32, matrix_instr_nonkdim=16, From b2aadcd5dc8f7d28d8833fe3f4ed5834e58740cc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 13 Feb 2026 02:16:39 +0000 Subject: [PATCH 11/60] Apply Ruff auto-fixes --- iris/ops/all_gather_matmul_hbm_buffer.py | 38 +++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 936a9a9a4..ab8e9d4f8 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -38,8 +38,8 @@ def 
_hbm_buffer_all_gather_matmul_kernel( stride_bn, stride_cm, stride_cn, - stride_sa_m, # staged_a stride in M dim - stride_sa_k, # staged_a stride in K dim + stride_sa_m, # staged_a stride in M dim + stride_sa_k, # staged_a stride in K dim stride_bias, context_tensor: tl.tensor, cur_rank: tl.constexpr, @@ -216,7 +216,7 @@ def all_gather_matmul_hbm_buffer_preamble( ws.locks = shmem.zeros((num_m_tiles * num_flag_groups_k,), dtype=torch.int32) - buffer_mb = M * K * A_sharded.element_size() / (1024 ** 2) + buffer_mb = M * K * A_sharded.element_size() / (1024**2) sa_stride_m, sa_stride_k = ws.aux_buffer.stride() shmem.info( f"HBM buffer: staged_a=({M},{K}) [{buffer_mb:.1f} MB] " @@ -274,9 +274,7 @@ def all_gather_matmul_hbm_buffer( assert num_k_blocks % k_per_flag == 0 if workspace is None: - workspace = all_gather_matmul_hbm_buffer_preamble( - shmem, A_sharded, B, config, k_per_flag, staged_a_layout - ) + workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config, k_per_flag, staged_a_layout) workspace.locks.zero_() @@ -315,16 +313,28 @@ def all_gather_matmul_hbm_buffer( grid_size = num_fetch_sms + total_gemm_tiles _hbm_buffer_all_gather_matmul_kernel[(grid_size,)]( - A_sharded, B, output_tensor, bias_ptr, - workspace.aux_buffer, workspace.locks, - M, N, K, K_local, - stride_am, stride_ak, - stride_bk, stride_bn, - stride_cm, stride_cn, - stride_sa_m, stride_sa_k, + A_sharded, + B, + output_tensor, + bias_ptr, + workspace.aux_buffer, + workspace.locks, + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_sa_m, + stride_sa_k, stride_bias, shmem.get_device_context(), - rank, world_size, + rank, + world_size, config.block_size_m, config.block_size_n, config.block_size_k, From 7b2321eac0f31f65ec13c3ca259f49d629d8169b Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Mon, 16 Feb 2026 15:56:37 -0600 Subject: [PATCH 12/60] Update hbm buffered all gather matmul --- 
.../all_gather_matmul/benchmark_hbm_buffer.py | 21 ++++++++++++++++++- iris/ops/all_gather_matmul_hbm_buffer.py | 15 ++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 0529ebb46..1a6ca502a 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -68,6 +68,10 @@ def parse_args(): parser.add_argument("--b_col_major", action="store_true", help="B col-major (K-contiguous)") parser.add_argument("--a_col_major", action="store_true", help="A col-major (M-contiguous)") parser.add_argument("--single-run", action="store_true", help="1 iteration (for profiling)") + parser.add_argument("--num_fetch_sms", type=int, default=None, help="Fetcher SMs (auto if None)") + parser.add_argument("--k_per_flag", type=int, default=1, help="K-blocks per ready flag") + parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") + parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") return vars(parser.parse_args()) @@ -162,7 +166,10 @@ def _worker(args): expected_tensor.copy_(torch.matmul(A_gathered, B_data)) # Pre-allocate workspace - workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config) + k_per_flag = args["k_per_flag"] + workspace = all_gather_matmul_hbm_buffer_preamble( + shmem, A_sharded, B, config, k_per_flag=k_per_flag + ) # ── Timing ─────────────────────────────────────────────────────────── comm_stream = torch.cuda.Stream() @@ -171,6 +178,10 @@ def _worker(args): total_ms = 0.0 num_experiments = 0 + num_fetch_sms = args["num_fetch_sms"] + num_warps = args["num_warps"] + num_stages = args["num_stages"] + def run_experiment(): nonlocal total_ms, num_experiments shmem.barrier() @@ -184,6 +195,10 @@ def run_experiment(): config=config, async_op=False, 
workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, ) end_ev.record() num_experiments += 1 @@ -263,6 +278,10 @@ def run_experiment(): config=config, async_op=False, workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, ) torch.cuda.synchronize() t_end = time.perf_counter() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 936a9a9a4..8ab69704b 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -102,10 +102,11 @@ def _hbm_buffer_all_gather_matmul_kernel( for compile_rank in range(world_size): if src_rank_idx == compile_rank: a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile) + tl.store(staged_ptrs, a_tile,cache_modifier=".wt") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + #tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + tl.store(flags_ptr + flag_idx, 1) else: # ============================================================== @@ -242,6 +243,8 @@ def all_gather_matmul_hbm_buffer( fetch_block_m: Optional[int] = None, fetch_block_k: Optional[int] = None, staged_a_layout: str = "k_contiguous", + num_warps: Optional[int] = None, + num_stages: Optional[int] = None, ) -> FusedWorkspace: """ All-gather + matmul with dedicated fetcher/GEMM workgroups. 
@@ -314,6 +317,12 @@ def all_gather_matmul_hbm_buffer( grid_size = num_fetch_sms + total_gemm_tiles + launch_kwargs = {"matrix_instr_nonkdim": 16} + if num_warps is not None: + launch_kwargs["num_warps"] = num_warps + if num_stages is not None: + launch_kwargs["num_stages"] = num_stages + _hbm_buffer_all_gather_matmul_kernel[(grid_size,)]( A_sharded, B, output_tensor, bias_ptr, workspace.aux_buffer, workspace.locks, @@ -339,7 +348,7 @@ def all_gather_matmul_hbm_buffer( total_gather_tiles, use_bias, config.allow_tf32, - matrix_instr_nonkdim=16, + **launch_kwargs, ) if not async_op: From 9692222bfb74930ef9fb50028c3554b3181c35ee Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 16 Feb 2026 21:57:42 +0000 Subject: [PATCH 13/60] Apply Ruff auto-fixes --- benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py | 4 +--- iris/ops/all_gather_matmul_hbm_buffer.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 1a6ca502a..aa8221d60 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -167,9 +167,7 @@ def _worker(args): # Pre-allocate workspace k_per_flag = args["k_per_flag"] - workspace = all_gather_matmul_hbm_buffer_preamble( - shmem, A_sharded, B, config, k_per_flag=k_per_flag - ) + workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config, k_per_flag=k_per_flag) # ── Timing ─────────────────────────────────────────────────────────── comm_stream = torch.cuda.Stream() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 329516bc0..4f8de5044 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -102,10 +102,10 @@ def _hbm_buffer_all_gather_matmul_kernel( for compile_rank in range(world_size): if src_rank_idx == compile_rank: 
a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile,cache_modifier=".wt") + tl.store(staged_ptrs, a_tile, cache_modifier=".wt") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - #tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + # tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") tl.store(flags_ptr + flag_idx, 1) else: From 44ebc976f983e0e0573fbbdca17ad0a6c8b78231 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Mon, 16 Feb 2026 17:59:20 -0600 Subject: [PATCH 14/60] Add tracing --- .../all_gather_matmul/benchmark_hbm_buffer.py | 192 ++++++++++++++++++ iris/ops/all_gather_matmul_hbm_buffer.py | 66 +++++- 2 files changed, 254 insertions(+), 4 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 1a6ca502a..dc74c7fa3 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -24,6 +24,7 @@ import torch.distributed as dist import random import argparse +import numpy as np import iris from iris.ops.all_gather_matmul_hbm_buffer import ( @@ -35,6 +36,135 @@ torch.manual_seed(123) random.seed(123) +TICKS_PER_US = 100 # s_memrealtime runs at 100 MHz: 1 tick = 10 ns = 0.01 us + + +def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): + """Generate a tall Gantt chart showing per-workgroup activity over time. 
+ + Y-axis: workgroup (sorted by start time) + X-axis: time in microseconds + Colors: fetcher (blue), GEMM wait (red), GEMM compute (green) + """ + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + from matplotlib.patches import Rectangle + from matplotlib.lines import Line2D + + starts = trace_data["start"].numpy().astype(np.int64) + ends = trace_data["end"].numpy().astype(np.int64) + waits = trace_data["wait"].numpy().astype(np.int64) + xcds = trace_data["xcd"].numpy().astype(np.int32) + grid_size = trace_data["grid_size"] + n_fetch = trace_data["num_fetch_sms"] + + # Convert to microseconds relative to earliest start + t_min = starts.min() + starts_us = (starts - t_min) / TICKS_PER_US + ends_us = (ends - t_min) / TICKS_PER_US + waits_us = waits / TICKS_PER_US + + # Build role array: 0=fetcher, 1=GEMM + roles = np.array([0 if i < n_fetch else 1 for i in range(grid_size)]) + + # Sort by start time + order = np.argsort(starts_us) + + # Compute figure height: ~0.012 inches per row, min 12 inches + row_h = 0.012 + fig_h = max(12, grid_size * row_h + 2) + fig, ax = plt.subplots(figsize=(18, fig_h)) + + fetch_color = "#2196F3" # blue + wait_color = "#F44336" # red + compute_color = "#4CAF50" # green + + for y_idx, wg_idx in enumerate(order): + s = starts_us[wg_idx] + e = ends_us[wg_idx] + dur = e - s + role = roles[wg_idx] + + if role == 0: + # Fetcher: solid blue bar + ax.barh(y_idx, dur, left=s, height=0.8, color=fetch_color, + edgecolor="none", linewidth=0) + else: + # GEMM: split into wait (red) and compute (green) + w = waits_us[wg_idx] + c = max(0, dur - w) + # Show wait portion first, then compute + ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, + edgecolor="none", linewidth=0) + ax.barh(y_idx, c, left=s + w, height=0.8, color=compute_color, + edgecolor="none", linewidth=0) + + # XCD annotations on the right margin + xcd_set = sorted(set(xcds.tolist())) + xcd_cmap = {} + if len(xcd_set) > 1: + cmap = 
matplotlib.colormaps.get_cmap("tab10").resampled(len(xcd_set)) + for i, x in enumerate(xcd_set): + xcd_cmap[x] = cmap(i) + + x_max = ends_us.max() * 1.02 + for y_idx, wg_idx in enumerate(order): + xcd_id = xcds[wg_idx] + if xcd_id in xcd_cmap: + ax.plot(x_max, y_idx, marker="s", markersize=1.5, + color=xcd_cmap[xcd_id], clip_on=False) + + ax.set_xlabel("Time (us)", fontsize=12) + ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) + ax.set_title( + f"Rank {rank} | All-Gather GEMM Trace | " + f"M={M} N={N} K={K} | " + f"{n_fetch} fetchers + {grid_size - n_fetch} GEMM workgroups", + fontsize=13, + ) + ax.set_ylim(-1, grid_size + 1) + ax.set_xlim(0, x_max) + + # Invert y so earliest-starting workgroups are at top + ax.invert_yaxis() + + # Legend + legend_elements = [ + Line2D([0], [0], color=fetch_color, lw=6, label="Fetcher (all-gather)"), + Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data"), + Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute"), + ] + ax.legend(handles=legend_elements, loc="upper right", fontsize=10) + + # Summary stats + fetch_mask = roles == 0 + gemm_mask = roles == 1 + fetch_dur = (ends_us - starts_us)[fetch_mask] + gemm_dur = (ends_us - starts_us)[gemm_mask] + gemm_wait = waits_us[gemm_mask] + gemm_compute = gemm_dur - gemm_wait + + stats_text = ( + f"Fetcher: {fetch_dur.mean():.1f} us avg ({fetch_dur.min():.1f}-{fetch_dur.max():.1f})\n" + f"GEMM total: {gemm_dur.mean():.1f} us avg ({gemm_dur.min():.1f}-{gemm_dur.max():.1f})\n" + f" wait: {gemm_wait.mean():.1f} us avg ({gemm_wait.min():.1f}-{gemm_wait.max():.1f})\n" + f" compute: {gemm_compute.mean():.1f} us avg ({gemm_compute.min():.1f}-{gemm_compute.max():.1f})\n" + f" wait%: {100 * gemm_wait.sum() / gemm_dur.sum():.1f}%\n" + f"Wall time: {ends_us.max():.1f} us" + ) + ax.text( + 0.01, 0.99, stats_text, transform=ax.transAxes, + fontsize=9, verticalalignment="top", fontfamily="monospace", + bbox=dict(boxstyle="round,pad=0.4", facecolor="white", 
alpha=0.85), + ) + + plt.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + plt.close(fig) + print(f" [Rank {rank}] Trace plot saved to: {output_path}") + print(f" {stats_text}") + def parse_args(): parser = argparse.ArgumentParser( @@ -72,6 +202,8 @@ def parse_args(): parser.add_argument("--k_per_flag", type=int, default=1, help="K-blocks per ready flag") parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") + parser.add_argument("--trace", action="store_true", help="Collect per-workgroup trace and save Gantt chart PNG") + parser.add_argument("--trace_output", type=str, default="trace_gantt.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -80,6 +212,8 @@ def _worker(args): local_rank = int(os.environ.get("RANK", os.environ.get("LOCAL_RANK", 0))) world_size_env = int(os.environ.get("WORLD_SIZE", 1)) + t0 = time.perf_counter() + backend = "nccl" if torch.cuda.is_available() else "gloo" if "RANK" in os.environ or "LOCAL_RANK" in os.environ: @@ -97,10 +231,18 @@ def _worker(args): device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) + t1 = time.perf_counter() + shmem = iris.iris(args["heap_size"]) rank = shmem.get_rank() world_size = shmem.get_num_ranks() + t2 = time.perf_counter() + shmem.info( + f"Startup: dist.init={t1 - t0:.1f}s, iris.init={t2 - t1:.1f}s, " + f"total={t2 - t0:.1f}s" + ) + datatype_map = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16} datatype = datatype_map.get(args["datatype"], torch.float16) @@ -309,6 +451,56 @@ def run_experiment(): shmem.barrier() + # ── Trace ──────────────────────────────────────────────────────────── + if args["trace"]: + # Warmup: compile the TRACE=True kernel variant before the real run + shmem.info("Trace warmup (compiling traced kernel variant)...") + C.zero_() + 
workspace.locks.zero_() + shmem.barrier() + all_gather_matmul_hbm_buffer( + shmem, C, A_sharded, B, + config=config, async_op=False, workspace=workspace, + num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, + num_warps=num_warps, num_stages=num_stages, + trace=True, + ) + torch.cuda.synchronize() + shmem.barrier() + + # Actual traced run (post-compilation, clean state) + shmem.info("Running single traced iteration...") + C.zero_() + workspace.locks.zero_() + shmem.barrier() + + all_gather_matmul_hbm_buffer( + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, + trace=True, + ) + torch.cuda.synchronize() + shmem.barrier() + + if rank == 0 and hasattr(workspace, "trace_data"): + trace_out = args.get("trace_output", "trace_gantt.png") + try: + _plot_trace(workspace.trace_data, trace_out, rank, M, N, K, num_fetch_sms) + except ImportError: + print(" (matplotlib not available -- skipping trace plot)") + except Exception as e: + print(f" (Trace plot failed: {e})") + shmem.barrier() + # ── PyTorch baseline ───────────────────────────────────────────────── if args["benchmark_pytorch"]: shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 329516bc0..c797c939a 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -16,6 +16,7 @@ import iris import iris.x +from iris.device_utils import read_realtime, get_xcc_id from .config import FusedConfig from .workspace import FusedWorkspace @@ -58,11 +59,20 @@ def _hbm_buffer_all_gather_matmul_kernel( TOTAL_GATHER_TILES: tl.constexpr, BIAS: tl.constexpr, ALLOW_TF32: tl.constexpr, + trace_start_ptr, + trace_end_ptr, + trace_wait_ptr, + trace_xcd_ptr, + TRACE: tl.constexpr, ): pid = tl.program_id(0) acc_dtype = tl.int32 if 
C.type.element_ty == tl.int8 else tl.float32 zero = tl.program_id(0) * 0 + if TRACE: + tl.store(trace_start_ptr + pid, read_realtime()) + tl.store(trace_xcd_ptr + pid, get_xcc_id()) + if pid < NUM_FETCH_SMS: # ============================================================== # FETCHER @@ -102,11 +112,15 @@ def _hbm_buffer_all_gather_matmul_kernel( for compile_rank in range(world_size): if src_rank_idx == compile_rank: a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile,cache_modifier=".wt") + tl.store(staged_ptrs, a_tile,cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - #tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") - tl.store(flags_ptr + flag_idx, 1) + tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + #tl.store(flags_ptr + flag_idx, 1,cache_modifier=".wt") + + if TRACE: + tl.store(trace_wait_ptr + pid, zero.to(tl.int64),cache_modifier=".wt") + tl.store(trace_end_ptr + pid, read_realtime(),cache_modifier=".wt") else: # ============================================================== @@ -128,11 +142,20 @@ def _hbm_buffer_all_gather_matmul_kernel( acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) + if TRACE: + _wt = zero.to(tl.int64) + for k_fg in range(NUM_FLAG_GROUPS_K): + if TRACE: + _ws = read_realtime() + flag_idx = pid_m * NUM_FLAG_GROUPS_K + k_fg while tl.atomic_add(flags_ptr + flag_idx, 0, sem="acquire", scope="gpu") == 0: pass + if TRACE: + _wt = _wt + (read_realtime() - _ws) + k_block_base = k_fg * K_PER_FLAG for k_off in range(K_PER_FLAG): k_block = k_block_base + k_off @@ -158,7 +181,11 @@ def _hbm_buffer_all_gather_matmul_kernel( c = acc.to(C.type.element_ty) C_ptrs = C + rm[:, None] * stride_cm + rn[None, :] * stride_cn c_mask = (rm[:, None] < M) & (rn[None, :] < N) - tl.store(C_ptrs, c, mask=c_mask) + tl.store(C_ptrs, c, mask=c_mask,cache_modifier=".wt") + + if TRACE: + tl.store(trace_wait_ptr + pid, _wt) + tl.store(trace_end_ptr + pid, 
read_realtime(),cache_modifier=".wt") # ========================================================================== @@ -245,6 +272,7 @@ def all_gather_matmul_hbm_buffer( staged_a_layout: str = "k_contiguous", num_warps: Optional[int] = None, num_stages: Optional[int] = None, + trace: bool = False, ) -> FusedWorkspace: """ All-gather + matmul with dedicated fetcher/GEMM workgroups. @@ -315,6 +343,18 @@ def all_gather_matmul_hbm_buffer( grid_size = num_fetch_sms + total_gemm_tiles + # Trace buffers + if trace: + trace_start = torch.zeros(grid_size, dtype=torch.int64, device=device) + trace_end = torch.zeros(grid_size, dtype=torch.int64, device=device) + trace_wait = torch.zeros(grid_size, dtype=torch.int64, device=device) + trace_xcd = torch.zeros(grid_size, dtype=torch.int32, device=device) + else: + trace_start = torch.empty(1, dtype=torch.int64, device=device) + trace_end = torch.empty(1, dtype=torch.int64, device=device) + trace_wait = torch.empty(1, dtype=torch.int64, device=device) + trace_xcd = torch.empty(1, dtype=torch.int32, device=device) + launch_kwargs = {"matrix_instr_nonkdim": 16} if num_warps is not None: launch_kwargs["num_warps"] = num_warps @@ -358,10 +398,28 @@ def all_gather_matmul_hbm_buffer( total_gather_tiles, use_bias, config.allow_tf32, + trace_start, + trace_end, + trace_wait, + trace_xcd, + trace, **launch_kwargs, ) if not async_op: shmem.barrier() + if trace: + torch.cuda.synchronize() + workspace.trace_data = { + "start": trace_start.cpu(), + "end": trace_end.cpu(), + "wait": trace_wait.cpu(), + "xcd": trace_xcd.cpu(), + "grid_size": grid_size, + "num_fetch_sms": num_fetch_sms, + "num_m_tiles": num_m_tiles, + "num_tiles_n": num_tiles_n, + } + return workspace From 11d017aa8f1bdeefe5da27ba72a85113c3b5784c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 17 Feb 2026 00:00:52 +0000 Subject: [PATCH 15/60] Apply Ruff auto-fixes --- .../all_gather_matmul/benchmark_hbm_buffer.py | 49 ++++++++++--------- 
iris/ops/all_gather_matmul_hbm_buffer.py | 12 ++--- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 62dfa9acb..3bf2edf92 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -47,9 +47,9 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): Colors: fetcher (blue), GEMM wait (red), GEMM compute (green) """ import matplotlib + matplotlib.use("Agg") import matplotlib.pyplot as plt - from matplotlib.patches import Rectangle from matplotlib.lines import Line2D starts = trace_data["start"].numpy().astype(np.int64) @@ -76,9 +76,9 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): fig_h = max(12, grid_size * row_h + 2) fig, ax = plt.subplots(figsize=(18, fig_h)) - fetch_color = "#2196F3" # blue - wait_color = "#F44336" # red - compute_color = "#4CAF50" # green + fetch_color = "#2196F3" # blue + wait_color = "#F44336" # red + compute_color = "#4CAF50" # green for y_idx, wg_idx in enumerate(order): s = starts_us[wg_idx] @@ -88,17 +88,14 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): if role == 0: # Fetcher: solid blue bar - ax.barh(y_idx, dur, left=s, height=0.8, color=fetch_color, - edgecolor="none", linewidth=0) + ax.barh(y_idx, dur, left=s, height=0.8, color=fetch_color, edgecolor="none", linewidth=0) else: # GEMM: split into wait (red) and compute (green) w = waits_us[wg_idx] c = max(0, dur - w) # Show wait portion first, then compute - ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, - edgecolor="none", linewidth=0) - ax.barh(y_idx, c, left=s + w, height=0.8, color=compute_color, - edgecolor="none", linewidth=0) + ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, edgecolor="none", linewidth=0) + ax.barh(y_idx, c, left=s + w, height=0.8, color=compute_color, 
edgecolor="none", linewidth=0) # XCD annotations on the right margin xcd_set = sorted(set(xcds.tolist())) @@ -112,8 +109,7 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): for y_idx, wg_idx in enumerate(order): xcd_id = xcds[wg_idx] if xcd_id in xcd_cmap: - ax.plot(x_max, y_idx, marker="s", markersize=1.5, - color=xcd_cmap[xcd_id], clip_on=False) + ax.plot(x_max, y_idx, marker="s", markersize=1.5, color=xcd_cmap[xcd_id], clip_on=False) ax.set_xlabel("Time (us)", fontsize=12) ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) @@ -154,8 +150,13 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): f"Wall time: {ends_us.max():.1f} us" ) ax.text( - 0.01, 0.99, stats_text, transform=ax.transAxes, - fontsize=9, verticalalignment="top", fontfamily="monospace", + 0.01, + 0.99, + stats_text, + transform=ax.transAxes, + fontsize=9, + verticalalignment="top", + fontfamily="monospace", bbox=dict(boxstyle="round,pad=0.4", facecolor="white", alpha=0.85), ) @@ -238,10 +239,7 @@ def _worker(args): world_size = shmem.get_num_ranks() t2 = time.perf_counter() - shmem.info( - f"Startup: dist.init={t1 - t0:.1f}s, iris.init={t2 - t1:.1f}s, " - f"total={t2 - t0:.1f}s" - ) + shmem.info(f"Startup: dist.init={t1 - t0:.1f}s, iris.init={t2 - t1:.1f}s, total={t2 - t0:.1f}s") datatype_map = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16} datatype = datatype_map.get(args["datatype"], torch.float16) @@ -457,10 +455,17 @@ def run_experiment(): workspace.locks.zero_() shmem.barrier() all_gather_matmul_hbm_buffer( - shmem, C, A_sharded, B, - config=config, async_op=False, workspace=workspace, - num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, - num_warps=num_warps, num_stages=num_stages, + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, trace=True, ) 
torch.cuda.synchronize() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index c797c939a..e7f3b11bd 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -112,15 +112,15 @@ def _hbm_buffer_all_gather_matmul_kernel( for compile_rank in range(world_size): if src_rank_idx == compile_rank: a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile,cache_modifier=".cg") + tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") - #tl.store(flags_ptr + flag_idx, 1,cache_modifier=".wt") + # tl.store(flags_ptr + flag_idx, 1,cache_modifier=".wt") if TRACE: - tl.store(trace_wait_ptr + pid, zero.to(tl.int64),cache_modifier=".wt") - tl.store(trace_end_ptr + pid, read_realtime(),cache_modifier=".wt") + tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") + tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") else: # ============================================================== @@ -181,11 +181,11 @@ def _hbm_buffer_all_gather_matmul_kernel( c = acc.to(C.type.element_ty) C_ptrs = C + rm[:, None] * stride_cm + rn[None, :] * stride_cn c_mask = (rm[:, None] < M) & (rn[None, :] < N) - tl.store(C_ptrs, c, mask=c_mask,cache_modifier=".wt") + tl.store(C_ptrs, c, mask=c_mask, cache_modifier=".wt") if TRACE: tl.store(trace_wait_ptr + pid, _wt) - tl.store(trace_end_ptr + pid, read_realtime(),cache_modifier=".wt") + tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") # ========================================================================== From ace40d0df894098dbc64f91ce7ac344dba11b4f8 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Mon, 16 Feb 2026 18:48:50 -0600 Subject: [PATCH 16/60] Add stages to all_gather_matmul_hbm_buffer --- .../all_gather_matmul/benchmark_hbm_buffer.py | 93 
++++++++++----- iris/ops/all_gather_matmul_hbm_buffer.py | 109 +++++++++++------- 2 files changed, 134 insertions(+), 68 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 62dfa9acb..1991111c7 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -44,12 +44,11 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): Y-axis: workgroup (sorted by start time) X-axis: time in microseconds - Colors: fetcher (blue), GEMM wait (red), GEMM compute (green) + Colors: fetcher stages (blue shades), GEMM wait (red), GEMM compute (green) """ import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt - from matplotlib.patches import Rectangle from matplotlib.lines import Line2D starts = trace_data["start"].numpy().astype(np.int64) @@ -57,7 +56,10 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): waits = trace_data["wait"].numpy().astype(np.int64) xcds = trace_data["xcd"].numpy().astype(np.int32) grid_size = trace_data["grid_size"] - n_fetch = trace_data["num_fetch_sms"] + n_fetch_per_stage = trace_data["num_fetch_sms"] + n_stages = trace_data.get("num_fetch_stages", 1) + total_fetch = trace_data.get("total_fetch_wgs", n_fetch_per_stage) + wgs_per_stage = trace_data.get("wgs_per_stage", grid_size) # Convert to microseconds relative to earliest start t_min = starts.min() @@ -65,8 +67,16 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): ends_us = (ends - t_min) / TICKS_PER_US waits_us = waits / TICKS_PER_US - # Build role array: 0=fetcher, 1=GEMM - roles = np.array([0 if i < n_fetch else 1 for i in range(grid_size)]) + # Build role array: stage index for fetchers (0..S-1), S for GEMM + # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] 
+ roles = np.empty(grid_size, dtype=np.int32) + for i in range(grid_size): + stage = i // wgs_per_stage + local = i % wgs_per_stage + if local < n_fetch_per_stage: + roles[i] = stage # fetcher for this stage + else: + roles[i] = n_stages # GEMM # Sort by start time order = np.argsort(starts_us) @@ -76,7 +86,8 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): fig_h = max(12, grid_size * row_h + 2) fig, ax = plt.subplots(figsize=(18, fig_h)) - fetch_color = "#2196F3" # blue + # One color per fetch stage (blue palette), plus GEMM colors + fetch_blues = ["#1565C0", "#42A5F5", "#90CAF9", "#BBDEFB"] wait_color = "#F44336" # red compute_color = "#4CAF50" # green @@ -86,18 +97,18 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): dur = e - s role = roles[wg_idx] - if role == 0: - # Fetcher: solid blue bar - ax.barh(y_idx, dur, left=s, height=0.8, color=fetch_color, + if role < n_stages: + # Fetcher: color by stage + c = fetch_blues[role % len(fetch_blues)] + ax.barh(y_idx, dur, left=s, height=0.8, color=c, edgecolor="none", linewidth=0) else: # GEMM: split into wait (red) and compute (green) w = waits_us[wg_idx] - c = max(0, dur - w) - # Show wait portion first, then compute + comp = max(0, dur - w) ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, edgecolor="none", linewidth=0) - ax.barh(y_idx, c, left=s + w, height=0.8, color=compute_color, + ax.barh(y_idx, comp, left=s + w, height=0.8, color=compute_color, edgecolor="none", linewidth=0) # XCD annotations on the right margin @@ -115,12 +126,15 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): ax.plot(x_max, y_idx, marker="s", markersize=1.5, color=xcd_cmap[xcd_id], clip_on=False) + n_gemm = grid_size - total_fetch + stage_info = (f"{n_stages}x{n_fetch_per_stage}" if n_stages > 1 + else str(n_fetch_per_stage)) ax.set_xlabel("Time (us)", fontsize=12) ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) ax.set_title( f"Rank 
{rank} | All-Gather GEMM Trace | " f"M={M} N={N} K={K} | " - f"{n_fetch} fetchers + {grid_size - n_fetch} GEMM workgroups", + f"{stage_info} fetchers + {n_gemm} GEMM workgroups", fontsize=13, ) ax.set_ylim(-1, grid_size + 1) @@ -130,29 +144,45 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): ax.invert_yaxis() # Legend - legend_elements = [ - Line2D([0], [0], color=fetch_color, lw=6, label="Fetcher (all-gather)"), - Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data"), - Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute"), - ] + legend_elements = [] + for s_idx in range(min(n_stages, len(fetch_blues))): + legend_elements.append( + Line2D([0], [0], color=fetch_blues[s_idx], lw=6, + label=f"Fetch stage {s_idx}") + ) + legend_elements.append( + Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data")) + legend_elements.append( + Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute")) ax.legend(handles=legend_elements, loc="upper right", fontsize=10) # Summary stats - fetch_mask = roles == 0 - gemm_mask = roles == 1 + fetch_mask = roles < n_stages + gemm_mask = roles == n_stages fetch_dur = (ends_us - starts_us)[fetch_mask] gemm_dur = (ends_us - starts_us)[gemm_mask] gemm_wait = waits_us[gemm_mask] gemm_compute = gemm_dur - gemm_wait - stats_text = ( - f"Fetcher: {fetch_dur.mean():.1f} us avg ({fetch_dur.min():.1f}-{fetch_dur.max():.1f})\n" - f"GEMM total: {gemm_dur.mean():.1f} us avg ({gemm_dur.min():.1f}-{gemm_dur.max():.1f})\n" - f" wait: {gemm_wait.mean():.1f} us avg ({gemm_wait.min():.1f}-{gemm_wait.max():.1f})\n" - f" compute: {gemm_compute.mean():.1f} us avg ({gemm_compute.min():.1f}-{gemm_compute.max():.1f})\n" - f" wait%: {100 * gemm_wait.sum() / gemm_dur.sum():.1f}%\n" - f"Wall time: {ends_us.max():.1f} us" - ) + stats_lines = [] + for s_idx in range(n_stages): + s_mask = roles == s_idx + s_dur = (ends_us - starts_us)[s_mask] + s_start = starts_us[s_mask] + if len(s_dur) 
> 0: + stats_lines.append( + f"Fetch stg{s_idx}: {s_dur.mean():.1f} us avg " + f"({s_dur.min():.1f}-{s_dur.max():.1f}) " + f"first@{s_start.min():.0f}us" + ) + stats_lines += [ + f"GEMM total: {gemm_dur.mean():.1f} us avg ({gemm_dur.min():.1f}-{gemm_dur.max():.1f})", + f" wait: {gemm_wait.mean():.1f} us avg ({gemm_wait.min():.1f}-{gemm_wait.max():.1f})", + f" compute: {gemm_compute.mean():.1f} us avg ({gemm_compute.min():.1f}-{gemm_compute.max():.1f})", + f" wait%: {100 * gemm_wait.sum() / gemm_dur.sum():.1f}%", + f"Wall time: {ends_us.max():.1f} us", + ] + stats_text = "\n".join(stats_lines) ax.text( 0.01, 0.99, stats_text, transform=ax.transAxes, fontsize=9, verticalalignment="top", fontfamily="monospace", @@ -202,6 +232,7 @@ def parse_args(): parser.add_argument("--k_per_flag", type=int, default=1, help="K-blocks per ready flag") parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") + parser.add_argument("--num_fetch_stages", type=int, default=1, help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)") parser.add_argument("--trace", action="store_true", help="Collect per-workgroup trace and save Gantt chart PNG") parser.add_argument("--trace_output", type=str, default="trace_gantt.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -321,6 +352,7 @@ def _worker(args): num_fetch_sms = args["num_fetch_sms"] num_warps = args["num_warps"] num_stages = args["num_stages"] + num_fetch_stages = args["num_fetch_stages"] def run_experiment(): nonlocal total_ms, num_experiments @@ -339,6 +371,7 @@ def run_experiment(): k_per_flag=k_per_flag, num_warps=num_warps, num_stages=num_stages, + num_fetch_stages=num_fetch_stages, ) end_ev.record() num_experiments += 1 @@ -422,6 +455,7 @@ def run_experiment(): k_per_flag=k_per_flag, num_warps=num_warps, num_stages=num_stages, + 
num_fetch_stages=num_fetch_stages, ) torch.cuda.synchronize() t_end = time.perf_counter() @@ -461,7 +495,7 @@ def run_experiment(): config=config, async_op=False, workspace=workspace, num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, num_warps=num_warps, num_stages=num_stages, - trace=True, + num_fetch_stages=num_fetch_stages, trace=True, ) torch.cuda.synchronize() shmem.barrier() @@ -484,6 +518,7 @@ def run_experiment(): k_per_flag=k_per_flag, num_warps=num_warps, num_stages=num_stages, + num_fetch_stages=num_fetch_stages, trace=True, ) torch.cuda.synchronize() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index c797c939a..e9db8f5e7 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -59,6 +59,8 @@ def _hbm_buffer_all_gather_matmul_kernel( TOTAL_GATHER_TILES: tl.constexpr, BIAS: tl.constexpr, ALLOW_TF32: tl.constexpr, + NUM_FETCH_STAGES: tl.constexpr, + GEMM_TILES_PER_STAGE: tl.constexpr, trace_start_ptr, trace_end_ptr, trace_wait_ptr, @@ -73,67 +75,83 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(trace_start_ptr + pid, read_realtime()) tl.store(trace_xcd_ptr + pid, get_xcc_id()) - if pid < NUM_FETCH_SMS: + # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] 
+ WGS_PER_STAGE: tl.constexpr = NUM_FETCH_SMS + GEMM_TILES_PER_STAGE + M_PER_STAGE: tl.constexpr = (NUM_M_TILES + NUM_FETCH_STAGES - 1) // NUM_FETCH_STAGES + + local_pid = pid % WGS_PER_STAGE + + if local_pid < NUM_FETCH_SMS: # ============================================================== - # FETCHER + # FETCHER — interleaved: stage determined by pid // WGS_PER_STAGE # ============================================================== + my_stage = pid // WGS_PER_STAGE + stage_pid = local_pid + ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) src_view = iris.x.make_tensor_view(A_sharded, M, K_local, stride_am, stride_ak) - num_m_groups = (NUM_M_TILES + GROUP_SIZE_M - 1) // GROUP_SIZE_M tiles_per_m_group = NUM_FLAG_GROUPS_K * GROUP_SIZE_M - total_flag_groups = NUM_FLAG_GROUPS_K * NUM_M_TILES - for fg_idx in range(pid, total_flag_groups, NUM_FETCH_SMS): - m_group = fg_idx // tiles_per_m_group - within_group = fg_idx % tiles_per_m_group - k_flag_group = within_group // GROUP_SIZE_M - m_in_group = within_group % GROUP_SIZE_M - m_tile = m_group * GROUP_SIZE_M + m_in_group - m_tile = min(m_tile, NUM_M_TILES - 1) - k_block_start = k_flag_group * K_PER_FLAG + for const_stage in range(NUM_FETCH_STAGES): + if my_stage == const_stage: + stage_m_start = const_stage * M_PER_STAGE + stage_m_count = min(M_PER_STAGE, NUM_M_TILES - stage_m_start) + total_fg_stage = NUM_FLAG_GROUPS_K * stage_m_count - rm = m_tile * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + for fg_idx in range(stage_pid, total_fg_stage, NUM_FETCH_SMS): + m_group = fg_idx // tiles_per_m_group + within_group = fg_idx % tiles_per_m_group + k_flag_group = within_group // GROUP_SIZE_M + m_in_group = within_group % GROUP_SIZE_M + m_tile = stage_m_start + m_group * GROUP_SIZE_M + m_in_group + m_tile = min(m_tile, NUM_M_TILES - 1) + k_block_start = k_flag_group * K_PER_FLAG - for k_off in range(K_PER_FLAG): - k_block_global = k_block_start + k_off + rm = m_tile * BLOCK_SIZE_M + tl.arange(0, 
BLOCK_SIZE_M) - src_rank_idx = k_block_global // NUM_K_BLOCKS_LOCAL - k_block_local = k_block_global % NUM_K_BLOCKS_LOCAL + for k_off in range(K_PER_FLAG): + k_block_global = k_block_start + k_off - pid_m_t = zero + m_tile - tile_k_t = zero + k_block_local - k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) + src_rank_idx = k_block_global // NUM_K_BLOCKS_LOCAL + k_block_local = k_block_global % NUM_K_BLOCKS_LOCAL - rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) - # Use parameterized strides for staged_a - staged_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k + pid_m_t = zero + m_tile + tile_k_t = zero + k_block_local + k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) + + rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + staged_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k - for compile_rank in range(world_size): - if src_rank_idx == compile_rank: - a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) - tl.store(staged_ptrs, a_tile,cache_modifier=".cg") + for compile_rank in range(world_size): + if src_rank_idx == compile_rank: + a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) + tl.store(staged_ptrs, a_tile, cache_modifier=".cg") - flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") - #tl.store(flags_ptr + flag_idx, 1,cache_modifier=".wt") + flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group + tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") if TRACE: - tl.store(trace_wait_ptr + pid, zero.to(tl.int64),cache_modifier=".wt") - tl.store(trace_end_ptr + pid, read_realtime(),cache_modifier=".wt") + tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") + tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") else: # ============================================================== - # GEMM + # GEMM — 
interleaved: stage determined by pid // WGS_PER_STAGE + # gemm_local_id indexes into this stage's M-tile range # ============================================================== - gemm_tile_id = pid - NUM_FETCH_SMS + my_stage = pid // WGS_PER_STAGE + gemm_local_id = local_pid - NUM_FETCH_SMS + stage_m_start = my_stage * M_PER_STAGE num_pid_in_group = GROUP_SIZE_M * NUM_TILES_N - group_id = gemm_tile_id // num_pid_in_group - first_pid_m = group_id * GROUP_SIZE_M + group_id = gemm_local_id // num_pid_in_group + first_pid_m = stage_m_start + group_id * GROUP_SIZE_M + first_pid_m = min(first_pid_m, NUM_M_TILES - 1) group_sz = min(NUM_M_TILES - first_pid_m, GROUP_SIZE_M) - pid_m = first_pid_m + ((gemm_tile_id % num_pid_in_group) % group_sz) - pid_n = (gemm_tile_id % num_pid_in_group) // group_sz + pid_m = first_pid_m + ((gemm_local_id % num_pid_in_group) % group_sz) + pid_n = (gemm_local_id % num_pid_in_group) // group_sz + pid_m = min(pid_m, NUM_M_TILES - 1) rm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) @@ -272,6 +290,7 @@ def all_gather_matmul_hbm_buffer( staged_a_layout: str = "k_contiguous", num_warps: Optional[int] = None, num_stages: Optional[int] = None, + num_fetch_stages: int = 1, trace: bool = False, ) -> FusedWorkspace: """ @@ -340,8 +359,14 @@ def all_gather_matmul_hbm_buffer( if num_fetch_sms is None: num_fetch_sms = max(1, num_sms // 10) assert 0 < num_fetch_sms + assert num_fetch_stages >= 1 - grid_size = num_fetch_sms + total_gemm_tiles + # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] 
+ m_per_stage = (num_m_tiles + num_fetch_stages - 1) // num_fetch_stages + gemm_tiles_per_stage = m_per_stage * num_tiles_n + wgs_per_stage = num_fetch_sms + gemm_tiles_per_stage + total_fetch_wgs = num_fetch_sms * num_fetch_stages + grid_size = wgs_per_stage * num_fetch_stages # Trace buffers if trace: @@ -398,6 +423,8 @@ def all_gather_matmul_hbm_buffer( total_gather_tiles, use_bias, config.allow_tf32, + num_fetch_stages, + gemm_tiles_per_stage, trace_start, trace_end, trace_wait, @@ -418,8 +445,12 @@ def all_gather_matmul_hbm_buffer( "xcd": trace_xcd.cpu(), "grid_size": grid_size, "num_fetch_sms": num_fetch_sms, + "num_fetch_stages": num_fetch_stages, + "total_fetch_wgs": total_fetch_wgs, "num_m_tiles": num_m_tiles, "num_tiles_n": num_tiles_n, + "wgs_per_stage": wgs_per_stage, + "gemm_tiles_per_stage": gemm_tiles_per_stage, } return workspace From f7612bd17ee2e4b683e300c02189f5a3d0924a27 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 17 Feb 2026 00:51:39 +0000 Subject: [PATCH 17/60] Apply Ruff auto-fixes --- .../all_gather_matmul/benchmark_hbm_buffer.py | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index b54aadc02..90aacc056 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -89,8 +89,8 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): # One color per fetch stage (blue palette), plus GEMM colors fetch_blues = ["#1565C0", "#42A5F5", "#90CAF9", "#BBDEFB"] - wait_color = "#F44336" # red - compute_color = "#4CAF50" # green + wait_color = "#F44336" # red + compute_color = "#4CAF50" # green for y_idx, wg_idx in enumerate(order): s = starts_us[wg_idx] @@ -101,16 +101,13 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): if role < n_stages: # Fetcher: color by 
stage c = fetch_blues[role % len(fetch_blues)] - ax.barh(y_idx, dur, left=s, height=0.8, color=c, - edgecolor="none", linewidth=0) + ax.barh(y_idx, dur, left=s, height=0.8, color=c, edgecolor="none", linewidth=0) else: # GEMM: split into wait (red) and compute (green) w = waits_us[wg_idx] comp = max(0, dur - w) - ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, - edgecolor="none", linewidth=0) - ax.barh(y_idx, comp, left=s + w, height=0.8, color=compute_color, - edgecolor="none", linewidth=0) + ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, edgecolor="none", linewidth=0) + ax.barh(y_idx, comp, left=s + w, height=0.8, color=compute_color, edgecolor="none", linewidth=0) # XCD annotations on the right margin xcd_set = sorted(set(xcds.tolist())) @@ -127,8 +124,7 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): ax.plot(x_max, y_idx, marker="s", markersize=1.5, color=xcd_cmap[xcd_id], clip_on=False) n_gemm = grid_size - total_fetch - stage_info = (f"{n_stages}x{n_fetch_per_stage}" if n_stages > 1 - else str(n_fetch_per_stage)) + stage_info = f"{n_stages}x{n_fetch_per_stage}" if n_stages > 1 else str(n_fetch_per_stage) ax.set_xlabel("Time (us)", fontsize=12) ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) ax.set_title( @@ -146,14 +142,9 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): # Legend legend_elements = [] for s_idx in range(min(n_stages, len(fetch_blues))): - legend_elements.append( - Line2D([0], [0], color=fetch_blues[s_idx], lw=6, - label=f"Fetch stage {s_idx}") - ) - legend_elements.append( - Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data")) - legend_elements.append( - Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute")) + legend_elements.append(Line2D([0], [0], color=fetch_blues[s_idx], lw=6, label=f"Fetch stage {s_idx}")) + legend_elements.append(Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data")) + 
legend_elements.append(Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute")) ax.legend(handles=legend_elements, loc="upper right", fontsize=10) # Summary stats @@ -237,7 +228,12 @@ def parse_args(): parser.add_argument("--k_per_flag", type=int, default=1, help="K-blocks per ready flag") parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") - parser.add_argument("--num_fetch_stages", type=int, default=1, help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)") + parser.add_argument( + "--num_fetch_stages", + type=int, + default=1, + help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)", + ) parser.add_argument("--trace", action="store_true", help="Collect per-workgroup trace and save Gantt chart PNG") parser.add_argument("--trace_output", type=str, default="trace_gantt.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -493,11 +489,19 @@ def run_experiment(): workspace.locks.zero_() shmem.barrier() all_gather_matmul_hbm_buffer( - shmem, C, A_sharded, B, - config=config, async_op=False, workspace=workspace, - num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, - num_warps=num_warps, num_stages=num_stages, - num_fetch_stages=num_fetch_stages, trace=True, + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, + num_fetch_stages=num_fetch_stages, + trace=True, ) torch.cuda.synchronize() shmem.barrier() From 51bccb5eeaa9b7ec1f6878accf9fd6d897a3e4c6 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Tue, 17 Feb 2026 15:04:50 -0600 Subject: [PATCH 18/60] Updates to benchmark and kernel --- .../all_gather_matmul/benchmark_hbm_buffer.py | 35 +- .../ops/all_gather_matmul/tune_hbm_buffer.py | 576 ++++++++++++++++++ iris/iris.py | 
20 +- iris/ops/all_gather_matmul_hbm_buffer.py | 59 +- 4 files changed, 657 insertions(+), 33 deletions(-) create mode 100644 benchmark/ops/all_gather_matmul/tune_hbm_buffer.py diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index b54aadc02..ba6afaa70 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -60,7 +60,9 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): n_fetch_per_stage = trace_data["num_fetch_sms"] n_stages = trace_data.get("num_fetch_stages", 1) total_fetch = trace_data.get("total_fetch_wgs", n_fetch_per_stage) - wgs_per_stage = trace_data.get("wgs_per_stage", grid_size) + first_stage_fetch = trace_data.get("first_stage_fetch_sms", n_fetch_per_stage) + first_stage_size = trace_data.get("first_stage_size", grid_size) + rest_stage_size = trace_data.get("rest_stage_size", grid_size) # Convert to microseconds relative to earliest start t_min = starts.min() @@ -69,12 +71,19 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): waits_us = waits / TICKS_PER_US # Build role array: stage index for fetchers (0..S-1), S for GEMM - # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] + # Asymmetric layout: [fetch0 (P)] [gemm0] [fetch1 (F)] [gemm1] ... 
roles = np.empty(grid_size, dtype=np.int32) for i in range(grid_size): - stage = i // wgs_per_stage - local = i % wgs_per_stage - if local < n_fetch_per_stage: + if i < first_stage_size: + stage = 0 + local = i + fetch_thresh = first_stage_fetch + else: + adjusted = i - first_stage_size + stage = 1 + adjusted // rest_stage_size + local = adjusted % rest_stage_size + fetch_thresh = n_fetch_per_stage + if local < fetch_thresh: roles[i] = stage # fetcher for this stage else: roles[i] = n_stages # GEMM @@ -127,8 +136,12 @@ def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): ax.plot(x_max, y_idx, marker="s", markersize=1.5, color=xcd_cmap[xcd_id], clip_on=False) n_gemm = grid_size - total_fetch - stage_info = (f"{n_stages}x{n_fetch_per_stage}" if n_stages > 1 - else str(n_fetch_per_stage)) + if n_stages > 1 and first_stage_fetch != n_fetch_per_stage: + stage_info = f"{first_stage_fetch}+{n_stages - 1}x{n_fetch_per_stage}" + elif n_stages > 1: + stage_info = f"{n_stages}x{n_fetch_per_stage}" + else: + stage_info = str(first_stage_fetch) ax.set_xlabel("Time (us)", fontsize=12) ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) ax.set_title( @@ -238,6 +251,7 @@ def parse_args(): parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") parser.add_argument("--num_fetch_stages", type=int, default=1, help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)") + parser.add_argument("--first_stage_fetch_sms", type=int, default=None, help="Fetcher WGs for stage 0 (fills first GPU wave; defaults to num_fetch_sms)") parser.add_argument("--trace", action="store_true", help="Collect per-workgroup trace and save Gantt chart PNG") parser.add_argument("--trace_output", type=str, default="trace_gantt.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -355,6 +369,7 @@ def 
_worker(args): num_warps = args["num_warps"] num_stages = args["num_stages"] num_fetch_stages = args["num_fetch_stages"] + first_stage_fetch_sms = args["first_stage_fetch_sms"] def run_experiment(): nonlocal total_ms, num_experiments @@ -374,6 +389,7 @@ def run_experiment(): num_warps=num_warps, num_stages=num_stages, num_fetch_stages=num_fetch_stages, + first_stage_fetch_sms=first_stage_fetch_sms, ) end_ev.record() num_experiments += 1 @@ -458,6 +474,7 @@ def run_experiment(): num_warps=num_warps, num_stages=num_stages, num_fetch_stages=num_fetch_stages, + first_stage_fetch_sms=first_stage_fetch_sms, ) torch.cuda.synchronize() t_end = time.perf_counter() @@ -497,7 +514,8 @@ def run_experiment(): config=config, async_op=False, workspace=workspace, num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, num_warps=num_warps, num_stages=num_stages, - num_fetch_stages=num_fetch_stages, trace=True, + num_fetch_stages=num_fetch_stages, + first_stage_fetch_sms=first_stage_fetch_sms, trace=True, ) torch.cuda.synchronize() shmem.barrier() @@ -521,6 +539,7 @@ def run_experiment(): num_warps=num_warps, num_stages=num_stages, num_fetch_stages=num_fetch_stages, + first_stage_fetch_sms=first_stage_fetch_sms, trace=True, ) torch.cuda.synchronize() diff --git a/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py b/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py new file mode 100644 index 000000000..7a5243eba --- /dev/null +++ b/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py @@ -0,0 +1,576 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Parameter tuning script for HBM-buffered all_gather_matmul. + +Sweeps parameters around a baseline configuration, collecting traces, TFLOPs, +PyTorch baseline, and validation for every configuration. + +This script does NOT modify benchmark_hbm_buffer.py — it invokes it via +``torchrun`` as a subprocess for each parameter set. 
+ +Usage: + # Default one-at-a-time sweep (each param varied independently): + python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py + + # Custom matrix size: + python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py -m 8192 -n 4096 -k 131072 + + # Only sweep specific parameters: + python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --params num_fetch_sms k_per_flag + + # Full cartesian product (warning: combinatorial explosion): + python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --mode full + + # Dry run — just print what would be tested: + python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --dry_run +""" + +import argparse +import json +import os +import re +import subprocess +import sys +import time +from datetime import datetime +from itertools import product +from pathlib import Path + +# ───────────────────────────────────────────────────────────────────────────── +# Baseline configuration — the centre point of every sweep. +# Edit these to match your current best-known config. +# ───────────────────────────────────────────────────────────────────────────── +BASELINE = { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 4, + "num_fetch_sms": 64, + "k_per_flag": 64, + "num_warps": 8, + "num_fetch_stages": 4, + "first_stage_fetch_sms": 304, +} + +# ───────────────────────────────────────────────────────────────────────────── +# Sweep ranges — values to try for each parameter. +# In ``oneatatime`` mode only one parameter deviates from the baseline at a +# time; in ``full`` mode the cartesian product is taken (use with care). 
def make_label(cfg):
    """Return a short, human-readable label for *cfg*.

    The label encodes every swept parameter (e.g. ``bm256_bn64_...``) and is
    also used by the sweep generator to de-duplicate configurations, so two
    configs with the same label are treated as identical.
    """
    parts = [
        f"bm{cfg['block_size_m']}",
        f"bn{cfg['block_size_n']}",
        f"bk{cfg['block_size_k']}",
        f"gm{cfg['group_size_m']}",
        f"nf{cfg['num_fetch_sms']}",
        f"kpf{cfg['k_per_flag']}",
        f"nw{cfg['num_warps']}",
        f"fs{cfg['num_fetch_stages']}",
    ]
    # first_stage_fetch_sms only matters with multiple fetch stages, so a
    # single-stage config's label deliberately omits it.
    if cfg["num_fetch_stages"] > 1:
        parts.append(f"fsf{cfg['first_stage_fetch_sms']}")
    return "_".join(parts)


def validate_config(cfg, M, N, K, world_size=8):
    """Return a list of error strings for *cfg*; an empty list means valid.

    Checks the divisibility constraints the HBM-buffer kernel relies on:
    block sizes must tile M/N/K exactly, K must shard evenly across ranks,
    and ``k_per_flag`` must divide the K-block count.
    """
    errors = []
    bm, bn, bk = cfg["block_size_m"], cfg["block_size_n"], cfg["block_size_k"]
    kpf = cfg["k_per_flag"]

    # K must shard evenly across ranks; otherwise K // world_size silently
    # floors and every K_local-based check below reasons about a truncated K.
    if K % world_size != 0:
        errors.append(f"K={K} not divisible by world_size={world_size}")
    K_local = K // world_size

    if M % bm != 0:
        errors.append(f"M={M} not divisible by block_size_m={bm}")
    if N % bn != 0:
        errors.append(f"N={N} not divisible by block_size_n={bn}")
    if K % bk != 0:
        errors.append(f"K={K} not divisible by block_size_k={bk}")
    if K_local % bk != 0:
        errors.append(f"K_local={K_local} not divisible by block_size_k={bk}")

    num_k_blocks = K // bk
    if num_k_blocks % kpf != 0:
        errors.append(f"num_k_blocks={num_k_blocks} not divisible by k_per_flag={kpf}")

    if cfg["num_warps"] not in (1, 2, 4, 8, 16):
        errors.append(f"num_warps={cfg['num_warps']} must be a power of 2 in [1..16]")

    # The launcher asserts num_fetch_stages >= 1; reject it here so the sweep
    # skips the config instead of crashing the benchmark subprocess.
    if cfg["num_fetch_stages"] < 1:
        errors.append(f"num_fetch_stages={cfg['num_fetch_stages']} must be >= 1")

    return errors
benchmark_pytorch=False): + """Build the ``torchrun`` CLI for one configuration.""" + cmd = [ + "torchrun", "--nproc_per_node", str(nproc), + "benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py", + "-m", str(M), + "-n", str(N), + "-k", str(K), + "--block_size_m", str(cfg["block_size_m"]), + "--block_size_n", str(cfg["block_size_n"]), + "--block_size_k", str(cfg["block_size_k"]), + "--group_size_m", str(cfg["group_size_m"]), + "--num_fetch_sms", str(cfg["num_fetch_sms"]), + "--k_per_flag", str(cfg["k_per_flag"]), + "--num_warps", str(cfg["num_warps"]), + "--num_fetch_stages", str(cfg["num_fetch_stages"]), + ] + + if cfg["num_fetch_stages"] > 1 and cfg.get("first_stage_fetch_sms") is not None: + cmd.extend(["--first_stage_fetch_sms", str(cfg["first_stage_fetch_sms"])]) + + if validate: + cmd.append("-v") + if benchmark: + cmd.append("-b") + if benchmark_pytorch: + cmd.append("--benchmark_pytorch") + + cmd.extend(["--trace", "--trace_output", trace_path]) + return cmd + + +# ── Output parsing ──────────────────────────────────────────────────────────── + +_RE_IRIS = re.compile( + r"HBM-Buffer\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s" +) +_RE_PYTORCH = re.compile( + r"PyTorch\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s" +) +_RE_SPEEDUP = re.compile(r"Speedup.*?:\s*([\d.]+)x") +_RE_VALID_FAIL = re.compile(r"Validation FAILED.*?max diff:\s*([\d.eE+-]+)") + + +def parse_output(output): + """Extract metrics from benchmark stdout+stderr.""" + result = { + "iris_ms": None, + "iris_tflops": None, + "iris_bw_gbps": None, + "pytorch_ms": None, + "pytorch_tflops": None, + "pytorch_bw_gbps": None, + "validation": None, + "speedup": None, + } + + m = _RE_IRIS.search(output) + if m: + result["iris_ms"] = float(m.group(1)) + result["iris_tflops"] = float(m.group(2)) + result["iris_bw_gbps"] = float(m.group(3)) + + m = _RE_PYTORCH.search(output) + if m: + result["pytorch_ms"] = float(m.group(1)) + result["pytorch_tflops"] = 
float(m.group(2)) + result["pytorch_bw_gbps"] = float(m.group(3)) + + if "Validation PASSED" in output: + result["validation"] = "PASSED" + elif "Validation FAILED" in output: + fm = _RE_VALID_FAIL.search(output) + result["validation"] = f"FAILED (diff={fm.group(1)})" if fm else "FAILED" + + m = _RE_SPEEDUP.search(output) + if m: + result["speedup"] = float(m.group(1)) + + return result + + +# ── Sweep generation ────────────────────────────────────────────────────────── + +def generate_configs(baseline, sweep_ranges, mode="oneatatime", params=None): + """ + Generate the list of configs to evaluate. + + Args: + baseline: dict of default values + sweep_ranges: dict mapping param name -> list of values + mode: "oneatatime" or "full" + params: optional list of param names to sweep (None = all) + """ + configs = [] + seen = set() + + def _add(cfg): + label = make_label(cfg) + if label not in seen: + configs.append(dict(cfg)) + seen.add(label) + + # Always include baseline first + _add(baseline) + + active_params = params if params else list(sweep_ranges.keys()) + + if mode == "oneatatime": + for param in active_params: + if param not in sweep_ranges: + print(f" WARNING: unknown param '{param}', skipping") + continue + for val in sweep_ranges[param]: + cfg = dict(baseline) + cfg[param] = val + # When num_fetch_stages == 1, first_stage_fetch_sms is irrelevant + if cfg["num_fetch_stages"] == 1: + cfg["first_stage_fetch_sms"] = cfg["num_fetch_sms"] + _add(cfg) + + elif mode == "full": + active_ranges = {p: sweep_ranges[p] for p in active_params if p in sweep_ranges} + names = list(active_ranges.keys()) + values = [active_ranges[n] for n in names] + for combo in product(*values): + cfg = dict(baseline) + for n, v in zip(names, combo): + cfg[n] = v + if cfg["num_fetch_stages"] == 1: + cfg["first_stage_fetch_sms"] = cfg["num_fetch_sms"] + _add(cfg) + + return configs + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main(): + parser 
= argparse.ArgumentParser( + description="Parameter tuning for HBM-buffered all_gather_matmul.", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + # ── Matrix dimensions ──────────────────────────────────────────────── + parser.add_argument("-m", type=int, default=16384, help="M dimension") + parser.add_argument("-n", type=int, default=2048, help="N dimension") + parser.add_argument("-k", type=int, default=131072, help="K dimension (total)") + parser.add_argument("--nproc", type=int, default=8, help="Number of GPUs") + + # ── Baseline overrides (non-swept params use these values) ──────── + parser.add_argument("--block_size_m", type=int, default=None, + help=f"Baseline block_size_m (default: {BASELINE['block_size_m']})") + parser.add_argument("--block_size_n", type=int, default=None, + help=f"Baseline block_size_n (default: {BASELINE['block_size_n']})") + parser.add_argument("--block_size_k", type=int, default=None, + help=f"Baseline block_size_k (default: {BASELINE['block_size_k']})") + parser.add_argument("--group_size_m", type=int, default=None, + help=f"Baseline group_size_m (default: {BASELINE['group_size_m']})") + parser.add_argument("--num_fetch_sms", type=int, default=None, + help=f"Baseline num_fetch_sms (default: {BASELINE['num_fetch_sms']})") + parser.add_argument("--k_per_flag", type=int, default=None, + help=f"Baseline k_per_flag (default: {BASELINE['k_per_flag']})") + parser.add_argument("--num_warps", type=int, default=None, + help=f"Baseline num_warps (default: {BASELINE['num_warps']})") + parser.add_argument("--num_fetch_stages", type=int, default=None, + help=f"Baseline num_fetch_stages (default: {BASELINE['num_fetch_stages']})") + parser.add_argument("--first_stage_fetch_sms", type=int, default=None, + help=f"Baseline first_stage_fetch_sms (default: {BASELINE['first_stage_fetch_sms']})") + + # ── Sweep control ───────────────────────────────────────────────── + parser.add_argument( + "--mode", choices=["oneatatime", "full"], 
default="oneatatime", + help="'oneatatime' varies one param at a time; 'full' = cartesian product", + ) + parser.add_argument( + "--params", nargs="+", default=None, + help="Only sweep these parameters (default: all). " + "Choices: " + ", ".join(SWEEP_RANGES.keys()), + ) + parser.add_argument("--output_dir", type=str, default=None, + help="Output directory (auto-generated if unset)") + parser.add_argument("--dry_run", action="store_true", + help="Print configs and exit without running") + parser.add_argument("--skip_validation", action="store_true", + help="Skip validation (faster, no correctness check)") + parser.add_argument("--timeout", type=int, default=600, + help="Per-config timeout in seconds (default: 600)") + + args = parser.parse_args() + M, N, K = args.m, args.n, args.k + + # Apply any CLI baseline overrides + baseline = dict(BASELINE) + for key in baseline: + cli_val = getattr(args, key, None) + if cli_val is not None: + baseline[key] = cli_val + + # Output directory + if args.output_dir: + output_dir = Path(args.output_dir) + else: + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + output_dir = Path(f"benchmark/ops/all_gather_matmul/tune_results_{ts}") + output_dir.mkdir(parents=True, exist_ok=True) + trace_dir = output_dir / "traces" + trace_dir.mkdir(exist_ok=True) + + # Generate configs + configs = generate_configs(baseline, SWEEP_RANGES, + mode=args.mode, params=args.params) + + # Pre-validate all configs + valid_configs = [] + skipped = [] + for cfg in configs: + errs = validate_config(cfg, M, N, K, world_size=args.nproc) + if errs: + skipped.append((cfg, errs)) + else: + valid_configs.append(cfg) + + # Banner + print(f"\n{'='*100}") + print(f" HBM-Buffer All-Gather MatMul — Parameter Tuning") + print(f" M={M} N={N} K={K} nproc={args.nproc} mode={args.mode}") + print(f" Baseline: {make_label(baseline)}") + print(f" Configs to run: {len(valid_configs)} (skipped: {len(skipped)})") + print(f" Output dir: {output_dir}") + print(f" Validation: {'OFF' if 
args.skip_validation else 'ON'}") + print(f"{'='*100}") + + if skipped: + print(f"\n Skipped (invalid for M={M}, N={N}, K={K}):") + for cfg, errs in skipped: + print(f" {make_label(cfg)}: {'; '.join(errs)}") + + if args.dry_run: + print(f"\n Configs that would be run:") + for i, cfg in enumerate(valid_configs): + label = make_label(cfg) + is_baseline = (cfg == baseline) + tag = " [BASELINE]" if is_baseline else "" + print(f" [{i+1:>3}] {label}{tag}") + print(f"\n Total: {len(valid_configs)} configs") + return + + # ── Run sweep ───────────────────────────────────────────────────────── + results = [] + pytorch_baseline = None + env = os.environ.copy() + env["HSA_NO_SCRATCH_RECLAIM"] = "1" + + total_start = time.time() + + for i, cfg in enumerate(valid_configs): + label = make_label(cfg) + trace_path = str(trace_dir / f"trace_{label}.png") + is_first = (i == 0) + + sep = "-" * 80 + print(f"\n{sep}") + print(f"[{i+1}/{len(valid_configs)}] {label}") + if is_first: + print(f" (includes PyTorch baseline benchmark)") + print(sep) + + cmd = build_command( + cfg, M, N, K, trace_path, + nproc=args.nproc, + validate=not args.skip_validation, + benchmark=True, + benchmark_pytorch=is_first, + ) + cmd_str = " ".join(cmd) + print(f" $ HSA_NO_SCRATCH_RECLAIM=1 {cmd_str}") + + t0 = time.time() + try: + proc = subprocess.run( + cmd, env=env, + capture_output=True, text=True, + timeout=args.timeout, + ) + elapsed = time.time() - t0 + full_output = proc.stdout + "\n" + proc.stderr + + parsed = parse_output(full_output) + + # Capture PyTorch baseline on first run + if is_first and parsed["pytorch_tflops"] is not None: + pytorch_baseline = { + "ms": parsed["pytorch_ms"], + "tflops": parsed["pytorch_tflops"], + "bw_gbps": parsed["pytorch_bw_gbps"], + } + + trace_exists = os.path.exists(trace_path) + results.append({ + "label": label, + "config": cfg, + "iris_ms": parsed["iris_ms"], + "iris_tflops": parsed["iris_tflops"], + "iris_bw_gbps": parsed["iris_bw_gbps"], + "validation": 
parsed["validation"], + "trace_path": trace_path if trace_exists else None, + "elapsed_s": round(elapsed, 1), + "returncode": proc.returncode, + }) + + # Print summary line + parts = [] + if parsed["iris_tflops"] is not None: + parts.append(f"{parsed['iris_tflops']:.2f} TFLOPS") + parts.append(f"{parsed['iris_ms']:.3f} ms") + if parsed["iris_bw_gbps"] is not None: + parts.append(f"{parsed['iris_bw_gbps']:.1f} GB/s") + if parsed["validation"]: + parts.append(f"valid={parsed['validation']}") + if trace_exists: + parts.append(f"trace=OK") + else: + parts.append(f"trace=MISSING") + if proc.returncode != 0: + parts.append(f"EXIT={proc.returncode}") + print(f" => {' | '.join(parts)} ({elapsed:.0f}s)") + + if is_first and pytorch_baseline: + print(f" => PyTorch baseline: {pytorch_baseline['tflops']:.2f} TFLOPS" + f" {pytorch_baseline['ms']:.3f} ms") + + # Save full log for debugging + log_path = output_dir / f"log_{label}.txt" + with open(log_path, "w") as f: + f.write(f"COMMAND: HSA_NO_SCRATCH_RECLAIM=1 {cmd_str}\n") + f.write(f"EXIT CODE: {proc.returncode}\n") + f.write(f"ELAPSED: {elapsed:.1f}s\n\n") + f.write("=== STDOUT ===\n") + f.write(proc.stdout) + f.write("\n=== STDERR ===\n") + f.write(proc.stderr) + + except subprocess.TimeoutExpired: + elapsed = time.time() - t0 + results.append({ + "label": label, + "config": cfg, + "iris_ms": None, + "iris_tflops": None, + "iris_bw_gbps": None, + "validation": "TIMEOUT", + "trace_path": None, + "elapsed_s": round(elapsed, 1), + "returncode": -1, + }) + print(f" => TIMEOUT after {args.timeout}s") + + except Exception as e: + elapsed = time.time() - t0 + results.append({ + "label": label, + "config": cfg, + "iris_ms": None, + "iris_tflops": None, + "iris_bw_gbps": None, + "validation": f"ERROR: {e}", + "trace_path": None, + "elapsed_s": round(elapsed, 1), + "returncode": -1, + }) + print(f" => ERROR: {e}") + + total_elapsed = time.time() - total_start + + # ── Summary table 
───────────────────────────────────────────────────── + W = 130 + print(f"\n\n{'='*W}") + print(f" TUNING RESULTS | M={M} N={N} K={K} | nproc={args.nproc} | " + f"{len(valid_configs)} configs in {total_elapsed:.0f}s") + if pytorch_baseline: + print(f" PyTorch baseline: {pytorch_baseline['ms']:.3f} ms | " + f"{pytorch_baseline['tflops']:.2f} TFLOPS | " + f"{pytorch_baseline['bw_gbps']:.1f} GB/s") + print(f"{'='*W}") + + col_label_w = 65 + print(f" {'#':>3} {'Configuration':<{col_label_w}} {'ms':>8} {'TFLOPS':>8} " + f"{'vs PT':>7} {'Valid':>8} {'Trace':>5}") + print(f" {'-'*(W-4)}") + + for i, r in enumerate(results): + ms_s = f"{r['iris_ms']:.3f}" if r["iris_ms"] is not None else "--" + tf_s = f"{r['iris_tflops']:.2f}" if r["iris_tflops"] is not None else "--" + + if pytorch_baseline and r["iris_tflops"] is not None and pytorch_baseline["tflops"] > 0: + vs_pt = f"{r['iris_tflops'] / pytorch_baseline['tflops']:.2f}x" + else: + vs_pt = "--" + + valid_s = (r["validation"] or "--")[:8] + trace_s = "Y" if r.get("trace_path") else "N" + + tag = " *" if (r["iris_tflops"] is not None and + r["iris_tflops"] == max((x["iris_tflops"] for x in results + if x["iris_tflops"] is not None), default=0)) else "" + + print(f" {i+1:>3} {r['label']:<{col_label_w}} {ms_s:>8} {tf_s:>8} " + f"{vs_pt:>7} {valid_s:>8} {trace_s:>5}{tag}") + + # Best config + valid_results = [r for r in results if r["iris_tflops"] is not None] + if valid_results: + best = max(valid_results, key=lambda r: r["iris_tflops"]) + worst = min(valid_results, key=lambda r: r["iris_tflops"]) + print(f"\n {'BEST':>6}: {best['label']}") + print(f" {best['iris_ms']:.3f} ms | {best['iris_tflops']:.2f} TFLOPS | " + f"valid={best['validation']}") + if pytorch_baseline and pytorch_baseline["tflops"] > 0: + print(f" {best['iris_tflops'] / pytorch_baseline['tflops']:.2f}x vs PyTorch") + if best.get("trace_path"): + print(f" trace: {best['trace_path']}") + print(f" {'WORST':>6}: {worst['label']}") + print(f" 
{worst['iris_ms']:.3f} ms | {worst['iris_tflops']:.2f} TFLOPS") + if best["iris_tflops"] > 0 and worst["iris_tflops"] > 0: + print(f" SPREAD: {best['iris_tflops'] / worst['iris_tflops']:.2f}x " + f"({worst['iris_tflops']:.2f} → {best['iris_tflops']:.2f} TFLOPS)") + + print(f"{'='*W}") + + # ── Save results JSON ───────────────────────────────────────────────── + results_path = output_dir / "results.json" + with open(results_path, "w") as f: + json.dump({ + "meta": { + "M": M, "N": N, "K": K, + "nproc": args.nproc, + "mode": args.mode, + "baseline": baseline, + "sweep_ranges": SWEEP_RANGES, + "timestamp": datetime.now().isoformat(), + "total_elapsed_s": round(total_elapsed, 1), + "pytorch_baseline": pytorch_baseline, + }, + "results": results, + }, f, indent=2, default=str) + + print(f"\n Results JSON : {results_path}") + print(f" Trace PNGs : {trace_dir}/") + print(f" Per-run logs : {output_dir}/log_*.txt") + print() + + +if __name__ == "__main__": + main() diff --git a/iris/iris.py b/iris/iris.py index 94cd0ae6e..c283abf24 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1780,10 +1780,6 @@ def reduce_scatter(self, output_tensor, input_tensor, op=None, group=None, async @triton.jit def __translate(ptr, from_rank, to_rank, heap_bases): - """ - Basic pointer translation without vectorization hints. - Used for atomic operations which may receive scalar pointers. - """ from_base = tl.load(heap_bases + from_rank) to_base = tl.load(heap_bases + to_rank) # convert to int to compute difference @@ -1797,9 +1793,21 @@ def __translate(ptr, from_rank, to_rank, heap_bases): # Cast to_base back to pointer type translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) + # Optimization to vectorize the load/store + # We can't do this in general because we don't know the shape of the tensor or block sizes + # ptr = tl.max_contiguous(tl.multiple_of(ptr, (16, 16)), (16, 32)) + + # 0 You can use this if your block sizes are multiples of 32. 
+ # Largest vectorized load instruction is dwordx4 (128-bits) + translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) + translated_ptr = tl.max_contiguous(translated_ptr, (1, 32)) + + # ptr = tl.max_contiguous(tl.multiple_of(ptr, 512), 512) + # translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, 512), 512) return translated_ptr + @triton.jit def __translate_block_2d(ptr, from_rank, to_rank, heap_bases): """ @@ -2029,7 +2037,7 @@ def load(self, pointer, from_rank, mask=None): Example: >>> data = ctx.load(buffer + offsets, from_rank=1, mask=mask) """ - translated_ptr = self._translate_block_2d(pointer, self.rank, from_rank) + translated_ptr = self.__translate(pointer, self.rank, from_rank) result = tl.load(translated_ptr, mask=mask) return result @@ -2055,7 +2063,7 @@ def store(self, pointer, value, to_rank, mask=None): Example: >>> ctx.store(buffer + offsets, values, to_rank=1, mask=mask) """ - translated_ptr = self._translate_block_2d(pointer, self.rank, to_rank) + translated_ptr = self.__translate(pointer, self.rank, to_rank) tl.store(translated_ptr, value, mask=mask) @triton.jit diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 4fdc1b067..e9a5b6b0b 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -61,6 +61,7 @@ def _hbm_buffer_all_gather_matmul_kernel( ALLOW_TF32: tl.constexpr, NUM_FETCH_STAGES: tl.constexpr, GEMM_TILES_PER_STAGE: tl.constexpr, + FIRST_STAGE_FETCH_SMS: tl.constexpr, trace_start_ptr, trace_end_ptr, trace_wait_ptr, @@ -71,21 +72,34 @@ def _hbm_buffer_all_gather_matmul_kernel( acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 zero = tl.program_id(0) * 0 + if TRACE: tl.store(trace_start_ptr + pid, read_realtime()) tl.store(trace_xcd_ptr + pid, get_xcc_id()) - # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] 
- WGS_PER_STAGE: tl.constexpr = NUM_FETCH_SMS + GEMM_TILES_PER_STAGE + # Interleaved layout with asymmetric first stage: + # [fetch0 (P)] [gemm0 (G)] [fetch1 (F)] [gemm1 (G)] ... + # P = FIRST_STAGE_FETCH_SMS, F = NUM_FETCH_SMS, G = GEMM_TILES_PER_STAGE + FIRST_STAGE_SIZE: tl.constexpr = FIRST_STAGE_FETCH_SMS + GEMM_TILES_PER_STAGE + REST_STAGE_SIZE: tl.constexpr = NUM_FETCH_SMS + GEMM_TILES_PER_STAGE M_PER_STAGE: tl.constexpr = (NUM_M_TILES + NUM_FETCH_STAGES - 1) // NUM_FETCH_STAGES - local_pid = pid % WGS_PER_STAGE + # Two-phase decode: stage 0 has a different size than subsequent stages + if pid < FIRST_STAGE_SIZE: + my_stage = zero + local_pid = pid + fetch_threshold = zero + FIRST_STAGE_FETCH_SMS + else: + adjusted = pid - FIRST_STAGE_SIZE + my_stage = 1 + adjusted // REST_STAGE_SIZE + local_pid = adjusted % REST_STAGE_SIZE + fetch_threshold = zero + NUM_FETCH_SMS - if local_pid < NUM_FETCH_SMS: + if local_pid < fetch_threshold: # ============================================================== - # FETCHER — interleaved: stage determined by pid // WGS_PER_STAGE + # FETCHER — stage 0 uses FIRST_STAGE_FETCH_SMS WGs, + # later stages use NUM_FETCH_SMS WGs # ============================================================== - my_stage = pid // WGS_PER_STAGE stage_pid = local_pid ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) @@ -95,11 +109,12 @@ def _hbm_buffer_all_gather_matmul_kernel( for const_stage in range(NUM_FETCH_STAGES): if my_stage == const_stage: + stage_fetch_sms = FIRST_STAGE_FETCH_SMS if const_stage == 0 else NUM_FETCH_SMS stage_m_start = const_stage * M_PER_STAGE stage_m_count = min(M_PER_STAGE, NUM_M_TILES - stage_m_start) total_fg_stage = NUM_FLAG_GROUPS_K * stage_m_count - for fg_idx in range(stage_pid, total_fg_stage, NUM_FETCH_SMS): + for fg_idx in range(stage_pid, total_fg_stage, stage_fetch_sms): m_group = fg_idx // tiles_per_m_group within_group = fg_idx % tiles_per_m_group k_flag_group = within_group // GROUP_SIZE_M @@ 
-129,21 +144,18 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + #tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + tl.store(flags_ptr + flag_idx, 1, cache_modifier=".wt") if TRACE: tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") - tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") - tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") else: # ============================================================== - # GEMM — interleaved: stage determined by pid // WGS_PER_STAGE - # gemm_local_id indexes into this stage's M-tile range + # GEMM — gemm_local_id indexes into this stage's M-tile range # ============================================================== - my_stage = pid // WGS_PER_STAGE - gemm_local_id = local_pid - NUM_FETCH_SMS + gemm_local_id = local_pid - fetch_threshold stage_m_start = my_stage * M_PER_STAGE num_pid_in_group = GROUP_SIZE_M * NUM_TILES_N @@ -293,6 +305,7 @@ def all_gather_matmul_hbm_buffer( num_warps: Optional[int] = None, num_stages: Optional[int] = None, num_fetch_stages: int = 1, + first_stage_fetch_sms: Optional[int] = None, trace: bool = False, ) -> FusedWorkspace: """ @@ -363,12 +376,17 @@ def all_gather_matmul_hbm_buffer( assert 0 < num_fetch_sms assert num_fetch_stages >= 1 - # Interleaved layout: [fetch0 | gemm0 | fetch1 | gemm1 | ...] + # First stage can use more fetcher WGs to fill the first GPU wave + if first_stage_fetch_sms is None: + first_stage_fetch_sms = num_fetch_sms + + # Interleaved layout: [fetch0 (P)] [gemm0 (G)] [fetch1 (F)] [gemm1 (G)] ... 
m_per_stage = (num_m_tiles + num_fetch_stages - 1) // num_fetch_stages gemm_tiles_per_stage = m_per_stage * num_tiles_n - wgs_per_stage = num_fetch_sms + gemm_tiles_per_stage - total_fetch_wgs = num_fetch_sms * num_fetch_stages - grid_size = wgs_per_stage * num_fetch_stages + first_stage_size = first_stage_fetch_sms + gemm_tiles_per_stage + rest_stage_size = num_fetch_sms + gemm_tiles_per_stage + total_fetch_wgs = first_stage_fetch_sms + num_fetch_sms * max(0, num_fetch_stages - 1) + grid_size = first_stage_size + rest_stage_size * max(0, num_fetch_stages - 1) # Trace buffers if trace: @@ -427,6 +445,7 @@ def all_gather_matmul_hbm_buffer( config.allow_tf32, num_fetch_stages, gemm_tiles_per_stage, + first_stage_fetch_sms, trace_start, trace_end, trace_wait, @@ -451,7 +470,9 @@ def all_gather_matmul_hbm_buffer( "total_fetch_wgs": total_fetch_wgs, "num_m_tiles": num_m_tiles, "num_tiles_n": num_tiles_n, - "wgs_per_stage": wgs_per_stage, + "first_stage_fetch_sms": first_stage_fetch_sms, + "first_stage_size": first_stage_size, + "rest_stage_size": rest_stage_size, "gemm_tiles_per_stage": gemm_tiles_per_stage, } From cbe2aff3d8bd08bef7cd5ce31fce7d1bd298dcad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 17 Feb 2026 21:05:48 +0000 Subject: [PATCH 19/60] Apply Ruff auto-fixes --- .../all_gather_matmul/benchmark_hbm_buffer.py | 32 +- .../ops/all_gather_matmul/tune_hbm_buffer.py | 342 ++++++++++-------- iris/iris.py | 1 - iris/ops/all_gather_matmul_hbm_buffer.py | 3 +- 4 files changed, 226 insertions(+), 152 deletions(-) diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 9b5e6c265..666a37a79 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -242,8 +242,18 @@ def parse_args(): parser.add_argument("--k_per_flag", type=int, default=1, help="K-blocks per ready flag") 
parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") - parser.add_argument("--num_fetch_stages", type=int, default=1, help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)") - parser.add_argument("--first_stage_fetch_sms", type=int, default=None, help="Fetcher WGs for stage 0 (fills first GPU wave; defaults to num_fetch_sms)") + parser.add_argument( + "--num_fetch_stages", + type=int, + default=1, + help="Number of fetch stages (1=all at once, 2=top/bottom half, etc.)", + ) + parser.add_argument( + "--first_stage_fetch_sms", + type=int, + default=None, + help="Fetcher WGs for stage 0 (fills first GPU wave; defaults to num_fetch_sms)", + ) parser.add_argument("--trace", action="store_true", help="Collect per-workgroup trace and save Gantt chart PNG") parser.add_argument("--trace_output", type=str, default="trace_gantt.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -502,12 +512,20 @@ def run_experiment(): workspace.locks.zero_() shmem.barrier() all_gather_matmul_hbm_buffer( - shmem, C, A_sharded, B, - config=config, async_op=False, workspace=workspace, - num_fetch_sms=num_fetch_sms, k_per_flag=k_per_flag, - num_warps=num_warps, num_stages=num_stages, + shmem, + C, + A_sharded, + B, + config=config, + async_op=False, + workspace=workspace, + num_fetch_sms=num_fetch_sms, + k_per_flag=k_per_flag, + num_warps=num_warps, + num_stages=num_stages, num_fetch_stages=num_fetch_stages, - first_stage_fetch_sms=first_stage_fetch_sms, trace=True, + first_stage_fetch_sms=first_stage_fetch_sms, + trace=True, ) torch.cuda.synchronize() shmem.barrier() diff --git a/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py b/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py index 7a5243eba..db9cc56f2 100644 --- a/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py +++ 
b/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py @@ -33,7 +33,6 @@ import os import re import subprocess -import sys import time from datetime import datetime from itertools import product @@ -61,14 +60,14 @@ # time; in ``full`` mode the cartesian product is taken (use with care). # ───────────────────────────────────────────────────────────────────────────── SWEEP_RANGES = { - "block_size_m": [64, 128, 256], - "block_size_n": [64, 128, 256], - "block_size_k": [64], - "group_size_m": [1, 2, 4, 8], - "num_fetch_sms": [64, 128, 192, 256], - "k_per_flag": [16, 32, 64, 128], - "num_warps": [4, 8], - "num_fetch_stages": [2, 4, 8], + "block_size_m": [64, 128, 256], + "block_size_n": [64, 128, 256], + "block_size_k": [64], + "group_size_m": [1, 2, 4, 8], + "num_fetch_sms": [64, 128, 192, 256], + "k_per_flag": [16, 32, 64, 128], + "num_warps": [4, 8], + "num_fetch_stages": [2, 4, 8], "first_stage_fetch_sms": [128, 192, 256, 304], } @@ -76,6 +75,7 @@ # Helpers # ───────────────────────────────────────────────────────────────────────────── + def make_label(cfg): """Short human-readable label for a config.""" parts = [ @@ -119,23 +119,35 @@ def validate_config(cfg, M, N, K, world_size=8): return errors -def build_command(cfg, M, N, K, trace_path, nproc=8, - validate=True, benchmark=True, benchmark_pytorch=False): +def build_command(cfg, M, N, K, trace_path, nproc=8, validate=True, benchmark=True, benchmark_pytorch=False): """Build the ``torchrun`` CLI for one configuration.""" cmd = [ - "torchrun", "--nproc_per_node", str(nproc), + "torchrun", + "--nproc_per_node", + str(nproc), "benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py", - "-m", str(M), - "-n", str(N), - "-k", str(K), - "--block_size_m", str(cfg["block_size_m"]), - "--block_size_n", str(cfg["block_size_n"]), - "--block_size_k", str(cfg["block_size_k"]), - "--group_size_m", str(cfg["group_size_m"]), - "--num_fetch_sms", str(cfg["num_fetch_sms"]), - "--k_per_flag", str(cfg["k_per_flag"]), - "--num_warps", 
str(cfg["num_warps"]), - "--num_fetch_stages", str(cfg["num_fetch_stages"]), + "-m", + str(M), + "-n", + str(N), + "-k", + str(K), + "--block_size_m", + str(cfg["block_size_m"]), + "--block_size_n", + str(cfg["block_size_n"]), + "--block_size_k", + str(cfg["block_size_k"]), + "--group_size_m", + str(cfg["group_size_m"]), + "--num_fetch_sms", + str(cfg["num_fetch_sms"]), + "--k_per_flag", + str(cfg["k_per_flag"]), + "--num_warps", + str(cfg["num_warps"]), + "--num_fetch_stages", + str(cfg["num_fetch_stages"]), ] if cfg["num_fetch_stages"] > 1 and cfg.get("first_stage_fetch_sms") is not None: @@ -154,12 +166,8 @@ def build_command(cfg, M, N, K, trace_path, nproc=8, # ── Output parsing ──────────────────────────────────────────────────────────── -_RE_IRIS = re.compile( - r"HBM-Buffer\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s" -) -_RE_PYTORCH = re.compile( - r"PyTorch\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s" -) +_RE_IRIS = re.compile(r"HBM-Buffer\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s") +_RE_PYTORCH = re.compile(r"PyTorch\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s") _RE_SPEEDUP = re.compile(r"Speedup.*?:\s*([\d.]+)x") _RE_VALID_FAIL = re.compile(r"Validation FAILED.*?max diff:\s*([\d.eE+-]+)") @@ -204,6 +212,7 @@ def parse_output(output): # ── Sweep generation ────────────────────────────────────────────────────────── + def generate_configs(baseline, sweep_ranges, mode="oneatatime", params=None): """ Generate the list of configs to evaluate. 
@@ -258,6 +267,7 @@ def _add(cfg): # ── Main ────────────────────────────────────────────────────────────────────── + def main(): parser = argparse.ArgumentParser( description="Parameter tuning for HBM-buffered all_gather_matmul.", @@ -270,43 +280,57 @@ def main(): parser.add_argument("--nproc", type=int, default=8, help="Number of GPUs") # ── Baseline overrides (non-swept params use these values) ──────── - parser.add_argument("--block_size_m", type=int, default=None, - help=f"Baseline block_size_m (default: {BASELINE['block_size_m']})") - parser.add_argument("--block_size_n", type=int, default=None, - help=f"Baseline block_size_n (default: {BASELINE['block_size_n']})") - parser.add_argument("--block_size_k", type=int, default=None, - help=f"Baseline block_size_k (default: {BASELINE['block_size_k']})") - parser.add_argument("--group_size_m", type=int, default=None, - help=f"Baseline group_size_m (default: {BASELINE['group_size_m']})") - parser.add_argument("--num_fetch_sms", type=int, default=None, - help=f"Baseline num_fetch_sms (default: {BASELINE['num_fetch_sms']})") - parser.add_argument("--k_per_flag", type=int, default=None, - help=f"Baseline k_per_flag (default: {BASELINE['k_per_flag']})") - parser.add_argument("--num_warps", type=int, default=None, - help=f"Baseline num_warps (default: {BASELINE['num_warps']})") - parser.add_argument("--num_fetch_stages", type=int, default=None, - help=f"Baseline num_fetch_stages (default: {BASELINE['num_fetch_stages']})") - parser.add_argument("--first_stage_fetch_sms", type=int, default=None, - help=f"Baseline first_stage_fetch_sms (default: {BASELINE['first_stage_fetch_sms']})") + parser.add_argument( + "--block_size_m", type=int, default=None, help=f"Baseline block_size_m (default: {BASELINE['block_size_m']})" + ) + parser.add_argument( + "--block_size_n", type=int, default=None, help=f"Baseline block_size_n (default: {BASELINE['block_size_n']})" + ) + parser.add_argument( + "--block_size_k", type=int, default=None, 
help=f"Baseline block_size_k (default: {BASELINE['block_size_k']})" + ) + parser.add_argument( + "--group_size_m", type=int, default=None, help=f"Baseline group_size_m (default: {BASELINE['group_size_m']})" + ) + parser.add_argument( + "--num_fetch_sms", type=int, default=None, help=f"Baseline num_fetch_sms (default: {BASELINE['num_fetch_sms']})" + ) + parser.add_argument( + "--k_per_flag", type=int, default=None, help=f"Baseline k_per_flag (default: {BASELINE['k_per_flag']})" + ) + parser.add_argument( + "--num_warps", type=int, default=None, help=f"Baseline num_warps (default: {BASELINE['num_warps']})" + ) + parser.add_argument( + "--num_fetch_stages", + type=int, + default=None, + help=f"Baseline num_fetch_stages (default: {BASELINE['num_fetch_stages']})", + ) + parser.add_argument( + "--first_stage_fetch_sms", + type=int, + default=None, + help=f"Baseline first_stage_fetch_sms (default: {BASELINE['first_stage_fetch_sms']})", + ) # ── Sweep control ───────────────────────────────────────────────── parser.add_argument( - "--mode", choices=["oneatatime", "full"], default="oneatatime", + "--mode", + choices=["oneatatime", "full"], + default="oneatatime", help="'oneatatime' varies one param at a time; 'full' = cartesian product", ) parser.add_argument( - "--params", nargs="+", default=None, - help="Only sweep these parameters (default: all). " - "Choices: " + ", ".join(SWEEP_RANGES.keys()), + "--params", + nargs="+", + default=None, + help="Only sweep these parameters (default: all). 
Choices: " + ", ".join(SWEEP_RANGES.keys()), ) - parser.add_argument("--output_dir", type=str, default=None, - help="Output directory (auto-generated if unset)") - parser.add_argument("--dry_run", action="store_true", - help="Print configs and exit without running") - parser.add_argument("--skip_validation", action="store_true", - help="Skip validation (faster, no correctness check)") - parser.add_argument("--timeout", type=int, default=600, - help="Per-config timeout in seconds (default: 600)") + parser.add_argument("--output_dir", type=str, default=None, help="Output directory (auto-generated if unset)") + parser.add_argument("--dry_run", action="store_true", help="Print configs and exit without running") + parser.add_argument("--skip_validation", action="store_true", help="Skip validation (faster, no correctness check)") + parser.add_argument("--timeout", type=int, default=600, help="Per-config timeout in seconds (default: 600)") args = parser.parse_args() M, N, K = args.m, args.n, args.k @@ -329,8 +353,7 @@ def main(): trace_dir.mkdir(exist_ok=True) # Generate configs - configs = generate_configs(baseline, SWEEP_RANGES, - mode=args.mode, params=args.params) + configs = generate_configs(baseline, SWEEP_RANGES, mode=args.mode, params=args.params) # Pre-validate all configs valid_configs = [] @@ -343,14 +366,14 @@ def main(): valid_configs.append(cfg) # Banner - print(f"\n{'='*100}") - print(f" HBM-Buffer All-Gather MatMul — Parameter Tuning") + print(f"\n{'=' * 100}") + print(" HBM-Buffer All-Gather MatMul — Parameter Tuning") print(f" M={M} N={N} K={K} nproc={args.nproc} mode={args.mode}") print(f" Baseline: {make_label(baseline)}") print(f" Configs to run: {len(valid_configs)} (skipped: {len(skipped)})") print(f" Output dir: {output_dir}") print(f" Validation: {'OFF' if args.skip_validation else 'ON'}") - print(f"{'='*100}") + print(f"{'=' * 100}") if skipped: print(f"\n Skipped (invalid for M={M}, N={N}, K={K}):") @@ -358,12 +381,12 @@ def main(): print(f" 
{make_label(cfg)}: {'; '.join(errs)}") if args.dry_run: - print(f"\n Configs that would be run:") + print("\n Configs that would be run:") for i, cfg in enumerate(valid_configs): label = make_label(cfg) - is_baseline = (cfg == baseline) + is_baseline = cfg == baseline tag = " [BASELINE]" if is_baseline else "" - print(f" [{i+1:>3}] {label}{tag}") + print(f" [{i + 1:>3}] {label}{tag}") print(f"\n Total: {len(valid_configs)} configs") return @@ -378,17 +401,21 @@ def main(): for i, cfg in enumerate(valid_configs): label = make_label(cfg) trace_path = str(trace_dir / f"trace_{label}.png") - is_first = (i == 0) + is_first = i == 0 sep = "-" * 80 print(f"\n{sep}") - print(f"[{i+1}/{len(valid_configs)}] {label}") + print(f"[{i + 1}/{len(valid_configs)}] {label}") if is_first: - print(f" (includes PyTorch baseline benchmark)") + print(" (includes PyTorch baseline benchmark)") print(sep) cmd = build_command( - cfg, M, N, K, trace_path, + cfg, + M, + N, + K, + trace_path, nproc=args.nproc, validate=not args.skip_validation, benchmark=True, @@ -400,8 +427,10 @@ def main(): t0 = time.time() try: proc = subprocess.run( - cmd, env=env, - capture_output=True, text=True, + cmd, + env=env, + capture_output=True, + text=True, timeout=args.timeout, ) elapsed = time.time() - t0 @@ -418,17 +447,19 @@ def main(): } trace_exists = os.path.exists(trace_path) - results.append({ - "label": label, - "config": cfg, - "iris_ms": parsed["iris_ms"], - "iris_tflops": parsed["iris_tflops"], - "iris_bw_gbps": parsed["iris_bw_gbps"], - "validation": parsed["validation"], - "trace_path": trace_path if trace_exists else None, - "elapsed_s": round(elapsed, 1), - "returncode": proc.returncode, - }) + results.append( + { + "label": label, + "config": cfg, + "iris_ms": parsed["iris_ms"], + "iris_tflops": parsed["iris_tflops"], + "iris_bw_gbps": parsed["iris_bw_gbps"], + "validation": parsed["validation"], + "trace_path": trace_path if trace_exists else None, + "elapsed_s": round(elapsed, 1), + 
"returncode": proc.returncode, + } + ) # Print summary line parts = [] @@ -440,16 +471,17 @@ def main(): if parsed["validation"]: parts.append(f"valid={parsed['validation']}") if trace_exists: - parts.append(f"trace=OK") + parts.append("trace=OK") else: - parts.append(f"trace=MISSING") + parts.append("trace=MISSING") if proc.returncode != 0: parts.append(f"EXIT={proc.returncode}") print(f" => {' | '.join(parts)} ({elapsed:.0f}s)") if is_first and pytorch_baseline: - print(f" => PyTorch baseline: {pytorch_baseline['tflops']:.2f} TFLOPS" - f" {pytorch_baseline['ms']:.3f} ms") + print( + f" => PyTorch baseline: {pytorch_baseline['tflops']:.2f} TFLOPS {pytorch_baseline['ms']:.3f} ms" + ) # Save full log for debugging log_path = output_dir / f"log_{label}.txt" @@ -464,51 +496,61 @@ def main(): except subprocess.TimeoutExpired: elapsed = time.time() - t0 - results.append({ - "label": label, - "config": cfg, - "iris_ms": None, - "iris_tflops": None, - "iris_bw_gbps": None, - "validation": "TIMEOUT", - "trace_path": None, - "elapsed_s": round(elapsed, 1), - "returncode": -1, - }) + results.append( + { + "label": label, + "config": cfg, + "iris_ms": None, + "iris_tflops": None, + "iris_bw_gbps": None, + "validation": "TIMEOUT", + "trace_path": None, + "elapsed_s": round(elapsed, 1), + "returncode": -1, + } + ) print(f" => TIMEOUT after {args.timeout}s") except Exception as e: elapsed = time.time() - t0 - results.append({ - "label": label, - "config": cfg, - "iris_ms": None, - "iris_tflops": None, - "iris_bw_gbps": None, - "validation": f"ERROR: {e}", - "trace_path": None, - "elapsed_s": round(elapsed, 1), - "returncode": -1, - }) + results.append( + { + "label": label, + "config": cfg, + "iris_ms": None, + "iris_tflops": None, + "iris_bw_gbps": None, + "validation": f"ERROR: {e}", + "trace_path": None, + "elapsed_s": round(elapsed, 1), + "returncode": -1, + } + ) print(f" => ERROR: {e}") total_elapsed = time.time() - total_start # ── Summary table 
───────────────────────────────────────────────────── W = 130 - print(f"\n\n{'='*W}") - print(f" TUNING RESULTS | M={M} N={N} K={K} | nproc={args.nproc} | " - f"{len(valid_configs)} configs in {total_elapsed:.0f}s") + print(f"\n\n{'=' * W}") + print( + f" TUNING RESULTS | M={M} N={N} K={K} | nproc={args.nproc} | " + f"{len(valid_configs)} configs in {total_elapsed:.0f}s" + ) if pytorch_baseline: - print(f" PyTorch baseline: {pytorch_baseline['ms']:.3f} ms | " - f"{pytorch_baseline['tflops']:.2f} TFLOPS | " - f"{pytorch_baseline['bw_gbps']:.1f} GB/s") - print(f"{'='*W}") + print( + f" PyTorch baseline: {pytorch_baseline['ms']:.3f} ms | " + f"{pytorch_baseline['tflops']:.2f} TFLOPS | " + f"{pytorch_baseline['bw_gbps']:.1f} GB/s" + ) + print(f"{'=' * W}") col_label_w = 65 - print(f" {'#':>3} {'Configuration':<{col_label_w}} {'ms':>8} {'TFLOPS':>8} " - f"{'vs PT':>7} {'Valid':>8} {'Trace':>5}") - print(f" {'-'*(W-4)}") + print( + f" {'#':>3} {'Configuration':<{col_label_w}} {'ms':>8} {'TFLOPS':>8} " + f"{'vs PT':>7} {'Valid':>8} {'Trace':>5}" + ) + print(f" {'-' * (W - 4)}") for i, r in enumerate(results): ms_s = f"{r['iris_ms']:.3f}" if r["iris_ms"] is not None else "--" @@ -522,12 +564,20 @@ def main(): valid_s = (r["validation"] or "--")[:8] trace_s = "Y" if r.get("trace_path") else "N" - tag = " *" if (r["iris_tflops"] is not None and - r["iris_tflops"] == max((x["iris_tflops"] for x in results - if x["iris_tflops"] is not None), default=0)) else "" + tag = ( + " *" + if ( + r["iris_tflops"] is not None + and r["iris_tflops"] + == max((x["iris_tflops"] for x in results if x["iris_tflops"] is not None), default=0) + ) + else "" + ) - print(f" {i+1:>3} {r['label']:<{col_label_w}} {ms_s:>8} {tf_s:>8} " - f"{vs_pt:>7} {valid_s:>8} {trace_s:>5}{tag}") + print( + f" {i + 1:>3} {r['label']:<{col_label_w}} {ms_s:>8} {tf_s:>8} " + f"{vs_pt:>7} {valid_s:>8} {trace_s:>5}{tag}" + ) # Best config valid_results = [r for r in results if r["iris_tflops"] is not None] @@ -535,8 
+585,7 @@ def main(): best = max(valid_results, key=lambda r: r["iris_tflops"]) worst = min(valid_results, key=lambda r: r["iris_tflops"]) print(f"\n {'BEST':>6}: {best['label']}") - print(f" {best['iris_ms']:.3f} ms | {best['iris_tflops']:.2f} TFLOPS | " - f"valid={best['validation']}") + print(f" {best['iris_ms']:.3f} ms | {best['iris_tflops']:.2f} TFLOPS | valid={best['validation']}") if pytorch_baseline and pytorch_baseline["tflops"] > 0: print(f" {best['iris_tflops'] / pytorch_baseline['tflops']:.2f}x vs PyTorch") if best.get("trace_path"): @@ -544,27 +593,36 @@ def main(): print(f" {'WORST':>6}: {worst['label']}") print(f" {worst['iris_ms']:.3f} ms | {worst['iris_tflops']:.2f} TFLOPS") if best["iris_tflops"] > 0 and worst["iris_tflops"] > 0: - print(f" SPREAD: {best['iris_tflops'] / worst['iris_tflops']:.2f}x " - f"({worst['iris_tflops']:.2f} → {best['iris_tflops']:.2f} TFLOPS)") + print( + f" SPREAD: {best['iris_tflops'] / worst['iris_tflops']:.2f}x " + f"({worst['iris_tflops']:.2f} → {best['iris_tflops']:.2f} TFLOPS)" + ) - print(f"{'='*W}") + print(f"{'=' * W}") # ── Save results JSON ───────────────────────────────────────────────── results_path = output_dir / "results.json" with open(results_path, "w") as f: - json.dump({ - "meta": { - "M": M, "N": N, "K": K, - "nproc": args.nproc, - "mode": args.mode, - "baseline": baseline, - "sweep_ranges": SWEEP_RANGES, - "timestamp": datetime.now().isoformat(), - "total_elapsed_s": round(total_elapsed, 1), - "pytorch_baseline": pytorch_baseline, + json.dump( + { + "meta": { + "M": M, + "N": N, + "K": K, + "nproc": args.nproc, + "mode": args.mode, + "baseline": baseline, + "sweep_ranges": SWEEP_RANGES, + "timestamp": datetime.now().isoformat(), + "total_elapsed_s": round(total_elapsed, 1), + "pytorch_baseline": pytorch_baseline, + }, + "results": results, }, - "results": results, - }, f, indent=2, default=str) + f, + indent=2, + default=str, + ) print(f"\n Results JSON : {results_path}") print(f" Trace PNGs : 
{trace_dir}/") diff --git a/iris/iris.py b/iris/iris.py index c283abf24..d061f09ea 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1807,7 +1807,6 @@ def __translate(ptr, from_rank, to_rank, heap_bases): return translated_ptr - @triton.jit def __translate_block_2d(ptr, from_rank, to_rank, heap_bases): """ diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index e9a5b6b0b..8c2d94159 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -72,7 +72,6 @@ def _hbm_buffer_all_gather_matmul_kernel( acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 zero = tl.program_id(0) * 0 - if TRACE: tl.store(trace_start_ptr + pid, read_realtime()) tl.store(trace_xcd_ptr + pid, get_xcc_id()) @@ -144,7 +143,7 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - #tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") + # tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") tl.store(flags_ptr + flag_idx, 1, cache_modifier=".wt") if TRACE: From 11d90019dc440b04ae78b7dcbe3623c40f9896c7 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Tue, 3 Mar 2026 11:59:24 -0500 Subject: [PATCH 20/60] Add predictive params, fix pointer overflows, fix race conditions --- .../all_gather_matmul/benchmark_hbm_buffer.py | 90 ++- .../ops/all_gather_matmul/derive_params.py | 683 ++++++++++++++++++ iris/ops/all_gather_matmul_hbm_buffer.py | 8 +- 3 files changed, 766 insertions(+), 15 deletions(-) create mode 100644 benchmark/ops/all_gather_matmul/derive_params.py diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 666a37a79..7978c0682 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -33,6 +33,35 @@ ) from iris.ops 
import FusedConfig +_DERIVE_AVAILABLE = False +try: + import sys as _sys + _script_dir = os.path.dirname(os.path.abspath(__file__)) + if _script_dir not in _sys.path: + _sys.path.insert(0, _script_dir) + from derive_params import ( + derive as _derive_params, + DEFAULT_NUM_CUS, + DEFAULT_PEAK_TFLOPS_FP16, + DEFAULT_HBM_BW_GBPS, + DEFAULT_L2_SIZE_BYTES, + DEFAULT_SCHEDULING_FACTOR, + ) + _DERIVE_AVAILABLE = True +except Exception: + pass + +_MODEL_PARAMS = ( + "block_size_m", "block_size_n", "block_size_k", "group_size_m", + "num_fetch_sms", "k_per_flag", "num_warps", + "num_fetch_stages", "first_stage_fetch_sms", +) + +_FALLBACK_DEFAULTS = { + "block_size_m": 256, "block_size_n": 64, "block_size_k": 64, + "group_size_m": 1, "k_per_flag": 1, "num_fetch_stages": 1, +} + torch.manual_seed(123) random.seed(123) @@ -230,23 +259,23 @@ def parse_args(): action="store_true", help="Also benchmark PyTorch (all_gather_into_tensor + matmul)", ) - parser.add_argument("--block_size_m", type=int, default=256, help="Block size M") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size N") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size K") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size M") + parser.add_argument("--block_size_m", type=int, default=None, help="Block size M (model-derived if omitted)") + parser.add_argument("--block_size_n", type=int, default=None, help="Block size N (model-derived if omitted)") + parser.add_argument("--block_size_k", type=int, default=None, help="Block size K (model-derived if omitted)") + parser.add_argument("--group_size_m", type=int, default=None, help="Group size M (model-derived if omitted)") parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto if None)") parser.add_argument("--b_col_major", action="store_true", help="B col-major (K-contiguous)") parser.add_argument("--a_col_major", action="store_true", help="A col-major 
def _apply_model_defaults(args, world_size, dtype_bytes=2):
    """Populate unset (None) kernel parameters from the performance model.

    Parameters the model cannot supply (or when derive_params is not
    importable) fall back to the static defaults in ``_FALLBACK_DEFAULTS``.

    Returns the list of parameter names that were filled in by the model.
    """
    model_set = []

    if _DERIVE_AVAILABLE:
        # Best-effort: any failure in the model leaves args untouched and
        # the static fallbacks below take over.
        try:
            predicted = _derive_params(
                args["m"],
                args["n"],
                args["k"],
                world_size,
                link_bw=50.0,
                num_cus=DEFAULT_NUM_CUS,
                peak_tflops=DEFAULT_PEAK_TFLOPS_FP16,
                hbm_bw_gbps=DEFAULT_HBM_BW_GBPS,
                l2_size=DEFAULT_L2_SIZE_BYTES,
                scheduling_factor=DEFAULT_SCHEDULING_FACTOR,
                dtype_bytes=dtype_bytes,
            )
            for param in _MODEL_PARAMS:
                if args.get(param) is None and param in predicted:
                    args[param] = predicted[param]
                    model_set.append(param)
        except Exception:
            pass

    # Anything still unset gets a static default (not reported as model-set).
    for param, fallback in _FALLBACK_DEFAULTS.items():
        if args.get(param) is None:
            args[param] = fallback

    return model_set
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.

"""
Derive launch parameters for the HBM-buffered all_gather_matmul kernel.

Given a problem size (M, N, K), the world size, and the per-link XGMI
bandwidth, this module predicts kernel parameters that balance
communication against computation in the device-level pipeline.

The fused kernel uses two workgroup roles:
  - Fetcher WGs gather remote A tiles into an HBM staging buffer and set
    per-tile ready flags as data lands.
  - GEMM WGs poll those flags, then compute C += A_staged @ B tile by tile.

The M dimension is split into ``num_fetch_stages`` pipeline stages whose
fetchers and GEMM WGs are interleaved in the launch grid, so that stage
N+1's fetch overlaps with stage N's compute:

    |-- fetch stage 0 --|-- max(fetch, compute) * (S-1) --|-- compute last --|

Usage:
    python derive_params.py -m 131072 -n 2048 -k 16384
    python derive_params.py -m 196608 -n 2304 -k 16384 --link_bw 50
    python derive_params.py -m 196608 -n 2304 -k 16384 -v -b --trace

When --link_bw is omitted the XGMI link bandwidth is profiled
automatically by timing GPU-to-GPU copies from GPU 0 to each peer GPU.
"""

import argparse
import math
import time

# ── MI300X hardware defaults ────────────────────────────────────────────
DEFAULT_NUM_CUS = 304
DEFAULT_PEAK_TFLOPS_FP16 = 1300.0
DEFAULT_HBM_BW_GBPS = 5300.0
DEFAULT_L2_SIZE_BYTES = 256 * 1024 * 1024
DEFAULT_NUM_XCDS = 8
DEFAULT_WORLD_SIZE = 8

# Calibrated from MI300X trace data: ratio of measured wall time to the
# CU-work-queue lower bound. Captures WG dispatch overhead, cross-XCD
# coherence latency, and pipeline bubble effects.
DEFAULT_SCHEDULING_FACTOR = 4.5


def profile_link_bandwidth(world_size=DEFAULT_WORLD_SIZE):
    """Measure per-link unidirectional XGMI bandwidth in GB/s.

    Times 256 MB fp16 copies from GPU 0 to every other visible GPU with
    host-side timing bracketed by explicit device syncs, and returns the
    conservative (minimum) per-link bandwidth.
    """
    import torch

    n_gpus = torch.cuda.device_count()
    if n_gpus < 2:
        raise RuntimeError(
            f"Need >= 2 visible GPUs for bandwidth profiling, found {n_gpus}. "
            f"Pass --link_bw explicitly instead."
        )

    n_peers = min(world_size, n_gpus) - 1
    size_bytes = 256 * 1024 * 1024
    numel = size_bytes // 2  # fp16 elements
    warmup_iters = 10
    timed_iters = 40

    print(f"\n── Link Bandwidth Profiling {'─' * 43}")
    print(f" GPUs visible: {n_gpus}")
    print(f" Testing: GPU 0 → GPUs 1..{n_peers}")
    print(f" Transfer size: {size_bytes // (1024**2)} MB × {timed_iters} iterations\n")

    src = torch.empty(numel, dtype=torch.float16, device="cuda:0").normal_()
    per_peer_bw = []

    for peer in range(1, n_peers + 1):
        dst = torch.empty(numel, dtype=torch.float16, device=f"cuda:{peer}")

        # Warm up the link, then quiesce both devices before timing.
        for _ in range(warmup_iters):
            dst.copy_(src)
        torch.cuda.synchronize(0)
        torch.cuda.synchronize(peer)

        t0 = time.perf_counter()
        for _ in range(timed_iters):
            dst.copy_(src)
        torch.cuda.synchronize(peer)
        bw = size_bytes * timed_iters / (time.perf_counter() - t0) / 1e9

        per_peer_bw.append(bw)
        print(f" GPU 0 → GPU {peer}: {bw:6.1f} GB/s")

        del dst

    del src
    torch.cuda.empty_cache()

    bw_min = min(per_peer_bw)
    bw_max = max(per_peer_bw)
    bw_avg = sum(per_peer_bw) / len(per_peer_bw)
    print(f"\n min = {bw_min:.1f} avg = {bw_avg:.1f} max = {bw_max:.1f} GB/s")
    print(f" Using conservative (min): {bw_min:.1f} GB/s per link")

    return bw_min
256: + bn = 256 if N % 256 == 0 else 128 + else: + bn = 128 + while N % bn != 0 and bn > 32: + bn //= 2 + + while K % bk != 0 and bk > 16: + bk //= 2 + while K_local % bk != 0 and bk > 16: + bk //= 2 + + nw = 8 if bm * bn >= 256 * 256 else 4 + return bm, bn, bk, nw + + +def _choose_k_per_flag(num_k_blocks, num_k_blocks_local, target_groups=8): + """Pick k_per_flag so that flag groups align to rank boundaries when + possible, falling back to the largest divisor near the target.""" + if num_k_blocks % num_k_blocks_local == 0: + candidate = num_k_blocks_local + groups = num_k_blocks // candidate + if groups >= 4: + return candidate + + kpf = max(1, num_k_blocks // target_groups) + while num_k_blocks % kpf != 0 and kpf > 1: + kpf -= 1 + return kpf + + +# ── Per-tile roofline model ────────────────────────────────────────────── + +def _tile_roofline(bm, bn, bk, M, K, N, dtype_bytes, + peak_tflops, hbm_bw_gbps, l2_size): + """Compute achievable per-CU TFLOPS from tile arithmetic intensity. + + staged_a is always >> L2, so A tiles come from HBM. B may fit in L2 + only when staged_a is small enough that reads don't evict B. + Returns (roofline_tflops, tile_intensity, ridge_point, b_in_l2). + """ + tile_flops = 2 * bm * bn * bk + a_bytes = bm * bk * dtype_bytes + b_bytes = bk * bn * dtype_bytes + + b_total = K * N * dtype_bytes + staged_a_total = M * K * dtype_bytes + # When staged_a exceeds L2, streaming GEMM reads evict B regardless + # of B's absolute size. 
+ b_in_l2 = (staged_a_total <= l2_size) and (b_total <= l2_size) + + hbm_bytes = a_bytes + (0 if b_in_l2 else b_bytes) + intensity = tile_flops / max(hbm_bytes, 1) + + ridge = peak_tflops * 1e3 / hbm_bw_gbps + if intensity >= ridge: + roofline = peak_tflops + else: + roofline = hbm_bw_gbps * intensity / 1e3 + + return roofline, intensity, ridge, b_in_l2 + + +# ── Per-WG execution time models ──────────────────────────────────────── + +def _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups, + roofline_tflops, num_cus): + """Estimate per-WG GEMM execution time in microseconds. + + Uses the per-tile roofline to get the per-CU throughput, then applies + a calibrated overhead for memory-latency hiding and instruction + scheduling at single-WG occupancy (large tiles). + """ + total_flops = 2 * bm * bn * K + per_cu_tflops = roofline_tflops / num_cus + + # Roofline-ideal per-WG time + ideal_us = total_flops / (per_cu_tflops * 1e6) + + # Single-occupancy overhead: imperfect latency hiding, instruction + # scheduling gaps, cross-XCD coherence on staged_a reads. + # Calibrated from MI300X traces: actual/ideal ≈ 1.2-1.3. + occupancy_factor = 1.25 if bm * bn >= 256 * 256 else 1.10 + + # Flag polling: acquire-semantics atomic per flag group + flag_us = num_flag_groups * 2.5 + + return ideal_us * occupancy_factor + flag_us + + +def _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, + dtype_bytes, num_fgs_per_wg): + """Estimate per-fetcher-WG execution time in microseconds. + + Each flag group fetches kpf K-blocks (each BM × BK) from one rank. + Remote data traverses XGMI; local data uses HBM. 
+ """ + bytes_per_fg = bm * kpf * bk * dtype_bytes + remote_frac = (world_size - 1) / world_size + + # XGMI gather: raw transfer + iris.x.gather software overhead + remote_bytes = bytes_per_fg * remote_frac + gather_overhead = 1.5 + xgmi_us = remote_bytes / (link_bw * 1e3) * gather_overhead + + # HBM write to staged_a (.cg → L2/HBM, per-WG share of bandwidth) + write_bw = 15.0 # GB/s effective per fetcher WG (calibrated from traces) + write_us = bytes_per_fg / (write_bw * 1e3) + + # Read and write overlap within each tile; dominant cost + flag-store + per_fg_us = max(xgmi_us, write_us) + 5.0 + + return num_fgs_per_wg * per_fg_us + + +# ── Kernel time estimation ─────────────────────────────────────────────── + +def _estimate_kernel_time(total_gemm_wgs, gemm_wg_us, + total_fetch_wgs, fetch_wg_us, + num_cus, scheduling_factor): + """Estimate kernel wall-clock time from the CU work queue model. + + total_CU_work / num_CUs gives the ideal (work-conserving) lower + bound. The scheduling_factor captures GPU dispatch overhead, + cross-XCD coherence, and pipeline bubble effects measured on MI300X. 
+ """ + total_cu_work_us = (total_gemm_wgs * gemm_wg_us + + total_fetch_wgs * fetch_wg_us) + + ideal_ms = total_cu_work_us / num_cus / 1e3 + estimated_ms = ideal_ms * scheduling_factor + return estimated_ms, ideal_ms + + +# ── Pipeline stage selection ───────────────────────────────────────────── + +def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, + comm_time_ms, compute_time_ms, num_cus): + """Choose num_fetch_stages for good pipeline efficiency while keeping + m_per_stage divisible by group_size_m.""" + ratio = comm_time_ms / compute_time_ms if compute_time_ms > 0 else 999 + + if ratio > 1.5: + ideal_stages = 32 + elif ratio > 0.8: + ideal_stages = 16 + elif ratio > 0.3: + ideal_stages = 8 + else: + ideal_stages = 4 + + min_gemm_tiles = max(num_cus // 4, 32) + min_m_per_stage = max(group_size_m, + math.ceil(min_gemm_tiles / max(num_tiles_n, 1))) + max_stages = max(1, num_m_tiles // min_m_per_stage) + num_stages = min(ideal_stages, max_stages) + num_stages = max(1, num_stages) + + m_per_stage = math.ceil(num_m_tiles / num_stages) + if m_per_stage % group_size_m != 0: + m_per_stage = ((m_per_stage + group_size_m - 1) + // group_size_m) * group_size_m + num_stages = max(1, math.ceil(num_m_tiles / m_per_stage)) + + m_per_stage = math.ceil(num_m_tiles / num_stages) + return num_stages, m_per_stage + + +# ── num_fetch_sms optimisation ─────────────────────────────────────────── + +def _choose_num_fetch_sms(m_per_stage, group_size_m, num_flag_groups_k, + link_bw, world_size, num_cus, + bm, bk, kpf, dtype_bytes, + gemm_wg_us, gemm_tiles_per_stage): + """Choose num_fetch_sms for good pipeline overlap. + + Balances three constraints: + 1. Flag delivery parallelism: ≥ m_per_stage so every M-tile gets + a fetcher early (good for reducing GEMM flag-poll stalls). + 2. Link saturation: enough concurrent fetchers to use the XGMI + aggregate bandwidth. + 3. CU budget: leave enough CUs for GEMM in the first dispatch wave. 
+ + Returns (num_fetch_sms, per-WG timing info dict). + """ + total_fg_per_stage = num_flag_groups_k * m_per_stage + + # Constraint 1: one fetcher per M-group for broad flag delivery + parallel_min = max(1, m_per_stage // group_size_m) + + # Constraint 2: enough fetchers to keep XGMI links busy + per_fg_bytes = bm * kpf * bk * dtype_bytes + per_fg_remote = per_fg_bytes * (world_size - 1) / world_size + per_fg_xgmi_us = per_fg_remote / (link_bw * 1e3) * 1.5 + per_fg_write_us = per_fg_bytes / (15.0 * 1e3) + per_fg_us = max(per_fg_xgmi_us, per_fg_write_us) + 5.0 + + # Total flag groups per stage should finish within the stage GEMM time + gemm_waves = math.ceil(gemm_tiles_per_stage / num_cus) + stage_gemm_us = gemm_waves * gemm_wg_us + if per_fg_us > 0: + balance_min = max(1, math.ceil( + total_fg_per_stage * per_fg_us / stage_gemm_us)) + else: + balance_min = 1 + + nf = max(parallel_min, balance_min, 64) + nf = min(nf, num_cus // 2) + nf = max(1, nf) + + return nf + + +# ── Main derivation ────────────────────────────────────────────────────── + +def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, + hbm_bw_gbps, l2_size, scheduling_factor, dtype_bytes): + K_local = K // world_size + + # 1. Tile sizes + bm, bn, bk, nw = _choose_block_sizes(M, N, K, K_local) + gm = 4 + num_m_tiles = M // bm + num_tiles_n = math.ceil(N / bn) + num_k_blocks = K // bk + num_k_blocks_local = K_local // bk + + # 2. Per-tile roofline + roofline_tflops, intensity, ridge, b_in_l2 = _tile_roofline( + bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size) + + # 3. Communication model (link-limited) + total_remote_bytes = M * K_local * (world_size - 1) * dtype_bytes + total_link_bw = link_bw * (world_size - 1) + comm_time_ms = total_remote_bytes / (total_link_bw * 1e9) * 1e3 + + # 4. 
Compute model (roofline-limited) + total_flops = 2 * M * N * K + compute_time_ms = total_flops / (roofline_tflops * 1e12) * 1e3 + + ratio = comm_time_ms / compute_time_ms if compute_time_ms > 0 else 999 + + # 5. k_per_flag + kpf = _choose_k_per_flag(num_k_blocks, num_k_blocks_local) + num_flag_groups_k = num_k_blocks // kpf + + # 6. Pipeline stages + num_stages, m_per_stage = _choose_fetch_stages( + num_m_tiles, num_tiles_n, gm, comm_time_ms, compute_time_ms, num_cus) + gemm_tiles_per_stage = m_per_stage * num_tiles_n + + # 7. first_stage_fetch_sms: use all CUs to fill the pipeline ASAP + fsf = num_cus + + # 8. Per-WG timing + gemm_wg_us_val = _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups_k, + roofline_tflops, num_cus) + + # 9. Choose num_fetch_sms + nf = _choose_num_fetch_sms( + m_per_stage, gm, num_flag_groups_k, + link_bw, world_size, num_cus, + bm, bk, kpf, dtype_bytes, + gemm_wg_us_val, gemm_tiles_per_stage) + + # 10. Compute per-WG fetch times + total_fg_per_stage = num_flag_groups_k * m_per_stage + fgs_per_wg_stg0 = max(1, math.ceil(total_fg_per_stage / fsf)) + fgs_per_wg_rest = max(1, math.ceil(total_fg_per_stage / nf)) + fetch_us_stg0 = _fetch_wg_time_us(bm, bk, kpf, world_size, + link_bw, dtype_bytes, fgs_per_wg_stg0) + fetch_us_rest = _fetch_wg_time_us(bm, bk, kpf, world_size, + link_bw, dtype_bytes, fgs_per_wg_rest) + + # 11. Grid geometry + first_stage_size = fsf + gemm_tiles_per_stage + rest_stage_size = nf + gemm_tiles_per_stage + grid_size = first_stage_size + rest_stage_size * max(0, num_stages - 1) + total_fetch_wgs = fsf + nf * max(0, num_stages - 1) + total_gemm_wgs = gemm_tiles_per_stage * num_stages + + # 12. Kernel time estimate (CU-work model) + avg_fetch_us = (fsf * fetch_us_stg0 + nf * max(0, num_stages - 1) * fetch_us_rest) + avg_fetch_us /= max(total_fetch_wgs, 1) + est_kernel_ms, est_ideal_ms = _estimate_kernel_time( + total_gemm_wgs, gemm_wg_us_val, + total_fetch_wgs, avg_fetch_us, + num_cus, scheduling_factor) + + # 13. 
Link-limited pipeline estimate (simple model for comparison) + stage_m = m_per_stage * bm + stage_comm_ms = (stage_m * K_local * (world_size - 1) * dtype_bytes + / (total_link_bw * 1e9) * 1e3) + stage_compute_ms = (2 * stage_m * N * K + / (roofline_tflops * 1e12) * 1e3) + startup_ms = stage_comm_ms + steady_ms = max(stage_comm_ms, stage_compute_ms) * max(0, num_stages - 1) + drain_ms = stage_compute_ms + pipeline_ms = startup_ms + steady_ms + drain_ms + sequential_ms = comm_time_ms + compute_time_ms + + # 14. Standalone GEMM estimate (rocBLAS-class efficiency for comparison) + standalone_gemm_eff = 0.30 + standalone_tflops = roofline_tflops * standalone_gemm_eff + standalone_gemm_ms = total_flops / (standalone_tflops * 1e12) * 1e3 + pytorch_est_ms = comm_time_ms + standalone_gemm_ms + + staged_a_gb = M * K * dtype_bytes / (1024**3) + + return dict( + block_size_m=bm, block_size_n=bn, block_size_k=bk, + group_size_m=gm, num_warps=nw, + num_fetch_sms=nf, k_per_flag=kpf, + num_fetch_stages=num_stages, first_stage_fetch_sms=fsf, + # derived + K_local=K_local, num_m_tiles=num_m_tiles, num_tiles_n=num_tiles_n, + num_k_blocks=num_k_blocks, num_flag_groups_k=num_flag_groups_k, + m_per_stage=m_per_stage, gemm_tiles_per_stage=gemm_tiles_per_stage, + grid_size=grid_size, total_fetch_wgs=total_fetch_wgs, + total_gemm_wgs=total_gemm_wgs, + # roofline + roofline_tflops=roofline_tflops, tile_intensity=intensity, + ridge_point=ridge, b_in_l2=b_in_l2, + # per-WG timing + gemm_wg_us=gemm_wg_us_val, + fetch_wg_us_stg0=fetch_us_stg0, + fetch_wg_us_rest=fetch_us_rest, + # estimates + total_remote_bytes=total_remote_bytes, total_link_bw=total_link_bw, + comm_time_ms=comm_time_ms, total_flops=total_flops, + compute_time_ms=compute_time_ms, ratio=ratio, + stage_comm_ms=stage_comm_ms, stage_compute_ms=stage_compute_ms, + pipeline_ms=pipeline_ms, sequential_ms=sequential_ms, + est_kernel_ms=est_kernel_ms, + est_ideal_ms=est_ideal_ms, + standalone_gemm_ms=standalone_gemm_ms, + 
pytorch_est_ms=pytorch_est_ms, + staged_a_gb=staged_a_gb, + scheduling_factor=scheduling_factor, + ) + + +# ── Formatting helpers ─────────────────────────────────────────────────── + +def _fmt_bytes(n): + if n >= 1024**3: + return f"{n / 1024**3:.2f} GB" + if n >= 1024**2: + return f"{n / 1024**2:.1f} MB" + return f"{n / 1024:.1f} KB" + + +def _fmt_flops(n): + if n >= 1e15: + return f"{n / 1e15:.2f} PFLOPs" + return f"{n / 1e12:.2f} TFLOPs" + + +def _fmt_tflops(t): + return f"{t:.0f} TFLOPS" + + +# ── Analysis output ────────────────────────────────────────────────────── + +def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, + bw_profiled=False): + K_local = p["K_local"] + dtype_bytes = 2 + bound = "COMM-BOUND" if p["ratio"] > 1.0 else "COMPUTE-BOUND" + + print("=" * 72) + print(" All-Gather Matmul HBM-Buffer — Parameter Derivation") + print("=" * 72) + + # ── Problem ─────────────────────────────────────────────────────── + print(f"\n{'Problem':>14}: C({M}, {N}) = all_gather(A_shard({M}, {K_local})) @ B({K}, {N})") + print(f"{'World size':>14}: {world_size} GPUs") + print(f"{'Dtype':>14}: fp16 ({dtype_bytes}B)") + + # ── Data sizes ──────────────────────────────────────────────────── + a_shard = M * K_local * dtype_bytes + b_size = K * N * dtype_bytes + c_size = M * N * dtype_bytes + staged = M * K * dtype_bytes + print(f"\n{'A_shard':>14}: ({M}, {K_local}) {_fmt_bytes(a_shard)}") + print(f"{'B':>14}: ({K}, {N}) {_fmt_bytes(b_size)}") + print(f"{'C':>14}: ({M}, {N}) {_fmt_bytes(c_size)}") + print(f"{'staged_a':>14}: ({M}, {K}) {_fmt_bytes(staged)}") + if staged > 4 * 1024**3: + print(f"{'':>14} *** > 4 GB: requires int64 pointer arithmetic ***") + + # ── Per-tile roofline ───────────────────────────────────────────── + print(f"\n── Roofline {'─' * 59}") + print(f"{'Tile':>14}: ({p['block_size_m']}, {p['block_size_n']}, {p['block_size_k']})") + print(f"{'Intensity':>14}: {p['tile_intensity']:.0f} FLOPs/byte " + f"{'(B in L2)' if p['b_in_l2'] else 
'(B from HBM)'}") + print(f"{'Ridge point':>14}: {p['ridge_point']:.0f} FLOPs/byte") + region = "COMPUTE" if p["tile_intensity"] >= p["ridge_point"] else "MEMORY" + print(f"{'Roofline':>14}: {_fmt_tflops(p['roofline_tflops'])} ({region}-bound tiles)") + + # ── Communication ───────────────────────────────────────────────── + print(f"\n── Communication {'─' * 54}") + print(f"{'Remote bytes':>14}: {_fmt_bytes(p['total_remote_bytes'])} " + f"(from {world_size - 1} peers)") + bw_src = "profiled" if bw_profiled else "user" + print(f"{'Link BW':>14}: {link_bw:.1f} GB/s/link × {world_size - 1} links " + f"= {p['total_link_bw']:.0f} GB/s aggregate ({bw_src})") + print(f"{'Comm time':>14}: {p['comm_time_ms']:.3f} ms (link-limited)") + + # ── Compute ─────────────────────────────────────────────────────── + print(f"\n── Compute {'─' * 60}") + print(f"{'Total FLOPs':>14}: {_fmt_flops(p['total_flops'])}") + print(f"{'Roofline time':>14}: {p['compute_time_ms']:.3f} ms " + f"(at {_fmt_tflops(p['roofline_tflops'])})") + print(f"{'Comm/Compute':>14}: {p['ratio']:.2f}x → {bound}") + + # ── Per-WG timing ───────────────────────────────────────────────── + print(f"\n── Per-WG Model {'─' * 55}") + print(f"{'GEMM WG':>14}: {p['gemm_wg_us']:.0f} us " + f"({p['total_flops'] / p['total_gemm_wgs'] / 1e9:.2f} GFLOPs/WG)") + print(f"{'Fetch WG stg0':>14}: {p['fetch_wg_us_stg0']:.0f} us") + if p["num_fetch_stages"] > 1: + print(f"{'Fetch WG rest':>14}: {p['fetch_wg_us_rest']:.0f} us") + + # ── Pipeline ────────────────────────────────────────────────────── + S = p["num_fetch_stages"] + print(f"\n── Pipeline {'─' * 59}") + print(f"{'Stages (S)':>14}: {S}") + print(f"{'M tiles/stage':>14}: {p['m_per_stage']} ({p['m_per_stage'] * p['block_size_m']} rows)") + print(f"{'GEMM WGs/stg':>14}: {p['gemm_tiles_per_stage']} " + f"({p['m_per_stage']} m-tiles × {p['num_tiles_n']} n-tiles)") + print(f"{'K flag groups':>14}: {p['num_flag_groups_k']} " + f"(k_per_flag={p['k_per_flag']})") + print(f"{'Stage 
comm':>14}: {p['stage_comm_ms']:.3f} ms") + print(f"{'Stage compute':>14}: {p['stage_compute_ms']:.3f} ms") + + # ── Grid ────────────────────────────────────────────────────────── + print(f"\n── Grid Layout {'─' * 56}") + print(f"{'Stage 0':>14}: {p['first_stage_fetch_sms']} fetchers + " + f"{p['gemm_tiles_per_stage']} GEMM = " + f"{p['first_stage_fetch_sms'] + p['gemm_tiles_per_stage']} WGs") + if S > 1: + print(f"{'Stages 1..{}'.format(S - 1):>14}: {p['num_fetch_sms']} fetchers + " + f"{p['gemm_tiles_per_stage']} GEMM = " + f"{p['num_fetch_sms'] + p['gemm_tiles_per_stage']} WGs (×{S - 1})") + print(f"{'Total grid':>14}: {p['grid_size']} WGs " + f"({p['total_fetch_wgs']} fetch + {p['total_gemm_wgs']} GEMM)") + + # ── Time estimates ──────────────────────────────────────────────── + print(f"\n── Time Estimates {'─' * 53}") + print(f"{'CU-work lower':>14}: {p['est_ideal_ms']:.1f} ms " + f"(total WG time / {DEFAULT_NUM_CUS} CUs)") + print(f"{'Fused kernel':>14}: {p['est_kernel_ms']:.1f} ms " + f"(×{p['scheduling_factor']:.1f} scheduling overhead)") + est_tflops = p["total_flops"] / (p["est_kernel_ms"] * 1e-3) / 1e12 + print(f"{'Est. 
TFLOPS':>14}: {est_tflops:.0f} TFLOPS " + f"({est_tflops / p['roofline_tflops'] * 100:.0f}% of roofline)") + print(f"{'':>14}") + print(f"{'PyTorch est.':>14}: {p['pytorch_est_ms']:.1f} ms " + f"(all_gather {p['comm_time_ms']:.1f} + matmul {p['standalone_gemm_ms']:.1f})") + if p["est_kernel_ms"] < p["pytorch_est_ms"]: + speedup = p["pytorch_est_ms"] / p["est_kernel_ms"] + print(f"{'Fused speedup':>14}: {speedup:.2f}x over sequential PyTorch") + else: + slowdown = p["est_kernel_ms"] / p["pytorch_est_ms"] + print(f"{'Fused speedup':>14}: {1/slowdown:.2f}x (slower than sequential by {slowdown:.2f}x)") + + # ── Recommended parameters ──────────────────────────────────────── + print(f"\n── Recommended Kernel Parameters {'─' * 38}") + params = [ + ("block_size_m", p["block_size_m"]), + ("block_size_n", p["block_size_n"]), + ("block_size_k", p["block_size_k"]), + ("group_size_m", p["group_size_m"]), + ("num_fetch_sms", p["num_fetch_sms"]), + ("k_per_flag", p["k_per_flag"]), + ("num_warps", p["num_warps"]), + ("num_fetch_stages", p["num_fetch_stages"]), + ("first_stage_fetch_sms", p["first_stage_fetch_sms"]), + ] + for name, val in params: + print(f" --{name:30s} {val}") + + # ── Command line ────────────────────────────────────────────────── + extra = " ".join(passthrough_args) + if extra: + extra = " " + extra + cmd = ( + f"HSA_NO_SCRATCH_RECLAIM=1 torchrun --nproc_per_node {world_size} " + f"benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py " + f"-m {M} -n {N} -k {K} " + f"--block_size_m {p['block_size_m']} " + f"--block_size_n {p['block_size_n']} " + f"--block_size_k {p['block_size_k']} " + f"--group_size_m {p['group_size_m']} " + f"--num_fetch_sms {p['num_fetch_sms']} " + f"--k_per_flag {p['k_per_flag']} " + f"--num_warps {p['num_warps']} " + f"--num_fetch_stages {p['num_fetch_stages']} " + f"--first_stage_fetch_sms {p['first_stage_fetch_sms']}" + f"{extra}" + ) + print(f"\n── Command {'─' * 60}") + print(f" {cmd}") + print() + + +def main(): + parser = 
argparse.ArgumentParser( + description="Derive parameters for HBM-buffered all_gather_matmul.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("-m", type=int, required=True, help="M dimension (rows of output)") + parser.add_argument("-n", type=int, required=True, help="N dimension (cols of output)") + parser.add_argument("-k", type=int, required=True, help="K dimension (total reduction dim)") + parser.add_argument("--world_size", type=int, default=DEFAULT_WORLD_SIZE, + help="Number of GPUs") + parser.add_argument("--link_bw", type=float, default=None, + help="Per-link XGMI bandwidth in GB/s (one direction). " + "Omit to auto-profile via GPU-to-GPU copies.") + parser.add_argument("--num_cus", type=int, default=DEFAULT_NUM_CUS, + help="Number of compute units") + parser.add_argument("--peak_tflops", type=float, default=DEFAULT_PEAK_TFLOPS_FP16, + help="Peak fp16 TFLOPS") + parser.add_argument("--hbm_bw", type=float, default=DEFAULT_HBM_BW_GBPS, + help="HBM bandwidth in GB/s") + parser.add_argument("--scheduling_factor", type=float, + default=DEFAULT_SCHEDULING_FACTOR, + help="CU scheduling overhead factor (calibrated from traces)") + + args, passthrough = parser.parse_known_args() + + if args.k % args.world_size != 0: + parser.error(f"K ({args.k}) must be divisible by world_size ({args.world_size})") + + link_bw = args.link_bw + bw_profiled = False + if link_bw is None: + try: + link_bw = profile_link_bandwidth(args.world_size) + bw_profiled = True + except Exception as e: + print(f"\n Auto-profiling failed: {e}") + print(" Falling back to --link_bw 50 (MI300X default)\n") + link_bw = 50.0 + + p = derive(args.m, args.n, args.k, args.world_size, link_bw, + args.num_cus, args.peak_tflops, args.hbm_bw, + DEFAULT_L2_SIZE_BYTES, args.scheduling_factor, + dtype_bytes=2) + + print_analysis(args.m, args.n, args.k, args.world_size, link_bw, + p, passthrough, bw_profiled=bw_profiled) + + +if __name__ == "__main__": + main() 
diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 8c2d94159..36010e24f 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -135,7 +135,7 @@ def _hbm_buffer_all_gather_matmul_kernel( k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) - staged_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k + staged_ptrs = staged_a + rm.to(tl.int64)[:, None] * stride_sa_m + rk[None, :] * stride_sa_k for compile_rank in range(world_size): if src_rank_idx == compile_rank: @@ -143,8 +143,7 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group - # tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") - tl.store(flags_ptr + flag_idx, 1, cache_modifier=".wt") + tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") if TRACE: tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") @@ -193,8 +192,7 @@ def _hbm_buffer_all_gather_matmul_kernel( rk = k_block * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) rk = tl.max_contiguous(tl.multiple_of(rk, BLOCK_SIZE_K), BLOCK_SIZE_K) - # Use parameterized strides for staged_a - a_ptrs = staged_a + rm[:, None] * stride_sa_m + rk[None, :] * stride_sa_k + a_ptrs = staged_a + rm.to(tl.int64)[:, None] * stride_sa_m + rk[None, :] * stride_sa_k a = tl.load(a_ptrs) B_ptrs = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn From 3c4cb4dfa02cb5be71f84c32101011165bd57015 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 3 Mar 2026 17:00:39 +0000 Subject: [PATCH 21/60] Apply Ruff auto-fixes --- .../all_gather_matmul/benchmark_hbm_buffer.py | 38 ++- .../ops/all_gather_matmul/derive_params.py | 270 ++++++++++-------- 2 files changed, 184 insertions(+), 124 deletions(-) diff --git 
a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py index 7978c0682..190799986 100644 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py @@ -36,6 +36,7 @@ _DERIVE_AVAILABLE = False try: import sys as _sys + _script_dir = os.path.dirname(os.path.abspath(__file__)) if _script_dir not in _sys.path: _sys.path.insert(0, _script_dir) @@ -47,19 +48,30 @@ DEFAULT_L2_SIZE_BYTES, DEFAULT_SCHEDULING_FACTOR, ) + _DERIVE_AVAILABLE = True except Exception: pass _MODEL_PARAMS = ( - "block_size_m", "block_size_n", "block_size_k", "group_size_m", - "num_fetch_sms", "k_per_flag", "num_warps", - "num_fetch_stages", "first_stage_fetch_sms", + "block_size_m", + "block_size_n", + "block_size_k", + "group_size_m", + "num_fetch_sms", + "k_per_flag", + "num_warps", + "num_fetch_stages", + "first_stage_fetch_sms", ) _FALLBACK_DEFAULTS = { - "block_size_m": 256, "block_size_n": 64, "block_size_k": 64, - "group_size_m": 1, "k_per_flag": 1, "num_fetch_stages": 1, + "block_size_m": 256, + "block_size_n": 64, + "block_size_k": 64, + "group_size_m": 1, + "k_per_flag": 1, + "num_fetch_stages": 1, } torch.manual_seed(123) @@ -268,7 +280,9 @@ def parse_args(): parser.add_argument("--a_col_major", action="store_true", help="A col-major (M-contiguous)") parser.add_argument("--single-run", action="store_true", help="1 iteration (for profiling)") parser.add_argument("--num_fetch_sms", type=int, default=None, help="Fetcher SMs (auto if None)") - parser.add_argument("--k_per_flag", type=int, default=None, help="K-blocks per ready flag (model-derived if omitted)") + parser.add_argument( + "--k_per_flag", type=int, default=None, help="K-blocks per ready flag (model-derived if omitted)" + ) parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages 
(auto if None)") parser.add_argument( @@ -283,7 +297,12 @@ def parse_args(): default=None, help="Fetcher WGs for stage 0 (fills first GPU wave; defaults to num_fetch_sms)", ) - parser.add_argument("--trace", action=argparse.BooleanOptionalAction, default=True, help="Collect per-workgroup trace and save Gantt chart PNG") + parser.add_argument( + "--trace", + action=argparse.BooleanOptionalAction, + default=True, + help="Collect per-workgroup trace and save Gantt chart PNG", + ) parser.add_argument("--trace_output", type=str, default="trace.png", help="Output path for trace plot") return vars(parser.parse_args()) @@ -297,7 +316,10 @@ def _apply_model_defaults(args, world_size, dtype_bytes=2): if _DERIVE_AVAILABLE: try: p = _derive_params( - args["m"], args["n"], args["k"], world_size, + args["m"], + args["n"], + args["k"], + world_size, link_bw=50.0, num_cus=DEFAULT_NUM_CUS, peak_tflops=DEFAULT_PEAK_TFLOPS_FP16, diff --git a/benchmark/ops/all_gather_matmul/derive_params.py b/benchmark/ops/all_gather_matmul/derive_params.py index 539b298a5..cf4acd9fe 100644 --- a/benchmark/ops/all_gather_matmul/derive_params.py +++ b/benchmark/ops/all_gather_matmul/derive_params.py @@ -61,8 +61,7 @@ def profile_link_bandwidth(world_size=DEFAULT_WORLD_SIZE): n_gpus = torch.cuda.device_count() if n_gpus < 2: raise RuntimeError( - f"Need >= 2 visible GPUs for bandwidth profiling, found {n_gpus}. " - f"Pass --link_bw explicitly instead." + f"Need >= 2 visible GPUs for bandwidth profiling, found {n_gpus}. Pass --link_bw explicitly instead." 
) n_peers = min(world_size, n_gpus) - 1 @@ -113,6 +112,7 @@ def profile_link_bandwidth(world_size=DEFAULT_WORLD_SIZE): # ── Tile / block size heuristics ────────────────────────────────────────── + def _choose_block_sizes(M, N, K, K_local): """Heuristic tile-size selection for MI300X MFMA.""" bk = 64 @@ -156,8 +156,8 @@ def _choose_k_per_flag(num_k_blocks, num_k_blocks_local, target_groups=8): # ── Per-tile roofline model ────────────────────────────────────────────── -def _tile_roofline(bm, bn, bk, M, K, N, dtype_bytes, - peak_tflops, hbm_bw_gbps, l2_size): + +def _tile_roofline(bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size): """Compute achievable per-CU TFLOPS from tile arithmetic intensity. staged_a is always >> L2, so A tiles come from HBM. B may fit in L2 @@ -188,8 +188,8 @@ def _tile_roofline(bm, bn, bk, M, K, N, dtype_bytes, # ── Per-WG execution time models ──────────────────────────────────────── -def _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups, - roofline_tflops, num_cus): + +def _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups, roofline_tflops, num_cus): """Estimate per-WG GEMM execution time in microseconds. Uses the per-tile roofline to get the per-CU throughput, then applies @@ -213,8 +213,7 @@ def _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups, return ideal_us * occupancy_factor + flag_us -def _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, - dtype_bytes, num_fgs_per_wg): +def _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, num_fgs_per_wg): """Estimate per-fetcher-WG execution time in microseconds. Each flag group fetches kpf K-blocks (each BM × BK) from one rank. 
@@ -240,17 +239,15 @@ def _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, # ── Kernel time estimation ─────────────────────────────────────────────── -def _estimate_kernel_time(total_gemm_wgs, gemm_wg_us, - total_fetch_wgs, fetch_wg_us, - num_cus, scheduling_factor): + +def _estimate_kernel_time(total_gemm_wgs, gemm_wg_us, total_fetch_wgs, fetch_wg_us, num_cus, scheduling_factor): """Estimate kernel wall-clock time from the CU work queue model. total_CU_work / num_CUs gives the ideal (work-conserving) lower bound. The scheduling_factor captures GPU dispatch overhead, cross-XCD coherence, and pipeline bubble effects measured on MI300X. """ - total_cu_work_us = (total_gemm_wgs * gemm_wg_us + - total_fetch_wgs * fetch_wg_us) + total_cu_work_us = total_gemm_wgs * gemm_wg_us + total_fetch_wgs * fetch_wg_us ideal_ms = total_cu_work_us / num_cus / 1e3 estimated_ms = ideal_ms * scheduling_factor @@ -259,8 +256,8 @@ def _estimate_kernel_time(total_gemm_wgs, gemm_wg_us, # ── Pipeline stage selection ───────────────────────────────────────────── -def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, - comm_time_ms, compute_time_ms, num_cus): + +def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, comm_time_ms, compute_time_ms, num_cus): """Choose num_fetch_stages for good pipeline efficiency while keeping m_per_stage divisible by group_size_m.""" ratio = comm_time_ms / compute_time_ms if compute_time_ms > 0 else 999 @@ -275,16 +272,14 @@ def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, ideal_stages = 4 min_gemm_tiles = max(num_cus // 4, 32) - min_m_per_stage = max(group_size_m, - math.ceil(min_gemm_tiles / max(num_tiles_n, 1))) + min_m_per_stage = max(group_size_m, math.ceil(min_gemm_tiles / max(num_tiles_n, 1))) max_stages = max(1, num_m_tiles // min_m_per_stage) num_stages = min(ideal_stages, max_stages) num_stages = max(1, num_stages) m_per_stage = math.ceil(num_m_tiles / num_stages) if m_per_stage % group_size_m != 0: - 
m_per_stage = ((m_per_stage + group_size_m - 1) - // group_size_m) * group_size_m + m_per_stage = ((m_per_stage + group_size_m - 1) // group_size_m) * group_size_m num_stages = max(1, math.ceil(num_m_tiles / m_per_stage)) m_per_stage = math.ceil(num_m_tiles / num_stages) @@ -293,10 +288,21 @@ def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, # ── num_fetch_sms optimisation ─────────────────────────────────────────── -def _choose_num_fetch_sms(m_per_stage, group_size_m, num_flag_groups_k, - link_bw, world_size, num_cus, - bm, bk, kpf, dtype_bytes, - gemm_wg_us, gemm_tiles_per_stage): + +def _choose_num_fetch_sms( + m_per_stage, + group_size_m, + num_flag_groups_k, + link_bw, + world_size, + num_cus, + bm, + bk, + kpf, + dtype_bytes, + gemm_wg_us, + gemm_tiles_per_stage, +): """Choose num_fetch_sms for good pipeline overlap. Balances three constraints: @@ -324,8 +330,7 @@ def _choose_num_fetch_sms(m_per_stage, group_size_m, num_flag_groups_k, gemm_waves = math.ceil(gemm_tiles_per_stage / num_cus) stage_gemm_us = gemm_waves * gemm_wg_us if per_fg_us > 0: - balance_min = max(1, math.ceil( - total_fg_per_stage * per_fg_us / stage_gemm_us)) + balance_min = max(1, math.ceil(total_fg_per_stage * per_fg_us / stage_gemm_us)) else: balance_min = 1 @@ -338,8 +343,8 @@ def _choose_num_fetch_sms(m_per_stage, group_size_m, num_flag_groups_k, # ── Main derivation ────────────────────────────────────────────────────── -def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, - hbm_bw_gbps, l2_size, scheduling_factor, dtype_bytes): + +def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, hbm_bw_gbps, l2_size, scheduling_factor, dtype_bytes): K_local = K // world_size # 1. Tile sizes @@ -352,7 +357,8 @@ def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, # 2. 
Per-tile roofline roofline_tflops, intensity, ridge, b_in_l2 = _tile_roofline( - bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size) + bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size + ) # 3. Communication model (link-limited) total_remote_bytes = M * K_local * (world_size - 1) * dtype_bytes @@ -370,32 +376,37 @@ def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, num_flag_groups_k = num_k_blocks // kpf # 6. Pipeline stages - num_stages, m_per_stage = _choose_fetch_stages( - num_m_tiles, num_tiles_n, gm, comm_time_ms, compute_time_ms, num_cus) + num_stages, m_per_stage = _choose_fetch_stages(num_m_tiles, num_tiles_n, gm, comm_time_ms, compute_time_ms, num_cus) gemm_tiles_per_stage = m_per_stage * num_tiles_n # 7. first_stage_fetch_sms: use all CUs to fill the pipeline ASAP fsf = num_cus # 8. Per-WG timing - gemm_wg_us_val = _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups_k, - roofline_tflops, num_cus) + gemm_wg_us_val = _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups_k, roofline_tflops, num_cus) # 9. Choose num_fetch_sms nf = _choose_num_fetch_sms( - m_per_stage, gm, num_flag_groups_k, - link_bw, world_size, num_cus, - bm, bk, kpf, dtype_bytes, - gemm_wg_us_val, gemm_tiles_per_stage) + m_per_stage, + gm, + num_flag_groups_k, + link_bw, + world_size, + num_cus, + bm, + bk, + kpf, + dtype_bytes, + gemm_wg_us_val, + gemm_tiles_per_stage, + ) # 10. 
Compute per-WG fetch times total_fg_per_stage = num_flag_groups_k * m_per_stage fgs_per_wg_stg0 = max(1, math.ceil(total_fg_per_stage / fsf)) fgs_per_wg_rest = max(1, math.ceil(total_fg_per_stage / nf)) - fetch_us_stg0 = _fetch_wg_time_us(bm, bk, kpf, world_size, - link_bw, dtype_bytes, fgs_per_wg_stg0) - fetch_us_rest = _fetch_wg_time_us(bm, bk, kpf, world_size, - link_bw, dtype_bytes, fgs_per_wg_rest) + fetch_us_stg0 = _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, fgs_per_wg_stg0) + fetch_us_rest = _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, fgs_per_wg_rest) # 11. Grid geometry first_stage_size = fsf + gemm_tiles_per_stage @@ -405,19 +416,16 @@ def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, total_gemm_wgs = gemm_tiles_per_stage * num_stages # 12. Kernel time estimate (CU-work model) - avg_fetch_us = (fsf * fetch_us_stg0 + nf * max(0, num_stages - 1) * fetch_us_rest) + avg_fetch_us = fsf * fetch_us_stg0 + nf * max(0, num_stages - 1) * fetch_us_rest avg_fetch_us /= max(total_fetch_wgs, 1) est_kernel_ms, est_ideal_ms = _estimate_kernel_time( - total_gemm_wgs, gemm_wg_us_val, - total_fetch_wgs, avg_fetch_us, - num_cus, scheduling_factor) + total_gemm_wgs, gemm_wg_us_val, total_fetch_wgs, avg_fetch_us, num_cus, scheduling_factor + ) # 13. 
Link-limited pipeline estimate (simple model for comparison) stage_m = m_per_stage * bm - stage_comm_ms = (stage_m * K_local * (world_size - 1) * dtype_bytes - / (total_link_bw * 1e9) * 1e3) - stage_compute_ms = (2 * stage_m * N * K - / (roofline_tflops * 1e12) * 1e3) + stage_comm_ms = stage_m * K_local * (world_size - 1) * dtype_bytes / (total_link_bw * 1e9) * 1e3 + stage_compute_ms = 2 * stage_m * N * K / (roofline_tflops * 1e12) * 1e3 startup_ms = stage_comm_ms steady_ms = max(stage_comm_ms, stage_compute_ms) * max(0, num_stages - 1) drain_ms = stage_compute_ms @@ -433,29 +441,46 @@ def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, staged_a_gb = M * K * dtype_bytes / (1024**3) return dict( - block_size_m=bm, block_size_n=bn, block_size_k=bk, - group_size_m=gm, num_warps=nw, - num_fetch_sms=nf, k_per_flag=kpf, - num_fetch_stages=num_stages, first_stage_fetch_sms=fsf, + block_size_m=bm, + block_size_n=bn, + block_size_k=bk, + group_size_m=gm, + num_warps=nw, + num_fetch_sms=nf, + k_per_flag=kpf, + num_fetch_stages=num_stages, + first_stage_fetch_sms=fsf, # derived - K_local=K_local, num_m_tiles=num_m_tiles, num_tiles_n=num_tiles_n, - num_k_blocks=num_k_blocks, num_flag_groups_k=num_flag_groups_k, - m_per_stage=m_per_stage, gemm_tiles_per_stage=gemm_tiles_per_stage, - grid_size=grid_size, total_fetch_wgs=total_fetch_wgs, + K_local=K_local, + num_m_tiles=num_m_tiles, + num_tiles_n=num_tiles_n, + num_k_blocks=num_k_blocks, + num_flag_groups_k=num_flag_groups_k, + m_per_stage=m_per_stage, + gemm_tiles_per_stage=gemm_tiles_per_stage, + grid_size=grid_size, + total_fetch_wgs=total_fetch_wgs, total_gemm_wgs=total_gemm_wgs, # roofline - roofline_tflops=roofline_tflops, tile_intensity=intensity, - ridge_point=ridge, b_in_l2=b_in_l2, + roofline_tflops=roofline_tflops, + tile_intensity=intensity, + ridge_point=ridge, + b_in_l2=b_in_l2, # per-WG timing gemm_wg_us=gemm_wg_us_val, fetch_wg_us_stg0=fetch_us_stg0, fetch_wg_us_rest=fetch_us_rest, # estimates - 
total_remote_bytes=total_remote_bytes, total_link_bw=total_link_bw, - comm_time_ms=comm_time_ms, total_flops=total_flops, - compute_time_ms=compute_time_ms, ratio=ratio, - stage_comm_ms=stage_comm_ms, stage_compute_ms=stage_compute_ms, - pipeline_ms=pipeline_ms, sequential_ms=sequential_ms, + total_remote_bytes=total_remote_bytes, + total_link_bw=total_link_bw, + comm_time_ms=comm_time_ms, + total_flops=total_flops, + compute_time_ms=compute_time_ms, + ratio=ratio, + stage_comm_ms=stage_comm_ms, + stage_compute_ms=stage_compute_ms, + pipeline_ms=pipeline_ms, + sequential_ms=sequential_ms, est_kernel_ms=est_kernel_ms, est_ideal_ms=est_ideal_ms, standalone_gemm_ms=standalone_gemm_ms, @@ -467,6 +492,7 @@ def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, # ── Formatting helpers ─────────────────────────────────────────────────── + def _fmt_bytes(n): if n >= 1024**3: return f"{n / 1024**3:.2f} GB" @@ -487,8 +513,8 @@ def _fmt_tflops(t): # ── Analysis output ────────────────────────────────────────────────────── -def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, - bw_profiled=False): + +def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, bw_profiled=False): K_local = p["K_local"] dtype_bytes = 2 bound = "COMM-BOUND" if p["ratio"] > 1.0 else "COMPUTE-BOUND" @@ -517,32 +543,30 @@ def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, # ── Per-tile roofline ───────────────────────────────────────────── print(f"\n── Roofline {'─' * 59}") print(f"{'Tile':>14}: ({p['block_size_m']}, {p['block_size_n']}, {p['block_size_k']})") - print(f"{'Intensity':>14}: {p['tile_intensity']:.0f} FLOPs/byte " - f"{'(B in L2)' if p['b_in_l2'] else '(B from HBM)'}") + print(f"{'Intensity':>14}: {p['tile_intensity']:.0f} FLOPs/byte {'(B in L2)' if p['b_in_l2'] else '(B from HBM)'}") print(f"{'Ridge point':>14}: {p['ridge_point']:.0f} FLOPs/byte") region = "COMPUTE" if p["tile_intensity"] >= p["ridge_point"] else "MEMORY" 
print(f"{'Roofline':>14}: {_fmt_tflops(p['roofline_tflops'])} ({region}-bound tiles)") # ── Communication ───────────────────────────────────────────────── print(f"\n── Communication {'─' * 54}") - print(f"{'Remote bytes':>14}: {_fmt_bytes(p['total_remote_bytes'])} " - f"(from {world_size - 1} peers)") + print(f"{'Remote bytes':>14}: {_fmt_bytes(p['total_remote_bytes'])} (from {world_size - 1} peers)") bw_src = "profiled" if bw_profiled else "user" - print(f"{'Link BW':>14}: {link_bw:.1f} GB/s/link × {world_size - 1} links " - f"= {p['total_link_bw']:.0f} GB/s aggregate ({bw_src})") + print( + f"{'Link BW':>14}: {link_bw:.1f} GB/s/link × {world_size - 1} links " + f"= {p['total_link_bw']:.0f} GB/s aggregate ({bw_src})" + ) print(f"{'Comm time':>14}: {p['comm_time_ms']:.3f} ms (link-limited)") # ── Compute ─────────────────────────────────────────────────────── print(f"\n── Compute {'─' * 60}") print(f"{'Total FLOPs':>14}: {_fmt_flops(p['total_flops'])}") - print(f"{'Roofline time':>14}: {p['compute_time_ms']:.3f} ms " - f"(at {_fmt_tflops(p['roofline_tflops'])})") + print(f"{'Roofline time':>14}: {p['compute_time_ms']:.3f} ms (at {_fmt_tflops(p['roofline_tflops'])})") print(f"{'Comm/Compute':>14}: {p['ratio']:.2f}x → {bound}") # ── Per-WG timing ───────────────────────────────────────────────── print(f"\n── Per-WG Model {'─' * 55}") - print(f"{'GEMM WG':>14}: {p['gemm_wg_us']:.0f} us " - f"({p['total_flops'] / p['total_gemm_wgs'] / 1e9:.2f} GFLOPs/WG)") + print(f"{'GEMM WG':>14}: {p['gemm_wg_us']:.0f} us ({p['total_flops'] / p['total_gemm_wgs'] / 1e9:.2f} GFLOPs/WG)") print(f"{'Fetch WG stg0':>14}: {p['fetch_wg_us_stg0']:.0f} us") if p["num_fetch_stages"] > 1: print(f"{'Fetch WG rest':>14}: {p['fetch_wg_us_rest']:.0f} us") @@ -552,43 +576,47 @@ def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, print(f"\n── Pipeline {'─' * 59}") print(f"{'Stages (S)':>14}: {S}") print(f"{'M tiles/stage':>14}: {p['m_per_stage']} ({p['m_per_stage'] * 
p['block_size_m']} rows)") - print(f"{'GEMM WGs/stg':>14}: {p['gemm_tiles_per_stage']} " - f"({p['m_per_stage']} m-tiles × {p['num_tiles_n']} n-tiles)") - print(f"{'K flag groups':>14}: {p['num_flag_groups_k']} " - f"(k_per_flag={p['k_per_flag']})") + print( + f"{'GEMM WGs/stg':>14}: {p['gemm_tiles_per_stage']} ({p['m_per_stage']} m-tiles × {p['num_tiles_n']} n-tiles)" + ) + print(f"{'K flag groups':>14}: {p['num_flag_groups_k']} (k_per_flag={p['k_per_flag']})") print(f"{'Stage comm':>14}: {p['stage_comm_ms']:.3f} ms") print(f"{'Stage compute':>14}: {p['stage_compute_ms']:.3f} ms") # ── Grid ────────────────────────────────────────────────────────── print(f"\n── Grid Layout {'─' * 56}") - print(f"{'Stage 0':>14}: {p['first_stage_fetch_sms']} fetchers + " - f"{p['gemm_tiles_per_stage']} GEMM = " - f"{p['first_stage_fetch_sms'] + p['gemm_tiles_per_stage']} WGs") + print( + f"{'Stage 0':>14}: {p['first_stage_fetch_sms']} fetchers + " + f"{p['gemm_tiles_per_stage']} GEMM = " + f"{p['first_stage_fetch_sms'] + p['gemm_tiles_per_stage']} WGs" + ) if S > 1: - print(f"{'Stages 1..{}'.format(S - 1):>14}: {p['num_fetch_sms']} fetchers + " - f"{p['gemm_tiles_per_stage']} GEMM = " - f"{p['num_fetch_sms'] + p['gemm_tiles_per_stage']} WGs (×{S - 1})") - print(f"{'Total grid':>14}: {p['grid_size']} WGs " - f"({p['total_fetch_wgs']} fetch + {p['total_gemm_wgs']} GEMM)") + print( + f"{'Stages 1..{}'.format(S - 1):>14}: {p['num_fetch_sms']} fetchers + " + f"{p['gemm_tiles_per_stage']} GEMM = " + f"{p['num_fetch_sms'] + p['gemm_tiles_per_stage']} WGs (×{S - 1})" + ) + print(f"{'Total grid':>14}: {p['grid_size']} WGs ({p['total_fetch_wgs']} fetch + {p['total_gemm_wgs']} GEMM)") # ── Time estimates ──────────────────────────────────────────────── print(f"\n── Time Estimates {'─' * 53}") - print(f"{'CU-work lower':>14}: {p['est_ideal_ms']:.1f} ms " - f"(total WG time / {DEFAULT_NUM_CUS} CUs)") - print(f"{'Fused kernel':>14}: {p['est_kernel_ms']:.1f} ms " - 
f"(×{p['scheduling_factor']:.1f} scheduling overhead)") + print(f"{'CU-work lower':>14}: {p['est_ideal_ms']:.1f} ms (total WG time / {DEFAULT_NUM_CUS} CUs)") + print(f"{'Fused kernel':>14}: {p['est_kernel_ms']:.1f} ms (×{p['scheduling_factor']:.1f} scheduling overhead)") est_tflops = p["total_flops"] / (p["est_kernel_ms"] * 1e-3) / 1e12 - print(f"{'Est. TFLOPS':>14}: {est_tflops:.0f} TFLOPS " - f"({est_tflops / p['roofline_tflops'] * 100:.0f}% of roofline)") + print( + f"{'Est. TFLOPS':>14}: {est_tflops:.0f} TFLOPS ({est_tflops / p['roofline_tflops'] * 100:.0f}% of roofline)" + ) print(f"{'':>14}") - print(f"{'PyTorch est.':>14}: {p['pytorch_est_ms']:.1f} ms " - f"(all_gather {p['comm_time_ms']:.1f} + matmul {p['standalone_gemm_ms']:.1f})") + print( + f"{'PyTorch est.':>14}: {p['pytorch_est_ms']:.1f} ms " + f"(all_gather {p['comm_time_ms']:.1f} + matmul {p['standalone_gemm_ms']:.1f})" + ) if p["est_kernel_ms"] < p["pytorch_est_ms"]: speedup = p["pytorch_est_ms"] / p["est_kernel_ms"] print(f"{'Fused speedup':>14}: {speedup:.2f}x over sequential PyTorch") else: slowdown = p["est_kernel_ms"] / p["pytorch_est_ms"] - print(f"{'Fused speedup':>14}: {1/slowdown:.2f}x (slower than sequential by {slowdown:.2f}x)") + print(f"{'Fused speedup':>14}: {1 / slowdown:.2f}x (slower than sequential by {slowdown:.2f}x)") # ── Recommended parameters ──────────────────────────────────────── print(f"\n── Recommended Kernel Parameters {'─' * 38}") @@ -639,20 +667,22 @@ def main(): parser.add_argument("-m", type=int, required=True, help="M dimension (rows of output)") parser.add_argument("-n", type=int, required=True, help="N dimension (cols of output)") parser.add_argument("-k", type=int, required=True, help="K dimension (total reduction dim)") - parser.add_argument("--world_size", type=int, default=DEFAULT_WORLD_SIZE, - help="Number of GPUs") - parser.add_argument("--link_bw", type=float, default=None, - help="Per-link XGMI bandwidth in GB/s (one direction). 
" - "Omit to auto-profile via GPU-to-GPU copies.") - parser.add_argument("--num_cus", type=int, default=DEFAULT_NUM_CUS, - help="Number of compute units") - parser.add_argument("--peak_tflops", type=float, default=DEFAULT_PEAK_TFLOPS_FP16, - help="Peak fp16 TFLOPS") - parser.add_argument("--hbm_bw", type=float, default=DEFAULT_HBM_BW_GBPS, - help="HBM bandwidth in GB/s") - parser.add_argument("--scheduling_factor", type=float, - default=DEFAULT_SCHEDULING_FACTOR, - help="CU scheduling overhead factor (calibrated from traces)") + parser.add_argument("--world_size", type=int, default=DEFAULT_WORLD_SIZE, help="Number of GPUs") + parser.add_argument( + "--link_bw", + type=float, + default=None, + help="Per-link XGMI bandwidth in GB/s (one direction). Omit to auto-profile via GPU-to-GPU copies.", + ) + parser.add_argument("--num_cus", type=int, default=DEFAULT_NUM_CUS, help="Number of compute units") + parser.add_argument("--peak_tflops", type=float, default=DEFAULT_PEAK_TFLOPS_FP16, help="Peak fp16 TFLOPS") + parser.add_argument("--hbm_bw", type=float, default=DEFAULT_HBM_BW_GBPS, help="HBM bandwidth in GB/s") + parser.add_argument( + "--scheduling_factor", + type=float, + default=DEFAULT_SCHEDULING_FACTOR, + help="CU scheduling overhead factor (calibrated from traces)", + ) args, passthrough = parser.parse_known_args() @@ -670,13 +700,21 @@ def main(): print(" Falling back to --link_bw 50 (MI300X default)\n") link_bw = 50.0 - p = derive(args.m, args.n, args.k, args.world_size, link_bw, - args.num_cus, args.peak_tflops, args.hbm_bw, - DEFAULT_L2_SIZE_BYTES, args.scheduling_factor, - dtype_bytes=2) + p = derive( + args.m, + args.n, + args.k, + args.world_size, + link_bw, + args.num_cus, + args.peak_tflops, + args.hbm_bw, + DEFAULT_L2_SIZE_BYTES, + args.scheduling_factor, + dtype_bytes=2, + ) - print_analysis(args.m, args.n, args.k, args.world_size, link_bw, - p, passthrough, bw_profiled=bw_profiled) + print_analysis(args.m, args.n, args.k, args.world_size, link_bw, p, 
passthrough, bw_profiled=bw_profiled) if __name__ == "__main__": From 77eff5b17ea8a34c6c5f9e9fe66a0af10cd7f678 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Tue, 3 Mar 2026 16:32:15 -0500 Subject: [PATCH 22/60] Reverse 2D block translate --- iris/iris.py | 47 +++++------------------------------------------ iris/x/core.py | 5 +---- iris/x/gather.py | 30 +++++++++++------------------- 3 files changed, 17 insertions(+), 65 deletions(-) diff --git a/iris/iris.py b/iris/iris.py index d061f09ea..e68adc3f0 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1807,30 +1807,6 @@ def __translate(ptr, from_rank, to_rank, heap_bases): return translated_ptr -@triton.jit -def __translate_block_2d(ptr, from_rank, to_rank, heap_bases): - """ - Pointer translation for block load/store operations. - - Note: Vectorization hints should be applied in the tile_ptr computation (core.py) - where the 2D block shape is actually created, not here in the translation. - """ - from_base = tl.load(heap_bases + from_rank) - to_base = tl.load(heap_bases + to_rank) - # convert to int to compute difference - ptr_int = tl.cast(ptr, tl.uint64) - # Find the offset from from_rank heap - offset = ptr_int - from_base - # Byte cast for byte offset addition - to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) - # Find the offset into the to_rank heap - translated_ptr_byte = to_base_byte + offset - # Cast to_base back to pointer type - translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) - - return translated_ptr - - @aggregate class DeviceContext: """ @@ -2005,16 +1981,9 @@ def initialize(context_tensor, rank, world_size, tracing: tl.constexpr = False): @triton.jit def _translate(self, ptr, from_rank, to_rank): - """Internal pointer translation between rank address spaces. 
- Used for atomic operations which may receive scalar pointers.""" + """Internal pointer translation between rank address spaces.""" return __translate(ptr, from_rank, to_rank, self.heap_bases) - @triton.jit - def _translate_block_2d(self, ptr, from_rank, to_rank): - """Internal pointer translation with 2D vectorization hints. - Used for block load/store operations with 2D block pointers.""" - return __translate_block_2d(ptr, from_rank, to_rank, self.heap_bases) - @triton.jit def load(self, pointer, from_rank, mask=None): """ @@ -2036,7 +2005,7 @@ def load(self, pointer, from_rank, mask=None): Example: >>> data = ctx.load(buffer + offsets, from_rank=1, mask=mask) """ - translated_ptr = self.__translate(pointer, self.rank, from_rank) + translated_ptr = self._translate(pointer, self.rank, from_rank) result = tl.load(translated_ptr, mask=mask) return result @@ -2062,7 +2031,7 @@ def store(self, pointer, value, to_rank, mask=None): Example: >>> ctx.store(buffer + offsets, values, to_rank=1, mask=mask) """ - translated_ptr = self.__translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank) tl.store(translated_ptr, value, mask=mask) @triton.jit @@ -2392,9 +2361,6 @@ def load(pointer, to_rank, from_rank, heap_bases, mask=None): data from the target memory location. If the `from_rank` and `to_rank` are the same, this function performs a local load operation. - This function uses 2D vectorization hints for optimal performance with block pointers. - Minimum block size in each dimension should be >= 16. - Args: pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. to_rank (int): The rank ID to which the pointer will be translated. Must be the current rank where the pointer is local. 
@@ -2414,7 +2380,7 @@ def load(pointer, to_rank, from_rank, heap_bases, mask=None): >>> data = iris.load(ptr, cur_rank, remote_rank, heap_bases) >>> return data """ - translated_ptr = __translate_block_2d(pointer, to_rank, from_rank, heap_bases) + translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases) result = tl.load(translated_ptr, mask=mask) return result @@ -2429,9 +2395,6 @@ def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): the provided data to the target memory location. If the `from_rank` and `to_rank` are the same, this function performs a local store operation. - This function uses 2D vectorization hints for optimal performance with block pointers. - Minimum block size in each dimension should be >= 16. - Args: pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. value (Block): The tensor of elements to be stored. @@ -2452,7 +2415,7 @@ def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): >>> value = 42 >>> iris.store(ptr, value, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate_block_2d(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) tl.store(translated_ptr, value, mask=mask) diff --git a/iris/x/core.py b/iris/x/core.py index 58786e79e..fee50918e 100644 --- a/iris/x/core.py +++ b/iris/x/core.py @@ -80,10 +80,7 @@ def tile_ptr(ptr, M, N, stride_m, stride_n, pid_m, pid_n, BLOCK_SIZE_M: tl.const rm, rn, mask = tile_layout(pid_m, pid_n, M, N, BLOCK_SIZE_M, BLOCK_SIZE_N) offset = rm[:, None] * stride_m + rn[None, :] * stride_n tile_ptr = ptr + offset - # NOTE: Vectorization hints are applied at the call site (e.g., gather.py) - # rather than here, because the caller knows the block dimensions. 
- # Alignment IS preserved through pointer translation since symmetric heaps - # are all page-aligned, so relative offsets within the heap are maintained. + tile_ptr = tl.multiple_of(tile_ptr, (BLOCK_SIZE_M, BLOCK_SIZE_N)) return tile_ptr, mask diff --git a/iris/x/gather.py b/iris/x/gather.py index bb3fb637a..ca8bd4f9c 100644 --- a/iris/x/gather.py +++ b/iris/x/gather.py @@ -13,6 +13,7 @@ import triton import triton.language as tl +import iris from iris.iris import DeviceContext from .core import Tile, TensorView @@ -50,25 +51,16 @@ def gather( src_tile_ptr, mask = src_view.tile_ptr(tile) if source_rank == ctx.rank: - # Local load - can use vectorization hints since alignment is guaranteed - local_ptr = tl.multiple_of(src_tile_ptr, (1, tile.block_n)) - local_ptr = tl.max_contiguous(local_ptr, (1, tile.block_n)) - tile_data = tl.load(local_ptr, mask=mask) + # Local load + tile_data = tl.load(src_tile_ptr, mask=mask, other=0.0) else: - # Remote load using RMA - inline translation and apply hints AFTER translation - # Hints must be applied to the translated pointer because pointer arithmetic - # (cast to uint64, subtract, add, cast back) destroys hint metadata. - # Alignment IS preserved because symmetric heaps are all page-aligned. 
- from_base = tl.load(ctx.heap_bases + ctx.rank) - to_base = tl.load(ctx.heap_bases + source_rank) - ptr_int = tl.cast(src_tile_ptr, tl.uint64) - offset = ptr_int - from_base - to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) - translated_ptr_byte = to_base_byte + offset - translated_ptr = tl.cast(translated_ptr_byte, src_tile_ptr.dtype) - # Apply vectorization hints AFTER translation - translated_ptr = tl.multiple_of(translated_ptr, (1, tile.block_n)) - translated_ptr = tl.max_contiguous(translated_ptr, (1, tile.block_n)) - tile_data = tl.load(translated_ptr, mask=mask) + # Remote load using RMA + tile_data = iris.load( + src_tile_ptr, + ctx.rank, # to_rank (current rank) + source_rank, # from_rank (source rank) + ctx.heap_bases, + mask=mask, + ) return tile_data From dcafd2a669d77a3ac225a96c00e66d394110d68d Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Tue, 3 Mar 2026 16:51:16 -0500 Subject: [PATCH 23/60] Properly use iris tracing APIs --- iris/ops/all_gather_matmul_hbm_buffer.py | 127 +++++++++++++++-------- iris/tracing/events.py | 13 +++ 2 files changed, 97 insertions(+), 43 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 36010e24f..b23123fcb 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -16,7 +16,8 @@ import iris import iris.x -from iris.device_utils import read_realtime, get_xcc_id +from iris.device_utils import read_realtime +from iris.tracing.events import TraceEvent from .config import FusedConfig from .workspace import FusedWorkspace @@ -62,19 +63,13 @@ def _hbm_buffer_all_gather_matmul_kernel( NUM_FETCH_STAGES: tl.constexpr, GEMM_TILES_PER_STAGE: tl.constexpr, FIRST_STAGE_FETCH_SMS: tl.constexpr, - trace_start_ptr, - trace_end_ptr, - trace_wait_ptr, - trace_xcd_ptr, TRACE: tl.constexpr, ): pid = tl.program_id(0) acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 zero = tl.program_id(0) * 0 - if TRACE: 
- tl.store(trace_start_ptr + pid, read_realtime()) - tl.store(trace_xcd_ptr + pid, get_xcc_id()) + ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size, tracing=TRACE) # Interleaved layout with asymmetric first stage: # [fetch0 (P)] [gemm0 (G)] [fetch1 (F)] [gemm1 (G)] ... @@ -101,7 +96,15 @@ def _hbm_buffer_all_gather_matmul_kernel( # ============================================================== stage_pid = local_pid - ctx = iris.DeviceContext.initialize(context_tensor, cur_rank, world_size) + if TRACE: + _trace_handle = ctx.tracing.record_event_start( + event_id=TraceEvent().wg_fetch, + target_rank=cur_rank, + address=flags_ptr + tl.arange(0, 1), + pid_m=pid, + pid_n=my_stage, + ) + src_view = iris.x.make_tensor_view(A_sharded, M, K_local, stride_am, stride_ak) tiles_per_m_group = NUM_FLAG_GROUPS_K * GROUP_SIZE_M @@ -146,8 +149,7 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") if TRACE: - tl.store(trace_wait_ptr + pid, zero.to(tl.int64), cache_modifier=".wt") - tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") + ctx.tracing.record_event_end(_trace_handle) else: # ============================================================== @@ -173,6 +175,13 @@ def _hbm_buffer_all_gather_matmul_kernel( acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) if TRACE: + _trace_handle = ctx.tracing.record_event_start( + event_id=TraceEvent().wg_gemm, + target_rank=cur_rank, + address=flags_ptr + tl.arange(0, 1), + pid_m=pid, + pid_n=my_stage, + ) _wt = zero.to(tl.int64) for k_fg in range(NUM_FLAG_GROUPS_K): @@ -213,8 +222,14 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(C_ptrs, c, mask=c_mask, cache_modifier=".wt") if TRACE: - tl.store(trace_wait_ptr + pid, _wt) - tl.store(trace_end_ptr + pid, read_realtime(), cache_modifier=".wt") + ctx.tracing.record_event_end(_trace_handle) + ctx.tracing.record_event_start( + event_id=TraceEvent().wg_gemm_wait, + 
target_rank=cur_rank, + address=flags_ptr + tl.arange(0, 1), + pid_m=pid, + pid_n=_wt.to(tl.int32), + ) # ========================================================================== @@ -285,6 +300,45 @@ def all_gather_matmul_hbm_buffer_preamble( return ws +_WG_FETCH = 14 +_WG_GEMM = 15 +_WG_GEMM_WAIT = 16 + + +def _extract_wg_trace(shmem, grid_size, **metadata): + """Reconstruct per-workgroup trace arrays from DeviceTracing events.""" + import numpy as np + + bufs = shmem.tracing.trace_buffers + n = min(shmem.tracing.trace_counter.item(), shmem.tracing.max_events) + + event_ids = bufs["event_id"][:n].cpu().numpy() + pids = bufs["pid"][:n].cpu().numpy() + timestamps = bufs["timestamp"][:n].cpu().numpy().astype(np.int64) + end_ts = bufs["duration_cycles"][:n].cpu().numpy().astype(np.int64) + xcc_ids = bufs["xcc_id"][:n].cpu().numpy().astype(np.int32) + pid_ns = bufs["pid_n"][:n].cpu().numpy() + + starts = torch.zeros(grid_size, dtype=torch.int64) + ends = torch.zeros(grid_size, dtype=torch.int64) + waits = torch.zeros(grid_size, dtype=torch.int64) + xcds = torch.zeros(grid_size, dtype=torch.int32) + + for i in range(n): + eid = int(event_ids[i]) + wg = int(pids[i]) + if wg >= grid_size: + continue + if eid == _WG_FETCH or eid == _WG_GEMM: + starts[wg] = int(timestamps[i]) + ends[wg] = int(end_ts[i]) + xcds[wg] = int(xcc_ids[i]) + elif eid == _WG_GEMM_WAIT: + waits[wg] = int(pid_ns[i]) + + return {"start": starts, "end": ends, "wait": waits, "xcd": xcds, "grid_size": grid_size, **metadata} + + def all_gather_matmul_hbm_buffer( shmem, output_tensor: torch.Tensor, @@ -385,17 +439,12 @@ def all_gather_matmul_hbm_buffer( total_fetch_wgs = first_stage_fetch_sms + num_fetch_sms * max(0, num_fetch_stages - 1) grid_size = first_stage_size + rest_stage_size * max(0, num_fetch_stages - 1) - # Trace buffers if trace: - trace_start = torch.zeros(grid_size, dtype=torch.int64, device=device) - trace_end = torch.zeros(grid_size, dtype=torch.int64, device=device) - trace_wait = 
torch.zeros(grid_size, dtype=torch.int64, device=device) - trace_xcd = torch.zeros(grid_size, dtype=torch.int32, device=device) - else: - trace_start = torch.empty(1, dtype=torch.int64, device=device) - trace_end = torch.empty(1, dtype=torch.int64, device=device) - trace_wait = torch.empty(1, dtype=torch.int64, device=device) - trace_xcd = torch.empty(1, dtype=torch.int32, device=device) + max_trace_events = grid_size * 4 + if not shmem.tracing.enabled: + shmem.tracing.enable(max_events=max_trace_events) + else: + shmem.tracing.reset() launch_kwargs = {"matrix_instr_nonkdim": 16} if num_warps is not None: @@ -443,10 +492,6 @@ def all_gather_matmul_hbm_buffer( num_fetch_stages, gemm_tiles_per_stage, first_stage_fetch_sms, - trace_start, - trace_end, - trace_wait, - trace_xcd, trace, **launch_kwargs, ) @@ -456,21 +501,17 @@ def all_gather_matmul_hbm_buffer( if trace: torch.cuda.synchronize() - workspace.trace_data = { - "start": trace_start.cpu(), - "end": trace_end.cpu(), - "wait": trace_wait.cpu(), - "xcd": trace_xcd.cpu(), - "grid_size": grid_size, - "num_fetch_sms": num_fetch_sms, - "num_fetch_stages": num_fetch_stages, - "total_fetch_wgs": total_fetch_wgs, - "num_m_tiles": num_m_tiles, - "num_tiles_n": num_tiles_n, - "first_stage_fetch_sms": first_stage_fetch_sms, - "first_stage_size": first_stage_size, - "rest_stage_size": rest_stage_size, - "gemm_tiles_per_stage": gemm_tiles_per_stage, - } + workspace.trace_data = _extract_wg_trace( + shmem, grid_size, + num_fetch_sms=num_fetch_sms, + num_fetch_stages=num_fetch_stages, + total_fetch_wgs=total_fetch_wgs, + num_m_tiles=num_m_tiles, + num_tiles_n=num_tiles_n, + first_stage_fetch_sms=first_stage_fetch_sms, + first_stage_size=first_stage_size, + rest_stage_size=rest_stage_size, + gemm_tiles_per_stage=gemm_tiles_per_stage, + ) return workspace diff --git a/iris/tracing/events.py b/iris/tracing/events.py index 4838c09d6..62d7cf8df 100644 --- a/iris/tracing/events.py +++ b/iris/tracing/events.py @@ -26,6 +26,9 @@ 11: 
"atomic_or", 12: "atomic_min", 13: "atomic_max", + 14: "wg_fetch", + 15: "wg_gemm", + 16: "wg_gemm_wait", } @@ -75,6 +78,11 @@ class TraceEvent: atomic_min: tl.constexpr atomic_max: tl.constexpr + # Workgroup-level profiling events + wg_fetch: tl.constexpr + wg_gemm: tl.constexpr + wg_gemm_wait: tl.constexpr + @triton.constexpr_function def __init__(self): # Data movement @@ -94,3 +102,8 @@ def __init__(self): self.atomic_or = tl.constexpr(11) self.atomic_min = tl.constexpr(12) self.atomic_max = tl.constexpr(13) + + # Workgroup-level profiling + self.wg_fetch = tl.constexpr(14) + self.wg_gemm = tl.constexpr(15) + self.wg_gemm_wait = tl.constexpr(16) From 6fdad6dad72f79befe4a83ea3fdc49b9c3cb5c56 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 3 Mar 2026 21:51:56 +0000 Subject: [PATCH 24/60] Apply Ruff auto-fixes --- iris/ops/all_gather_matmul_hbm_buffer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index b23123fcb..abe3b3936 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -502,7 +502,8 @@ def all_gather_matmul_hbm_buffer( if trace: torch.cuda.synchronize() workspace.trace_data = _extract_wg_trace( - shmem, grid_size, + shmem, + grid_size, num_fetch_sms=num_fetch_sms, num_fetch_stages=num_fetch_stages, total_fetch_wgs=total_fetch_wgs, From 08755b777bded836ef206968f3c63700f0be9b8c Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Tue, 3 Mar 2026 16:54:57 -0500 Subject: [PATCH 25/60] Remove test.sh --- benchmark/ops/all_gather_matmul/test.sh | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100755 benchmark/ops/all_gather_matmul/test.sh diff --git a/benchmark/ops/all_gather_matmul/test.sh b/benchmark/ops/all_gather_matmul/test.sh deleted file mode 100755 index 7d5ef1a98..000000000 --- a/benchmark/ops/all_gather_matmul/test.sh +++ /dev/null @@ -1,16 +0,0 @@ 
-HSA_NO_SCRATCH_RECLAIM=1 \ -python3 $(pwd)/benchmark.py \ - -m 2048 \ - -n 16384 \ - -k 131072 \ - --num_ranks 8 \ - --num_xcds 8 \ - --datatype fp16 \ - --block_size_m 512 \ - --block_size_n 128 \ - --block_size_k 64 \ - --group_size_m 1 \ - --benchmark \ - --b_col_major \ - -v \ - --benchmark_pytorch \ No newline at end of file From f55829349c3fcd0ecef290361e41367440aa23b5 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Thu, 5 Mar 2026 19:38:29 -0500 Subject: [PATCH 26/60] Fix CI: restore vectorization hints, align tritonBLAS versions, remove temp files - Restore optional `hint` parameter in `__translate` and all public iris API functions (load, store, get, put, copy, atomic_*) to match main branch pattern. The previous hardcoded `tl.multiple_of(ptr, (32, 32))` assumed 2D pointers and broke all scalar-pointer atomic operations. - Align tritonBLAS commit across pyproject.toml, run_tests.sh, apptainer/iris.def, and docker/Dockerfile to cd119279f. - Remove tracked backup files (iris.py.backup, all_gather_matmul.py.with_chunked) and add gitignore patterns. - Remove unimplemented "chunked" variant from test_all_gather_matmul parametrization. - Fix test_matmul_all_reduce_via_shmem_ops dimensions (N=128->256) to match new default block_size_n=256. - Remove phantom "matmul" from iris/ops/__init__.py __all__. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/scripts/run_tests.sh | 4 +- .gitignore | 2 + apptainer/iris.def | 2 +- docker/Dockerfile | 2 +- iris/iris.py | 154 +- iris/iris.py.backup | 2255 -------------------- iris/ops/__init__.py | 1 - iris/ops/all_gather_matmul.py.with_chunked | 521 ----- iris/ops/config.py | 3 +- tests/ops/test_all_gather_matmul.py | 1 - tests/ops/test_matmul_all_reduce.py | 2 +- 11 files changed, 93 insertions(+), 2854 deletions(-) delete mode 100644 iris/iris.py.backup delete mode 100644 iris/ops/all_gather_matmul.py.with_chunked diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh index 4abf4a717..8f254b326 100755 --- a/.github/scripts/run_tests.sh +++ b/.github/scripts/run_tests.sh @@ -75,11 +75,11 @@ fi if [ ! -d \"\$TRITONBLAS_DIR\" ]; then git clone https://github.com/ROCm/tritonBLAS.git \"\$TRITONBLAS_DIR\" cd \"\$TRITONBLAS_DIR\" - git checkout 47768c93acb7f89511d797964b84544c30ab81ad + git checkout cd119279f3df543a558aa6d2cd4a3daed0b1ec7a else cd \"\$TRITONBLAS_DIR\" git fetch - git checkout 47768c93acb7f89511d797964b84544c30ab81ad + git checkout cd119279f3df543a558aa6d2cd4a3daed0b1ec7a fi # Install with dependencies pip install -e . diff --git a/.gitignore b/.gitignore index 57d842401..845d61207 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,8 @@ omni*.pdf slurm*.out *.egg-info +*.backup +*.with_chunked examples/gemm/results/* asm/ diff --git a/apptainer/iris.def b/apptainer/iris.def index a5f3c3088..a02c2c32d 100644 --- a/apptainer/iris.def +++ b/apptainer/iris.def @@ -38,7 +38,7 @@ From: rocm/pytorch:rocm7.1_ubuntu24.04_py3.13_pytorch_release_2.9.1 cd /opt git clone https://github.com/ROCm/tritonBLAS.git cd tritonBLAS - git checkout 47768c93acb7f89511d797964b84544c30ab81ad + git checkout cd119279f3df543a558aa6d2cd4a3daed0b1ec7a pip3 install -e . 
" diff --git a/docker/Dockerfile b/docker/Dockerfile index a0f97d1c5..9c3954f98 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -43,7 +43,7 @@ ENV PYTHONPATH=$TRITON_PATH WORKDIR /opt RUN git clone https://github.com/ROCm/tritonBLAS.git && \ cd tritonBLAS && \ - git checkout 47768c93acb7f89511d797964b84544c30ab81ad && \ + git checkout cd119279f3df543a558aa6d2cd4a3daed0b1ec7a && \ pip3 install -e . # Set up workspace diff --git a/iris/iris.py b/iris/iris.py index e68adc3f0..5791d2e76 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1779,7 +1779,7 @@ def reduce_scatter(self, output_tensor, input_tensor, op=None, group=None, async @triton.jit -def __translate(ptr, from_rank, to_rank, heap_bases): +def __translate(ptr, from_rank, to_rank, heap_bases, hint: tl.constexpr = None): from_base = tl.load(heap_bases + from_rank) to_base = tl.load(heap_bases + to_rank) # convert to int to compute difference @@ -1792,18 +1792,8 @@ def __translate(ptr, from_rank, to_rank, heap_bases): translated_ptr_byte = to_base_byte + offset # Cast to_base back to pointer type translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) - - # Optimization to vectorize the load/store - # We can't do this in general because we don't know the shape of the tensor or block sizes - # ptr = tl.max_contiguous(tl.multiple_of(ptr, (16, 16)), (16, 32)) - - # 0 You can use this if your block sizes are multiples of 32. 
- # Largest vectorized load instruction is dwordx4 (128-bits) - translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) - translated_ptr = tl.max_contiguous(translated_ptr, (1, 32)) - - # ptr = tl.max_contiguous(tl.multiple_of(ptr, 512), 512) - # translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, 512), 512) + if hint is not None: + translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, hint), hint) return translated_ptr @@ -1980,12 +1970,12 @@ def initialize(context_tensor, rank, world_size, tracing: tl.constexpr = False): return DeviceContext(rank, world_size, heap_bases, device_tracing) @triton.jit - def _translate(self, ptr, from_rank, to_rank): + def _translate(self, ptr, from_rank, to_rank, hint: tl.constexpr = None): """Internal pointer translation between rank address spaces.""" - return __translate(ptr, from_rank, to_rank, self.heap_bases) + return __translate(ptr, from_rank, to_rank, self.heap_bases, hint) @triton.jit - def load(self, pointer, from_rank, mask=None): + def load(self, pointer, from_rank, mask=None, hint: tl.constexpr = None): """ Loads a value from the specified rank's memory location. @@ -1998,6 +1988,7 @@ def load(self, pointer, from_rank, mask=None): pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that will be translated to the `from_rank`'s address space. from_rank (int): The rank ID from which to read the data. mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address pointer[idx]. Defaults to None. + hint (int or tuple, optional): Vectorization hint passed to tl.multiple_of / tl.max_contiguous on the translated pointer. Defaults to None. Returns: Block: The loaded value from the target memory location. 
@@ -2005,12 +1996,12 @@ def load(self, pointer, from_rank, mask=None): Example: >>> data = ctx.load(buffer + offsets, from_rank=1, mask=mask) """ - translated_ptr = self._translate(pointer, self.rank, from_rank) + translated_ptr = self._translate(pointer, self.rank, from_rank, hint) result = tl.load(translated_ptr, mask=mask) return result @triton.jit - def store(self, pointer, value, to_rank, mask=None): + def store(self, pointer, value, to_rank, mask=None, hint: tl.constexpr = None): """ Writes data to the specified rank's memory location. @@ -2024,6 +2015,7 @@ def store(self, pointer, value, to_rank, mask=None): value (Block): The tensor of elements to be stored. to_rank (int): The rank ID to which the data will be written. mask (Block of triton.int1, optional): If mask[idx] is false, do not store the data at address pointer[idx]. Defaults to None. + hint (int or tuple, optional): Vectorization hint passed to tl.multiple_of / tl.max_contiguous on the translated pointer. Defaults to None. Returns: None @@ -2031,11 +2023,11 @@ def store(self, pointer, value, to_rank, mask=None): Example: >>> ctx.store(buffer + offsets, values, to_rank=1, mask=mask) """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) tl.store(translated_ptr, value, mask=mask) @triton.jit - def get(self, from_ptr, to_ptr, from_rank, mask=None): + def get(self, from_ptr, to_ptr, from_rank, mask=None, hint: tl.constexpr = None): """ Copies data from the specified rank's memory into current rank's local memory. @@ -2049,6 +2041,7 @@ def get(self, from_ptr, to_ptr, from_rank, mask=None): to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer to local memory in current rank where the data will be written. from_rank (int): The rank ID from which to read the data. mask (Block of triton.int1, optional): If mask[idx] is false, do not load from from_ptr[idx] and do not store to to_ptr[idx]. 
Defaults to None. + hint (int or tuple, optional): Vectorization hint passed to tl.multiple_of / tl.max_contiguous on the translated pointer. Defaults to None. Returns: None @@ -2056,12 +2049,12 @@ def get(self, from_ptr, to_ptr, from_rank, mask=None): Example: >>> ctx.get(remote_ptr + offsets, local_ptr + offsets, from_rank=1, mask=mask) """ - translated_from_ptr = self._translate(from_ptr, self.rank, from_rank) + translated_from_ptr = self._translate(from_ptr, self.rank, from_rank, hint) data = tl.load(translated_from_ptr, mask=mask) tl.store(to_ptr, data, mask=mask) @triton.jit - def put(self, from_ptr, to_ptr, to_rank, mask=None): + def put(self, from_ptr, to_ptr, to_rank, mask=None, hint: tl.constexpr = None): """ Copies data from current rank's local memory to the specified rank's memory. @@ -2075,6 +2068,7 @@ def put(self, from_ptr, to_ptr, to_rank, mask=None): to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that references memory in `to_rank`. to_rank (int): The rank ID to which the data will be written. mask (Block of triton.int1, optional): If mask[idx] is false, do not load from from_ptr[idx] and do not store to to_ptr[idx]. Defaults to None. + hint (int or tuple, optional): Vectorization hint passed to tl.multiple_of / tl.max_contiguous on the translated pointer. Defaults to None. 
Returns: None @@ -2082,12 +2076,12 @@ def put(self, from_ptr, to_ptr, to_rank, mask=None): Example: >>> ctx.put(local_ptr + offsets, remote_ptr + offsets, to_rank=1, mask=mask) """ - translated_to_ptr = self._translate(to_ptr, self.rank, to_rank) + translated_to_ptr = self._translate(to_ptr, self.rank, to_rank, hint) data = tl.load(from_ptr, mask=mask) tl.store(translated_to_ptr, data, mask=mask) @triton.jit - def copy(self, src_ptr, dst_ptr, from_rank, to_rank, mask=None): + def copy(self, src_ptr, dst_ptr, from_rank, to_rank, mask=None, hint: tl.constexpr = None): """ Copies data from one rank's memory to another rank's memory. @@ -2127,11 +2121,15 @@ def copy(self, src_ptr, dst_ptr, from_rank, to_rank, mask=None): translated_src = tl.cast(from_base_byte + src_offset, src_ptr.dtype) translated_dst = tl.cast(to_base_byte + dst_offset, src_ptr.dtype) + if hint is not None: + translated_src = tl.max_contiguous(tl.multiple_of(translated_src, hint), hint) + translated_dst = tl.max_contiguous(tl.multiple_of(translated_dst, hint), hint) + data = tl.load(translated_src, mask=mask) tl.store(translated_dst, data, mask=mask) @triton.jit - def atomic_add(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_add(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic add at the specified rank's memory location. 
@@ -2154,11 +2152,11 @@ def atomic_add(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Example: >>> old_val = ctx.atomic_add(counter, 1, to_rank=1) """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_add(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_sub(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_sub(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Atomically subtracts data from the specified rank's memory location. @@ -2178,11 +2176,11 @@ def atomic_sub(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_sub(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_cas(self, pointer, cmp, val, to_rank, sem=None, scope=None): + def atomic_cas(self, pointer, cmp, val, to_rank, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic compare-and-swap at the specified rank's memory location. @@ -2203,11 +2201,11 @@ def atomic_cas(self, pointer, cmp, val, to_rank, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_cas(translated_ptr, cmp, val, sem=sem, scope=scope) @triton.jit - def atomic_xchg(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_xchg(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic exchange at the specified rank's memory location. 
@@ -2227,11 +2225,11 @@ def atomic_xchg(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_xchg(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_xor(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_xor(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic XOR at the specified rank's memory location. @@ -2251,11 +2249,11 @@ def atomic_xor(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_xor(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_and(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_and(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic AND at the specified rank's memory location. @@ -2275,11 +2273,11 @@ def atomic_and(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_and(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_or(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_or(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic OR at the specified rank's memory location. 
@@ -2299,11 +2297,11 @@ def atomic_or(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_or(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_min(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_min(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic minimum at the specified rank's memory location. @@ -2323,11 +2321,11 @@ def atomic_min(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_min(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit - def atomic_max(self, pointer, val, to_rank, mask=None, sem=None, scope=None): + def atomic_max(self, pointer, val, to_rank, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic maximum at the specified rank's memory location. @@ -2347,12 +2345,12 @@ def atomic_max(self, pointer, val, to_rank, mask=None, sem=None, scope=None): Returns: Block: The data stored at pointer before the atomic operation. """ - translated_ptr = self._translate(pointer, self.rank, to_rank) + translated_ptr = self._translate(pointer, self.rank, to_rank, hint) return tl.atomic_max(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def load(pointer, to_rank, from_rank, heap_bases, mask=None): +def load(pointer, to_rank, from_rank, heap_bases, mask=None, hint: tl.constexpr = None): """ Loads a value from the specified rank's memory location. 
@@ -2380,13 +2378,13 @@ def load(pointer, to_rank, from_rank, heap_bases, mask=None): >>> data = iris.load(ptr, cur_rank, remote_rank, heap_bases) >>> return data """ - translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases) + translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases, hint) result = tl.load(translated_ptr, mask=mask) return result @triton.jit -def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): +def store(pointer, value, from_rank, to_rank, heap_bases, mask=None, hint: tl.constexpr = None): """ Writes data to the specified rank's memory location. @@ -2415,12 +2413,12 @@ def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): >>> value = 42 >>> iris.store(ptr, value, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) tl.store(translated_ptr, value, mask=mask) @triton.jit -def copy(src_ptr, dst_ptr, from_rank, to_rank, cur_rank, heap_bases, mask=None): +def copy(src_ptr, dst_ptr, from_rank, to_rank, cur_rank, heap_bases, mask=None, hint: tl.constexpr = None): """ Copies data from the specified rank's memory into the destination rank's memory. 
This function performs the transfer by translating `src_ptr` from the `from_rank`'s address @@ -2466,12 +2464,16 @@ def copy(src_ptr, dst_ptr, from_rank, to_rank, cur_rank, heap_bases, mask=None): translated_src = tl.cast(from_base_byte + src_offset, src_ptr.dtype) translated_dst = tl.cast(to_base_byte + dst_offset, src_ptr.dtype) + if hint is not None: + translated_src = tl.max_contiguous(tl.multiple_of(translated_src, hint), hint) + translated_dst = tl.max_contiguous(tl.multiple_of(translated_dst, hint), hint) + data = tl.load(translated_src, mask=mask) tl.store(translated_dst, data, mask=mask) @triton.jit -def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): +def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None, hint: tl.constexpr = None): """ Copies data from the specified rank's memory to the current rank's local memory. @@ -2498,7 +2500,7 @@ def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): >>> to_rank = 0 >>> iris.get(remote_ptr, local_ptr, from_rank, to_rank, heap_bases) """ - translated_from_ptr = __translate(from_ptr, from_rank, to_rank, heap_bases) + translated_from_ptr = __translate(from_ptr, from_rank, to_rank, heap_bases, hint) data = tl.load(translated_from_ptr, mask=mask) @@ -2506,7 +2508,7 @@ def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): @triton.jit -def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): +def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None, hint: tl.constexpr = None): """ Copies data from the current rank's local memory to the specified rank's memory. 
This function performs a memory write operation by loading data from the current @@ -2532,7 +2534,7 @@ def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): >>> to_rank = 1 >>> iris.put(local_ptr, remote_ptr, from_rank, to_rank, heap_bases) """ - translated_to_ptr = __translate(to_ptr, from_rank, to_rank, heap_bases) + translated_to_ptr = __translate(to_ptr, from_rank, to_rank, heap_bases, hint) data = tl.load(from_ptr, mask=mask) @@ -2540,7 +2542,9 @@ def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): @triton.jit -def atomic_add(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_add( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic add at the specified rank's memory location. @@ -2571,12 +2575,14 @@ def atomic_add(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> increment = 5 >>> old_val = iris.atomic_add(ptr, increment, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_add(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_sub(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_sub( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Atomically subtracts data from the specified rank's memory location. 
@@ -2607,12 +2613,12 @@ def atomic_sub(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> decrement = 3 >>> old_val = iris.atomic_sub(ptr, decrement, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_sub(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_cas(pointer, cmp, val, from_rank, to_rank, heap_bases, sem=None, scope=None): +def atomic_cas(pointer, cmp, val, from_rank, to_rank, heap_bases, sem=None, scope=None, hint: tl.constexpr = None): """ Atomically compares and exchanges the specified rank's memory location. @@ -2644,12 +2650,14 @@ def atomic_cas(pointer, cmp, val, from_rank, to_rank, heap_bases, sem=None, scop >>> new_val = 42 >>> old_val = iris.atomic_cas(ptr, expected, new_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_cas(translated_ptr, cmp, val, sem=sem, scope=scope) @triton.jit -def atomic_xchg(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_xchg( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic exchange at the specified rank's memory location. 
@@ -2680,12 +2688,14 @@ def atomic_xchg(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=Non >>> new_value = 99 >>> old_val = iris.atomic_xchg(ptr, new_value, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_xchg(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_xor(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_xor( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic xor at the specified rank's memory location. @@ -2716,12 +2726,14 @@ def atomic_xor(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> mask_val = 0xFF >>> old_val = iris.atomic_xor(ptr, mask_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_xor(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_and(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_and( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic and at the specified rank's memory location. 
@@ -2752,12 +2764,12 @@ def atomic_and(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> mask_val = 0x0F >>> old_val = iris.atomic_and(ptr, mask_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_and(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_or(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_or(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None): """ Performs an atomic or at the specified rank's memory location. @@ -2788,12 +2800,14 @@ def atomic_or(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, >>> mask_val = 0xF0 >>> old_val = iris.atomic_or(ptr, mask_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_or(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_min(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_min( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic min at the specified rank's memory location. 
@@ -2824,12 +2838,14 @@ def atomic_min(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> new_val = 10 >>> old_val = iris.atomic_min(ptr, new_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_min(translated_ptr, val, mask=mask, sem=sem, scope=scope) @triton.jit -def atomic_max(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): +def atomic_max( + pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None, hint: tl.constexpr = None +): """ Performs an atomic max at the specified rank's memory location. @@ -2860,7 +2876,7 @@ def atomic_max(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None >>> new_val = 100 >>> old_val = iris.atomic_max(ptr, new_val, cur_rank, remote_rank, heap_bases) """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) + translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases, hint) return tl.atomic_max(translated_ptr, val, mask=mask, sem=sem, scope=scope) diff --git a/iris/iris.py.backup b/iris/iris.py.backup deleted file mode 100644 index e8932c3c8..000000000 --- a/iris/iris.py.backup +++ /dev/null @@ -1,2255 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. - -""" -Iris: Multi-GPU Communication and Memory Management Framework - -Iris is a high-performance framework that enables seamless multi-GPU programming in Triton, -enabling fine-grained communication and compute overlap natively in Triton -across multiple GPUs with SHMEM-like Remote Memory Access (RMA) capabilities. 
- -Key Features: -- Symmetric heap management across multiple GPUs -- High-performance atomic operations (add, cas, xchg, xor, and, or, min, max) -- Efficient load/store operations with rank-to-rank communication -- Memory allocation and deallocation utilities -- Built-in logging with rank information -- PyTorch distributed integration for distributed computing - -Example: - >>> import iris - >>> ctx = iris.iris(heap_size=2**30) # 1GB heap - >>> tensor = ctx.zeros(1024, 1024, dtype=torch.float32) -""" - -import triton -import triton.language as tl - -from iris._distributed_helpers import ( - init_distributed, - distributed_barrier, - distributed_broadcast_scalar, - distributed_broadcast_tensor, -) -from iris.hip import ( - set_device, - get_cu_count, - count_devices, -) -from iris.symmetric_heap import SymmetricHeap -import numpy as np -import math -import torch -import logging - -# Import logging functionality from the separate logging module -from .logging import logger - - -class Iris: - """ - Main Iris class for multi-GPU communication and memory management. - - This class provides a unified interface for distributed GPU operations including - memory allocation, atomic operations, and inter-rank communication. - - Args: - heap_size (int): Size of the symmetric heap in bytes. 
Default: 1GB (2^30) - - Example: - >>> ctx = iris.iris(heap_size=2**31) # 2GB heap - >>> print(f"Rank {ctx.cur_rank} of {ctx.num_ranks}") # Rank 0 of 1 - >>> tensor = ctx.zeros(1000, 1000, dtype=torch.float32) - """ - - def __init__(self, heap_size=1 << 30): - # Initialize distributed environment - comm, cur_rank, num_ranks = init_distributed() - num_gpus = count_devices() - - gpu_id = cur_rank % num_gpus - set_device(gpu_id) - - self.comm = comm - self.num_ranks = num_ranks - self.cur_rank = cur_rank - self.gpu_id = gpu_id - self.heap_size = heap_size - - # Initialize symmetric heap - self.heap = SymmetricHeap(heap_size, gpu_id, cur_rank, num_ranks) - self.device = f"cuda:{gpu_id}" - self.heap_bases = self.heap.get_heap_bases() - - for i in range(num_ranks): - self.debug(f"GPU {i}: Heap base {hex(int(self.heap_bases[i].item()))}") - - distributed_barrier() - - # Initialize CCL interface - self.ccl = self.CCL(self) - - # Lazy initialization for ops interface - self._ops = None - - def _log_with_rank(self, level, message): - """Helper method to log with rank information injected into the record.""" - if logger.isEnabledFor(level): - record = logging.LogRecord( - name=logger.name, level=level, pathname="", lineno=0, msg=message, args=(), exc_info=None - ) - # Inject rank information into the record - record.iris_rank = self.cur_rank - record.iris_num_ranks = self.num_ranks - logger.handle(record) - - def debug(self, message): - """ - Log a debug message with rank information. - - Args: - message (str): Human-readable message to log at debug level. - - Notes: - The log record is enriched with ``iris_rank`` and ``iris_num_ranks`` so - formatters can display the originating rank and world size. - - Example: - >>> ctx = iris.iris() - >>> iris.set_logger_level(iris.DEBUG) - >>> ctx.debug("Allocating buffers") # [Iris] [0/1] Allocating buffers - """ - self._log_with_rank(logging.DEBUG, message) - - def info(self, message): - """ - Log an info message with rank information. 
- - Args: - message (str): Human-readable message to log at info level. - - Example: - >>> ctx = iris.iris() - >>> ctx.info("Starting iteration 0") # [Iris] [0/1] Starting iteration 0 - """ - self._log_with_rank(logging.INFO, message) - - def warning(self, message): - """ - Log a warning message with rank information. - - Args: - message (str): Human-readable message to log at warning level. - - Example: - >>> ctx = iris.iris() - >>> ctx.warning("Memory usage is high") # [Iris] [0/1] Memory usage is high - """ - self._log_with_rank(logging.WARNING, message) - - def error(self, message): - """ - Log an error message with rank information. - - Args: - message (str): Human-readable message to log at error level. - - Example: - >>> ctx = iris.iris() - >>> ctx.error("Failed to allocate memory") # [Iris] [0/1] Failed to allocate memory - """ - self._log_with_rank(logging.ERROR, message) - - @property - def ops(self): - """ - Access fused GEMM+CCL operations. - - This property provides a namespace for high-level fused operations that combine - matrix multiplication with collective communication. Operations automatically infer - dimensions, strides, and hardware parameters from input tensors. 
- - Available operations: - - matmul_all_reduce: GEMM + All-Reduce - - all_gather_matmul: All-Gather + GEMM - - matmul_all_gather: GEMM + All-Gather - - matmul_reduce_scatter: GEMM + Reduce-Scatter - - Returns: - OpsNamespace: Namespace with fused operation methods - - Raises: - ImportError: If tritonBLAS is not available - - Example: - >>> ctx = iris.iris() - >>> A = ctx.randn((1024, 512), dtype=torch.float16) - >>> B = ctx.randn((512, 2048), dtype=torch.float16) - >>> output = ctx.zeros((1024, 2048), dtype=torch.float16) - >>> ctx.ops.matmul_all_reduce(output, A, B, ctx) - """ - if self._ops is None: - from iris.ops import OpsNamespace - - self._ops = OpsNamespace(self) - return self._ops - - def broadcast(self, value, source_rank=0): - """ - Broadcast a value from one rank to all ranks. - - This method automatically detects the type of value and uses the appropriate - broadcast mechanism: - - For tensors and arrays: uses efficient PyTorch distributed tensor collectives - - For scalars and other objects: uses object broadcast - - Args: - value (Any): The value to broadcast. Can be a scalar, tensor, numpy array, - or any picklable object. Only the ``source_rank`` value is used; - other ranks should pass a placeholder (e.g., ``None``). - source_rank (int): Rank id that holds the authoritative value. - - Returns: - Any: The value broadcast to all ranks. Tensors and arrays are returned as - numpy arrays; scalars and objects are returned in their original type. 
- - Examples: - >>> ctx = iris.iris() - >>> # Broadcasting a scalar - >>> value = 42 if ctx.cur_rank == 0 else None - >>> value = ctx.broadcast(value, source_rank=0) # All ranks get 42 - >>> - >>> # Broadcasting a tensor - >>> if ctx.cur_rank == 0: - >>> data = torch.randn(10, 10) - >>> else: - >>> data = None - >>> data = ctx.broadcast(data, source_rank=0) # All ranks get the same array - """ - # Check if the value on source_rank is a tensor or array-like - if self.cur_rank == source_rank and value is not None: - # Explicitly exclude strings and non-numeric types - if isinstance(value, (str, dict, bool)): - is_tensor = False - elif isinstance(value, torch.Tensor): - is_tensor = True - elif isinstance(value, np.ndarray): - is_tensor = True - elif isinstance(value, (list, tuple)): - # Try to convert list/tuple to tensor to check if it's numeric - try: - torch.as_tensor(value) - is_tensor = True - except (TypeError, ValueError): - is_tensor = False - else: - # For other types, try to convert and check - try: - test_array = np.asarray(value) - # Check if it's a numeric dtype that torch can handle - if np.issubdtype(test_array.dtype, np.number): - torch.as_tensor(test_array) - is_tensor = True - else: - is_tensor = False - except (TypeError, ValueError): - is_tensor = False - else: - is_tensor = False - - # Broadcast the type decision to all ranks - is_tensor = distributed_broadcast_scalar(is_tensor, source_rank) - - if is_tensor: - return distributed_broadcast_tensor(value, root=source_rank) - else: - return distributed_broadcast_scalar(value, source_rank) - - def __allocate(self, num_elements, dtype): - """Allocate memory using the symmetric heap.""" - self.debug(f"allocate: num_elements = {num_elements}, dtype = {dtype}") - return self.heap.allocate(num_elements, dtype) - - def __parse_size(self, size): - # Handle nested tuples/lists by flattening them recursively - while len(size) == 1 and isinstance(size[0], (tuple, list)): - size = size[0] - num_elements = 
math.prod(size) - return size, num_elements - - def zeros_like( - self, input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format - ): - """ - Returns a tensor filled with the scalar value 0, with the same size as input, allocated on the Iris symmetric heap. - - Args: - input (Tensor): the size of input will determine size of the output tensor. - - Keyword Arguments: - dtype (torch.dtype, optional): the desired data type of returned Tensor. - Default: if None, defaults to the dtype of input. - layout (torch.layout, optional): the desired layout of returned tensor. - Default: if None, defaults to the layout of input. Note: Iris tensors are always contiguous (strided). - device (torch.device, optional): the desired device of returned tensor. - Default: if None, defaults to the device of input. Must be compatible with this Iris instance. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - memory_format (torch.memory_format, optional): the desired memory format of returned Tensor. - Default: torch.preserve_format. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> input_tensor = ctx.ones(2, 3) - >>> zeros_tensor = ctx.zeros_like(input_tensor) - >>> print(zeros_tensor.shape) # torch.Size([2, 3]) - """ - self.debug( - f"zeros_like: input_shape = {input.shape}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" - ) - - # Use input's properties as defaults if not specified - if dtype is None: - dtype = input.dtype - if layout is None: - layout = input.layout - if device is None: - device = input.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Get the size from input tensor - size = input.size() - num_elements = input.numel() - - # Allocate new tensor with the same size - new_tensor = self.__allocate(num_elements, dtype) - new_tensor.zero_() - - # Reshape to match input size - new_tensor = new_tensor.reshape(size) - - # Apply the requested memory format - new_tensor = self.__apply_memory_format(new_tensor, size, memory_format, input) - - # Apply the requested layout - new_tensor = self.__apply_layout(new_tensor, layout) - - # Set requires_grad if specified - if requires_grad: - new_tensor.requires_grad_() - - return new_tensor - - def arange( - self, start=0, end=None, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False - ): - """ - Returns a 1-D tensor of size ⌈(end - start) / step⌉ with values from the interval [start, end) - taken with common difference step beginning from start. The tensor is allocated on the symmetric heap. - - Note: When using floating-point dtypes (especially reduced precision types like bfloat16), - the results may be affected by floating-point rounding behavior. Some values in the sequence - might not be exactly representable in certain floating-point formats, which can lead to - repeated values or unexpected rounding. For precise sequences, it is recommended to use - integer dtypes instead of floating-point dtypes. 
- - Note that non-integer step is subject to floating point rounding errors when comparing - against end; to avoid inconsistency, we advise subtracting a small epsilon from end in such cases. - - Args: - start (Number, optional): the starting value for the set of points. Default: 0. - end (Number): the ending value for the set of points - step (Number, optional): the gap between each pair of adjacent points. Default: 1. - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.get_default_dtype()). - If dtype is not given, infer the data type from the other input arguments. - If any of start, end, or step are floating-point, the dtype is inferred - be the default dtype, see get_default_dtype(). Otherwise, the dtype is inferred - to be torch.int64. - layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided. - Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.arange(0, 10, 2) # [0, 2, 4, 6, 8] - >>> print(tensor.shape) # torch.Size([5]) - """ - self.debug(f"arange: start = {start}, end = {end}, step = {step}, dtype = {dtype}, device = {device}") - - # Handle the case where only one argument is provided (end) - if end is None: - end = start - start = 0 - - # Validate inputs - if step == 0: - raise ValueError("step must be non-zero") - - # Validate step direction consistency - if step > 0 and start >= end: - raise ValueError(f"Invalid range: start >= end with positive step (start={start}, end={end}, step={step})") - elif step < 0 and start <= end: - raise ValueError(f"Invalid range: start <= end with negative step (start={start}, end={end}, step={step})") - - # Calculate the number of elements - num_elements = math.ceil((end - start) / step) - - # Infer dtype if not provided - if dtype is None: - if any(isinstance(x, float) for x in [start, end, step]): - dtype = torch.get_default_dtype() - else: - dtype = torch.int64 - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - tensor = out - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - - target_device = tensor.device - arange_tensor = torch.arange(start, end, step, dtype=dtype, device=target_device) - - tensor[:] = arange_tensor - - tensor = self.__apply_layout(tensor, layout) - - if requires_grad: - tensor.requires_grad_() - - return tensor - - def zeros(self, *size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): - """ - Returns a tensor filled with the scalar value 0, with the shape defined by the variable argument size. - The tensor is allocated on the Iris symmetric heap. 
- - Args: - *size (int...): a sequence of integers defining the shape of the output tensor. - Can be a variable number of arguments or a collection like a list or tuple. - - Keyword Arguments: - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). - layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.zeros(2, 3) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([0., 0., 0.], device='cuda:0') - """ - self.debug(f"zeros: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") - - # Use global default dtype if None is provided - if dtype is None: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Fill with zeros - out.zero_() - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Fill with zeros - tensor.zero_() - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Apply the requested layout - tensor = 
self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def randn( - self, - *size, - generator=None, - out=None, - dtype=None, - layout=torch.strided, - device=None, - requires_grad=False, - pin_memory=False, - ): - """ - Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 - (also called the standard normal distribution). The tensor is allocated on the Iris symmetric heap. - - .. math:: - \\text{out}_i \\sim \\mathcal{N}(0, 1) - - For complex dtypes, the tensor is i.i.d. sampled from a complex normal distribution with zero mean - and unit variance as - - .. math:: - \\text{out}_i \\sim \\mathcal{CN}(0, 1) - - This is equivalent to separately sampling the real :math:`(\\text{Re})` and imaginary :math:`(\\text{Im})` - part of :math:`\\text{out}_i` as - - .. math:: - \\text{Re}(\\text{out}_i) \\sim \\mathcal{N}(0, \\frac{1}{2}), \\quad \\text{Im}(\\text{out}_i) \\sim \\mathcal{N}(0, \\frac{1}{2}) - - The shape of the tensor is defined by the variable argument size. - - Args: - *size (int...): a sequence of integers defining the shape of the output tensor. - Can be a variable number of arguments or a collection like a list or tuple. - - Keyword Arguments: - generator (torch.Generator, optional): a pseudorandom number generator for sampling - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). - layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type (see torch.set_default_device()). 
- device will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory. - Works only for CPU tensors. Default: False. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.randn(2, 3) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([ 0.3982, -0.0059, -0.4365], device='cuda:0') - """ - self.debug( - f"randn: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" - ) - - # Use global default dtype if None is provided - if dtype is None: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Generate random data and copy to out tensor - random_data = torch.randn(num_elements, generator=generator, dtype=dtype, device=device, layout=layout) - out.copy_(random_data) - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Generate random data and copy to tensor - random_data = torch.randn(num_elements, generator=generator, dtype=dtype, device=device, layout=layout) - tensor.copy_(random_data) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def ones(self, 
*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): - """ - Returns a tensor filled with the scalar value 1, with the shape defined by the variable argument size. - The tensor is allocated on the Iris symmetric heap. - - Args: - *size (int...): a sequence of integers defining the shape of the output tensor. - Can be a variable number of arguments or a collection like a list or tuple. - - Keyword Arguments: - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). - layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.ones(2, 3) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([1., 1., 1.], device='cuda:0') - """ - self.debug(f"ones: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") - - # Use global default dtype if None is provided - if dtype is None: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Fill with ones - out.fill_(1) - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Fill with ones - tensor.fill_(1) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def full(self, size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): - """ - Creates a tensor of size size filled with fill_value. The tensor's dtype is inferred from fill_value. - The tensor is allocated on the Iris symmetric heap. - - Args: - size (int...): a list, tuple, or torch.Size of integers defining the shape of the output tensor. - fill_value (Scalar): the value to fill the output tensor with. - - Keyword Arguments: - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). 
- layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.full((2, 3), 3.14) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([3.1400, 3.1400, 3.1400], device='cuda:0') - """ - self.debug( - f"full: size = {size}, fill_value = {fill_value}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" - ) - - # Infer dtype from fill_value if not provided - if dtype is None: - if isinstance(fill_value, (int, float)): - if isinstance(fill_value, float): - dtype = torch.get_default_dtype() - else: - dtype = torch.int64 - else: - # For other types (like tensors), use their dtype - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Fill with the specified value - out.fill_(fill_value) - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Fill with the specified value - tensor.fill_(fill_value) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - 
tensor.requires_grad_() - - return tensor - - def uniform(self, size, low=0.0, high=1.0, dtype=torch.float): - """ - Returns a tensor filled with random numbers from a uniform distribution, allocated on the Iris symmetric heap. - - Args: - size (int or tuple of ints): the size of the output tensor. - low (float, optional): the lower bound of the uniform distribution. Default: 0.0. - high (float, optional): the upper bound of the uniform distribution. Default: 1.0. - dtype (torch.dtype, optional): the desired data type of returned tensor. Default: torch.float. - - Returns: - Tensor: A tensor filled with random numbers from a uniform distribution. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.uniform((2, 3), low=0.0, high=1.0) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([0.1234, 0.5678, 0.9012], device='cuda:0') - """ - self.debug(f"uniform: size = {size}, low = {low}, high = {high}, dtype = {dtype}") - size, num_elements = self.__parse_size(size) - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - tensor.uniform_(low, high) - return tensor.reshape(size) - - def empty( - self, - *size, - out=None, - dtype=None, - layout=torch.strided, - device=None, - requires_grad=False, - pin_memory=False, - memory_format=torch.contiguous_format, - ): - """ - Returns a tensor filled with uninitialized data. The shape of the tensor is defined by the variable argument size. - The tensor is allocated on the Iris symmetric heap. - - Note: - If torch.use_deterministic_algorithms() and torch.utils.deterministic.fill_uninitialized_memory are both set to True, - the output tensor is initialized to prevent any possible nondeterministic behavior from using the data as an input to an operation. - Floating point and complex tensors are filled with NaN, and integer tensors are filled with the maximum value. - - Args: - *size (int...): a sequence of integers defining the shape of the output tensor. 
- Can be a variable number of arguments or a collection like a list or tuple. - - Keyword Arguments: - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). - layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory. - Works only for CPU tensors. Default: False. Note: Iris tensors are always on GPU. - memory_format (torch.memory_format, optional): the desired memory format of returned Tensor. - Default: torch.contiguous_format. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.empty(2, 3) - >>> print(tensor.shape) # torch.Size([2, 3]) - """ - self.debug( - f"empty: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" - ) - - # Use global default dtype if None is provided - if dtype is None: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Apply the requested memory format - tensor = self.__apply_memory_format(tensor, size, memory_format) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def randint( - self, *args, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False - ): - """ - Returns a tensor filled with random integers generated uniformly between low (inclusive) and high (exclusive). - The shape of the tensor is defined by the variable argument size. - The tensor is allocated on the Iris symmetric heap. - - Note: - With the global dtype default (torch.float32), this function returns a tensor with dtype torch.int64. - - Args: - low (int, optional): Lowest integer to be drawn from the distribution. Default: 0. - high (int): One above the highest integer to be drawn from the distribution. 
- size (tuple): a tuple defining the shape of the output tensor. - - Keyword Arguments: - generator (torch.Generator, optional): a pseudorandom number generator for sampling. - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): if None, this function returns a tensor with dtype torch.int64. - layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided. - device (torch.device, optional): the desired device of returned tensor. Default: if None, uses the current device. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.randint(0, 10, (2, 3)) # Random integers [0, 10) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([7, 2, 9], device='cuda:0') - """ - self.debug(f"randint: args = {args}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}") - - # Parse arguments to determine low, high, and size - # PyTorch randint signatures: - # randint(high, size) - where high is the upper bound and size is the shape - # randint(low, high, size) - where low and high are bounds, size is the shape - if len(args) == 2: - # randint(high, size) - high, size = args - low = 0 - elif len(args) == 3: - # randint(low, high, size) - low, high, size = args - else: - raise ValueError(f"randint expects 2 or 3 positional arguments, got {len(args)}") - - # Use default dtype if None is provided - if dtype is None: - dtype = torch.int64 - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Create a reshaped 
view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Generate random integers using PyTorch's randint - # Use specified device or fall back to current device - target_device = device if device is not None else self.device - - # Handle generator parameter - if generator is not None: - torch.randint(low, high, size, generator=generator, out=tensor, dtype=dtype, device=target_device) - else: - torch.randint(low, high, size, out=tensor, dtype=dtype, device=target_device) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def linspace(self, start, end, steps, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): - """ - Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to end, inclusive. - The tensor is allocated on the Iris symmetric heap. - - The values are: - (start, start + (end-start)/(steps-1), ..., start + (steps-2)*(end-start)/(steps-1), end) - - Args: - start (float or Tensor): the starting value for the set of points. If Tensor, it must be 0-dimensional. - end (float or Tensor): the ending value for the set of points. If Tensor, it must be 0-dimensional. - steps (int): size of the constructed tensor. - - Keyword Arguments: - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the data type to perform the computation in. - Default: if None, uses the global default dtype when both start and end are real, - and corresponding complex dtype when either is complex. - layout (torch.layout, optional): the desired layout of returned Tensor. Default: torch.strided. - device (torch.device, optional): the desired device of returned tensor. Default: if None, uses the current device. 
- requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.linspace(0, 10, 5) # [0, 2.5, 5, 7.5, 10] - >>> print(tensor) # tensor([ 0.0000, 2.5000, 5.0000, 7.5000, 10.0000], device='cuda:0') - """ - self.debug( - f"linspace: start = {start}, end = {end}, steps = {steps}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}" - ) - - # Use global default dtype if None is provided - if dtype is None: - # Check if start or end are complex numbers - start_is_complex = isinstance(start, complex) or (hasattr(start, "dtype") and torch.is_complex(start)) - end_is_complex = isinstance(end, complex) or (hasattr(end, "dtype") and torch.is_complex(end)) - - if start_is_complex or end_is_complex: - # Infer complex dtype based on default dtype - dtype = torch.complex64 if torch.get_default_dtype() == torch.float32 else torch.complex128 - else: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse steps and extract the integer value - if isinstance(steps, (tuple, list)): - if len(steps) == 1: - # Single-element tuple/list like (5,) or [5] - steps_int = steps[0] - # Handle nested tuples like ((5,),) - if isinstance(steps_int, (tuple, list)): - steps_int = steps_int[0] - else: - # Multi-element tuple/list - use __parse_size for compatibility - size, num_elements = self.__parse_size(steps) - steps_int = num_elements - else: - # steps is a single integer - steps_int = steps - - # Ensure steps_int is an integer - steps_int = int(steps_int) - size = (steps_int,) - num_elements = steps_int - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Create a reshaped view of the out tensor - tensor = 
out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Generate linspace using PyTorch's linspace - # Use specified device or fall back to current device - target_device = device if device is not None else self.device - torch.linspace(start, end, steps_int, out=tensor, dtype=dtype, device=target_device) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def rand( - self, - *size, - generator=None, - out=None, - dtype=None, - layout=torch.strided, - device=None, - requires_grad=False, - pin_memory=False, - ): - """ - Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1). - The tensor is allocated on the Iris symmetric heap. - - Args: - *size (int...): a sequence of integers defining the shape of the output tensor. - Can be a variable number of arguments or a collection like a list or tuple. - - Keyword Arguments: - generator (torch.Generator, optional): a pseudorandom number generator for sampling. - out (Tensor, optional): the output tensor. - dtype (torch.dtype, optional): the desired data type of returned tensor. - Default: if None, uses a global default (see torch.set_default_dtype()). - layout (torch.layout, optional): the desired layout of returned Tensor. - Default: torch.strided. Note: Iris tensors always use `torch.strided` regardless of this parameter. - device (torch.device, optional): the desired device of returned tensor. - Default: if None, uses the current device for the default tensor type. - requires_grad (bool, optional): If autograd should record operations on the returned tensor. - Default: False. - pin_memory (bool, optional): If set, returned tensor would be allocated in the pinned memory. - Works only for CPU tensors. Default: False. 
Note: Iris tensors are always on GPU. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> tensor = ctx.rand(2, 3) # Random values in [0, 1) - >>> print(tensor.shape) # torch.Size([2, 3]) - >>> print(tensor[0]) # tensor([0.1234, 0.5678, 0.9012], device='cuda:0') - """ - self.debug( - f"rand: size = {size}, dtype = {dtype}, device = {device}, requires_grad = {requires_grad}, pin_memory = {pin_memory}" - ) - - # Use global default dtype if None is provided - if dtype is None: - dtype = torch.get_default_dtype() - - # Use current device if none specified - if device is None: - device = self.device - - # Validate device compatibility with Iris - self.__throw_if_invalid_device(device) - - # Parse size and calculate number of elements - size, num_elements = self.__parse_size(size) - - # If out is provided, use it; otherwise allocate new tensor - if out is not None: - self.__throw_if_invalid_output_tensor(out, num_elements, dtype) - # Create a reshaped view of the out tensor - tensor = out.view(size) - else: - tensor = self.__allocate(num_elements=num_elements, dtype=dtype) - # Reshape to the desired size - tensor = tensor.reshape(size) - - # Generate random numbers using PyTorch's rand - # Use specified device (already validated and set above) - - # Handle generator parameter - if generator is not None: - torch.rand(size, generator=generator, out=tensor, dtype=dtype, device=device) - else: - torch.rand(size, out=tensor, dtype=dtype, device=device) - - # Apply the requested layout - tensor = self.__apply_layout(tensor, layout) - - # Set requires_grad if specified - if requires_grad: - tensor.requires_grad_() - - return tensor - - def __deallocate(self, pointer): - pass - - def get_heap_bases(self): - """ - Return the tensor of symmetric heap base addresses for all ranks. - - Returns: - torch.Tensor: A 1D tensor of ``uint64`` heap base addresses of size ``num_ranks`` - on the Iris device. Pass this to device-side Triton kernels that require - heap translation. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> heap_bases = ctx.get_heap_bases() - >>> print(heap_bases.shape) # torch.Size([num_ranks]) - """ - return self.heap_bases - - def barrier(self, stream=None, group=None): - """ - Synchronize ranks within the specified group and their CUDA devices. - - This first calls ``torch.cuda.synchronize()`` or ``stream.synchronize()`` to ensure the local GPU has - finished all queued work, then performs a distributed barrier so that all - ranks in the group reach the same point before proceeding. - - Args: - stream: If stream is given: wait only for that stream before barrier. If stream is None: legacy behavior (device-wide sync). - group (ProcessGroup, optional): The process group to synchronize. - If None, uses the default process group (all ranks). - - Example: - >>> ctx = iris.iris(1 << 20) - >>> ctx.barrier() # Synchronize all ranks - >>> ctx.barrier(group=my_group) # Synchronize only ranks in my_group - """ - # Wait for all GPUs to finish work - if stream is None: - torch.cuda.synchronize() - else: - stream.synchronize() - - # Distributed barrier - distributed_barrier(group=group) - - def get_device(self): - """ - Get the underlying device where the Iris symmetric heap resides. - - Returns: - torch.device: The CUDA device of Iris-managed memory. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> device = ctx.get_device() - >>> print(device) # cuda:0 - """ - return self.heap.get_device() - - def get_cu_count(self): - """ - Get the number of compute units (CUs) for the current GPU. - - Returns: - int: Number of compute units on this rank's GPU. - - Example: - >>> ctx = iris.iris(1 << 20) - >>> cu_count = ctx.get_cu_count() - >>> print(f"GPU has {cu_count} CUs") # GPU has 304 CUs - """ - return get_cu_count(self.gpu_id) - - def get_rank(self): - """ - Get this process's rank id in the distributed communicator. - - Returns: - int: Zero-based rank id of the current process. 
- - Example: - >>> ctx = iris.iris(1 << 20) - >>> rank = ctx.get_rank() - >>> print(f"This is rank {rank}") # This is rank 0 - """ - return self.cur_rank - - def get_num_ranks(self): - """ - Get the total number of ranks in the distributed communicator. - - Returns: - int: World size (number of ranks). - - Example: - >>> ctx = iris.iris(1 << 20) - >>> num_ranks = ctx.get_num_ranks() - >>> print(f"Total ranks: {num_ranks}") # Total ranks: 1 - """ - return self.num_ranks - - def __throw_if_invalid_output_tensor(self, tensor: torch.Tensor, num_elements: int, dtype: torch.dtype): - if not self.__tensor_on_device(tensor): - raise RuntimeError( - f"The output tensor is not on the same device as the Iris instance. The Iris instance is on device {self.device} but the output tensor is on device {tensor.device}" - ) - if not self.__on_symmetric_heap(tensor): - raise RuntimeError( - f"The output tensor is not on the symmetric heap. The Iris instance is on heap base {self.heap_bases[self.cur_rank]} but the output tensor is on heap base {tensor.data_ptr()}" - ) - if tensor.numel() != num_elements: - raise RuntimeError(f"The output tensor has {tensor.numel()} elements, but {num_elements} are required") - if tensor.dtype != dtype: - raise RuntimeError(f"The output tensor has dtype {tensor.dtype}, but {dtype} is required") - - def __throw_if_invalid_device(self, device): - """ - Throw a RuntimeError if the requested device is not compatible with this Iris instance. - - Args: - device: The requested device (can be string, torch.device, or None) - - Raises: - RuntimeError: If the device is not compatible - """ - if not self.__is_valid_device(device): - raise RuntimeError( - f"Device mismatch: requested device {device} but Iris instance is on device {self.device}. " - f"Iris only supports tensors on its own device." 
- ) - - def __apply_memory_format( - self, tensor: torch.Tensor, size: tuple, memory_format: torch.memory_format, input_tensor: torch.Tensor = None - ): - """ - Apply the requested memory format to a tensor by setting appropriate strides. - This keeps the tensor on the symmetric heap while changing how PyTorch interprets the memory layout. - - Args: - tensor: The tensor to modify - size: The tensor's size/dimensions - memory_format: The desired memory format - input_tensor: The original input tensor (needed for preserve_format detection) - """ - if memory_format == torch.contiguous_format: - # Default format, no changes needed - return tensor - elif memory_format == torch.channels_last and len(size) == 4: - # For channels_last format: preserve shape (N, C, H, W) but change strides - # channels_last strides: [C*H*W, 1, C*W, C] for shape (N, C, H, W) - N, C, H, W = size[0], size[1], size[2], size[3] - # Keep the original shape (N, C, H, W) but use channels_last strides - tensor = self.__create_tensor_with_strides(tensor, size, (C * H * W, 1, C * W, C)) - return tensor - elif memory_format == torch.channels_last_3d and len(size) == 5: - # For channels_last_3d format: preserve shape (N, C, D, H, W) but change strides - # channels_last_3d strides: [C*D*H*W, 1, C*D*W, C*W, C] for shape (N, C, D, H, W) - N, C, D, H, W = size[0], size[1], size[2], size[3], size[4] - # Keep the original shape (N, C, D, H, W) but use channels_last_3d strides - tensor = self.__create_tensor_with_strides(tensor, size, (C * D * H * W, 1, C * D * W, C * W, C)) - return tensor - elif memory_format == torch.preserve_format: - # For preserve_format, we need to detect the input tensor's memory format - # and apply the same format to the output - if input_tensor is not None: - # Check the actual memory format of the input tensor - if len(size) == 4: - # Check if input tensor is in channels_last format by examining strides - # channels_last format has strides[1] == 1 (channels dimension is contiguous) 
- input_strides = input_tensor.stride() - if len(input_strides) == 4 and input_strides[1] == 1: - # Input is in channels_last format, preserve it - # Use the input tensor's actual shape, not the size parameter - input_shape = input_tensor.shape - if len(input_shape) == 4: - # Input is already in channels_last format (N, H, W, C) - new_size = input_shape - # Use the input tensor's strides directly - tensor = self.__create_tensor_with_strides(tensor, new_size, input_strides) - return tensor - elif len(size) == 5: - # Check if input tensor is in channels_last_3d format - input_strides = input_tensor.stride() - if len(input_strides) == 5 and input_strides[1] == 1: - # Input is in channels_last_3d format, preserve it - # Use the input tensor's actual shape, not the size parameter - input_shape = input_tensor.shape - if len(input_shape) == 5: - # Input is already in channels_last_3d format (N, D, H, W, C) - new_size = input_shape - # Use the input tensor's strides directly - tensor = self.__create_tensor_with_strides(tensor, new_size, input_strides) - return tensor - # If no special format detected or no input tensor provided, use contiguous format - return tensor - else: - # Unsupported format or dimension combination - self.debug( - f"Warning: Memory format {memory_format} not supported for {len(size)}D tensor, using contiguous format" - ) - # For unsupported formats, return the tensor as-is (contiguous) - return tensor - - def __create_tensor_with_strides(self, original_tensor: torch.Tensor, size: tuple, strides: tuple) -> torch.Tensor: - """ - Create a new tensor with the specified strides while keeping the data on the symmetric heap. 
- - Args: - original_tensor: The original tensor (source of data and heap allocation) - size: The tensor's size/dimensions - strides: The desired strides for the new memory format - - Returns: - A new tensor with the specified strides, data copied from original, on the same heap - """ - - # First, create a temporary tensor with the correct strides using PyTorch - temp_tensor = torch.empty_strided(size, strides, dtype=original_tensor.dtype, device=original_tensor.device) - - # Handle different cases based on whether size changes and what the strides indicate - if size != original_tensor.shape: - # Size is different - this might be a format change that requires permutation - # Check if this is a channels_last format by comparing strides - if len(size) == 4: - # For channels_last: expected strides are [H*W*C, 1, W*C, C] for shape (N, H, W, C) - N, H, W, C = size[0], size[1], size[2], size[3] - expected_strides = (H * W * C, 1, W * C, C) - if strides == expected_strides: - permuted = original_tensor.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) - else: - # If the size differs for other reasons, do not permute; just reshape if possible - try: - permuted = original_tensor.reshape(size) - except Exception: - raise ValueError( - "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." - ) - elif len(size) == 5: - # For channels_last_3d: expected strides are [D*H*W*C, 1, H*W*C, W*C, C] for shape (N, D, H, W, C) - N, D, H, W, C = size[0], size[1], size[2], size[3], size[4] - expected_strides = (D * H * W * C, 1, H * W * C, W * C, C) - if strides == expected_strides: - permuted = original_tensor.permute(0, 2, 3, 4, 1) # (N, C, D, H, W) -> (N, D, H, W, C) - else: - # If the size differs for other reasons, do not permute; just reshape if possible - try: - permuted = original_tensor.reshape(size) - except Exception: - raise ValueError( - "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." 
- ) - else: - # For other dimensions, just try to reshape - try: - permuted = original_tensor.reshape(size) - except Exception: - raise ValueError( - "Cannot safely permute or reshape tensor: size differs from original shape for unknown reason." - ) - else: - # Size is the same - this is a stride-only change (like channels_last with preserved shape) - # We need to reorder the data to match the new stride pattern - if len(size) == 4: - # Check if this is channels_last format with preserved shape - N, C, H, W = size[0], size[1], size[2], size[3] - expected_strides = (C * H * W, 1, C * W, C) - if strides == expected_strides: - permuted = original_tensor - else: - permuted = original_tensor - elif len(size) == 5: - # Check if this is channels_last_3d format with preserved shape - N, C, D, H, W = size[0], size[1], size[2], size[3], size[4] - expected_strides = (C * D * H * W, 1, C * D * W, C * W, C) - if strides == expected_strides: - permuted = original_tensor - else: - permuted = original_tensor - else: - permuted = original_tensor - - # Copy the permuted data to the temporary tensor - temp_tensor.copy_(permuted) - - # Now allocate a new tensor on our symmetric heap - num_elements = math.prod(size) - heap_tensor = self.__allocate(num_elements, original_tensor.dtype) - - # Reshape to the desired size - heap_tensor = heap_tensor.reshape(size) - - # Copy the data from the temporary tensor to our heap tensor - heap_tensor.copy_(temp_tensor) - - # Clean up the temporary tensor - del temp_tensor - - # Now we need to create a view with the correct strides - # We can't use as_strided directly on our heap tensor, but we can - # create a new tensor with the right strides and copy the data again - final_tensor = torch.as_strided(heap_tensor, size, strides) - - return final_tensor - - def __apply_layout(self, tensor: torch.Tensor, layout: torch.layout) -> torch.Tensor: - """ - Apply the requested layout to a tensor. 
- - Args: - tensor: The tensor to modify - layout: The desired layout - - Returns: - Tensor with the requested layout - """ - - if layout == torch.strided: - # Strided layout is the default - no changes needed - return tensor - else: - # Only support strided layout for now - raise ValueError(f"Layout {layout} not supported. Only torch.strided is currently supported.") - - def __tensor_on_device(self, tensor: torch.Tensor): - # Get the Iris device from memory_pool.device - iris_device = self.get_device() - tensor_device = tensor.device - - # For CUDA devices, check if they're compatible - if tensor_device.type == "cuda" and iris_device.type == "cuda": - if iris_device.index is None: - return True - return tensor_device.index == iris_device.index - - # For non-CUDA devices, they must be exactly equal - return tensor_device == iris_device - - def __on_symmetric_heap(self, tensor: torch.Tensor): - """Check if a tensor is allocated on the symmetric heap.""" - return self.heap.on_symmetric_heap(tensor) - - def __is_valid_device(self, device) -> bool: - """ - Check if the requested device is compatible with this Iris instance. - - Args: - device: The requested device (can be string, torch.device, or None) - - Returns: - bool: True if the device is compatible, False otherwise - """ - if device is None: - return True # None means use default device - - # Convert device strings to torch.device objects for proper comparison - requested_device = torch.device(device) if isinstance(device, str) else device - iris_device = self.get_device() - - # Check if both are CUDA devices - if requested_device.type == "cuda" and iris_device.type == "cuda": - # Check if index matches or if requested is "cuda" (any index) - if requested_device.index is None: - return True - else: - return requested_device.index == iris_device.index - - # For non-CUDA devices, always return False - return False - - class CCL: - """ - Collective Communication Library (CCL) interface for Iris. 
- - Provides collective operations that can be called as methods on the Iris instance. - Example usage: - >>> shmem = iris.iris() - >>> shmem.ccl.all_to_all(output_tensor, input_tensor) - """ - - def __init__(self, iris_instance): - """ - Initialize CCL with a reference to the parent Iris instance. - - Args: - iris_instance: The parent Iris instance - """ - self._iris = iris_instance - - def all_to_all(self, output_tensor, input_tensor, group=None, async_op=False, config=None): - """ - All-to-all collective operation. - - Each rank sends a tensor chunk to each other rank and receives - a tensor chunk from each other rank. Input/output tensors should have - shape (M, N * world_size) where each chunk of N columns corresponds to one rank. - - Args: - output_tensor: Output tensor of shape (M, N * world_size) - input_tensor: Input tensor of shape (M, N * world_size) - group: ProcessGroup or None. If None, uses all ranks in shmem context. - Default: None. - async_op: If False, performs a barrier at the end. If True, returns immediately. - Default: False. - config: Config instance with kernel parameters (default: None). - If None, uses default Config values. - - Example: - >>> shmem = iris.iris() - >>> shmem.ccl.all_to_all(output_tensor, input_tensor) - - >>> # Custom configuration - >>> from iris.ccl import Config - >>> config = Config(block_size_m=128, block_size_n=32) - >>> shmem.ccl.all_to_all(output_tensor, input_tensor, config=config) - - >>> # Async operation (no barrier) - >>> shmem.ccl.all_to_all(output_tensor, input_tensor, async_op=True) - """ - from iris.ccl.all_to_all import all_to_all as _all_to_all - - _all_to_all(output_tensor, input_tensor, self._iris, group=group, async_op=async_op, config=config) - - def all_gather(self, output_tensor, input_tensor, group=None, async_op=False, config=None): - """ - All-gather collective operation. 
- - Each rank sends its input tensor to all ranks, and all ranks receive - and concatenate all input tensors along dimension 0 (rows), matching - torch.distributed.all_gather_into_tensor behavior. - - Args: - output_tensor: Output tensor of shape (world_size * M, N) - will contain concatenated inputs - input_tensor: Input tensor of shape (M, N) - local rank's data to send - group: ProcessGroup or None. If None, uses all ranks in shmem context. - Default: None. - async_op: If False, performs a barrier at the end. If True, returns immediately. - Default: False. - config: Config instance with kernel parameters (default: None). - If None, uses default Config values. - - Example: - >>> shmem = iris.iris() - >>> # Input: (M, N), Output: (world_size * M, N) - >>> shmem.ccl.all_gather(output_tensor, input_tensor) - - >>> # Custom configuration - >>> from iris.ccl import Config - >>> config = Config(block_size_m=128, block_size_n=32) - >>> shmem.ccl.all_gather(output_tensor, input_tensor, config=config) - - >>> # Async operation (no barrier) - >>> shmem.ccl.all_gather(output_tensor, input_tensor, async_op=True) - """ - from iris.ccl.all_gather import all_gather as _all_gather - - _all_gather(output_tensor, input_tensor, self._iris, group=group, async_op=async_op, config=config) - - def all_reduce_preamble(self, output_tensor, input_tensor, config=None, workspace=None): - """ - Prepare reusable workspace for all-reduce. - - Args: - output_tensor: Output tensor that will receive the reduced data. - input_tensor: Input tensor providing the local contribution. - config: Optional Config describing variant parameters. - workspace: Optional existing workspace to update/reuse. - - Returns: - Workspace object that can be passed to ``all_reduce``. 
- """ - from iris.ccl.all_reduce import all_reduce_preamble as _all_reduce_preamble - - return _all_reduce_preamble( - output_tensor, - input_tensor, - self._iris, - config=config, - workspace=workspace, - ) - - def all_reduce( - self, output_tensor, input_tensor, op=None, group=None, async_op=False, config=None, workspace=None - ): - """ - All-reduce collective operation. - - Each rank has a local input tensor, and all ranks compute the sum of all - input tensors. The result is written to output_tensor on all ranks. - - Args: - output_tensor: Output tensor of shape (M, N) - will contain sum of all inputs - input_tensor: Input tensor of shape (M, N) - local rank's partial data - op: Reduction operation to apply. Currently only ReduceOp.SUM is supported. - Default: ReduceOp.SUM. - group: ProcessGroup or None. If None, uses all ranks in shmem context. - Default: None. - async_op: If False, performs a barrier at the end. If True, returns immediately. - Default: False. - config: Config instance with kernel parameters (default: None). - If None, uses default Config values. - Set config.all_reduce_variant to choose variant: "atomic", "ring", or "two_shot" - workspace: Optional workspace prepared by ``all_reduce_preamble`` to - reuse internal buffers across invocations. 
- - Example: - >>> shmem = iris.iris() - >>> shmem.ccl.all_reduce(output_tensor, input_tensor) - - >>> # Custom configuration with ring variant - >>> from iris.ccl import Config - >>> config = Config(all_reduce_variant="ring") - >>> shmem.ccl.all_reduce(output_tensor, input_tensor, config=config) - - >>> # Two-shot variant with block distribution - >>> config = Config(all_reduce_variant="two_shot", all_reduce_distribution=1) - >>> shmem.ccl.all_reduce(output_tensor, input_tensor, config=config) - - >>> # Async operation (no barrier) - >>> shmem.ccl.all_reduce(output_tensor, input_tensor, async_op=True) - """ - from iris.ccl.all_reduce import all_reduce as _all_reduce - from iris.ccl import ReduceOp - - # Default to SUM if not specified - if op is None: - op = ReduceOp.SUM - - return _all_reduce( - output_tensor, - input_tensor, - self._iris, - op=op, - group=group, - async_op=async_op, - config=config, - workspace=workspace, - ) - - def reduce_scatter(self, output_tensor, input_tensor, op=None, group=None, async_op=False, config=None): - """ - Reduce-scatter collective operation. - - Each rank reduces its assigned tiles from all ranks' inputs and stores - the result only to its own output tensor. This is similar to all-reduce - but without broadcasting the result to all ranks. - - Args: - output_tensor: Output tensor of shape (M, N) - will contain reduced tiles for this rank - input_tensor: Input tensor of shape (M, N) - local rank's partial data - op: Reduction operation to apply. Currently only ReduceOp.SUM is supported. - Default: ReduceOp.SUM. - group: ProcessGroup or None. If None, uses all ranks in shmem context. - Default: None. - async_op: If False, performs a barrier at the end. If True, returns immediately. - Default: False. - config: Config instance with kernel parameters (default: None). - If None, uses default Config values. - Only supports reduce_scatter_variant="two_shot". 
- - Example: - >>> shmem = iris.iris() - >>> shmem.ccl.reduce_scatter(output_tensor, input_tensor) - - >>> # Custom configuration - >>> from iris.ccl import Config - >>> config = Config(reduce_scatter_variant="two_shot", all_reduce_distribution=1) - >>> shmem.ccl.reduce_scatter(output_tensor, input_tensor, config=config) - """ - from iris.ccl.reduce_scatter import reduce_scatter as _reduce_scatter - from iris.ccl import ReduceOp - - # Default to SUM if not specified - if op is None: - op = ReduceOp.SUM - - _reduce_scatter( - output_tensor, input_tensor, self._iris, op=op, group=group, async_op=async_op, config=config - ) - - -@triton.jit -def __translate(ptr, from_rank, to_rank, heap_bases): - from_base = tl.load(heap_bases + from_rank) - to_base = tl.load(heap_bases + to_rank) - # convert to int to compute difference - ptr_int = tl.cast(ptr, tl.uint64) - # Find the offset from from_rank heap - offset = ptr_int - from_base - # Byte cast for byte offset addition - to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) - # Find the offset into the to_rank heap - translated_ptr_byte = to_base_byte + offset - # Cast to_base back to pointer type - translated_ptr = tl.cast(translated_ptr_byte, ptr.dtype) - - # Optimization to vectorize the load/store - # We can't do this in general because we don't know the shape of the tensor or block sizes - # ptr = tl.max_contiguous(tl.multiple_of(ptr, (16, 16)), (16, 32)) - - # 0 You can use this if your block sizes are multiples of 32. - # Largest vectorized load instruction is dwordx4 (128-bits) - translated_ptr = tl.multiple_of(translated_ptr, (32, 32)) - translated_ptr = tl.max_contiguous(translated_ptr, (32, 32)) - - # ptr = tl.max_contiguous(tl.multiple_of(ptr, 512), 512) - # translated_ptr = tl.max_contiguous(tl.multiple_of(translated_ptr, 512), 512) - return translated_ptr - - -@triton.jit -def load(pointer, to_rank, from_rank, heap_bases, mask=None): - """ - Loads a value from the specified rank's memory location. 
- - This function performs a memory read operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and loading - data from the target memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local load operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the pointer will be translated. Must be the current rank where the pointer is local. - from_rank (int): The rank ID from which to read the data. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address pointer[idx]. Defaults to None. - - Returns: - Block: The loaded value from the target memory location. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Load data from rank 1's memory into the current rank - >>> cur_rank = 0 # Current rank - >>> remote_rank = 1 # Remote rank to load from - >>> data = iris.load(ptr, cur_rank, remote_rank, heap_bases) - >>> return data - """ - translated_ptr = __translate(pointer, to_rank, from_rank, heap_bases) - result = tl.load(translated_ptr, mask=mask) - return result - - -@triton.jit -def store(pointer, value, from_rank, to_rank, heap_bases, mask=None): - """ - Writes data to the specified rank's memory location. - - This function performs a memory write operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and storing - the provided data to the target memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local store operation. 
- - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - value (Block): The tensor of elements to be stored. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the data will be written. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not store the data at address pointer[idx]. Defaults to None. - - Returns: - None - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Store value 42 into rank 1's heap from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> value = 42 - >>> iris.store(ptr, value, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - tl.store(translated_ptr, value, mask=mask) - - -@triton.jit -def copy(src_ptr, dst_ptr, from_rank, to_rank, cur_rank, heap_bases, mask=None): - """ - Copies data from the specified rank's memory into the destination rank's memory. - This function performs the transfer by translating `src_ptr` from the `from_rank`'s address - space to the `to_rank`'s address space, performing a masked load from the translated - source, and storing the loaded data to `dst_ptr` in the `to_rank` memory location. - If `from_rank` and `to_rank` are the same, this function performs a local copy operation. - It is undefined behaviour if neither `from_rank` nor `to_rank` is the `cur_rank`. - - Args: - src_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s local memory from which to read data. 
- dst_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `to_rank`'s local memory where the data will be written. - from_rank (int): The rank ID that owns `src_ptr` (source rank). - to_rank (int): The rank ID that will receive the data (destination rank). - cur_rank (int): The rank ID issuing the copy operation. Must be either `from_rank` or `to_rank`. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not load from the translated src_ptr[idx] and do not store to dst_ptr[idx]. Defaults to None. - - Returns: - None - - Example: - >>> @triton.jit - >>> def kernel(remote_ptr, local_ptr, heap_bases): - >>> from_rank = 1 - >>> to_rank = 0 - >>> iris.copy(remote_ptr, local_ptr, from_rank, to_rank, to_rank, heap_bases) - """ - - cur_base = tl.load(heap_bases + cur_rank) - - from_base = tl.load(heap_bases + from_rank) - to_base = tl.load(heap_bases + to_rank) - - src_ptr_int = tl.cast(src_ptr, tl.uint64) - src_offset = src_ptr_int - cur_base - - dst_ptr_int = tl.cast(dst_ptr, tl.uint64) - dst_offset = dst_ptr_int - cur_base - - from_base_byte = tl.cast(from_base, tl.pointer_type(tl.int8)) - to_base_byte = tl.cast(to_base, tl.pointer_type(tl.int8)) - - translated_src = tl.cast(from_base_byte + src_offset, src_ptr.dtype) - translated_dst = tl.cast(to_base_byte + dst_offset, src_ptr.dtype) - - data = tl.load(translated_src, mask=mask) - tl.store(translated_dst, data, mask=mask) - - -@triton.jit -def get(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): - """ - Copies data from the specified rank's memory to the current rank's local memory. - - This function performs a memory read operation by translating the `from_ptr` - from the current rank's address space to the `from_rank`'s address space, loading data - from the `from_rank` memory location, and storing it to the local `to_ptr`. 
- If the `from_rank` is the same as the current rank, this function performs a local copy operation. - - Args: - from_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that will be translated to the `from_rank`'s address space. Must be the current rank where the pointer is local. - to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's local memory where the data will be stored. - from_rank (int): The `from_rank` ID from which to read the data. - to_rank (int): The current rank ID where the data will be stored. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address from_ptr[idx] and do not store to to_ptr[idx]. Defaults to None. - - Returns: - None - - Example: - >>> @triton.jit - >>> def kernel(remote_ptr, local_ptr, heap_bases): - >>> from_rank = 1 - >>> to_rank = 0 - >>> iris.get(remote_ptr, local_ptr, from_rank, to_rank, heap_bases) - """ - translated_from_ptr = __translate(from_ptr, from_rank, to_rank, heap_bases) - - data = tl.load(translated_from_ptr, mask=mask) - - tl.store(to_ptr, data, mask=mask) - - -@triton.jit -def put(from_ptr, to_ptr, from_rank, to_rank, heap_bases, mask=None): - """ - Copies data from the current rank's local memory to the specified rank's memory. - This function performs a memory write operation by loading data from the current - rank's `from_ptr`, translating the `to_ptr` from the current rank's address - space to the `to_rank`'s address space, and storing the data to the `to_rank` memory location. - If the `to_rank` is the same as the current rank, this function performs a local copy operation. - - Args: - from_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's local memory from which to read data. 
- to_ptr (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the current rank's address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - from_rank (int): The current rank ID from which to read the data. - to_rank (int): The `to_rank` ID to which the data will be written. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not load the data at address from_ptr[idx] and do not store to to_ptr[idx]. Defaults to None. - - Returns: - None - - Example: - >>> @triton.jit - >>> def kernel(local_ptr, remote_ptr, heap_bases): - >>> from_rank = 0 - >>> to_rank = 1 - >>> iris.put(local_ptr, remote_ptr, from_rank, to_rank, heap_bases) - """ - translated_to_ptr = __translate(to_ptr, from_rank, to_rank, heap_bases) - - data = tl.load(from_ptr, mask=mask) - - tl.store(translated_to_ptr, data, mask=mask) - - -@triton.jit -def atomic_add(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic add at the specified rank's memory location. - - This function performs an atomic addition operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - adding the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic addition operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. 
- to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically add 5 to rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> increment = 5 - >>> old_val = iris.atomic_add(ptr, increment, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_add(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_sub(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Atomically subtracts data from the specified rank's memory location. - - This function performs an atomic subtraction operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - subtracting the provided data from the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic subtraction operation. 
- - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block): The tensor of elements to be subtracted atomically. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". Defaults to "acq_rel". - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). Defaults to "gpu". - - Returns: - Block: The value at the memory location before the atomic subtraction. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically subtract 3 from rank 2's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 2 # Remote rank (destination) - >>> decrement = 3 - >>> old_val = iris.atomic_sub(ptr, decrement, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_sub(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_cas(pointer, cmp, val, from_rank, to_rank, heap_bases, sem=None, scope=None): - """ - Atomically compares and exchanges the specified rank's memory location. 
- - This function performs an atomic compare-and-swap operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - comparing the current value with the expected value, then writing the new value if they match. - If the `from_rank` and `to_rank` are the same, this function performs a local atomic compare-and-swap operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): Pointer in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - cmp (Block): The expected value to be compared with the current value at the memory location. - val (Block): The new value to be written if the compare succeeds. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". Defaults to "acq_rel". - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). Defaults to "gpu". - - Returns: - Block: The value contained at the memory location before the atomic operation attempt. 
- - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Compare-and-swap on rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> expected = 0 - >>> new_val = 42 - >>> old_val = iris.atomic_cas(ptr, expected, new_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_cas(translated_ptr, cmp, val, sem=sem, scope=scope) - - -@triton.jit -def atomic_xchg(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic exchange at the specified rank's memory location. - - This function performs an atomic exchange operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - exchanging the current value with the provided new value. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic exchange operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". 
If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Exchange value with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> new_value = 99 - >>> old_val = iris.atomic_xchg(ptr, new_value, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_xchg(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_xor(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic xor at the specified rank's memory location. - - This function performs an atomic xor operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - xoring the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic xor operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. 
- heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically XOR with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> mask_val = 0xFF - >>> old_val = iris.atomic_xor(ptr, mask_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_xor(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_and(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic and at the specified rank's memory location. - - This function performs an atomic and operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - anding the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic and operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. 
Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically AND with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> mask_val = 0x0F - >>> old_val = iris.atomic_and(ptr, mask_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_and(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_or(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic or at the specified rank's memory location. 
- - This function performs an atomic or operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - oring the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic or operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. 
- - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically OR with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> mask_val = 0xF0 - >>> old_val = iris.atomic_or(ptr, mask_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_or(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_min(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic min at the specified rank's memory location. - - This function performs an atomic min operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - performing the min on the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic min operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. - heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". 
If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically find minimum with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> new_val = 10 - >>> old_val = iris.atomic_min(ptr, new_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_min(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -@triton.jit -def atomic_max(pointer, val, from_rank, to_rank, heap_bases, mask=None, sem=None, scope=None): - """ - Performs an atomic max at the specified rank's memory location. - - This function performs an atomic max operation by translating the pointer - from the `from_rank`'s address space to the `to_rank`'s address space and atomically - performing the max on the provided data to the `to_rank` memory location. If the `from_rank` and `to_rank` are the same, - this function performs a local atomic max operation. - - Args: - pointer (triton.PointerType, or block of dtype=triton.PointerType): The memory locations in the `from_rank`'s address space that will be translated to the `to_rank`'s address space. Must be the current rank where the pointer is local. - val (Block of dtype=pointer.dtype.element_ty): The values with which to perform the atomic operation. - from_rank (int): The rank ID from which the pointer originates. Must be the current rank where the pointer is local. - to_rank (int): The rank ID to which the atomic operation will be performed. 
- heap_bases (triton.PointerType): Array containing the heap base addresses for all ranks. - mask (Block of triton.int1, optional): If mask[idx] is false, do not perform the atomic operation at address pointer[idx]. Defaults to None. - sem (str, optional): Specifies the memory semantics for the operation. Acceptable values are "acquire", "release", "acq_rel" (stands for "ACQUIRE_RELEASE"), and "relaxed". If not provided, the function defaults to using "acq_rel" semantics. - scope (str, optional): Defines the scope of threads that observe the synchronizing effect of the atomic operation. Acceptable values are "gpu" (default), "cta" (cooperative thread array, thread block), or "sys" (stands for "SYSTEM"). The default value is "gpu". - - Returns: - Block: The data stored at pointer before the atomic operation. - - Example: - >>> @triton.jit - >>> def kernel(ptr, heap_bases): - >>> # Atomically find maximum with rank 1's memory from rank 0 - >>> cur_rank = 0 # Current rank (source) - >>> remote_rank = 1 # Remote rank (destination) - >>> new_val = 100 - >>> old_val = iris.atomic_max(ptr, new_val, cur_rank, remote_rank, heap_bases) - """ - translated_ptr = __translate(pointer, from_rank, to_rank, heap_bases) - return tl.atomic_max(translated_ptr, val, mask=mask, sem=sem, scope=scope) - - -def iris(heap_size=1 << 30): - """ - Create and return an Iris instance with the specified heap size. - - Args: - heap_size (int): Size of the heap in bytes. Defaults to 1GB. - - Returns: - Iris: An initialized Iris instance. 
- - Example: - >>> import iris - >>> iris_ctx = iris.iris(2**30) # 1GB heap - >>> tensor = iris_ctx.zeros(1024, 1024) - """ - return Iris(heap_size) diff --git a/iris/ops/__init__.py b/iris/ops/__init__.py index a6ed4a659..c96fa32e5 100644 --- a/iris/ops/__init__.py +++ b/iris/ops/__init__.py @@ -173,7 +173,6 @@ def matmul_reduce_scatter(self, output_tensor, A, B, async_op=False, config=None # Namespace "OpsNamespace", # Operations - "matmul", # Simple single-GPU GEMM "matmul_all_reduce", "matmul_all_reduce_preamble", "all_gather_matmul", diff --git a/iris/ops/all_gather_matmul.py.with_chunked b/iris/ops/all_gather_matmul.py.with_chunked deleted file mode 100644 index ddc03d027..000000000 --- a/iris/ops/all_gather_matmul.py.with_chunked +++ /dev/null @@ -1,521 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Fused All-Gather + GEMM operation using pull pattern. - -Each rank has a column-sharded input A_sharded (M x K_local). -This operation computes C = all_gather(A_sharded) @ B by pulling -tiles from remote ranks on-demand during GEMM computation. 
-""" - -from typing import Optional -import torch -import triton -import triton.language as tl -import iris -import iris.x - -from tritonblas.kernels.stages.algorithms.binary import add_vector -from tritonblas.kernels.stages.algorithms.unary import convert_dtype - -from .config import FusedConfig -from .workspace import FusedWorkspace - - -@triton.jit() -def _fused_all_gather_matmul_kernel( - A_sharded, - B, - C, - bias_ptr, - M: tl.constexpr, - N: tl.constexpr, - K: tl.constexpr, - K_local: tl.constexpr, - stride_am: tl.constexpr, - stride_ak: tl.constexpr, - stride_bk: tl.constexpr, - stride_bn: tl.constexpr, - stride_cm: tl.constexpr, - stride_cn: tl.constexpr, - stride_bias: tl.constexpr, - heap_bases: tl.tensor, - cur_rank: tl.constexpr, - world_size: tl.constexpr, - BLOCK_SIZE_M: tl.constexpr, - BLOCK_SIZE_N: tl.constexpr, - BLOCK_SIZE_K: tl.constexpr, - GROUP_SIZE_M: tl.constexpr, - NUM_SMS: tl.constexpr, - NUM_XCDS: tl.constexpr, - BIAS: tl.constexpr, - EVEN_K: tl.constexpr, - ALLOW_TF32: tl.constexpr, -): - """Fused all-gather + GEMM kernel using pull pattern.""" - pid = tl.program_id(0) - - # Handle multi-XCD devices - if NUM_XCDS != 1: - pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) - - num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) - num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) - total_tiles = num_pid_m * num_pid_n - - tl.assume(stride_am > 0) - tl.assume(stride_ak > 0) - tl.assume(stride_bk > 0) - tl.assume(stride_bn > 0) - tl.assume(stride_cm > 0) - tl.assume(stride_cn > 0) - - acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 - - # Persistent loop over output tiles - for tile_id in range(pid, total_tiles, NUM_SMS): - # Compute tile coordinates with swizzling - num_pid_in_group = GROUP_SIZE_M * num_pid_n - group_id = tile_id // num_pid_in_group - first_pid_m = group_id * GROUP_SIZE_M - group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) - pid_m = first_pid_m + ((tile_id % num_pid_in_group) % group_size_m) - pid_n = (tile_id 
% num_pid_in_group) // group_size_m - - # Compute row and column indices - rm = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M - rn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N - rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) - rn = tl.max_contiguous(tl.multiple_of(rn, BLOCK_SIZE_N), BLOCK_SIZE_N) - - # Initialize accumulator - acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) - - # Create DeviceContext and TensorView for gather operations - ctx = iris.x.DeviceContext(cur_rank, world_size, heap_bases) - src_view = iris.x.TensorView(A_sharded, M, K_local, stride_am, stride_ak) - - # Loop over all ranks to pull and accumulate - for source_rank_id in range(world_size): - loop_k_local = tl.cdiv(K_local, BLOCK_SIZE_K) - if not EVEN_K: - loop_k_local -= 1 - - # Loop over K dimension for this rank's shard - for k_block_idx in range(0, loop_k_local): - k_offset = k_block_idx * BLOCK_SIZE_K - - # Create tile view for this K block - tile_k = k_offset // BLOCK_SIZE_K - k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) - - # Pull A tile from source_rank_id using gather primitive - a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) - - # Load B tile - rk_local = k_offset + tl.arange(0, BLOCK_SIZE_K) - rk_global = (source_rank_id * K_local) + rk_local - B_ptr = B + rk_global[:, None] * stride_bk + rn[None, :] * stride_bn - b = tl.load(tl.multiple_of(B_ptr, (16, 1))) - - # Accumulate - if ALLOW_TF32: - acc = tl.dot(a, b, acc, allow_tf32=True) - else: - acc += tl.dot(a, b, allow_tf32=False) - - # Handle remaining K elements if not evenly divisible - if not EVEN_K: - k_offset = loop_k_local * BLOCK_SIZE_K - tile_k = k_offset // BLOCK_SIZE_K - k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) - - # Pull A tile from source_rank_id using gather primitive - a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) - - rk_local = k_offset + tl.arange(0, BLOCK_SIZE_K) - rk_global = 
(source_rank_id * K_local) + rk_local - rk_global_mask = rk_global < K - B_ptr = B + rk_global[:, None] * stride_bk + rn[None, :] * stride_bn - b = tl.load(tl.multiple_of(B_ptr, (16, 1)), mask=rk_global_mask[:, None], other=0.0) - - if ALLOW_TF32: - acc = tl.dot(a, b, acc, allow_tf32=True) - else: - acc += tl.dot(a, b, allow_tf32=False) - - # Add bias if provided using tritonBLAS - if BIAS: - bias_vector = tl.load(bias_ptr + rm * stride_bias, mask=rm < M, other=0.0) - acc = add_vector(acc, bias_vector, QUANTIZED=False) - - # Convert to output dtype using tritonBLAS - c = convert_dtype(acc, C.type.element_ty) - - # Store result (manual for now, tritonBLAS store has issues with our indices) - C_ptr = ( - C - + (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] * stride_cm - + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] * stride_cn - ) - mask = ((pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] < M) & ( - (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] < N - ) - tl.store(C_ptr, c, mask=mask) - - -@triton.jit() -def _fused_chunked_all_gather_matmul_kernel( - A_sharded, - B, - C, - bias_ptr, - temp_buffer, # Temporary buffer: BLOCK_M x K x num_tiles - M: tl.constexpr, - N: tl.constexpr, - K: tl.constexpr, - K_local: tl.constexpr, - stride_am: tl.constexpr, - stride_ak: tl.constexpr, - stride_bk: tl.constexpr, - stride_bn: tl.constexpr, - stride_cm: tl.constexpr, - stride_cn: tl.constexpr, - stride_bias: tl.constexpr, - heap_bases: tl.tensor, - cur_rank: tl.constexpr, - world_size: tl.constexpr, - BLOCK_SIZE_M: tl.constexpr, - BLOCK_SIZE_N: tl.constexpr, - BLOCK_SIZE_K: tl.constexpr, - GROUP_SIZE_M: tl.constexpr, - NUM_SMS: tl.constexpr, - NUM_XCDS: tl.constexpr, - BIAS: tl.constexpr, - EVEN_K: tl.constexpr, - ALLOW_TF32: tl.constexpr, -): - """ - Fused all-gather + GEMM kernel using chunked/buffered pattern. - - This variant pre-gathers all of A into a temporary buffer before computing GEMM. 
- Eliminates the world_size loop by using iris.x.all_gather upfront. - - Memory layout: - - temp_buffer: BLOCK_M x K x num_tiles (stores gathered A for each tile) - - Each program gathers its M-tile of A, then does GEMM - """ - pid = tl.program_id(0) - - # Handle multi-XCD devices - if NUM_XCDS != 1: - pid = (pid % NUM_XCDS) * (NUM_SMS // NUM_XCDS) + (pid // NUM_XCDS) - - num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) - num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) - total_tiles = num_pid_m * num_pid_n - - tl.assume(stride_am > 0) - tl.assume(stride_ak > 0) - tl.assume(stride_bk > 0) - tl.assume(stride_bn > 0) - tl.assume(stride_cm > 0) - tl.assume(stride_cn > 0) - - acc_dtype = tl.int32 if C.type.element_ty == tl.int8 else tl.float32 - - # Persistent loop over output tiles - for tile_id in range(pid, total_tiles, NUM_SMS): - # Compute tile coordinates with swizzling - num_pid_in_group = GROUP_SIZE_M * num_pid_n - group_id = tile_id // num_pid_in_group - first_pid_m = group_id * GROUP_SIZE_M - group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) - pid_m = first_pid_m + ((tile_id % num_pid_in_group) % group_size_m) - pid_n = (tile_id % num_pid_in_group) // group_size_m - - # Compute row and column indices - rm = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M - rn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N - rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) - rn = tl.max_contiguous(tl.multiple_of(rn, BLOCK_SIZE_N), BLOCK_SIZE_N) - - # Buffer pointer for this tile: BLOCK_M x K for this pid_m - buffer_ptr = temp_buffer + tile_id * BLOCK_SIZE_M * K - - # Step 1: Pre-gather entire M-tile of A (BLOCK_M x K) - # Create DeviceContext and TensorView for gather operations - ctx = iris.x.DeviceContext(cur_rank, world_size, heap_bases) - src_view = iris.x.TensorView(A_sharded, M, K_local, stride_am, stride_ak) - - # Gather K-tiles from all ranks - for source_rank_id in range(world_size): - k_start = source_rank_id * K_local - # Loop over K 
dimension in blocks - for k_local_idx in range(0, K_local, BLOCK_SIZE_K): - k_global = k_start + k_local_idx - rk = k_global + tl.arange(0, BLOCK_SIZE_K) - rk_mask = rk < K - - tile_k = k_local_idx // BLOCK_SIZE_K - k_tile = iris.x.TileView(pid_m, tile_k, BLOCK_SIZE_M, BLOCK_SIZE_K) - - # Pull A tile from source_rank_id - a = iris.x.gather(k_tile, src_view, source_rank_id, ctx) - - # Store in buffer - buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] - tl.store(buffer_A_ptr, a, mask=rk_mask[None, :]) - - # Step 2: Standard GEMM from buffer - # Initialize accumulator - acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=acc_dtype) - - # Loop over K dimension - loop_k = tl.cdiv(K, BLOCK_SIZE_K) - if EVEN_K: - for k_block_idx in range(loop_k): - k_offset = k_block_idx * BLOCK_SIZE_K - - # Load A from temp buffer - rk = k_offset + tl.arange(0, BLOCK_SIZE_K) - buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] - a = tl.load(buffer_A_ptr) - - # Load B tile - B_ptr = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn - b = tl.load(tl.multiple_of(B_ptr, (16, 1))) - - # Accumulate - if ALLOW_TF32: - acc = tl.dot(a, b, acc, allow_tf32=True) - else: - acc += tl.dot(a, b, allow_tf32=False) - else: - # Handle case where K is not evenly divisible by BLOCK_SIZE_K - for k_block_idx in range(loop_k): - k_offset = k_block_idx * BLOCK_SIZE_K - - # Load A from temp buffer - rk = k_offset + tl.arange(0, BLOCK_SIZE_K) - rk_mask = rk < K - buffer_A_ptr = buffer_ptr + rm[:, None] * K + rk[None, :] - a = tl.load(buffer_A_ptr, mask=rk_mask[None, :], other=0.0) - - # Load B tile - B_ptr = B + rk[:, None] * stride_bk + rn[None, :] * stride_bn - b = tl.load(tl.multiple_of(B_ptr, (16, 1)), mask=rk_mask[:, None], other=0.0) - - if ALLOW_TF32: - acc = tl.dot(a, b, acc, allow_tf32=True) - else: - acc += tl.dot(a, b, allow_tf32=False) - - # Convert accumulator and add bias - c = convert_dtype(acc, C.type.element_ty) - if BIAS: - bias_offset = (pid_m * BLOCK_SIZE_M + tl.arange(0, 
BLOCK_SIZE_M)) * stride_bias - bias_val = tl.load(bias_ptr + bias_offset) - c = add_vector(c, bias_val, 0) - - # Store result - C_ptr = ( - C - + (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] * stride_cm - + (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] * stride_cn - ) - mask = ((pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M))[:, None] < M) & ( - (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))[None, :] < N - ) - tl.store(C_ptr, c, mask=mask) - - -def all_gather_matmul_preamble( - shmem, - A_sharded: torch.Tensor, - B: torch.Tensor, - config: Optional[FusedConfig] = None, -) -> FusedWorkspace: - """Allocate workspace for all_gather_matmul (buffer needed for chunked variant).""" - if config is None: - config = FusedConfig() - - M, K_local = A_sharded.shape - K, N = B.shape - world_size = shmem.get_num_ranks() - - expected_K = world_size * K_local - assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" - - # Detect hardware configuration - device = A_sharded.device - if config.num_sms is None: - import iris.hip - num_sms = iris.hip.get_cu_count(device.index) - else: - num_sms = config.num_sms - - if config.num_xcds == 1: - # Auto-detect XCDs if default value is used - import iris.hip - num_xcds = iris.hip.get_num_xcc(device.index) - else: - num_xcds = config.num_xcds - - # Allocate temporary buffer for chunked variant - aux_buffer = None - if config.all_gather_matmul_variant == "chunked": - # Calculate grid size to determine buffer size - num_tiles_m = (M + config.block_size_m - 1) // config.block_size_m - num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n - num_tiles = num_tiles_m * num_tiles_n - - # Allocate buffer: BLOCK_M x K x num_tiles - buffer_size = config.block_size_m * K * num_tiles - aux_buffer = torch.empty(buffer_size, dtype=A_sharded.dtype, device=device) - - return FusedWorkspace( - operation="all_gather_matmul", - shape=(M, N, K), - dtype=A_sharded.dtype, - 
world_size=world_size, - num_sms=num_sms, - num_xcds=num_xcds, - variant=config.all_gather_matmul_variant, - aux_buffer=aux_buffer, - prepared=True, - ) - - -def all_gather_matmul( - shmem, - output_tensor: torch.Tensor, - A_sharded: torch.Tensor, - B: torch.Tensor, - bias: Optional[torch.Tensor] = None, - async_op: bool = False, - config: Optional[FusedConfig] = None, - workspace: Optional[FusedWorkspace] = None, -) -> FusedWorkspace: - """Fused all-gather and matrix multiplication using pull pattern.""" - if config is None: - config = FusedConfig() - - M, K_local = A_sharded.shape - K, N = B.shape - world_size = shmem.get_num_ranks() - rank = shmem.get_rank() - - expected_K = world_size * K_local - assert K == expected_K, f"K ({K}) must equal world_size ({world_size}) * K_local ({K_local})" - assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" - - # Validate problem size against block sizes - assert M >= config.block_size_m, ( - f"M ({M}) must be >= block_size_m ({config.block_size_m}). Use smaller block sizes for small problems." - ) - assert K_local >= config.block_size_k, ( - f"K_local ({K_local}) must be >= block_size_k ({config.block_size_k}). " - f"Use smaller block sizes for small problems." - ) - assert N >= config.block_size_n, ( - f"N ({N}) must be >= block_size_n ({config.block_size_n}). Use smaller block sizes for small problems." 
- ) - - if workspace is None: - workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) - - stride_am, stride_ak = A_sharded.stride() - stride_bk, stride_bn = B.stride() - stride_cm, stride_cn = output_tensor.stride() - - if bias is not None: - assert bias.shape[0] == M - bias_ptr = bias - stride_bias = bias.stride()[0] if bias.dim() > 0 else 1 - use_bias = True - else: - bias_ptr = output_tensor - stride_bias = 1 - use_bias = False - - # Get hardware configuration from workspace - num_sms = workspace.num_sms - num_xcds = workspace.num_xcds - - even_k = K_local % config.block_size_k == 0 - - # Use SM-based grid (persistent kernels) - grid = (num_sms,) - - # Select kernel variant based on config - if config.all_gather_matmul_variant == "chunked": - # Chunked variant: pre-gather into buffer, then GEMM - assert workspace.aux_buffer is not None, "Chunked variant requires aux_buffer in workspace" - _fused_chunked_all_gather_matmul_kernel[grid]( - A_sharded, - B, - output_tensor, - bias_ptr, - workspace.aux_buffer, # Temporary buffer - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, - stride_bias, - shmem.heap_bases, - rank, - world_size, - config.block_size_m, - config.block_size_n, - config.block_size_k, - config.group_size_m, - num_sms, - num_xcds, - use_bias, - even_k, - config.allow_tf32, - ) - else: - # Pull variant (default): on-demand pull from remote ranks - _fused_all_gather_matmul_kernel[grid]( - A_sharded, - B, - output_tensor, - bias_ptr, - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, - stride_bias, - shmem.heap_bases, - rank, - world_size, - config.block_size_m, - config.block_size_n, - config.block_size_k, - config.group_size_m, - num_sms, - num_xcds, - use_bias, - even_k, - config.allow_tf32, - ) - - if not async_op: - shmem.barrier() - - return workspace diff --git a/iris/ops/config.py b/iris/ops/config.py index a92925035..c5d15349b 
100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -35,8 +35,7 @@ class FusedConfig: "one_shot", "two_shot", "spinlock". Default: "two_shot". all_reduce_num_rings: Number of concurrent rings (for ring variant). Default: 1. all_gather_matmul_variant: All-gather + matmul algorithm variant. Options: - "pull" (on-demand pull from remote ranks), - "chunked" (pre-gather into buffer then GEMM). + "pull" (on-demand pull from remote ranks). Default: "pull". Example: diff --git a/tests/ops/test_all_gather_matmul.py b/tests/ops/test_all_gather_matmul.py index 7dceea126..db4b21250 100644 --- a/tests/ops/test_all_gather_matmul.py +++ b/tests/ops/test_all_gather_matmul.py @@ -32,7 +32,6 @@ "variant", [ "pull", - "chunked", ], ) def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N, variant): diff --git a/tests/ops/test_matmul_all_reduce.py b/tests/ops/test_matmul_all_reduce.py index 5780b5d4d..0fd278fe0 100644 --- a/tests/ops/test_matmul_all_reduce.py +++ b/tests/ops/test_matmul_all_reduce.py @@ -112,7 +112,7 @@ def test_matmul_all_reduce_via_shmem_ops(): shmem = iris.iris(heap_size) rank = shmem.get_rank() - M, N, K = 256, 128, 64 + M, N, K = 256, 256, 64 dtype = torch.float16 A = shmem.randn((M, K), dtype=dtype) From 477b47220da79207529c40369592d093af59ccb2 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Fri, 6 Mar 2026 19:03:44 +0000 Subject: [PATCH 27/60] Fix CI: increase default N to match FusedConfig block_size_n=256 Examples 28 (matmul_all_reduce) and 30 (matmul_all_gather) used N=128 as default, which is smaller than the new FusedConfig default block_size_n=256. This triggers assertion failures (N >= block_size_n) in CI, crashing all ranks and causing the 8-rank test to hang for 179 minutes waiting for the dead rank. Increase both examples' default N from 128 to 256 to match the new config defaults. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/28_ops_matmul_all_reduce/example.py | 2 +- examples/30_ops_matmul_all_gather/example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/28_ops_matmul_all_reduce/example.py b/examples/28_ops_matmul_all_reduce/example.py index acaaff85d..086ef4d70 100644 --- a/examples/28_ops_matmul_all_reduce/example.py +++ b/examples/28_ops_matmul_all_reduce/example.py @@ -26,7 +26,7 @@ def parse_args(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("-m", type=int, default=512, help="Rows of A") - parser.add_argument("-n", type=int, default=128, help="Columns of B") + parser.add_argument("-n", type=int, default=256, help="Columns of B") parser.add_argument("-k", type=int, default=256, help="Inner dimension") parser.add_argument("--heap_size", type=int, default=1 << 31, help="Iris heap size") parser.add_argument("--datatype", type=str, default="fp16", choices=["fp16", "fp32", "bf16"], help="Data type") diff --git a/examples/30_ops_matmul_all_gather/example.py b/examples/30_ops_matmul_all_gather/example.py index fbb12442e..e704246cc 100644 --- a/examples/30_ops_matmul_all_gather/example.py +++ b/examples/30_ops_matmul_all_gather/example.py @@ -27,7 +27,7 @@ def parse_args(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("-m", type=int, default=4096, help="Total rows (must be divisible by world_size)") - parser.add_argument("-n", type=int, default=128, help="Columns of B") + parser.add_argument("-n", type=int, default=256, help="Columns of B") parser.add_argument("-k", type=int, default=256, help="Inner dimension") parser.add_argument("--heap_size", type=int, default=1 << 31, help="Iris heap size") parser.add_argument("--datatype", type=str, default="fp16", choices=["fp16", "fp32", "bf16"], help="Data type") From 76cc30d256f7cb6e4e9fe06314959e4131b72cfc Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Fri, 6 Mar 2026 19:15:24 +0000 
Subject: [PATCH 28/60] Revert "Fix CI: increase default N to match FusedConfig block_size_n=256" This reverts commit 477b47220da79207529c40369592d093af59ccb2. --- examples/28_ops_matmul_all_reduce/example.py | 2 +- examples/30_ops_matmul_all_gather/example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/28_ops_matmul_all_reduce/example.py b/examples/28_ops_matmul_all_reduce/example.py index 086ef4d70..acaaff85d 100644 --- a/examples/28_ops_matmul_all_reduce/example.py +++ b/examples/28_ops_matmul_all_reduce/example.py @@ -26,7 +26,7 @@ def parse_args(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("-m", type=int, default=512, help="Rows of A") - parser.add_argument("-n", type=int, default=256, help="Columns of B") + parser.add_argument("-n", type=int, default=128, help="Columns of B") parser.add_argument("-k", type=int, default=256, help="Inner dimension") parser.add_argument("--heap_size", type=int, default=1 << 31, help="Iris heap size") parser.add_argument("--datatype", type=str, default="fp16", choices=["fp16", "fp32", "bf16"], help="Data type") diff --git a/examples/30_ops_matmul_all_gather/example.py b/examples/30_ops_matmul_all_gather/example.py index e704246cc..fbb12442e 100644 --- a/examples/30_ops_matmul_all_gather/example.py +++ b/examples/30_ops_matmul_all_gather/example.py @@ -27,7 +27,7 @@ def parse_args(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("-m", type=int, default=4096, help="Total rows (must be divisible by world_size)") - parser.add_argument("-n", type=int, default=256, help="Columns of B") + parser.add_argument("-n", type=int, default=128, help="Columns of B") parser.add_argument("-k", type=int, default=256, help="Inner dimension") parser.add_argument("--heap_size", type=int, default=1 << 31, help="Iris heap size") parser.add_argument("--datatype", type=str, default="fp16", choices=["fp16", "fp32", "bf16"], help="Data type") From 
9743b13e56dd724e942fbe2b96a9928b275b2088 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Fri, 6 Mar 2026 19:18:55 +0000 Subject: [PATCH 29/60] =?UTF-8?q?Remove=20unnecessary=20block=20size=20ass?= =?UTF-8?q?ertions=20=E2=80=94=20Triton=20handles=20masking?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Triton kernels already handle block_size > dimension via: - tl.cdiv(N, BLOCK_SIZE_N) for grid sizing - mask=(rn < N) on loads/stores - tritonblas GemmContext.reduce_axis handles K masking The assertions were preventing valid configurations (e.g., block_size_n=256 with N=128) that the kernels handle correctly. Removed for_problem() clamping too — it's unnecessary when the kernels already mask. Fixes CI failures on examples 28 and 30 which use N=128 with default FusedConfig block_size_n=256. --- iris/ops/all_gather_matmul.py | 11 ----------- iris/ops/matmul_all_gather.py | 11 ----------- iris/ops/matmul_all_reduce.py | 5 ----- 3 files changed, 27 deletions(-) diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index 6000f50ef..4f272825f 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -219,17 +219,6 @@ def all_gather_matmul( assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" # Validate problem size against block sizes - assert M >= config.block_size_m, ( - f"M ({M}) must be >= block_size_m ({config.block_size_m}). Use smaller block sizes for small problems." - ) - assert K_local >= config.block_size_k, ( - f"K_local ({K_local}) must be >= block_size_k ({config.block_size_k}). " - f"Use smaller block sizes for small problems." - ) - assert N >= config.block_size_n, ( - f"N ({N}) must be >= block_size_n ({config.block_size_n}). Use smaller block sizes for small problems." 
- ) - if workspace is None: workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) diff --git a/iris/ops/matmul_all_gather.py b/iris/ops/matmul_all_gather.py index ad42ac041..6b19caea4 100644 --- a/iris/ops/matmul_all_gather.py +++ b/iris/ops/matmul_all_gather.py @@ -180,17 +180,6 @@ def matmul_all_gather( assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" # Validate problem size against block sizes - assert M_local >= config.block_size_m, ( - f"M_local ({M_local}) must be >= block_size_m ({config.block_size_m}). " - f"Use smaller block sizes for small problems." - ) - assert K >= config.block_size_k, ( - f"K ({K}) must be >= block_size_k ({config.block_size_k}). Use smaller block sizes for small problems." - ) - assert N >= config.block_size_n, ( - f"N ({N}) must be >= block_size_n ({config.block_size_n}). Use smaller block sizes for small problems." - ) - # Allocate workspace if not provided if workspace is None: workspace = matmul_all_gather_preamble(shmem, A, B, config) diff --git a/iris/ops/matmul_all_reduce.py b/iris/ops/matmul_all_reduce.py index 73bea92c2..ceded7057 100644 --- a/iris/ops/matmul_all_reduce.py +++ b/iris/ops/matmul_all_reduce.py @@ -272,11 +272,6 @@ def matmul_all_reduce( if A.dtype != B.dtype or A.dtype != C.dtype: raise ValueError(f"All tensors must have same dtype, got A:{A.dtype}, B:{B.dtype}, C:{C.dtype}") - # Validate block sizes match problem dimensions - assert M >= config.block_size_m, f"M={M} too small for block_size_m={config.block_size_m}" - assert K >= config.block_size_k, f"K={K} too small for block_size_k={config.block_size_k}" - assert N >= config.block_size_n, f"N={N} too small for block_size_n={config.block_size_n}" - # Extract strides stride_am, stride_ak = A.stride() stride_bk, stride_bn = B.stride() From a86dc0400de2fff3e8b4d27cd637b2c41fc5dffc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 
16:11:23 +0000 Subject: [PATCH 30/60] Initial plan From 445b25cb095078a51c3db9fb5cbf5c5b6eec80ee Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 19:48:25 +0000 Subject: [PATCH 31/60] Add vectorization hints and tests for HBM buffer all-gather matmul Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- iris/ops/all_gather_matmul_hbm_buffer.py | 4 +- iris/x/gather.py | 5 + .../ops/test_all_gather_matmul_hbm_buffer.py | 202 ++++++++++++++++++ 3 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 tests/ops/test_all_gather_matmul_hbm_buffer.py diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index abe3b3936..2db1b6ed7 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -126,6 +126,7 @@ def _hbm_buffer_all_gather_matmul_kernel( k_block_start = k_flag_group * K_PER_FLAG rm = m_tile * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + rm = tl.max_contiguous(tl.multiple_of(rm, BLOCK_SIZE_M), BLOCK_SIZE_M) for k_off in range(K_PER_FLAG): k_block_global = k_block_start + k_off @@ -138,11 +139,12 @@ def _hbm_buffer_all_gather_matmul_kernel( k_tile = iris.x.TileView(pid_m_t, tile_k_t, BLOCK_SIZE_M, BLOCK_SIZE_K) rk = k_block_global * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + rk = tl.max_contiguous(tl.multiple_of(rk, BLOCK_SIZE_K), BLOCK_SIZE_K) staged_ptrs = staged_a + rm.to(tl.int64)[:, None] * stride_sa_m + rk[None, :] * stride_sa_k for compile_rank in range(world_size): if src_rank_idx == compile_rank: - a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx) + a_tile = iris.x.gather(k_tile, src_view, compile_rank, ctx, hint=(1, BLOCK_SIZE_K)) tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group diff --git a/iris/x/gather.py b/iris/x/gather.py index ca8bd4f9c..4e2b10cc9 100644 --- a/iris/x/gather.py +++ 
b/iris/x/gather.py @@ -24,6 +24,7 @@ def gather( src_view: TensorView, source_rank: tl.constexpr, ctx: DeviceContext, + hint: tl.constexpr = None, ): """ Tile-level gather from a specific rank. @@ -37,6 +38,9 @@ def gather( src_view: TensorView for source tensor on source_rank. source_rank: Specific rank to load from (constexpr). ctx: DeviceContext with rank, world_size, and heap_bases. + hint: Vectorization hint passed to tl.multiple_of / tl.max_contiguous on + the translated pointer. Use a scalar (e.g. 16) or a tuple + (e.g. (1, 16)) to indicate alignment. Defaults to None (no hint). Returns: Loaded tile data as a tensor. @@ -61,6 +65,7 @@ def gather( source_rank, # from_rank (source rank) ctx.heap_bases, mask=mask, + hint=hint, ) return tile_data diff --git a/tests/ops/test_all_gather_matmul_hbm_buffer.py b/tests/ops/test_all_gather_matmul_hbm_buffer.py new file mode 100644 index 000000000..af173ea8b --- /dev/null +++ b/tests/ops/test_all_gather_matmul_hbm_buffer.py @@ -0,0 +1,202 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Tests for fused all_gather + matmul using the HBM staging buffer implementation. + +Each rank has A_sharded (M x K_local), B is replicated. +The operation gathers A from all ranks into a local HBM buffer and computes C = A_gathered @ B. 
+""" + +import pytest +import torch +import torch.distributed as dist + +import iris +from iris.ops.all_gather_matmul_hbm_buffer import ( + all_gather_matmul_hbm_buffer, + all_gather_matmul_hbm_buffer_preamble, +) +from iris.ops.config import FusedConfig + + +@pytest.mark.parametrize( + "dtype, atol, rtol", + [ + (torch.float16, 1e-2, 1e-2), + ], +) +@pytest.mark.parametrize( + "M,K_local,N", + [ + (128, 32, 64), + (256, 64, 128), + ], +) +@pytest.mark.parametrize( + "staged_a_layout", + [ + "k_contiguous", + "m_contiguous", + ], +) +def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a_layout): + """Test all_gather_matmul_hbm_buffer against torch all_gather + matmul.""" + if not dist.is_initialized(): + pytest.skip("torch.distributed not initialized") + + heap_size = 2**33 + shmem = iris.iris(heap_size) + rank = shmem.get_rank() + world_size = shmem.get_num_ranks() + + K = K_local * world_size # Full K dimension + + # Seed for reproducibility - different seed per rank for A_sharded + torch.manual_seed(42 + rank) + A_sharded = torch.randn(M, K_local, dtype=dtype, device=f"cuda:{rank}") + + # B must be identical on all ranks + torch.manual_seed(123) + B = torch.randn(K, N, dtype=dtype, device=f"cuda:{rank}") + + # Reference: torch all_gather + matmul + A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=f"cuda:{rank}") for _ in range(world_size)] + dist.all_gather(A_gathered_list, A_sharded) + A_gathered_ref = torch.cat(A_gathered_list, dim=1) # (M, K) + ref_output = torch.matmul(A_gathered_ref, B) + torch.cuda.synchronize() + + # Create shmem tensors + A_sharded_shmem = shmem.zeros((M, K_local), dtype=dtype) + A_sharded_shmem.copy_(A_sharded) + B_shmem = shmem.zeros((K, N), dtype=dtype) + B_shmem.copy_(B) + output = shmem.zeros((M, N), dtype=dtype) + + shmem.barrier() + + # Use small block sizes for small test problems + config = FusedConfig( + block_size_m=64, + block_size_n=64, + block_size_k=32, + ) + + workspace = 
all_gather_matmul_hbm_buffer_preamble( + shmem, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout + ) + + all_gather_matmul_hbm_buffer( + shmem, + output, + A_sharded_shmem, + B_shmem, + config=config, + workspace=workspace, + staged_a_layout=staged_a_layout, + trace=False, + ) + + torch.cuda.synchronize() + shmem.barrier() + + max_diff = (output - ref_output).abs().max().item() + + assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( + f"Rank {rank}: Max diff {max_diff}, expected < {atol} " + f"(staged_a_layout={staged_a_layout}, M={M}, K_local={K_local}, N={N})" + ) + + +@pytest.mark.parametrize( + "dtype, atol, rtol", + [ + (torch.float16, 1e-2, 1e-2), + ], +) +@pytest.mark.parametrize( + "M,K_local,N", + [ + (128, 32, 64), + ], +) +def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N): + """Test all_gather_matmul_hbm_buffer with a bias vector.""" + if not dist.is_initialized(): + pytest.skip("torch.distributed not initialized") + + heap_size = 2**33 + shmem = iris.iris(heap_size) + rank = shmem.get_rank() + world_size = shmem.get_num_ranks() + + K = K_local * world_size + + torch.manual_seed(42 + rank) + A_sharded = torch.randn(M, K_local, dtype=dtype, device=f"cuda:{rank}") + + torch.manual_seed(123) + B = torch.randn(K, N, dtype=dtype, device=f"cuda:{rank}") + + torch.manual_seed(77) + bias = torch.randn(M, dtype=dtype, device=f"cuda:{rank}") + + # Reference: torch all_gather + matmul + bias + A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=f"cuda:{rank}") for _ in range(world_size)] + dist.all_gather(A_gathered_list, A_sharded) + A_gathered_ref = torch.cat(A_gathered_list, dim=1) + ref_output = torch.matmul(A_gathered_ref, B) + bias[:, None] + torch.cuda.synchronize() + + # Create shmem tensors + A_sharded_shmem = shmem.zeros((M, K_local), dtype=dtype) + A_sharded_shmem.copy_(A_sharded) + B_shmem = shmem.zeros((K, N), dtype=dtype) + B_shmem.copy_(B) + bias_shmem = 
shmem.zeros((M,), dtype=dtype) + bias_shmem.copy_(bias) + output = shmem.zeros((M, N), dtype=dtype) + + shmem.barrier() + + config = FusedConfig( + block_size_m=64, + block_size_n=64, + block_size_k=32, + ) + + all_gather_matmul_hbm_buffer( + shmem, + output, + A_sharded_shmem, + B_shmem, + bias=bias_shmem, + config=config, + trace=False, + ) + + torch.cuda.synchronize() + shmem.barrier() + + max_diff = (output - ref_output).abs().max().item() + + assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( + f"Rank {rank}: Max diff {max_diff}, expected < {atol} (with bias)" + ) + + +if __name__ == "__main__": + # For quick debugging + import sys + + if not dist.is_initialized(): + print("Run with: torchrun --nproc_per_node=2 tests/ops/test_all_gather_matmul_hbm_buffer.py") + sys.exit(1) + + rank = dist.get_rank() + torch.cuda.set_device(rank) + + print(f"[Rank {rank}] Testing all_gather_matmul_hbm_buffer...") + test_all_gather_matmul_hbm_buffer(torch.float16, 1e-2, 1e-2, 128, 32, 64, "k_contiguous") + print(f"[Rank {rank}] ✓ Test passed!") From bad3422cc208e9e2667e2078d44f6a7a1de61ae6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:26:32 +0000 Subject: [PATCH 32/60] Initial plan for PR cleanup Agent-Logs-Url: https://github.com/ROCm/iris/sessions/1e2f61b2-a214-44c6-9091-c3482f911eb9 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .github/agents/skills/accordo/SKILL.md | 68 +++++++++++++ .github/agents/skills/kerncap/SKILL.md | 131 +++++++++++++++++++++++++ .github/agents/skills/linex/SKILL.md | 98 ++++++++++++++++++ .github/agents/skills/metrix/SKILL.md | 76 ++++++++++++++ .github/agents/skills/nexus/SKILL.md | 74 ++++++++++++++ .intellikit | 1 + 6 files changed, 448 insertions(+) create mode 100644 .github/agents/skills/accordo/SKILL.md create mode 100644 .github/agents/skills/kerncap/SKILL.md create mode 100644 .github/agents/skills/linex/SKILL.md 
create mode 100644 .github/agents/skills/metrix/SKILL.md create mode 100644 .github/agents/skills/nexus/SKILL.md create mode 160000 .intellikit diff --git a/.github/agents/skills/accordo/SKILL.md b/.github/agents/skills/accordo/SKILL.md new file mode 100644 index 000000000..1917a5d24 --- /dev/null +++ b/.github/agents/skills/accordo/SKILL.md @@ -0,0 +1,68 @@ +--- +name: accordo-validation +description: Validate GPU kernel correctness by comparing reference and optimized outputs. Use when verifying that an optimized or modified kernel matches a reference implementation. +--- + +# Accordo: GPU Kernel Validation + +Capture and compare kernel outputs from reference and optimized binaries to validate correctness. Uses kernelDB for automatic kernel extraction; supports configurable tolerance and execution-time comparison. + +## When to Use + +- User has a reference and an optimized (or modified) GPU kernel and wants to check they produce the same results +- Regression testing after kernel or build changes +- Validating multiple optimization variants against one baseline + +## Instructions + +1. **Require two or more binaries:** one reference (e.g. `./app_ref`) and one or more to validate (e.g. `./app_opt`). All must expose the same kernel by name. +2. **Ensure binaries are built with debug symbols** (`-g`) so kernel arguments can be extracted. +3. **Choose execution path:** + - If an Accordo MCP server is available, call its `validate_kernel_correctness` tool, which performs capture-and-compare with the same semantics described below. + - Otherwise use the Python API or the `accordo validate` CLI (`accordo validate --help` for flags: `--kernel-name`, `--ref-binary`, `--opt-binary`, `--tolerance`, `--timeout`, `--working-dir`, `--kernel-args`, `--log-level`). 
+ +### Python API + +```python +from accordo import Accordo + +# Validator for the kernel to validate (binary used to extract signature) +validator = Accordo(binary="./app_ref", kernel_name="reduce_sum") + +# Optional: set working directory if binaries expect it +validator = Accordo(binary="./app_ref", kernel_name="reduce_sum", working_directory="./run") + +# Capture snapshots +ref = validator.capture_snapshot(binary="./app_ref") +opt = validator.capture_snapshot(binary="./app_opt") + +# Compare with tolerance (default 1e-6) +result = validator.compare_snapshots(ref, opt, tolerance=1e-6) + +if result.is_valid: + print("PASS:", result.num_arrays_validated, "arrays matched") +else: + print(result.summary()) +``` + +For multiple optimizations, capture the reference once and compare each optimized snapshot against it. + +### Snapshot and result attributes + +- **Snapshot:** `arrays`, `execution_time_ms`, `grid_size`, `block_size` +- **ValidationResult:** `is_valid`, `num_arrays_validated`, `num_mismatches`, `mismatches`, `success_rate`; use `summary()` for a human-readable report. + +## Workflow + +1. Build reference and optimized binaries with the same kernel name and `-g`. +2. Create an `Accordo(binary=ref_binary, kernel_name="...")` validator; set `working_directory` if needed. +3. Capture reference snapshot with `capture_snapshot(binary=ref_binary)`. +4. For each variant, capture with `capture_snapshot(binary=opt_binary)` and compare with `compare_snapshots(ref, opt, tolerance=...)`. +5. If `result.is_valid` is false, use `result.summary()` and `result.mismatches` to diagnose. +6. Use relative paths for binaries and working directory so the skill is portable. + +## Notes + +- kernelDB is used automatically; no separate kernelDB setup is required when using the Python API. +- Increase `tolerance` for floating-point comparisons when appropriate (e.g. 1e-4 or 1e-5 for single precision). +- Use `timeout_seconds` in `capture_snapshot` if the run may hang. 
diff --git a/.github/agents/skills/kerncap/SKILL.md b/.github/agents/skills/kerncap/SKILL.md new file mode 100644 index 000000000..f7d007143 --- /dev/null +++ b/.github/agents/skills/kerncap/SKILL.md @@ -0,0 +1,131 @@ +--- +name: test-kerncap +description: Test local kerncap changes end-to-end by profiling an application, extracting a kernel, and validating the reproducer. Use when the user asks to test kerncap against any HIP or Triton workload, or wants to validate extraction on a real GPU application. +--- + +# Test kerncap Against an Application + +Test local kerncap changes end-to-end by extracting and validating a kernel from any application. + +## Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `app_cmd` | **Yes** | Full command to run the application (binary + arguments), e.g. `$WORK/dev/llama.cpp/build/bin/llama-bench -m model.gguf -p 512 -n 32` | +| `conda_env` | No | Conda environment to activate before running commands (e.g. `llama_cpp`). If not provided, use the current environment. | +| `kernel_name` | No | Name of the kernel to extract (e.g. `mul_mat_q`). If not provided, profile the application first and select the top kernel by execution time. | + +## Paths + +| Item | Path | +|------|------| +| kerncap source | `kerncap/` (relative to IntelliKit repo root) | +| Output directory | `/tmp/kerncap-test/` | + +## Environment Setup + +If `conda_env` is provided, activate it before any other step: + +```bash +conda activate +``` + +If already in a different environment, switch explicitly. Do not assume the current shell environment is correct. + +If `conda_env` is not provided, proceed with the current environment as-is. 
+ +## Workflow + +### Step 1: Reinstall kerncap + +Ensure the correct environment is active (if applicable), then uninstall and reinstall to pick up local changes: + +```bash +pip uninstall kerncap -y && pip install kerncap/ +``` + +### Step 2: Profile to identify target kernel + +**If `kernel_name` was provided**: Skip this step and proceed to Step 3. + +**If `kernel_name` was not provided**: Run profiling to discover the top bottleneck kernel: + +```bash +kerncap profile -- +``` + +Select the kernel with the highest total execution time from the profile output. Use its name as `kernel_name` for all subsequent steps. Tell the user which kernel was selected and why. + +**Important**: Use a sufficiently long substring from the profile output as `kernel_name` so that `kerncap extract` matches the intended kernel, not a different instantiation. For example, templated kernels like `mul_mat_q` have many instantiations differing only by template parameters; passing just `mul_mat_q` will capture the first dispatch that matches, which may not be the top-ranked one. Prefer including template parameters in the substring (e.g. `mul_mat_q<(ggml_type)39` instead of `mul_mat_q`). + +### Step 3: Extract the kernel + +```bash +kerncap extract --help +``` + +Use the help output to construct the appropriate `kerncap extract` command for the application. Key flags to determine: + +- `--cmd` — the application command (`app_cmd`) +- `--source-dir` — where the kernel source lives (ask the user if unclear) +- `--output` — `/tmp/kerncap-test/` +- `--language` — `hip` or `triton` depending on the workload +- Any additional flags (`-D` defines, `--dispatch`, etc.) + +**If extraction fails or produces errors**: Stop here and report the full error output. This indicates the local kerncap changes have a bug that needs fixing. + +**If extraction succeeds**: Inspect the output directory for expected files (metadata.json, argument dumps, source files). 
If the output looks reasonable, proceed to compile and run. + +### Step 4: Compile and run the reproducer + +Navigate to the output directory and build/run the reproducer: + +```bash +cd /tmp/kerncap-test/ +make run +``` + +**If `make run` fails**: Stop here and report the full compiler or runtime error output. This is the primary signal that kerncap generated an incorrect reproducer. + +**If `make run` succeeds**: Proceed to validation. + +### Step 5: Validate the reproducer + +**5a. Smoke test** — confirm baseline replay works: + +```bash +kerncap validate /tmp/kerncap-test/ +``` + +This is a smoke test only (VA-faithful captures). It confirms the replay runs without crashing but does not check numerical correctness. + +**5b. Recompile** — build a baseline HSACO from the unmodified kernel source: + +```bash +cd /tmp/kerncap-test/ +make recompile +``` + +This confirms the VFS-overlay recompile pipeline works. It produces `optimized.hsaco` from the unmodified `kernel_variant.cpp`. + +**If `make recompile` fails**: Stop here and report the error. This indicates an issue with the source finder or VFS overlay generation. + +**5c. Correctness validation** — compare recompiled HSACO against captured baseline: + +```bash +kerncap validate /tmp/kerncap-test/ --hsaco /tmp/kerncap-test//optimized.hsaco +``` + +This runs replay twice (captured HSACO vs recompiled HSACO) and compares outputs byte-for-byte. Since the kernel source is unmodified, they should match exactly. A failure here indicates a recompilation fidelity issue. 
+ +### Step 6: Report results + +Summarize: +- Whether reinstall succeeded +- Whether profiling identified a kernel (if applicable, and which one) +- Whether extraction completed (and any warnings) +- Whether `make run` compiled and executed successfully +- Whether smoke test passed (Step 5a) +- Whether recompile succeeded (Step 5b) +- Whether correctness validation passed (Step 5c) +- Any errors or warnings encountered at each step diff --git a/.github/agents/skills/linex/SKILL.md b/.github/agents/skills/linex/SKILL.md new file mode 100644 index 000000000..dca5b7d6d --- /dev/null +++ b/.github/agents/skills/linex/SKILL.md @@ -0,0 +1,98 @@ +--- +name: linex-profiling +description: Profile GPU kernels at source-line granularity with cycle-level timing and stall analysis. Use when identifying performance hotspots at the source code level or analyzing instruction-level metrics mapped to source lines. +--- + +# Linex: Source-Level GPU Performance Profiling + +Map GPU performance metrics to your source code lines. Get cycle-level timing, stall analysis, and instruction-level metrics for each line of source code. + +## When to Use + +- User asks to profile a GPU application at source-line granularity +- Need to identify which specific lines of code are performance bottlenecks +- Analyzing stall patterns and execution bottlenecks at the source level +- Understanding cycle-level timing for each line of code +- Instruction-level analysis mapped to source lines + +## Instructions + +1. **Ensure the target runs on AMD ROCm 7.0+** with `rocprofv3` available. +2. **Kernels must be compiled with `-g`** (debug symbols) for source mapping. +3. **Choose execution path:** + - If a Linex MCP server is available, use its MCP tools: + - `profile_application` to run and profile a target application with the options below. + - `analyze_instruction_hotspots` to perform instruction-level hotspot analysis on collected profiles. 
+ - Otherwise use the Python API from the environment where Linex is installed. + +### Python API + +```python +from linex import Linex + +profiler = Linex( + target_cu=0, # Target compute unit + shader_engine_mask="0xFFFFFFFF", # All shader engines + activity=10, # Activity counter polling +) + +profiler.profile("./my_app", kernel_filter="my_kernel") + +# Show hotspots (sorted by total_cycles) +for line in profiler.source_lines[:5]: + print(f"{line.file}:{line.line_number}") + print(f" {line.total_cycles:,} cycles ({line.stall_percent:.1f}% stalled)") + print(f" Executed {line.execution_count} times") + +# Find memory-bound lines +memory_bound = [ + l for l in profiler.source_lines + if l.stall_percent > 50 +] + +# Instruction-level analysis +for line in profiler.source_lines[:1]: + for inst in line.instructions: + print(f"{inst.isa}: {inst.latency_cycles} cycles") +``` + +### SourceLine Properties + +- `file` - Source file path +- `line_number` - Line number +- `total_cycles` - Sum of all instruction cycles +- `stall_cycles` - Cycles spent waiting +- `idle_cycles` - Cycles slot was idle +- `execution_count` - Total executions +- `instructions` - List of ISA instructions +- `stall_percent` - Convenience: stall_cycles / total_cycles * 100 + +### InstructionData Properties + +- `isa` - ISA instruction text +- `latency_cycles` - Total cycles for this instruction +- `stall_cycles` - Cycles spent waiting +- `idle_cycles` - Cycles slot was idle +- `execution_count` - How many times it ran +- `instruction_address` - Virtual address in GPU memory +- `file` - Parsed from source_location +- `line` - Parsed from source_location +- `stall_percent` - Convenience: stall_cycles / latency_cycles * 100 + +## Workflow + +1. Ensure the target binary is built with `-g` (debug symbols) for source mapping. +2. Create a `Linex()` profiler; optionally set `target_cu`, `shader_engine_mask`, or `activity`. +3. Call `profiler.profile(command, kernel_filter=...)` to run profiling. +4. 
Access `profiler.source_lines` (sorted by total_cycles) to find hotspots. +5. Use `line.stall_percent` to identify memory-bound or dependency-bound lines. +6. Drill down into `line.instructions` for instruction-level analysis. +7. Use relative paths for the target binary so the skill is portable. + +## Notes + +- Requires ROCm 7.0+ with `rocprofv3` support. +- Source mapping requires kernels compiled with `-g` (debug symbols). +- `source_lines` are automatically sorted by `total_cycles` (descending). +- Use `kernel_filter` to profile specific kernels by name (regex pattern). +- For Triton or other frameworks, ensure debug symbols are available in the compiled output. diff --git a/.github/agents/skills/metrix/SKILL.md b/.github/agents/skills/metrix/SKILL.md new file mode 100644 index 000000000..969ef6eef --- /dev/null +++ b/.github/agents/skills/metrix/SKILL.md @@ -0,0 +1,76 @@ +--- +name: metrix-profiling +description: Profile GPU kernels when performance analysis or optimization is required. Use for AMD ROCm GPU metrics, bandwidth, cache hit rates, coalescing, or kernel timing. +--- + +# Metrix: GPU Profiling + +Profile AMD GPU kernels and get human-readable metrics (bandwidth, cache, coalescing, FLOPS). Architecture is auto-detected. + +## When to Use + +- User asks to profile a GPU application or kernel +- Performance analysis, optimization, or bottleneck investigation +- Need HBM/L2/L1 bandwidth, hit rates, or compute metrics +- Need timing-only runs (fast, no hardware counters) + +## Instructions + +1. **Ensure the target runs on AMD ROCm** (e.g. `hipcc`-built binary or Python script that launches HIP/ROCm kernels). +2. **Choose execution path:** + - If a Metrix MCP server is available, use its profile tool with the same options below. + - Otherwise run the CLI or Python API from the environment where Metrix is installed. 
+ +### CLI + +From the project or install prefix: + +```bash +# Profile with all metrics (auto-detected arch) +metrix ./my_app + +# Time only (fast, no counters) +metrix --time-only -n 10 ./my_app + +# Filter kernels by name +metrix --kernel matmul ./my_app + +# Specific metrics +metrix --metrics memory.l2_hit_rate,memory.coalescing_efficiency,compute.total_flops ./my_app + +# Save to JSON/CSV +metrix -o results.json ./my_app +``` + +Options: `--profile`/`-p` (run `metrix list profiles` for names: `quick`, `memory`, `memory_bandwidth`, `memory_cache`, `compute`), `--metrics`/`-m`, `--time-only`, `--kernel`/`-k` (regular expression), `--num-replays`/`-n`, `--output`/`-o`, `--top`, `--aggregate`, `--timeout`, `--no-counters`, `--log`/`-l`, `--quiet`/`-q`. Discovery: `metrix list `, `metrix info `. Note: `metrix list counters` and `metrix info counter ` are not implemented yet (CLI reports “not yet implemented”). + +### Python API + +```python +from metrix import Metrix + +profiler = Metrix() +results = profiler.profile("./my_app", num_replays=5) + +for kernel in results.kernels: + print(kernel.name, kernel.duration_us.avg) + for metric, stats in kernel.metrics.items(): + print(f" {metric}: {stats.avg}") +``` + +Use `metrics=[...]` for a subset; omit for all metrics. Use `cwd` when the binary expects a specific working directory. + +## Workflow + +1. Identify the executable or script to profile (e.g. `./app` or `python run_kernels.py`). +2. If only timing is needed, use `--time-only` for speed. +3. If full metrics are needed, run `metrix ./app` (or MCP equivalent); optionally restrict with `--kernel` or `--metrics`. +4. Interpret results: low L2 hit rate, low coalescing, or low HBM utilization suggest optimization targets. +5. For automation or tooling, use `-o results.json` and parse the JSON output. 
+ +## Key Metrics (reference) + +- **Memory:** `memory.hbm_bandwidth_utilization`, `memory.l2_hit_rate`, `memory.l1_hit_rate`, `memory.coalescing_efficiency`, `memory.global_load_efficiency`, `memory.lds_bank_conflicts`, `memory.atomic_latency` +- **Compute:** `compute.total_flops`, `compute.hbm_gflops`, `compute.hbm_arithmetic_intensity`, `compute.l2_arithmetic_intensity`, `compute.l1_arithmetic_intensity` + +Use relative paths for the target binary and output files so the skill is portable across environments. diff --git a/.github/agents/skills/nexus/SKILL.md b/.github/agents/skills/nexus/SKILL.md new file mode 100644 index 000000000..ad714bc4d --- /dev/null +++ b/.github/agents/skills/nexus/SKILL.md @@ -0,0 +1,74 @@ +--- +name: nexus-trace +description: Extract GPU kernel assembly and HIP source from HSA packet traces. Use when analyzing what code ran on the GPU, debugging kernel dispatch, or inspecting assembly and source mapping. +--- + +# Nexus: HSA Packet Source Code Extractor + +Intercepts HSA packets from a running process and extracts, per kernel, assembly and HIP source into a structured trace (e.g. JSON). Use for kernel-level inspection and assembly/source correlation. + +## When to Use + +- User needs to see which kernels ran and their assembly or HIP source +- Debugging or analyzing GPU dispatch and code generation +- Inspecting assembly-to-source mapping for a HIP (or ROCm) application + +## Instructions + +1. **Ensure the target runs on AMD ROCm** and uses HSA (e.g. HIP application or ROCm runtime). +2. **Choose execution path:** + - If a Nexus MCP server is available, use its tools: `list_kernels` to enumerate kernels in a trace, and `extract_kernel_code` to get assembly and HIP/source mapping (signature, files, lines). See `nexus/nexus/mcp/server.py` for tool parameters and schemas. + - Otherwise use the Python API from the environment where Nexus is installed. 
+ +### Python API (recommended when no MCP) + +```python +from nexus import Nexus + +nexus = Nexus(log_level=1) +trace = nexus.run(["python", "my_gpu_script.py"]) + +# Or run a binary: +# trace = nexus.run(["./my_hip_app"]) + +for kernel in trace: + print(kernel.name, len(kernel.assembly), "instructions") + for i, asm_line in enumerate(kernel.assembly, 1): + print(f" {i}. {asm_line}") + for line_no, hip_line in zip(kernel.lines or range(1, len(kernel.hip)+1), kernel.hip): + print(f" {line_no}: {hip_line}") + +# Access by kernel name +k = trace["vector_add(float const*, float const*, float*, int)"] +print(k.assembly, k.hip, k.signature, k.files, k.lines) + +# Save/load trace +trace.save("trace.json") +loaded = Nexus.load("trace.json") +``` + +Set `log_level` (0–4) to control verbosity. Use relative paths for the run command and output file so the skill is portable. + +### Environment-based usage (no Python API) + +When the process cannot be launched via `nexus.run()`: + +1. Set `HSA_TOOLS_LIB` to the Nexus shared library path (e.g. `build/lib/libnexus.so` or the installed path). +2. Set `NEXUS_OUTPUT_FILE` to the output JSON path. +3. Set `NEXUS_LOG_LEVEL` (0–4) if needed. +4. Run the application as usual; it will be traced and the output file will contain the kernel data. + +Optional: `NEXUS_EXTRA_SEARCH_PREFIX` (colon-separated) for HIP source search; `TRITON_DISABLE_LINE_INFO=0` for Triton kernel line info. + +## Workflow + +1. Identify the command that runs the GPU workload (e.g. `python script.py` or `./app`). +2. If using the Python API: create `Nexus(log_level=...)`, call `nexus.run([...])`, then iterate `trace` and optionally `trace.save(...)`. +3. If using the env method: set `HSA_TOOLS_LIB` and `NEXUS_OUTPUT_FILE`, then run the app; open the JSON and parse the `kernels` structure. +4. Use kernel `signature`, `assembly`, `hip`, `files`, and `lines` to analyze what ran and map assembly back to source. +5. Use relative paths for commands and output files. 
+ +## Notes + +- Nexus is intended for research/analysis; ensure the target environment has the Nexus library and compatible ROCm/HSA stack. +- For Triton kernels, enable line info via `TRITON_DISABLE_LINE_INFO=0` when using the Python API. diff --git a/.intellikit b/.intellikit new file mode 160000 index 000000000..bcbfa0252 --- /dev/null +++ b/.intellikit @@ -0,0 +1 @@ +Subproject commit bcbfa0252df9d55f3aab68c95dd3ce45ccbe5b46 From 2a9f31a84f7ff6a32ea50e7592bfb03a9e2b669e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:29:52 +0000 Subject: [PATCH 33/60] Cleanup PR: address reviewer feedback - Add ValueError for unsupported all_gather_matmul_variant - Fix config.py docstring defaults (group_size_m=1, num_xcds=8) - Fix config.py comment (remove 'chunked' from all_gather_matmul_variant) - Clarify duration_cycles naming in _extract_wg_trace (it stores end timestamp) - Gate CUDA device_id on torch.cuda.is_available() in all benchmark files - Restrict variant choices to 'pull' only in benchmark_torchrun.py and benchmark.py - Remove noisy print from test_all_gather_matmul.py - Add bias=None to OpsNamespace.matmul_reduce_scatter for backward compat - Add .intellikit/ and .github/agents/ to .gitignore Agent-Logs-Url: https://github.com/ROCm/iris/sessions/1e2f61b2-a214-44c6-9091-c3482f911eb9 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .github/agents/skills/accordo/SKILL.md | 68 --------- .github/agents/skills/kerncap/SKILL.md | 131 ------------------ .github/agents/skills/linex/SKILL.md | 98 ------------- .github/agents/skills/metrix/SKILL.md | 76 ---------- .github/agents/skills/nexus/SKILL.md | 74 ---------- .gitignore | 4 +- .intellikit | 1 - benchmark/ops/all_gather_matmul/benchmark.py | 4 +- .../all_gather_matmul/benchmark_torchrun.py | 2 +- benchmark/ops/matmul_all_gather/benchmark.py | 2 +- benchmark/ops/matmul_all_reduce/benchmark.py | 2 +- 
.../ops/matmul_reduce_scatter/benchmark.py | 2 +- iris/ops/__init__.py | 3 +- iris/ops/all_gather_matmul.py | 2 + iris/ops/all_gather_matmul_hbm_buffer.py | 6 +- iris/ops/config.py | 6 +- tests/ops/test_all_gather_matmul.py | 3 - 17 files changed, 20 insertions(+), 464 deletions(-) delete mode 100644 .github/agents/skills/accordo/SKILL.md delete mode 100644 .github/agents/skills/kerncap/SKILL.md delete mode 100644 .github/agents/skills/linex/SKILL.md delete mode 100644 .github/agents/skills/metrix/SKILL.md delete mode 100644 .github/agents/skills/nexus/SKILL.md delete mode 160000 .intellikit diff --git a/.github/agents/skills/accordo/SKILL.md b/.github/agents/skills/accordo/SKILL.md deleted file mode 100644 index 1917a5d24..000000000 --- a/.github/agents/skills/accordo/SKILL.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -name: accordo-validation -description: Validate GPU kernel correctness by comparing reference and optimized outputs. Use when verifying that an optimized or modified kernel matches a reference implementation. ---- - -# Accordo: GPU Kernel Validation - -Capture and compare kernel outputs from reference and optimized binaries to validate correctness. Uses kernelDB for automatic kernel extraction; supports configurable tolerance and execution-time comparison. - -## When to Use - -- User has a reference and an optimized (or modified) GPU kernel and wants to check they produce the same results -- Regression testing after kernel or build changes -- Validating multiple optimization variants against one baseline - -## Instructions - -1. **Require two or more binaries:** one reference (e.g. `./app_ref`) and one or more to validate (e.g. `./app_opt`). All must expose the same kernel by name. -2. **Ensure binaries are built with debug symbols** (`-g`) so kernel arguments can be extracted. -3. 
**Choose execution path:** - - If an Accordo MCP server is available, call its `validate_kernel_correctness` tool, which performs capture-and-compare with the same semantics described below. - - Otherwise use the Python API or the `accordo validate` CLI (`accordo validate --help` for flags: `--kernel-name`, `--ref-binary`, `--opt-binary`, `--tolerance`, `--timeout`, `--working-dir`, `--kernel-args`, `--log-level`). - -### Python API - -```python -from accordo import Accordo - -# Validator for the kernel to validate (binary used to extract signature) -validator = Accordo(binary="./app_ref", kernel_name="reduce_sum") - -# Optional: set working directory if binaries expect it -validator = Accordo(binary="./app_ref", kernel_name="reduce_sum", working_directory="./run") - -# Capture snapshots -ref = validator.capture_snapshot(binary="./app_ref") -opt = validator.capture_snapshot(binary="./app_opt") - -# Compare with tolerance (default 1e-6) -result = validator.compare_snapshots(ref, opt, tolerance=1e-6) - -if result.is_valid: - print("PASS:", result.num_arrays_validated, "arrays matched") -else: - print(result.summary()) -``` - -For multiple optimizations, capture the reference once and compare each optimized snapshot against it. - -### Snapshot and result attributes - -- **Snapshot:** `arrays`, `execution_time_ms`, `grid_size`, `block_size` -- **ValidationResult:** `is_valid`, `num_arrays_validated`, `num_mismatches`, `mismatches`, `success_rate`; use `summary()` for a human-readable report. - -## Workflow - -1. Build reference and optimized binaries with the same kernel name and `-g`. -2. Create an `Accordo(binary=ref_binary, kernel_name="...")` validator; set `working_directory` if needed. -3. Capture reference snapshot with `capture_snapshot(binary=ref_binary)`. -4. For each variant, capture with `capture_snapshot(binary=opt_binary)` and compare with `compare_snapshots(ref, opt, tolerance=...)`. -5. 
If `result.is_valid` is false, use `result.summary()` and `result.mismatches` to diagnose. -6. Use relative paths for binaries and working directory so the skill is portable. - -## Notes - -- kernelDB is used automatically; no separate kernelDB setup is required when using the Python API. -- Increase `tolerance` for floating-point comparisons when appropriate (e.g. 1e-4 or 1e-5 for single precision). -- Use `timeout_seconds` in `capture_snapshot` if the run may hang. diff --git a/.github/agents/skills/kerncap/SKILL.md b/.github/agents/skills/kerncap/SKILL.md deleted file mode 100644 index f7d007143..000000000 --- a/.github/agents/skills/kerncap/SKILL.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -name: test-kerncap -description: Test local kerncap changes end-to-end by profiling an application, extracting a kernel, and validating the reproducer. Use when the user asks to test kerncap against any HIP or Triton workload, or wants to validate extraction on a real GPU application. ---- - -# Test kerncap Against an Application - -Test local kerncap changes end-to-end by extracting and validating a kernel from any application. - -## Parameters - -| Parameter | Required | Description | -|-----------|----------|-------------| -| `app_cmd` | **Yes** | Full command to run the application (binary + arguments), e.g. `$WORK/dev/llama.cpp/build/bin/llama-bench -m model.gguf -p 512 -n 32` | -| `conda_env` | No | Conda environment to activate before running commands (e.g. `llama_cpp`). If not provided, use the current environment. | -| `kernel_name` | No | Name of the kernel to extract (e.g. `mul_mat_q`). If not provided, profile the application first and select the top kernel by execution time. 
| - -## Paths - -| Item | Path | -|------|------| -| kerncap source | `kerncap/` (relative to IntelliKit repo root) | -| Output directory | `/tmp/kerncap-test/` | - -## Environment Setup - -If `conda_env` is provided, activate it before any other step: - -```bash -conda activate -``` - -If already in a different environment, switch explicitly. Do not assume the current shell environment is correct. - -If `conda_env` is not provided, proceed with the current environment as-is. - -## Workflow - -### Step 1: Reinstall kerncap - -Ensure the correct environment is active (if applicable), then uninstall and reinstall to pick up local changes: - -```bash -pip uninstall kerncap -y && pip install kerncap/ -``` - -### Step 2: Profile to identify target kernel - -**If `kernel_name` was provided**: Skip this step and proceed to Step 3. - -**If `kernel_name` was not provided**: Run profiling to discover the top bottleneck kernel: - -```bash -kerncap profile -- -``` - -Select the kernel with the highest total execution time from the profile output. Use its name as `kernel_name` for all subsequent steps. Tell the user which kernel was selected and why. - -**Important**: Use a sufficiently long substring from the profile output as `kernel_name` so that `kerncap extract` matches the intended kernel, not a different instantiation. For example, templated kernels like `mul_mat_q` have many instantiations differing only by template parameters; passing just `mul_mat_q` will capture the first dispatch that matches, which may not be the top-ranked one. Prefer including template parameters in the substring (e.g. `mul_mat_q<(ggml_type)39` instead of `mul_mat_q`). - -### Step 3: Extract the kernel - -```bash -kerncap extract --help -``` - -Use the help output to construct the appropriate `kerncap extract` command for the application. 
Key flags to determine: - -- `--cmd` — the application command (`app_cmd`) -- `--source-dir` — where the kernel source lives (ask the user if unclear) -- `--output` — `/tmp/kerncap-test/` -- `--language` — `hip` or `triton` depending on the workload -- Any additional flags (`-D` defines, `--dispatch`, etc.) - -**If extraction fails or produces errors**: Stop here and report the full error output. This indicates the local kerncap changes have a bug that needs fixing. - -**If extraction succeeds**: Inspect the output directory for expected files (metadata.json, argument dumps, source files). If the output looks reasonable, proceed to compile and run. - -### Step 4: Compile and run the reproducer - -Navigate to the output directory and build/run the reproducer: - -```bash -cd /tmp/kerncap-test/ -make run -``` - -**If `make run` fails**: Stop here and report the full compiler or runtime error output. This is the primary signal that kerncap generated an incorrect reproducer. - -**If `make run` succeeds**: Proceed to validation. - -### Step 5: Validate the reproducer - -**5a. Smoke test** — confirm baseline replay works: - -```bash -kerncap validate /tmp/kerncap-test/ -``` - -This is a smoke test only (VA-faithful captures). It confirms the replay runs without crashing but does not check numerical correctness. - -**5b. Recompile** — build a baseline HSACO from the unmodified kernel source: - -```bash -cd /tmp/kerncap-test/ -make recompile -``` - -This confirms the VFS-overlay recompile pipeline works. It produces `optimized.hsaco` from the unmodified `kernel_variant.cpp`. - -**If `make recompile` fails**: Stop here and report the error. This indicates an issue with the source finder or VFS overlay generation. - -**5c. 
Correctness validation** — compare recompiled HSACO against captured baseline: - -```bash -kerncap validate /tmp/kerncap-test/ --hsaco /tmp/kerncap-test//optimized.hsaco -``` - -This runs replay twice (captured HSACO vs recompiled HSACO) and compares outputs byte-for-byte. Since the kernel source is unmodified, they should match exactly. A failure here indicates a recompilation fidelity issue. - -### Step 6: Report results - -Summarize: -- Whether reinstall succeeded -- Whether profiling identified a kernel (if applicable, and which one) -- Whether extraction completed (and any warnings) -- Whether `make run` compiled and executed successfully -- Whether smoke test passed (Step 5a) -- Whether recompile succeeded (Step 5b) -- Whether correctness validation passed (Step 5c) -- Any errors or warnings encountered at each step diff --git a/.github/agents/skills/linex/SKILL.md b/.github/agents/skills/linex/SKILL.md deleted file mode 100644 index dca5b7d6d..000000000 --- a/.github/agents/skills/linex/SKILL.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: linex-profiling -description: Profile GPU kernels at source-line granularity with cycle-level timing and stall analysis. Use when identifying performance hotspots at the source code level or analyzing instruction-level metrics mapped to source lines. ---- - -# Linex: Source-Level GPU Performance Profiling - -Map GPU performance metrics to your source code lines. Get cycle-level timing, stall analysis, and instruction-level metrics for each line of source code. - -## When to Use - -- User asks to profile a GPU application at source-line granularity -- Need to identify which specific lines of code are performance bottlenecks -- Analyzing stall patterns and execution bottlenecks at the source level -- Understanding cycle-level timing for each line of code -- Instruction-level analysis mapped to source lines - -## Instructions - -1. **Ensure the target runs on AMD ROCm 7.0+** with `rocprofv3` available. -2. 
**Kernels must be compiled with `-g`** (debug symbols) for source mapping. -3. **Choose execution path:** - - If a Linex MCP server is available, use its MCP tools: - - `profile_application` to run and profile a target application with the options below. - - `analyze_instruction_hotspots` to perform instruction-level hotspot analysis on collected profiles. - - Otherwise use the Python API from the environment where Linex is installed. - -### Python API - -```python -from linex import Linex - -profiler = Linex( - target_cu=0, # Target compute unit - shader_engine_mask="0xFFFFFFFF", # All shader engines - activity=10, # Activity counter polling -) - -profiler.profile("./my_app", kernel_filter="my_kernel") - -# Show hotspots (sorted by total_cycles) -for line in profiler.source_lines[:5]: - print(f"{line.file}:{line.line_number}") - print(f" {line.total_cycles:,} cycles ({line.stall_percent:.1f}% stalled)") - print(f" Executed {line.execution_count} times") - -# Find memory-bound lines -memory_bound = [ - l for l in profiler.source_lines - if l.stall_percent > 50 -] - -# Instruction-level analysis -for line in profiler.source_lines[:1]: - for inst in line.instructions: - print(f"{inst.isa}: {inst.latency_cycles} cycles") -``` - -### SourceLine Properties - -- `file` - Source file path -- `line_number` - Line number -- `total_cycles` - Sum of all instruction cycles -- `stall_cycles` - Cycles spent waiting -- `idle_cycles` - Cycles slot was idle -- `execution_count` - Total executions -- `instructions` - List of ISA instructions -- `stall_percent` - Convenience: stall_cycles / total_cycles * 100 - -### InstructionData Properties - -- `isa` - ISA instruction text -- `latency_cycles` - Total cycles for this instruction -- `stall_cycles` - Cycles spent waiting -- `idle_cycles` - Cycles slot was idle -- `execution_count` - How many times it ran -- `instruction_address` - Virtual address in GPU memory -- `file` - Parsed from source_location -- `line` - Parsed from 
source_location -- `stall_percent` - Convenience: stall_cycles / latency_cycles * 100 - -## Workflow - -1. Ensure the target binary is built with `-g` (debug symbols) for source mapping. -2. Create a `Linex()` profiler; optionally set `target_cu`, `shader_engine_mask`, or `activity`. -3. Call `profiler.profile(command, kernel_filter=...)` to run profiling. -4. Access `profiler.source_lines` (sorted by total_cycles) to find hotspots. -5. Use `line.stall_percent` to identify memory-bound or dependency-bound lines. -6. Drill down into `line.instructions` for instruction-level analysis. -7. Use relative paths for the target binary so the skill is portable. - -## Notes - -- Requires ROCm 7.0+ with `rocprofv3` support. -- Source mapping requires kernels compiled with `-g` (debug symbols). -- `source_lines` are automatically sorted by `total_cycles` (descending). -- Use `kernel_filter` to profile specific kernels by name (regex pattern). -- For Triton or other frameworks, ensure debug symbols are available in the compiled output. diff --git a/.github/agents/skills/metrix/SKILL.md b/.github/agents/skills/metrix/SKILL.md deleted file mode 100644 index 969ef6eef..000000000 --- a/.github/agents/skills/metrix/SKILL.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -name: metrix-profiling -description: Profile GPU kernels when performance analysis or optimization is required. Use for AMD ROCm GPU metrics, bandwidth, cache hit rates, coalescing, or kernel timing. ---- - -# Metrix: GPU Profiling - -Profile AMD GPU kernels and get human-readable metrics (bandwidth, cache, coalescing, FLOPS). Architecture is auto-detected. - -## When to Use - -- User asks to profile a GPU application or kernel -- Performance analysis, optimization, or bottleneck investigation -- Need HBM/L2/L1 bandwidth, hit rates, or compute metrics -- Need timing-only runs (fast, no hardware counters) - -## Instructions - -1. **Ensure the target runs on AMD ROCm** (e.g. 
`hipcc`-built binary or Python script that launches HIP/ROCm kernels). -2. **Choose execution path:** - - If a Metrix MCP server is available, use its profile tool with the same options below. - - Otherwise run the CLI or Python API from the environment where Metrix is installed. - -### CLI - -From the project or install prefix: - -```bash -# Profile with all metrics (auto-detected arch) -metrix ./my_app - -# Time only (fast, no counters) -metrix --time-only -n 10 ./my_app - -# Filter kernels by name -metrix --kernel matmul ./my_app - -# Specific metrics -metrix --metrics memory.l2_hit_rate,memory.coalescing_efficiency,compute.total_flops ./my_app - -# Save to JSON/CSV -metrix -o results.json ./my_app -``` - -Options: `--profile`/`-p` (run `metrix list profiles` for names: `quick`, `memory`, `memory_bandwidth`, `memory_cache`, `compute`), `--metrics`/`-m`, `--time-only`, `--kernel`/`-k` (regular expression), `--num-replays`/`-n`, `--output`/`-o`, `--top`, `--aggregate`, `--timeout`, `--no-counters`, `--log`/`-l`, `--quiet`/`-q`. Discovery: `metrix list `, `metrix info `. Note: `metrix list counters` and `metrix info counter ` are not implemented yet (CLI reports “not yet implemented”). - -### Python API - -```python -from metrix import Metrix - -profiler = Metrix() -results = profiler.profile("./my_app", num_replays=5) - -for kernel in results.kernels: - print(kernel.name, kernel.duration_us.avg) - for metric, stats in kernel.metrics.items(): - print(f" {metric}: {stats.avg}") -``` - -Use `metrics=[...]` for a subset; omit for all metrics. Use `cwd` when the binary expects a specific working directory. - -## Workflow - -1. Identify the executable or script to profile (e.g. `./app` or `python run_kernels.py`). -2. If only timing is needed, use `--time-only` for speed. -3. If full metrics are needed, run `metrix ./app` (or MCP equivalent); optionally restrict with `--kernel` or `--metrics`. -4. 
Interpret results: low L2 hit rate, low coalescing, or low HBM utilization suggest optimization targets. -5. For automation or tooling, use `-o results.json` and parse the JSON output. - -## Key Metrics (reference) - -- **Memory:** `memory.hbm_bandwidth_utilization`, `memory.l2_hit_rate`, `memory.l1_hit_rate`, `memory.coalescing_efficiency`, `memory.global_load_efficiency`, `memory.lds_bank_conflicts`, `memory.atomic_latency` -- **Compute:** `compute.total_flops`, `compute.hbm_gflops`, `compute.hbm_arithmetic_intensity`, `compute.l2_arithmetic_intensity`, `compute.l1_arithmetic_intensity` - -Use relative paths for the target binary and output files so the skill is portable across environments. diff --git a/.github/agents/skills/nexus/SKILL.md b/.github/agents/skills/nexus/SKILL.md deleted file mode 100644 index ad714bc4d..000000000 --- a/.github/agents/skills/nexus/SKILL.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: nexus-trace -description: Extract GPU kernel assembly and HIP source from HSA packet traces. Use when analyzing what code ran on the GPU, debugging kernel dispatch, or inspecting assembly and source mapping. ---- - -# Nexus: HSA Packet Source Code Extractor - -Intercepts HSA packets from a running process and extracts, per kernel, assembly and HIP source into a structured trace (e.g. JSON). Use for kernel-level inspection and assembly/source correlation. - -## When to Use - -- User needs to see which kernels ran and their assembly or HIP source -- Debugging or analyzing GPU dispatch and code generation -- Inspecting assembly-to-source mapping for a HIP (or ROCm) application - -## Instructions - -1. **Ensure the target runs on AMD ROCm** and uses HSA (e.g. HIP application or ROCm runtime). -2. **Choose execution path:** - - If a Nexus MCP server is available, use its tools: `list_kernels` to enumerate kernels in a trace, and `extract_kernel_code` to get assembly and HIP/source mapping (signature, files, lines). 
See `nexus/nexus/mcp/server.py` for tool parameters and schemas. - - Otherwise use the Python API from the environment where Nexus is installed. - -### Python API (recommended when no MCP) - -```python -from nexus import Nexus - -nexus = Nexus(log_level=1) -trace = nexus.run(["python", "my_gpu_script.py"]) - -# Or run a binary: -# trace = nexus.run(["./my_hip_app"]) - -for kernel in trace: - print(kernel.name, len(kernel.assembly), "instructions") - for i, asm_line in enumerate(kernel.assembly, 1): - print(f" {i}. {asm_line}") - for line_no, hip_line in zip(kernel.lines or range(1, len(kernel.hip)+1), kernel.hip): - print(f" {line_no}: {hip_line}") - -# Access by kernel name -k = trace["vector_add(float const*, float const*, float*, int)"] -print(k.assembly, k.hip, k.signature, k.files, k.lines) - -# Save/load trace -trace.save("trace.json") -loaded = Nexus.load("trace.json") -``` - -Set `log_level` (0–4) to control verbosity. Use relative paths for the run command and output file so the skill is portable. - -### Environment-based usage (no Python API) - -When the process cannot be launched via `nexus.run()`: - -1. Set `HSA_TOOLS_LIB` to the Nexus shared library path (e.g. `build/lib/libnexus.so` or the installed path). -2. Set `NEXUS_OUTPUT_FILE` to the output JSON path. -3. Set `NEXUS_LOG_LEVEL` (0–4) if needed. -4. Run the application as usual; it will be traced and the output file will contain the kernel data. - -Optional: `NEXUS_EXTRA_SEARCH_PREFIX` (colon-separated) for HIP source search; `TRITON_DISABLE_LINE_INFO=0` for Triton kernel line info. - -## Workflow - -1. Identify the command that runs the GPU workload (e.g. `python script.py` or `./app`). -2. If using the Python API: create `Nexus(log_level=...)`, call `nexus.run([...])`, then iterate `trace` and optionally `trace.save(...)`. -3. If using the env method: set `HSA_TOOLS_LIB` and `NEXUS_OUTPUT_FILE`, then run the app; open the JSON and parse the `kernels` structure. -4. 
Use kernel `signature`, `assembly`, `hip`, `files`, and `lines` to analyze what ran and map assembly back to source. -5. Use relative paths for commands and output files. - -## Notes - -- Nexus is intended for research/analysis; ensure the target environment has the Nexus library and compatible ROCm/HSA stack. -- For Triton kernels, enable line info via `TRITON_DISABLE_LINE_INFO=0` when using the Python API. diff --git a/.gitignore b/.gitignore index 7c603be98..0c1773ddf 100644 --- a/.gitignore +++ b/.gitignore @@ -59,4 +59,6 @@ gpucore.* logs/ *.cap hsakmt_counters.csv -core \ No newline at end of file +core +.intellikit/ +.github/agents/ \ No newline at end of file diff --git a/.intellikit b/.intellikit deleted file mode 160000 index bcbfa0252..000000000 --- a/.intellikit +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bcbfa0252df9d55f3aab68c95dd3ce45ccbe5b46 diff --git a/benchmark/ops/all_gather_matmul/benchmark.py b/benchmark/ops/all_gather_matmul/benchmark.py index b9d40118d..cc2cede2e 100644 --- a/benchmark/ops/all_gather_matmul/benchmark.py +++ b/benchmark/ops/all_gather_matmul/benchmark.py @@ -66,7 +66,7 @@ def parse_args(): "--variant", type=str, default="pull", - choices=["pull", "chunked", "push", "pipelined_pull"], + choices=["pull"], help="All-gather matmul variant", ) parser.add_argument( @@ -94,7 +94,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): init_method=init_url, world_size=world_size, rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}"), + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) shmem = iris.iris(args["heap_size"]) diff --git a/benchmark/ops/all_gather_matmul/benchmark_torchrun.py b/benchmark/ops/all_gather_matmul/benchmark_torchrun.py index f4526410c..253cabe23 100755 --- a/benchmark/ops/all_gather_matmul/benchmark_torchrun.py +++ b/benchmark/ops/all_gather_matmul/benchmark_torchrun.py @@ -75,7 +75,7 @@ def parse_args(): "--variant", type=str, 
default="pull", - choices=["pull", "chunked", "push", "pipelined_pull"], + choices=["pull"], help="All-gather matmul variant", ) parser.add_argument( diff --git a/benchmark/ops/matmul_all_gather/benchmark.py b/benchmark/ops/matmul_all_gather/benchmark.py index 22c914e8d..8aaac23f8 100644 --- a/benchmark/ops/matmul_all_gather/benchmark.py +++ b/benchmark/ops/matmul_all_gather/benchmark.py @@ -76,7 +76,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): init_method=init_url, world_size=world_size, rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}"), + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) shmem = iris.iris(args["heap_size"]) diff --git a/benchmark/ops/matmul_all_reduce/benchmark.py b/benchmark/ops/matmul_all_reduce/benchmark.py index fd923e051..8a77f4b41 100644 --- a/benchmark/ops/matmul_all_reduce/benchmark.py +++ b/benchmark/ops/matmul_all_reduce/benchmark.py @@ -83,7 +83,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): init_method=init_url, world_size=world_size, rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}"), + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) shmem = iris.iris(args["heap_size"]) diff --git a/benchmark/ops/matmul_reduce_scatter/benchmark.py b/benchmark/ops/matmul_reduce_scatter/benchmark.py index 301444f25..1b4936167 100644 --- a/benchmark/ops/matmul_reduce_scatter/benchmark.py +++ b/benchmark/ops/matmul_reduce_scatter/benchmark.py @@ -76,7 +76,7 @@ def _worker(local_rank: int, world_size: int, init_url: str, args: dict): init_method=init_url, world_size=world_size, rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}"), + device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, ) shmem = iris.iris(args["heap_size"]) diff --git a/iris/ops/__init__.py b/iris/ops/__init__.py index c96fa32e5..a49fffd84 100644 --- a/iris/ops/__init__.py +++ 
b/iris/ops/__init__.py @@ -141,7 +141,7 @@ def matmul_all_gather(self, output_tensor, A, B, bias=None, async_op=False, conf """ return matmul_all_gather(self._shmem, output_tensor, A, B, bias, async_op, config, workspace) - def matmul_reduce_scatter(self, output_tensor, A, B, async_op=False, config=None, workspace=None): + def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, config=None, workspace=None): """ Fused matrix multiplication and reduce-scatter. @@ -151,6 +151,7 @@ def matmul_reduce_scatter(self, output_tensor, A, B, async_op=False, config=None output_tensor: Output tensor (M, N) - will contain reduced tiles for this rank A: Input matrix A (M, K) B: Input matrix B (K, N) + bias: Optional bias (currently unused; reserved for future support) async_op: If False, performs barrier at end config: Optional FusedConfig for tuning workspace: Optional pre-allocated workspace diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index 4f272825f..e92dc255b 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -283,6 +283,8 @@ def all_gather_matmul( even_k, config.allow_tf32, ) + else: + raise ValueError(f"Unsupported all_gather_matmul_variant '{variant}'. Only 'pull' is currently supported.") if not async_op: shmem.barrier() diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 2db1b6ed7..2c8b80224 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -317,7 +317,9 @@ def _extract_wg_trace(shmem, grid_size, **metadata): event_ids = bufs["event_id"][:n].cpu().numpy() pids = bufs["pid"][:n].cpu().numpy() timestamps = bufs["timestamp"][:n].cpu().numpy().astype(np.int64) - end_ts = bufs["duration_cycles"][:n].cpu().numpy().astype(np.int64) + # Note: despite the field name, "duration_cycles" stores the absolute end timestamp + # (set by record_event_end). The actual duration is end_ts - start_ts. 
+ end_timestamps = bufs["duration_cycles"][:n].cpu().numpy().astype(np.int64) xcc_ids = bufs["xcc_id"][:n].cpu().numpy().astype(np.int32) pid_ns = bufs["pid_n"][:n].cpu().numpy() @@ -333,7 +335,7 @@ def _extract_wg_trace(shmem, grid_size, **metadata): continue if eid == _WG_FETCH or eid == _WG_GEMM: starts[wg] = int(timestamps[i]) - ends[wg] = int(end_ts[i]) + ends[wg] = int(end_timestamps[i]) xcds[wg] = int(xcc_ids[i]) elif eid == _WG_GEMM_WAIT: waits[wg] = int(pid_ns[i]) diff --git a/iris/ops/config.py b/iris/ops/config.py index c5d15349b..58bbe2d39 100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -22,9 +22,9 @@ class FusedConfig: block_size_m: Block size for M dimension (rows). Default: 128. block_size_n: Block size for N dimension (columns). Default: 256. block_size_k: Block size for K dimension (reduction). Default: 64. - group_size_m: Group size for M dimension tiling. Default: 4. + group_size_m: Group size for M dimension tiling. Default: 1. num_sms: Number of SMs to use. If None, auto-detects from device. Default: None. - num_xcds: Number of XCDs (chiplets). Default: 1. + num_xcds: Number of XCDs (chiplets). Default: 8. chunk_size: Chunk size for chiplet transform. Default: 1. cache_modifier_a: Cache modifier for matrix A (".ca" for cached). Default: ".ca". cache_modifier_b: Cache modifier for matrix B (".ca" for cached). Default: ".ca". 
@@ -64,7 +64,7 @@ class FusedConfig: # CCL-specific parameters all_reduce_variant: str = "two_shot" # atomic, ring, one_shot, two_shot, spinlock all_reduce_num_rings: int = 1 - all_gather_matmul_variant: str = "pull" # pull, chunked + all_gather_matmul_variant: str = "pull" # pull def validate(self, world_size: Optional[int] = None): """ diff --git a/tests/ops/test_all_gather_matmul.py b/tests/ops/test_all_gather_matmul.py index db4b21250..9ceaa7581 100644 --- a/tests/ops/test_all_gather_matmul.py +++ b/tests/ops/test_all_gather_matmul.py @@ -83,9 +83,6 @@ def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N, variant): # Run fused all_gather + matmul using shmem.ops API from iris.ops.config import FusedConfig - if rank == 0: - print(f"\n[Test] Testing variant={variant}, M={M}, K_local={K_local}, N={N}, dtype={dtype}") - # Use appropriate block sizes based on problem size # For small problems, use smaller blocks if M <= 256 or K_local <= 64 or N <= 128: From 98d25bfb7cc2cb96aee1e7287304fc4d4b14443e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:31:46 +0000 Subject: [PATCH 34/60] Clarify bias handling in matmul_reduce_scatter: raise NotImplementedError if non-None Agent-Logs-Url: https://github.com/ROCm/iris/sessions/1e2f61b2-a214-44c6-9091-c3482f911eb9 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/ops/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/iris/ops/__init__.py b/iris/ops/__init__.py index a49fffd84..ba6a698de 100644 --- a/iris/ops/__init__.py +++ b/iris/ops/__init__.py @@ -151,7 +151,8 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, output_tensor: Output tensor (M, N) - will contain reduced tiles for this rank A: Input matrix A (M, K) B: Input matrix B (K, N) - bias: Optional bias (currently unused; reserved for future support) + bias: Optional bias (currently unused; 
reserved for future support). + Passing a non-None value will raise a NotImplementedError. async_op: If False, performs barrier at end config: Optional FusedConfig for tuning workspace: Optional pre-allocated workspace @@ -163,6 +164,8 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, >>> output = shmem.zeros((M, N), dtype=torch.float16) >>> shmem.ops.matmul_reduce_scatter(output, A, B) """ + if bias is not None: + raise NotImplementedError("bias is not yet supported for matmul_reduce_scatter") return matmul_reduce_scatter(self._shmem, output_tensor, A, B, async_op, config, workspace) From f4b4e755e39692f8f728c8a9b4b3b0f49feb5b2e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 21:34:27 +0000 Subject: [PATCH 35/60] Sync with main, remove unneeded scripts, minimize PR footprint - Merged origin/main and resolved docstring conflicts in iris/iris.py - Reverted iris/ops/matmul_all_reduce.py, matmul_all_gather.py, workspace.py to main - Reverted iris/ops/config.py to main defaults; kept only all_gather_matmul_variant addition - Reverted pyproject.toml tritonBLAS bump, .github/scripts/run_tests.sh, apptainer/iris.def - Removed old-style benchmark directories: matmul_all_reduce/, matmul_all_gather/, matmul_reduce_scatter/ - Removed superseded all_gather_matmul/benchmark.py and benchmark_torchrun.py (main has bench_all_gather_matmul.py) - Removed profile_att.sh (referenced now-deleted benchmark_torchrun.py) - PR now only changes 13 files vs main (was 96) Agent-Logs-Url: https://github.com/ROCm/iris/sessions/781d8069-40c3-4c5c-8dc2-d79919514a68 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .github/scripts/run_tests.sh | 6 +- apptainer/iris.def | 2 +- benchmark/ops/all_gather_matmul/benchmark.py | 422 --------------- .../all_gather_matmul/benchmark_torchrun.py | 487 ------------------ .../ops/all_gather_matmul/profile_att.sh | 344 
------------- benchmark/ops/matmul_all_gather/benchmark.py | 367 ------------- benchmark/ops/matmul_all_reduce/benchmark.py | 378 -------------- .../ops/matmul_reduce_scatter/benchmark.py | 421 --------------- iris/iris.py | 8 + iris/ops/__init__.py | 18 +- iris/ops/config.py | 12 +- iris/ops/matmul_all_gather.py | 11 + iris/ops/matmul_all_reduce.py | 5 + iris/ops/workspace.py | 10 - pyproject.toml | 2 +- tests/ops/test_matmul_all_reduce.py | 2 +- 16 files changed, 46 insertions(+), 2449 deletions(-) delete mode 100644 benchmark/ops/all_gather_matmul/benchmark.py delete mode 100755 benchmark/ops/all_gather_matmul/benchmark_torchrun.py delete mode 100755 benchmark/ops/all_gather_matmul/profile_att.sh delete mode 100644 benchmark/ops/matmul_all_gather/benchmark.py delete mode 100644 benchmark/ops/matmul_all_reduce/benchmark.py delete mode 100644 benchmark/ops/matmul_reduce_scatter/benchmark.py diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh index 1e17e85a1..c126df7ca 100755 --- a/.github/scripts/run_tests.sh +++ b/.github/scripts/run_tests.sh @@ -74,10 +74,10 @@ EXIT_CODE=0 # shellcheck disable=SC2086 "$SCRIPT_DIR/container_exec.sh" $GPU_ARG " set -e - + echo \"Installing iris using method: $INSTALL_METHOD\" $INSTALL_CMD - + # Run tests in the specified directory for test_file in tests/$TEST_DIR/test_*.py; do if [ -f \"\$test_file\" ]; then @@ -88,4 +88,4 @@ EXIT_CODE=0 " || { EXIT_CODE=$?; } # GPU cleanup is now handled by workflow-level release_gpus.sh step -exit $EXIT_CODE +exit $EXIT_CODE \ No newline at end of file diff --git a/apptainer/iris.def b/apptainer/iris.def index 370669445..7a1f39849 100644 --- a/apptainer/iris.def +++ b/apptainer/iris.def @@ -30,7 +30,7 @@ From: rocm/pytorch:rocm7.1_ubuntu24.04_py3.13_pytorch_release_2.9.1 cd /opt git clone https://github.com/triton-lang/triton.git \$TRITON_PATH cd \$TRITON_PATH - git checkout bcbcabdd0cff6539c7168299075992b2a23ff38e + git checkout bcbcabdd0cff6539c7168299075992b2a23ff38e pip3 
install -e . # Make the venv writable by all diff --git a/benchmark/ops/all_gather_matmul/benchmark.py b/benchmark/ops/all_gather_matmul/benchmark.py deleted file mode 100644 index cc2cede2e..000000000 --- a/benchmark/ops/all_gather_matmul/benchmark.py +++ /dev/null @@ -1,422 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for iris.ops all_gather_matmul fused operation. - -This benchmark showcases the fused All-Gather + GEMM operation where each rank -has a sharded A matrix that gets gathered, then multiplied with B. -""" - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import random -import argparse - -from examples.common.utils import JSONWriter - -import iris -from iris.ops.all_gather_matmul import all_gather_matmul_preamble -from iris.ops import FusedConfig - -torch.manual_seed(123) -random.seed(123) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark all_gather_matmul fused operation.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") - parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") - parser.add_argument("-k", type=int, default=131072, help="Common dimension total (K)") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") - parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") - parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Datatype of tensors", - ) - parser.add_argument( - "--output_file", - type=str, - default="all_gather_matmul.json", - help="Output file", - ) - parser.add_argument("--heap_size", type=int, 
default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (all_gather_into_tensor + matmul) for comparison", - ) - parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") - parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") - parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") - parser.add_argument( - "--variant", - type=str, - default="pull", - choices=["pull"], - help="All-gather matmul variant", - ) - parser.add_argument( - "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" - ) - parser.add_argument( - "--b_col_major", - action="store_true", - help="Store B matrix in column-major order (K-contiguous) to reduce LDS transpose overhead", - ) - parser.add_argument( - "--a_col_major", - action="store_true", - help="Store A matrix in column-major order (M-contiguous). 
Default is row-major (K-contiguous).", - ) - - return vars(parser.parse_args()) - - -def _worker(local_rank: int, world_size: int, init_url: str, args: dict): - """Worker function for PyTorch distributed execution.""" - backend = "nccl" if torch.cuda.is_available() else "gloo" - dist.init_process_group( - backend=backend, - init_method=init_url, - world_size=world_size, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - # Datatype mapping - datatype = torch.float32 - if args["datatype"] == "fp16": - datatype = torch.float16 - elif args["datatype"] == "fp32": - datatype = torch.float32 - elif args["datatype"] == "bf16": - datatype = torch.bfloat16 - else: - print("Unknown datatype.") - exit(1) - - M = args["m"] - N = args["n"] - K = args["k"] - K_local = K // world_size # Sharded K dimension - - # Create config with parameters - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - "all_gather_matmul_variant": args["variant"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - - config = FusedConfig(**config_kwargs) - - json_writer = JSONWriter(args["output_file"]) - json_writer.add_field("world_size", world_size) - json_writer.add_field("operation", "all_gather_matmul") - json_writer.add_field("k_local", K_local) - json_writer.add_field("k_total", K) - - for key, value in args.items(): - json_writer.add_field(key, value) - - # Export actual config values to JSON (including defaults) - json_writer.add_field("block_size_m", config.block_size_m) - json_writer.add_field("block_size_n", config.block_size_n) - json_writer.add_field("block_size_k", config.block_size_k) 
- json_writer.add_field("group_size_m", config.group_size_m) - json_writer.add_field("num_sms", config.num_sms) - json_writer.add_field("num_xcds", config.num_xcds) - - # Create input and output tensors - # A_sharded is M x K_local, B is K x N, output is M x N - C = shmem.zeros((M, N), dtype=datatype) - expected_tensor = None - - # Create A_sharded matrix with optional column-major layout - # When a_col_major=True, M becomes the contiguous dimension - # Default (row-major): K is contiguous (stride_ak=1, stride_am=K_local) - if args["a_col_major"]: - # Allocate storage as (K_local, M) row-major, then transpose to get (M, K_local) with M-contiguous - # This means stride_am=1 and stride_ak=M - A_storage = shmem.zeros((K_local, M), dtype=datatype) - A_sharded = A_storage.T # View as (M, K_local) with M-contiguous strides - shmem.info(f"Using column-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (M-contiguous)") - else: - # Standard row-major (M, K_local) - K is contiguous - A_sharded = shmem.zeros((M, K_local), dtype=datatype) - shmem.info(f"Using row-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (K-contiguous)") - - json_writer.add_field("a_col_major", args["a_col_major"]) - json_writer.add_field("a_stride_m", A_sharded.stride()[0]) - json_writer.add_field("a_stride_k", A_sharded.stride()[1]) - - # Create B matrix with optional column-major layout for K-contiguous access - # When b_col_major=True, we store B such that K is the contiguous dimension - # This reduces LDS transpose overhead when loading B tiles along the K dimension - if args["b_col_major"]: - # Allocate storage as (N, K) row-major, then transpose to get (K, N) with K-contiguous - # This means stride_bk=1 and stride_bn=K - B_storage = shmem.zeros((N, K), dtype=datatype) - B = B_storage.T # View as (K, N) with K-contiguous strides - shmem.info(f"Using column-major B: shape={B.shape}, strides={B.stride()} (K-contiguous)") - else: - # Standard row-major (K, N) - N is 
contiguous - B = shmem.zeros((K, N), dtype=datatype) - shmem.info(f"Using row-major B: shape={B.shape}, strides={B.stride()} (N-contiguous)") - - json_writer.add_field("b_col_major", args["b_col_major"]) - json_writer.add_field("b_stride_k", B.stride()[0]) - json_writer.add_field("b_stride_n", B.stride()[1]) - - # Fill inputs with deterministic values - # Each rank has different A_sharded, same B - torch.manual_seed(123 + rank) - A_sharded_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}") - A_sharded.copy_(A_sharded_data) - - torch.manual_seed(456) # Same B for all ranks - # Generate B data in standard (K, N) layout for consistency - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - # Copy to B (handles both row-major and column-major storage) - B.copy_(B_data) - - # For validation: compute expected result - if args["validate"]: - # Gather all A_sharded matrices and compute expected result - A_sharded_list = [torch.zeros((M, K_local), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_sharded_list, A_sharded_data) - - # Concatenate along K dimension: A_gathered = [A_0 | A_1 | ... 
| A_n] - A_gathered = torch.cat(A_sharded_list, dim=1) # (M, K) - - # Expected: A_gathered @ B - expected_tensor = shmem.zeros((M, N), dtype=datatype) - expected_result = torch.matmul(A_gathered, B_data) - expected_tensor.copy_(expected_result) - - comm_stream = torch.cuda.Stream() - - kernel_timing = { - "all_gather_matmul": { - "start_event": torch.cuda.Event(enable_timing=True), - "end_event": torch.cuda.Event(enable_timing=True), - "ms": 0, - "experiments": 0, - }, - } - - # Pre-allocate workspace once (important for push variant which needs large buffers) - workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) - - def run_experiment(): - nonlocal kernel_timing - - shmem.barrier() - - torch.cuda.nvtx.range_push("All-Gather-Matmul") - with torch.cuda.stream(comm_stream): - kernel_timing["all_gather_matmul"]["start_event"].record() - shmem.ops.all_gather_matmul( - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - ) - kernel_timing["all_gather_matmul"]["end_event"].record() - kernel_timing["all_gather_matmul"]["experiments"] += 1 - torch.cuda.nvtx.range_pop() - - # Synchronize before querying event timing - shmem.barrier() - - # Update timing - ms = kernel_timing["all_gather_matmul"]["start_event"].elapsed_time( - kernel_timing["all_gather_matmul"]["end_event"] - ) - kernel_timing["all_gather_matmul"]["ms"] += ms - - # Synchronize across all GPUs - shmem.barrier() - - if args["validate"]: - shmem.info("Validating...") - - # Reset output before validation - C.zero_() - shmem.barrier() - - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 1e-1 if datatype == torch.float16 else 1e-3 - success = torch.allclose(C, expected_tensor, atol=atol) - if not success: - max_diff = torch.abs(C - expected_tensor).max().item() - shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") - - if success: - shmem.info("All-gather-matmul validation passed!") - else: - shmem.error("All-gather-matmul 
validation failed!") - - json_writer.add_field("success", success) - - # Wait for all to finish validation - shmem.barrier() - - if args["benchmark"]: - # Warmup for benchmarking - for k in ["all_gather_matmul"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) - - for k in ["all_gather_matmul"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - # Reset output before benchmarking - C.zero_() - shmem.barrier() - - shmem.info("Benchmarking...") - - # Calculate TFLOPS: 2*M*N*K flops - total_flops = 2 * M * N * K - total_tflops_unit = total_flops * 1e-12 - - triton_ms = iris.do_bench(run_experiment, shmem.barrier) - tflops = total_tflops_unit / ( - (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 - ) - - # Calculate bandwidth for all-gather part - # All-gather moves (world_size - 1) * M * K_local * element_size bytes - element_size = torch.tensor([], dtype=datatype).element_size() - input_bytes = M * K_local * element_size - total_bytes = input_bytes * (world_size - 1) - total_bytes_gb = total_bytes / (1024**3) - - bandwidth_gbps = total_bytes_gb / ( - (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 - ) - - shmem.info( - f"All-gather-matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " - f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" - ) - - json_writer.add_field("tflops", tflops) - json_writer.add_field("bandwidth_gbps", bandwidth_gbps) - json_writer.add_field("total_ms", triton_ms) - json_writer.add_field("total_flops", total_flops) - json_writer.add_field("total_bytes", total_bytes) - json_writer.add_field("total_bytes_gb", total_bytes_gb) - json_writer.add_field( - "all_gather_matmul_ms", - kernel_timing["all_gather_matmul"]["ms"] / 
kernel_timing["all_gather_matmul"]["experiments"], - ) - json_writer.add_field("all_gather_matmul_experiments", kernel_timing["all_gather_matmul"]["experiments"]) - - # Wait for all to finish benchmarking - shmem.barrier() - - # Benchmark PyTorch (all_gather_into_tensor + matmul) for comparison - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") - - # Create PyTorch tensors (not on Iris heap) - pytorch_A_sharded = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}") - pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") - pytorch_A_gathered = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}") - pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") - - # Warmup - for _ in range(10): - dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) - pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) - torch.cuda.synchronize() - dist.barrier() - - # Benchmark - dist.barrier() - - def run_pytorch_experiment(): - dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) - pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) - - pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) - - # Calculate TFLOPS and bandwidth - pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) - pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) - - shmem.info( - f"PyTorch all_gather_into_tensor+matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " - f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" - ) - - if args["benchmark"]: - # Calculate performance ratio - iris_tflops = tflops - speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 - shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") - - json_writer.add_field("pytorch_tflops", pytorch_tflops) - json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) - 
json_writer.add_field("pytorch_ms", pytorch_ms) - json_writer.add_field("iris_speedup", speedup) - - # Wait for all to finish PyTorch benchmarking - shmem.barrier() - - if rank == 0: - json_writer.flush() - json_writer.display() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - args = parse_args() - num_ranks = args["num_ranks"] - init_url = args["init_url"] - - mp.spawn( - fn=_worker, - args=(num_ranks, init_url, args), - nprocs=num_ranks, - join=True, - ) - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/all_gather_matmul/benchmark_torchrun.py b/benchmark/ops/all_gather_matmul/benchmark_torchrun.py deleted file mode 100755 index 253cabe23..000000000 --- a/benchmark/ops/all_gather_matmul/benchmark_torchrun.py +++ /dev/null @@ -1,487 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for iris.ops all_gather_matmul fused operation. - -This benchmark showcases the fused All-Gather + GEMM operation where each rank -has a sharded A matrix that gets gathered, then multiplied with B. - -This version is compatible with torchrun for use with profiling tools like rocprofv3/att. 
- -Usage with torchrun: - torchrun --nproc_per_node=8 benchmark_torchrun.py -m 16384 -n 2048 -k 131072 --benchmark - -Usage with rocprofv3: - torchrun --nproc_per_node=8 rocprofv3 --att benchmark_torchrun.py -m 16384 -n 2048 -k 131072 --benchmark -""" - -import os -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import random -import argparse - -from examples.common.utils import JSONWriter - -import iris -from iris.ops.all_gather_matmul import all_gather_matmul_preamble -from iris.ops import FusedConfig - -torch.manual_seed(123) -random.seed(123) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark all_gather_matmul fused operation.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") - parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") - parser.add_argument("-k", type=int, default=131072, help="Common dimension total (K)") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") - parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") - parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Datatype of tensors", - ) - parser.add_argument( - "--output_file", - type=str, - default="all_gather_matmul.json", - help="Output file", - ) - parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (all_gather_into_tensor + matmul) for comparison", - ) - parser.add_argument("--block_size_m", type=int, 
default=256, help="Block size for M dimension") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") - parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") - parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") - parser.add_argument( - "--variant", - type=str, - default="pull", - choices=["pull"], - help="All-gather matmul variant", - ) - parser.add_argument( - "--init_url", type=str, default="tcp://127.0.0.1:29530", help="Initialization URL for distributed setup" - ) - parser.add_argument( - "--single-run", - action="store_true", - help="Run only one iteration (no warmup, 1 repeat) - useful for profiling", - ) - parser.add_argument( - "--b_col_major", - action="store_true", - help="Store B matrix in column-major order (K-contiguous) to reduce LDS transpose overhead", - ) - parser.add_argument( - "--a_col_major", - action="store_true", - help="Store A matrix in column-major order (M-contiguous). 
Default is row-major (K-contiguous).", - ) - - return vars(parser.parse_args()) - - -def _worker(local_rank: int = None, world_size: int = None, init_url: str = None, args: dict = None): - """Worker function for PyTorch distributed execution.""" - # Support torchrun: read from environment variables if available - if local_rank is None: - local_rank = int(os.environ.get("RANK", os.environ.get("LOCAL_RANK", 0))) - if world_size is None: - world_size = int(os.environ.get("WORLD_SIZE", 1)) - if init_url is None: - # torchrun sets MASTER_ADDR and MASTER_PORT - master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") - master_port = os.environ.get("MASTER_PORT", "29500") - init_url = f"tcp://{master_addr}:{master_port}" - - # Use nccl backend - gloo doesn't support uint64 tensors used by Iris - backend = "nccl" if torch.cuda.is_available() else "gloo" - print(f"Rank {local_rank}: Using backend: {backend}") - - # Use environment-based initialization if torchrun is detected - if "RANK" in os.environ or "LOCAL_RANK" in os.environ: - # For torchrun, use env:// initialization with device_id for nccl - dist.init_process_group( - backend=backend, - init_method="env://", - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - else: - dist.init_process_group( - backend=backend, - init_method=init_url, - world_size=world_size, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - # Datatype mapping - datatype = torch.float32 - if args["datatype"] == "fp16": - datatype = torch.float16 - elif args["datatype"] == "fp32": - datatype = torch.float32 - elif args["datatype"] == "bf16": - datatype = torch.bfloat16 - else: - print("Unknown datatype.") - exit(1) - - M = args["m"] - N = args["n"] - K = args["k"] - K_local = K // world_size # Sharded K dimension - - # Create config with 
parameters - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - "all_gather_matmul_variant": args["variant"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - - config = FusedConfig(**config_kwargs) - - json_writer = JSONWriter(args["output_file"]) - json_writer.add_field("world_size", world_size) - json_writer.add_field("operation", "all_gather_matmul") - json_writer.add_field("k_local", K_local) - json_writer.add_field("k_total", K) - - for key, value in args.items(): - json_writer.add_field(key, value) - - # Export actual config values to JSON (including defaults) - json_writer.add_field("block_size_m", config.block_size_m) - json_writer.add_field("block_size_n", config.block_size_n) - json_writer.add_field("block_size_k", config.block_size_k) - json_writer.add_field("group_size_m", config.group_size_m) - json_writer.add_field("num_sms", config.num_sms) - json_writer.add_field("num_xcds", config.num_xcds) - - # Create input and output tensors - # A_sharded is M x K_local, B is K x N, output is M x N - C = shmem.zeros((M, N), dtype=datatype) - expected_tensor = None - - # Create A_sharded matrix with optional column-major layout - # When a_col_major=True, M becomes the contiguous dimension - # Default (row-major): K is contiguous (stride_ak=1, stride_am=K_local) - if args["a_col_major"]: - # Allocate storage as (K_local, M) row-major, then transpose to get (M, K_local) with M-contiguous - # This means stride_am=1 and stride_ak=M - A_storage = shmem.zeros((K_local, M), dtype=datatype) - A_sharded = A_storage.T # View as (M, K_local) with M-contiguous strides - shmem.info(f"Using column-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (M-contiguous)") - else: - # Standard row-major (M, K_local) - K is 
contiguous - A_sharded = shmem.zeros((M, K_local), dtype=datatype) - shmem.info(f"Using row-major A: shape={A_sharded.shape}, strides={A_sharded.stride()} (K-contiguous)") - - json_writer.add_field("a_col_major", args["a_col_major"]) - json_writer.add_field("a_stride_m", A_sharded.stride()[0]) - json_writer.add_field("a_stride_k", A_sharded.stride()[1]) - - # Create B matrix with optional column-major layout for K-contiguous access - # When b_col_major=True, we store B such that K is the contiguous dimension - # This reduces LDS transpose overhead when loading B tiles along the K dimension - if args["b_col_major"]: - # Allocate storage as (N, K) row-major, then transpose to get (K, N) with K-contiguous - # This means stride_bk=1 and stride_bn=K - B_storage = shmem.zeros((N, K), dtype=datatype) - B = B_storage.T # View as (K, N) with K-contiguous strides - shmem.info(f"Using column-major B: shape={B.shape}, strides={B.stride()} (K-contiguous)") - else: - # Standard row-major (K, N) - N is contiguous - B = shmem.zeros((K, N), dtype=datatype) - shmem.info(f"Using row-major B: shape={B.shape}, strides={B.stride()} (N-contiguous)") - - json_writer.add_field("b_col_major", args["b_col_major"]) - json_writer.add_field("b_stride_k", B.stride()[0]) - json_writer.add_field("b_stride_n", B.stride()[1]) - - # Fill inputs with deterministic values - # Each rank has different A_sharded, same B - torch.manual_seed(123 + rank) - A_sharded_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}") - A_sharded.copy_(A_sharded_data) - - torch.manual_seed(456) # Same B for all ranks - # Generate B data in standard (K, N) layout for consistency - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - # Copy to B (handles both row-major and column-major storage) - B.copy_(B_data) - - # For validation: compute expected result - if args["validate"]: - # Gather all A_sharded matrices and compute expected result - A_sharded_list = [torch.zeros((M, K_local), 
dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_sharded_list, A_sharded_data) - - # Concatenate along K dimension: A_gathered = [A_0 | A_1 | ... | A_n] - A_gathered = torch.cat(A_sharded_list, dim=1) # (M, K) - - # Expected: A_gathered @ B - expected_tensor = shmem.zeros((M, N), dtype=datatype) - expected_result = torch.matmul(A_gathered, B_data) - expected_tensor.copy_(expected_result) - - comm_stream = torch.cuda.Stream() - - kernel_timing = { - "all_gather_matmul": { - "start_event": torch.cuda.Event(enable_timing=True), - "end_event": torch.cuda.Event(enable_timing=True), - "ms": 0, - "experiments": 0, - }, - } - - # Pre-allocate workspace once (important for push variant which needs large buffers) - workspace = all_gather_matmul_preamble(shmem, A_sharded, B, config) - - def run_experiment(): - nonlocal kernel_timing - - shmem.barrier() - - torch.cuda.nvtx.range_push("All-Gather-Matmul") - with torch.cuda.stream(comm_stream): - kernel_timing["all_gather_matmul"]["start_event"].record() - shmem.ops.all_gather_matmul( - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - ) - kernel_timing["all_gather_matmul"]["end_event"].record() - kernel_timing["all_gather_matmul"]["experiments"] += 1 - torch.cuda.nvtx.range_pop() - - # Synchronize before querying event timing - shmem.barrier() - - # Update timing - ms = kernel_timing["all_gather_matmul"]["start_event"].elapsed_time( - kernel_timing["all_gather_matmul"]["end_event"] - ) - kernel_timing["all_gather_matmul"]["ms"] += ms - - # Synchronize across all GPUs - shmem.barrier() - - if args["validate"]: - shmem.info("Validating...") - - # Reset output before validation - C.zero_() - shmem.barrier() - - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 1e-1 if datatype == torch.float16 else 1e-3 - success = torch.allclose(C, expected_tensor, atol=atol) - if not success: - max_diff = torch.abs(C - expected_tensor).max().item() - 
shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") - - if success: - shmem.info("All-gather-matmul validation passed!") - else: - shmem.error("All-gather-matmul validation failed!") - - json_writer.add_field("success", success) - - # Wait for all to finish validation - shmem.barrier() - - if args["benchmark"]: - # Determine warmup and repeat counts - if args.get("single_run", False): - n_warmup = 0 - n_repeat = 1 - shmem.info("Single-run mode: no warmup, 1 repeat") - else: - n_warmup = 25 - n_repeat = 100 # default from iris.do_bench - - # Warmup for benchmarking (skip if single-run) - if not args.get("single_run", False): - for k in ["all_gather_matmul"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=1) - - for k in ["all_gather_matmul"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - # Reset output before benchmarking - C.zero_() - shmem.barrier() - - shmem.info("Benchmarking...") - - # Calculate TFLOPS: 2*M*N*K flops - total_flops = 2 * M * N * K - total_tflops_unit = total_flops * 1e-12 - - triton_ms = iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=n_repeat) - tflops = total_tflops_unit / ( - (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 - ) - - # Calculate bandwidth for all-gather part - # All-gather moves (world_size - 1) * M * K_local * element_size bytes - element_size = torch.tensor([], dtype=datatype).element_size() - input_bytes = M * K_local * element_size - total_bytes = input_bytes * (world_size - 1) - total_bytes_gb = total_bytes / (1024**3) - - bandwidth_gbps = total_bytes_gb / ( - (kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"]) * 1e-3 - ) - - shmem.info( - f"All-gather-matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " - 
f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" - ) - - json_writer.add_field("tflops", tflops) - json_writer.add_field("bandwidth_gbps", bandwidth_gbps) - json_writer.add_field("total_ms", triton_ms) - json_writer.add_field("total_flops", total_flops) - json_writer.add_field("total_bytes", total_bytes) - json_writer.add_field("total_bytes_gb", total_bytes_gb) - json_writer.add_field( - "all_gather_matmul_ms", - kernel_timing["all_gather_matmul"]["ms"] / kernel_timing["all_gather_matmul"]["experiments"], - ) - json_writer.add_field("all_gather_matmul_experiments", kernel_timing["all_gather_matmul"]["experiments"]) - - # Wait for all to finish benchmarking - shmem.barrier() - - # Benchmark PyTorch (all_gather_into_tensor + matmul) for comparison - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") - - # Create PyTorch tensors (not on Iris heap) - pytorch_A_sharded = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}") - pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") - pytorch_A_gathered = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}") - pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") - - # Warmup - for _ in range(10): - dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) - pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) - torch.cuda.synchronize() - dist.barrier() - - # Benchmark - dist.barrier() - - # Calculate TFLOPS: 2*M*N*K flops - total_flops = 2 * M * N * K - total_tflops_unit = total_flops * 1e-12 - - # Calculate bandwidth for all-gather part - element_size = torch.tensor([], dtype=datatype).element_size() - input_bytes = M * K_local * element_size - total_bytes = input_bytes * (world_size - 1) - total_bytes_gb = total_bytes / (1024**3) - - def run_pytorch_experiment(): - dist.all_gather_into_tensor(pytorch_A_gathered, pytorch_A_sharded) - pytorch_C = torch.matmul(pytorch_A_gathered, pytorch_B) - - 
pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) - - # Calculate TFLOPS and bandwidth - pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) - pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) - - shmem.info( - f"PyTorch all_gather_into_tensor+matmul (M={M}, K_local={K_local}, K_total={K}, N={N}, world_size={world_size}, dtype={args['datatype']}): " - f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" - ) - - if args["benchmark"]: - # Calculate performance ratio - iris_tflops = tflops - speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 - shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") - - json_writer.add_field("pytorch_tflops", pytorch_tflops) - json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) - json_writer.add_field("pytorch_ms", pytorch_ms) - json_writer.add_field("iris_speedup", speedup) - - # Wait for all to finish PyTorch benchmarking - shmem.barrier() - - if rank == 0: - json_writer.flush() - json_writer.display() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - print("Starting all_gather_matmul benchmark...") - args = parse_args() - - # Check if running with torchrun (detected by environment variables) - if "RANK" in os.environ or "LOCAL_RANK" in os.environ: - # torchrun handles process spawning, so call _worker directly - print("Detected torchrun execution mode") - _worker(args=args) - else: - # Use multiprocessing spawn for backward compatibility - num_ranks = args["num_ranks"] - init_url = args["init_url"] - mp.spawn( - fn=_worker, - args=(num_ranks, init_url, args), - nprocs=num_ranks, - join=True, - ) - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/all_gather_matmul/profile_att.sh b/benchmark/ops/all_gather_matmul/profile_att.sh deleted file mode 100755 index 21f6f21fe..000000000 --- a/benchmark/ops/all_gather_matmul/profile_att.sh +++ /dev/null @@ -1,344 +0,0 @@ -#!/bin/bash -# 
SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -# ATT (Advanced Thread Trace) Profiling Script for all_gather_matmul benchmark -# Uses rocprofv3 with thread trace to profile the benchmark at ISA instruction level. -# -# Usage: -# ./profile_att.sh [OPTIONS] -# -# Options: -# -r, --ranks NUM_RANKS Number of ranks/GPUs (default: 8) -# -m, --m-dim M M dimension (default: 2048) -# -n, --n-dim N N dimension (default: 16384) -# -k, --k-dim K K dimension (default: 131072) -# -v, --variant VARIANT Variant: pull, chunked, push, pipelined_pull (default: pull) -# --block-m SIZE Block size for M dimension (default: 256) -# --block-n SIZE Block size for N dimension (default: 256) -# --block-k SIZE Block size for K dimension (default: 64) -# --group-m SIZE Group size for M dimension tiling (default: 1) -# --num-xcds NUM Number of XCDs (default: 8) -# --validate Enable validation mode -# --benchmark-pytorch Also benchmark PyTorch for comparison -# -o, --output-dir DIR Base output directory (default: ./att_profiles) -# --att-target-cu CU Target CU for thread trace (default: 1) -# --att-buffer-size SIZE Trace buffer size in hex (default: 0x6000000 = 96MB) -# --att-activity LEVEL Perfcounter streaming level 1-16 (default: 8) -# --kernel-regex REGEX Kernel name regex filter (optional) -# --single-run Run only one iteration (no warmup, no repeat) -# --k-contiguous Use K-contiguous layout for both A and B matrices -# (default A is row-major/K-contiguous, adds --b_col_major) -# --a-col-major Store A matrix in column-major order (M-contiguous) -# --b-col-major Store B matrix in column-major order (K-contiguous) -# -h, --help Show this help message - -set -e - -# Default values -NUM_RANKS=8 -M_DIM=2048 -N_DIM=16384 -K_DIM=131072 -VARIANT="pull" -BASE_OUTPUT_DIR="./att_profiles" -ATT_TARGET_CU=1 -ATT_BUFFER_SIZE="0x6000000" # 96MB -ATT_ACTIVITY=8 -KERNEL_REGEX="" -SINGLE_RUN=true -K_CONTIGUOUS=true # Default to K-contiguous layout for 
both matrices -A_COL_MAJOR=false -B_COL_MAJOR=false -BLOCK_M=256 -BLOCK_N=256 -BLOCK_K=64 -GROUP_M=1 -NUM_XCDS=8 -VALIDATE=true -BENCHMARK_PYTORCH=true - -# Script directory -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -BENCHMARK_SCRIPT="${SCRIPT_DIR}/benchmark_torchrun.py" - -# Parse arguments -while [[ $# -gt 0 ]]; do - case $1 in - -r|--ranks) - NUM_RANKS="$2" - shift 2 - ;; - -m|--m-dim) - M_DIM="$2" - shift 2 - ;; - -n|--n-dim) - N_DIM="$2" - shift 2 - ;; - -k|--k-dim) - K_DIM="$2" - shift 2 - ;; - -v|--variant) - VARIANT="$2" - shift 2 - ;; - -o|--output-dir) - BASE_OUTPUT_DIR="$2" - shift 2 - ;; - --att-target-cu) - ATT_TARGET_CU="$2" - shift 2 - ;; - --att-buffer-size) - ATT_BUFFER_SIZE="$2" - shift 2 - ;; - --att-activity) - ATT_ACTIVITY="$2" - shift 2 - ;; - --kernel-regex) - KERNEL_REGEX="$2" - shift 2 - ;; - --single-run) - SINGLE_RUN=true - shift - ;; - --k-contiguous) - K_CONTIGUOUS=true - shift - ;; - --a-col-major) - A_COL_MAJOR=true - shift - ;; - --b-col-major) - B_COL_MAJOR=true - shift - ;; - --block-m) - BLOCK_M="$2" - shift 2 - ;; - --block-n) - BLOCK_N="$2" - shift 2 - ;; - --block-k) - BLOCK_K="$2" - shift 2 - ;; - --group-m) - GROUP_M="$2" - shift 2 - ;; - --num-xcds) - NUM_XCDS="$2" - shift 2 - ;; - --validate) - VALIDATE=true - shift - ;; - --no-validate) - VALIDATE=false - shift - ;; - --benchmark-pytorch) - BENCHMARK_PYTORCH=true - shift - ;; - --no-benchmark-pytorch) - BENCHMARK_PYTORCH=false - shift - ;; - -h|--help) - head -30 "$0" | tail -n +2 | sed 's/^# //' | sed 's/^#//' - exit 0 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -# Generate timestamp for output directory -TIMESTAMP=$(date +"%Y%m%d_%H%M%S") -OUTPUT_DIR="${BASE_OUTPUT_DIR}/att_${VARIANT}_m${M_DIM}_n${N_DIM}_k${K_DIM}_${TIMESTAMP}" - -# Create output directory -mkdir -p "${OUTPUT_DIR}" - -# Log file with timestamp -LOG_FILE="${OUTPUT_DIR}/profile_${TIMESTAMP}.log" - -echo "==============================================" | tee 
"${LOG_FILE}" -echo "ATT Profiling for all_gather_matmul benchmark" | tee -a "${LOG_FILE}" -echo "==============================================" | tee -a "${LOG_FILE}" -echo "Timestamp: $(date)" | tee -a "${LOG_FILE}" -echo "Output directory: ${OUTPUT_DIR}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" -echo "Configuration:" | tee -a "${LOG_FILE}" -echo " NUM_RANKS: ${NUM_RANKS}" | tee -a "${LOG_FILE}" -echo " M: ${M_DIM}" | tee -a "${LOG_FILE}" -echo " N: ${N_DIM}" | tee -a "${LOG_FILE}" -echo " K: ${K_DIM}" | tee -a "${LOG_FILE}" -echo " Variant: ${VARIANT}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" -echo "ATT Parameters:" | tee -a "${LOG_FILE}" -echo " att-target-cu: ${ATT_TARGET_CU}" | tee -a "${LOG_FILE}" -echo " att-buffer-size: ${ATT_BUFFER_SIZE}" | tee -a "${LOG_FILE}" -echo " att-activity: ${ATT_ACTIVITY}" | tee -a "${LOG_FILE}" -if [[ -n "${KERNEL_REGEX}" ]]; then - echo " kernel-include-regex: ${KERNEL_REGEX}" | tee -a "${LOG_FILE}" -fi -echo " single-run: ${SINGLE_RUN}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" -echo "Matrix Layout:" | tee -a "${LOG_FILE}" -echo " k-contiguous: ${K_CONTIGUOUS}" | tee -a "${LOG_FILE}" -echo " a-col-major: ${A_COL_MAJOR}" | tee -a "${LOG_FILE}" -echo " b-col-major: ${B_COL_MAJOR}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" -echo "Block Sizes:" | tee -a "${LOG_FILE}" -echo " block-m: ${BLOCK_M}" | tee -a "${LOG_FILE}" -echo " block-n: ${BLOCK_N}" | tee -a "${LOG_FILE}" -echo " block-k: ${BLOCK_K}" | tee -a "${LOG_FILE}" -echo " group-m: ${GROUP_M}" | tee -a "${LOG_FILE}" -echo " num-xcds: ${NUM_XCDS}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" -echo "Benchmark Options:" | tee -a "${LOG_FILE}" -echo " validate: ${VALIDATE}" | tee -a "${LOG_FILE}" -echo " benchmark-pytorch: ${BENCHMARK_PYTORCH}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" - -# Build rocprofv3 ATT options -ROCPROF_OPTS="--att" -ROCPROF_OPTS="${ROCPROF_OPTS} --att-target-cu 
${ATT_TARGET_CU}" -ROCPROF_OPTS="${ROCPROF_OPTS} --att-buffer-size ${ATT_BUFFER_SIZE}" -ROCPROF_OPTS="${ROCPROF_OPTS} --att-activity ${ATT_ACTIVITY}" - -if [[ -n "${KERNEL_REGEX}" ]]; then - ROCPROF_OPTS="${ROCPROF_OPTS} --kernel-include-regex \"${KERNEL_REGEX}\"" -fi - -# Build benchmark args -BENCH_ARGS="-m ${M_DIM} -n ${N_DIM} -k ${K_DIM} --variant ${VARIANT} --benchmark -r ${NUM_RANKS}" -BENCH_ARGS="${BENCH_ARGS} --block_size_m ${BLOCK_M} --block_size_n ${BLOCK_N} --block_size_k ${BLOCK_K}" -BENCH_ARGS="${BENCH_ARGS} --group_size_m ${GROUP_M} --num_xcds ${NUM_XCDS}" - -if [[ "${SINGLE_RUN}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} --single-run" -fi - -if [[ "${VALIDATE}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} -v" -fi - -if [[ "${BENCHMARK_PYTORCH}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} --benchmark_pytorch" -fi - -# Add K-contiguous layout options -# --k-contiguous: Both A and B become K-contiguous -# - A is already K-contiguous in default row-major layout -# - B needs --b_col_major to become K-contiguous -if [[ "${K_CONTIGUOUS}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} --b_col_major" -fi - -# Individual matrix layout overrides -if [[ "${A_COL_MAJOR}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} --a_col_major" -fi -if [[ "${B_COL_MAJOR}" == "true" ]]; then - BENCH_ARGS="${BENCH_ARGS} --b_col_major" -fi - -# Full command -# rocprofv3 wraps the entire torchrun command, not the other way around -# HSA_NO_SCRATCH_RECLAIM=1 prevents scratch memory reclaim issues -FULL_CMD="HSA_NO_SCRATCH_RECLAIM=1 rocprofv3 ${ROCPROF_OPTS} -d ${OUTPUT_DIR} -- torchrun --nproc_per_node=${NUM_RANKS} ${BENCHMARK_SCRIPT} ${BENCH_ARGS}" - -echo "Command:" | tee -a "${LOG_FILE}" -echo "${FULL_CMD}" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" - -# Save configuration to JSON for reference -cat > "${OUTPUT_DIR}/config.json" << EOF -{ - "timestamp": "${TIMESTAMP}", - "num_ranks": ${NUM_RANKS}, - "m_dim": ${M_DIM}, - "n_dim": ${N_DIM}, - "k_dim": 
${K_DIM}, - "variant": "${VARIANT}", - "att_target_cu": ${ATT_TARGET_CU}, - "att_buffer_size": "${ATT_BUFFER_SIZE}", - "att_activity": ${ATT_ACTIVITY}, - "kernel_regex": "${KERNEL_REGEX}", - "single_run": ${SINGLE_RUN}, - "k_contiguous": ${K_CONTIGUOUS}, - "a_col_major": ${A_COL_MAJOR}, - "b_col_major": ${B_COL_MAJOR}, - "block_m": ${BLOCK_M}, - "block_n": ${BLOCK_N}, - "block_k": ${BLOCK_K}, - "group_m": ${GROUP_M}, - "num_xcds": ${NUM_XCDS}, - "validate": ${VALIDATE}, - "benchmark_pytorch": ${BENCHMARK_PYTORCH}, - "command": "${FULL_CMD}" -} -EOF - -echo "Starting profiling..." | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" - -# Run the profiling command -START_TIME=$(date +%s) - -# Execute the command and capture output -eval "${FULL_CMD}" 2>&1 | tee -a "${LOG_FILE}" -EXIT_CODE=${PIPESTATUS[0]} - -END_TIME=$(date +%s) -DURATION=$((END_TIME - START_TIME)) - -echo "" | tee -a "${LOG_FILE}" -echo "==============================================" | tee -a "${LOG_FILE}" -echo "Profiling completed" | tee -a "${LOG_FILE}" -echo "Exit code: ${EXIT_CODE}" | tee -a "${LOG_FILE}" -echo "Duration: ${DURATION} seconds" | tee -a "${LOG_FILE}" -echo "End time: $(date)" | tee -a "${LOG_FILE}" -echo "==============================================" | tee -a "${LOG_FILE}" -echo "" | tee -a "${LOG_FILE}" - -# List output files -echo "Output files:" | tee -a "${LOG_FILE}" -ls -la "${OUTPUT_DIR}" 2>&1 | tee -a "${LOG_FILE}" - -# Check for stats CSV files -if ls "${OUTPUT_DIR}"/stats_*.csv 1> /dev/null 2>&1; then - echo "" | tee -a "${LOG_FILE}" - echo "Stats CSV files found:" | tee -a "${LOG_FILE}" - ls -la "${OUTPUT_DIR}"/stats_*.csv 2>&1 | tee -a "${LOG_FILE}" -fi - -# Check for ui_output directories (ROCprof Compute Viewer compatible) -if ls -d "${OUTPUT_DIR}"/ui_output_* 1> /dev/null 2>&1; then - echo "" | tee -a "${LOG_FILE}" - echo "UI output directories (for ROCprof Compute Viewer):" | tee -a "${LOG_FILE}" - ls -d "${OUTPUT_DIR}"/ui_output_* 2>&1 | tee -a "${LOG_FILE}" 
-fi - -echo "" | tee -a "${LOG_FILE}" -echo "Profile output saved to: ${OUTPUT_DIR}" | tee -a "${LOG_FILE}" -echo "Log file: ${LOG_FILE}" | tee -a "${LOG_FILE}" - -exit ${EXIT_CODE} diff --git a/benchmark/ops/matmul_all_gather/benchmark.py b/benchmark/ops/matmul_all_gather/benchmark.py deleted file mode 100644 index 8aaac23f8..000000000 --- a/benchmark/ops/matmul_all_gather/benchmark.py +++ /dev/null @@ -1,367 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for iris.ops matmul_all_gather fused operation. - -This benchmark showcases the fused GEMM + All-Gather operation where each rank -computes a local matmul and then gathers results along M dimension. -""" - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import random -import argparse - -from examples.common.utils import JSONWriter - -import iris -from iris.ops import FusedConfig - -torch.manual_seed(123) -random.seed(123) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark matmul_all_gather fused operation.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=16384, help="Number of rows per rank in matrix A (M_local)") - parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") - parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") - parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") - parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Datatype of tensors", - ) - parser.add_argument( - "--output_file", - type=str, - 
default="matmul_all_gather.json", - help="Output file", - ) - parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (matmul + all_gather_into_tensor) for comparison", - ) - parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") - parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") - parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") - parser.add_argument( - "--init_url", type=str, default="tcp://127.0.0.1:29529", help="Initialization URL for distributed setup" - ) - - return vars(parser.parse_args()) - - -def _worker(local_rank: int, world_size: int, init_url: str, args: dict): - """Worker function for PyTorch distributed execution.""" - backend = "nccl" if torch.cuda.is_available() else "gloo" - dist.init_process_group( - backend=backend, - init_method=init_url, - world_size=world_size, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - # Datatype mapping - datatype = torch.float32 - if args["datatype"] == "fp16": - datatype = torch.float16 - elif args["datatype"] == "fp32": - datatype = torch.float32 - elif args["datatype"] == "bf16": - datatype = torch.bfloat16 - else: - print("Unknown datatype.") - exit(1) - - M_local = 
args["m"] # Local M dimension - M = M_local * world_size # Total M after gather - N = args["n"] - K = args["k"] - - # Create config with parameters - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - - config = FusedConfig(**config_kwargs) - - json_writer = JSONWriter(args["output_file"]) - json_writer.add_field("world_size", world_size) - json_writer.add_field("operation", "matmul_all_gather") - json_writer.add_field("m_local", M_local) - json_writer.add_field("m_total", M) - - for key, value in args.items(): - json_writer.add_field(key, value) - - # Export actual config values to JSON (including defaults) - json_writer.add_field("block_size_m", config.block_size_m) - json_writer.add_field("block_size_n", config.block_size_n) - json_writer.add_field("block_size_k", config.block_size_k) - json_writer.add_field("group_size_m", config.group_size_m) - json_writer.add_field("num_sms", config.num_sms) - json_writer.add_field("num_xcds", config.num_xcds) - - # Create input and output tensors - # A_local is M_local x K, output is M x N (gathered) - A_local = shmem.zeros((M_local, K), dtype=datatype) - B = shmem.zeros((K, N), dtype=datatype) - C = shmem.zeros((M, N), dtype=datatype) - expected_tensor = None - - # Fill inputs with deterministic values - # Each rank has different A_local, same B - torch.manual_seed(123 + rank) - A_local_data = torch.randn((M_local, K), dtype=datatype, device=f"cuda:{rank}") - A_local.copy_(A_local_data) - - torch.manual_seed(456) # Same B for all ranks - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - B.copy_(B_data) - - # For validation: compute expected result - if args["validate"]: - # Gather all A_local matrices and compute 
expected result - A_local_list = [torch.zeros((M_local, K), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_local_list, A_local_data) - - # Expected: [A_0 @ B; A_1 @ B; ...; A_n @ B] stacked along M - expected_tensor = shmem.zeros((M, N), dtype=datatype) - expected_parts = [] - for i, A_rank_local in enumerate(A_local_list): - C_rank_local = torch.matmul(A_rank_local, B_data) - expected_parts.append(C_rank_local) - expected_result = torch.cat(expected_parts, dim=0) - expected_tensor.copy_(expected_result) - - comm_stream = torch.cuda.Stream() - - kernel_timing = { - "matmul_all_gather": { - "start_event": torch.cuda.Event(enable_timing=True), - "end_event": torch.cuda.Event(enable_timing=True), - "ms": 0, - "experiments": 0, - }, - } - - workspace = None - - def run_experiment(): - nonlocal kernel_timing, workspace - - shmem.barrier() - - torch.cuda.nvtx.range_push("Matmul-All-Gather") - with torch.cuda.stream(comm_stream): - kernel_timing["matmul_all_gather"]["start_event"].record() - shmem.ops.matmul_all_gather( - C, - A_local, - B, - config=config, - async_op=False, - workspace=workspace, - ) - kernel_timing["matmul_all_gather"]["end_event"].record() - kernel_timing["matmul_all_gather"]["experiments"] += 1 - torch.cuda.nvtx.range_pop() - - # Synchronize before querying event timing - shmem.barrier() - - # Update timing - ms = kernel_timing["matmul_all_gather"]["start_event"].elapsed_time( - kernel_timing["matmul_all_gather"]["end_event"] - ) - kernel_timing["matmul_all_gather"]["ms"] += ms - - # Synchronize across all GPUs - shmem.barrier() - - if args["validate"]: - shmem.info("Validating...") - - # Reset output before validation - C.zero_() - shmem.barrier() - - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 1e-1 if datatype == torch.float16 else 1e-3 - success = torch.allclose(C, expected_tensor, atol=atol) - if not success: - max_diff = torch.abs(C - expected_tensor).max().item() - 
shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") - - if success: - shmem.info("Matmul-all-gather validation passed!") - else: - shmem.error("Matmul-all-gather validation failed!") - - json_writer.add_field("success", success) - - # Wait for all to finish validation - shmem.barrier() - - if args["benchmark"]: - # Warmup for benchmarking - for k in ["matmul_all_gather"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) - - for k in ["matmul_all_gather"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - # Reset output before benchmarking - C.zero_() - shmem.barrier() - - shmem.info("Benchmarking...") - - # Calculate TFLOPS: 2*M_local*N*K flops per rank (but total is same across all ranks) - total_flops = 2 * M_local * N * K - total_tflops_unit = total_flops * 1e-12 - - triton_ms = iris.do_bench(run_experiment, shmem.barrier) - tflops = total_tflops_unit / ( - (kernel_timing["matmul_all_gather"]["ms"] / kernel_timing["matmul_all_gather"]["experiments"]) * 1e-3 - ) - - # Calculate bandwidth for all-gather part - # All-gather moves (world_size - 1) * M_local * N * element_size bytes - element_size = torch.tensor([], dtype=datatype).element_size() - output_bytes = M_local * N * element_size - total_bytes = output_bytes * (world_size - 1) - total_bytes_gb = total_bytes / (1024**3) - - bandwidth_gbps = total_bytes_gb / ( - (kernel_timing["matmul_all_gather"]["ms"] / kernel_timing["matmul_all_gather"]["experiments"]) * 1e-3 - ) - - shmem.info( - f"Matmul-all-gather (M_local={M_local}, M_total={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " - f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" - ) - - json_writer.add_field("tflops", tflops) - json_writer.add_field("bandwidth_gbps", bandwidth_gbps) - json_writer.add_field("total_ms", triton_ms) - json_writer.add_field("total_flops", total_flops) - 
json_writer.add_field("total_bytes", total_bytes) - json_writer.add_field("total_bytes_gb", total_bytes_gb) - json_writer.add_field( - "matmul_all_gather_ms", - kernel_timing["matmul_all_gather"]["ms"] / kernel_timing["matmul_all_gather"]["experiments"], - ) - json_writer.add_field("matmul_all_gather_experiments", kernel_timing["matmul_all_gather"]["experiments"]) - - # Wait for all to finish benchmarking - shmem.barrier() - - # Benchmark PyTorch (matmul + all_gather_into_tensor) for comparison - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (matmul + all_gather_into_tensor)...") - - # Create PyTorch tensors (not on Iris heap) - pytorch_A_local = torch.randn(M_local, K, dtype=datatype, device=f"cuda:{rank}") - pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") - pytorch_C_local = torch.zeros(M_local, N, dtype=datatype, device=f"cuda:{rank}") - pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") - - # Warmup - for _ in range(10): - pytorch_C_local = torch.matmul(pytorch_A_local, pytorch_B) - dist.all_gather_into_tensor(pytorch_C, pytorch_C_local) - torch.cuda.synchronize() - dist.barrier() - - # Benchmark - dist.barrier() - - def run_pytorch_experiment(): - pytorch_C_local = torch.matmul(pytorch_A_local, pytorch_B) - dist.all_gather_into_tensor(pytorch_C, pytorch_C_local) - - pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) - - # Calculate TFLOPS and bandwidth - pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) - pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) - - shmem.info( - f"PyTorch matmul+all_gather_into_tensor (M_local={M_local}, M_total={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " - f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" - ) - - if args["benchmark"]: - # Calculate performance ratio - iris_tflops = tflops - speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 - 
shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") - - json_writer.add_field("pytorch_tflops", pytorch_tflops) - json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) - json_writer.add_field("pytorch_ms", pytorch_ms) - json_writer.add_field("iris_speedup", speedup) - - # Wait for all to finish PyTorch benchmarking - shmem.barrier() - - if rank == 0: - json_writer.flush() - json_writer.display() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - args = parse_args() - num_ranks = args["num_ranks"] - init_url = args["init_url"] - - mp.spawn( - fn=_worker, - args=(num_ranks, init_url, args), - nprocs=num_ranks, - join=True, - ) - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/matmul_all_reduce/benchmark.py b/benchmark/ops/matmul_all_reduce/benchmark.py deleted file mode 100644 index 8a77f4b41..000000000 --- a/benchmark/ops/matmul_all_reduce/benchmark.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for iris.ops matmul_all_reduce fused operation. - -This benchmark showcases the fused GEMM + All-Reduce operation and reports -achieved TFLOPS and communication bandwidth. 
-""" - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import random -import argparse - -from examples.common.utils import JSONWriter - -import iris -from iris.ops import FusedConfig - -torch.manual_seed(123) -random.seed(123) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark matmul_all_reduce fused operation.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") - parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") - parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") - parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") - parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Datatype of tensors", - ) - parser.add_argument( - "--output_file", - type=str, - default="matmul_all_reduce.json", - help="Output file", - ) - parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (matmul + all_reduce) for comparison", - ) - parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") - 
parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") - parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") - parser.add_argument( - "--all_reduce_variant", - type=str, - default="two_shot", - choices=["atomic", "ring", "two_shot", "one_shot", "spinlock"], - help="All-reduce variant to use", - ) - parser.add_argument( - "--init_url", type=str, default="tcp://127.0.0.1:29528", help="Initialization URL for distributed setup" - ) - - return vars(parser.parse_args()) - - -def _worker(local_rank: int, world_size: int, init_url: str, args: dict): - """Worker function for PyTorch distributed execution.""" - backend = "nccl" if torch.cuda.is_available() else "gloo" - dist.init_process_group( - backend=backend, - init_method=init_url, - world_size=world_size, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - # Datatype mapping - datatype = torch.float32 - if args["datatype"] == "fp16": - datatype = torch.float16 - elif args["datatype"] == "fp32": - datatype = torch.float32 - elif args["datatype"] == "bf16": - datatype = torch.bfloat16 - else: - print("Unknown datatype.") - exit(1) - - M = args["m"] - N = args["n"] - K = args["k"] - - # Create config with parameters - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - "all_reduce_variant": args["all_reduce_variant"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - - config = FusedConfig(**config_kwargs) - - json_writer = JSONWriter(args["output_file"]) - json_writer.add_field("world_size", world_size) - 
json_writer.add_field("operation", "matmul_all_reduce") - - for key, value in args.items(): - json_writer.add_field(key, value) - - # Export actual config values to JSON (including defaults) - json_writer.add_field("block_size_m", config.block_size_m) - json_writer.add_field("block_size_n", config.block_size_n) - json_writer.add_field("block_size_k", config.block_size_k) - json_writer.add_field("group_size_m", config.group_size_m) - json_writer.add_field("num_sms", config.num_sms) - json_writer.add_field("num_xcds", config.num_xcds) - json_writer.add_field("all_reduce_variant", config.all_reduce_variant) - - # Create input and output tensors - # Must use shmem.zeros() to allocate on Iris symmetric heap - A = shmem.zeros((M, K), dtype=datatype) - B = shmem.zeros((K, N), dtype=datatype) - C = shmem.zeros((M, N), dtype=datatype) - expected_tensor = None - - # Fill inputs with deterministic values - # Each rank has different A, same B - torch.manual_seed(123 + rank) - A_local_data = torch.randn((M, K), dtype=datatype, device=f"cuda:{rank}") - A.copy_(A_local_data) - - torch.manual_seed(456) # Same B for all ranks - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - B.copy_(B_data) - - # For validation: compute expected result - # Reference: each rank computes local C = A @ B, then all_reduce - if args["validate"]: - expected_tensor = shmem.zeros((M, N), dtype=datatype) - C_local_ref = torch.matmul(A_local_data, B_data) - pytorch_output = C_local_ref.clone() - shmem.barrier() - dist.all_reduce(pytorch_output, op=dist.ReduceOp.SUM) - torch.cuda.synchronize() - expected_tensor.copy_(pytorch_output) - - comm_stream = torch.cuda.Stream() - - kernel_timing = { - "matmul_all_reduce": { - "start_event": torch.cuda.Event(enable_timing=True), - "end_event": torch.cuda.Event(enable_timing=True), - "ms": 0, - "experiments": 0, - }, - } - - workspace = None - - def run_experiment(): - nonlocal kernel_timing, workspace - - # Preamble if available - if 
hasattr(shmem.ops, "matmul_all_reduce_preamble"): - workspace = shmem.ops.matmul_all_reduce_preamble( - C, - A, - B, - config=config, - workspace=workspace, - ) - - shmem.barrier() - - torch.cuda.nvtx.range_push("Matmul-All-Reduce") - with torch.cuda.stream(comm_stream): - kernel_timing["matmul_all_reduce"]["start_event"].record() - shmem.ops.matmul_all_reduce( - C, - A, - B, - config=config, - async_op=False, - workspace=workspace, - ) - kernel_timing["matmul_all_reduce"]["end_event"].record() - kernel_timing["matmul_all_reduce"]["experiments"] += 1 - torch.cuda.nvtx.range_pop() - - # Synchronize before querying event timing - shmem.barrier() - - # Update timing - ms = kernel_timing["matmul_all_reduce"]["start_event"].elapsed_time( - kernel_timing["matmul_all_reduce"]["end_event"] - ) - kernel_timing["matmul_all_reduce"]["ms"] += ms - - # Synchronize across all GPUs - shmem.barrier() - - if args["validate"]: - shmem.info("Validating...") - - # Reset output before validation - C.zero_() - shmem.barrier() - - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 0.2 if datatype == torch.float16 else 0.3 - success = torch.allclose(C, expected_tensor, atol=atol) - if not success: - max_diff = torch.abs(C - expected_tensor).max().item() - shmem.error(f"Rank {rank}: Validation failed, max diff: {max_diff}") - - if success: - shmem.info("Matmul-all-reduce validation passed!") - else: - shmem.error("Matmul-all-reduce validation failed!") - - json_writer.add_field("success", success) - - # Wait for all to finish validation - shmem.barrier() - - if args["benchmark"]: - # Warmup for benchmarking - for k in ["matmul_all_reduce"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) - - for k in ["matmul_all_reduce"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - # Reset output before benchmarking - C.zero_() - shmem.barrier() - - 
shmem.info("Benchmarking...") - - # Calculate TFLOPS: 2*M*N*K flops - total_flops = 2 * M * N * K - total_tflops_unit = total_flops * 1e-12 - - triton_ms = iris.do_bench(run_experiment, shmem.barrier) - tflops = total_tflops_unit / ( - (kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"]) * 1e-3 - ) - - # Calculate bandwidth for all-reduce part - # All-reduce moves 2 * (world_size - 1) / world_size * data_size bytes - element_size = torch.tensor([], dtype=datatype).element_size() - output_bytes = M * N * element_size - total_bytes = output_bytes * (2 * (world_size - 1)) / world_size - total_bytes_gb = total_bytes / (1024**3) - - bandwidth_gbps = total_bytes_gb / ( - (kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"]) * 1e-3 - ) - - shmem.info( - f"Matmul-all-reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}, variant={args['all_reduce_variant']}): " - f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" - ) - - json_writer.add_field("tflops", tflops) - json_writer.add_field("bandwidth_gbps", bandwidth_gbps) - json_writer.add_field("total_ms", triton_ms) - json_writer.add_field("total_flops", total_flops) - json_writer.add_field("total_bytes", total_bytes) - json_writer.add_field("total_bytes_gb", total_bytes_gb) - json_writer.add_field( - "matmul_all_reduce_ms", - kernel_timing["matmul_all_reduce"]["ms"] / kernel_timing["matmul_all_reduce"]["experiments"], - ) - json_writer.add_field("matmul_all_reduce_experiments", kernel_timing["matmul_all_reduce"]["experiments"]) - - # Wait for all to finish benchmarking - shmem.barrier() - - # Benchmark PyTorch (matmul + all_reduce) for comparison - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (matmul + all_reduce)...") - - # Create PyTorch tensors (not on Iris heap) - pytorch_A = torch.randn(M, K, dtype=datatype, device=f"cuda:{rank}") - pytorch_B = torch.randn(K, N, 
dtype=datatype, device=f"cuda:{rank}") - pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") - - # Warmup - for _ in range(10): - pytorch_C = torch.matmul(pytorch_A, pytorch_B) - dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) - torch.cuda.synchronize() - dist.barrier() - - # Benchmark - dist.barrier() - - def run_pytorch_experiment(): - pytorch_C = torch.matmul(pytorch_A, pytorch_B) - dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) - - pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) - - # Calculate TFLOPS and bandwidth - pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) - pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) - - shmem.info( - f"PyTorch matmul+all_reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " - f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" - ) - - if args["benchmark"]: - # Calculate performance ratio - iris_tflops = tflops - speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 - shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") - - json_writer.add_field("pytorch_tflops", pytorch_tflops) - json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) - json_writer.add_field("pytorch_ms", pytorch_ms) - json_writer.add_field("iris_speedup", speedup) - - # Wait for all to finish PyTorch benchmarking - shmem.barrier() - - if rank == 0: - json_writer.flush() - json_writer.display() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - args = parse_args() - num_ranks = args["num_ranks"] - init_url = args["init_url"] - - mp.spawn( - fn=_worker, - args=(num_ranks, init_url, args), - nprocs=num_ranks, - join=True, - ) - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/matmul_reduce_scatter/benchmark.py b/benchmark/ops/matmul_reduce_scatter/benchmark.py deleted file mode 100644 index 1b4936167..000000000 --- a/benchmark/ops/matmul_reduce_scatter/benchmark.py +++ 
/dev/null @@ -1,421 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for iris.ops matmul_reduce_scatter fused operation. - -This benchmark showcases the fused GEMM + Reduce-Scatter operation where each rank -computes a local matmul, reduces across all ranks, and scatters tiles to ranks. -""" - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import random -import argparse - -from examples.common.utils import JSONWriter - -import iris -from iris.ops import FusedConfig - -torch.manual_seed(123) -random.seed(123) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark matmul_reduce_scatter fused operation.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=16384, help="Number of rows in matrix A (M)") - parser.add_argument("-n", type=int, default=2048, help="Number of columns in matrix B (N)") - parser.add_argument("-k", type=int, default=131072, help="Common dimension (K)") - parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode") - parser.add_argument("-v", "--validate", action="store_true", help="Enable validation mode") - parser.add_argument("-b", "--benchmark", action="store_true", help="Enable benchmarking mode") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Datatype of tensors", - ) - parser.add_argument( - "--output_file", - type=str, - default="matmul_reduce_scatter.json", - help="Output file", - ) - parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs for operation (auto-detect if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (matmul + all_reduce) for comparison", - ) 
- parser.add_argument("--block_size_m", type=int, default=256, help="Block size for M dimension") - parser.add_argument("--block_size_n", type=int, default=64, help="Block size for N dimension") - parser.add_argument("--block_size_k", type=int, default=64, help="Block size for K dimension") - parser.add_argument("--group_size_m", type=int, default=1, help="Group size for M dimension tiling") - parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto-detected if not set)") - parser.add_argument("-r", "--num_ranks", type=int, default=8, help="Number of ranks/processes") - parser.add_argument( - "--init_url", type=str, default="tcp://127.0.0.1:29531", help="Initialization URL for distributed setup" - ) - - return vars(parser.parse_args()) - - -def _worker(local_rank: int, world_size: int, init_url: str, args: dict): - """Worker function for PyTorch distributed execution.""" - backend = "nccl" if torch.cuda.is_available() else "gloo" - dist.init_process_group( - backend=backend, - init_method=init_url, - world_size=world_size, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - # Datatype mapping - datatype = torch.float32 - if args["datatype"] == "fp16": - datatype = torch.float16 - elif args["datatype"] == "fp32": - datatype = torch.float32 - elif args["datatype"] == "bf16": - datatype = torch.bfloat16 - else: - print("Unknown datatype.") - exit(1) - - M = args["m"] - N = args["n"] - K = args["k"] - - # Create config with parameters - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - - 
config = FusedConfig(**config_kwargs) - - json_writer = JSONWriter(args["output_file"]) - json_writer.add_field("world_size", world_size) - json_writer.add_field("operation", "matmul_reduce_scatter") - - for key, value in args.items(): - json_writer.add_field(key, value) - - # Export actual config values to JSON (including defaults) - json_writer.add_field("block_size_m", config.block_size_m) - json_writer.add_field("block_size_n", config.block_size_n) - json_writer.add_field("block_size_k", config.block_size_k) - json_writer.add_field("group_size_m", config.group_size_m) - json_writer.add_field("num_sms", config.num_sms) - json_writer.add_field("num_xcds", config.num_xcds) - - # Calculate tile distribution - num_pid_m = (M + config.block_size_m - 1) // config.block_size_m - num_pid_n = (N + config.block_size_n - 1) // config.block_size_n - total_tiles = num_pid_m * num_pid_n - tiles_per_rank = total_tiles // world_size - start_tile = rank * tiles_per_rank - if rank == world_size - 1: - tiles_per_rank = total_tiles - start_tile - - json_writer.add_field("total_tiles", total_tiles) - json_writer.add_field("tiles_per_rank", tiles_per_rank) - - # Create input and output tensors - # Each rank computes full A @ B, but only keeps its assigned tiles - A = shmem.zeros((M, K), dtype=datatype) - B = shmem.zeros((K, N), dtype=datatype) - C = shmem.zeros((M, N), dtype=datatype) - expected_tiles = [] - - # Fill inputs with deterministic values - # Each rank has different A, same B - torch.manual_seed(123 + rank) - A_local_data = torch.randn((M, K), dtype=datatype, device=f"cuda:{rank}") - A.copy_(A_local_data) - - torch.manual_seed(456) # Same B for all ranks - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - B.copy_(B_data) - - # For validation: compute expected result for this rank's tiles - if args["validate"]: - # Gather all A matrices to compute expected result - A_list = [torch.zeros((M, K), dtype=datatype, device=f"cuda:{rank}") for _ in 
range(world_size)] - dist.all_gather(A_list, A_local_data) - - # Expected: sum of all (A_i @ B) for each rank i, but only for this rank's tiles - expected_full = torch.zeros((M, N), dtype=datatype, device=f"cuda:{rank}") - for A_rank in A_list: - expected_full += torch.matmul(A_rank, B_data) - - # Extract only this rank's tiles - for local_tile_idx in range(tiles_per_rank): - tile_id = start_tile + local_tile_idx - pid_m = tile_id // num_pid_n - pid_n = tile_id % num_pid_n - - m_start = pid_m * config.block_size_m - m_end = min(m_start + config.block_size_m, M) - n_start = pid_n * config.block_size_n - n_end = min(n_start + config.block_size_n, N) - - expected_tiles.append( - { - "tile_id": tile_id, - "pid_m": pid_m, - "pid_n": pid_n, - "m_start": m_start, - "m_end": m_end, - "n_start": n_start, - "n_end": n_end, - "data": expected_full[m_start:m_end, n_start:n_end].clone(), - } - ) - - comm_stream = torch.cuda.Stream() - - kernel_timing = { - "matmul_reduce_scatter": { - "start_event": torch.cuda.Event(enable_timing=True), - "end_event": torch.cuda.Event(enable_timing=True), - "ms": 0, - "experiments": 0, - }, - } - - workspace = None - - def run_experiment(): - nonlocal kernel_timing, workspace - - # Preamble if available - if hasattr(shmem.ops, "matmul_reduce_scatter_preamble"): - workspace = shmem.ops.matmul_reduce_scatter_preamble( - C, - A, - B, - config=config, - workspace=workspace, - ) - - shmem.barrier() - - torch.cuda.nvtx.range_push("Matmul-Reduce-Scatter") - with torch.cuda.stream(comm_stream): - kernel_timing["matmul_reduce_scatter"]["start_event"].record() - shmem.ops.matmul_reduce_scatter( - C, - A, - B, - async_op=False, - config=config, - workspace=workspace, - ) - kernel_timing["matmul_reduce_scatter"]["end_event"].record() - kernel_timing["matmul_reduce_scatter"]["experiments"] += 1 - torch.cuda.nvtx.range_pop() - - # Synchronize before querying event timing - shmem.barrier() - - # Update timing - ms = 
kernel_timing["matmul_reduce_scatter"]["start_event"].elapsed_time( - kernel_timing["matmul_reduce_scatter"]["end_event"] - ) - kernel_timing["matmul_reduce_scatter"]["ms"] += ms - - # Synchronize across all GPUs - shmem.barrier() - - if args["validate"]: - shmem.info("Validating...") - - # Reset output before validation - C.zero_() - shmem.barrier() - - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 2e-1 if datatype == torch.float16 else 1e-1 - success = True - - # Validate each tile assigned to this rank - for tile_info in expected_tiles: - C_tile = C[tile_info["m_start"] : tile_info["m_end"], tile_info["n_start"] : tile_info["n_end"]] - expected_tile = tile_info["data"] - - tile_match = torch.allclose(C_tile, expected_tile, atol=atol) - if not tile_match: - max_diff = torch.abs(C_tile - expected_tile).max().item() - shmem.error( - f"Rank {rank}, tile {tile_info['tile_id']} ({tile_info['pid_m']},{tile_info['pid_n']}): " - f"Validation failed, max diff: {max_diff}" - ) - success = False - - if success: - shmem.info("Matmul-reduce-scatter validation passed!") - else: - shmem.error("Matmul-reduce-scatter validation failed!") - - json_writer.add_field("success", success) - - # Wait for all to finish validation - shmem.barrier() - - if args["benchmark"]: - # Warmup for benchmarking - for k in ["matmul_reduce_scatter"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=25, n_repeat=1) - - for k in ["matmul_reduce_scatter"]: - kernel_timing[k]["ms"] = 0 - kernel_timing[k]["experiments"] = 0 - - # Reset output before benchmarking - C.zero_() - shmem.barrier() - - shmem.info("Benchmarking...") - - # Calculate TFLOPS: 2*M*N*K flops - total_flops = 2 * M * N * K - total_tflops_unit = total_flops * 1e-12 - - triton_ms = iris.do_bench(run_experiment, shmem.barrier) - tflops = total_tflops_unit / ( - (kernel_timing["matmul_reduce_scatter"]["ms"] / 
kernel_timing["matmul_reduce_scatter"]["experiments"]) - * 1e-3 - ) - - # Calculate bandwidth for reduce-scatter part - # Similar to all-reduce: 2 * (world_size - 1) / world_size * data_size bytes - element_size = torch.tensor([], dtype=datatype).element_size() - output_bytes = M * N * element_size - total_bytes = output_bytes * (2 * (world_size - 1)) / world_size - total_bytes_gb = total_bytes / (1024**3) - - bandwidth_gbps = total_bytes_gb / ( - (kernel_timing["matmul_reduce_scatter"]["ms"] / kernel_timing["matmul_reduce_scatter"]["experiments"]) - * 1e-3 - ) - - shmem.info( - f"Matmul-reduce-scatter (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " - f"{triton_ms:.3f} ms, {tflops:.3f} TFLOPS, {bandwidth_gbps:.3f} GB/s" - ) - - json_writer.add_field("tflops", tflops) - json_writer.add_field("bandwidth_gbps", bandwidth_gbps) - json_writer.add_field("total_ms", triton_ms) - json_writer.add_field("total_flops", total_flops) - json_writer.add_field("total_bytes", total_bytes) - json_writer.add_field("total_bytes_gb", total_bytes_gb) - json_writer.add_field( - "matmul_reduce_scatter_ms", - kernel_timing["matmul_reduce_scatter"]["ms"] / kernel_timing["matmul_reduce_scatter"]["experiments"], - ) - json_writer.add_field( - "matmul_reduce_scatter_experiments", kernel_timing["matmul_reduce_scatter"]["experiments"] - ) - - # Wait for all to finish benchmarking - shmem.barrier() - - # Benchmark PyTorch (matmul + all_reduce) for comparison - # Note: We use all_reduce since PyTorch's reduce_scatter has different semantics - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (matmul + all_reduce)...") - - # Create PyTorch tensors (not on Iris heap) - pytorch_A = torch.randn(M, K, dtype=datatype, device=f"cuda:{rank}") - pytorch_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") - pytorch_C = torch.zeros(M, N, dtype=datatype, device=f"cuda:{rank}") - - # Warmup - for _ in range(10): - pytorch_C = torch.matmul(pytorch_A, 
pytorch_B) - dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) - torch.cuda.synchronize() - dist.barrier() - - # Benchmark - dist.barrier() - - def run_pytorch_experiment(): - pytorch_C = torch.matmul(pytorch_A, pytorch_B) - dist.all_reduce(pytorch_C, op=dist.ReduceOp.SUM) - - pytorch_ms = iris.do_bench(run_pytorch_experiment, dist.barrier) - - # Calculate TFLOPS and bandwidth - pytorch_tflops = total_tflops_unit / (pytorch_ms * 1e-3) - pytorch_bandwidth_gbps = total_bytes_gb / (pytorch_ms * 1e-3) - - shmem.info( - f"PyTorch matmul+all_reduce (M={M}, N={N}, K={K}, world_size={world_size}, dtype={args['datatype']}): " - f"{pytorch_ms:.3f} ms, {pytorch_tflops:.3f} TFLOPS, {pytorch_bandwidth_gbps:.3f} GB/s" - ) - - if args["benchmark"]: - # Calculate performance ratio - iris_tflops = tflops - speedup = (iris_tflops / pytorch_tflops) if pytorch_tflops > 0 else 0 - shmem.info(f"Speedup (Iris/PyTorch): {speedup:.2f}x") - - json_writer.add_field("pytorch_tflops", pytorch_tflops) - json_writer.add_field("pytorch_bandwidth_gbps", pytorch_bandwidth_gbps) - json_writer.add_field("pytorch_ms", pytorch_ms) - json_writer.add_field("iris_speedup", speedup) - - # Wait for all to finish PyTorch benchmarking - shmem.barrier() - - if rank == 0: - json_writer.flush() - json_writer.display() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - args = parse_args() - num_ranks = args["num_ranks"] - init_url = args["init_url"] - - mp.spawn( - fn=_worker, - args=(num_ranks, init_url, args), - nprocs=num_ranks, - join=True, - ) - - -if __name__ == "__main__": - main() diff --git a/iris/iris.py b/iris/iris.py index 7fcf29638..52b91293c 100644 --- a/iris/iris.py +++ b/iris/iris.py @@ -1638,6 +1638,14 @@ def get( - ".cg": Cache Global. Equivalent to ".wb" — stored through L1 → L2 → LLC under LRU. - ".cs": Cache Streaming. Bypasses L1, streamed through L2, not retained in LLC. - ".wt": Write-Through. Bypasses L1 and L2 (coherent cache bypass), may hit in LLC with LRU. 
+ + Returns: + None + + Example: + >>> ctx.get(remote_ptr + offsets, local_ptr + offsets, from_rank=1, mask=mask) + """ + translated_from_ptr = self._translate(from_ptr, self.rank, from_rank, hint) data = tl.load(translated_from_ptr, mask=mask, other=other, cache_modifier=load_cache_modifier) tl.store(to_ptr, data, mask=mask, cache_modifier=store_cache_modifier) diff --git a/iris/ops/__init__.py b/iris/ops/__init__.py index ba6a698de..647ff91de 100644 --- a/iris/ops/__init__.py +++ b/iris/ops/__init__.py @@ -36,6 +36,7 @@ # from .matmul import matmul # Simple single-GPU GEMM - TODO: implement from .matmul_all_reduce import matmul_all_reduce, matmul_all_reduce_preamble from .all_gather_matmul import all_gather_matmul, all_gather_matmul_preamble +from .all_gather_matmul_hbm_buffer import all_gather_matmul_hbm_buffer, all_gather_matmul_hbm_buffer_preamble from .matmul_all_gather import matmul_all_gather from .matmul_reduce_scatter import matmul_reduce_scatter, matmul_reduce_scatter_preamble @@ -145,14 +146,13 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, """ Fused matrix multiplication and reduce-scatter. - Computes: output = reduce_scatter(A @ B) where each rank keeps assigned tiles + Computes: output = reduce_scatter(A @ B + bias) along N dimension Args: - output_tensor: Output tensor (M, N) - will contain reduced tiles for this rank + output_tensor: Output tensor (M, N_local) where N_local = N / world_size A: Input matrix A (M, K) B: Input matrix B (K, N) - bias: Optional bias (currently unused; reserved for future support). - Passing a non-None value will raise a NotImplementedError. 
+ bias: Optional bias vector (M,) or (N,) async_op: If False, performs barrier at end config: Optional FusedConfig for tuning workspace: Optional pre-allocated workspace @@ -161,12 +161,11 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, workspace: Updated workspace object Example: - >>> output = shmem.zeros((M, N), dtype=torch.float16) + >>> N_local = N // world_size + >>> output = shmem.zeros((M, N_local), dtype=torch.float16) >>> shmem.ops.matmul_reduce_scatter(output, A, B) """ - if bias is not None: - raise NotImplementedError("bias is not yet supported for matmul_reduce_scatter") - return matmul_reduce_scatter(self._shmem, output_tensor, A, B, async_op, config, workspace) + return matmul_reduce_scatter(self._shmem, output_tensor, A, B, bias, async_op, config, workspace) # Export public API @@ -177,10 +176,13 @@ def matmul_reduce_scatter(self, output_tensor, A, B, bias=None, async_op=False, # Namespace "OpsNamespace", # Operations + "matmul", # Simple single-GPU GEMM "matmul_all_reduce", "matmul_all_reduce_preamble", "all_gather_matmul", "all_gather_matmul_preamble", + "all_gather_matmul_hbm_buffer", + "all_gather_matmul_hbm_buffer_preamble", "matmul_all_gather", "matmul_reduce_scatter", "matmul_reduce_scatter_preamble", diff --git a/iris/ops/config.py b/iris/ops/config.py index 58bbe2d39..6a4a0575a 100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -19,12 +19,12 @@ class FusedConfig: but users can override specific settings for performance tuning. GEMM Parameters: - block_size_m: Block size for M dimension (rows). Default: 128. - block_size_n: Block size for N dimension (columns). Default: 256. + block_size_m: Block size for M dimension (rows). Default: 256. + block_size_n: Block size for N dimension (columns). Default: 64. block_size_k: Block size for K dimension (reduction). Default: 64. group_size_m: Group size for M dimension tiling. Default: 1. num_sms: Number of SMs to use. If None, auto-detects from device. 
Default: None. - num_xcds: Number of XCDs (chiplets). Default: 8. + num_xcds: Number of XCDs (chiplets). Default: 1. chunk_size: Chunk size for chiplet transform. Default: 1. cache_modifier_a: Cache modifier for matrix A (".ca" for cached). Default: ".ca". cache_modifier_b: Cache modifier for matrix B (".ca" for cached). Default: ".ca". @@ -50,12 +50,12 @@ class FusedConfig: """ # GEMM parameters - block_size_m: int = 128 - block_size_n: int = 256 + block_size_m: int = 256 + block_size_n: int = 64 block_size_k: int = 64 group_size_m: int = 1 num_sms: Optional[int] = None # Auto-detect if None - num_xcds: int = 8 + num_xcds: int = 1 chunk_size: int = 1 cache_modifier_a: str = ".ca" cache_modifier_b: str = ".ca" diff --git a/iris/ops/matmul_all_gather.py b/iris/ops/matmul_all_gather.py index 6b19caea4..ad42ac041 100644 --- a/iris/ops/matmul_all_gather.py +++ b/iris/ops/matmul_all_gather.py @@ -180,6 +180,17 @@ def matmul_all_gather( assert output_tensor.shape == (M, N), f"Output must be ({M}, {N}), got {output_tensor.shape}" # Validate problem size against block sizes + assert M_local >= config.block_size_m, ( + f"M_local ({M_local}) must be >= block_size_m ({config.block_size_m}). " + f"Use smaller block sizes for small problems." + ) + assert K >= config.block_size_k, ( + f"K ({K}) must be >= block_size_k ({config.block_size_k}). Use smaller block sizes for small problems." + ) + assert N >= config.block_size_n, ( + f"N ({N}) must be >= block_size_n ({config.block_size_n}). Use smaller block sizes for small problems." 
+ ) + # Allocate workspace if not provided if workspace is None: workspace = matmul_all_gather_preamble(shmem, A, B, config) diff --git a/iris/ops/matmul_all_reduce.py b/iris/ops/matmul_all_reduce.py index 1054f166a..6bf3cb3a0 100644 --- a/iris/ops/matmul_all_reduce.py +++ b/iris/ops/matmul_all_reduce.py @@ -272,6 +272,11 @@ def matmul_all_reduce( if A.dtype != B.dtype or A.dtype != C.dtype: raise ValueError(f"All tensors must have same dtype, got A:{A.dtype}, B:{B.dtype}, C:{C.dtype}") + # Validate block sizes match problem dimensions + assert M >= config.block_size_m, f"M={M} too small for block_size_m={config.block_size_m}" + assert K >= config.block_size_k, f"K={K} too small for block_size_k={config.block_size_k}" + assert N >= config.block_size_n, f"N={N} too small for block_size_n={config.block_size_n}" + # Extract strides stride_am, stride_ak = A.stride() stride_bk, stride_bn = B.stride() diff --git a/iris/ops/workspace.py b/iris/ops/workspace.py index e519f0823..a9c7cb616 100644 --- a/iris/ops/workspace.py +++ b/iris/ops/workspace.py @@ -38,18 +38,10 @@ class FusedWorkspace: world_size: int = 1 variant: str = "" - # Hardware configuration (detected in preamble) - num_sms: Optional[int] = None # Number of streaming multiprocessors - num_xcds: int = 1 # Number of XCDs/chiplets - # Temporary buffers (allocated as needed) aux_buffer: Optional[torch.Tensor] = None # Generic buffer for intermediate results locks: Optional[torch.Tensor] = None # Synchronization primitives - # Push variant workspace - a_inbox: Optional[torch.Tensor] = None # (world_size, M, K_local) inbox buffer - signal_flags: Optional[torch.Tensor] = None # (world_size, world_size, m_tiles, k_tiles) - prepared: bool = False def matches( @@ -90,6 +82,4 @@ def clear(self): """Free all allocated buffers.""" self.aux_buffer = None self.locks = None - self.a_inbox = None - self.signal_flags = None self.prepared = False diff --git a/pyproject.toml b/pyproject.toml index f1f10655d..4a8f1916c 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "numpy", "requests", "ruff", - "tritonblas @ git+https://github.com/ROCm/tritonBLAS.git@cd119279f3df543a558aa6d2cd4a3daed0b1ec7a", + "tritonblas @ git+https://github.com/ROCm/tritonBLAS.git@df58476a4520b72495a3f03f911368a184126568", ] diff --git a/tests/ops/test_matmul_all_reduce.py b/tests/ops/test_matmul_all_reduce.py index f3dcfe977..bf5b15053 100644 --- a/tests/ops/test_matmul_all_reduce.py +++ b/tests/ops/test_matmul_all_reduce.py @@ -112,7 +112,7 @@ def test_matmul_all_reduce_via_shmem_ops(): shmem = iris.iris(heap_size) rank = shmem.get_rank() - M, N, K = 256, 256, 64 + M, N, K = 256, 128, 64 dtype = torch.float16 A = shmem.randn((M, K), dtype=dtype) From 9d29d8c4bd875b73e526761fc8729f8905ebb9e5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 8 Apr 2026 23:50:26 +0000 Subject: [PATCH 36/60] Port HBM buffer benchmark to iris.bench, remove helper scripts - Removed benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py (718 lines) - Removed benchmark/ops/all_gather_matmul/derive_params.py (721 lines) - Removed benchmark/ops/all_gather_matmul/tune_hbm_buffer.py (634 lines) - Removed now-empty benchmark/ops/all_gather_matmul/ directory - Added benchmark/ops/bench_all_gather_matmul_hbm_buffer.py (47 lines) using iris.bench declarative API, consistent with other bench_*.py files Agent-Logs-Url: https://github.com/ROCm/iris/sessions/38d07d1d-e548-4f14-adf8-30968d0a9c67 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .../all_gather_matmul/benchmark_hbm_buffer.py | 718 ----------------- .../ops/all_gather_matmul/derive_params.py | 721 ------------------ .../ops/all_gather_matmul/tune_hbm_buffer.py | 634 --------------- .../ops/bench_all_gather_matmul_hbm_buffer.py | 46 ++ 4 files changed, 46 insertions(+), 2073 deletions(-) delete mode 100644 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py delete 
mode 100644 benchmark/ops/all_gather_matmul/derive_params.py delete mode 100644 benchmark/ops/all_gather_matmul/tune_hbm_buffer.py create mode 100644 benchmark/ops/bench_all_gather_matmul_hbm_buffer.py diff --git a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py b/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py deleted file mode 100644 index 190799986..000000000 --- a/benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py +++ /dev/null @@ -1,718 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. - -""" -Benchmark for the HBM-buffered all_gather_matmul variant. - -This variant cooperatively gathers A into a local HBM buffer with per-tile -ready flags, then runs GEMM from local memory. No global barriers -- CUs -that finish gathering early start GEMM immediately, spinning on flags for -any tile not yet available. - -Usage with torchrun: - torchrun --nproc_per_node=8 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py \\ - -m 2048 -n 16384 -k 131072 --benchmark - - torchrun --nproc_per_node=8 benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py \\ - -m 2048 -n 16384 -k 131072 --benchmark --benchmark_pytorch --b_col_major -""" - -import os -import time -import torch -import torch.distributed as dist -import random -import argparse -import numpy as np - -import iris -from iris.ops.all_gather_matmul_hbm_buffer import ( - all_gather_matmul_hbm_buffer, - all_gather_matmul_hbm_buffer_preamble, -) -from iris.ops import FusedConfig - -_DERIVE_AVAILABLE = False -try: - import sys as _sys - - _script_dir = os.path.dirname(os.path.abspath(__file__)) - if _script_dir not in _sys.path: - _sys.path.insert(0, _script_dir) - from derive_params import ( - derive as _derive_params, - DEFAULT_NUM_CUS, - DEFAULT_PEAK_TFLOPS_FP16, - DEFAULT_HBM_BW_GBPS, - DEFAULT_L2_SIZE_BYTES, - DEFAULT_SCHEDULING_FACTOR, - ) - - _DERIVE_AVAILABLE = True -except Exception: - pass - 
-_MODEL_PARAMS = ( - "block_size_m", - "block_size_n", - "block_size_k", - "group_size_m", - "num_fetch_sms", - "k_per_flag", - "num_warps", - "num_fetch_stages", - "first_stage_fetch_sms", -) - -_FALLBACK_DEFAULTS = { - "block_size_m": 256, - "block_size_n": 64, - "block_size_k": 64, - "group_size_m": 1, - "k_per_flag": 1, - "num_fetch_stages": 1, -} - -torch.manual_seed(123) -random.seed(123) - -TICKS_PER_US = 100 # s_memrealtime runs at 100 MHz: 1 tick = 10 ns = 0.01 us - - -def _plot_trace(trace_data, output_path, rank, M, N, K, num_fetch_sms_cfg): - """Generate a tall Gantt chart showing per-workgroup activity over time. - - Y-axis: workgroup (sorted by start time) - X-axis: time in microseconds - Colors: fetcher stages (blue shades), GEMM wait (red), GEMM compute (green) - """ - import matplotlib - - matplotlib.use("Agg") - import matplotlib.pyplot as plt - from matplotlib.lines import Line2D - - starts = trace_data["start"].numpy().astype(np.int64) - ends = trace_data["end"].numpy().astype(np.int64) - waits = trace_data["wait"].numpy().astype(np.int64) - xcds = trace_data["xcd"].numpy().astype(np.int32) - grid_size = trace_data["grid_size"] - n_fetch_per_stage = trace_data["num_fetch_sms"] - n_stages = trace_data.get("num_fetch_stages", 1) - total_fetch = trace_data.get("total_fetch_wgs", n_fetch_per_stage) - first_stage_fetch = trace_data.get("first_stage_fetch_sms", n_fetch_per_stage) - first_stage_size = trace_data.get("first_stage_size", grid_size) - rest_stage_size = trace_data.get("rest_stage_size", grid_size) - - # Convert to microseconds relative to earliest start - t_min = starts.min() - starts_us = (starts - t_min) / TICKS_PER_US - ends_us = (ends - t_min) / TICKS_PER_US - waits_us = waits / TICKS_PER_US - - # Build role array: stage index for fetchers (0..S-1), S for GEMM - # Asymmetric layout: [fetch0 (P)] [gemm0] [fetch1 (F)] [gemm1] ... 
- roles = np.empty(grid_size, dtype=np.int32) - for i in range(grid_size): - if i < first_stage_size: - stage = 0 - local = i - fetch_thresh = first_stage_fetch - else: - adjusted = i - first_stage_size - stage = 1 + adjusted // rest_stage_size - local = adjusted % rest_stage_size - fetch_thresh = n_fetch_per_stage - if local < fetch_thresh: - roles[i] = stage # fetcher for this stage - else: - roles[i] = n_stages # GEMM - - # Sort by start time - order = np.argsort(starts_us) - - # Compute figure height: ~0.012 inches per row, min 12 inches - row_h = 0.012 - fig_h = max(12, grid_size * row_h + 2) - fig, ax = plt.subplots(figsize=(18, fig_h)) - - # One color per fetch stage (blue palette), plus GEMM colors - fetch_blues = ["#1565C0", "#42A5F5", "#90CAF9", "#BBDEFB"] - wait_color = "#F44336" # red - compute_color = "#4CAF50" # green - - for y_idx, wg_idx in enumerate(order): - s = starts_us[wg_idx] - e = ends_us[wg_idx] - dur = e - s - role = roles[wg_idx] - - if role < n_stages: - # Fetcher: color by stage - c = fetch_blues[role % len(fetch_blues)] - ax.barh(y_idx, dur, left=s, height=0.8, color=c, edgecolor="none", linewidth=0) - else: - # GEMM: split into wait (red) and compute (green) - w = waits_us[wg_idx] - comp = max(0, dur - w) - ax.barh(y_idx, w, left=s, height=0.8, color=wait_color, edgecolor="none", linewidth=0) - ax.barh(y_idx, comp, left=s + w, height=0.8, color=compute_color, edgecolor="none", linewidth=0) - - # XCD annotations on the right margin - xcd_set = sorted(set(xcds.tolist())) - xcd_cmap = {} - if len(xcd_set) > 1: - cmap = matplotlib.colormaps.get_cmap("tab10").resampled(len(xcd_set)) - for i, x in enumerate(xcd_set): - xcd_cmap[x] = cmap(i) - - x_max = ends_us.max() * 1.02 - for y_idx, wg_idx in enumerate(order): - xcd_id = xcds[wg_idx] - if xcd_id in xcd_cmap: - ax.plot(x_max, y_idx, marker="s", markersize=1.5, color=xcd_cmap[xcd_id], clip_on=False) - - n_gemm = grid_size - total_fetch - if n_stages > 1 and first_stage_fetch != 
n_fetch_per_stage: - stage_info = f"{first_stage_fetch}+{n_stages - 1}x{n_fetch_per_stage}" - elif n_stages > 1: - stage_info = f"{n_stages}x{n_fetch_per_stage}" - else: - stage_info = str(first_stage_fetch) - ax.set_xlabel("Time (us)", fontsize=12) - ax.set_ylabel("Workgroup (sorted by start time)", fontsize=12) - ax.set_title( - f"Rank {rank} | All-Gather GEMM Trace | " - f"M={M} N={N} K={K} | " - f"{stage_info} fetchers + {n_gemm} GEMM workgroups", - fontsize=13, - ) - ax.set_ylim(-1, grid_size + 1) - ax.set_xlim(0, x_max) - - # Invert y so earliest-starting workgroups are at top - ax.invert_yaxis() - - # Legend - legend_elements = [] - for s_idx in range(min(n_stages, len(fetch_blues))): - legend_elements.append(Line2D([0], [0], color=fetch_blues[s_idx], lw=6, label=f"Fetch stage {s_idx}")) - legend_elements.append(Line2D([0], [0], color=wait_color, lw=6, label="GEMM: waiting on data")) - legend_elements.append(Line2D([0], [0], color=compute_color, lw=6, label="GEMM: compute")) - ax.legend(handles=legend_elements, loc="upper right", fontsize=10) - - # Summary stats - fetch_mask = roles < n_stages - gemm_mask = roles == n_stages - fetch_dur = (ends_us - starts_us)[fetch_mask] - gemm_dur = (ends_us - starts_us)[gemm_mask] - gemm_wait = waits_us[gemm_mask] - gemm_compute = gemm_dur - gemm_wait - - stats_lines = [] - for s_idx in range(n_stages): - s_mask = roles == s_idx - s_dur = (ends_us - starts_us)[s_mask] - s_start = starts_us[s_mask] - if len(s_dur) > 0: - stats_lines.append( - f"Fetch stg{s_idx}: {s_dur.mean():.1f} us avg " - f"({s_dur.min():.1f}-{s_dur.max():.1f}) " - f"first@{s_start.min():.0f}us" - ) - stats_lines += [ - f"GEMM total: {gemm_dur.mean():.1f} us avg ({gemm_dur.min():.1f}-{gemm_dur.max():.1f})", - f" wait: {gemm_wait.mean():.1f} us avg ({gemm_wait.min():.1f}-{gemm_wait.max():.1f})", - f" compute: {gemm_compute.mean():.1f} us avg ({gemm_compute.min():.1f}-{gemm_compute.max():.1f})", - f" wait%: {100 * gemm_wait.sum() / gemm_dur.sum():.1f}%", 
- f"Wall time: {ends_us.max():.1f} us", - ] - stats_text = "\n".join(stats_lines) - ax.text( - 0.01, - 0.99, - stats_text, - transform=ax.transAxes, - fontsize=9, - verticalalignment="top", - fontfamily="monospace", - bbox=dict(boxstyle="round,pad=0.4", facecolor="white", alpha=0.85), - ) - - plt.tight_layout() - fig.savefig(output_path, dpi=150, bbox_inches="tight") - plt.close(fig) - print(f" [Rank {rank}] Trace plot saved to: {output_path}") - print(f" {stats_text}") - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark HBM-buffered all_gather_matmul (per-tile flags).", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument("-m", type=int, default=2048, help="M dimension") - parser.add_argument("-n", type=int, default=16384, help="N dimension") - parser.add_argument("-k", type=int, default=131072, help="K dimension (total)") - parser.add_argument("-v", "--validate", action="store_true", help="Validate correctness") - parser.add_argument("-b", "--benchmark", action="store_true", help="Run benchmark") - parser.add_argument( - "--datatype", - type=str, - default="fp16", - choices=["fp16", "fp32", "bf16"], - help="Tensor datatype", - ) - parser.add_argument("--heap_size", type=int, default=1 << 34, help="Iris heap size") - parser.add_argument("--comm_sms", type=int, default=None, help="Number of SMs (auto if None)") - parser.add_argument( - "--benchmark_pytorch", - action="store_true", - help="Also benchmark PyTorch (all_gather_into_tensor + matmul)", - ) - parser.add_argument("--block_size_m", type=int, default=None, help="Block size M (model-derived if omitted)") - parser.add_argument("--block_size_n", type=int, default=None, help="Block size N (model-derived if omitted)") - parser.add_argument("--block_size_k", type=int, default=None, help="Block size K (model-derived if omitted)") - parser.add_argument("--group_size_m", type=int, default=None, help="Group size M (model-derived if omitted)") - 
parser.add_argument("--num_xcds", type=int, default=None, help="Number of XCDs (auto if None)") - parser.add_argument("--b_col_major", action="store_true", help="B col-major (K-contiguous)") - parser.add_argument("--a_col_major", action="store_true", help="A col-major (M-contiguous)") - parser.add_argument("--single-run", action="store_true", help="1 iteration (for profiling)") - parser.add_argument("--num_fetch_sms", type=int, default=None, help="Fetcher SMs (auto if None)") - parser.add_argument( - "--k_per_flag", type=int, default=None, help="K-blocks per ready flag (model-derived if omitted)" - ) - parser.add_argument("--num_warps", type=int, default=None, help="Triton num_warps (auto if None)") - parser.add_argument("--num_stages", type=int, default=None, help="Triton num_stages (auto if None)") - parser.add_argument( - "--num_fetch_stages", - type=int, - default=None, - help="Number of fetch stages (model-derived if omitted)", - ) - parser.add_argument( - "--first_stage_fetch_sms", - type=int, - default=None, - help="Fetcher WGs for stage 0 (fills first GPU wave; defaults to num_fetch_sms)", - ) - parser.add_argument( - "--trace", - action=argparse.BooleanOptionalAction, - default=True, - help="Collect per-workgroup trace and save Gantt chart PNG", - ) - parser.add_argument("--trace_output", type=str, default="trace.png", help="Output path for trace plot") - return vars(parser.parse_args()) - - -def _apply_model_defaults(args, world_size, dtype_bytes=2): - """Fill None-valued kernel parameters with model-derived predictions. - - Returns a list of parameter names that were set by the model. 
- """ - applied = [] - if _DERIVE_AVAILABLE: - try: - p = _derive_params( - args["m"], - args["n"], - args["k"], - world_size, - link_bw=50.0, - num_cus=DEFAULT_NUM_CUS, - peak_tflops=DEFAULT_PEAK_TFLOPS_FP16, - hbm_bw_gbps=DEFAULT_HBM_BW_GBPS, - l2_size=DEFAULT_L2_SIZE_BYTES, - scheduling_factor=DEFAULT_SCHEDULING_FACTOR, - dtype_bytes=dtype_bytes, - ) - for name in _MODEL_PARAMS: - if args.get(name) is None and name in p: - args[name] = p[name] - applied.append(name) - except Exception: - pass - - for name, fallback in _FALLBACK_DEFAULTS.items(): - if args.get(name) is None: - args[name] = fallback - - return applied - - -def _worker(args): - """Worker function for torchrun.""" - local_rank = int(os.environ.get("RANK", os.environ.get("LOCAL_RANK", 0))) - world_size_env = int(os.environ.get("WORLD_SIZE", 1)) - - t0 = time.perf_counter() - - backend = "nccl" if torch.cuda.is_available() else "gloo" - - if "RANK" in os.environ or "LOCAL_RANK" in os.environ: - dist.init_process_group( - backend=backend, - init_method="env://", - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - else: - dist.init_process_group( - backend=backend, - init_method="tcp://127.0.0.1:29530", - world_size=world_size_env, - rank=local_rank, - device_id=torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else None, - ) - - t1 = time.perf_counter() - - shmem = iris.iris(args["heap_size"]) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - t2 = time.perf_counter() - shmem.info(f"Startup: dist.init={t1 - t0:.1f}s, iris.init={t2 - t1:.1f}s, total={t2 - t0:.1f}s") - - datatype_map = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16} - datatype = datatype_map.get(args["datatype"], torch.float16) - dtype_bytes = torch.tensor([], dtype=datatype).element_size() - - model_applied = _apply_model_defaults(args, world_size, dtype_bytes) - if rank == 0 and model_applied: - shmem.info(f"Model-derived defaults: {', 
'.join(model_applied)}") - if rank == 0: - param_summary = " ".join(f"{k}={args[k]}" for k in _MODEL_PARAMS) - shmem.info(f"Kernel params: {param_summary}") - - M = args["m"] - N = args["n"] - K = args["k"] - K_local = K // world_size - - config_kwargs = { - "block_size_m": args["block_size_m"], - "block_size_n": args["block_size_n"], - "block_size_k": args["block_size_k"], - "group_size_m": args["group_size_m"], - } - if args["comm_sms"] is not None: - config_kwargs["num_sms"] = args["comm_sms"] - if args["num_xcds"] is not None: - config_kwargs["num_xcds"] = args["num_xcds"] - config = FusedConfig(**config_kwargs) - - buffer_mb = M * K * torch.tensor([], dtype=datatype).element_size() / (1024**2) - num_m_tiles = M // config.block_size_m - num_k_blocks = K // config.block_size_k - shmem.info( - f"HBM-Buffer variant: M={M} N={N} K={K} K_local={K_local} " - f"block=({config.block_size_m},{config.block_size_n},{config.block_size_k}) " - f"buffer={buffer_mb:.0f}MB flags={num_m_tiles}x{num_k_blocks}" - ) - - # ── Allocate tensors ───────────────────────────────────────────────── - C = shmem.zeros((M, N), dtype=datatype) - - if args["a_col_major"]: - A_storage = shmem.zeros((K_local, M), dtype=datatype) - A_sharded = A_storage.T - else: - A_sharded = shmem.zeros((M, K_local), dtype=datatype) - - if args["b_col_major"]: - B_storage = shmem.zeros((N, K), dtype=datatype) - B = B_storage.T - else: - B = shmem.zeros((K, N), dtype=datatype) - - shmem.info(f"A strides={A_sharded.stride()}, B strides={B.stride()}") - - # Fill - torch.manual_seed(123 + rank) - A_data = torch.randn((M, K_local), dtype=datatype, device=f"cuda:{rank}") - A_sharded.copy_(A_data) - - torch.manual_seed(456) - B_data = torch.randn((K, N), dtype=datatype, device=f"cuda:{rank}") - B.copy_(B_data) - - # Expected - expected_tensor = None - if args["validate"]: - A_list = [torch.zeros((M, K_local), dtype=datatype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_list, A_data) - 
A_gathered = torch.cat(A_list, dim=1) - expected_tensor = shmem.zeros((M, N), dtype=datatype) - expected_tensor.copy_(torch.matmul(A_gathered, B_data)) - - # Pre-allocate workspace - k_per_flag = args["k_per_flag"] - workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config, k_per_flag=k_per_flag) - - # ── Timing ─────────────────────────────────────────────────────────── - comm_stream = torch.cuda.Stream() - start_ev = torch.cuda.Event(enable_timing=True) - end_ev = torch.cuda.Event(enable_timing=True) - total_ms = 0.0 - num_experiments = 0 - - num_fetch_sms = args["num_fetch_sms"] - num_warps = args["num_warps"] - num_stages = args["num_stages"] - num_fetch_stages = args["num_fetch_stages"] - first_stage_fetch_sms = args["first_stage_fetch_sms"] - - def run_experiment(): - nonlocal total_ms, num_experiments - shmem.barrier() - with torch.cuda.stream(comm_stream): - start_ev.record() - all_gather_matmul_hbm_buffer( - shmem, - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - num_fetch_sms=num_fetch_sms, - k_per_flag=k_per_flag, - num_warps=num_warps, - num_stages=num_stages, - num_fetch_stages=num_fetch_stages, - first_stage_fetch_sms=first_stage_fetch_sms, - ) - end_ev.record() - num_experiments += 1 - shmem.barrier() - total_ms += start_ev.elapsed_time(end_ev) - - shmem.barrier() - - # ── Validate ───────────────────────────────────────────────────────── - if args["validate"]: - shmem.info("Validating...") - C.zero_() - shmem.barrier() - run_experiment() - torch.cuda.synchronize() - shmem.barrier() - - atol = 1e-1 if datatype == torch.float16 else 1e-3 - rtol = 1e-2 if datatype == torch.float16 else 1e-5 - success = torch.allclose(C, expected_tensor, atol=atol, rtol=rtol) - if not success: - max_diff = torch.abs(C - expected_tensor).max().item() - shmem.error(f"Rank {rank}: Validation FAILED, max diff: {max_diff}") - else: - shmem.info("Validation PASSED!") - shmem.barrier() - - # ── Benchmark 
──────────────────────────────────────────────────────── - if args["benchmark"]: - if args.get("single_run"): - n_warmup, n_repeat = 0, 1 - else: - n_warmup, n_repeat = 25, 100 - - # Warmup - total_ms = 0.0 - num_experiments = 0 - if n_warmup > 0: - iris.do_bench(run_experiment, shmem.barrier, n_warmup=n_warmup, n_repeat=1) - - total_ms = 0.0 - num_experiments = 0 - C.zero_() - shmem.barrier() - - iris.do_bench(run_experiment, shmem.barrier, n_warmup=0, n_repeat=n_repeat) - avg_ms = total_ms / num_experiments if num_experiments > 0 else 0 - - total_flops = 2 * M * N * K - tflops = (total_flops * 1e-12) / (avg_ms * 1e-3) if avg_ms > 0 else 0 - element_size = torch.tensor([], dtype=datatype).element_size() - total_bytes = M * K_local * element_size * (world_size - 1) - bw_gbps = (total_bytes / (1024**3)) / (avg_ms * 1e-3) if avg_ms > 0 else 0 - - shmem.info( - f"HBM-Buffer (M={M}, K_local={K_local}, K={K}, N={N}, " - f"ws={world_size}, dtype={args['datatype']}): " - f"{avg_ms:.3f} ms, {tflops:.3f} TFLOPS, {bw_gbps:.3f} GB/s" - ) - shmem.barrier() - - # ── Per-rank finish time measurement ───────────────────────────── - # Run a single iteration and record wall-clock finish time per rank - # to see if ranks complete at different times (load imbalance). 
- shmem.barrier() - torch.cuda.synchronize() - dist.barrier() - - # Synchronized start - dist.barrier() - t_start = time.perf_counter() - - all_gather_matmul_hbm_buffer( - shmem, - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - num_fetch_sms=num_fetch_sms, - k_per_flag=k_per_flag, - num_warps=num_warps, - num_stages=num_stages, - num_fetch_stages=num_fetch_stages, - first_stage_fetch_sms=first_stage_fetch_sms, - ) - torch.cuda.synchronize() - t_end = time.perf_counter() - - finish_ms = (t_end - t_start) * 1000.0 - - # Gather all finish times to rank 0 for display - finish_tensor = torch.tensor([finish_ms], dtype=torch.float64, device=f"cuda:{rank}") - all_finish = [torch.zeros(1, dtype=torch.float64, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(all_finish, finish_tensor) - - if rank == 0: - times = [t.item() for t in all_finish] - min_t = min(times) - max_t = max(times) - print("\n Per-rank finish times (single run):") - print(f" {'Rank':>6} {'Finish ms':>10} {'Delta ms':>10}") - print(f" {'-' * 30}") - for r, t in enumerate(times): - delta = t - min_t - print(f" {r:>6} {t:>10.3f} {delta:>+10.3f}") - print(f" {'-' * 30}") - print(f" Spread (max - min): {max_t - min_t:.3f} ms") - print() - - shmem.barrier() - - # ── Trace ──────────────────────────────────────────────────────────── - if args["trace"]: - # Warmup: compile the TRACE=True kernel variant before the real run - shmem.info("Trace warmup (compiling traced kernel variant)...") - C.zero_() - workspace.locks.zero_() - shmem.barrier() - all_gather_matmul_hbm_buffer( - shmem, - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - num_fetch_sms=num_fetch_sms, - k_per_flag=k_per_flag, - num_warps=num_warps, - num_stages=num_stages, - num_fetch_stages=num_fetch_stages, - first_stage_fetch_sms=first_stage_fetch_sms, - trace=True, - ) - torch.cuda.synchronize() - shmem.barrier() - - # Actual traced run (post-compilation, clean 
state) - shmem.info("Running single traced iteration...") - C.zero_() - workspace.locks.zero_() - shmem.barrier() - - all_gather_matmul_hbm_buffer( - shmem, - C, - A_sharded, - B, - config=config, - async_op=False, - workspace=workspace, - num_fetch_sms=num_fetch_sms, - k_per_flag=k_per_flag, - num_warps=num_warps, - num_stages=num_stages, - num_fetch_stages=num_fetch_stages, - first_stage_fetch_sms=first_stage_fetch_sms, - trace=True, - ) - torch.cuda.synchronize() - shmem.barrier() - - if rank == 0 and hasattr(workspace, "trace_data"): - trace_out = args.get("trace_output", "trace_gantt.png") - try: - _plot_trace(workspace.trace_data, trace_out, rank, M, N, K, num_fetch_sms) - except ImportError: - print(" (matplotlib not available -- skipping trace plot)") - except Exception as e: - print(f" (Trace plot failed: {e})") - shmem.barrier() - - # ── PyTorch baseline ───────────────────────────────────────────────── - if args["benchmark_pytorch"]: - shmem.info("Benchmarking PyTorch (all_gather_into_tensor + matmul)...") - - pt_A = torch.randn(M, K_local, dtype=datatype, device=f"cuda:{rank}") - pt_B = torch.randn(K, N, dtype=datatype, device=f"cuda:{rank}") - pt_Ag = torch.zeros(M, K, dtype=datatype, device=f"cuda:{rank}") - - for _ in range(10): - dist.all_gather_into_tensor(pt_Ag, pt_A) - _ = torch.matmul(pt_Ag, pt_B) - torch.cuda.synchronize() - dist.barrier() - - def run_pt(): - dist.all_gather_into_tensor(pt_Ag, pt_A) - _ = torch.matmul(pt_Ag, pt_B) - - total_flops = 2 * M * N * K - element_size = torch.tensor([], dtype=datatype).element_size() - total_bytes = M * K_local * element_size * (world_size - 1) - - pt_ms = iris.do_bench(run_pt, dist.barrier) - pt_tflops = (total_flops * 1e-12) / (pt_ms * 1e-3) if pt_ms > 0 else 0 - pt_bw = (total_bytes / (1024**3)) / (pt_ms * 1e-3) if pt_ms > 0 else 0 - - shmem.info( - f"PyTorch (M={M}, K_local={K_local}, K={K}, N={N}, ws={world_size}, " - f"dtype={args['datatype']}): " - f"{pt_ms:.3f} ms, {pt_tflops:.3f} TFLOPS, 
{pt_bw:.3f} GB/s" - ) - - if args["benchmark"]: - avg_ms = total_ms / num_experiments if num_experiments > 0 else 0 - iris_tflops = (total_flops * 1e-12) / (avg_ms * 1e-3) if avg_ms > 0 else 0 - speedup = iris_tflops / pt_tflops if pt_tflops > 0 else 0 - shmem.info(f"Speedup (HBM-Buffer / PyTorch): {speedup:.2f}x") - - shmem.barrier() - - shmem.barrier() - dist.destroy_process_group() - - -def main(): - print("Starting HBM-buffer all_gather_matmul benchmark...") - args = parse_args() - if "RANK" in os.environ or "LOCAL_RANK" in os.environ: - _worker(args) - else: - print( - "Please run with torchrun:\n" - " torchrun --nproc_per_node=N " - "benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py [OPTIONS]" - ) - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/all_gather_matmul/derive_params.py b/benchmark/ops/all_gather_matmul/derive_params.py deleted file mode 100644 index cf4acd9fe..000000000 --- a/benchmark/ops/all_gather_matmul/derive_params.py +++ /dev/null @@ -1,721 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. - -""" -Parameter derivation for the HBM-buffered all_gather_matmul kernel. - -Given a problem size (M, N, K), world size, and per-link XGMI bandwidth, -derives kernel parameters that balance communication and computation in -the device-level pipeline. - -The kernel fuses all-gather with GEMM using two workgroup roles: - - Fetcher WGs: gather remote A tiles into an HBM staging buffer, - setting per-tile ready flags as data arrives. - - GEMM WGs: poll flags, then compute C += A_staged @ B tile-by-tile. - -The M dimension is split into `num_fetch_stages` pipeline stages. Each -stage's fetchers and GEMM WGs are interleaved in the launch grid so that -stage N+1's fetch overlaps with stage N's compute. 
- -Pipeline timeline (S stages): - |-- fetch stage 0 --|-- max(fetch, compute) * (S-1) --|-- compute last --| - -Usage: - python derive_params.py -m 131072 -n 2048 -k 16384 - python derive_params.py -m 196608 -n 2304 -k 16384 --link_bw 50 - python derive_params.py -m 196608 -n 2304 -k 16384 -v -b --trace - -When --link_bw is omitted the script automatically profiles the XGMI -link bandwidth by timing GPU-to-GPU copies across all peer pairs visible -from GPU 0. -""" - -import argparse -import math -import time - -# ── MI300X hardware defaults ────────────────────────────────────────────── -DEFAULT_NUM_CUS = 304 -DEFAULT_PEAK_TFLOPS_FP16 = 1300.0 -DEFAULT_HBM_BW_GBPS = 5300.0 -DEFAULT_L2_SIZE_BYTES = 256 * 1024 * 1024 -DEFAULT_NUM_XCDS = 8 -DEFAULT_WORLD_SIZE = 8 - -# Calibrated from MI300X trace data: the ratio of measured wall time to -# the CU-work-queue lower bound. Captures WG dispatch overhead, -# cross-XCD coherence latency, and pipeline bubble effects. -DEFAULT_SCHEDULING_FACTOR = 4.5 - - -def profile_link_bandwidth(world_size=DEFAULT_WORLD_SIZE): - """Measure per-link unidirectional XGMI bandwidth. - - Copies a 256 MB fp16 tensor from GPU 0 to every other visible GPU, - times the transfers with host-side timing after explicit device syncs, - and returns the conservative (min) per-link bandwidth. - """ - import torch - - n_gpus = torch.cuda.device_count() - if n_gpus < 2: - raise RuntimeError( - f"Need >= 2 visible GPUs for bandwidth profiling, found {n_gpus}. Pass --link_bw explicitly instead." 
- ) - - n_peers = min(world_size, n_gpus) - 1 - size_bytes = 256 * 1024 * 1024 - numel = size_bytes // 2 - warmup_iters = 10 - timed_iters = 40 - - print(f"\n── Link Bandwidth Profiling {'─' * 43}") - print(f" GPUs visible: {n_gpus}") - print(f" Testing: GPU 0 → GPUs 1..{n_peers}") - print(f" Transfer size: {size_bytes // (1024**2)} MB × {timed_iters} iterations\n") - - src = torch.empty(numel, dtype=torch.float16, device="cuda:0").normal_() - bandwidths = [] - - for peer in range(1, n_peers + 1): - dst = torch.empty(numel, dtype=torch.float16, device=f"cuda:{peer}") - - for _ in range(warmup_iters): - dst.copy_(src) - torch.cuda.synchronize(0) - torch.cuda.synchronize(peer) - - t_start = time.perf_counter() - for _ in range(timed_iters): - dst.copy_(src) - torch.cuda.synchronize(peer) - elapsed_s = time.perf_counter() - t_start - - bw = size_bytes * timed_iters / elapsed_s / 1e9 - bandwidths.append(bw) - print(f" GPU 0 → GPU {peer}: {bw:6.1f} GB/s") - - del dst - - del src - torch.cuda.empty_cache() - - bw_min = min(bandwidths) - bw_max = max(bandwidths) - bw_avg = sum(bandwidths) / len(bandwidths) - print(f"\n min = {bw_min:.1f} avg = {bw_avg:.1f} max = {bw_max:.1f} GB/s") - print(f" Using conservative (min): {bw_min:.1f} GB/s per link") - - return bw_min - - -# ── Tile / block size heuristics ────────────────────────────────────────── - - -def _choose_block_sizes(M, N, K, K_local): - """Heuristic tile-size selection for MI300X MFMA.""" - bk = 64 - - bm = 256 if M >= 8192 else 128 - while M % bm != 0 and bm > 64: - bm //= 2 - - if N >= 512: - bn = 256 - elif N >= 256: - bn = 256 if N % 256 == 0 else 128 - else: - bn = 128 - while N % bn != 0 and bn > 32: - bn //= 2 - - while K % bk != 0 and bk > 16: - bk //= 2 - while K_local % bk != 0 and bk > 16: - bk //= 2 - - nw = 8 if bm * bn >= 256 * 256 else 4 - return bm, bn, bk, nw - - -def _choose_k_per_flag(num_k_blocks, num_k_blocks_local, target_groups=8): - """Pick k_per_flag so that flag groups align to rank 
boundaries when - possible, falling back to the largest divisor near the target.""" - if num_k_blocks % num_k_blocks_local == 0: - candidate = num_k_blocks_local - groups = num_k_blocks // candidate - if groups >= 4: - return candidate - - kpf = max(1, num_k_blocks // target_groups) - while num_k_blocks % kpf != 0 and kpf > 1: - kpf -= 1 - return kpf - - -# ── Per-tile roofline model ────────────────────────────────────────────── - - -def _tile_roofline(bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size): - """Compute achievable per-CU TFLOPS from tile arithmetic intensity. - - staged_a is always >> L2, so A tiles come from HBM. B may fit in L2 - only when staged_a is small enough that reads don't evict B. - Returns (roofline_tflops, tile_intensity, ridge_point, b_in_l2). - """ - tile_flops = 2 * bm * bn * bk - a_bytes = bm * bk * dtype_bytes - b_bytes = bk * bn * dtype_bytes - - b_total = K * N * dtype_bytes - staged_a_total = M * K * dtype_bytes - # When staged_a exceeds L2, streaming GEMM reads evict B regardless - # of B's absolute size. - b_in_l2 = (staged_a_total <= l2_size) and (b_total <= l2_size) - - hbm_bytes = a_bytes + (0 if b_in_l2 else b_bytes) - intensity = tile_flops / max(hbm_bytes, 1) - - ridge = peak_tflops * 1e3 / hbm_bw_gbps - if intensity >= ridge: - roofline = peak_tflops - else: - roofline = hbm_bw_gbps * intensity / 1e3 - - return roofline, intensity, ridge, b_in_l2 - - -# ── Per-WG execution time models ──────────────────────────────────────── - - -def _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups, roofline_tflops, num_cus): - """Estimate per-WG GEMM execution time in microseconds. - - Uses the per-tile roofline to get the per-CU throughput, then applies - a calibrated overhead for memory-latency hiding and instruction - scheduling at single-WG occupancy (large tiles). 
- """ - total_flops = 2 * bm * bn * K - per_cu_tflops = roofline_tflops / num_cus - - # Roofline-ideal per-WG time - ideal_us = total_flops / (per_cu_tflops * 1e6) - - # Single-occupancy overhead: imperfect latency hiding, instruction - # scheduling gaps, cross-XCD coherence on staged_a reads. - # Calibrated from MI300X traces: actual/ideal ≈ 1.2-1.3. - occupancy_factor = 1.25 if bm * bn >= 256 * 256 else 1.10 - - # Flag polling: acquire-semantics atomic per flag group - flag_us = num_flag_groups * 2.5 - - return ideal_us * occupancy_factor + flag_us - - -def _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, num_fgs_per_wg): - """Estimate per-fetcher-WG execution time in microseconds. - - Each flag group fetches kpf K-blocks (each BM × BK) from one rank. - Remote data traverses XGMI; local data uses HBM. - """ - bytes_per_fg = bm * kpf * bk * dtype_bytes - remote_frac = (world_size - 1) / world_size - - # XGMI gather: raw transfer + iris.x.gather software overhead - remote_bytes = bytes_per_fg * remote_frac - gather_overhead = 1.5 - xgmi_us = remote_bytes / (link_bw * 1e3) * gather_overhead - - # HBM write to staged_a (.cg → L2/HBM, per-WG share of bandwidth) - write_bw = 15.0 # GB/s effective per fetcher WG (calibrated from traces) - write_us = bytes_per_fg / (write_bw * 1e3) - - # Read and write overlap within each tile; dominant cost + flag-store - per_fg_us = max(xgmi_us, write_us) + 5.0 - - return num_fgs_per_wg * per_fg_us - - -# ── Kernel time estimation ─────────────────────────────────────────────── - - -def _estimate_kernel_time(total_gemm_wgs, gemm_wg_us, total_fetch_wgs, fetch_wg_us, num_cus, scheduling_factor): - """Estimate kernel wall-clock time from the CU work queue model. - - total_CU_work / num_CUs gives the ideal (work-conserving) lower - bound. The scheduling_factor captures GPU dispatch overhead, - cross-XCD coherence, and pipeline bubble effects measured on MI300X. 
- """ - total_cu_work_us = total_gemm_wgs * gemm_wg_us + total_fetch_wgs * fetch_wg_us - - ideal_ms = total_cu_work_us / num_cus / 1e3 - estimated_ms = ideal_ms * scheduling_factor - return estimated_ms, ideal_ms - - -# ── Pipeline stage selection ───────────────────────────────────────────── - - -def _choose_fetch_stages(num_m_tiles, num_tiles_n, group_size_m, comm_time_ms, compute_time_ms, num_cus): - """Choose num_fetch_stages for good pipeline efficiency while keeping - m_per_stage divisible by group_size_m.""" - ratio = comm_time_ms / compute_time_ms if compute_time_ms > 0 else 999 - - if ratio > 1.5: - ideal_stages = 32 - elif ratio > 0.8: - ideal_stages = 16 - elif ratio > 0.3: - ideal_stages = 8 - else: - ideal_stages = 4 - - min_gemm_tiles = max(num_cus // 4, 32) - min_m_per_stage = max(group_size_m, math.ceil(min_gemm_tiles / max(num_tiles_n, 1))) - max_stages = max(1, num_m_tiles // min_m_per_stage) - num_stages = min(ideal_stages, max_stages) - num_stages = max(1, num_stages) - - m_per_stage = math.ceil(num_m_tiles / num_stages) - if m_per_stage % group_size_m != 0: - m_per_stage = ((m_per_stage + group_size_m - 1) // group_size_m) * group_size_m - num_stages = max(1, math.ceil(num_m_tiles / m_per_stage)) - - m_per_stage = math.ceil(num_m_tiles / num_stages) - return num_stages, m_per_stage - - -# ── num_fetch_sms optimisation ─────────────────────────────────────────── - - -def _choose_num_fetch_sms( - m_per_stage, - group_size_m, - num_flag_groups_k, - link_bw, - world_size, - num_cus, - bm, - bk, - kpf, - dtype_bytes, - gemm_wg_us, - gemm_tiles_per_stage, -): - """Choose num_fetch_sms for good pipeline overlap. - - Balances three constraints: - 1. Flag delivery parallelism: ≥ m_per_stage so every M-tile gets - a fetcher early (good for reducing GEMM flag-poll stalls). - 2. Link saturation: enough concurrent fetchers to use the XGMI - aggregate bandwidth. - 3. CU budget: leave enough CUs for GEMM in the first dispatch wave. 
- - Returns (num_fetch_sms, per-WG timing info dict). - """ - total_fg_per_stage = num_flag_groups_k * m_per_stage - - # Constraint 1: one fetcher per M-group for broad flag delivery - parallel_min = max(1, m_per_stage // group_size_m) - - # Constraint 2: enough fetchers to keep XGMI links busy - per_fg_bytes = bm * kpf * bk * dtype_bytes - per_fg_remote = per_fg_bytes * (world_size - 1) / world_size - per_fg_xgmi_us = per_fg_remote / (link_bw * 1e3) * 1.5 - per_fg_write_us = per_fg_bytes / (15.0 * 1e3) - per_fg_us = max(per_fg_xgmi_us, per_fg_write_us) + 5.0 - - # Total flag groups per stage should finish within the stage GEMM time - gemm_waves = math.ceil(gemm_tiles_per_stage / num_cus) - stage_gemm_us = gemm_waves * gemm_wg_us - if per_fg_us > 0: - balance_min = max(1, math.ceil(total_fg_per_stage * per_fg_us / stage_gemm_us)) - else: - balance_min = 1 - - nf = max(parallel_min, balance_min, 64) - nf = min(nf, num_cus // 2) - nf = max(1, nf) - - return nf - - -# ── Main derivation ────────────────────────────────────────────────────── - - -def derive(M, N, K, world_size, link_bw, num_cus, peak_tflops, hbm_bw_gbps, l2_size, scheduling_factor, dtype_bytes): - K_local = K // world_size - - # 1. Tile sizes - bm, bn, bk, nw = _choose_block_sizes(M, N, K, K_local) - gm = 4 - num_m_tiles = M // bm - num_tiles_n = math.ceil(N / bn) - num_k_blocks = K // bk - num_k_blocks_local = K_local // bk - - # 2. Per-tile roofline - roofline_tflops, intensity, ridge, b_in_l2 = _tile_roofline( - bm, bn, bk, M, K, N, dtype_bytes, peak_tflops, hbm_bw_gbps, l2_size - ) - - # 3. Communication model (link-limited) - total_remote_bytes = M * K_local * (world_size - 1) * dtype_bytes - total_link_bw = link_bw * (world_size - 1) - comm_time_ms = total_remote_bytes / (total_link_bw * 1e9) * 1e3 - - # 4. 
Compute model (roofline-limited) - total_flops = 2 * M * N * K - compute_time_ms = total_flops / (roofline_tflops * 1e12) * 1e3 - - ratio = comm_time_ms / compute_time_ms if compute_time_ms > 0 else 999 - - # 5. k_per_flag - kpf = _choose_k_per_flag(num_k_blocks, num_k_blocks_local) - num_flag_groups_k = num_k_blocks // kpf - - # 6. Pipeline stages - num_stages, m_per_stage = _choose_fetch_stages(num_m_tiles, num_tiles_n, gm, comm_time_ms, compute_time_ms, num_cus) - gemm_tiles_per_stage = m_per_stage * num_tiles_n - - # 7. first_stage_fetch_sms: use all CUs to fill the pipeline ASAP - fsf = num_cus - - # 8. Per-WG timing - gemm_wg_us_val = _gemm_wg_time_us(bm, bn, bk, K, num_flag_groups_k, roofline_tflops, num_cus) - - # 9. Choose num_fetch_sms - nf = _choose_num_fetch_sms( - m_per_stage, - gm, - num_flag_groups_k, - link_bw, - world_size, - num_cus, - bm, - bk, - kpf, - dtype_bytes, - gemm_wg_us_val, - gemm_tiles_per_stage, - ) - - # 10. Compute per-WG fetch times - total_fg_per_stage = num_flag_groups_k * m_per_stage - fgs_per_wg_stg0 = max(1, math.ceil(total_fg_per_stage / fsf)) - fgs_per_wg_rest = max(1, math.ceil(total_fg_per_stage / nf)) - fetch_us_stg0 = _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, fgs_per_wg_stg0) - fetch_us_rest = _fetch_wg_time_us(bm, bk, kpf, world_size, link_bw, dtype_bytes, fgs_per_wg_rest) - - # 11. Grid geometry - first_stage_size = fsf + gemm_tiles_per_stage - rest_stage_size = nf + gemm_tiles_per_stage - grid_size = first_stage_size + rest_stage_size * max(0, num_stages - 1) - total_fetch_wgs = fsf + nf * max(0, num_stages - 1) - total_gemm_wgs = gemm_tiles_per_stage * num_stages - - # 12. Kernel time estimate (CU-work model) - avg_fetch_us = fsf * fetch_us_stg0 + nf * max(0, num_stages - 1) * fetch_us_rest - avg_fetch_us /= max(total_fetch_wgs, 1) - est_kernel_ms, est_ideal_ms = _estimate_kernel_time( - total_gemm_wgs, gemm_wg_us_val, total_fetch_wgs, avg_fetch_us, num_cus, scheduling_factor - ) - - # 13. 
Link-limited pipeline estimate (simple model for comparison) - stage_m = m_per_stage * bm - stage_comm_ms = stage_m * K_local * (world_size - 1) * dtype_bytes / (total_link_bw * 1e9) * 1e3 - stage_compute_ms = 2 * stage_m * N * K / (roofline_tflops * 1e12) * 1e3 - startup_ms = stage_comm_ms - steady_ms = max(stage_comm_ms, stage_compute_ms) * max(0, num_stages - 1) - drain_ms = stage_compute_ms - pipeline_ms = startup_ms + steady_ms + drain_ms - sequential_ms = comm_time_ms + compute_time_ms - - # 14. Standalone GEMM estimate (rocBLAS-class efficiency for comparison) - standalone_gemm_eff = 0.30 - standalone_tflops = roofline_tflops * standalone_gemm_eff - standalone_gemm_ms = total_flops / (standalone_tflops * 1e12) * 1e3 - pytorch_est_ms = comm_time_ms + standalone_gemm_ms - - staged_a_gb = M * K * dtype_bytes / (1024**3) - - return dict( - block_size_m=bm, - block_size_n=bn, - block_size_k=bk, - group_size_m=gm, - num_warps=nw, - num_fetch_sms=nf, - k_per_flag=kpf, - num_fetch_stages=num_stages, - first_stage_fetch_sms=fsf, - # derived - K_local=K_local, - num_m_tiles=num_m_tiles, - num_tiles_n=num_tiles_n, - num_k_blocks=num_k_blocks, - num_flag_groups_k=num_flag_groups_k, - m_per_stage=m_per_stage, - gemm_tiles_per_stage=gemm_tiles_per_stage, - grid_size=grid_size, - total_fetch_wgs=total_fetch_wgs, - total_gemm_wgs=total_gemm_wgs, - # roofline - roofline_tflops=roofline_tflops, - tile_intensity=intensity, - ridge_point=ridge, - b_in_l2=b_in_l2, - # per-WG timing - gemm_wg_us=gemm_wg_us_val, - fetch_wg_us_stg0=fetch_us_stg0, - fetch_wg_us_rest=fetch_us_rest, - # estimates - total_remote_bytes=total_remote_bytes, - total_link_bw=total_link_bw, - comm_time_ms=comm_time_ms, - total_flops=total_flops, - compute_time_ms=compute_time_ms, - ratio=ratio, - stage_comm_ms=stage_comm_ms, - stage_compute_ms=stage_compute_ms, - pipeline_ms=pipeline_ms, - sequential_ms=sequential_ms, - est_kernel_ms=est_kernel_ms, - est_ideal_ms=est_ideal_ms, - 
standalone_gemm_ms=standalone_gemm_ms, - pytorch_est_ms=pytorch_est_ms, - staged_a_gb=staged_a_gb, - scheduling_factor=scheduling_factor, - ) - - -# ── Formatting helpers ─────────────────────────────────────────────────── - - -def _fmt_bytes(n): - if n >= 1024**3: - return f"{n / 1024**3:.2f} GB" - if n >= 1024**2: - return f"{n / 1024**2:.1f} MB" - return f"{n / 1024:.1f} KB" - - -def _fmt_flops(n): - if n >= 1e15: - return f"{n / 1e15:.2f} PFLOPs" - return f"{n / 1e12:.2f} TFLOPs" - - -def _fmt_tflops(t): - return f"{t:.0f} TFLOPS" - - -# ── Analysis output ────────────────────────────────────────────────────── - - -def print_analysis(M, N, K, world_size, link_bw, p, passthrough_args, bw_profiled=False): - K_local = p["K_local"] - dtype_bytes = 2 - bound = "COMM-BOUND" if p["ratio"] > 1.0 else "COMPUTE-BOUND" - - print("=" * 72) - print(" All-Gather Matmul HBM-Buffer — Parameter Derivation") - print("=" * 72) - - # ── Problem ─────────────────────────────────────────────────────── - print(f"\n{'Problem':>14}: C({M}, {N}) = all_gather(A_shard({M}, {K_local})) @ B({K}, {N})") - print(f"{'World size':>14}: {world_size} GPUs") - print(f"{'Dtype':>14}: fp16 ({dtype_bytes}B)") - - # ── Data sizes ──────────────────────────────────────────────────── - a_shard = M * K_local * dtype_bytes - b_size = K * N * dtype_bytes - c_size = M * N * dtype_bytes - staged = M * K * dtype_bytes - print(f"\n{'A_shard':>14}: ({M}, {K_local}) {_fmt_bytes(a_shard)}") - print(f"{'B':>14}: ({K}, {N}) {_fmt_bytes(b_size)}") - print(f"{'C':>14}: ({M}, {N}) {_fmt_bytes(c_size)}") - print(f"{'staged_a':>14}: ({M}, {K}) {_fmt_bytes(staged)}") - if staged > 4 * 1024**3: - print(f"{'':>14} *** > 4 GB: requires int64 pointer arithmetic ***") - - # ── Per-tile roofline ───────────────────────────────────────────── - print(f"\n── Roofline {'─' * 59}") - print(f"{'Tile':>14}: ({p['block_size_m']}, {p['block_size_n']}, {p['block_size_k']})") - print(f"{'Intensity':>14}: {p['tile_intensity']:.0f} 
FLOPs/byte {'(B in L2)' if p['b_in_l2'] else '(B from HBM)'}") - print(f"{'Ridge point':>14}: {p['ridge_point']:.0f} FLOPs/byte") - region = "COMPUTE" if p["tile_intensity"] >= p["ridge_point"] else "MEMORY" - print(f"{'Roofline':>14}: {_fmt_tflops(p['roofline_tflops'])} ({region}-bound tiles)") - - # ── Communication ───────────────────────────────────────────────── - print(f"\n── Communication {'─' * 54}") - print(f"{'Remote bytes':>14}: {_fmt_bytes(p['total_remote_bytes'])} (from {world_size - 1} peers)") - bw_src = "profiled" if bw_profiled else "user" - print( - f"{'Link BW':>14}: {link_bw:.1f} GB/s/link × {world_size - 1} links " - f"= {p['total_link_bw']:.0f} GB/s aggregate ({bw_src})" - ) - print(f"{'Comm time':>14}: {p['comm_time_ms']:.3f} ms (link-limited)") - - # ── Compute ─────────────────────────────────────────────────────── - print(f"\n── Compute {'─' * 60}") - print(f"{'Total FLOPs':>14}: {_fmt_flops(p['total_flops'])}") - print(f"{'Roofline time':>14}: {p['compute_time_ms']:.3f} ms (at {_fmt_tflops(p['roofline_tflops'])})") - print(f"{'Comm/Compute':>14}: {p['ratio']:.2f}x → {bound}") - - # ── Per-WG timing ───────────────────────────────────────────────── - print(f"\n── Per-WG Model {'─' * 55}") - print(f"{'GEMM WG':>14}: {p['gemm_wg_us']:.0f} us ({p['total_flops'] / p['total_gemm_wgs'] / 1e9:.2f} GFLOPs/WG)") - print(f"{'Fetch WG stg0':>14}: {p['fetch_wg_us_stg0']:.0f} us") - if p["num_fetch_stages"] > 1: - print(f"{'Fetch WG rest':>14}: {p['fetch_wg_us_rest']:.0f} us") - - # ── Pipeline ────────────────────────────────────────────────────── - S = p["num_fetch_stages"] - print(f"\n── Pipeline {'─' * 59}") - print(f"{'Stages (S)':>14}: {S}") - print(f"{'M tiles/stage':>14}: {p['m_per_stage']} ({p['m_per_stage'] * p['block_size_m']} rows)") - print( - f"{'GEMM WGs/stg':>14}: {p['gemm_tiles_per_stage']} ({p['m_per_stage']} m-tiles × {p['num_tiles_n']} n-tiles)" - ) - print(f"{'K flag groups':>14}: {p['num_flag_groups_k']} 
(k_per_flag={p['k_per_flag']})") - print(f"{'Stage comm':>14}: {p['stage_comm_ms']:.3f} ms") - print(f"{'Stage compute':>14}: {p['stage_compute_ms']:.3f} ms") - - # ── Grid ────────────────────────────────────────────────────────── - print(f"\n── Grid Layout {'─' * 56}") - print( - f"{'Stage 0':>14}: {p['first_stage_fetch_sms']} fetchers + " - f"{p['gemm_tiles_per_stage']} GEMM = " - f"{p['first_stage_fetch_sms'] + p['gemm_tiles_per_stage']} WGs" - ) - if S > 1: - print( - f"{'Stages 1..{}'.format(S - 1):>14}: {p['num_fetch_sms']} fetchers + " - f"{p['gemm_tiles_per_stage']} GEMM = " - f"{p['num_fetch_sms'] + p['gemm_tiles_per_stage']} WGs (×{S - 1})" - ) - print(f"{'Total grid':>14}: {p['grid_size']} WGs ({p['total_fetch_wgs']} fetch + {p['total_gemm_wgs']} GEMM)") - - # ── Time estimates ──────────────────────────────────────────────── - print(f"\n── Time Estimates {'─' * 53}") - print(f"{'CU-work lower':>14}: {p['est_ideal_ms']:.1f} ms (total WG time / {DEFAULT_NUM_CUS} CUs)") - print(f"{'Fused kernel':>14}: {p['est_kernel_ms']:.1f} ms (×{p['scheduling_factor']:.1f} scheduling overhead)") - est_tflops = p["total_flops"] / (p["est_kernel_ms"] * 1e-3) / 1e12 - print( - f"{'Est. 
TFLOPS':>14}: {est_tflops:.0f} TFLOPS ({est_tflops / p['roofline_tflops'] * 100:.0f}% of roofline)" - ) - print(f"{'':>14}") - print( - f"{'PyTorch est.':>14}: {p['pytorch_est_ms']:.1f} ms " - f"(all_gather {p['comm_time_ms']:.1f} + matmul {p['standalone_gemm_ms']:.1f})" - ) - if p["est_kernel_ms"] < p["pytorch_est_ms"]: - speedup = p["pytorch_est_ms"] / p["est_kernel_ms"] - print(f"{'Fused speedup':>14}: {speedup:.2f}x over sequential PyTorch") - else: - slowdown = p["est_kernel_ms"] / p["pytorch_est_ms"] - print(f"{'Fused speedup':>14}: {1 / slowdown:.2f}x (slower than sequential by {slowdown:.2f}x)") - - # ── Recommended parameters ──────────────────────────────────────── - print(f"\n── Recommended Kernel Parameters {'─' * 38}") - params = [ - ("block_size_m", p["block_size_m"]), - ("block_size_n", p["block_size_n"]), - ("block_size_k", p["block_size_k"]), - ("group_size_m", p["group_size_m"]), - ("num_fetch_sms", p["num_fetch_sms"]), - ("k_per_flag", p["k_per_flag"]), - ("num_warps", p["num_warps"]), - ("num_fetch_stages", p["num_fetch_stages"]), - ("first_stage_fetch_sms", p["first_stage_fetch_sms"]), - ] - for name, val in params: - print(f" --{name:30s} {val}") - - # ── Command line ────────────────────────────────────────────────── - extra = " ".join(passthrough_args) - if extra: - extra = " " + extra - cmd = ( - f"HSA_NO_SCRATCH_RECLAIM=1 torchrun --nproc_per_node {world_size} " - f"benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py " - f"-m {M} -n {N} -k {K} " - f"--block_size_m {p['block_size_m']} " - f"--block_size_n {p['block_size_n']} " - f"--block_size_k {p['block_size_k']} " - f"--group_size_m {p['group_size_m']} " - f"--num_fetch_sms {p['num_fetch_sms']} " - f"--k_per_flag {p['k_per_flag']} " - f"--num_warps {p['num_warps']} " - f"--num_fetch_stages {p['num_fetch_stages']} " - f"--first_stage_fetch_sms {p['first_stage_fetch_sms']}" - f"{extra}" - ) - print(f"\n── Command {'─' * 60}") - print(f" {cmd}") - print() - - -def main(): - parser = 
argparse.ArgumentParser( - description="Derive parameters for HBM-buffered all_gather_matmul.", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=__doc__, - ) - parser.add_argument("-m", type=int, required=True, help="M dimension (rows of output)") - parser.add_argument("-n", type=int, required=True, help="N dimension (cols of output)") - parser.add_argument("-k", type=int, required=True, help="K dimension (total reduction dim)") - parser.add_argument("--world_size", type=int, default=DEFAULT_WORLD_SIZE, help="Number of GPUs") - parser.add_argument( - "--link_bw", - type=float, - default=None, - help="Per-link XGMI bandwidth in GB/s (one direction). Omit to auto-profile via GPU-to-GPU copies.", - ) - parser.add_argument("--num_cus", type=int, default=DEFAULT_NUM_CUS, help="Number of compute units") - parser.add_argument("--peak_tflops", type=float, default=DEFAULT_PEAK_TFLOPS_FP16, help="Peak fp16 TFLOPS") - parser.add_argument("--hbm_bw", type=float, default=DEFAULT_HBM_BW_GBPS, help="HBM bandwidth in GB/s") - parser.add_argument( - "--scheduling_factor", - type=float, - default=DEFAULT_SCHEDULING_FACTOR, - help="CU scheduling overhead factor (calibrated from traces)", - ) - - args, passthrough = parser.parse_known_args() - - if args.k % args.world_size != 0: - parser.error(f"K ({args.k}) must be divisible by world_size ({args.world_size})") - - link_bw = args.link_bw - bw_profiled = False - if link_bw is None: - try: - link_bw = profile_link_bandwidth(args.world_size) - bw_profiled = True - except Exception as e: - print(f"\n Auto-profiling failed: {e}") - print(" Falling back to --link_bw 50 (MI300X default)\n") - link_bw = 50.0 - - p = derive( - args.m, - args.n, - args.k, - args.world_size, - link_bw, - args.num_cus, - args.peak_tflops, - args.hbm_bw, - DEFAULT_L2_SIZE_BYTES, - args.scheduling_factor, - dtype_bytes=2, - ) - - print_analysis(args.m, args.n, args.k, args.world_size, link_bw, p, passthrough, bw_profiled=bw_profiled) - - -if 
__name__ == "__main__": - main() diff --git a/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py b/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py deleted file mode 100644 index db9cc56f2..000000000 --- a/benchmark/ops/all_gather_matmul/tune_hbm_buffer.py +++ /dev/null @@ -1,634 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. - -""" -Parameter tuning script for HBM-buffered all_gather_matmul. - -Sweeps parameters around a baseline configuration, collecting traces, TFLOPs, -PyTorch baseline, and validation for every configuration. - -This script does NOT modify benchmark_hbm_buffer.py — it invokes it via -``torchrun`` as a subprocess for each parameter set. - -Usage: - # Default one-at-a-time sweep (each param varied independently): - python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py - - # Custom matrix size: - python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py -m 8192 -n 4096 -k 131072 - - # Only sweep specific parameters: - python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --params num_fetch_sms k_per_flag - - # Full cartesian product (warning: combinatorial explosion): - python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --mode full - - # Dry run — just print what would be tested: - python benchmark/ops/all_gather_matmul/tune_hbm_buffer.py --dry_run -""" - -import argparse -import json -import os -import re -import subprocess -import time -from datetime import datetime -from itertools import product -from pathlib import Path - -# ───────────────────────────────────────────────────────────────────────────── -# Baseline configuration — the centre point of every sweep. -# Edit these to match your current best-known config. 
-# ───────────────────────────────────────────────────────────────────────────── -BASELINE = { - "block_size_m": 256, - "block_size_n": 256, - "block_size_k": 64, - "group_size_m": 4, - "num_fetch_sms": 64, - "k_per_flag": 64, - "num_warps": 8, - "num_fetch_stages": 4, - "first_stage_fetch_sms": 304, -} - -# ───────────────────────────────────────────────────────────────────────────── -# Sweep ranges — values to try for each parameter. -# In ``oneatatime`` mode only one parameter deviates from the baseline at a -# time; in ``full`` mode the cartesian product is taken (use with care). -# ───────────────────────────────────────────────────────────────────────────── -SWEEP_RANGES = { - "block_size_m": [64, 128, 256], - "block_size_n": [64, 128, 256], - "block_size_k": [64], - "group_size_m": [1, 2, 4, 8], - "num_fetch_sms": [64, 128, 192, 256], - "k_per_flag": [16, 32, 64, 128], - "num_warps": [4, 8], - "num_fetch_stages": [2, 4, 8], - "first_stage_fetch_sms": [128, 192, 256, 304], -} - -# ───────────────────────────────────────────────────────────────────────────── -# Helpers -# ───────────────────────────────────────────────────────────────────────────── - - -def make_label(cfg): - """Short human-readable label for a config.""" - parts = [ - f"bm{cfg['block_size_m']}", - f"bn{cfg['block_size_n']}", - f"bk{cfg['block_size_k']}", - f"gm{cfg['group_size_m']}", - f"nf{cfg['num_fetch_sms']}", - f"kpf{cfg['k_per_flag']}", - f"nw{cfg['num_warps']}", - f"fs{cfg['num_fetch_stages']}", - ] - if cfg["num_fetch_stages"] > 1: - parts.append(f"fsf{cfg['first_stage_fetch_sms']}") - return "_".join(parts) - - -def validate_config(cfg, M, N, K, world_size=8): - """Return a list of error strings; empty list means valid.""" - errors = [] - K_local = K // world_size - bm, bn, bk = cfg["block_size_m"], cfg["block_size_n"], cfg["block_size_k"] - kpf = cfg["k_per_flag"] - - if M % bm != 0: - errors.append(f"M={M} not divisible by block_size_m={bm}") - if N % bn != 0: - 
errors.append(f"N={N} not divisible by block_size_n={bn}") - if K % bk != 0: - errors.append(f"K={K} not divisible by block_size_k={bk}") - if K_local % bk != 0: - errors.append(f"K_local={K_local} not divisible by block_size_k={bk}") - - num_k_blocks = K // bk - if num_k_blocks % kpf != 0: - errors.append(f"num_k_blocks={num_k_blocks} not divisible by k_per_flag={kpf}") - - if cfg["num_warps"] not in (1, 2, 4, 8, 16): - errors.append(f"num_warps={cfg['num_warps']} must be a power of 2 in [1..16]") - - return errors - - -def build_command(cfg, M, N, K, trace_path, nproc=8, validate=True, benchmark=True, benchmark_pytorch=False): - """Build the ``torchrun`` CLI for one configuration.""" - cmd = [ - "torchrun", - "--nproc_per_node", - str(nproc), - "benchmark/ops/all_gather_matmul/benchmark_hbm_buffer.py", - "-m", - str(M), - "-n", - str(N), - "-k", - str(K), - "--block_size_m", - str(cfg["block_size_m"]), - "--block_size_n", - str(cfg["block_size_n"]), - "--block_size_k", - str(cfg["block_size_k"]), - "--group_size_m", - str(cfg["group_size_m"]), - "--num_fetch_sms", - str(cfg["num_fetch_sms"]), - "--k_per_flag", - str(cfg["k_per_flag"]), - "--num_warps", - str(cfg["num_warps"]), - "--num_fetch_stages", - str(cfg["num_fetch_stages"]), - ] - - if cfg["num_fetch_stages"] > 1 and cfg.get("first_stage_fetch_sms") is not None: - cmd.extend(["--first_stage_fetch_sms", str(cfg["first_stage_fetch_sms"])]) - - if validate: - cmd.append("-v") - if benchmark: - cmd.append("-b") - if benchmark_pytorch: - cmd.append("--benchmark_pytorch") - - cmd.extend(["--trace", "--trace_output", trace_path]) - return cmd - - -# ── Output parsing ──────────────────────────────────────────────────────────── - -_RE_IRIS = re.compile(r"HBM-Buffer\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s") -_RE_PYTORCH = re.compile(r"PyTorch\s*\([^)]*\):\s*([\d.]+)\s*ms,\s*([\d.]+)\s*TFLOPS,\s*([\d.]+)\s*GB/s") -_RE_SPEEDUP = re.compile(r"Speedup.*?:\s*([\d.]+)x") -_RE_VALID_FAIL = 
re.compile(r"Validation FAILED.*?max diff:\s*([\d.eE+-]+)") - - -def parse_output(output): - """Extract metrics from benchmark stdout+stderr.""" - result = { - "iris_ms": None, - "iris_tflops": None, - "iris_bw_gbps": None, - "pytorch_ms": None, - "pytorch_tflops": None, - "pytorch_bw_gbps": None, - "validation": None, - "speedup": None, - } - - m = _RE_IRIS.search(output) - if m: - result["iris_ms"] = float(m.group(1)) - result["iris_tflops"] = float(m.group(2)) - result["iris_bw_gbps"] = float(m.group(3)) - - m = _RE_PYTORCH.search(output) - if m: - result["pytorch_ms"] = float(m.group(1)) - result["pytorch_tflops"] = float(m.group(2)) - result["pytorch_bw_gbps"] = float(m.group(3)) - - if "Validation PASSED" in output: - result["validation"] = "PASSED" - elif "Validation FAILED" in output: - fm = _RE_VALID_FAIL.search(output) - result["validation"] = f"FAILED (diff={fm.group(1)})" if fm else "FAILED" - - m = _RE_SPEEDUP.search(output) - if m: - result["speedup"] = float(m.group(1)) - - return result - - -# ── Sweep generation ────────────────────────────────────────────────────────── - - -def generate_configs(baseline, sweep_ranges, mode="oneatatime", params=None): - """ - Generate the list of configs to evaluate. 
- - Args: - baseline: dict of default values - sweep_ranges: dict mapping param name -> list of values - mode: "oneatatime" or "full" - params: optional list of param names to sweep (None = all) - """ - configs = [] - seen = set() - - def _add(cfg): - label = make_label(cfg) - if label not in seen: - configs.append(dict(cfg)) - seen.add(label) - - # Always include baseline first - _add(baseline) - - active_params = params if params else list(sweep_ranges.keys()) - - if mode == "oneatatime": - for param in active_params: - if param not in sweep_ranges: - print(f" WARNING: unknown param '{param}', skipping") - continue - for val in sweep_ranges[param]: - cfg = dict(baseline) - cfg[param] = val - # When num_fetch_stages == 1, first_stage_fetch_sms is irrelevant - if cfg["num_fetch_stages"] == 1: - cfg["first_stage_fetch_sms"] = cfg["num_fetch_sms"] - _add(cfg) - - elif mode == "full": - active_ranges = {p: sweep_ranges[p] for p in active_params if p in sweep_ranges} - names = list(active_ranges.keys()) - values = [active_ranges[n] for n in names] - for combo in product(*values): - cfg = dict(baseline) - for n, v in zip(names, combo): - cfg[n] = v - if cfg["num_fetch_stages"] == 1: - cfg["first_stage_fetch_sms"] = cfg["num_fetch_sms"] - _add(cfg) - - return configs - - -# ── Main ────────────────────────────────────────────────────────────────────── - - -def main(): - parser = argparse.ArgumentParser( - description="Parameter tuning for HBM-buffered all_gather_matmul.", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - # ── Matrix dimensions ──────────────────────────────────────────────── - parser.add_argument("-m", type=int, default=16384, help="M dimension") - parser.add_argument("-n", type=int, default=2048, help="N dimension") - parser.add_argument("-k", type=int, default=131072, help="K dimension (total)") - parser.add_argument("--nproc", type=int, default=8, help="Number of GPUs") - - # ── Baseline overrides (non-swept params use these values) 
──────── - parser.add_argument( - "--block_size_m", type=int, default=None, help=f"Baseline block_size_m (default: {BASELINE['block_size_m']})" - ) - parser.add_argument( - "--block_size_n", type=int, default=None, help=f"Baseline block_size_n (default: {BASELINE['block_size_n']})" - ) - parser.add_argument( - "--block_size_k", type=int, default=None, help=f"Baseline block_size_k (default: {BASELINE['block_size_k']})" - ) - parser.add_argument( - "--group_size_m", type=int, default=None, help=f"Baseline group_size_m (default: {BASELINE['group_size_m']})" - ) - parser.add_argument( - "--num_fetch_sms", type=int, default=None, help=f"Baseline num_fetch_sms (default: {BASELINE['num_fetch_sms']})" - ) - parser.add_argument( - "--k_per_flag", type=int, default=None, help=f"Baseline k_per_flag (default: {BASELINE['k_per_flag']})" - ) - parser.add_argument( - "--num_warps", type=int, default=None, help=f"Baseline num_warps (default: {BASELINE['num_warps']})" - ) - parser.add_argument( - "--num_fetch_stages", - type=int, - default=None, - help=f"Baseline num_fetch_stages (default: {BASELINE['num_fetch_stages']})", - ) - parser.add_argument( - "--first_stage_fetch_sms", - type=int, - default=None, - help=f"Baseline first_stage_fetch_sms (default: {BASELINE['first_stage_fetch_sms']})", - ) - - # ── Sweep control ───────────────────────────────────────────────── - parser.add_argument( - "--mode", - choices=["oneatatime", "full"], - default="oneatatime", - help="'oneatatime' varies one param at a time; 'full' = cartesian product", - ) - parser.add_argument( - "--params", - nargs="+", - default=None, - help="Only sweep these parameters (default: all). 
Choices: " + ", ".join(SWEEP_RANGES.keys()), - ) - parser.add_argument("--output_dir", type=str, default=None, help="Output directory (auto-generated if unset)") - parser.add_argument("--dry_run", action="store_true", help="Print configs and exit without running") - parser.add_argument("--skip_validation", action="store_true", help="Skip validation (faster, no correctness check)") - parser.add_argument("--timeout", type=int, default=600, help="Per-config timeout in seconds (default: 600)") - - args = parser.parse_args() - M, N, K = args.m, args.n, args.k - - # Apply any CLI baseline overrides - baseline = dict(BASELINE) - for key in baseline: - cli_val = getattr(args, key, None) - if cli_val is not None: - baseline[key] = cli_val - - # Output directory - if args.output_dir: - output_dir = Path(args.output_dir) - else: - ts = datetime.now().strftime("%Y%m%d_%H%M%S") - output_dir = Path(f"benchmark/ops/all_gather_matmul/tune_results_{ts}") - output_dir.mkdir(parents=True, exist_ok=True) - trace_dir = output_dir / "traces" - trace_dir.mkdir(exist_ok=True) - - # Generate configs - configs = generate_configs(baseline, SWEEP_RANGES, mode=args.mode, params=args.params) - - # Pre-validate all configs - valid_configs = [] - skipped = [] - for cfg in configs: - errs = validate_config(cfg, M, N, K, world_size=args.nproc) - if errs: - skipped.append((cfg, errs)) - else: - valid_configs.append(cfg) - - # Banner - print(f"\n{'=' * 100}") - print(" HBM-Buffer All-Gather MatMul — Parameter Tuning") - print(f" M={M} N={N} K={K} nproc={args.nproc} mode={args.mode}") - print(f" Baseline: {make_label(baseline)}") - print(f" Configs to run: {len(valid_configs)} (skipped: {len(skipped)})") - print(f" Output dir: {output_dir}") - print(f" Validation: {'OFF' if args.skip_validation else 'ON'}") - print(f"{'=' * 100}") - - if skipped: - print(f"\n Skipped (invalid for M={M}, N={N}, K={K}):") - for cfg, errs in skipped: - print(f" {make_label(cfg)}: {'; '.join(errs)}") - - if args.dry_run: 
- print("\n Configs that would be run:") - for i, cfg in enumerate(valid_configs): - label = make_label(cfg) - is_baseline = cfg == baseline - tag = " [BASELINE]" if is_baseline else "" - print(f" [{i + 1:>3}] {label}{tag}") - print(f"\n Total: {len(valid_configs)} configs") - return - - # ── Run sweep ───────────────────────────────────────────────────────── - results = [] - pytorch_baseline = None - env = os.environ.copy() - env["HSA_NO_SCRATCH_RECLAIM"] = "1" - - total_start = time.time() - - for i, cfg in enumerate(valid_configs): - label = make_label(cfg) - trace_path = str(trace_dir / f"trace_{label}.png") - is_first = i == 0 - - sep = "-" * 80 - print(f"\n{sep}") - print(f"[{i + 1}/{len(valid_configs)}] {label}") - if is_first: - print(" (includes PyTorch baseline benchmark)") - print(sep) - - cmd = build_command( - cfg, - M, - N, - K, - trace_path, - nproc=args.nproc, - validate=not args.skip_validation, - benchmark=True, - benchmark_pytorch=is_first, - ) - cmd_str = " ".join(cmd) - print(f" $ HSA_NO_SCRATCH_RECLAIM=1 {cmd_str}") - - t0 = time.time() - try: - proc = subprocess.run( - cmd, - env=env, - capture_output=True, - text=True, - timeout=args.timeout, - ) - elapsed = time.time() - t0 - full_output = proc.stdout + "\n" + proc.stderr - - parsed = parse_output(full_output) - - # Capture PyTorch baseline on first run - if is_first and parsed["pytorch_tflops"] is not None: - pytorch_baseline = { - "ms": parsed["pytorch_ms"], - "tflops": parsed["pytorch_tflops"], - "bw_gbps": parsed["pytorch_bw_gbps"], - } - - trace_exists = os.path.exists(trace_path) - results.append( - { - "label": label, - "config": cfg, - "iris_ms": parsed["iris_ms"], - "iris_tflops": parsed["iris_tflops"], - "iris_bw_gbps": parsed["iris_bw_gbps"], - "validation": parsed["validation"], - "trace_path": trace_path if trace_exists else None, - "elapsed_s": round(elapsed, 1), - "returncode": proc.returncode, - } - ) - - # Print summary line - parts = [] - if parsed["iris_tflops"] is not 
None: - parts.append(f"{parsed['iris_tflops']:.2f} TFLOPS") - parts.append(f"{parsed['iris_ms']:.3f} ms") - if parsed["iris_bw_gbps"] is not None: - parts.append(f"{parsed['iris_bw_gbps']:.1f} GB/s") - if parsed["validation"]: - parts.append(f"valid={parsed['validation']}") - if trace_exists: - parts.append("trace=OK") - else: - parts.append("trace=MISSING") - if proc.returncode != 0: - parts.append(f"EXIT={proc.returncode}") - print(f" => {' | '.join(parts)} ({elapsed:.0f}s)") - - if is_first and pytorch_baseline: - print( - f" => PyTorch baseline: {pytorch_baseline['tflops']:.2f} TFLOPS {pytorch_baseline['ms']:.3f} ms" - ) - - # Save full log for debugging - log_path = output_dir / f"log_{label}.txt" - with open(log_path, "w") as f: - f.write(f"COMMAND: HSA_NO_SCRATCH_RECLAIM=1 {cmd_str}\n") - f.write(f"EXIT CODE: {proc.returncode}\n") - f.write(f"ELAPSED: {elapsed:.1f}s\n\n") - f.write("=== STDOUT ===\n") - f.write(proc.stdout) - f.write("\n=== STDERR ===\n") - f.write(proc.stderr) - - except subprocess.TimeoutExpired: - elapsed = time.time() - t0 - results.append( - { - "label": label, - "config": cfg, - "iris_ms": None, - "iris_tflops": None, - "iris_bw_gbps": None, - "validation": "TIMEOUT", - "trace_path": None, - "elapsed_s": round(elapsed, 1), - "returncode": -1, - } - ) - print(f" => TIMEOUT after {args.timeout}s") - - except Exception as e: - elapsed = time.time() - t0 - results.append( - { - "label": label, - "config": cfg, - "iris_ms": None, - "iris_tflops": None, - "iris_bw_gbps": None, - "validation": f"ERROR: {e}", - "trace_path": None, - "elapsed_s": round(elapsed, 1), - "returncode": -1, - } - ) - print(f" => ERROR: {e}") - - total_elapsed = time.time() - total_start - - # ── Summary table ───────────────────────────────────────────────────── - W = 130 - print(f"\n\n{'=' * W}") - print( - f" TUNING RESULTS | M={M} N={N} K={K} | nproc={args.nproc} | " - f"{len(valid_configs)} configs in {total_elapsed:.0f}s" - ) - if pytorch_baseline: - print( - f" 
PyTorch baseline: {pytorch_baseline['ms']:.3f} ms | " - f"{pytorch_baseline['tflops']:.2f} TFLOPS | " - f"{pytorch_baseline['bw_gbps']:.1f} GB/s" - ) - print(f"{'=' * W}") - - col_label_w = 65 - print( - f" {'#':>3} {'Configuration':<{col_label_w}} {'ms':>8} {'TFLOPS':>8} " - f"{'vs PT':>7} {'Valid':>8} {'Trace':>5}" - ) - print(f" {'-' * (W - 4)}") - - for i, r in enumerate(results): - ms_s = f"{r['iris_ms']:.3f}" if r["iris_ms"] is not None else "--" - tf_s = f"{r['iris_tflops']:.2f}" if r["iris_tflops"] is not None else "--" - - if pytorch_baseline and r["iris_tflops"] is not None and pytorch_baseline["tflops"] > 0: - vs_pt = f"{r['iris_tflops'] / pytorch_baseline['tflops']:.2f}x" - else: - vs_pt = "--" - - valid_s = (r["validation"] or "--")[:8] - trace_s = "Y" if r.get("trace_path") else "N" - - tag = ( - " *" - if ( - r["iris_tflops"] is not None - and r["iris_tflops"] - == max((x["iris_tflops"] for x in results if x["iris_tflops"] is not None), default=0) - ) - else "" - ) - - print( - f" {i + 1:>3} {r['label']:<{col_label_w}} {ms_s:>8} {tf_s:>8} " - f"{vs_pt:>7} {valid_s:>8} {trace_s:>5}{tag}" - ) - - # Best config - valid_results = [r for r in results if r["iris_tflops"] is not None] - if valid_results: - best = max(valid_results, key=lambda r: r["iris_tflops"]) - worst = min(valid_results, key=lambda r: r["iris_tflops"]) - print(f"\n {'BEST':>6}: {best['label']}") - print(f" {best['iris_ms']:.3f} ms | {best['iris_tflops']:.2f} TFLOPS | valid={best['validation']}") - if pytorch_baseline and pytorch_baseline["tflops"] > 0: - print(f" {best['iris_tflops'] / pytorch_baseline['tflops']:.2f}x vs PyTorch") - if best.get("trace_path"): - print(f" trace: {best['trace_path']}") - print(f" {'WORST':>6}: {worst['label']}") - print(f" {worst['iris_ms']:.3f} ms | {worst['iris_tflops']:.2f} TFLOPS") - if best["iris_tflops"] > 0 and worst["iris_tflops"] > 0: - print( - f" SPREAD: {best['iris_tflops'] / worst['iris_tflops']:.2f}x " - f"({worst['iris_tflops']:.2f} → 
{best['iris_tflops']:.2f} TFLOPS)" - ) - - print(f"{'=' * W}") - - # ── Save results JSON ───────────────────────────────────────────────── - results_path = output_dir / "results.json" - with open(results_path, "w") as f: - json.dump( - { - "meta": { - "M": M, - "N": N, - "K": K, - "nproc": args.nproc, - "mode": args.mode, - "baseline": baseline, - "sweep_ranges": SWEEP_RANGES, - "timestamp": datetime.now().isoformat(), - "total_elapsed_s": round(total_elapsed, 1), - "pytorch_baseline": pytorch_baseline, - }, - "results": results, - }, - f, - indent=2, - default=str, - ) - - print(f"\n Results JSON : {results_path}") - print(f" Trace PNGs : {trace_dir}/") - print(f" Per-run logs : {output_dir}/log_*.txt") - print() - - -if __name__ == "__main__": - main() diff --git a/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py b/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py new file mode 100644 index 000000000..f4c6a57ee --- /dev/null +++ b/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. 
+ +"""Benchmark for HBM-buffered all-gather + GEMM (iris.ops).""" + +import torch +import iris.bench as bench +from iris.ops import FusedConfig +from iris.ops.all_gather_matmul_hbm_buffer import ( + all_gather_matmul_hbm_buffer as _all_gather_matmul_hbm_buffer, + all_gather_matmul_hbm_buffer_preamble, +) + + +@bench.register +@bench.axis("num_ranks", [2, 4, 8]) +@bench.axis("M", [1024, 4096, 16384]) +@bench.axis("N", [3584]) +@bench.axis("K", [8192]) +@bench.axis("dtype", [torch.float16]) +def bench_all_gather_matmul_hbm_buffer(state, ctx): + M, N, K = state["M"], state["N"], state["K"] + dtype = state["dtype"] + world_size = ctx.get_num_ranks() + K_local = K // world_size + + A_sharded = ctx.zeros((M, K_local), dtype=dtype) + A_sharded.fill_(1.0) + B = torch.randn((K, N), device="cuda", dtype=dtype) + C = ctx.zeros((M, N), dtype=dtype) + + config = FusedConfig() + workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) + + state.set_flops(2 * M * N * K) + state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) + + state.exec( + lambda: _all_gather_matmul_hbm_buffer(ctx, C, A_sharded, B, config=config, workspace=workspace), + preamble_fn=lambda: C.zero_(), + ) + + +if __name__ == "__main__": + bench.main() From 2c8b2264998510c4f91f24b7e2d5b06a0b8ed0ef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:02:07 +0000 Subject: [PATCH 37/60] Replace shmem with ctx in hbm_buffer kernel and tests Renames the first-argument parameter from `shmem` to `ctx` in all_gather_matmul_hbm_buffer and all_gather_matmul_hbm_buffer_preamble, and updates all call-sites in the test file accordingly, for consistency with the iris.bench framework naming convention. 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/e9ff52b6-90fd-4c23-9b53-c8f959aa1e9a Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/ops/all_gather_matmul_hbm_buffer.py | 40 ++++++++--------- .../ops/test_all_gather_matmul_hbm_buffer.py | 44 +++++++++---------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 2c8b80224..34e36dc62 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -240,7 +240,7 @@ def _hbm_buffer_all_gather_matmul_kernel( def all_gather_matmul_hbm_buffer_preamble( - shmem, + ctx, A_sharded: torch.Tensor, B: torch.Tensor, config: Optional[FusedConfig] = None, @@ -259,7 +259,7 @@ def all_gather_matmul_hbm_buffer_preamble( M, K_local = A_sharded.shape K, N = B.shape - world_size = shmem.get_num_ranks() + world_size = ctx.get_num_ranks() assert world_size * K_local == K assert K_local % config.block_size_k == 0 @@ -282,23 +282,23 @@ def all_gather_matmul_hbm_buffer_preamble( if staged_a_layout == "m_contiguous": # Allocate (K, M) row-major, .T gives (M, K) with stride_m=1, stride_k=M - storage = shmem.zeros((K, M), dtype=A_sharded.dtype) + storage = ctx.zeros((K, M), dtype=A_sharded.dtype) ws.aux_buffer = storage.T # (M, K) view, M-contiguous else: # Default: (M, K) row-major, stride_m=K, stride_k=1 - ws.aux_buffer = shmem.zeros((M, K), dtype=A_sharded.dtype) + ws.aux_buffer = ctx.zeros((M, K), dtype=A_sharded.dtype) - ws.locks = shmem.zeros((num_m_tiles * num_flag_groups_k,), dtype=torch.int32) + ws.locks = ctx.zeros((num_m_tiles * num_flag_groups_k,), dtype=torch.int32) buffer_mb = M * K * A_sharded.element_size() / (1024**2) sa_stride_m, sa_stride_k = ws.aux_buffer.stride() - shmem.info( + ctx.info( f"HBM buffer: staged_a=({M},{K}) [{buffer_mb:.1f} MB] " f"layout={staged_a_layout} strides=({sa_stride_m},{sa_stride_k}), " 
f"flags={num_m_tiles}x{num_flag_groups_k}, k_per_flag={k_per_flag}" ) - shmem.barrier() + ctx.barrier() return ws @@ -307,12 +307,12 @@ def all_gather_matmul_hbm_buffer_preamble( _WG_GEMM_WAIT = 16 -def _extract_wg_trace(shmem, grid_size, **metadata): +def _extract_wg_trace(ctx, grid_size, **metadata): """Reconstruct per-workgroup trace arrays from DeviceTracing events.""" import numpy as np - bufs = shmem.tracing.trace_buffers - n = min(shmem.tracing.trace_counter.item(), shmem.tracing.max_events) + bufs = ctx.tracing.trace_buffers + n = min(ctx.tracing.trace_counter.item(), ctx.tracing.max_events) event_ids = bufs["event_id"][:n].cpu().numpy() pids = bufs["pid"][:n].cpu().numpy() @@ -344,7 +344,7 @@ def _extract_wg_trace(shmem, grid_size, **metadata): def all_gather_matmul_hbm_buffer( - shmem, + ctx, output_tensor: torch.Tensor, A_sharded: torch.Tensor, B: torch.Tensor, @@ -376,8 +376,8 @@ def all_gather_matmul_hbm_buffer( M, K_local = A_sharded.shape K, N = B.shape - world_size = shmem.get_num_ranks() - rank = shmem.get_rank() + world_size = ctx.get_num_ranks() + rank = ctx.get_rank() assert world_size * K_local == K assert output_tensor.shape == (M, N) @@ -394,7 +394,7 @@ def all_gather_matmul_hbm_buffer( assert num_k_blocks % k_per_flag == 0 if workspace is None: - workspace = all_gather_matmul_hbm_buffer_preamble(shmem, A_sharded, B, config, k_per_flag, staged_a_layout) + workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config, k_per_flag, staged_a_layout) workspace.locks.zero_() @@ -445,10 +445,10 @@ def all_gather_matmul_hbm_buffer( if trace: max_trace_events = grid_size * 4 - if not shmem.tracing.enabled: - shmem.tracing.enable(max_events=max_trace_events) + if not ctx.tracing.enabled: + ctx.tracing.enable(max_events=max_trace_events) else: - shmem.tracing.reset() + ctx.tracing.reset() launch_kwargs = {"matrix_instr_nonkdim": 16} if num_warps is not None: @@ -476,7 +476,7 @@ def all_gather_matmul_hbm_buffer( stride_sa_m, stride_sa_k, 
stride_bias, - shmem.get_device_context(), + ctx.get_device_context(), rank, world_size, config.block_size_m, @@ -501,12 +501,12 @@ def all_gather_matmul_hbm_buffer( ) if not async_op: - shmem.barrier() + ctx.barrier() if trace: torch.cuda.synchronize() workspace.trace_data = _extract_wg_trace( - shmem, + ctx, grid_size, num_fetch_sms=num_fetch_sms, num_fetch_stages=num_fetch_stages, diff --git a/tests/ops/test_all_gather_matmul_hbm_buffer.py b/tests/ops/test_all_gather_matmul_hbm_buffer.py index af173ea8b..6a2ca6f9d 100644 --- a/tests/ops/test_all_gather_matmul_hbm_buffer.py +++ b/tests/ops/test_all_gather_matmul_hbm_buffer.py @@ -46,9 +46,9 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a pytest.skip("torch.distributed not initialized") heap_size = 2**33 - shmem = iris.iris(heap_size) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() + ctx = iris.iris(heap_size) + rank = ctx.get_rank() + world_size = ctx.get_num_ranks() K = K_local * world_size # Full K dimension @@ -67,14 +67,14 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a ref_output = torch.matmul(A_gathered_ref, B) torch.cuda.synchronize() - # Create shmem tensors - A_sharded_shmem = shmem.zeros((M, K_local), dtype=dtype) + # Create ctx tensors + A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) A_sharded_shmem.copy_(A_sharded) - B_shmem = shmem.zeros((K, N), dtype=dtype) + B_shmem = ctx.zeros((K, N), dtype=dtype) B_shmem.copy_(B) - output = shmem.zeros((M, N), dtype=dtype) + output = ctx.zeros((M, N), dtype=dtype) - shmem.barrier() + ctx.barrier() # Use small block sizes for small test problems config = FusedConfig( @@ -84,11 +84,11 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a ) workspace = all_gather_matmul_hbm_buffer_preamble( - shmem, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout + ctx, A_sharded_shmem, B_shmem, config=config, 
staged_a_layout=staged_a_layout ) all_gather_matmul_hbm_buffer( - shmem, + ctx, output, A_sharded_shmem, B_shmem, @@ -99,7 +99,7 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a ) torch.cuda.synchronize() - shmem.barrier() + ctx.barrier() max_diff = (output - ref_output).abs().max().item() @@ -127,9 +127,9 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N pytest.skip("torch.distributed not initialized") heap_size = 2**33 - shmem = iris.iris(heap_size) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() + ctx = iris.iris(heap_size) + rank = ctx.get_rank() + world_size = ctx.get_num_ranks() K = K_local * world_size @@ -149,16 +149,16 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N ref_output = torch.matmul(A_gathered_ref, B) + bias[:, None] torch.cuda.synchronize() - # Create shmem tensors - A_sharded_shmem = shmem.zeros((M, K_local), dtype=dtype) + # Create ctx tensors + A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) A_sharded_shmem.copy_(A_sharded) - B_shmem = shmem.zeros((K, N), dtype=dtype) + B_shmem = ctx.zeros((K, N), dtype=dtype) B_shmem.copy_(B) - bias_shmem = shmem.zeros((M,), dtype=dtype) + bias_shmem = ctx.zeros((M,), dtype=dtype) bias_shmem.copy_(bias) - output = shmem.zeros((M, N), dtype=dtype) + output = ctx.zeros((M, N), dtype=dtype) - shmem.barrier() + ctx.barrier() config = FusedConfig( block_size_m=64, @@ -167,7 +167,7 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N ) all_gather_matmul_hbm_buffer( - shmem, + ctx, output, A_sharded_shmem, B_shmem, @@ -177,7 +177,7 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N ) torch.cuda.synchronize() - shmem.barrier() + ctx.barrier() max_diff = (output - ref_output).abs().max().item() From 1f7f6f1b66563d8d47dd8716328a259492cdce99 Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> 
Date: Wed, 8 Apr 2026 17:12:09 -0700 Subject: [PATCH 38/60] Updated copilot instructions: you have GPUs, use them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced the "Development Without AMD GPU" section with explicit instructions that the runner has 8x MI325X GPUs and should run benchmarks, tests, and post results. Removed Docker/Apptainer build instructions (irrelevant for the coding agent — iris is pre-installed). Co-Authored-By: Claude Opus 4.6 --- .github/copilot-instructions.md | 63 +++++++++++++-------------------- 1 file changed, 25 insertions(+), 38 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index cf5f9a0db..48399d6da 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -12,61 +12,48 @@ Iris is a Triton-based framework for Remote Memory Access (RMA) operations on AM **FOLLOW THESE INSTRUCTIONS EXACTLY. Reference these instructions first before using search or bash commands.** -## Prerequisites +## GPU Environment -- **GPU**: AMD GPUs with ROCm compatibility (tested on MI300X, MI350X & MI355X) - > **Note**: See below for instructions on development without AMD GPU access -- **ROCm/HIP Toolkit**: Required for building C++/HIP components -- **Docker/Apptainer**: Recommended for containerized development +**You have AMD GPU access.** Your runner has 8x MI325X GPUs (gfx942) with ROCm installed. Do NOT skip GPU operations or say you lack GPU access. You can and should: -## Build +- Run benchmarks and post results (charts, tables) in PR comments +- Run tests with `torchrun --nproc_per_node=N` for multi-GPU tests +- Use `rocm-smi` to verify GPU status +- Run `python -c "import torch; print(torch.cuda.device_count())"` to confirm GPU count -### Docker Development Environment (Recommended) +When asked to run a benchmark, **run it and post the output**. Do not say you cannot. 
+ +### Running multi-GPU tests and benchmarks + +Multi-GPU tests require `torch.distributed` initialization before pytest: ```bash -# Build and start development container (takes 45-60 minutes - NEVER CANCEL) -docker compose up --build -d +# Single GPU +pytest tests/unittests/ -v --tb=short -# Attach to running container -docker attach iris-dev +# Multi-GPU (N = number of GPUs) +torchrun --nproc_per_node=N -m pytest tests/ -v --tb=short -# Install Iris in development mode -cd iris && pip install -e ".[dev]" +# Benchmarks use iris.bench framework +torchrun --nproc_per_node=8 benchmark/ops/bench_.py ``` -### Alternative Docker Setup -```bash -# Build Docker image manually -./docker/build.sh # Takes 45-60 minutes +### iris.bench framework -# Run container -./docker/run.sh +Benchmarks use the declarative `iris.bench` framework. See existing `benchmark/ops/bench_*.py` files for examples. Output includes latency, throughput, and bandwidth tables. When posting benchmark results in PR comments, format as markdown tables. -# Install Iris -cd iris && pip install -e ".[dev]" -``` +## Prerequisites -### Apptainer Setup -```bash -# Build and run Apptainer image -./apptainer/build.sh -./apptainer/run.sh +- **GPU**: AMD GPUs with ROCm compatibility (tested on MI300X, MI325X, MI350X & MI355X) +- **ROCm/HIP Toolkit**: Required for building C++/HIP components +- **Docker/Apptainer**: Recommended for containerized development -# Install Iris -pip install -e ".[dev]" -``` +## Build -### Local Development (Not Recommended) +iris is already installed in your environment via `pip install -e .` in the setup steps. You do not need to build or install anything. 
If you need to reinstall after modifying `setup.py` or C extensions: ```bash -# Requires ROCm/HIP toolkit installation pip install -e ".[dev]" ``` -### Development Without AMD GPU -If you don't have access to AMD GPUs, you can still contribute to the project: -- **Code Editing**: Start editing code directly in your local environment -- **CI Testing**: The project has comprehensive CI pipelines that will test your changes automatically. You can check the CI logs if your changes fail to understand what went wrong. -- **Local Validation**: Run linting and formatting locally: `ruff check . --fix && ruff format .` - ## Run ### Testing From 9999273d18f246dfb0bb05ce1f2902c6a5ebe0ac Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:24:45 +0000 Subject: [PATCH 39/60] Add benchmark comparison plots for HBM buffer vs baseline Benchmark run on 8x AMD MI325X (gfx942), float16, N=3584, K=8192. The HBM-buffered kernel delivers 2.1-3.5x lower latency and 2.1-3.6x higher TFLOPS than the baseline all_gather_matmul across 2/4/8 ranks. 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/0c486480-906b-44f0-b594-8375468e2a44 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- docs/benchmark-results/latency_comparison.png | Bin 0 -> 73832 bytes docs/benchmark-results/tflops_comparison.png | Bin 0 -> 75345 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/benchmark-results/latency_comparison.png create mode 100644 docs/benchmark-results/tflops_comparison.png diff --git a/docs/benchmark-results/latency_comparison.png b/docs/benchmark-results/latency_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..cef2396c2a1ee58f641489cbe47b644faecdab46 GIT binary patch literal 73832 zcmd?RcU0Ba+Ac~o#vN-4cEJWJARq`LB4DElh&1US2;!nDy$BeipdzRsy{Rl=2ChBhWFiiUQ#EUfJ;C`S7oOl)i^ z)>bF@1dj8aI=26&o!u?l)BOCF|NS@ktZhvB|JJdu!KeIuOIFL4g=NQY6-lP<=dj7vosV_|_uP?%W#D%wV^VJ?^v!hGk85VX7_ffJ^nv z>G7PB{@|^zr>ukEz z58M9x|KUIG8hNkb{$IbrQnG!W+W+>A$GjWf{BPf2)nUDY=kPyYN2&h@FJ%6WJ+nDO z^H_6>=jJ^MJ#^np&136+Hm&c@abu9-gC0??hk9TYzJ!Y zZP_99VzhYS;lqb^@!8^AEWO`Yrk$V|jE#+PtHwRqEn<}%WZ&7QFpSd6OG-G4=i8~4%&$A&nk+`I@h&;2p`qdI>Y8+Cell-y zjMfw_v$Z->T&E>fuj+lgpM(sPTRrhbs(!)nEtlfsc4cgit<|czHYPNRrMHUZMu*q0 zUkly-a?yUYT^&n7{mPYR9UUFLeSNW)0=N_07H1cF{goPg`lg5Ip<<4yk#4hEo%Sty z+P}=l21TDSw4ubDa^VsVLiLkS$B8$ z5O(jOM~@#j4Aw_C#mN00z@;i}Wo5Nt!v>W3>0F1IQU z4AKtqFRv_)FN}MvCao?FNRq2uT3+t^^5vj`fkEf(GPdF7l(6mm`r%`9=cMO?sAFOzBBnU-zda`ocH`^{+v zQDga%t4oD7%BQCD~`r^;H7^Py*%~iKNR+i_w z7e+E`8w*?=n^JT%IOPMgO7-I}v&}GDl!{hYT=S;3?vv4+=at9G{1N#H6s`Si zq*_lZIyt>NP_dw08N2Fy84ofwgmk z5;e_isr|eZQXTX>CWa&0y1Q^!&G_i3dzZV#u(h#K!HZtAb}dsZVAZDBiY zY}R)4)G5u?mBqA~jzLC_b!u7Q9_-cFsvv&(JZC*-eYA`xJ?hl$^I<0~ZBAK@b>y}^ z{ybGHsbOUFy7Q}@Nh#R+wj2fv%_D)Z4EEGCz++yS9mpZ>Do6%&uTGx@0aGcKaK zX(pzR$>ibZ=cj#MU7At~^cCH=Z(r`acdeVFPTMCQRt$YHw3ILq6CE^aABRPSmJv>A zeQQ{(FrZ;!ksRr<;!GDx%b$FVPmff%o~!OUJ0eifUFb%89UW~y)*)IQE_$M+MWeAg zSl~kKnW-mejf%zM;^L#T_U_nZzAFpNv^82ehZ0iib8~asd~~pIOP}tG8WL;}i^BWp 
z`~2BZWZAY>JU&|HX{cu|jm8-D|MtykX0X2abN0!mzN#SMYk%EUxc*VedpoZRdV}I^ z$=TNHp(56i#pW7T-}=L>r_pSJJ++Q)pP!#^Q(PR1k1`u-h|SGg`Sp*XMeYjvn_CVS?@#Dw% z60z-9pjo}&eoXUO{MI&=jk8$Tc3(dH+?1$ZB_zS0$EheQd%R%lh7C5FM{kPh-Msmx z*em6MV!PJ|^aZtCn?Awt6=qt|<(xM|SXXsj1+Lbjp6K~Qi{nKio8@I?-!cowuI5ix z^5CAr`SkPOc<Wb#=p2b#q^* z6)seci5=tV6ku$xk8=~{{@6|FQpJ8&U+ytA`Y*sSqrgs82w zMyjszojdu!7s=>B{Vs+^(tiH__|rFcbfOMbhl#X&3^_4VE4eyY>a!>B*5`Yv21Ti( zj2woWj7e8G71W6YY9DZ7hYX3S_N9gf{@ zF3U45*-jHimJBLy6;lFlKw|0Z1^U}vCvF^-kkH3^7;Z_6vUkEu&s%4WduYbGNp`-3 z3y}EsX)W!Z5yL)~v}6S)cFE8oiOP^S0bf-$#Wb}JtX6M6K{|JV-03=isF>CloaL5 zherfBjO9;VUblXIRdMl!$jC_2tk_)!4`2tY%E`&a&1>ga@p_lMMu7|U>~1qJFNq#|rJpkI^gB|*H>DSBl z)|>#mRn^6wZU%6i{^HG_h~@;up$hOLsB7(_BI(^okb9PS3!nJX*J1+8E(IP-!0PUv z)>=l@7;a6ECg5juGzBl&yer>%OHh&X)QtkXGcyJ?1&iT;hlfXQUS6Vcg)I{=(ucm}XI`{7)sBX)31wDgdo`VO^i&%Bk z-T(P_wIX*nXJ_Y%+FB(5=$De14h=NA?#@uC8_)m0c&%NcwCV456$ zl)PzsH$eQ;9xM5s8K}d+hei>pDzTR?$;nmp{$3jz$g7ixl^JI!ARypC@9PbXdhsF# zyG3`jJzMqGt+ySWoqaTa9T8DaRI5lmTdce^FNB~*D2zzvxA@h{?QiB6n2WYLV_n1CQlc4SnYl7YiB|D!{VK zOp}HpJreDG2cK^F?T-}nEw}l;pt$DS1VfOwR29fe*D!E1VH+D60aLuu=W;;CrRAPZ z1&;+vxgKe=WbvbsJ3BeOIsae-*(&|jVFO=XwJM8T^FDpL#%oZh8!hdlvN$`co@Y0t z(_Z-K_amX&tzm*DkENfor{J$Jj}uOTS~lO+-gR%?M!{nnD$Tb_F1-$E za~;>Yx4bZ3*koIb8a86{YGs)aWUYqkac9Q%J#5)VoN}Pr+0G$s4%g7%CHVmWA zqoSz+Ui9_$ayU9VDu$n`&-37wp%hE` zI|-hE-qH2rx+A<33WM$ ziV}35b+i7x;T?T_{nB88>k7qaXo^#5n&}7Wqc4nwC=DMTdV9+|?xDM)QqqraD%K>d z#A&L(+6VV_I=sv%edosQ$KEoU@8AEi=h2bK@>^tgmZAfV_Ol6_y`bcoMmm?iQA_Zn zXf7|zWM*%*kn}$H=TE!%`1l;sfyH(L*$4tCTtjyheX@!+oT20H5~bQ6;ZqSxy3FiDR~`*z+s|2GA*H{ zU2Ru{HFe+;I*X5Kl-tS9orf=n-^^JF-|Mw2MAD;>vWu+wP9ZVJF(E~IhKW4$onx2t z5&1o^b5A?L`R%vN%njiRtt*G&F1;a$g)3Vcs$| zHI0UPq1ClrX0KaOtA1^n&mPm=tADNCYP`FMmtiX=T$Aix)aD)^Gk3{HTqG?e;*uS=2s)(y$9#?8N4c8#V~~`T6awv39*91KLF7qB;CY z=4G8UeR1YjU?bzaeU{_TEsAKW%K-D9nVFfCb}rR81n7- zbErNFhMmXwJm~siHq^|uTCo;S^hp`J>qo2Aj>p&t6=SxAz&(`RGILk3b3`DO7~g;J zU>E8~emVezLW{8Tq%qwFrOfaJk4BBqNOp@pE%sX`H_w3i@IYjn<24l(zlo)(T3}(& 
zfSn-z+1{K*V?!2gV`WdqOtAEPfIPJ0XQrl3oV08=KC?Vcmj;sZ!TO3uRU(_^$<#n? zcV+53PAWs=)%kVIZj{!n%L9Y$5#1^(Do-NB9CxE`oGhu&G*yCZGdHky>zQk6DcVhM zbaON6p^98De|8|wJURu!QCL4X&ug+2@d%D-zWqxm6+L!KT5U_3fe65Z$?{5Do_$hh zdQ_LNaQt33uNClF&!nUz3IUuxA3l6I^XTv9J=Xb#+K#{}PlAJY79Zc#(_7(Bu3E82 z#IpUx=Si!+;H-k8k-ArZb_!8je1L)_s~9Kkht+6Id6x!vQ>|lxXT*zu#;i=Uh-mt3 zE%x$;|NQ3}@B-x<*a)-?5n8IvBpoCv3vh&f{K%0bsKc5%lI7gcv`|!uidbD;Eo6lS zuv@_B{)`)z>9(Cak3#RF+o7uxE;0FKufEo#)b}OBsw*u;ZCghDL$QOalNM z+P2}G{37^|rjEm{4<#j1U@li7H}v*Sd*sYR697|xM|Omqh|c=|Y}%v-%1?2}ozPe0 zmS>U#A1b9ph84|bF7zHeogVcnj7-76-nTQ1?W-QY`9 ziax743I`Mga9D?w<(QTLMF-=VMc(Yb^sF*}yQyGVF}>mWz_7Au{GJqEr58t)-;e2Y zouG^}p)1?9=sB94DzMzhs+VV{jMuEtxeMEiuXB5+1vH>w?Md0rC%UIYmcTC&T!W z9#8JBbW4F#Noh~CBbFfsu(GnoWSCS{c6RFFDo=K*uKV%Fo4w`FLrSS&Z>02|c3*l& zweC*E8cqO|AUmOWsO;%J>&hsy4xy}6m6eq>ZiZCRG>sl8`cVqqGzlG1F3;HtrBH}| zYRWGe-EqNTJhFk*xiOPnbf3C+y;OtzAT*bO; z#J6=B4vfGJN)e|`Ts#1eXjdKPMj1hUeGL5pOPzy?JeE0eo`$W<{Qm)r(Yin!gTHt~ zg(!aRtILi+tcrM~s&;l+*o>^(w#5R2Xvcd)W5OqI;3&;BtsVX@dAQEDnc7c$k5iNzE{!I71B>}XXHkh420Han`>YzUG_h$D9FoW z?=;Nx+UNd^19ZoLNTw-tkTeQ}6;g3f6SHo8K2QI1!LZn~&EC`0#r(pX`MT7qV^oF42pF)Y=OA84g24cff-_ig$;lK>z#C z^+eD~gghCyo?_eoQNZ3`69f{A)B3Tuw<@k|=nq{84|+Q;=ca%yv_rikat^3T)AX&C z;ijax*Nyq7wx~jUO9Gm2p4-8%FJ#X&+v%C`teL@JXfNFwtUEyevjC(-{r2tK&CIa` z)p#M(n!Vt2O=IW2rDTlyaYd&N3i?y!MbQ8^v!7`_&3EjW0$Nv%%8u)Q{PBm&NP1cF zI4W-9#vOdY8)|;qve&6 zs00T`1Q=^rlBpf7n zmp?*3m6DR;;^wX#TfZG#x(SyzIx8;?eRJmT_4|T7A;0i5CaNRETI_t%``+*rsW~^< z8zfGT$!CUY#4MoBd_>djX!WIQJjmN|%-Sff*=+iGT@h+El}x zyJ2GD>P$t(65=Bp$mYzQ1nPS6?AbN9)$&76Z$eQZ@<`{>`mO9)d^r#bT%ea{Y`bp7 z{m^Q$fmnt3eX&Hp^F9Cpzz$`DnZ6H&6!w@Ds#-;peK#c9IJiG6w8EuHc|5B@D4n}c zS_BJs%l5rt*vQm`i(cpbsu)mjUx;28H%IXv1B!ZP|F5QXB+}_Ts;KJf%5u}Y+t<}o zwAK461L%-jD09CY%WjHVqTZRep-T>f3{F=Zi6R0@YHF%We~9Tce7qa)o&a`c->g4R z&=NXr4E}jzRPuPkmMzT}wu&1s$he|Qrc$|uTxTtaQ|4#2lG#b^`vc8KdvyhkL7}J1 zbzHm?!WP6a8wZExjr{r77SzY~c`O-PF8}?jl9H0!;%H8@M=+RrsC#wD@6b?Vz*D&a zi<*+PR24&o1bnt`-P-c<+@HO}!-z0Y8>Q22v8AQS)RxZ{gNRJ%-Yj 
z*5UNkg=exSH2vD&Ys`O*wwpTOj%D2oZVcrK0!1+UM`;-uCNR=vM`xIeL=>+K;^)T& zgD1*En-f;_pWRW9ot{n6g$+F2W*Vin(Jwqa+z#Y^5ORew{GSwh8&$%ex-AipqJH{p z96G`-_n$;>FHBQ-_48X(?Zvnj7i(KuT7+th(lz&eIR(QQTE*_uc5n24eKIO+N!oww z+gCARDe6!5h+KvZ**t;$R!d(Wg`%1vv3JA9jdY?e?~a{?s>9Q+$ z^l7(+teIuN5G!8KjLscc<8fXU+ql*0Ahx8;PYonNv(xg8-_IuQG9CC>nZDf{^x!P? z+??&V8{a5jux!t&nAv&aw;em;foHf5A3ki*;+i!1C3s~xDJ_{-1~zS{O4R|1WZFmV zB&}SV*lc0Wp&JLZhI1_#84a(cyjfvr#qMNx`t-vCPtQwvS|3h8t_F50KP|!kc)~(w zEEj9jH>eF(CUdSH`r-`C912*6pOtAHztxS}r z(<5WF@n*1vk+^KlV`gm`u^{cq%ZqbNRNlvr9@RtMziIYJA+PQIO*463UqX)zt9XfK zObB>v>&6CBIJ&36Vt0!L$E< zcPd3Uw?S=_8#Ec()!E5Pz%nP=vffhOWiKw zwlKYShx-8a?%lh`wkgF@e+r2RioTkQ;t&l_HiqHXg-+d=?>wc;f65$ccIPTLG3MvL zeJcm%O6^k|x}kji22}IusMV!1yQMSW1H;f*^QKz6cCgO69z!(c{bLeMxvTRFpabM$ z12lvN^m;rnK0I#)ycSi|%g(hC$t!usbJ$MZ`fFO^%$bfcjcL^J>RY?QM69pEZXqy~ zS1(TuMX?cxD5WSe$F1C7he4i%dHPvyM&PD~mGdpsfFS&SM)pFwCF3MrXqUbE1k_FJ z;G?w9rh|X1XFpz$)5*tJ-%9NaXqD*Li;@VK8UrEi3gCDHq|cP{a#{3}w&h|Wk*6ge zsXc{@V<)nyer*mNHIvLxLs#}EyH1pjosreCIB0T*y#tjN1t6TFDLwGYQu(#zD-ctO zhe1}`KR_5z^vhk8fM0PeoA3GatWGCz%`o43^jWj__kVVoDA}Pn)oZ-EJRrF$kgY&Z zzb-=p7C^bf?b%6a67gV9ttAv1zv{;G^g<*9%-O7WN=J&v*xT4-@bu;fFmGRedO-)O zU`~RtCxDiAOO*bcTSK+iBIRl3?Vi$#vND-huU@%)`|Krb^x;1#+S!$ANB#AvdV(gg zI*rpYqbfC2om65)!9|Q~elFOM?`+$YW?;}zz1|UOaiqO2T0myg`Kwp|wNJ5Sg~hK3 zZ@K_WO&I{nw$HYfC)Z7(8(|GI*dWv%xyAiW)Z=x5(PN7$bGDGs6bC-Qy2|KS@F_78 z*cOtH@JO@kdo(0Q_Sw)q7PJkDIuFD2;10*V?-00Ns<~R@&Bu#p#8-U$MR<5JFtXXi zmy+hdY+&eVbaUe&SH6=cucD3!Y;$Y`N|s!n_AlQ45{76jU==ZANCQ1{=1lMScpA{5 zCJ93%d^^a=$yxmVBRD0)UXe#`ac2}p4xfF=`FH1 zN1)hZmDZ$Y3&<=A^JkegRlblh&fbM-BJ^kuJkryjmF_8WV34pf@+M_NPsCfBmsG zW!)gmAXQ`I7(U&1FHvY(T_PlG`=20&W1hF-Hd%3`9g>Q$@A`drj>|_GxQ7Z{mk!)O zvD9(P5720Dei|Fb>wut>3eVN#D^T*@zkg4l7r^IDlx<>e#DsPJTr^R-$3b$bJOQOz z$73mA7oNKWyujEQyj?0ud~c{h4z zouARC>Sq6zCaI_dp2t|P!zd*hFgmZM>07dOL2XmmchdNGI67 zpcz-1<-GBYaeL?V`TaU685u@4`#ey-X#(fkioz0+p&LPVzyA7b&R(6X_|YnDU2zS@~Le1>dwvu-|j7-@YzmTy`Ut_*aO zvW~T(`aRK~;#>`)>WgOq*=XDA8mMLg#03-_Ic@gs>*t{w9nIFrT3_uW8OwX=+tXMB 
z&eSP0?GjSDV&^0*H~KpBTJoro(uqerY1!!uQJjl|P_a@!sxN-8ijY9iDpGI z64(j7Jw1mI$!mhZLE+TOkh=6E}unYJHea+*KA7@*0I8T)LqnpKcb=x;szi#n&wqi4LLhOUpZFR zg+!0)F`)lgti>BH*8+B|V=pW$bOFePA8W}L7#w`l~akv<+8si1bdMDj&K%DEpLMTtP|?C4`QusMt-xqIJ04 z1<+0cisW5lKWG9WY0w--S%3NET0}oWbXr#8Df`hgTE(Ynpx}WtDMf~fR=c4n@8yr& z(5b}n3>{K--e^4kQ0;B^&*&fn)9i;V|CW)~b42y4YY#O@B=1|hCS!Eiu`|#9G+0>R zms^eEX?O&ldK*nzh)>DA)6a_fF=!7A|L}w7{MZ(ry9gUS1 zxVS8IRttO8z{=Uv<8OF$K*rA~3i+1q$U9wk>FAiC9!g67*cHW8sE1K4&H>+6`VSR_I9#5sNKluvbOlgM5rUJ z*%GIqaBtov$kFLcwdxE_aof$xYFv2&S2*(60)nKaOH{pSs1Bf|@SShB$RdYwKnKOO z|Mb@nK6^zQ+Qlsrr?t&GsFq>cR13-iAMCmy_oaz41#g5U7Vm}M5rJC8YWhzxU&B~0 zKVG<)dj9 z>-;8@y(1g>9;vtiWb8UQ?=d@~Re+p}^KbI*8J#lxMN1h7S6Giy4`p5#D4JEe+PZS# zl%Qb4v^$#m{7%KO$|2!HNsAS;XPky&g9Jxc4vKiVSuWlxojq__np6cM!E~p=0c6#I zXb`98+kbzwxGPIfp}$nP8(R5Xmus57$Fe<1MiebgdS4DdWjK72%Thyux;TlVUo{;X zNFUp5EAPJ7iwLNMf4Z%L@rUL4+Su3#+E?`*Ek)@$M{-yMjROFxR{dNA$@SIPqU>4tVFzpk)Hqa$3N~BQri=?GUH~(5%+1^CKsC*&>W*e zC^z_8#pE=#>@t#hf$U$q9>Z}YhM?NrPsnR7)Rc2EMrG5~6GtisU4DbHsS*D9jZ`i` zd;<35>DyoK+qV)*g|Z)7D8E-L&yJEOrHJ6JLkY!KTTUn6DSfE**6rK%BwPclA+)&& zDwiE?(!4V-JE8SDpv11yh#e|41%-fFnn-~mWuPL>H7}e92ztK;Sa#i5?4mxio*t}^ zTNZYi9@O=j@K{}pnmJQ-N!bE2RQ8^E9p|>~6(wb5Iy80L57b{|1f%>R;+)8SW=3>$ zq3pdJA+-rvo$T@r06(LFhGNHZynCm<=uXefG(p)I)y`T(V0ARF?6N&}jKwG&Q675k z^~I1&vr&fn9&x+c@|`oAQ}xW<3=-A2y!7~2rs+y%^f;e{bzc$B(9670ongNDq&PK& zC9c2}OZIH@_iX&FjaV2aE|O3{Q7`w1SV60mXdpjv_IZf-blrtGk1X(vOAvw8fv`}u zg-|_6TBvj0GRGFBy4m9=WIwd=u&by4NU`MEffkn6R9*fXWBV^8Cx8YRy3)qBXAfbs z6!gJF-CUc<56`PpmD3bPqGFv%wD)TkOp13)(ryxMt zFE7}Wy`ha4=o{z(-JbsEy`?ih*|=1{?Np*UjypJw5%Dn2>xc3DDc>6(gAGGZB9#Pf zuL-7|8i`~6*&04vAMG`?j8aR|9a{TN?)H2IcJKyB!J$)Lwm-FGQ}OX-6`IUbcIU~L zhxqs|^A}9}K{zzp&AxlL*|OM?MQcbnE&!3j9xHdC-8L++Fph5;?WDCdHp#YIxm%=B z@PWY7NeWw+pkHN!MHhU1ztOpQ_ce#!mfOM9fHwels^_Xv*Q{F}^kQK)=GmJAaw~$B z3AuGamCxa+<4W%IgwDaR7aM{_)S`Dq8tUO2LjBMvi%9B6WB-dF5)gy%N?Y?FYarZs zHR3Bc`Vc73C0W@zu9XgbfSrW;~YZ25>)%y_Sz$4zEK&G1?udZ>~jcd8fmb z56sqRSib2~Q)RBVsLafr4*8ytrlwUH6A+a=-TQb;O2 
zRKhK9Mog5GQwou^Bp625Tr|gfdt<;gT7?j(T_p90{6(yq3_Zt&2e~{&q;{%(mFJBrxFWCl|1s*;uLn#EcZfE$}Y(V#3Z+L+$U%BybhDlX~eBK!%M8sz=N(f5Ah8t3C)}F*o*919gy+u8CB$_lOmyb~mO zOnDd3ki%3EEV``DU6xgdQH}J^O-u;j-hBT0>ue8z3^Ed>_$<25A@wx<0tNaW75QlA(t!aNwIRw0O-XJ9d>BrE$+ z?+#X%LsUpnQ4vXIpFx&4_qTZb29)X(XZR%+M^4ZPX%axJYQ$6Qw6vhkDmaYb@-e~d zv@PT&(?ueONW|FKI>zoIOFsvcF10LeT*$^sU5)MH*wy#OS-ho z7Q3WAQJu$&IX5$tt?$UO3oLAlBueidv2f6m<(&MYy_R@H{`RZDq71#YoVy%5?Sm-0 zMB6+_s>R0^Lk4Tstl0#0m1cs=%#5qe08)3LpAr?_g-ByaBqJmujXL1g-m{-);L?o& zoy)$($(Ixqgdo#1Z;5x`neyqJ#&3`i71 zLS|2^l9JD0z?)EL@o%baby(hI>3zsIRpW;rejqu2Wd!q!OG-+t^05kux=FX`a_ZSk z8^S}|1O(|7b?!z>^>pgIR_^NYC;3IxuE!9Rf&)dcg=my6_eD#(5O|qmUKN5U;h#~9 zLftW{(>y^fbG$nKd@tKZszs41KonU5(dgy~FZezAVeL=Gl;x0=s#)l@jO!dcb1KxT z%VIU)fJezJir9x)dx)_+2`8LSLWU`{)F|E9X*C_W8~PFg-a^&mJw4ao(P(12tB{^Y zsN>N21r=2hEFmux)6djC*iRRcC4qi6eX9B1Pea-ShN;()FLN53&2wY?OTx% z8Z~E!DVN!#QOC3GuA#Wl>t8?p9q0V}@4x%JeS6%iMVDbn)Su@-_@R;_lhIF7eXx?D zahm3}%;X!nUWPip8-@DvT<8gl*5mXg>fG;sPmmNjz2xq1tbo9l&?-cZk(!~#Q+qr2 z{j_e~E|k!k2CB)7O9b7ve)dFGrvX%K8z&{}MXmlg^w`eN1LK1dN`C2p^@!JEDx-Od zYybYgtRk}p7(_M>6|v+CTaf&ZOql~DgpnHI;^QN}k~?+;ZAzOuq7-TOO|&sdqt-43 zvm<9`>(+dCOuu!nuEgW^=x0)4XnAIAvyIWxw~4sG4KZOwr-~d#LlJBhb{Ns5 zzso8r=R-*ButM;)oR5tL6-DS&*q6$l%%=|@KEsNkXts~-cw(3LdxdXXrYR%4-DsyL zh{d(JsK%=5d4dlK%j5C4az9C?1u*ZVka7g<`gVp)I>p6BT3Cb1HvJ$mp;+)J(3nvP!nUxc~~9fdPG~E%L1}4ajYS6 zw4^dP;Bdk{(?Fy~WV1(KfX0aShWy(-o@nDpEK+91yCu_#PjZ!;Cz1GXSYGe##uC6P z6__%{c~u(`S;0_^=hQ$DFfi0rBy8jzS_bgH=184~p0+pp<3peh_JbXy60ejFVjV$X zOlS7jcuiL(5*+Ily3T*(~sOjOX-j4K;Am#zpW7 ze4q^TdEIt&Y#}zq-n|w(yhfJ|iQA6yqFYp3#dOobL<^B&`EtUJ8{Pv>n98 z0`Q|lvE}$wczF0Uc0-Gyw>iK*_Lil$M?yoSH3F!zB7u#o=UN@=+hXN+K(2Ui=y-=( z25_VjNIR}T681My<(uc$xmiK1V>S6#tOHmk_Q!BrW<1I?4`dJ|qqX1^)&s95!c<_j zdw3OiK%3@}Qi|+EsIYlub+saVU;;Y9@XcFFo?AK|#pmaf_6|m^i_ppB#Dp?R8lKNF zn$^C&3BWptTg1q)J6}Wzci4L0#ve(e!j%PcVy_c#@V~Ow7}T1anj+!yY0M1`0~2MR z9585v*JegAYLAbITDF-w$%;dZAh~tW0RV8%1yYVqf(tymxkZFt0e>%f)BWt6b=9osXDFIj5 zhA$BEBUxhNRU-O}rx1u>h3VYa!|_W?OIgT3;l*8oq+T_22Y9oKqrf1LRQ<-2tsga2W53I1;7g 
z&__`Eo+35Snr!BK>Ad;l{4*|X!05!l#pZZM?hvtkh@Qh>jLmQmUqOxM*O};DkCu;Q z60t}Lp(OSU&1?Qa2oie$1dYjBCkp2K!%pA%_O|$*2nq>_!jYIK3}E7}Vu>XHVGvmW z*t_SB5A9`I3YC~e%x=I_lK4m-cN}du8MV>M)B5olb)%q+MVZG`waAuzHT~=HLU8-U zXeINqs!HOqfcYvUseyFyMvk~}$QU8G>NrS3JdhW$SZx5GI-`(3(FN(3j|{Ix(~Bp_ ziaaS?K2?A5&Ye4I2s$GdZA;Q!af)F#hEp4qTilAhw&w7ZKaath8Ag0QE9_)8PZ8lZ zj{cyHHt)ihJ|W@&o5>tH>EnkFzrXOlK2E%FOk^;T-=OxhVx^v_4nu>^eT7v@egUW( zd7MgE{kS@1g!CXlM`PFM>QF7x8JFeaQeQxuzo}stsDrl`5Sx0`R$fyxxNu>Zhr|I1 z7Q6T7pD7R0P^Z*EeudDoLC`elaf+Ed4qWJWmm|+45V;Krn5!TGP&KbFOWXtpB3ppC zlw^docqbH>G{}gma5!I)Yk(450lOR9g|L4_riEddE@Ln*cf~EpV895rGU8L*omiG} zpRuRu&@MulOj}ekkhV0aQe;Pv0arAPl*L&60v9qdLvjdkYvB^TXmrPlQ0O=6&(QS-eZ!*Ygzo!AT@wg^(Y(wK8JRl#})3U^=VA){*&)>om@8{TY)Xb zAqR_(jvw1_>L8xjYoG6S@A)z4@JJ5DVQi$-goEgxgRe6h&4dIj`})X0yw-CM(#LHA>=jDU{DB177W4^FrYdn9!M90 z6pgR~&)}6G&Y~o2*u1$3;El+*tahjV`7E4`aDV&%`{aYm>)8JN(|`ZP%KCrxbcFx8 znE%ViH2nYULPT#tjl)C_V&wO>kTW;V9+2uH&tN}KIUCE%PgjsBFW+8#JjoWL( zsrjzn&G^<|qXS4&lC(LQ@*{Z);4PA&`CaMEE4Wyu<{`aXSnejj-Pz`rx0&T=GepuP zfGf>z4QzcRo*N(+5UecM069Ab)mIQIUEG)+BcyX>s297>aN2D2nA{E3teA&el z5o=N>c~FQyy<=mk(9-0Hi=0{D3%}eP_EI~I(BRAvBBuWI!r`bB3&45e8=$aZF{T7U4XPeftFGD|cK@8mlF@3igbv98O z4xOqa`a3e1qbaTtx3vsB`9c!DJE&kL<+}=6-8@pXG5R3Go;d>H7&8zW)(6Bja=p1_FOStU$m6 z3GgC$B&yLYH^#yuOHOM!OGRY-JUa4_FXn>^IEK(%P~d8^XYzo1q2&9;;6B@YBppVA zNsA-E0;&>;5G1I?l4SBG@7#ZfKr!Ee?BgZk!6DI1o-R~evkwn{Me%RvEfd3R2~t%F zI5vbhfA&sjYUEjBny(VBDMkPiue`daoptjSo*onKPEI}_Zt3N5c?E^<<6StV11eqb zP+s{SeE;t2J}70)d`$e38zNl@nRy*Ke7L{er2pyKl*1l<#Aci9xQ7 zlpjv|FcGmDX-y|#C35TsX4**5gixFt&REp}pVMT~5@Uh#xKE$fMza4oh_Wx6*mrZC zyjrlu%)JAcW0--9o`eZCjBdvY|4h3xpFy}Cq*!@`(Gv9w@(9L-i$limfV`BEV{JrV z&{@1juKxT8f?7=k`jXTSHeLDD%yZ(0u}H}PvT>`uc}b5~GKW%1QY4f+Fc3h2c!q9l z-Ie>O;|G@g^^h#~Km8acqU1-0LIaK}A8ZNSgUB*K*(JZRJSHD);;Hy-FrpZ*u_&O+TA)h`2-Rkl_Lr}&%t;Dcv|fQBPt<3!rN|0GCnInd(S`^L340NwMdU2N z^P9k<%@?Kp$WbpO@tlX5yp;6Oj$DY_5e>vRM`W-9;}mRl4Urk z1ws1-L_CRz%zTT=A`YR^F9rpHtN0X*zEp(d&42{S-O50?DnJq7 zzM{aU%t+U<`d1gcgh1amjnl2y<*sGMc}CdbVnpxN0MDd{26d7E0)AxkEsg6f 
z4fP=pWFZZPnwV1axqsRXJ7MIIjV^-Tka%>;+!UF4fM!n$D*=6&K~a^JE$8Kr!ft%_ zYu?wjEWM9$2!;5!Pk**&nm$E!CNWPf5Z>~7K`4&N=%|FrEublo6BaQ0D~Gv;I(hyA zW07S@S}Bh{{@ZxTh{dBvj}YmC!0Q)-FT0kIF8&iLVFQYbNx$jfq33WUpsEfX7W5;- zE0CPuz^S{02|~gSkQgRA8caR=bRP?ONxxv-O!6w}U%dS664J6GV;x;245>TpeKVhh^FDy__5(6A_1= zo_URxPEtc;TfpOwOi6fFPQ)&o@83X* zlK8K2Q}56-F(E`E?xbX3@LArE#B|R1kb~07l~B(d(hzc%_k&oLTns7kIS=#DKb<@9 z-Me?klw1+Q($(Qd71+NGXQ$n@K=W(7{!}~;T0$ZQsYvG}GYtbwH|SGP{$fBY%}_;~ zKPM=*N=r+(S7Rm+ld!=9>E!B&a6|eKoN-gGS8_?%a(*!gd-O{=;fl@{k~q<(;#=QB zMQ!6aaPuEdCu2Y6Qmww@nwNvJqZ#AqbtDH+Oo33%DTkE7jnvdI(t?N!ULFVaQGBLR zRlD#`E@YT2O!PqK5jJRcBWF9{NC1D10z{viKSQ?iM60+qVbur<3DVa(Mo|vYU99&=Hd$n24`@ z>j45TO72P3FGwTydM zeT3@uaZUuW4ao6bB<6*}+5q^VhDlc+HnEf4Mg-tnY5co7KjEMX)E{j$TM}bJxGxrP zCK0+W#z)C{4;*qtn}b+j>$VIm&O!)cn+?2tW;gJ23zAk1Za~;vEiDEB0ni%H`3c?l zxRd!6!iG6;4%wklkjR`K8qXziRs?b&sb$ZfH{LJB^Z-fM5}R1A0dJXvbA>J2)yP>B z1PM&`%8^sJtn(*4-7!ao&fjaEjLZLP^x;2E?KDw!0Fu9lT_MYm2{5D`5}-2^WJ+>i zmdac?qyrp-A+(O|zqmC%1YfK_YU929FJJ#(K6KEAYmmk22!vf^1FnGkY&gw|D}PxB zSQ~?GiE|8c?``4iCijXFJk{{2p9oCDu1^f_-;#AUz#GA-7s0`rb%x|GNS(*QO!&@* z(@)l%!wLl@P`q8UhXZvMyf4WnRIF}|I^U(`b>%pZYr`q{&@3;%lJLMeuPfKU3Q+)) zxoVvMy!g-0s$aHU|9|9J2Fjl!S9CY|#HI_LhOBlw@3LWD*}FvL`)zz$J>+=SP>Ni` z6{lw+FOOG^cl_~>@yLGuqg zevtYZWs~LX-*V*GMT{$khwGBRURnEJ*E9mQON1zx!UR89TU(=7ZL`z78}Z@=E18Z1 zdm+652$mexcy9|@=gZdpj%Ss>zoIK|kw?L&9{X2M2Wwi#_S>f`?gRgIp)k|^V&EU5 zqh;KU{-I%j)JAEHQYE3dpP%}r_s^{obr6q{&UGi;kX#H(3M)H%DyUWh^vt*+aOD)J znK86{%pRKKXa#4slVqCn04_s0*cNZ&?w9?-jz{6d<4Bf`r*`jmn~xA98Uf1{*?)3K zvVK^Q_pgS$80Y(^cDkcP_nOdM4n$OPND?Q}1nLUrx{`1f6ZgS`ug~`Ij1h*6dVTc3fpd^3 z8?bd@za`^jg$kTEXSD$PA++o1UQrdo%P^$Ki4me>DVgA#TfFv(yQmRSLqXwViZ!Nq z@A{B6$;pYI)B+T=v3cF|$9pR<7lO_~%2Gf9G9wgiI`+h#W-J7#)^^*i}05H7K?tVaumoN8| z*7fK5t z`M&uicSR^Cyb*}9j|=d}vVP>WGh$7oPZ7dnW%ujdUdM!>@v@ zx_p`p_qNdJj=$jrsx0IdV|i`V8+(>l0MDWMTbQuw zg;&4i4`=*qoZ>cT>es$4efR#%zRjTS?cO)}@rR*TP_Ncjkc;|wGl;+6nIzQKaLk?{Q&9>*ENriW@1d)6yeRm6T}jgH~qG_R1itzcV` z{NA%zg^G`}I?pEeXL)GyJ$PgOHRc8DjBdJ+hRyoPpJL*_?w!D6D?fSU$PQ6au^qk| 
z60A1DH|x&ATWmIHlW)LpBImL`@bcn;{RS10WY&J#%KkkdU=ofLB@ynCG~kA(gUB*D zgDMn9e{^OctfGdTLUI)$4&<03Y*5qPOz=iLSs`p%=38)wDx>tHpbt1G;0hwxFs!1X z<1j-^BbCvhicLrvBsu}w6YM(3TJj`RsrBu{r-vX$DAU}KU+sn(j#dA@dpEb-<0r>- zQwmX0j34-WXNHGIY^deg_c*tl?RzJ?#ncTC-_Xg!|6Jsmi0tVM+JDD+-(}45Jn#E_-uL&<@ALaUAM;4{y}!$KUFUfo$8jE) zKafRWqSPTpCJ-Q4jBdxG(-w01tFRk6>F12vsCgxf|W?@&!!{PJ3 zlRAfJO>$TX2PD^IcmOy&=I5&n6cYPDO5UZWEEI!!l5VxtNU)R9=1*PdmjHemq3kC2 z3k;KT_!GdU%wC(4?|%`EptGw)PARXh4NTcs-C-bVM7#_`1R| z067lKvf5x@q(Zs|U~S+bjR(5w$^Q;A3MvB^Watpq!#nLy@0w$`efb-R0Z23fhqp+2 z?c$crt$^9oa1NS$WcEx*2@<++L>amp)(21s$AN6)h zf6T6tBg~u8hG>02EEIXJiTMGL7#){FR_h_htvC+JBFVcjI(isxwVDW1R0~suPrT#L zKi_~$Guk|vl%@Nu;R0P|v(gh7( zI1PLOwFA%4`F78vs(^=6^tT#KvKc0nF;8*mmoq$n=@YSXyWVJ%l+1DN8y+ue-Tb0JF8o+Tq)ZS%u)= zhX&j>j02lDP?t2FCe~M}?uO(|ZZ+7S!&h+G_=W5uC;YoW8PJNk*S?cFTv8Z%hH3){ zBiEwxz>BK@(zBXLNIQn{J%oyO;3w2llY+)X^4Zz8ttzlfTDB= z85+JLMEE0%0UZq*4RqkX=1x=q_4Xb_fLj3Mmmpda!=M8#1n;z|P5&8G&=i9JPbbhB zvBXj|AwmGg6Qma+)-6AO-GMi_0iKzgEoN}*)Xj~v8)vr(CQefYb$*^4S17{*ER5x+ zl5j~RsgTm_#|pPuK?d%gkfs5~rQwRDH~(!s-gwVH_w7^O;d;!gHh~|FaEQ?HSpz*i z%DCFT5_u6r$fsbu0Mwlfe6hH}2sjStP&lITIm|e+Gi}8mQK2O0jfQ~0Bx8D#j|r8D za5xJlGY!a$yj@*gt!n^b8RD5!J0AWT0!ayZC?NZ&_-WqC577KHwfBeWmjp8r6}+G; z2*py=ibR(z*kPbVQW#_(jq?2TCLbuXShFO)^r&-lt$Vea^GF(1OUAo@i;d4IJe$L3 z_Pu$${>E<}3A8RxT!{ti>#x5iszePvflRTvABZYK4LJlM4`EDiKSWGZ1xB3q_MS3} zOjZfj62D9ejS3o27HhE>a|j4R#}yU(AChN7lwq&Bg#Fe{>__=Zr%#KU)+Xlq83caJ z$vfj8xvfEFQHv?!bh=2AJvn7LB zKo7RMt8BkAuTj0BKTD@eNqE~KzBJR7_@SP4wgQPlN;K`U?uPD@VvljZ3TsWaykB_bB7>Mx0S1cilunJmIwRqo(UH|1T7CDyHdn=ie^`Re<~fzf6S5qc>Z{#j9Uk|BkMMbM&a(aGsf_SGZ*HrQ=z5%fv9RVNYzL)1pxu&Yh<+xy8vS(b^!G4rkP zOBhlSBM1!0_IHmPe+2RZl9Mhjf)kB6mCd-*GAxkXf{Kub$%5*BAC>s=H*h-uBIH?E z;0@2u+`SX6CInN5t$;rw5 zhR*ltkX>O}yFiIKUipQpm6eru==E#YycoE_7R~*ieun-sGoUung@E)wC4>H06P>HMJIt^jje3uM5P~hhw6Tm{`WtFrl63qQtVc{ZCxX5LadLQI}p|`9oyh3jDG4E7Z;w37cYWA)j=b94cnRh{mPXvJr4r(9)YZy z6%6QW_7c&(L`#ry1cY^h5)y~e#Z>|9ie3kZFhz)Cr)BE?;dAi*P0p`{{S>MXC9JR{ zP$fv7_-W)SeW)WyyGvr%=+a@lcf=X;VWGr=LXp<%zj8V{u6zHdz&BIjbry(aO 
z7@_nK(Qww#NkM8W_&^LfiVgz)K=OivSVevZ8lZKWy1PXnU&C7Q1&14%7Pg^zb|c^k zPClTyqs!$rR;Ap9;Q%G>BA)BnokKQ(Oh|a{O2P-I^TDX1UHbO5?HYs+_{=J)P43J;ajg}8(VYNV2 z4?lagm&Nj!->1M{z3H2K*(U_xF$kJvWm~hzDl$|7J068*J<7r+b-pL3Aya-Yu0&1SNW ze|vemy?RuDy<)85#jssTk3D&%Hk_QEzH3S2ob>A)M7h=_DJklf#p%rs84*D0>P5Cd zvwsf>j3MeIVkzNHZMcM35UMPZ&L+s88pG}dC|Tqw4I(L&mRk$_l|7LO^f>}CAzqR6 zK$HzZKIELAzaOPhVa3yu)f*SjM%hGV~1sK@o4cZ+jpD@JA`2gAw!#ZS~B-sKb-?VQ%a3Clb zeXM$YRv3m<>Vt44|MBRrYY;@JGQ_SWZygq1*^Q)79jSFp{<6(_SFP)i5=Q4^`uKlm z*Eo+XLm}=VyazBehIn{RSR1tY*so}%5ZngJc)Q60jkc6Ql~wF@2(T~om8*dyn+(i-5x8S2 zQtf?^50s$uoF)iU85c_Kfut1%#i)+Zk`mXCMPHsp&XMH&PMR@dTA^n$tbbrlZcy}& zTotfhb!CuxnQn||ETpo9>UFqy+2eAPMa#7GN1YRpv&+u6KpyrEESiD#a7EiJ$St&5 zjEzD5!#gMc=9LR5U?Dht;S+`3@gwQhyu6Diw3&P={ofi%Mk zu!h#1ZfM_^;DsCBgjAmP(;x%cM|NN;Bgl{t8-x(bLjLQ(706@~t6II#)9;x;eNy-; zAVJ~p+i^!5^O_oufl41!I1%h0P>f`%50x(&6WaV-0D4%%fb}h|DSKi1uCA5P*ATT= zw_>1PIQ@bOhxp+M4&wybN3SRpv=!L`hPxSh?dsJsNY#i6jzD7`tWK04^g{lx4JC*l zHt-7DZguON`q_X-Iue;jAPBV7^lYBb$G46NSnFna#t?hrz;P;NsUkYD0%;1myp66kGAn9e!Sg z`G=6hVB&OyveCyJ8x1XvMoHmVP3f>#A=GP~&Dek}J(0@7K!N30*6|mf#2@Nj_2(|- z;NY?eH-^^zePsZ0eoO4l# z`0Ke&!r^`X}cy$5hI|DHOh4!rh-mFA;EWY`1I1i7A890v{vq{}2W znl?rvz#}RyD-v~o%{3Wd?{3Rv?yP%DG|T1hEnNM%Pv|NZMyrg*Il^3dvjwPzguq?# zYrjdjg!oI0hxI5$6U?}9Ob*R`iX4g8`%Khr7H87!-opHN6;$H8u+$N%Kz4Vf z$s``dJS_uaO{Ece0DB8-N1{q=2YD^-wh90e48zLEt^jeNEy(pdWuRsxP^BgVaxJ1Z zAW~jErHht*#k%Er~EAWX?&m0pRTveUY6-n&Z%ciT+72Z3Ml9)HN6 z*5>b&-k@QlhM|fi;?mq@W}Vk1*}FANaRVZ@9j@oUa4&zOzV^jH z@e{k+DLk@W`FT=@ju(Khc744*Dt*@(wykhiEi`Zzi_)iFzB`h)l(UUe;7==$(=c>j z%n@zyaPqpfqX8HGP$6ZTx^aLR`bqBbZ}$z9P2e~p0rM@E9*&0Pir_?>2axV_pm$)* zXD&ud9Rg~JBMGLb#jm3CCgz6({{F!N0@Nl9BY<^G3`K8>JKC_lCBFkwd6~c1PKrXb zz@+%60g5NAt^OUj!R^=ia4y*{E)nBSo2Ibgf7#gh&o@B=Bc$tb4wJfs#K+y1!<&Y3 zNFZV_s5u6MMAKDn&z^P+&JsY>qE@X+j_5zK>Fq}ujYfjOZ-sF7{*Z^=2q&v;WpB9a zq)*kyWy`L>{lH0kH4_2^ftLm@(M&1z`ntNABP)QU$(}*Vd^dbU(;P$|>L69XMhb*l zDcqJ&V{vxp09QC|9!f#k*aUc4o=LARDng>hp@~u4bJjDLodsB+h_;9Jkda$k>#y@; 
zpb~ZnIfyXQcc`zgPw3@OxckeCEN1X7*ezrmuEWj>_MiK0F_;Y$Aq!6b6W;!QT24ef&!jD~JQkQw-BO5$TAGT?%y(N=D<2|MXt z1RCc+!+9NUVIy9`+BIe)5DF*Tb{|D`MIv=2s+%DTYB-Lmd`i=IlP>xDYO|f{&&5sU zwHdrI!A<`}_Jqhc2>FF|TdOe7ML)o(v^NcE5PkiYALby_3p_;py0WK*EA(4T)_3Rx zfsDTPS^yBIwoy|1R-L?V2viR3hPgfGFoE8j{s^728a8}+^7wJx&NAHi;$HM3Iyh{v z{}}Yl5||bG4nf2(;N$aum5kCtJWqs6ZZ-k~A-y-1AD52jY`FTvoUh|gCLHQX?k33c zlpC}RI}YY8#coHj)KE$wQN#F{{t{TVTf?91AVhl_Xz<4dBPqzp$Z35D8>>K?5Dm&_ zfl?z~3!3OvP}1e{DTO!LcF7W|Ejv%ybL=@~gn=*JC3(bqftE$0?nezx3UM;YdxM?O zj-SJBX)$4U&h10Z6bJAt41t?Gx3QHF)4RgPz2rX&4AJ5rnsGyIlJd`9t!u@+-b3ei$>%>` zLp&2WAd)L1ZkGbdLD}e#pAVoL?gK0al#?6M9&s-145r9v~(vdt+pme+~<4jHqL-5;xZrv-upYCk=jL+{(9rxVVUAA+r z9MPaIE<6O>AZsHfCaf?-OzR)0q34xSN0gzQ{gt379GnNGJdeu}Qs_<`0;zNYjBfo; zRd@Fn6y@E!Z;`J4^Fu&@Lva57PydSzTwQ8w!1@G2MMnAz>V-vIg|vX^O3vO}gNRQd zSjXVCE(g@S!g;KB8SZM!vXz*^++&cVKLS*kLYyCY5MmN=#H?6xS*ZL73WR9^=we2YV182Lp!OCG!RF@Io$OD&~W~iBTt@_XiS5MaQ`Ej!N&Qs z`L`)S(DEFGMbm4G%~GT~#_kvl#BH$Z3LIbxe~G}rAz~f5x{!=`Ynbiuv9A9R@d!zi-+_7bKIw4D+%xejhLEsP8UP>S7$F51&K!cF zqy`pN5-(=hfru7hVy5Y2{YJn8=jd_c>cha~9ES=PDTNCy>~Q{ug{mw(eJQWQYPX6B z0kkK>6&K%#mStcfpr3(_DZ&zEaG}iP>2mnEgGpc^>4IalGb)CpA;?8BPY>Ms5T-f< z4twkBo6bTyj}{TlwSke?Q|ZwwtYYL14c^9boj;w9R}w33#j@@6ciE#svpApe=V%n< zo`RQn9!~}hBvvE#?xN-$7D==-+StQglFSlP_x#OBsIB<-#PZwwK1a_nSp@IBMQE&e zg*2VIJ_&fFt$3UvSerN+#eIID^$!S19&qw9cm#q!A#E6k$}f)9Z*ziVT}bZh^uN+pZ{3^E~bZ#7fc^RiB=ty9Rf8JzYy{CefEM zb}Iqk&6{^4V+yUmbhzw%8u3|L$@!-rl)+=ro_&CmG%^3FgA`&tg87m283AE!D-we= zh1MHsY3a4FbQ$Xn7^m$j2EP^OoOa?rF2CN*S*S#tUcqAcB4CCXBvl+*56chj833Hv z5Iq|t3y1G6JuxJlAqd|s<9j|oEsuZ_$}?G;2|5RR0dAN*{%1ZX zzfj@Gf4Y=$K4e;OPO5DA-%9qLNsNFpcV|!=e+2}6#226t4x=s>s5NqivnQV`=S;jy zc2mZSTwyyf4}_f~gD%SV!oArKz9+9WH|q`TdYRej12BDSAMbk!f>ENCYH* zZ}BnIc8<=fV`8-5FtF(U&iqPhCiwl}U61j)JnY{I`7~K1jAmLK+P;bj-8NZBAn+pk zWB+4+^Y@cWG3T5D+jeKUv?I-`0OAnSy=&)AD*o`!G+kk6C(nW=t3=xkJ~2?I-3<96 z61{xe_?;lfL6NEn>_08>Yn(?m0S?3_qt8E`T6TwEkM_qH&7?h-jtjtq7-gX{Zj?Ie zA@TTO&Yu_%jek$lAf>&*&`!5lRX;fYAOBzo%GVEI_0E3vuFN|E35wYk2V6s7RS;>@ zxQEiWyn_qFP5#s6jPuKN-=5AnPuQ9YJ5tH7 
z*mcQ(Xl1NBWW7W%F)|JBFX!8=ll?KQuY0AUahxVl>9)d7X7ok5qM3X@7ZJr!F)8E& zH~|)>ZuX~Chb}|*M&pDW*hx-Q2TJe!(dH{u%H<+df!m!A$^t^Z+;{cgM}qgOyEN>aPZcDBo*z^@GQm+uH+ zJpSq@7SFslKi&Oh$|~`i)boLweodACV7dJ)_x;b!{2uQA+-@Y+d9ZEk!RvPo3_M*{ z7=8cK4+mE&{xo&o!7I$>Un9@4WlGfLr{=xeImKGrxV*=4;6?EZc39qtgJq)~6Kjp= z;V(Erm@ZoO-YVHo(fR-X#{c$_h#p4UPA;5p-2wg+9A9l5_fYxZ!5a_@Ha?XZqdh=V zs*TYP6d>a8YL+2n1JuS;swgQ0h_UMDa?GPA%O~E5=wZ}F`bDs)sk5my6sr8uHMBm!(sK%q{B|%XwkTn)X_}?;aVOALZ>W^E&fWelyNC_y z_n80J=(Ojtk2bYqIl#shSYma@Q|$UOI{p59d|M7&3o~o5(7YKtZWkcN-?Z;aUjvvv zcU@d23uMqHnCw_U=a%rxmh0Pl_EMUK{G}02BKdUB)bb6g`GBUTCfF};vi{0}P=!Y; zE;ESul&XW2{T^|6|AfgRZtd#kq8y;wo1ld~f`&rsO6&o4)8sOBPE~7-h+~^P3c~z7 zopMX9%jU!Z%d4`>8taQ|d8;!%w5Jn@K)!-ly(h55CO|61NE*mhgpn)aJebeORnyS+ z%FD|eG6rtwFE5Sz=ZNDR8V~cQG0ia&YtQ24|$zlM5A1PXgTC_^oCJ<}Vk`M(cCQ|lyXHXX@ zux>#GLbw;}6b!|giMj7oS*oh4!Kf)TfnXt~`l42FYL@^h{BdI&c1=*V`ru0Sz=?^shu^cY@x|IQ)vh(j)dDzgD3H4JWTvX*Qwb>f|Of_L2Y0<9T*3&(G*V${&ioBP5edFaXyaf`=Y-yYAr8)*(U8hYoe?l2WKx z00o<~ALfda82~1rKH9D4W30H;|HuQ-=$U~-jYnfZFo4-Uwk+_1yoJ-kx%cDe=T~5F zdQ@7cC&W|Y(mjFpnby7oekZr}GbAksA~1+_@ow#~1IgyI?_SnfEb;`y8e=oMYD~^X0bCJ(_2YaN1Gr}Qo>5?)ds*K=z43GbFi~DFx|jH; zwIyqEWeQ=$#It+L~z}Z5Sj2;YF5SXH;OQQjs zWE5a4BVnw@dGB65vNgbU%D%691ctTho@pUHA|-)S=$K)$(CM`xV?RdT`MR@juJt)r znj_=P>WeWqwWz~WZ@>OZx-Hnl0(vZcG;xBK*H@GtY>d`nOQSfJ4^qm3e&Y_7{my7D zEP<&W+)J;iS(6(T8EFBsp%F@+{b)9S%b6VAP6HXfTa4A`VA1~OyX|N7JwerUe7USs z)IV5yZ;Bump4hoct8(Gx5P$-;87hwgta>H*bIYoPxp<3_Di*@DUB<|RFU!+RtvYGi z>CuP`wn*1U+Zzt_!)-t!P~`YTMn-OjL4qdqF{nMgzMjAO6q-z5_|k6m9kTeOB2Y7F zZ8dnQdRY5*$KZ`s4cg@MP$&5&bocD{6BTqGDh=p?7jsB-^m??n=F*G>d=QEm%Xb7a zht`o+IhmS{TN+2s?8eJ|uv8o1T>N~xCd|d$1sX?gtO;<~1Tmcv?9gr`CMNPby!&Ob zgq1#s7l_mqPUDvUoA9Mm13eW>?{~eEY}vlE!IB;U9c}UHi?B9R~;;r?XVS{ zMfQ{#CRUw3Ssy}Bb?V;hzvm`l>vdV_FWQZ*_`cVz>3H1FM+m zS+OWp5L?CUf_fLjAZtb5r()1%eul0~y9Yw=N~W^5_HE#%qNgcUl$IERaN_IHZ{iTh2RS?tifnL@{R&Li&~y6qeC;r8Z(bq_AEXRcI;kGy*%RxRy3He!V)1)u^i zy6mU$*yyCBq(pRB7#!9p3$?l~xgq^{S@~$EIk6vzPrh zdCK!e*tcd0-(QXeRAzjzn1bu{cV%dy0`(1gv!1_C0bLS+s5B0>aYQ 
z_x@Boiv~)4FPnlM4T^zu$q+XB;o_m8tQieO$CM z5&K#qA|i4#E1SbCtTp#_#;WjIr*OW6TwPG4VddCNdCp3WOFrfkU%PB=5(d>b-)>lr zjX~5M|I5f&O zM=zWF`DbT4Ql~`z-D()RQg-IWo#Ye1XalB_kpKHDJdWWMiMWJb(W3|scPKYR&OnMT zG0CjR$hBhiRw@Ym={tr&oLXSJ1U=*j;%JDVMVvDext||DOVGUai_|AbEDy1bRk6bL z#+W-Ad%U5Dh?uH}XqDcPnBN2cB^f|hiWud}YHD8C`Fu6y2SDLB0afn6>LJK zo~kuH9D!7QeCXct^3bc$6_Q*w{m|r-m3>ql%To^7TkoS~V;kV= zu^&Ng$Bd<(RKW~(=*#$H!337vpNkjhjyKa3;&U_)mYiO$-<=acxAJN795jwiUyZcA z@`|;{Aoqyh zeR*LZxe;f*zh>TuSkXJZbG&7IB!K_y%kzV;7qM?5DQ>Q97_&+6{xof?gm=$U8f_RK zcD6*gMM9LA)rp!#p27@PS1fI7a7_oy?!UUU9UUT14lkVixMThcgk))NuwV#7sd#j( z#d~=>K8$yF3Yt;USDL|*<%AHGTbv`l5Lb8(pF^!Svum8jha19>%ndFZf8)$x&H?i6 zFqp4X1ZNx7Xx+((!jr1`{0miGU0uyIGwsn~@ORA@7aiS;j7sJ3kWPLFA{!FEwH1kz zpTbicZ7?O0$_XqOgV;R5g8`AIGuACihGzc;@S1^WFZk!*h;!^TrqmI=GydHu%m!Z! zWcg4micTtuT&5zx3zhCIev6zHe|QL9i;V9&k34lQz&N>y2A$AIkr1CfuDCj{Y4gIJZNVgiqdwIisSfnf}X7yoQl>4R#(l-ODNKCvPl! z>$<}(Ynl7x(8%cqER$qJ9=O=lMN1Hx{8U1 zLj8|S-8jML6FOP77J~o|+@hE{He(;cWI~h#99|!yT8Mib(@;w0DaV}c9C?4;xd0UQ z9doB%e{l?J+SBrc0`fxcp=5 zVlme0UHjz4ZaU21(trOkbv$Ac zQ7wis4@Qg|Hie{DPUAZv6xUW|M4$@xf7lFsVx;YWa~}Gkyzssdg;%xQvQ$K{Pp=Np zLc37_6oTT=VVr&umo?HPGV1{5SjJU)9$4A*(DU~s z^W*C)Kv3*ajCoSwFOEGidm%K_M=r{Dq!&AIokC%Ovn#-WEFERRRC+U%lOdnd1?Z!R zCZ!8?+~J_)fUy8k?h2IHwh5ugJgdxT*~N~h_pRPmm|o}yTG15hZ?=zB2gRi(G}w*} ze*{eWp-F9K$qVPA<=hg*d)JTcoOQwFb`a(`cGcbB{PEE;diW<4KzYPJ`aGRwFkw9D z4|;tKvj{fI*J205O_5tmM(mq-7ds-B3sVqy2JDjqXnTT4| zyuar1<;&}JvK+2_q54;#MM;V$J`6yMg04?qhlYVK>jCSMu>N%=0bb?mkUt?ULx-_& zNgly?#WQ%~hf404s~yTrOJD@3P8-oDXovFZ`Lk7=*TXYA+<;aO7w!7LMIN-MXBvxT zpkb&)++OSE)ZWEGMY~9goeG_USzR`?qY_;yX~2>5$bZk0sTVa1HB=4+t4R zZ1L!^7$14KCVm(gRQ%?T5&Xb`cuAvxO)whi4Zs|56RNmtzmdHcvj?oRxrG~&R(asMC~*|az0`Q< zsiK&xZCjsGeNvWG6Y~|ftzcRp#+sGMAPZ*XB8J!;`Gs?J*@`KJQzDV&N6 zv9Tp~^V3DQNm~QknF>{y8h)4`M>rL$5sIclNUhpy5qo{XO@|;~z}ozAP7HTNtFpsh zioY%!d*_V$=+hXVjG&%~ z7J_E1aK&?--d&(%LhGRorczI#xzpxfl z(&Z|Q!LWCvHa3a4gA8OujzV`6z?cSu4r~^DlPWN5Nmi`E9xCrZPkU`qAmF9k}YeY5v&c`!o?mWD{ZEQOJlFz-q|~;YOq7O&`Uex 
z0@y5^GJ`)}K~ll=3MLDKiG(EywK{bZg1;z4Cgs6Ebn4nuc?;nu{naJDP1rAoR+_2{ zRM4SVe-Ua7t+XFtqJ=4#%{Y!#s(A90JEy_eu{$}t%z5B>mj;>h0@3hbU|)rO^eOme z7=UX&?NWeKj9iG=*}XGAkx@&I817o6Wb!{lw$m*qe$nP@Or%_(Kt9lr(AjA~GMTx0 zPq8WG1E+#e=`45%cRxl*bLmXkWfN#5fQ}<2;sr!;Ta`yF6&$HnW-$;z2lV1%!ozbA z2a}+fh4*I9{K~(g9+%puxJd0bH8owE!S7eGAVskL&Fl8r_hP%u14;q#pt6k)$`9I$ zq3N$Xm zU-E##eY9fF(##vIx?f099S-4TDOwq^jyShlFVeO;hywb-n_0-(Y{LHqBywR4NCo0F z5};}hOw0&)Fs#ZdK(C`m=d#ZI<*4_CBKdf#HkX%`oqA#1tCId;eTYq^;N=H3^d%Az ze@~z8dCnJCC2#^k{{7dN*DZN~&kUT>+3%VM7*G_OYvoXuhL%iSSa9%NYeW79L0MYE zi$;FJBI%(vfW$vY5GD7h+T2lby_-TQjF~v}6W(Eqx0V^bz&&rsfr~_iC_JbL#faP5 z>Wn=?SPYbq4T9F(k)FXu^3j6gxDcx<)jZ$uZ`(kr?k*n;Et8P?t+HxB-VjAL!$e3` zQM}2MKtq42FM=m9=N^lBopk`GL{&wnm9GPN~%=LCZ!V4aqm zoJKyPFXP0OmcI^-Scxev^;#Gjb7G?RTfZGuR(VjnOf*$KJSHk1R11LDT)m{#9Y--X z(%u?tCU-|K8*P>Im-;>qIq&~&jUKf|a*R4B7qxW~I^;+C;+(O*_WG0J4=s2)W3QHt z1qI3Dt)=dKZWB24{JN7T$jYq{6B`k}>d>@KhuJn~pEUN+y5@^U8&)tD(ZPzetd!g{ zI586-kq2YVZ7XGBHfUXhk8J@UomJ^3F_0lSnCM(^6$_(I4M}F+FSRSsgH< znC>#*R~+27z5a=kT05BJ5_%!zXa0%Jx$cC3C8|g{iH!MCzoq@@p0(#A1&rF;H^HR_ zkoW4Qri{*4rVa)DT@3Z)@maCxA*e%V0or3M(3c;?DZ_sg%ep~yEV7*^hgUOcyV{}3 zWz!LPpM8&Ns~oFNPabvitzHls>KEEJ{tq8&FtE-pyf!+G(6XAJ)YCGvAfIQ8du zf5vtay3HWeK)~qbxUV2Y-3fs)7+|V63^y%_KMD;E6)>jO)QdY`smN?yP=}T}1Hu$2 zKSG?egCb_4MuVgtfsM8PaB}$hbqLw1s?P^(Bq?6u&FP~hj1XFy5BR+1D4kIw33z@J zvhOQqel&pwIu3)=%WmI=D)o7`7rxRma$V|Hjj4N;UwjjuL}36T7bjz)Pyixrd%$Ri zz9qVeonF7Ej0Z=C$Ic+7@r>)BK(-J}o$Xz7d`jeXq|`D9*Ln1TL#YIC%ys^_i_(1n zLOMBAZY{%8D;*oQ0A)gT(_s_4TAy_fzYbA0IN{8@~E}}X9 z(VYAP;1x0ND3M=Y4|9bsV=Dkoca^RYKFV=JL2&P8FUUa*P#)KU*2z~V5s{Qi~Mf2)2chOkIw1g{?(p4Mc`;V__s~x6t#ZmAR#J_abdiTyp9mUG630za|O5 zM(&{VmfyPUpq8UH0E1G*nG;^z;sQSr|70HkfYcv9!3aD{sF=o_Eru~vFR2@nPEuJ4 z+G-m*KF=n2e+uSRdMHb&B^<#+9{GxxWGE(_FMzYLCY`a6KVvl5?>>UxuJH!a$0og} zkabDZ9pIU&{TZV|CTT{==7FWQ&vwkv1!V9CqzKPa4QmaNh$>HM2Pe{F{2AV9M~O{# z&NtxNZ(>sb>)9UM%tq|nB=Bdv%G}`%9})8!Sv_9ZSkeC882CPYLU$KTBzSNikqF=@iLIFH7S zd+Chz_z?Kni^owhK%4J0jP<8kVrR95mdwf5=whl~#-nCvXc+sTJ@UwzCkuR~>}|$| 
zD%q=<^pxn`Qi{0IhZ`*MYXBo>D>f2@K-3oBMC#`7RVVPTz*I#l`z+3VO4^29Iv!2=1M zk=Kj<0(9}8`6rt5jzqUjo{z`oK0Y`Wc?-dS#64f$vTE|1TugzW2D%tR#lan0B06!> zNuheHJ!J0s79f-LDwSn=c(0~6fn9Th(ewi;BR6reUU@&IZ67*V#h8^ zOklac%r%s_<38vBZVDJ53`inE-sT9xYA>p!ls}IQ$~^cX0T&bz8Gn2Rv=MRre?DU* z(l#yLtF#APA`;34#-H68hM*c&8wwVxYHFr71!y;D278w1LHGqDKsZR(ir` z?~Oq}(Zbj8agQ{ZP6sd%>;JGEWFDH2U1;ra&&pk(7YMR!Ll*|CkwO&g3VmtACqfVb zo!^W@I`zlLXF4O20mj+Mg90%vOGKzMa#5n8z6!=rYFr~T$TWj~hRI_LN9ye3Qqlu?Q3MTWgkK zYHy6|61coB_Ws=@D(W=%2ML;fi zqco$IdbE`|cyuBZ9e^2S5*n#GBj@u}JuI)N2u+mN?O&I?pQ+8<<&z{Qz&kUdL)rKD zg7zyaXrWhjSsEKYB@!B~7NWV-qYoWyPzRi;tep7Vo=^ga<$(SGMPE%-oxXIhF9EHT zA7l+Y>?UGP9t9HU?W$+G+~Kub6?a67Jd$>O0w{n3`*Lnh+)|H{w!z*OeF~|#SK*9~ zO>#PTcp1S^5Maz<16FbLqFN$6wfwZwEr)c8q%T31R(*F69c3B0{nu?mvAsZNKeR+0 z6-Guzo4Ql%1|4#Jln)=SdDhf1k0X7mnW!&z%eb7MevE`#$Mgki4m7b5#Bp~lx**cD zDQjx_GSY$JTVj}+bH+#kC1^Bl<} z@}E@WhkrNX>tn&-X0I7+U2y|aoGquh@lAFkA}NxGNC31EV~$w7S&RwL+k+7j62jpu zaa9Ag506p`h!x{QSG^=H92g)f+S;QLmD*O%cSE1kY7+;}K~xx_TD~Xk@mQ?hsU#kcrjTpC$YXzf17|}u)Jmi=b#08VpEJM6Tj+nX~R?vZd$^t0@iAX zm}RW((s`xQX&;XER3?|8hkm!@Vmb0V=3LwiS&5bVFy!es{KtR)zA=V`?y!{P$ z_-27Kx$|>rkb$JqZ9LF)s0cYwE(v87x4(Z(fC{P104tP0)0Ta-;;%WvI*<1qW*i(>Z?T~~OAi8_AhcnpnEKhH5J@|zZKZf4-8=0t)sHs~=wEWT zbD;Wu`CmmkopZ*|sjI0GyULg1oEZb)g;|0noAOGk)_dEd;`C5mkTMxq;w>zM9I6{b zBYxLgZticBg*edTD_wgkYGevr0=*N2SD~(Y48pP|Ioy00o*!B7rJFLc#$_F`g zr`xgHJ{?0`8L~cD*KFNuiXrW`5tR;FsI0F12Iu!pKx6COgt#pcRZYmm`k4Jl_lHgj z(8mJu+4Lv+!`(!w4n zNl1RFxE>rEUySte4!Q!v@TcngDOA-E)LG(=5bl?h$y1z5&1Q0%#2<4x0ymtYtW& zTkiL+d6OuUdptAX2$BEa`^K|yc(Sh{4gsFowx^w;T0lHd z?i$8oPfH-cq3-mf+3DC)ruTNYLs+;q#<}F4Peo09#vwQul5UM5jvaT!+Q+$EaSBS# zW3`j7!6XRf%qpZ?)!?g3aedc@>_5@KHmEnVw_5zj9fa8n562(d!7|yj-HU-dfQ-^; zE&+N9g)AvRq6H#~tlWO24}=A3W}bbYx(F#iKVhu~y1J2C-9CQG!;QSVOtrOv*cM=k zR#sQ9c-Ex!B}9$2eJLMqb62(aizI5`>ewYiq1I+5f*`o9j3$bB$Bqe#^O?&ZvW+6B zhN_+roHdqdXJ}K^qHH!J@@{xURQ`xvFDN5%Y9jV`lX|Ko< zlE|9N<06%B4^`^XLFaK1bL3I#nSJUNs%jzCS;L{)0n&rNLPO&=I_%ueKkGY3(E)zv z5xwEzqmI^DqUWsUMeUh*Z^vdKAD`f86P=;&BN+h*TNxy6E_+%B&%Eiujuw2$G1{5i 
z;>MpkH(rBOG!U@(@e4D=l&nqMCgx1`LfVl53%WN}q4h`ZZ@|i26*!I%hlXh+%p`w3 zwn*vB%k$zVKEAWs)_gM>`QK*e5BJtPHhJ;3xA(N>+|XJOC;?{4BKwA%*|XGQ=SxE} zT_Vs;F9dA&Qbn;kyt^r*Tak3;+lM=1*uHyqe!2^6@U@+a)bE@=zN$kcMf&x4`X|9S zQq!pbF03jwe*R!*dP`{6qq(JTmyI7h+dhAC zExT3&(33#)q3e{45Q%Hs3VB@R?0bGN_ES+LQ}jmxzqwmX`s@pLDbsg63b<6 zv7=pVbNO&rlyiN8^SG!fXpK1Sz9Pv8ZMJkds^J*JQj2bvDR#$4K3@B6_P1if+Tb}= zAwm2a0ssieEymlz!jO3JpLxD}u&3Bb{i;^=N%3E~sf|GyMhN!ih`v@M^8$%Q%8rJA zr($>P$brmYNt$g-LGFj~?s4Qn7-Vmp`l1dz3{8eybg8q$T4-vWv>ybY8|>LWdPS%4 zVRe$n&K1tzZv5VL_F`7(f(3n*o}S*#$vWc__pU9;+i)8A*{|BNgVsc?tS_By?nEeTu2z8TAchc3+RDFW7tK*x7g(71%L<^>v#-c|@*tf}$rPSJ3KiLj@qL||9x z$8RQMo$S1a;?_p1Z&~cCh&yy6QLoVAw;#WzF>ZRtJWf5PBr-8^Y_w)Pb(awjlDAcD^1sb8P2%}{3md4v%;;LKSRWc_aGV(p7RnzqN!kW z+Q%V0$?9;^`kp4u{`RQ8?t!rkc5NX0+>^_7wopwEmB?4K<*t9@`08SEd&#Qq!aKKz zXUxuXMaDind&?KU>uHs~jH|OZcal7{*9a<7n4k}|N2wr5d@)uU&@N*X8Rfy4J9nuc zl%oK}^on=>9gkf)Wv0e_SZS1}H!l@%+|B_9Nlx0r$Rb&=rc=UV*PxgNq zci_&gou-`)FL-C#jo3HT8WcPM-s31~JNDQv@{Yamb*C0dz7(IWhO8;jVXrRquS|$KAs<=!pQ3PpejxR+f% z2~Pz;`Ewf9rpV6w^tw8N5aoNDvK*6IZ`&?;;3>5yqrbDas=#-rDv$ktA?9%o7b#U> zNq22ugH4&s2kYQ1RfsGIAjeC5)FOgQR+- z>hrc07wgY<HFy8+Ceu zr|rC&Nb-s@@Hebe9BXp0HPS+_O!qtXIv}F8VG=Ngx;y7&Ul8YI;f7^?T^{`chDQ*xZMR!1_N_4*77aNPx(oGQ`Y$ z4{k~A)D9BvJ?ig+4&LBQhqTcabLro@;M|>E6kj{`r5zeHOv*nJ>%?S5#Xg9X-tILb z^pkb&Dxbznd}cY+G(?K+-7!ddtE9-8l8N}`SLjBd2dm1F3)%KBzxFzTcEr>JFv4Vk zM6kU#cD)8F&199g7|S+G$pV-~Wx9Wd4t|sR8Suq{iKV4~c}C|PRjoIFmV%CflU~Jy z-|FCPZ4fc>S6ctuBsB26+GvxAU3-{Jp*_Y|-`(AVr~BGPYi%1k8t%!teJj1azDUM7 ztN^iD_WhR2JmvplAoZvOti~YvQ}0vO50H8s{!9gA$~)}Xb=KP2dH9wv)b9|}&(@x^ z8%RcmyrUGHa5<_$>W;uZRxhr8yYa+@G0buX6YdaYJmph(o;{dlc03%d9rNem@DCDbfG$J=wh5upW5t4Q!VQha4^Y`)u^H>H-jNyr<>#^a5kG-@7R}QnKulL>6_g@?ppBvw{9mN zcAeAk>XzZRZ?3l#i`6I3Mykk@{dCLarQw%N|K)I#@rue8^j%|Nx>G21ExvB5AMf~ zp5P-UlGc<*Z>QrvEprURn%qkjYXeQ$d3htlH;{%`nE&!9T}$$5yKO_Ed9nDKEpIDA zs-=e;<1LFKy6wAt^*5b)7Kj9D)39y#9TZIFrH*){&-Q+yN(DF)t|YyrlhjAc2sr0} z3NwB<93Tk(d8^U*SuHiS^68GyD?7qV))&0eMxwUy#}di5E`StpK@B8Pb^X&c8MUHNlr_L={S>niy7HF 
zio$yR(AmB0V20~0r*n!Z!^#G6uUd#Y^N{wZo|;%8rG z-?DkSR8UZ;{*D$Vr@@CMPY$R7{GpaYYWKQ0Ymr&mIQmZ*Oze@JHL++P6H`|{>{rr* zZ6D2C3(ue{mMT4m`{%63-WcoT=ds7OEJY@@7|nx;s})l1C}@v`;%@$>cGid+*E^0* zA9>4eHoh+HSK4)dW#@38d{FAvIH`BCT@9C=bzXRjXN~j*gtbWzm$79)KbAtm)%k#m zXU^$aBphL~NYsR#ZlcTsCg=mwe86jOwi;Kfm~nm-El4|b#w?_5f0;(rUfw{>I%dy{ zj%rN+R+6XO=0XQKRsXgZa@FAA==IE=TFc_XNZS{JCE6G;qF{JjXg?=FXa`20g2nbY zbs-h|W>;R9nUt6U?WN9O>I=AKQ)4KOMhO)a6%Vv(OOgL=U8jEHswTz1OF%Hf zS@WTT-Sd|4(RYtV`s*xv``=s-YuYiR=`C_q8}v0?fCB`j&nfcWCd@(nuEP(=g^yuW zeva1~UC6>-yV4}gLTBGi^bmyL4+&p40ilMZ#oV>>8 zHA*84aDv|WcTyIQ-@R(H85<0cc>06fvO=k+%h=UpSumdvx%Qp*<4AJQ}@g@=9)9n$1=!-I}No-Oap)9IH)25cE zd_whd+XjG?eP|?HDCF6Lh0{%qeb(pFQh99XL-g6VF%Iu%$QT5+V-9IQ!HucjAcS(P%E-4RIyO|DZfXE$U{%v zxQE_s$(|h3%+0t4TmhyX@1vvGmvR`i=^GT&ceOL3wH}CCqDal$Y^ld;^yM?FisUK( zTCC1}>R7FaaDQ8w;k`4?5XL1NG)T_~u*lovD1x4{JV@M|(1g}hk26c=J#QE_I~pJ= zD1X#pqcJ2zJ69~|tPOK)$ZE4|3u&ujJ3nu!vL7Gae5$=HT08O7Jr~s{=Vvt68I~r# zE)?VFAGnR?*J@UT-S*z4C~0~j74=ka_xRwfpZXxhDt9owzU}*^*TtLXKg}HPNb)`R z{)r&kB6eXBbG^QF&FJ4?9cpi}=1cC0%NLf%pW3?#Q<3augGV%WyY7(uwk$MR;^RyD z6XS=1-{qPY2N#Kt5-2OzEtl+WOh5Ccy>^bo*vA;ZH`slCcm$^8BLSoR`46dkK<>M0 zgMum0#`iwDM5Qa0-ge{ZEiA^YfV1DTp)MI$H26xoHdl7x?Byy4ZFpY~dXD_5g-9bFFPtj zV1z z2}|QS_rSx=jhE-TIFD8mFt!ddjqSt5rU7Ua z>AteMl61>RnUBlMLBZo^_qKQ(cyVmMrpIdW_W;u1jVxGJ5$X{4&|(DY#^5q$E%);z zuh6JvRI}{|hAO?FuMLGES(2GHUAw%F{@`@%j0`9GRl(BkMfb<=q(ynY9 zyVnV8egr1sBE`Y>uhqm|mzL-p`oMHO>6R;fTSB*RxVxp=-(0b`^UNV-zHf5=(85uo|CtJFFq>>m4|`vHA@C&eu=uh8D4Nh8C=2F$&SnJnS^u zUO58ff_*Ur;&Uf2FLB%UGFAT5vK$b>O?P2U|*e0xwV+r+g-R3X=u&qsSeoo$dIToo#GAQt#!3-8~$jC+Pp-!;5Mfbm;jwaFZzpzX-4&PmPwX$vnAh! 
z-%B+T=2;R3S)J{|haQ`q2j>c=K1bjaSL!aF-Q&b=(ey?1Od*V9MLwAHBptCW;w z%|-G<(36Q;L3=>QY{FYy>i1~cd3bscuLG+?#}k#BXIxf-z7pmASJ(W9n^}u)e;({& zHHn%0`SJm7fhRb&NCjnamX5^M?#!fc?t5tv-W+iD2#(O1wfY%TYoP$$KIgE}rv_58#yDsX>VHWfipgWflc1hcW!Wi%9`N1f6!p$B6Skv5zf#%9IT(?J@v4C61C#9?mmAJ+73uNV#qEt|5t zHY)Z1sqVd_qRO`IVboT;wS{X|1TkO&1r?Aa!2nofiAq$WWXY1TZKV|zEDi5wm(DoekGo!z9z z>I$nf%hvXp=4KmO{SkIHHfPSQr|@X$Y1Pk8EtE)5*C^pc7JopTTxs4 z^${2YgyU3awK#qYXl{P%MoV8cpK9r3Uoi5c;wFEVOq8HOa*5OkU^9|w5MLcy0+z6# zcjEu#6V>0Z2ivt$x;BSB`m$o4JPuP<`Zv_AN57q5CUW~A4 zK)>|F>@oBv+~NQ^99{>h-|gBZRohDb?g+x*lWvb9{W3)Pc5+t7g|U_aRU zUVrIrIzjnxftOXsFb(?s1zg|_^AfGpk2hHFaf^0t!vnh{MW!>zcm$cKZ@lc$2~<-O zs-28E-_2RrAB_62a`;9KCtCWByv0G}rs!EC8MV62{J#}&?HYdO)>>kmQShH%b3{== zwVHHr!632aD*f$*{Dc#tZDw3UOKnS0wN3S@^F3k1@AP|fbB+yvn$(T?rVHxo{Z)Fz zCXH@n#q)8qMsci*%&g-$BK-{@quJ9qDAbXWk-2XkB5xt>vm@v1(jA7KW9+0?y8jvf z5H9y}o9kHjk;Sg@ayEX%8VttWWL^|KU!!Ee5weksuN?kf8aA_Bvx@QA-}$$E9mNy3 z{Yw|xy^+v&jpgJfWQ<3uD>(HvQ~P{)lTFSatF5o?N}o>RX$-w*7m6)ucT0Sl^AxVd zPT#GiVDWugV)Q*X!6Wx`atd8!nTuakewodhG7hZmv<{^OxET9BOM8ECagDl-EoggA zj(L-FXMggJE@mXiDJQM7Z^2AT_RASRY(ToAolpN4*`hkPoQ$ol!Qz*#4#N$Fm8+8q zL-udpOfr`l%#BXxYUSH~)1(^+VXR9UBm84@rb$Z}HJA+;vo?@LonR$FjvpqL4!cZu zD!F|-)n{y+VpmQj#g>N{IOXbhTO5)L4o{5SKMA~OVN7Jv(6?Yr!5E{3=DpQ<6Q6Em zr)n4GjnB3Z{Vuqyah^N2bdtGv*#P}QhRfnoOHZ$P2775@>{3&YbLrLRHB-VI1p(vf z4#Sv4bYsvDp{I9Z^Lvw)9;J)1!X z*jD|_e>6_JGuWT&A!`+g01KvWusDBVs`}_`y=BhuTr90yB-B-8=KROS(a_Pa=?#$? 
zIRc8e=-+*7(2{aUH5hN0>%{2PQ>a5Zqi^cn$oUtg zV%TplA!$Ojh4+dZ`jQE^iXqwy_&e@$k$JrDtw`=dngkX5=y2dKRLt!Qj|q{7^wW~0 z`k+R)!f7&b$Ye43Qf_Z5}L< z4I&HS-!?rjO+}x)6e}(i0>0Jae_VIf=?E7;lU`M9qos5Z_ePzj#sHM4>gZ8s^GjD_^ z7CJhF0~Y^taa%ZJ#g?Lw_zyotlXgjvv50`5U9zc^nn9cyW=ofsUd0`MC*tf1v@q{E zPFCPb1cBTP+EVl{PB#>GHMuQx=W*$ZYdL+o9`8I$n=7u7vzW-)&mw3Rlab?iqo8@b zpO=4duRd58UZE-gg@VYHq6K{q=P)Dt<-qX4s(Jd}x>E_9=C`vS69Bk{?R;Aw= z6bLvNgPIN&69b3A0GcK|@LWbK6TQgeNexFXQgZ?V3TYaT^iY7UpQJf`d$B_^*I@XI zG2gipTu*NvF|wG1FCais*4Q8Vy|*^Y8c=E2RoF9Ag!na@4uO zFP@{%SQhz)L`J1Q1_|&`s#A1JnMNKZ)h0~?H1y}YB2U1(S!I>3))+-a(8xst`gmYcuoWHax05u8?O8rAkjUKn?W5U?`n$~$umzhEp{H$6&~ly4{4uK z(VC@Mq<)$#tfFRHkG2rpoE;!h)P0uF5rcPqEh-!rykomVN9^GyKv`jF)l*Z#WE_eGAHb{Fy0wB;W2k2-sK0dAza`we>yzA~>Q z($CMBWegRC-*0}(PiJTU{b!%w_+M0x78ScM386mKe9tV3=v5#??$FCy*c4_4=<(uF7soBqp&^?QEpje=S{=$)`UX>IwYCkk zvL%{7#%F(qcM~>F>E^+MsIwDD{0v~N8{p_gQl2p6)^%%#uHlGO}1>Ww88E_6;P5iremzr=Fu%Bf>{>I z2yq`;S+oo}rp1NSDuX_to^VRP`ky$$JTo3Q`sw&Iqg?8}P4U*x`#NGSWcQJ>MN7MLq2NJ?^p1Ni9z031 zTy95Te(p5+algW^F`*0Gq*0{Kb4zD&U0jW6yGtq#vwqD4`b zCF%Xq0@Z^w$o>v|v{8)IvN=!u&w9*#a4rrQ3hI$cg?rm=MCGO)|04oD^@<AW6cg>K5$eUTT6^7(ZG}o%A8e*n&`$DtHA&dI>g#faF z)Y2?MZ^-fMhfix^XoK`(y-BU&_!5lg`}*i7?M0q`bZwJjPu0SxNO$mdu94;y40@d{ z7Tqi%u8tn@7waP^vU#6xwIXj>lb(L4-6V+#GPI=Hw7Y*99h?>NTEeq-pKsv#Ee?IJ z>O_~>xzD$O_;0mwEzxz|gYiu&D%V)wNPOpz{(0|=wcqT{0R z(+hI`M!QhBB=~8=Vuy%TBY!t5<)hm~lXU*n7jpT#Q4Tp%=sJ$kvv0)?JulZPqKtLf zx;wJ&iN}_X6R2$L`R`#;`y$TE^@p{BZe!5vw3(UYn>PeS-5+8pUU0(2F#s-oY#dKW zPUW0JbX-LmS_wB^4t+ZHJdxwTVt--e60Gz6Nr{nfAT>&B7Qs~=I1!7lOzJTzK`IWc zhs>&3S5FLTwl@?Yl%39PhmgfW2TT77r9Tf%;avM-;4BEMwIMsn^gdi-i}3JSS4`~1 z%zi5#TZnSo5CZsGv+Td3uX#QFD*ro-_1iIm6UJzsUBU@T^Ukc+szzO;n4;_FNR=^w zr@xRxhC4stuC)r2z}eM{+a{KKu~9G`h0kX3PTOS@5N)%u%GfK zkXJ6yyp6ilYL>k?VL_oLEqrF^wQ04aP#GHH%ljFGP%2%la!U+#dX2$xbSF~DQU<$- zjQSVe)6Si+xE#Z8ouaL;Ul}aGvl%mB*btqGz=vBv^98K_>)YEK73(9)Wv5S~_8S9w zY`@uwcW4eW>QR{3Mr+Qj(&aK-bAB1!z-hn`msbC?LU6l7SR+3NGhj85JH{Tl49u67 zx^Vx`N4eqKzX83@eU^?UAKeNUh89>RQDS?i&EmpMvFf8`8n!>d05SX4!{_Qot4Li? 
zAN@WNUqo1{8#ArDi9NQT9168Jtg0mYN_aJM8jW{#(ar>L8h=0`rhPTNq=S(W#=2VK z(0_cBK`teH>vzbH?#`v6!UdCh(l!mgo{;%&w@N`kA?d4kmc7YruM|PRYLH|CO8do} z|3v_}VbL2_J2xo2aJDT^*hs-QZ%PNP%KMO>H)qPtV!&v$TB-)ErcaZaa!eY2#VpOc zmNWv;o4s!UA|Eu0T3kG3)!ZzjoE!xMn}<5uND$ zOdYFJt(kdVe8QnkI9RZuP%Bf9gm4QP*2+}GA?od!LoZfa*w6xAdvXoG9mr)qt98?u3nldsU=NrwHJq;@3-R9dv z#gkAFnhbU{Ch6+L(1*Qew6E<>0C%!es$uuSsT(J>{~Y_>0hGv{_N|}rL47VB^=q-H zQZHO|(aPyhl;-R7shu-)kHN%f+H{qBEva2dQz#`qK6)?0z3Qtzj8digv=>R>*f&4f zEr%{C-Y8iaYT7{KF%OKRj_V3*;Fs9o0-UXO>LeVHnQ(J>`&dp$HmN(a;8BC?Y|G)P`Jq(3 z!PdEnAnU$a6gMkkK;oWvF?A;6KUFCf?1D=FLy;Hc>WTNSqyp|9;ciQ{@l!|J6Wn39RsgvvK z+?343BVH3?j(OQ@vZ}*bhy7c?;jdmuugn{Wb6wNjKL!qJ44h}W9 z5)HiM)n_jc=0(UYwOo=*rDKhmsn8y&m2(T2L=miSu`plID=y{jqR4`1k^PMJ{HZt=u)z5?rM`n`J7}T8o%l z#YBHfY-gcA;KlUGy@^wNkHj|TWe42f z_Cy>UTv_+5ohEwTKNS$YgVjC8j3k&V8%2fBY`TUy>uc`dNt#vt!8^bfu z3_v?E;WBdDb?WQJS+gLKq;QC^>&B_Fn-1&LdHqo~nT!}ai&6Cf*%e_w0r6Cvm1VY> zd26J~@a#g&&I_6?3%QRdOBnka%^1l?wWeRst+X9H++HNQPdhKV4FZcbEg2N*Q%tNg z9gwF`EkxbP4La>~mM{0?Fly0Wc5#j>pIR_scWScJ0^~B%BZl)}`k9+cJSR zu8Xee&Yy42@#ALQm%sc8_$_eW>vxX{xOAmC%4}z4;M_m^)nlxdJ7L=>zx`6V=F^7s zTzgg8U>nU>IbK!HT0O&u^_fPlZ^sG7bxhGv`S_|CdDrY>XURRDOtG~P`g*)g)64T= zQeh!$?@UwbOo|-Kv;7Au)DnaGCVO?&hcs-`p1yqPV?_&yJz-MPv4|^C*IkkXr}MeA zQ!8ZT+t}6dqvXQqrpqFt`tccdULA46--`JYw~l?f;ZH83YOuTIfa>(BpQ33dNywKM zB9!N6D-1g&SK8BJu?R+>gO|*9R9;84P;CaTZI!^{%YS=}xDhY+K*3ByFZv^D5f1}T z^cqXw-O$mYe?(jBI;}b8U6!S+8+CSUg4WD$M3uE@PmGwl=5Wm^?sTv~?2es@Cpt>X zqfDEY3opTvE6rSU?F3SFOD)zO-cmRC;mngz*MKP6e2PsBZ5G?=Ye%B?jJ6#gI>4(@TVJsFf#eauCKZJWG?Gxs?G3IyMoCO zS&O#VR6%<-oZV!TYVoD?)@>ntK|S-0PV%O*V`MR|uJiQpIGL9h zF*c@=I4aEQzowcrQ?&%V-L(mYV$R7qN@UU9LE;tMI7da zbV~ebEw&s2&E~=LsfPWHyzP9gA79*DJ8q&>+r~s5iTm>Rwlg+D>6YO+ZVTD5_C32E z`bHL4_?cY1a3L|n+G^Cm<-;>ibB*jh44j-o$(OD#SIewSSv)g@{wb?YQxObudyH^s z3RIP1mE|92t=RlYr(oGKV*#Dt2OYkh+NYfpVdggN$+1&FFZX9^O;nkXv-76f>{R@w zuvb}p^L!>M_h(y0oHFKORaFz-zn3;!y_Lf!sxHO9kr$%=nX5?`^JP4CSn2fstQ z>MGfmy6?=$q7fEe9SXy*xBpw4W;_~T<4C!f339}aFwm!j&K=L#yfe#}=B 
zw2C;Hpdyy29_1up^E{n-e#RtW=gA9O7#P;g_8w4)fgwt1VJ$#-yFAQsFeIVyyENpLY}IAt&EaziH7xf$Hnw!QR=Y zeDS1^es%SjetSV*rb8-jsmiYd_Wj_wR|@t&(3y3e^f@F@V3#l<8yGpo;_7Wi{h4nNNBtYGc%r<*o! ziL8Bf)?s#rO{Xr_^nl#6w!9epU|qRPpi+kTx^+#NaxewGZ<2M@<{EV+bFS{r^uS%q z_rQK+$-8R&RDOixH}c`Hi#etInhO~EYAM35u1ynemsu#(f)`Cql zJ$6q+g5sl+dv%86#>z&`N?Ylf-b0NE=2`8@(gbbx>%DV#o}{I5Z`aijx~ubAC%LSq zCRKjoQH$p;hY#Zq<$SEhW9eOnZ~EHMVkIJF>zWNiKJWP%*DilHs4ZW8hwGvfg^K;} z%^+N6$t=ghqHOwJ^ckB{+1M}N9;chwW>{#~{jxSy-F*0){G`LEO-G+Hvv#UD``*3# zC}fSWR;;1-$+)zrYnH=&j%yH)lt^Vj=kxg|LJZf4$Jj^h9%H9cdujLE4;|KBeEBB@x7!C>MVwAhib&{o*|ZKkDJ-2rqShnqp%5wGXsw}-D|4#iIez=b zWQ7ke5}Ie+{4DzU(P#VjziDYuT_2~yF9a(>tqjd{apT(kIdjOOf-aNOFJgyklCe>y zdCt^m6^RXRElh>8)h5JQ=h_)vyztn3|Gu`@>&cVF%iZbHa@c))r}eCySwEA2cGD4$ zEu8g!4<1NLQ63!LlFlNc1rI4}{?T++GT-tHb!`0olK|7?D=Dv^Hs0G|))L(saho=q zUL_nCuI-i}q!`ur{>$r(P8hAmfbQ9(5%V%KD4-`5LekdO* zsCiR%4mRoHr_%4El8_z0ZJ4)=!{*Acj!RuqsI;0|r01^f%XeNA)l!Ko*eReS;xggS z+FNc>SeL(hH#H*g_HFWXSPz+3?QC&W)zz_qg=snQrXk)TuJDHEt^!E-YGlf;-Rb<^ zZheHRk`KK3+75kk>)toQ<5b$-tfT7PJppo+n5r_}QYB$Nz6vv&prfZMii_b7Hs!kJ z>og=~niNhxkWd;;E;SX=_PnP&SKz{?9)4@j;wa1X%&Zg@N08mBP2k8tEyd8DRz03- zsGKN%FIR*_+T?Y0{eJmn96l^tPi^S7*03Y&d2n=pOEBA~Qk>nVJr zJ2kd)qg+ofR;Cish{3GgzF-kf2AC2Aq{K!{dTrn9`C6x6JLc31v3Rv-)ezcj&7W*G zi?a$B??c&fWNczT@6s=Y^BRE}_c{9G7>E4Wk*&7vP6BqdLd=2!`S_bnS;~)FM46OH3R1Ea7iHL}(w?ICuHvS?PG#Kogp`{yli#XAub>75;(tZ8CmHe@ZbDzQT zI$Rnw%s1D?C`w?A?QQKxXuCt{^bz}7rz_;UT7PaLhD=npj7TDh4lC&Fm&fP;12qD} z4*aH77ckCKaC!#`5$Pt+{L78yUCVZ)ySWe2#MDijHhq0Ar0XCpEq%WQl32HkJ6AL8 zL|>R61mz!>p8qJMdzVA=kD3%Oamkb{Q>NFR56t)X)_hKI5F0)J@+KGgk;7ZG>IBc1 zuDmY5@pG+ZeRRifqRd-=Qf(MU>Hlj-M+eo#FzT$n$CRhq)Jq$`drumVDL6{zd~_MH z2#OmiubYaS%YU%0FN;pTbo#zPh3F=Ba0H`-?vE_TEJy zs?x=F+S*t=%%qLvF+jhTg@whz(Xq>6$MkN#tVpRz?N%m!tFu(4x~i7fdk<;FPPO&- z?at!q<8#Ap-5}Q#REXR9+$S(GJe)kwJSZ!vWEZ`Cpn85Q@AmFkNqx4QYpky&UI*m) z@;|vhy45ylF}fq@eCf2O0LQk(_)coj4%W$XdY&6MG;?4I4KyXJ8stld6Qyxc_ELkY5MR z%>H5419BlYjUmZ-1~NOB45N>p-qO+J-@Rm9UrS+SiF`Ok!Nw-7c;ap+kugp?MmFPB 
zk^k_m$+{AcIJLDT?lXM=u?$x_=f#w>)Js3XUxm9G=YcAnU3J36WBa!am#n zdKqXzD#^=x$9a(TD3bZtL(8zw{5!xw?qcbs_w6d`L4SW0DgV>tExjN1O6OmH$bEz7 zFH7jqUq2>!c*)HF^>4a^$@mbzlnaM5lEBWa-!udXr03&n>FGJVlQr;^Ly-~_X3jYL z-xRe3eB%tbeuFx4@L+49)otu|%=CN?e8}5(pO;UU1ot&Qz}hrV2oieA+0MxOTyv0ZV?27v zeff$N`sr$$nV90y#@CBxx~BW(+rrTk8I5+)Sfv=Mq#s8DhBxEf78lU15jx@=FKX8s zqcYSbq)G*+$Le*A+~-c6I)$r`RY?fIqvS?UnzT#zTmNtz2n3lt3eO}T!M23=)y2iZ z&Ot$3%6B%)s`HMP+q^B0QT18$IG%@ts0}-{0?m|CcRW3X9X3PtvguW z8gri+Te{FP%5k3{U0K2xEBd{HVXW3s0(QR-(9Pin9}T!WU#8ni)thyM$ZpEO)-78M z`c=^xxO{sBm|zD=9uD(>3f#0|7Niz3Skt?#g!b7VTIv%jo@KB|pwoXY+GBtqnZ@*Ko1J^(rB`gAzz}R?s#Lx zig#cJOl6Kr?Pj5?JVf7p)M|yKfPHL-;Da4>PJ>EGFcI0xOqG}QR{PitDwW#r5u}@F zjnOKnFJq7n`%Q{+JR~F}5@obV$wzEm=rZ=72@-Mpc+JTv5RPqg4`m!am(0y~fpyxi z2~HHmRKF=6sKfbYq!^$*IYS?iyG8vjziqHiqVgbLdT3W!&y}We(*^2p8X1cE`mV6!$nB}Xp6`9-b576m#$&O3f ziMk@u(O~O{m>J9+Vx)9IndD8aA;Qz0EQ;AcP)e^eE#&~g}zrC>V5R3%B^VHGHt2nw7ZBM$)*z7F_1y0qL zT}<=4ANUC@z#3qMZ~z~I%q~+_Qu59(^zh{bD5nSXA5?6J!H zZbn^cOe){=@bG9tuv0LA#AD|%>RVjGfG?Cko6j8w0X=pf{0?nn@I}jsxfMkYV)IPm z=5(!@_siFb8Vsr>mB1{2jgJm3E~9s!tv}YZtn963f8Dl}6(c2Q+;{Xq{KU7aHweSn zb8~aGajKkK?;dtSiy)Qt=FOYs#_7%JpgNO5`#!XR{n8vpjd<0!(9yA!QO=%HGKk7( zAdasZwAMDCU#|3UL05V>DAC&q6|EA2(A;f8-i6phI1X`;QR&4Ed~jd3B83*{CxF|| zWQ~CZ9QpR#jeUG#LIwPg`zWA`eQpwXO!pS65gMLhtsVG|e;UkI#7dJ8@Ke#Uo4@}a zjq@;_B4c5JnzxsiSKEkS)yn%yi5g9xM_Pkm)3nYDc|6$R-kzn%{BVc+0Cq#ys-|V` zN;XwEW}kR*%=A|8B8M;glwDCU{70sIxcEBos$SG~oqo1q!v@m|=uHrD%ovc#l8|Yw zL{B3|)pt>_=N}O-kG*Pb>;x+7M>hSh)L>erX*fg;5U&;FF>RE`gR7|UzP`Le*7l_1 z*cH^ijL`DWKk-hA`FrS*tRv>puQzx7VKNO2d8Xd1AO<`|f&xs$CO~aO7R>LVK?@QL zpUjtTo;Lmvu|6hfO|UE&Rou#BKd6C^?KF16QEWhmrxHFAp1d+K0kn|R2+$*{!q#SV z;y(d=mQPuYU?k`>0|Q4H+*{dWi5>d3!#-BmYK6CueQZPPEMt?d1CfDd!ug;R#JHvEgZaO1l;HTBAD|C{-_L<;j?S;A;8!u245D$>m zId<%rtl1D|3p`k3Fo;KQ_@n?FXacTtSx_%`(+@~rleNM=x&ytbl8{ZU$B1qsOiL$U zS6;0pILo+>*@ zf4VZnq`}xLx|vPsdB33Dzy%?UDySvv+{EL4_U+r%n;srG6lybI2LsE#-a2Lh?iyh$ zr-fzSykf)Tzzt-d!B(T|&N`FdwWVp9=&h&M;67o=pWH8}ceieRqGjzl;4bKZOhfb* 
z+iY;YQ%$QiX2x-xF2n4YOi@0ei$0{YK<8VvDoK>V%g6-L?9SZBCeNCxRgfpED zmxayw9xfvevk)=RgCIhRL5zals)>npd095SR90{`Q`Y?a^LtqCIs~_`OG=E<0u47C z{PN{Xa$r5i^YkmWK)}~9Ba~e^HUfhs2f4Yq6_JE!V?sm{>gn-tY=-tG1JDb*0IHo! zk+vgNe_z}jm^1fL4b3EnQ+9OTPY#9dB(9%_^UrO{rW73p5uYniL>6$G zh?foG10MiAMoYA)QBj5&{jwn^v%SSH9;M%TG8#JPZ?=Ua%CDNx82J@!#g5QYQF?Ov z<{r2BgC8|COOpy0a>yqjzz`fi{5cOaZ*iDaf7a7;MF_0vP`!+0BUNcvJnMc%1&H4)hhAUsr9~nwW^w z|LLS^gNYWrgxDunjdtB8 z-)wdy@Dv3E$~f-CRJf=`4ku*7YM)7MjcTZnVpc!A1=d9xB1!1g%dO2YQ-1{ATyW{hYiI;5Km$lB z#ZAkh#Ff?P+ouzGep}pgrsMhAX$yRvemebO!;_6~?+@+}K z)XQbctjV!8A)FQnA!@?U zq8hXLQAcb=d$DnH&Rx%#@8S*XIT$;Xi@;6c1N;oLd%HJt8T~38KPr&qoU7E}cpoeCn|TEtIG4LiVYlxWcpzObZNFiJWw$3n@?X{ZkIv(3m?iU)0b z1$yFppU;Drhio?lPTZ!A&kq|mVACjU@e~UM>WR@AWNZ@y7byw|pW{{Hv|PrOTka|s zZPrjOg?UhMbj+C;YESa^{ztAW!7o_nVRh@kepHvB&YO=RE$WC?DMGwjIAPF4K1)!~ zWW)3(S$eo<(<*y&Sla}*Y^zS({S4`)uh(o<`?)8`NI1 z8{d0Kb)4(!Cs9nP_z|NCqMN-G0n0DHpD=Gdi0AP9`E%)h%!5e@3kx@ATSrkJK7{3o zIT;F9t$ALMxazlM>#!r4#b_>m19YG4#Ar<7c67)03VJik+iXgo)J$11`06LNtj+ke zva)mU9v?eH<%VC4s#j0ZyNyM%N>9Q;|2R%b$L=K8@H9M}Im7TUdN(gdX8iv9PvpP~ zT>a#}XU`sP-I8A~9`-#vsgK%;Okj#>dx1-CgnH@9ENWc;HJsOr6C&5Xg~3Ezne=$y zR+|j?2NC~fBoCR+u#{AvUWNehQue@Arwb`g%Wmb=^3hu?Umq5hm7`)Ehme};lQjNt z_?L>7(EN~LhHV7}1@so|1HHY6VDiXv`f+M!I8{2DSBcP#Bt{1#0TVU&=V>&5z)aV= z^fvv{+bQ1?7(+JB;p3{Sysg>yxiFUT>fCYU(hiXsdG(emm>$u6<9oz}O*PSUDar@i{EEHh zL?fog5#GO~qN3v5@#N&>Y(9Y^{y~kn%DnE9(o&K{b#!+dLdVcMW**G*T-3L}|FSXY z;}BBsd#s~ieQxw+7ILgTcbxhb3U6Eq)>Gdu?=7F2nF#=6P*;}M015Y1YpW3M7v=Z! 
zt;k-M^*Xr!(EyM}68hTFwdUUE@@95dZ_Oq-)Y@5(C(F&lU~2^27V?OD&0Cj{d!;7w z9x}aXZ03ASsAY7bN68VoiMFF%GSDYd`}px=Mh5DCF#K}xe<(Z!L;LLb^wdd~3*S*7=}~LULF_zwciq?d_xGi&xa0MdWTUOORavrRa0+a@-fpK< zgVJRm1v{%2dxDj6{rYv#cBQBrB8D<SBF*Er z;5~SuoB_)KQD8SvF z4oI*rKHG!@wFvEfVk}C`X02whEiWXNyrtQEW*bCC#AnT3(OR2)tmLeinE(6t@*Ds0 zk4rec_X-OOAqeC*jeyh|Gj;h+GH1{Jjx6NP@WjLk?8FnC`D4`EUS3uGyO;A@WvadF zcQQxt-~gd78F=V>$eLn+JUKv`<}H?)Yy2LPf3UwFg>^ax^rXZ%JpfTjAfj@A@U{lF z<=Pwa7hqQhg5Na&U{y=K37)wF^f{S7zTstWmE2m!^12f)Qk2qVZaeTZIhSpKjOcdp z-l;%V;ZA+q*4Kz?#8uDSI(9@A6*_m1@yn%>LgQaAG{_X!{V3J^`KIdS{~ILu=g*Tr z|IfJM|K&s61CuWtw$t9Zb7!plli$Es6@mET%{IlbSrOzvhqrhFz_Uu%LRf~s-$zk( zR|nfU%u^~NX6PIm;zV4pj59-=F~TEm>n`bM5fP2!4D|HAuXQXxTuHTqNrWlt!I*7$ z+XxYJ$yf}9@lI_M$Idh*xS(#smYq8jk-WgF>@M^mHK~)k1UCt%6k9C8%VMjWo_u@2 z?21B&ywXt&rnJ_2?>@tgZmV_Mj*7ww)FavEw)%Q*e5k#>?hLyMNVcDVoU8|#nhJ8f zm&L`KH*9!1HfDoiUb(#z_iJB!Zq*c$j>71K$;j~Vhb+sElmT#+ITDV+%j$qA06BSg z=2+y9`+oWI1=NTUxjWghydw-vPp*0|T?xvR0cG?zZw!3fLC({w3bGaW?79!nA)t+M znjBQiy82|*uiL^7gj{>cvw~e5*BA+kvKZ&-5w(2Bbg=GUY5)Lf=>ilLV5waNGV&z2 zjV)dB)|;4_6TsYl1%Mcd^NNKLYt|y_O1t!8`8??H%W8!|qfAjinul)!k^v1Z-6 zIz*O9TeY3fBPy?K(8UFde)MKuv$N^NPJ)z&UD+eJ>14_cA#%aLW0FX8J%&xZj1Wyy zBalwz+8urU3g>7a1rgwzO*#?knQw!UjI*i4U&+ivNNx-&$|rX}Tm+8zkaZno!zzRk z$jjecvzgm%VNTi%SqZsoM(!(Nkd)X}VFocrdH9kUNEi*ek`mv+f9QG4N!U%TTe_a9 zU4malzCYzVj6ljo*H#6OJP(Lk2`M%)_bC3RjcevzONKrZJHjD?FA={7u(qZH zHAKIN!y^0-HcM3Psk-3X))4HvFl~r@TZzZG*1OJF86PnQdD(tsz;EiV0jf!ZJH$20 zA%?9?3B`9Hnu7u4K;lrAXf{7HE@0ht7rVxrR)Aeg5`9FEdsTIRB25~55MVOxMjImP zE_$m|ZtO)+zi&MX)?q(qLtKzmKPTM;!8CplAUZ24KEl&0T)Gs-YP7g8Pp}~5Ux|3Y z?0fc{#)z+?l9CU$VHnm$u(bi_h-5b;2>|Qg2hdJLK7PFI-gYi=7*c!3w@{7r1o6fb z0@VQ0B-@R6E}-NG4OPv9OavyFh~+ z1W4>f;^{_%Z~{>(hLJ+eGFJ?i576X9p(g52N zXaKz<;hV&%w-JuCG@M}&(qr|9*lm+9R3@7zMDVl-%xbb!iU7!PH!3QssnD&kc;3FZ z`ZX&nYhyDC-e!XJ1mlN|YHr41)SsK5zsG6~&b$FFoj_Q&9SjT%d&`v))ao!|G#~0O zXrD-GkONwAf;cKQpND#6@!W`9Kt@O5{5F6z_d-KMF;*SiCZO#$pH7@H_U%z#^VX|< zd|D2joA;JWhYGh{o?QxexlnL^BgPX=!d!34yXAZ^^b%D2{EX7$DT5 
z&zuB#`3{N!vKiTXq(NrojR8?CG+E{C*tjcj_^nEqqv?v?LBggNq>Ob~A!wSPxR{^< zNe+YqyW#XQJ_#XR%91nZ66>k))h z%;&6h*m64FN5gZ*NM8t`28HGGIC1Il9R!C|Y$q9R%M|25*(r|Dh3TZxf@56Z1nlO` z(U-AL2yBP`zR($wqo%=^F@;hJ>}YBf1;lK{6WDkZY5^K3q|K0J$7&ZAv>nI3Bes}Y z3ri;5kK&m+)do!Zrlq#1oy;8Czh4}Djy0%W+Td`O>m>I;%+nD~6t<8uW#{0i?4{{X zp<0`WWH<()v;S|+ZFwB`_3kF=yuKqlUiI@~{CMhoLraS~iHH^ZRb!PX)fQD&sN}kv zJOG*jI|$$VW0Q&2=V=Jx|L7v$cwp1-H3_MQ7(noF5?7~6V+#x2BD)_-Eu{)=YgXRZ zJ1k<`rxclSx9DmVM2_Q5-&~X9Je>l^5P|1u=lj z+iU-DI7iR+?slep+@-VHGw{dAj$1UR#UN{m(6C)5OAnpLu#zci)|XA0O>Q=8zCLH=?v96hu;Ufshf)UY zXK9XHt*$Ma5?02AJFx3&;Q3-vi|m0Io6U@G29@BSh@rIMRV(~BRYgTbBZapoNr_*0 zCyC-saI@j`Naqr;YwZ{WexLX3NqJ9Ebw0?$Q`g(lBU!jIYRzUgzI@UWu!g?rIu1Sl z=Fauwa`o<^Ixc?~M@o~rY2(I@=R&!1tglK+1T?yV#**7ZX*sxqB?S>ZHNr5&K^$z+ zmR7Zc;gIE}1}&Z~0iP*)4*2j+>oLhI)lxSBewov8?mpq(`GChw)*4F17FSW=Pmwm;`uOX|+Pvd>}tO!2QI|p8cBT2HSW9;lz_CPe5>#NgBQ4 z5S))pPBuVpCf$BW3%Fq~XbR;~=Ue+_Us*lYj~iSHAT-xPvU48Kzq7YD1`!giHCqw! zr7=(%WXJDJGtmR^3*nNde#JK+Zsi~F8+M*XMsED#x0QV;z5)yg$3M}>CO$Gg4x1cB z;(ZjxOi+5vXjyMacD6}KuSajf-yiR`oS)(h**g!(J)<&oQ-@? 
z@v0R#XIF-bK#R@T2oc3hG0VcIv4#G+xG0>O9(YqUU{|EM%-SJfVFMO{FiZ&-JO1^d zTvyG9u1cuIk3)L3C(xqc1GaD)aw*cS2LtX1KpkLY3{FEuON(F&t<^Q~3m|ucCeFQO z=M=!@MS6o|{Ev$vx*{;rR~jt=_JAEh<@)vO$@Ww%{oLIh1y^DOpqoIlJFk6O7FA#2 zi7fy;-2@lBGJ~v1)~cr6CVC#NsAZuOPg~C;{sFU<*8p9S<^^e41|5tRnOguWGHyzF zh6Lf6m+(w<(ZohXqHyf&s7k;}a6F&K&(a;-w&YXjDiO6DG&)Hf3f0)we7cmd(!nm%$yZ-pjq$BJw~=syGESUP$T4j{=rM*` z;4d5C*=@d-t)HHrMsb!jGi{<@vgmY%*!@1N#GL=_C9gC5gR$<W$J~Aht8BA_~A|`t_@${wN{vXn+pDO?W literal 0 HcmV?d00001 diff --git a/docs/benchmark-results/tflops_comparison.png b/docs/benchmark-results/tflops_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..7f28d8d8cf120e045ec05afa13c68823a91d4e86 GIT binary patch literal 75345 zcmd?RXIzx$w*_i6#%L6h*hRns78FsWNLPsVU9LB_G%rut9zm!`&Fx+weGt`M*F{Cr?bzk-umC);ftFl z_y3=}k(-jg?b^k6ztY4j-GIgrl3A3zUwP@&!=qQMTQY=g9W5;_jYZcNhW@r+F4X@y z{=dZ~(2het_a;YzQdmqtz^}8jv(-TS;U82Nu@{}`_mmQp)HLIz-&MgM6$#}PJ zw#m7I;rAJ?-EQ*r((`E|cAZIIzF6+`?ak`Ut(Q)1G4xQDa2;*Rw{98FS9YVkzVjPa zWB?CttS{m2yRni9|1OJ9gM*3RzkgQ>$(1VO-**4P6TWz6OQyt8`yIZcB@-$%vp@g( zi&gW}x^)tgDkY_*nz*L0HAAUhX1QeGePn81U*Azy$b(b5TCDc`lSjF?Z%@1wf1!>4 zv!zT&aJF)*m_y$kxkp^-?;|BrU0CgWdIf3D5~-T0302%a`t$_l+G`x_+|p@dW%DXh z9@847?o)k(g9o>7-#+{Ody-vO@o-CK^wJzNb7P@dP0-<;Q6UAHlL~*eWEyFu>*gy> z%ufz|zQ*ZmoTQn0IU&auqxH3TST@;`vdUX0a2ch(=32dwYXb=ZkDCjJO`3 zWvLksi;4!PV5RWwZP6IfiDA9MfwP_!vQnL{(v(12GlIOzWU`wI&T zPME2oqQNNoaC7QOed*kJ#g3L}pQWKJ|07x%Un%Bz@i=C7aJhvGx>tXq!x)gIvGrF{ET=|0aYocj0|dx~i_ zRZRSV&(f^L?c28xA3Ah?baa$YGbL!WXprmSjuU4uUc9*T*vw3ikM*b5cZb{aQ^|6` ztY|-~mV3C1^7Z3Oam;OnS!86S@QtrmnDZyDzWIXZ)Ugsku(-E#4gUXjO}bv82#=1D zH6!grovO^(CXZ3PKBx#ka(_3`Y#o;r=+{UjP)bAD4?3}J&)Mz|wU%tpN zc9WlxC@{0hEHd(%(hM%hz8EVfXKET}uxMy#s58grfd}QZtL>`}%q@xDepLI4u1kgQ zCf~roz~py$zj>km`ppMIYi@XWbQtuEpRK&Nl~q8~FLIDOefs9N_Yz|}d>>DFw7m+5 z^oWiyP|bB8lX9Q5tY~f)rWcDl4+kCW2r-oFIft#5VW87tTQ@e}!6oHxI^_8?JG^y0;-SfwyQMh>3U#eaG~){mY2rGoX z2c);QYAZ*IH`~qK{$)Ehey1cQb3cV8fW_Wba#%f~ueNxG)nPHLlZ^dde69D-snD^G 
z!c3l_M>0#t`CF1M?mu_#oKCIO!q*D7?t+r9mD`OD@$#OdB2pz*ad{l2k8_CFyv>rY z#@L;ZxBWg|D{#_APi1y)j&IMN)7{K}?z2?V?J-s=Y0$&E(wb%L&P*>1 z3JNj}A7!yvUw0h+IofkvA=u+G-_I&4DtrdTdHVYLBLf(ssf3pE2M;DH)gRW<(o%P; zI6m1Is5<=W_Ad!TycdG9=9TvC+c!CqTW?;x6nIc|IK8mDCQs?iJfe~Cm48;z^KPwK zXSMMC+ib!6>!06H+H%YiDDU6DAD@^wseE#yEEY3;w2L<3v`Dg5@KGI;@BvI_@nv~A zxfBDMi_Se^#LN-jHvdPDT1WzmlyGf-GBY`;fiX%SE1wO!t_!p;z4f`|lqYkuh$!p8Y3H{t*}Bs)&fl<*QZx!gb{sC?v_y{<+<} z_dO8^XXgU=(-*OO8BtOm8X5Xx^o2AYPg5vT=jjb#c1o!onb7Ok@kE=y#kvP$`aQ<{oS>uZ{+(VUDbJM z!L9Q3>qCNqmt+>c{>7)2VH1hm+I{2mLrE!fZ-n1SF~>JTwiui+w!0w)&P+zLmXTNd z;{42!y?f6f3Kx4X7Eh0~#RBE1w^iQXE|C_i5-C21;O~UbHYj!1%+Ai9!$gg6Wm)i^ zc`0&NWU=;Q{P^D8yA9Z2 z&a>lY{07Auh%;}Q0~RFuyG8vRX22=#^x<9DN$0sq3MOp0Fq>z0l*Mn8zNxwVrjdPXM^~{ zNL|@48_vpAtItY}ykO0o#j~|{?pMxj z*tAKQpX0aRo+DfiS09YlSe%>Ee13K<0_9seU4843%Nl-coI-Bnro0CaMg;$|Y15NR zlZIkvrV@RueA$~m2Y5=QbR?=o668=_RV6PiElpsK_iTTNQiu*fk^A&Wa*AeZ${%-s zA0HoYjCAcz{$b5eUs|)TvW4tCVe)sf!G5_1ANp%Urm)CT@LmT;M|CXFlLq#3jnn7a zt%3F0%e-h@UXvG}95c}M_V%`-Cp@GWv0Afb0b!}-<>i1OO>qjFF{O0lMa0Js5%y)t zn9yY06o!N(UTk)aOhc(DR)9!O{CergH~PV z-yEUPF_5QaIAgk30jrH};M!#t?(*%^Ir==3xz|LUsC)12?c`Q9GxT*3tjV($IrKNB zXdWpA7AtldqED=I3g#%6iKr+MlAHhzM`tdJV#dYEPNqk)Bmu4Pdj zVaW!xf@E7icJ6{Bj*D~7NFTbjH{9HGW&(|OgiCo8aCwZJ&`i-#0M2o6b=A7PVXMYnfHe&NEsGK5f? 
z!)!c&wfm>L2bJhH9nnu5dm3DCPL_XenRd^$>k_9ipI4e(#C}rlPf&>r?=9N9_2F2m zS#59_SAy$s(_#Af`t|F>Pg?j-l>(V?d44^^obT&1T|}(YG4z?c`dVb~7&EiHVEVz6 zj;qN?=Vi=uIsF`mHRi_zPQ~ z9J+WGX-@6hwJi6^FUBo_%HdDDH^&*Q>-L&4pig3uTk?o}M}RUML?#y2|UqPRjrF)22(P5z^?Sn3Csm`$JWF))a+WI+kbq)IS;lQQmVD zsc)}07k~Db=o=Y1I`k$#8~HE6(37WX(9|wYRV0YV$H#{`6N3>pywSUs?s>nGYHZSk zpqgG~6(M5Vlt-aU+FHAaU5Ik;f1))m<#r*bClipC;fP5SC)=G;W}D-LMUrRJCb)@P zTA$TXs5R6WpIc(r!K~c>NHT_vQ~Z2!!_{ZDuRbo!PAK24MLY_ZaLp*NX^Sx|b=UD2 znZ&S#p13;F9@XGnhQyR$`t9lT@%5;MJG7r~;`*5s#Rmb&1A>NXTUuJQvCO)12;zUD z2Y`^1%&*Z%ULE!Tp>&d!dlL&o^kkF>dy z?W#jYz&~&BglVrrsna%0& z2gr}8!5iKQ^-S-`mbz>!Q_)-D7ctOb=h$EKBzIA#J)@#v6Q~^&=84JgCWq+C6PZTk zQSpmhJ9b#&NviT}^!1-MWEodP9Al1+*|%ium-pX+gj(-ZBr*D;qBY+-q${VW$h^(l zPB~oY1QMCil|dJC>4mSd>YQVxGf@U%c$$bBN$fg?n?T9ev%%7}<;zPFdaRtTo`Nce zUGjt3EVrv3-M)UfvBbEz*2pRjQM-~M_b8twH)fBqu`wfOVxTTpWd`7kltb~h)puCm zu6lUx^fjKuos6~z>CAIR&hIpk*`40gU_IVft-}|RaLCJO(~jf+dkxkM4{N~|F{qE`SDn3V1%W?oFU>Arwc?fl$a`;%;`Zf*pjSgd(D?A(Stiu%=$|J-^5q-kHA@$#>A5yPm*a*(VR zr&`*qd@h?#aUHTP?-`Lee&E2HFHOmGWhS#VJN7qDPFr1(#F-xpT5dfsENEGnw2_gQ zuQCZou2*k!pBmm-8_a(a>A*xI^>SU(rTBZ-QLk|TO5gpBgCnBr5s%_-FS|2m&M?Z? zvF*0xxAtJYua+{hLzd=9y__ys6Cz-5g-Kepr{xUEs0s)O#9~X4TL#mlNgnPEQY2-! 
zlzUUJdxl}D)eU#|aK>X7fhSa|$pcOahEOK93h)UM(naJT+2Y^!bBQ^;9&N4v5+1Hs zP-TL}_mG4bukOr}@e5^XJ&%EE!x>$r9|NdzF~A?@v%XeA=)} z=$`T|58WmP%-lK*eHNYRIbn`xruSb6K1zv;i!*hT0?v7{*sz8$g1brjL4fOH zv24S+I62b~MtfcYB_j|QtUyXd5u4U4;RAhrJnZc3*nt#WPbZ_+B>OF}$kNhM0v9_E z1!K8O#3ppg3n7URAf(qSZ)0)0D_!pcJpE7|c#yYiM+^_ErO&N*qFzSJ!6B#11%sc8y}W^)T?>l~ z<>83C{tjFh;~Q?=g~&jvA5SkYWhn{3JF71rp0^h{Ch}Cbwdwru!w(6DYvMSn=W5H5>e-3GsU~&RFn5noNw=J(g$Y%4F{}qbDRKJMNuX$} zmW)W3v2NS&(a)c+iobdPR4kU{k-_@t`n2BZrv9QNGH7<@v z;S$1dQ_Q*h`soKtLCYB+bTACLm`%yC_^wO59I)0MTQgU45U5Le%+^EG$rNOXxcg+1 zKetQYxL=Q7B9~3O~sU+Kuf>}W)qswEra@_gHno~o- z<(P&K3=dnmvX_`v|2{R+c9LG4~eh(v^Tf~<9s5e+?9!ek0usxErFq5Mqu@gXs3c(J9xqB*Thkom(@>?7$*SFSw6 zLfMb$%cs!g2H!{KzRyIs`H5|JD1~Nn^=dv)8mEl6K9XZIAb$qC^!L*ss~t`?g=c1F zE-jAvL?k$mbsKqYPJ>1l?~(gZgOME)6$C8L7D-F)_735{}np^l^ar3WfnE?&>^2bf8NT?or%~=isOCz z&Z36Q3ci2m&Ib^e8{!`V(>D`T8$OLP>j)p;g@%R(;ApCW$^G>9d~568SGUEz=JKe4 zfm;!P?r#?u1~6>TzUnu0>njj=nk?&Gw2Topj25z-@>IOCsz-c}Rsr@_qSyTN;bX^C z%9rL+ht4pD=!q!ceRO&<4NG-F8t0b@Xk|RToM}i{BSfz-u2Up3AfvGPJyy)bV~aa| z3CPediUG3h;8pAkLM7upH*^pyU3q?HOq91fp4EXc%qK3cdr&n>i1iu77R|2&mu>dl z#IXgpi$!cu?4+QTX{hPsl=tQ#hXudI4qxx@z3UV%VWASrfcAW`F(J{%%>32g>+kvd zGcZdUpx&8C&6!8QC7|wUMr8`(AsIM1ygkQhsL^I5Kj!e_aYNL51Qe!C5GpraJNwn> z-aF(bftOf4rmf|>)F@}yrqX~6-0g6NtF*agRBDZu1w^<1}y$?=HGlBd&3$H z3uLdEpPSp;e<9?U?XI5^uqSoFKLq>MT!^-OB4XMmv%GM%R2t=C>13l~o1M|+xO^E_ zuTy)#J%j_4`%$A1e-dC(aNklUJyA94;DH2Q+xTow5fK`s zp5{bVZsxo|+4r;1pAzc#ExHWcMs7Oy#~)unoPqc_UdL>R^((OJ3Qzw#9qOO<=GI0` zqkz0EII*j59yB0*g&OZfvZOs zk(zG3%6R>9VG>wbXR{aKvK;!}dKy`nH=4Xu3^}HAb>GmNY-!WCzam~s>B$zwhXe)1 zfzcCJn)ZgeA>=rqq`prHz?JgiA81-u*jNekQ$kQONHxNvfP7s>p=x>Oa$a)&=jIP= zfmP#lLM>qGlA%p7UiSqBZK1R4AOR(VW*VUUnaR_IJ)SUwtLJ;qym|fll#)uFO9)1gWRKueMeXp zFfnJq2NncBynDBrZg?BjREoq{i8^)igDm@D6f#0GUT4qGah;FjC|?D{J#Ut+jS?;m z{7I<0CZgpd16AoEH+OdeT~Z87i{m)hUo?j<-}#MUajr`Xwg zYOKcyG|`3)8yc@Zk}?mE_NMVB1L*-|wS4WZXvUv3CtZ}?%BP`*AV2wX9d~HWK!0{? 
zj+x@7-KW&ZTQ_bL;*B{kx>T_U$~^$|H(*(&wR$*M8#Zr7Tw$>=bYJ5%-t4<1y%bQ_Ryv~1q4bao)Z zgkoR1kWy7oXt4~;K`j!_4Wpq@`QCubF zCq9Mt%f$Al7ua_2biFVikQex)p!9Y;@27sba__}7_I5A{t`B#MHMUFWK~S}CpI0*^ zpl6$a;nV*5kGQIP-(&eLTeq4;V%Izpv&qGa(1}nMhyJ;><{k*@BF8~O2Gh9SVJys2 z=nN!wi$&@4nOIFOrGIZdS5Uge1&d3dd?5#6qJLl@A;i$VI^$qX9@pjg&aTZ{c;usv z0sl$84cV5EpCF7FZdl&Us2L|p4KTL}-A+g%Py4$w2{rC+P*-#0;Ci)h-@fUh<^q@f zDy6DgzuPNeXR{B|L!^`kBWPntH%ck8#~>33S9J`W z*VYbAUYCVW&bRL|?C$O+5MkRfeapfJw!f$EC)fnx+-i=C8^}Vpz$UC$u%ODrylMyb z@jdPpzrvoOH);mZY%c=h5atGP#{fx)4tTuvx8ICsem$02{&Sr*#zVN&zNURQCb~D8yB`}dMgC^%^_iC$j>dC zwP~QuB08U$Ug`4v@ubvztNZs}zpXoVgwh8rhOw=LN(;L(uwufyJC+icz*s4b8KE8$ zw(Ha;UIO3?=kH&Q5xG<#vA8T(M}|~=sw?<>YaQ3|DO1N>){Cw2+;3`K?qtFaK&0he zGTtSWIXsLr0#6c>efWv1zDwUfvqzq~{>pvvypO)(K>g80+1BshZwTAAYaneGgIYpX z7XZ!PZP%Ix1VS^-N3;lkzX|n$dQSh7BU5We zIjux+-hVfWhK&^;hzbJop2@yGj-ymx8q>=bXasR*6LbbW@P2WyPmryDZ|jla_z=UK zX|9NYret*&g?#YjjmRvqz?{`4*_M#ah>k}Dwt)zH-P%ReWh4kGtv|E8G*h0gU;)Ti z*O(Q#mc=UUVaRiPInf;oN~O&Q&8(q2khPp%(OF>Kq5=>~91eu{V=k&fZ*ZR(wcW)g zTG-jyiGaE9iGWeJkQ5?mxTHnLeW$tJpEzeh*rKBPg?>B)NU-p}x=ma65%b_*e*wey z$#2E>a-J&ZzndcZK-YYO$h=q`<_~e&>Paf}hr|ZBx!w zUb+w+8W6br?%{(6@2(lD_`a#(>sZ%CV%ki>UAcnt)vD!PxY1Nh!4o%eS)ASRfTUeRsWLqeWL zMk-g|1MY1hMkdp)oghKfriPoR-f-B5j?&fD)s@#PQv@Xv5odT*RaNPElY^u>SE|_L z3NVE#o5(@nAQIsv=?U7B@3ZV}?a)D_c<`{O1kyZE3g;F+pMBlaV+0%^sGQpsGJE*M ziHn48JrE9zzc?tfd=XI%+JF2stAqJawEH-JpjL%hLb6t76qu59JW?`d9n4V>;tDBz zykJ*I=q=-yhVPUMlF3KB>8oju-x1wLvw%ruF4W&QcfXcK+d}BE4tD{?`$n&!` z3%!4`GVK-@7wyG0{)%;uE7B3zU1S_#8Pqj9)W&rmkI)Fqb;&~Y|aWO?klN|-Hi z*U_iJ{CWupP$b0=07vmrjg~g(@tktMltf8TErAoz#QjQjnk=VeSr<`_DiBRFblY=U%N& zMU1+->!*_^F~OH0U?|Z6kSQ%5s)j47fkl+`V)G)i2{|tQ;#e~gx8{B{@Upq&O$skR#qudUeh}CU#aX;lK~*kY`uJ-mhbag3>vAo zkgxVHm}#f1|Jm)js+!vFsSBnak%eB-jEPHUzQEE17<33l&lI9ck5d?G9lqnok9TBd zWo6mqr=zY&n4y&JFoP>2u&YejN2P7rQzKD@lP2C6twpg7u-n)Z63ml_^$cw`G-C+j z;7R@v#?=^BR*+jA^g(!pvT1f?5fOP_P~Q$Uf!LgqtM3-@)@Tf**T_2JpAsl__~%0K5k zb6!mwvw(n=%B5X}`V=nqCAmiAtuq6_6q=GlocPwd0TT9p 
z76>NAXL$+4)FmthCE0PLe}1}A%J%q)*gfxAw6QY1O}m7$4(xmfpuCMP)p_UJAmxqw1Sr(E@Z|`dlCAI`7P?A>KR|-FEY!sss+q4_o)=XNtetrqojaVmvdfN^ZOpkWy!C#eM2IE!^kU-=?R<8C@LICE^RHX4#B4}y? z&$bzcux6cMnS<wIn4=sFa4kTLnXuJLGC3maHw<^G^i(t< z%PO^`5Wb7I7qziGKzS4Vg`|+7mvt7}M^nDgV@@rYhC_L6-*iv|JNL<9^#ire8@VL4 zNoj^qEoAyOK*`Uy&KkP^u9M~u3|`62aor?Is+hqGQ|aWFh6@S`4S0Y_MCQTjH7#j; zRy|t$x_MbkUIXuT(tnkqc%$B?S6sKS7~JSlo5NZZl&JVFHTNZUxU(p?J&zI#%hBXA5(YQDe&%8gPpombtz+GJ}_!T@7RYkB={63ZAo_cQXxs8%{Q*@kkEWQh8?+nveJ z>MT;nXj1Jw+0%^54M}a(B}kE)hzk?34M&c&QI2weHv$716>sPP@)>3XiF}*3OUtwM zK53;92e3|nX@;AU1M`)s#A4WTKQ#9Fy!33}=C12ioFuQVh+`!&l0ng|tWq*9FeN?5 z6cdY6YGzrE;;J$5d~uyUo6k?rTt{rb-dbyDS5#cwKK1%LK$uSEFHnZ^6?5PdvyLj< ze-e}Nt}yEQ$y|UtOJKW*CHk1$4k60`=-i?9%=C+~zK8dcw-V?%mD^V8QAk8*x}g8>p$?z1N#TZ! zb^7?v4@_ZUkn)=kD1E@Hv*(*<=-IdNekF#PVOS@H-D)Z-WKr4FO<&6y^1DAhG2z(C z!AVO06T}yMeBMPkVTSNq9M?B6fD{$Ior}v(cg1Efi^O@Nt}tCf)X;s1mp1qG z>C=s%M`fwT+1g4Ix`lSO2GhXbM3e6tCn6NobM{lUTVc28yy{8!QvFM{Gi+ts9C_QAI)!` z{{Gn1)(1Yh^g-o0;w*ZA*r_gsM3Q%85I*S{&d`ezxemKI-qd9}Qtma*pybX~n!*-; zo1p~ssANVcX=5*F#6KSfFBciF^$r7`0!ylG*BeJgQMPtFEAkBN)zCt#sJn>(U8Rzk40KoTKD$O z5s9zaRb06pcF-{WdJbdz3Gqinc&x6t=K0Xc)wR&uB~>f4AXv(6JZ{KQ*+(XtF)%rS zalJ|}CVIR^ns$Somw(xe&6wB5&jUSEqJdAwVi5MkCD|Mu>O(XO2J`&<zb6!Q|ePe;ylzU`{V}lEK zySE{zED>-%E_AQ3kSG{+{pNcbX6{y+oPdO2r6oT%Q9T_BqnBWv4@4fN+OjEz9wP@f zTRBMSoZY#Skd|GTQYZ=&TGq-7$fJ^Z2u)3wd1JCce!_eZZ9pFj&ajgXq5vLnpqGfy zO#$Wv14AI0kjd*iuPC8wpiG>YI<~iK2iqZOX`(6Zl5)?Z7|t2JLbW`MvY{aLB&(%x z%}*QEi1Wk12UK!D@n*y1Ck*uiqTZ|C4;RIRorg7OQBvSaU?^m{*IbbKu_8T<)GEZJ zq!1rUZOJajX#pF8njT50O=Kx{7#r0D^QQu5^d%|J z?v8zy=BR*dSOhwZ~2oJGC0zYIkPmBOG(Ja){Af*$IqKJ&4 z6RSY`s5|@+AYp}Q;{{dzre()Yxr^mMW>-HN+IFo1TjfyHN(0@^I1X6>SYwpyB_|%> z-VWmP!5~>f9wy>5R*GJ$avm|&Vh0PM{Q@_ph+*FF^UrD!m?r^1h}$}0e8?{-AdX~^ zIrul5LA??4VRv^oxlz(z28aAe0+StjDH}Qzly<8BdD(Dca9r}wDoXJV;TLdtn3u3WEg^Etx$a%6k$Gz;t3N(1Zfxqr3(aNjo-r?UOwWxuErkv zR1KX@4|{g97?`O2kP&O3Tf#F%y3pok#xhalS;A@T7Ikxw$xOuz48x?a4k-o$ ze%RjL9%}(kOfCFb)3Y;wO@eV__AD_Wn#x?dqkV8c-!SzF9pP{UY6G|UA1>M 
z9m6ySCWcsg(Qz?Kd@xvo{a{200Tw~&=2tmXr}9GlD-&k)gC;GJ1f71 zj5wj?+{%{7Xc5)~GY9LR00ISRX+&~dX~{z;f*0suwew%aXnAX5n6$uK-Zh_#m5^=d%?VrT}LqVi~_ zX&RFxr6!*kuZwPY5QYgfbrQ691u9CY^nE?DhjGab zTvDWU_+v1Y+WT};!sF4W}q>_yK?yLzz?f#p1n+Sv88{I^!FWGdyk5g zsl6O^>iWL*9Ov}s>aDS+cvQROPHneWC16_EEd^^~u6%jE{H)z?xYE6S?xZLA^m7GB zKj>-7(JzApjbvbz)=6$m2FNi* zCpW>LONvin?rR0RFbrX5?TE?o-o6!$Qq7WB#;~GHixyBgH)GSPuhujtE8go{<#vbb z*Q=FJ+FOe)Zp3^3ec5f7RKRon!(W945K@AXcm+C$*mej7f~o~05d%|M5}J662ctYT z(31b_&8rT5`tpD6!|vB;-SpqTCtuurb@uBvbqwypoJwdCvQNY~0G75GAXpFi979O4+F}x0+I;9PEfJ6bvm8RL+LUx^}2zA?* z5<>3SmkXyRxS?SP9Y^PgffqE3Q{UU)NEcJcF@r`7W8dg#2F7L>Nz~Q99a^)F?aLD_!;3&~N)x1a#QIYc z@=PY_Yu8D{d=S7(P!bg#kRy`G+i|(mFOg>NlV3OwHz~lh@IJ)(R|))LRbdpWiFBP@ zIqD=R#8h;PFx&vI=DvS^q*?0j491iRqit=V^Y0R?PJfd{pxL8}`W-+-xMCK{Vh9gF zvku(-ZUu%Yq!FRdQ=QO;O`9d&4LnfVI<6b13Hk!QBhwEsVIKZ&>&o#CF#xl~?TPw0 z2I3GIUIMJpqY6t+y^F^|m8XY50ONXA;5-;ZS{eGJO@g$E5v+#$L@v98T3_zLj^{~9 zNj90tn5zyAQ>oM@G)a)ZLBw?{%eapThGRXShlfYoFAe_t@GkVwm_$aQ57vUo>jF-> zGM=2YamcXo&BhQ2m-UkA60n6*NhwNmL+1$t4rTHU>Er@ms|LCyU$GlSti{|fRLpEL z+PCjLctCm>`|%uPl|y-1Sr|xQ7DbWdfCbo8;?a~o42x<0Pgh`KwQA2xX5*41Pf5-S z0Q*lQJmLWYK1%^*Mh0Wrg`^X;*Ir*=u4{*$<^+=|QA`Jlu1oEsnktb+Q zL*&YQ2M)+#_cX%)LMD^*)V0$P-C95`r`bB1_Pu?KO7}6*D8Prv%KPtb0Idd^G#+Oi zSi(9dYI_wz94s=47!~^0PA0N zlX<$A0X#!2rdqI&3SIf<=KnnDEV42@c9VeFOu|1Ba}X@4_^ct2IqLBIAkoJonfA4b z-{oBO?8aW~8?`IM5kOkTsFRSX`xGKE@}w3a7!>xLC;WPsmvO|#NDVPB;Xy;tc%7OF zyh>^m9ttS6of(OME5x#cqRu*!0UMqaKovC(;lRB8J$PSYp(83Q>VQNrZpoPH1VOZZ z1n_{qc4S;{rhlKEO@BeM_WD=vRX10IOL~q1iELS-25eT9eEhkXI1>Em zdWClBumCh}v5DIbnGgMvDDjoaQp+$Zx8oEwpu(#1Xk*JPgvpw!swz8o{CW!hsG+xy z_wPK$v3YYWwz57p&Lvbm`Eb1h`_^RW>wblg=J%-~x;)(FI~nK&Th+-{HaC34w)4eB zw4K~rvE61y|GWLXb`#gxG5r82sK8J>U>-Q}6VRV@n6$V2{=122iP7cX5SLa`7Y+Pd z3Lc$AcT%M&hY9j(O`$+j#Wq3zGM<+Nb8u;>XXCK~#r4hM2qQJwF~gEj<5EQJnrl(4 zOp!(qz~`yZV1j#d5Gu>bazL$WJ=$5sR&^o7Zeg`)=Zov4Q}{nV<@2bAiP?iF!sxZ@ z6~}|!)I&F)1+n7{`%8>|K#e?Z#)(7=Y{%;D4>4lk1cGEZqM2lKch?)0dmE4vlY~I5 zM@tlyDKMWEFb^U-9siqq)gB|_5oecK$RaMV=5#%+^2O;0be}kC4#Gf{Z(Q*c8AWuo 
zF>vQZe5k`5?`TjF;(kLNZitv@-$?tk;n`v>ZF z?|GIe4)&p7s#a7nGpvsc;&TM^(q7`03nND?_UsUj)EPzt3qB(-)0UiSw4nwA$fH0+ zjMZz_C}*N*yg)^!Gm9+1Hqg%`fCW!I1sxswFS*m5)1*IDLZJ~{`M7B$W?5|^0B29U z-3}SSeI&ao4;~WIAxw6}l`B`u(lc?yfjdqY;Zd+J`Q}UMM!PnT0#*^j$S%%;<9lB| zHg5f{cb*I1s&-*WltZqAfFYXj#WzOJe1`xl2 zuu&w!kE)ZH=xpQxR75ia%m9}+QEH0-933oRbWBD@=Hbqh2NYi_xsyQ1142MVCY<2D zEia%+)E?jHmkEJGk-Bnp!M0ty!Y!Bw{>59I6xnGQ`pzS3U(+^6yrRazq1QiDOak%u ze*p|u=p<};X#Djibm1uFW)h)bT>y?dG9SKm%d zm4imh(LPE{!XmcqJVS4^`&Ff;f-uH7zDAx90i-J!i;Wmrz0jEK*6sfQc(WBN0K9v` zl>wyYhVrE;0g`)&WgSaE6J`>E+i-uH0OIg}7+hIdzjhY?hMDx2wY7=_v*T4@-;)91 zFcqPXI5D6S#p2eqsgvOEQVGy3^9EW!#I+JhlaZg8J)o9a5T8zZf)^03Z|QkqY7$ypFnws*&QFfH{|O&IQ2{q@#%Vld!i?<&H_(%bPd;83yHG7(U)cv}YVva373x zB`1%d(pQ3AX$~TQ(|Df(3DEYXQ^D}L2w;m8o$gqfiH!f638ejej~P2+bjLfJkin64 zrLnPr1ys=0aW`K+zC;d?xAL#b;~QhMILPMwpV#eM1gHO>UjJX8Zq`{q(h1_Y8ro#?nhQ7ri){5RTwEE5%hn^U*E$v)=^@OAiib*zspe2(F-D| z?0aD|nu$oS16j#v*Lw!%3EE@T@q~g;Xr8k^$1Z_JUBJ(&Yhpp>GB22DQRv4 zW9pdC|C%rrhx|9}vwP9Cl0%-Is<9n8&8~6oG!9ik(%L}VxpGfLZL+bkDa*Kuibu@Q z1)MXwlISF*VuP7#j}k`+gl@PBO_iglageVOrsQzpU7~jS8Jm;tkYn=Hc5nTo^BQfX z{yTj=sH64Pk16~1u(bBT49VXYjK5g5HWBsW?XJH!l0#t#o`a8-@Zn~JwEamW>t_>6H#^-WAWkX|v7OsRp=*KwLpc3FQXa2g*%IV!s~jHvdy3xqLBh-uc8TCr zXar%w57Fp_&BUvKE@HOYIAs~so82PB;I;DVTkrdoO!v)Y#DNfW>#5)N^Gii|7v=YX z=Em#+7kN$l4MGNW%jV62@gk;8bzf0sZh?yk9u)IrF1-1pO8}L+55V)49XY$?X2c_E500P61LV$7n4N4)H z2J?_(IhI9CWs--Jp@k4 zRWJGX;&dX?y?_m2EXvyf=x4oH`Gj>OeI^OvvHJK%@QtXfJdpUg_sIr z{NkPt#L9aIP1bc@fKSvJg;k`a+j#H;av?i2JT00eg{swG@NO|Bn-D)e*NOzYg_~Od z_N=u({`eNFMMb6WpIA#$lK}1p1q3{UK?$gFn@iw^KM4N}84J; zdWVOc0e8FY5A4E?ze;?({1|7|A+8YV7n&!a_$3e)8)!PXXeBx33`l^?B$4bOP2KL| z`RDnc{+uha+M!pN5gr97(V%~i^f6Y2;eZD%yxj?=Iy8uGx%*=ZZrciVC3`FB+SahjiVPHag0@jNh075!3aM}odQg?k3_n~uUxIKjh>Huy&!-Rqw;%KIM*;6$6A}-IR2(9pnopgd}*9_Ry74l)(*7=j3ZPw25%0 zB72_OM?lmewv82P_)wn5lDGBb&Q$(qp?y)F^M=1_ztZOl(Eq&t z?;juPW{O=-P!G)|-MJk>2Zgv0lF(UeQZ(F*xxEqj;Uyb=$TC zh$qn3bdCnwG&camF&(hP-)D6KjZEQyxFk>jXe7Q2oeNW$*J(#|fT;60bQ3PE$L{fQ 
zaeeMIghsnkEkzG#T1--Zq0~qMu`>9wn*_moela^l|E*Hav1hNjcO>MM-Dwr#XTXU( zHQ&Y|nvz@z)(?Tkx1V%kL(w_}X8=GBIi3af8$QHOYm{lbBwTF#JMf_ade|~njF>0y zqolN2l}{wAJueZl*T-HN)Nj!&v@>vKwQEv?o<1e6Ov{<|bKyA!lH7?|0{%B_~g! zO)MIMk5Q{Kw&v**q~%%~g=x-8tR>tnO0K8V|5yJGM+;ajh zhm4;8$*}EK8rfswx8P8FK#c$`tSBuQEN~*$RVd9k6KtZXg`D?}aZ&bw@|cF;dTVpn27zK2j6? zu)%?fque5WPn!C+@Vm3*P5hQumiB35{K??^sOG%sM>UnnUp86>KMhZ@@YYcp`D*l< zZ8rAH?r&`{QL4cx;o%ODOnR}>&DO!WiH>P`97k7BLI^)$jrlsJmi$P3}N~*lti@PZABi0q52$1 z#q8A2KsMy$Cjcu=*te5#P!W;pD!Al`<6R282*aI}o22vS@U5N(?LJD^k60_D&7v7i zPdq?%#Oy(uN)bnPGp2xsQ*eyN__uG5!O5aIDY?gcp8ppQLgfWK)qmRGH=^NWxHjP3 zxslTVyV$gj*B$sAG8g3V{WZ*XKWRcvA@wb)H)6#RCjE7%YzpbttYcvU`l zag0`qohs7#00ECMkqip@1e;JCkr<1*iZI3vq{bpW!{n5r!(Z+I6=HK>2X7&)ZD!eA z3K|Cwpl4$dyEefYBc+0YWguxaW6$E8m(#$K?kE(oqk~h8N^s_l64oCz4hF?!Oi4H_ z#l)ObGmRKW91)}~9!5JU93()98qzU|lrr#wfWm)R4_a@nEc&+_OxBtB#hiGr_`_Xr zmiGn9k_haS_1#!RnMGT^B30?&`CSl9NnwE|)erx69xh7Slaoc`5V^H!jmj!243wq$ z*Lu$q`rZiQZN5Hfo`@6&Vev-V z)FBcOzX4JCB4^sIGb!-DgIx+H&BD8FJ3wnrLGxxJ+6Fttdi24Bf1db1QN9qBUP1Kt z`En5LQWNh^5@e*WZ`b*z98Spp674Zk$$%GPE4cBR#2%MDE9G;}pBRsqk4>JOo*MI6 zCO&``gjhHWf+=l)*{InxsgPOvvXoF8l4bzJ94y5Yz*)9La>4{Ud%*;KQf6XQD&@f$ zpc=)2scAS~KuqjQj0g@v_T{N~@r`f8Mp;1~0GIw@t>57+qQ)VYSRt1rxJ5-p5%!Ec zIsf3mzyS<8b6yCP!eh7uQIf}_i}(VpQU?^!VCT~B<>i%5Cr4kQjr|LZpIWH-mCdls zZIwfhpTS`smyq^(6c9uibM%^o^dr2ymE!!Y9w1?&iYK^7qX`;@)w7t@EH=$Wj3_ZU zkTZ19(h@v_vn7sXmc)Rr8HNU-4nZL|_x{8=JQ}hj`d+{m{U7YTXH=Bwwl#{{w!00` zL1n85+LnSKqNpI*Zre~GK|v5nCPcClB^KSb(rva7P@;jHC1(tc2nZ-Sqmm?vl5^d; z3f=qcbI!i^e&-wCk2}V#F&y1Pp{m~ZeV+BKHRoJ&z5kf5>O~oB=af(?VLkb~_da?C z==1o>Ymbr40G4zpR)V<};VUVpOIBtXnaDYxc>|lJX%}RevbG&p39qb}_bro_HmIhr z0P_hK@ck5p?M^Ma6U`?cQ8Nu;4rI>Kv->9YiZV6H?mYKkc+MXTV4=#ieMpjlTAs`T zN)7_WL^?g^(Y!|&*5z;b6~WnH0#b%$sMY|(v?Nb~fQw<7+&<{*@Zi*TZ3ceaCLX1~ zpQ0PO#0zBz6{R%`M>5jV6I?yqK`JvGPchK`MIkLSA7C@!w9q`^2$qggUfQHk%f3cL z3c;u7&GXz2FFjwoR>J|(pP7@C3UtGrR?BF;v^XHArbi*~WH6yB4RZiKZ3=%02YZ5P z*)~-KZN71M^>7~_Hn2K-hF^{}1=hLaVYCzw8jhayM369pcAwxkgWj`%lzLj|Kt=XM 
zItf!CP-ZQ;IVcAEPI#goX0lN$n@+EHamG@UJ^NxrXkeI6}mNeV6ylJ&earq9FhgW+-?sR`WJ2e!ksQK@&Rlur>Zj1umzf zJT_0m;{L=VR@s;5UfGlQey@K8$82pDDA4sxNaCvV))Om8mxP!VPqH@@ z#e}>QQU(>e2EHcsQsS)Gguuuw|NWaEsDUIl+@da#l(wWcC(lbPc9eRkp=t5jyM83x zE^$?`d=8Es^BmKhjTw-whKKgeL!RA}oXmpVq-nhrljJmZtx2l>j*wRXxTj=sMJhys z&2e#0Z~FPc0{urQTSdo7!6xFRpCIY@g73)j~ zmC!I2eO%E46;yE?{(e6(^pUR$CufqlUKnsGf3og;jIdn;Td90rF z#Y>lnT=TYDCsqc%1)HEO&*3oDctG!SEGR&ft4wBN-9Vws?-Q=pZP+c8r<%dXM#S3gXC}!d|?1@v|ir{%6Iw*$tO-&)36kV_Ti# z0jXk$T3}mFd8Sqaa(}EQ6V(WI2-;Bgv!VGh)-i%w6@6*zpkpWT^f4*o@7Y$1W2K>w z#S!lUA=MrNQ$d$(@m_^q*QNooaze;EBduVm3pXIQg9o+q~c-h#7cTLAxT zgV7h5hl_(ytqT|w(aB^N+TtJs-K;$V=$VyP;UIDr^gnlZKEn768`rA^K0Rirk)v{XP2(%FA+9714Op~|} z#LQU8!;i4ry1F94$Wy!cD;Pq=lKT}BqC*D`T%`t9U>;ovZ<-)~2JMCH;Icr}6C{({ zs23bz=P)upVk=RzcMj9Pfrz`gfYTW)#ZnknjJ-Dm)&3-(@qDV!zi8E+rp=H`p z7K|RiElg?2uRIs8_>l>E{_JrnXqI-dZ~OV_*QzEI^`!PTW$7RngkKPBHi?fSyM5|* zg;fM25?R>ooClW^#tp8DcOpOo~R2ScAHYOO28qR!wc>p#(e$BXqB5%esu{|0FG;xlHtSO~ zd?Q96%`}~_RB!}U=8G34AD#d?sEjeT4ODPU-QwpPxv*XA%wWI&AFD6lk9p49xWGhc z5;^uMhS;OsJ*P|TK3KfIvbsno;-0;~^bc}Nch6rY*O;iFvFCqKu1ll(PH}2=UX~JY z{!0GO-_ng-`W+;O^;45!Kk`fO=fd>64!%Ry#r?E36nh(McNEQF?=nL!{>T9OZt zM=W+go3jCafBaVu3UH50zLmy*cqkcOso{zF!L`EiKe-574 zLYK6qt6)EzxdI#R~dty0O%cx7WmXWw_o+=YVLuj@fGYA-_I-;xBJjr_tffg z;J`A`GbMYeV45N=4rGNdw=Bj^FjRNT*;85y98G|ENLiVEddM7F)rGN3C{aMdw65#9 z#Ci&WXaVd-Bg|_^k0{s8GKM0WjKd*zCQsNjesG=I|K8kv!OY?MJ(Dg`@|#(<07USbV+78O<~a=E(sl4O1Xq90wT z3y`I4#|0t!0x3Q277Ww|WU(oV3QDahm+Oh@(n3S-(gpj%l*##JXq$*BCR=aHY9Pu>tn}W32S6FhKT6EE zBbQgd52L`6{}c@<_UPT4KYxBCvLoJNd8$~E^nv)a?7lpz#2LRK=+yQdVtFT155*+8 zv;x8txWU()aC|qJ2otIXy)7xQnTNm}!TXJR8q6T$HiJWtear~HJ+qwj(McxiR3_0k zClwfKdGh8ZHSx1JN2Reh^T=e5{*^R2KVmDM$qSI@Z4dXqk9I2q}oImI>as3fD*`>|nDK8q@(KSA9-3{rI7?pje?;fGG(bQ;Yc_n zl5!eJWCBLkFN}~*BhG{`%4OfKL;_SU@ve@7uq+aH5dH6dU=w4JjFL94&gu%|nxI0x z)luZuwmZIdQk#aLhfMXLKe1SHB4pL12?qXh3(SUu3(_r2WCWe#+>d0a&bwENQWovz5xwm)kpQyk4YFb`ema;S!8r!q!&8?njH zjFEtDBsB*r0aUsb=sc}L;Rx%z_VaS2F|t%aFOU25unr})K#hQ~m|yQH`IB1|QauS| 
z?T+EL(=Tr_pf-DKl>rsTvNLkDxq8<)yl~;30_m&qRdX8FJKW<^P>mx9)nvE_^BiUJ zL005U+2(+n=PlNTN)J#n?0+kW?*b7)c#Ko^a_AcrVBR)?vU>ROJihnjo&)!nftVS8 zoCcQ}rl=}c_yCAX@%MQ>)`yz_)~NO^YnHvf`kmZV8Sc+3uJi5XYrsNP*r4KrOfyV^ zoc2-%NBTs}S;(?79!$bHvj^psL;J&r4+l@>0I6eJU9Aav9d7~ahm&bIf(pHcx#`$k zDsb^(YsCIoZ58}wRje}2`)$9fYDsb<(8SHCh@i&M#`3lMH8m7HKFF40xUHV+)T{lM z?^j~L&1X5RQahM978rvm|#0ywd7L|@rNzg0+lKmo=u7s%p zS|B%7*#8@@`bG(%@`~#m(%nuhm=(isVxZR4kDS{a?mO;ULJ(t zAZt+!9Qq;j!tYUVRhvR8x>CW}E+|G?US1#NHE~C3LGq^39EhGszZ(=oBF^+>-^n#< zG@kGTW7%CI-BJn9ZaXa91I57YwMdyj!m1>bvTitTX#fr-rV54pUT8DP*h*lt99no_ z^Kz7&J!u*MRTB8QL+8(@&-8OawbL{Hb>^Dhv_^DQxA@V%Y;5x)VfDS z*#?Z@lMs<)&)T&o^Ti#`RL9(<>%tn0MuNSNmzV4(o+V<{hR_5RtS3 zXijYFIsV&2dGF4iJ9iL>QzaTFNI?$Zd5bp$)(ODd@Ah!X+Wst%Y!5av*Lj|pGXjD52o$>56wjV(c7vHA%Z*mkj_J-AN6gm<_DTe3M zZO~C~@{I)o0g029G)6iZ7`cy!5_Adb6~ii2xyRKE$*58B(?j-?Urz)iedg1rPf03H zYkt>#07|0ZWdx^28b0S^-_9l}G`#O((;x5R0LXG7)#WxSbbztJgJLc0XQM6y!c#C? z>qIn_BLPN)=Xcnq@wY_hMr9N({FAuXC|pL5A@#k9f+h(C%|&~mUYzs4|Ngr`pfvKz z*T`pfoP55R%Sm(rf2$@yn%D-a(2YSef)N*Hl3xWR3^t7p6jyG32Tu6%Lebg4>7geu zha2Ag*IMx`BrrrRVCPyU8Ap~(BC^B0bLEaiy;8VY<7lbC5n~5WDzZJ{tim^Z!vlgX z`YMZ;07CA|W0Sv(ZP;f0^m`CD6YEjk1@&Mi98?$>CHCTNqG?6N_X+f)O9i-HKC{2} zA{3OiDbfIT2)P+??GABxB+VE=XYE<+;^f36Zay(NH+ikH#p-`x+$VgxaW4ET#vQWQ z^XGY&*}(o#fl@1H_W7;PTBU$C=_o{;Bb;LP)%G)F33#G*nE63wpK$vS>)k-d;ES*S z<5#;6*qrt)xnhDdBmZli=M4=F6OPa{BHQmP`s?{~Wi}0aNXEH6DjN_~z3niF#6&GV z6fcUffg#?B38|!*g2r2)e+BdK&7^bGnV-j5(Koxf4hal02Xg9qfcQlQWJqYyaNpA- zhC_>44mrl^YHumDycdnv9HZai<+>$#Fs#300S z5zD&7JrFZc0YYR544<;=1#mwXBhrass307nPW|odQm6SVQ z#Ia)c1RnqUyOAY(FvF;@daM;Ok&hb_7a-z@zPmJ4poqAoyQk3uN`{jQxcfEz;;GTo z)IhlF(=S|9^Udi$R(WvFiE__#{Qc+GuH)1S@E=}ZCYpLQRHGOk#+{~)WoWJLIxqtf zw8={zB;;#kFY9nqGBA)46&_C9U<5>;u1uP*NkAm@CO%+zsWk^hXM4eE3MTaOkZ=1h znm!d{zyeTOL*0tpl`>E~0$spr6=(o>2(p*@VbQ4dNY4SSQ3PQikl>^f0|l^`#Ky>8 zh~|YRtE0=|AD@)Ho2%yGvC>2RQbEWZR+TRBVXv$#mrKUTL&;1HS(7q3sZ(1tYsQrV zD#=KIqS6Q`1iT6xGDAf3YB(Y&bstmN)0kmPCVkYNg1n#tMd+ZD%9l^xr8YYR0yhz- 
ze3E`DZV}-GO>od~B+{PdG9kSq3*$({)9P{D)*Sd4R7GkEW74E<_=2y#Is*o2;i5$q znDT{+*Du=g%e(W8UzinpzS)4b@xw@~ndPWjRR?`G8;CM#3T2ZGwh(nBQw=vZHa2A~ zGJEq8G^A!Rvo{+k7NcL338nTu(`IO_*E31*-ed!r9Zt<47y#KrKk?uGE_Iz#V;A@y zGjx92J41vB`wvkjbdWab7WG5PUv2vGi5ki2OSyFu&pi>jS=T8B(;0;cNO-G2$A5_O z(z7ApWneFEQ0XBa?pYUFXbz%&2D*PIlu{XwhM!PS0&ZNy)kpGJ*?vAMRuF3s*%f{= z0;@I}T?UX&efa`63qE&?3halT5?ZKbvg~j~r+x_Gsnh7fmj}4( z`mE7EkJQ(T|9%gZEe#}kWlr4?|*)pAjLBLi`VHO)ps5b#NRjjf>3!DJ3azUez&)9H(fEBTo$#^ci1z#angdXPG6K)_O!Vy*+y zj1p>ibZDVSr?C|Hn~rDIc#8M#wL_mWMbEQw#(_0>ZDyilRYh_SCM>*Xua`y3hG5{f zeLFalC-s*u;i@X{)eedn934i-l18-lfT)q-CsZ_@W1nQRoRS!(H73EvJ#(JV3w8^$ zacr^l>*%>B{4h^g-N*6b!l;2)yB!8cI740m7@6GqsURt1$9{?byHkiWJ!Ofn>i2yF zmtr}Z=bnQ)Arw@S2+mog@dr^#fpHJgA!gX)XCMAYyVbgPqV~^p7>W7dJffR5p%!Zt ze+Au}Sw`+MH&QY7n`f8d$Ayb44Pc)p_w&z7S?!ml@+RXp3+-I9ZpUGBBNuVKZ?xnc zU$8!3ZS=&8by4^eW*TZ%`6`+7^}0W|Zmo-oe`(;j@x_KG&#NygaViIL-P(IU#94`H zzrWs55<7T(X5fmC!r;|SYHH37{f6E#+TwaW1st+9Lqsk;R0j^leC}a281(*(EvItb z7mH`tYhQeShn_zaScXcDT@TvE&Sy+cM4k5CTT z28{X)bCymN3fqVud@5U#r?y(vyjFv~)a~8xCnWT5LQ<(4b)%DayzKLo@ulncnnlG0 zDwua4O*^sS1G&H>8Rc;YGA=DS++SznEx;c>zFPMEQzI6W zU24%75uD|4*z6E{WzN9DpTNNIX#9e;bJJTkd+t2@7Juua{mj?p;X&n}+KbmSefb1l zHF^qZpRh|dv+s|SgLPDXHGSn5DnG&7xeaZLe8S~Pfx#<+t2XP7e>)u=5{8 zt(NP)&-6+>T$Yk#;?2Gj@q2_wAXF;0^(hrW8#U1-IN7nPs=L|Xn}Y}a8lbs*85oTl zAtJL`&2hNvVfD%9ZLfW3$GlK+0^S#XJvhhb!(?crVfp#abz#bWJ(CQ zr!a0$b-K&? 
z^Ne;peX13O=HFlTm`l-<<4!I_drGvd}zX)pE9=0;B2t`+?eQCmRR*l26Wbd)o2_ z6G9ylOLYLug{!nm%vB>+>138IN9|49Q$v@ztABG5qUG~lE(V3wnbvptO3jmJjV zX4ER*DC5B$7?5}B7nT>7gj8q!R%a>pV>-1r4zekWKL+4=sUldnW@*0qfJXN3Y3dLdQwc(6|5gl9nIcZevtOL%GNGrGD0 z<(8q9P|)Jw>wLGfBah1P`Imhv;OpY<$*X+zx{ueDD}sRvdOHMJ^84&RHLg9Gk;9j3 zJUN84CxTj~$Tks$`^#htfOv#bDSUi9FYNdX1dhX`r-BHAlz?HJuOm!zIWsP(5~w|> zx@{v9E<~^Ha2(8$$fnCTa$)E&O#6yH(Ni5&5$&59dt@y?A0PA3U)NV>x_&&C+A}Dk zduV;dHB)y5NrZt$3}bi|>2>w?Qr&<+ZvmnB`8O28{29z{ z&XKNmQ)wwX1VN$-+iLJkG=7MuEv2>C*Zc!!X>64}TKeIGv&Dx-0bz9};;VX^#oWV& zpD^whDeZcuiRIi9WZKGXSj(BE>Hn+#k+TY!y@#up-E?N_iY|8l4-Xzl#3nw)KkOVw7v(0+_txJ^J$T#*tBgS*Kda~ zrW;eD)iB5``0}!4xh`o%(w0%t1=meWFYt9wgsSM8D$;e%wL9}7A85h%@b{OfOfgc?cbI<&w~1hd-g3s#RORB;vBrJU8yU*VI}mof66pI#}KEGRb;#yn6L z+lpmDc~0Z>ROK*|+)Qn8CVOAxq3%@IjMJBXe~&aE(`GDT?uT4CPd&iRGr>7ZHxi?= z2cP$;FY&l4x$nC<%NUsM5-mPgdGSjBC*SQiFQ23J{mI@t-!`}h|0O@3e`lESqv+{! zJ2AhV_K)ZM`u8Wd7$<&w_pR{Uh&8_**El75Nw#v*^N{5?MOu62*c+Hv9S#3*yuV-Z zia}-Mpv>qJjX*)>5bc>WMdLQ43W3v@>`p_gN%o*eJ3l;ka=_ z(fD9e+eQBJp~v&KL3C1)j^QbVxoDdGSQ(jtj=*e%lY6!ErXBoNpMP`RsL?P=Z9zn{ z>tKm%k6w#1Ti>bOn~*)@7I1l@LegfHXRZulz%t#Ta3$lC2YYn$PV8BkY89wH z{QlOu%^8eX#qZNw(ri12at2GcpQ)2nRs6oz+Wq}~nRdrKm2;+DhO2_+;?rJl^)>LR z*`W3^(buTdZ@mip^5yMTHJke|srt1M$GLv=gF#=x8FVp!t$vpc3{z^+{vFNMd5isI zcZgxE>v@Ei5lP`|E4_94v$s*Kh;DWk>uGk8xK^L4I{Ab8%SXOOxpn3qJ_cDGy@g&C z39x%Z6PdD6v{jQ$>2AKz5JQExkv9%%L1=lfVUL!l+en=-6k^4xhNU16(9;==8h9Nz zq|@Noaz&?6a=x4+l&y;{@JqLStb%q-4*2E;pjl=e2>pG~-Dj_kbE_!QLQiu!uMke% zk?rn$&5A)dXS#pPTVSnJhQ2!c*QGOp`uc(Sy&Exv@g{WBCMX2Ez`ep~xwHhj$?}i^ zk~XKF52zBd$SbeR9l)~Yz7qCD+0KcA$n!KB@|?hE*g`;If;on!WA_!-N}Gbf8i5px zQ3zfp^?ZzKAL3X{#&2}4)5~L}X6IZj4v3bZtPh!DkT;zXbbfz-yLI!_$pS(9%7aWc zw!Kw#8m=!JToJ4W+-%<8_pH{+n0*7JyGx=p=R}FcrT>^W)h!cx%SgK(QC2WxUkUEavo=uW|3RceTSDIWyQ@jlaI( zt~HK4qsNN@_Q}<_VH5*Ahx%KTAp6%he${pF0&md`)>Nyw;Na6m5(f}{MDwfx=vZa6 z`z&4|$$%bA87)-g21s8;7veqSb$5aIRX(oza_e}qo(itIN8`kB2|~ zq;(jzA)3|dH)FL$1=jFZ74#s4jhr@=(+XA?Pr1MYIf~2GpYPxNK}}v}5r2+BLul`g 
z`o27)pn#G0f|I}Dhf_l|L_#{53Q6z#_M{jT86M|MkCX)mk0sUMxaqZDc=2b&nk19u z;`&GMyURwbYc}7Qb}Do2TeE9E&tt)d4<7u8_YEAha)l@Huvu1sg5mvywT^~%+3Khr z5^B)k_2NMqf4g4VjRVZCpi$4sCg%O@n>~51vD^>v78MmuJoEPHf$44?&G_{6)Z)5_ zmbK|y!>1-{TzbtH%s~z`vJ-E8NWWVLjcRYayl?ybjr`B_4#%hO40U-E#nck|^pBf) zuk`zSgpzPy-J9swXJ7ySKm6-0xu@m=_uT=n;3)?7xdZh?K`MfQT9)?BXm0Vt;Ootx zwwO?{S70=cb2GS!+Bu;qRKMW0%W?u6 z^ENzLEouU+-Fox@qaGktB*y>jARi_OH@2Zq4e>$|5w_#BElAHG*H2fBUZS+H#pLra zAd^m*t|JMakc1H#9e(CfU6&EY`qpCU(W}xFBPn0p+9!DTCyBwvBm zzSzVT!k<^HhozRbwhtP?w=*T-_E|C^odVDON6M4nrXfnOS52UYeRcqU<(#| zOf5kc(JLYR2zs{IA&~P3$&u2OqAJkfyP%Y{OUFZ;aE7U|i(XY#3&0mw&mvgP(TW}& ziV3TVb2u&&5wdp6I+kAEdkx&#p@#O}*HQ|;Sc=i!orxMEy_HU}pVt6Btx{L}^OmpO zODmR*i|;sD5M!7l%Or;|QCq1JsJH60J1t67Byy7TM^wc$p+a!>zdnyg!`Iu_*W^Qh zjA`m=Rpr=SDVjx#F1`%hCQQJ*X;<8hb$z{2I{*9tlFG0*9Z5KLWg6?F6m)$Bp!=dN zgpekJVRwI*J$vqcU}L$&e*2Ga200~ZkAbnWvmuA^A1(`!*~YZN3fDGO>W+9AHN>g~ z$2fo@`ZR3j3-5ah9-Vg>OCtCe52QMH9a}{bMND$GVUsnE|L_fL9;W2kH;+a8aQMcT zbB?21vk1$fBF(x5HZh7-29kIf%l`35kE1&m>}Oo2C`1}@bkI0kWp;%j1CZy}Kl_un z3>L_7yI@r-3#B*3>6&BXne| zvqdez+{X#D;M4VCp{WZ3?7HH_`v9koyIoK{tI2=*>o#sS)I6pjQoLp#QK_oT#i6B(@WVk0SY&e;ZNIirRQ zFtAnzRrlavFh?GxG#6>Y*KmcwZP_=%0SWno0k)+w7&UM-F0(s|47hbMn1ffYH$fjK z@(NkIGsA`uK_`@a-a;O-aFIK{0x8<&yEy*J<0aPa zng+hJM^@#xVsx^(#0pKn-*B$JgH zc(KkF);S#*p{N?J7DzGU#%d0F4KXirt0^!okh4K(8DKE!e~=WN!l-aO6?M#SHI#wb zNOK|j?t-yhAzc!tgM{5C#2MQM38XhbF%vk9(od@z&Yr^w7cAZaHSJBA8C2qIL#L-P zjZB_K9vIykk3Mil;W8clgDZlRoB2){*uCCEal;C3qN27llX)CDG3}TaD@+3D2iWRv z-I^J23iM(?CKz|Ucs5lt^PrSx7dtUAQ6B4o$wu3+Wp!SpC9>qh^*~*w9@&Bg#RjVr zMnn2p^z!R9t;a6It7;c*v^9nQ)7Xi(sMVc40Uv)j_V1!+7zjySG?HL@<){bH#4f5b z;NVq^gDj&;mKRQ};;ChUa?()H6@e8K(V3MffZFVklao`&%Rn#tCDoxJmEC*rX@)NXGHfYJzU}j2B5i4m zBF^C6h1|fnznmt87b0*dI=n>bcn%#y* z^_X(^%7gn3cX1LRUTQ;&%}X?ycNd7ts=`CIkKC^2A(X|6wUp4ks6un6^=+PHu zV)-2j&0%q74o@gq;4*Y!mpd`i=a2Ofo~bhX`%%Br?>}BZWdVcoc}a*eibk-mJHz~~ zl=0|npUmgix@(JGTQ z^DkbxU4+gBNVEbV(E>jzvqHw^6g1{Zi{f#r<5i37$b(eo2w4MO0k}Rg*+sxQq@|Ty z<&@U!Y>p28baFB#v>I}8z(nfq?*bq>E~5}eqZ|>@+R|IE7hAgz`gdm{Uf#R3>2oX= 
z%|jy3#zjy(ewl&BmThQ=wd<)CM>sA))lR%J!C*AsBPZqw%Nwxi=9uq)@c= zSD*)|5kAK&phZ+Q+Q9wZ2|p;CXL+n1eke38f>|cDaWX9v)oGH@{*cWPdSA`3h5gaz zdz#e6O72t3TNWdebQ#E(aNmmO{1U?=??9o}iQQEQI-H8YHb3*Y$S^T1$fyRCT$1n@ zGmwn`TyCXJ>T^GMS0B|{|62)WltsZOzyfVBdRD)$@dBa+FFGuk1E@#Wp@+W+N|zD z{cv!gFRe4D#!w{(^nb2n|Ik<-M>@E>IplmdRun>45& zet)n|Y4FmjROhRm?&HNTIcpeI6Mw9;nNS#Q8xxD$&t3|QN9fWLmGOPrMpF&7kDR9O zd#0Yx*6GicOX|)e^9{LduoXm@8h8EH}2CmXY7ja)uHnVJzZVjetq?o!^+AwMCT{r38^I|fyGyxLb0=#UF$=q*-X z&m@HiN*!v=(9N(hCY3s<`Zh$U$-kVCxs>5sOJabX~Vb8+GGC97=yTJIa;+Tu1H{7f(!NJR141>vUS zSsUX-BKvGc&aYBBuZRh^K5VuD$94I_i$`UErs4J2v+7SCq$kmA26T}VwOdh9P%Y3<|086~on(EB!XTeEy{qDSdma}?jxJZAa5Jy({p zM2;n>_f+c%_%ejA3I;9_)9JYO%@2~sFK!BOyn@@xT4o%IYGRna5-gd5sW%Jc9Dm|| z31n(_Wxu;uZ;*|~YCVgVWXZ!ld^u}*A2;7OG_B&8UAcTEN&Vqo)4B(HR~Ti^yLo4*}TovJCT;s4{~EkAzIj2k zzy{{=rc6<{i^Dx>JFUFf&cg%atE~3FUhw4ia?~ZAae0P>z0&L0qVB)sBL7!PqFR+? zW2ZRLwlHa-grX&Hl$-5Kq(~eyl-TY@wcY zPKquEE!SQQ+L7KP53kEcDRVrL+pmSc@d*Ma1ptBW(AdG_lDNd!Snq>~Xq0QZ)%d`T zz?KB#;^f|Hac1J9Y9*!y=PBiWu2wu`qp%qX`9K%F3{w)kUdaTEB!U?$#%}f9MlMRVI1bj^_}*b~ zV5sZ7Q1uJyorW7S8(8UYew(uFt$pdzb}QSa#$W~KqD08l1s+Y)XO~Qef7;dk4t*|(fBP-EtqarBFnY!B3AnjEqCt*F^rS3f~Y+$R}%}TKBEs&XbcctR(V6ZxS zlS@~q{Mk|&r|tqJHTG>xL94LJ*%7*ArPg&L8J4?G_Q#+71kWG)8kzjcG)sR*U7BY2 z;vm@_OY~BT8n&I*s+;@m?9#XwCl=c`mpfT+5UE7Oz$8q2gqhdTWL}gCZ;wgu=>5Z= zGcVxrUX*!-2iPd1(>fT*MHpaCH2J6^*NYB#h@uYQv7RKR?os9iREc0F?$HFdjnA2v zW^2vD$3dNUiZ>UV{~&z2u2{mPguR}*XV1zsb6*Dq=eGCpEVrcC(bSOiDOO +TIC#zsCZp>AT6&wUTNBt9hJjyCe(QfF?q=1n#UaGLt9DjxBU-Qkq6Pvx;D!YyU7 z5);ffIn9h`+Z3!mTn|xi@e~X_=+9-R!GSVPC&%^OR_)T@;JleO;T^hk5kaD%p{KkH`-rnyWnA=7$RyZD+_vqH?@U?m{xr?z%;-+yo^)jPB%YlZVidky zWmRkD5tZ&foeZmzWVyG$$LMD7#L6w!E}Kkybh0AEAWLBAqJ%~DXm^uic*su|>|}ak zR%b!#c(49&L3axCP_9+eWR_v0yGt`*!AkSSoqaB&YBxBOEa`^fx%cAgxKKVWOjs`6 zC{Lg<-pD(3xj|-|bnvtV`_y!ix1Pe@o@czpd5WKYVYg+hvTiizY-N1e)!#k1@>3^E4<5fEf2dCT zNZMxG4F_KIu*PKgzCmR+c zk61j_)L$OA??%zgUFpVzs+01g#YWnf!jB4_`$a9UGf04yzl_<;cWC-}ZNlw%JssoH zmgm$?|pbQJH(@RFAic&{iA*s 
zjYm!v^gB-Wb=lRM>L#5(w}DqMz`j4nH*WW(wo1NO4=Re?9Zn6`CjuFvvw^WOPDf&L zwTp$ZQ5N6)hs&ov-SEB0^ zs@P#u`uPxkA>Nc3`aolU7m#1u8l&I`CxX+y;_+yzx*lhk_3oN9NL`C6(fsx6y<4Qs z8d3*7xy0ug8V6OG|C;$3L^M0-kCNH7vAs8G@Cu~Mxek^yTTkw79-nNffYdK~I^e%Q zWOE_>iEK$WO$i4072)2H>D+gzDq2j4AC+NfeB168*7?S;*3qo#kr=&4KUz<{<-5D{ z{8{#qoaMD*-)PLy=v=n&B7}^U4l7(Pf@Zkw`rb5r!;SQ&WdKAjlW z6G;1$E*B-Xhit5dNZao}sFyz7V_oC0(dDAUWWTR)xPs^}cuEa#M!;&%6bJ<)&MIRv zZQ|)q?`VmJWyfq{9;#0`e$>9lhv9U#HX+-t!IH^ddqcs8*KLBql#0>ys&VH`G78_B z1jovcAOK04PmM7O-3tW$-Q*lxS+2TulgA@Xf|yq}8%Uz-b-PuA6n1L~x-yNQ{Gfj7 z-J2&et+|tj4#hWbb`k_wW0~|Yx<;y}GIHQky>T(}jj3txXm*VIB&$WJZuH&-DSvaU z#yEh<_tc_6sNsS-vsVQxGyjxc;IQ{I-&Tbcer%QH=kwJtyWK~8B-pNGho0N+6D9BR zy&KNB*81O23X&FL$un*0Yai7))+R6t|JO7HXdKY6bZ{81zXSrvIL(SGfG_zGDz94% z-R*A9?dc;qE;@Hj@SQgU_wj(S)v%}%DaaGV1Z?Y!oq)z|av5)r0d@e1L90U_#R-Aw z;dtwjjy?8g>Qc-pJmC6$w?g{Pq*S zI0H&tUKVgyI_v;uY*@A^F5vsShLc)!&0PCyj+Y4=ytn*cj~qW%#&C5>{b*n1MqkTL z(S!v7XP*{~^o^=>k_`$dd>V2;B%j};bROTSnRPc#)u~9d|7u$r*(-+V8{Xt>uDoh{ zghi6GxJC_^kfcoGu~n3;cD(pzaXPF`p1*(XG@h1Mf^8V&d~H>yhhdJ*Q$F{b>h}GH z#@Vz^_kPCqfl_H!J#w~W|EE(h29aWY{ufpaAF#`ki=@$K0Tujz$8U-v^`Rd>=BOjG z^bn~XjC*$w^h4B&G5rf;_Dwz1wM3Sd4~7Kk4vjph-)tQJh~qxlS*AXv`v)Zjwc@ka z`B)ee8ve=ZMm{bp6}`=GGwmCn@_-nigoat#!^2YJ(9s`vVYj*d*uc^e(qqlbViT6e zef5R_(kvV9y}M@-e>H8->4uczldRNX=>+qC-cPkC`lySGwhc!csphwpBb@Au5=m=1UAzTt5P@EyGm%%eb;Uri979%HGU=83OLpMt?w8NK-Gy#|B zq6Z$vm%*^$>%<(6Vo(w~S9G9}viDl{g(uVm&bt{SXo&p8zk{eT8BpR3Dmg(|LP#0$ z7a4=4a)aOp4wq8WYt!O33Yu=S6L`yQ?o(s(aKKpsyJzS&yd>PZR5rqiH9s1gG?V#- zwvwZPKDr>12*kyk#VZ@34Pu}j)pX&c-jt=yEFcBT2*R48ER>Kz+Q2jiW6foV1lb&c z7&*3Ojwtsbdi2rUJj<0eDE=9R=!^^kjldm!1Wr_Xl{APXMj_gkqvQ8u#ruC*NfzNS zC*A2OV6&g#9B6ftlv`kA$A$t$0TA>HU6Vj>1`^Cpl!^l9egTS|R`)5>u|J661X$NB z9T@zZhhNrOswP?^tg`x2i8(WxDEA#u=M1Tj*RZB*h{PgRr20JY+&cS#v7;Hi3u!|!HkVA6ww4>3@u(m}>0 zS+%SkP@TMQrO2q|aAe

_Jfs^-Jj?6v@>MWp+N$=s+jrVCni;|1(6VU>+_Dp})7*Szg)7UmS_>D{;g5B4jnA0jIZrKW=yuPfq%inHCGp3`%I1~9a-;N zR}BLsr$wx@s3K;Jwn_keVtlF~0HZ^7quP{G^?3zwivO>E9xyFSTTAO9Vp>bKe{g&^ zmu&p-a&}rMwBON;mr?um^!1BjSr5M%(l8?#S_5&u=@kl_GI{iisCEh>cJ|hc)z{Y_ z@a?z1axRu)7Q@{88z!4Ne^_R3l35nVoE-6j0&WZLCq(C#B47!_xpe z>Y7e1*HNXv+hwyNXlzgy!&c#K%{1@_@IYcuCLijV(4gZa*PikG$=`n+x%~a_%k~>I zhQOUHpGr8ND)$Z)AsWcu6+X=Jk9C+7i3W^`fq3MrVm_QpuN)>d&zybZS+n-Zu8^mW z%^$(+8g*9-I!Ltd(q#o&gg2v2iM^NMg6l_F^F)kV`;V{JE;mBXG21}q>}qE zhDsgKqsMRP1XeMhdp8bdY$LC4_C=rFeqq>!3l~P1-S|>q^Ki@L-j#zuvgnsVj*v%7bV(H02`;QMrz_u_z(L?5UECL7*xuV}-J&YK_GFv|| zuIxDK2-N}yg)0^uQ8(!cmW-2|;CY9z#d45|Tm+q$6F?oyc=@G;T|Y&d@7x61skDg# za{6P$P2*=&scUFtNC?`ZWy=u!!vi=J2?)xmNC=Pr7zPnIpHq|Ll~NGT#KWb({`%`m zlMfXR%kiApoOtnB#|AInDz#iB1`>_Rm=rTl3X84-i#G-fG)7gXsJ`UJ; z8$^PK_U&6l?jcCP%bWb9Rpt*nEae@-7DTMg;Jg-N3W8zIAYP|Nrq9%LUb{A^4|L$mt(qr_mpc8Pzz$ve^`gQBATRPU7Z%;Bsr zhkF>E+ts=Db`0`mf2oj_>F& zKl9+%-2H|P;lph*Q$K6fWjR!(t;={t_$XO#`UTx&|9~{q;AlfuJ$guG^Kd8vyD)ib zG#f}`ho*F0O5+K^e`G@|jI`kQc_2p#NpHx$|LUu+%4r$v`TF%s%d%YJ0}i$t;FC12 zHRR8`%-;UaiGaf{@oQq=P33KAP2fOJH#WZ9MRqmJbP>jLMEn}L>$LDSOC5N;#mIiv z5Bj3Pf~y36k+Fw+_U!3TsaRT+{VM3FBp}_=5<4o2eIVX{SK0L9TTFk~OUn}4s}X8T z7;758XYbxH&_wvc)uLu(p>gE7yK|>#KE#vJ&+r!aPU>reNs9OVBLJo-U{Zg+jaz#@ zFpIqB+(6SD{O#j*)6bka<4=P--UGMvP&_rZHdwp%<6Ac^O~sV&>)eX+PbCL4zW%Ru zex^~-Pu&|>(6yvNaZc2{vs2v#BrU6psmZn%U|G}XjwjIP{FMDYMD8oc$M%c(HEr9r z^@iH$r=?{IgF_sjicJ)b5N5`}?I0{G$5TmRP`-AKH<;o^i-nWqW0uplsGpwq&rv3s zmwQ>_a*vWd5hfhE?*&cTI0d_gnJ0cD(Q4Oe{JEZ}#){9C#1J6&3^b87EezefA(%4@ z!}*P5XwEsY&QA8n6?d>nPCH#)4|%`_t<*z1oKv~KkFJW`ARaE2KD-BU;AqcuH@qjiJbMM~tqKqrHG=KaKe2VqE2i04~=I+OARX0^ae@+VVgfT4^=i&J! 
zYce-rftUe3uo8iB^z|clp1hSI+0z5~q?0&8YXr`f&`HDPp86Z=!OXg)!g!%Kns z!6^a6?3N^Hj*hnVB_M#6-V_nvs#Plk%F$EVSndN&D%ZMX$Ib0YatcJSLtX=WcYzY4 zk~c(AayxGCB=iQ1NXnkG?1NS`Ft;t~AEdlO(g#C1$^|rQ)=WM~vR7~*8*RFE?dp#U zD#WlHRIRYDAv-wd^u%A*=YH||=XVJRB(Dcyv`I1&WjGvPi0Vz?Q{~Dapo4R5-mKYd zu63A8jbjeI(}jdb0(^n+MV|I1x+1ql5v&r0N07{ngytu^54_jFtV^AzlRp>q7biFF zd5?tsc|vx}7M1P#q~DV(2Kw*aIS7JD{IL`U!OLg2Xx%zY91OitY(S-^Sv*Q{I(Y08 zZ;#x(c{A+u&K z(Kx}5>8tZeO?sTz^vn@dBvFugl4o&9F$lSc=Gmd_*7;;ZNm?4i{}sc5UOk`Gd~+B; z^#NM&MQqC;o{TWL07Lm58+Nv7mh?-J>#*PH;WAA#iDtj6xQdX5O`6Zg_(~vig_A>( zf8h!YCNK<`EVS-1EvHD26GwkCXFJK&JwSS$H2OJZx5lh|cKQi*3s=~S`RvPLkWE>p zGq@lv*2Tqd)~T)XgI~VxwE}w?3X#04<|@*|BS)4mbbP%%hub|wSK$^dT3nF@<)(lS zH~nT7WrifMpvuzVu_YK#NQP|jTO@-+08J87@Iaz;`%Lj0E(B$WWy*E*;)#jyKg+}} zi#NZWn62qntFiqL?Bu6Q1GQ`*-giCs-^iXxXkk-LxOlMW!TVM&_9V^y^$sjCqCgEkuhT7Od4G_HQtq z)PB!##!TV6mx+=#O70GW8?yY~ZzVBRE33d>(bvbUdp?~_B<~)Wo*iW$m_zFUJ6iD@ zPoF+LvLcdn37*ZPYFFd9vmRt4O&p3tls*F&#f*SSG}arYhE;?fxTf}4D)(w7f)5rso zNPvQkN1VE%;b(Vy7{W0N-(g423g`4B+&X<=6k25)m@?B22Q0ISvLjMlyJ>sscy-wq zO=W};H6WTjI=i(GoI{v`?S(fK2EjcH#)?HtaL-aVz15htzp_$sLGgVPzd?E1J^P0gqEo zk}ZQQRsSJF?vc@3j-{3uEUIlEq7JbBh)wU7ONa=0`v*>70J{tVy$E~CP=rGf9UQPm z2B#)8u|}(&zTMPlzi5({eK@Z0Mv6PSigaa)lOZEWS0|7^lZe|ycCVmqxSxcb9R|q- zUM3(WDin%#PJ&igR!EtyeV?sNDO|W!D(5CNvyEbUh zz<>ho&Vv_ij*9ij7!><)34E>$Y(DY2`uF&Fi-P<{kBeAYGqM=d!>$6fsm0N(&}m?9 zPC@WsmInlads#{J4J0bUy9VKQj%8fXYGta&o|thC@Cb8 zo+t_=h4e`|L5$oeVBj!EbA5}WKRua_tuP1nGduj*z^uzlcaVY2Aqre7UYN|*uM`C> z5&{8x?0_%IwqZ!wN?^;%eCMOIw=n{T<{%zAbo8hdQiChNRzs2tMPiGE;ev3oBvJ^0 zw#+2>h`#OwHQX5`25HrIdfLqu9i9N(I;7ThbwQi<#8&9QW`LDncj~sJmzh&qgf%t+BYS zizK7dkOfq@J`En^N#=7Iy~ypm^=$q4)&s1LgNkQ@>}auE6w1#hE{aNG=28)jhA&C9 zgW^Tl;Sn@Y#9Wa}$Sa9;s8~t6LeH|7De;Bj(2HVi65Q1edz|pK*xtNd5VU?4bCWFT z%j{wUX)S@HMD3Dz%NaSKdrj%J)>1UxBF44h=7cTvuSuYVycl|zRDNtW81u&3EVw+W!H zx*o%MN$4VZ|LD&7XdXDgD|jPw^Fs#@nkjR6LIPOB7xLs2Nlq`NU5l_m4_E?W&dkk6 ztbRO>MZluH7;d2G(5Lw0%o#I6IIQmkQs0C?r6X3MeiauKD+T% z6)@rmD}(3W85eK>jqAynM+CZ4wQa*#wzDklRJ?Oh${+6v8>u 
zz8m6^`x!Lf*7R0_$ak|aoA|RG!jV^*`OEzviMbg}#WROIE-p2W_ zH}vn->kr}qb7I9%XfI=0UKW9e*H#SIbn$;zoa^K8Yu&M*>Tk8?R%(Ien|O z8caBZn4(|&nqh2BSdPi9wcekET~4v-K*r3L%X(ZRdt$wKB~-X zOYSHjVo3MuQ~Mq(|P6RDF+ zJBwL@IB^nu&B+Kz{5WchU#50ap^K1Tk}?s_npP3hBFo@jrA&vysc&D-DW0OEvtM-v z6WQVO^f;K&rtyhwzx?tfi(iQGN+E^|0NJaNlY~rhTVI*ox>+-wilaC`$>b5>3IXon z;b1a^YywKACXE~4Skpx(8TW9SjT;%b(9d9iJPnLx^H!}6sS;(HDz(#6yI2yVSbd?M zVSwk3+ul8%EXe{>AvRYJxv>Vu0kIxG=oOLN_1vh~w_}YoAksWHJZ&bxoQE&?#$%&> zR1(D4+UJRuH2w0OS5XbL9WZRzu$-RmwGtqf^L!o?u={ll9U+?vIW-~#Tm;YvLE=SO~cJCV3_`mTy&iIsV6C|S74qAWtU;5 zL*%kHRg~DEm&q|~Q3`i-5x+WuQ-y&cX?%8lR#8@1l8Vd6HRN)FvAFl6FGbN*yFX_p zr%B$2>+{b1&{CFEa*v5uT|7SOducgcgoiNQd#Dzq*neS+WBc~)%?axT2}`8KSzK89 zjpBF)H|;O>nPn5aIecaI0}&Gk=xXZr(e+$`SsY#XN3OPTMUo4FivkoE0bYV&h7Ae$ zY+okloxWZH>u1cr(zn0xkgF9eMow$}VX?1tU|WK-2$9bwV8L!#6H*Q^40?9@dV}uK zWOEEz_W_WOf+{7h5LjQ*VPx%?g-e$%Ef$wd`~Pc_8550KqzyHwK#J6Uuxo74)hN4H0loq=^8;9_#!GLxnG6^=s z>@Pzh?ZRM{ABeBTAf;j8*Ju5H{dy+ZBnt{VE}?_SqPQ^WL#E>D*&P3>q(R4*uxzx3 zk(@GR*88FnW!a%+6cG}KXjAc{b2B!fT>WDW9xHA9SDy4(!S`yVsga(5|5aa;A1gXg07K5gy04qASAW|wwnGr z>NcxiL=+Z=0@`T$^u#94OB9KTNG_h#RD0!DNHR|Vv`~g*HEcL;cGQeWz)ZMz)T6@o zHw=wGF<3b!+KA>Y!Z2DymjeW4Vm-eoi9k6Q$WKh}ZXkV?Gh{CjE9L4oq>;?cIl?;A zX3hi|xhgRoIeZC2NUdCk=s59uWr2V!_^YH1)eS|DOMZzkniKpfhE`hUKg48VZqWS+4>RDuK;4DnC4_?I@N_eCkUpZKbD=`2;Ji|>h+PqHE+@9`ltTc9P zbA}TfcIfcNqu_Usl6MG#F9|V&Kp=Y=r0Nw4bh1}Q6_2pa489;4IpV83mCHJ9ZgGIK z1ATLaaR4b2^sxA$Z0Jl(r{uFr?1<0G(WlJkc-laTndZBY3=+0yntc&g%NXDQM4U5J z3N|$2)mLVZ5;lMf>gHIwCF9xs~GD>auwV zASQ#NQ^=zF!Cg*sLobmkI(7gIO|N$d$by!~mx{`ss-qOrOyBAV1qfLvk^&N01?U{z zRXIiDV0zMtd{lMJd+V(25??K&$0D(Jsygt7v!NBr2!6e6vpN56-efY!HqFOoriexz z52S`6D|rCD>PygBfOnq_$SD)8F?eY%e^;+M(>TCLP#7^v&N_q9whuY5Mxu8oJ+WRI zPq5%tS)u;Uwbl0aQMh>Daa$USih#`DLE5S4h}sCHnm1-ZG^!M;^}~?y;XZ7c^BjE) zg?mhGjRU7aPoz2E$KEgJ_^;^_-&Gfhujs6}tCy7F8xYKvab*b2@A{9lOcqJuY=~iL zIRR{$MdDi&?@=b~+56;?u+0lFAZduG{oqtz%E+uS-G;7q2Vw4WcsMPYZSKqRS;2M- z!^#HBd%YnQPESI$oPl6p{Nr3oF6LgGKZIuz5ub&QY-Qxj-e5VMwY48PL+h2rYEut> 
zcdCoV+7}#EcklHf-HY?_S~@;=KA@y8z@pwGL}`aCi)RpsoN6-A2N34Nfm%)@a$isd z)6)9tNCp7gd5$QU(t;eC_E)lb2&($C;Roi>T`rPnSn5a|1s5WM0t6}!ivlWQU+#@H zBCEZzcJm{_gY$N=RhX{arU()u1cGOjNql5{?q#U!EK+3_!u)jWwh9{lGT2WUz zRM3~tsvf)2TuiMIBw@>rLw^@2AsGgVVC~3;34FvbZRF6!8nM%ItlfYrbbHS$lESUR z`JgN*?-$L55pgW5eW!W*{>udz5k(|hXB|26jnIEWVw6J^AXd12k(8DA?Qs1x?^AY) z8(%0R20{tNz_uE$kqty>Mc8nb!hrB^%rvZ;K7O_Q{naT54a!9~$I)HnmVdhA>QsN;4B@IyInXqv|a|?CqlU z-Raus4Pg8o6^kgadDuwRe@(m^n<^=k0RUXnz57OL+OGKqoJ&X5HHA7{zp*wWE*k`P z8cyIXo(OjECcWW|L}n*RgOYHCSvJT?2m=F@qlF~SL}4Mk%>#~CQlxI?M$IL^f6nuV z8hadIjMj?+n|mPd{-Z`c5RD->j`%~C&@{i8@ZBF9w{p+VU^eW6G>5UltqFSB6z%CD zlcvnE;igY2*qxV9ryk246}sW|l0YZ9Qa^nkSBZ-lh|>yiS5X8*6_dKRrQ)jS>G}z6 z`-$CK-4}uQ!1f6448~ov_)Plu8ld7AvJH2XM6H3KvEf%z>_0j|Hy*<393%08Lga;z z`ob#vJ)NHG2+<=Xuk?VK0^4k@7U22vMdauIg5rZBC(S2p-J<$u$x7gf{M!R zrR3j55drAvn^P!52S=bV!;N`G6(LM7$9_iGf2{}~BjgQTgbzppgG}5q;BjcwN>S0D z9M?d^Djp{S)Fg-#NQ`F8+T5pP^u^g{eiB+#_)3TJqRTAS1JvD5KT8kp^bW^ih#*nk z5c0jmR*IaA;MRrJGEHDyxt0WgOp;k>{@m%wC~U6K<%vb>O>|IEH$SErnE1hS<|1=C zDeW~R3nX-Y`!;P3lq8%SR{K!-UQ;*m zu`nkpj0`}gQ0B7bMR(t@VM9rzNOfn~qz~MS97a#3zpu{~y(i@ZQRy=*6ZUr1yxw89 zB0x%frok93`oUFQGzmd+%$tzu>!g%H8Qm@QXdpCLtrmt$K##v^GzaBT*;{&O2xMly z?U9U^Pd%Wn;8a2o!)nqiDSb`Kz`GJUiLy*{?8M9%dA6j^B+w!xOvF6k<3voCcl%hn z%Eo73$qq>$bX?SPlmOfr^~W_2=Zy`*uO{DlOD=k%*weyREv&+be#$SYt4{+t0vsfggqgcl7>=(C zB?lYX!PDGafFh3~lVg>$rooK``3|GUALZ5zFX)|Jfx9(LXhZez`zB>vRgrKTr0>FQ zpP);g8kQrRAYSg}Rx2Xbs{#7lCMzO%mjE=1xqQs^w8D@m6@duv2omfHf2V-wBB6w- z?%*j7v5Lpm!l>6hlSYb#%0etuXv6sKykm+dr;+qdb;BkhJ)bMGb7gUQlg~bzDty+n zQLZRPRdrl*r?{a`ky=n_I!Rxb3?WWw2mgfO*Tc*wOW6>}1fkrmL{%LQ+ARFjTrwe$ zI@6?h5KB=;MAVP8Hc5tA;G%Y4uCA<2gLyC(={tVg;rKAir43i-2>}PeCbSP(hyWsl z3wRNuu$3pg_4Ajp&K#NmbRFSU9v&F5Q|y6$Fm!n4L#o0dO9`NOEGJHoXj_?!b_Tl8 z!vkiumk!o?4rx?|Fn^18nTn0M4=R`LJ(lE!rI>l@;er03BON`EddTW06GU#*wZrB_ z%(-|5NQ#NVkJ}$_(Y5z54vv5dYj4XnlYP8;o1>{h;sH`WJJz$XmUVX~9YJ2ssak`p7` zI^PVyJed&RLMh|U9kPkHt%$e#;bqc8n%Xwrtc^5C@??r%;O1k55seesFlmXk_XQvb-Gy69A;Xw43&=kaA3YRlK`NAMC&Yr;DyhIOu4wODFi7J+hU-z1N 
ze&R1*n+9H9lludYY8pOxy>tkl0S}jq@a#0Y*O}0LcfqkD!%)t$x4(Up+hW_SX`=O2 z_o_;8O3t8*YzSW|Qznn96s*kP3fj_^3WKRHPYL!dHPY4^e76wfp8~*>+DD!yGj%Rd zI3202>3RmM;m3CJ1p;BNwYaxJ&OL>;so2@M*qVxux>b5qk9_au%?a)M-FH)jwBKup zo3{DmMHNHLQvT{uAbEbA zofHtWr{=Mvs-~UfX-gB0&ObDGA4bS@0R{!S>WJzEoFIE%%A!?Yg2ZQo9zs9>6#=J% zM}PJIw0GuTRozzp-*fgk zXYc*x?~`@uua8T}tE=?hixypNYv{)``Bs-NK5^p2T^xd|=+F8A^u^H0=dcGISW3j{ z^Z4;A@t2kjNp$6+5&X!C4=8#UEI1DzYiXrblyGd(JXS)2Z`n%H0K<*thrueDj9MO1 zG*{$WVcsk0tAXC7n&N;$eVs(Y;+1C1*q`zE5q+zBEmm^r5_zhu@&5RC#&joB!0}C! zc(92XI$!??Vx|LME&aIrqQLEh5#Y^83KsHTxXXpDO!1^M5eHkfAB}GH)~V07?WX6G z;5(~B-M70(ihxZUKAn0RGD095$x63kKQSBN7eGVB>*_DawIv+s-s|CkCc(olRtyYU ze0M$Ya|d+@qSB3FP$)nmnbay~UCV%(keW(MKu#%jeW_5^IRDyXCi& zIP<+2a4~?U9oKdLwPUe?4cc=|$V042U$6i0s!?bU7~lOA<7hy{4tQQ6uukecmUU5p zQV15Q76I&K&p?{oWfdPMsDvgty77b(l4xO7L-!~F((D3d%%T`5r6$`!5)1)RX83J8 zKE0ql`qOqEApskTni^Sc00|b2mAVm+ZaL_GK*F$r%1F($qP5$z13l?1zs-njU6+HE z3u;QpLG$Q_B_xpny{(|iELNEqg`R~l!Zh2E@Rh&3m(Eo-@I0NgAss_mn^sT9yy_M2 zF=)+>s;YTIh`MFy(tw@rTDT0_Hbm4|(kK*UeSjcBTl&Ltz8*p@ zi9zSk!0vbJ-FqChrG(In>xVW8K67ZRZ+z6;%e2e7CLTGDJ}E&AmnXb>g0I*9%F7F! 
z7#*yYj8*)M<>zS(+UYEo(fZm+*pjTaCpuBO*GYIdUq7Aqo^h-5^Sd>WyOmQfK=q^A zzzB{Ua;3i${_is(TbI&2m<%uOet4apIdRMbqBhShh~R4@{+OFiCpW6Ho71M(lyMXp z6UZ`=1ajLp$AIK}brqR}$STRZc$B*&djLx#)wjZmEur74rO!Ca%5l1Bv}mT&jhV`{ zNyECu2yNRTE0X2T9CpGi_Vz@AXkqkyN8@SKfmA2FQql<{k2vK{Gqb@r^K1^E3Mh~l z2g6mV1|pwhcF?9%rz|G*A>gyk5UE4+ZjbSNcx_#s9aK`oy`0J$>YCWtw1ct7>G{q1 z&!sdyan8Hn+lL27vV0qpo!*|da ze)wpYk<$_gn4*L&#fDG;oa6;7(+L5xX3&CY+SJ!Gi3vpPRD4ngpe&c(C&7%;l12j1 zBs*vDl>vB+L2z8|x@oz+m2sgXp=lYEm_hC3oDr=?Aj9~|02a!ge(i?3A0sJ6!uou_ zBG+G2TfsOqVcIOLTN{OMW$0nz5blXHsrLQhPcIr2VjPK}=?ukuGy*hPsq$;cs9$q% z85$rZAcrCBYTXgRNlfq!Y{&cXl@8KRrs(NalOBPXRGgDQQp^sr;t9aS96n zOj;*D-{M9kO2fhOL10{q!LFY;Wjq zVD2ptGj&$I9a_vu&r1-H3v*uQZF%zk+`)X^;^tUl=34xe_38=)uhr(m!OozYkfF||uUbHFmEFAlu=69tu^kp=I&*JI zf~TRrGuzc3VVA)gxs)krS6k3Q7FWWeKaRZ;*QA+@Wsv*oz?O`O$*0@8n5K~dE&lyQ zO*89F*QmJGA>-WA?H%5~c9je+LK6$F7NgH{g*yV1A;ONbuRDP}gM#i&XmGiAHF;)# zCIjBL^!>JRCV-T<*qxsTUqskbX~7qSW9aGto1*@cf;ak;md2Bb>2`f3;W~xE?YwLU z-Nwe{8hY`M`2FfY8}W%UwH($>N7&0Wr<%Ow$Lk;dfd_6yyDGct9%%Bb?@40_UtGC| zS?@%~sbOU(V<%I7uix|@Lp_4v?A#|E3yLXpyLeOe)<+jO;{(HNq$d@WjPWSJ$|L2R zL`m0%lz|@!X3JeF8sAL;f~j`P8xmu;RF*odA`*FN2Xe)#zXYeWP0X#hf; z1s7OT<&kqaNy<6@PT^qXDHRvg;hdCmuQpRm7IK(BQ^BI#ShAg{vBDgiy(lFLqRH-d z+A1Ynou)gc&ObM*HbY>B9v0)Ii_Z5u!WC@1`LuC7PXQjys>x*TI~e%z@u0tA)3Y^d z42v?otv6M1G}%Nd_VzuJCtc~s`NR+ry{;eOpwKwc`M)Jzr^b7{y zoS$0ZJ!s*rN=rLCiRUl!t`c?xWJug8$r3AJJ^h(GmKf8I{?=R) z0w1Sl@NDC5tTeaW1lnd1(DU&L!XF%D6zxy4un|xPP;1=@bpZ?VFmr8FIKOxlq@NOA(Y+wNF!!?7uQ#>*#si@h zF_+2|1dP~cD=lv4GRo^yS&G!I>)1o1;rufv+nwT@SkvclE3#Y}gF%TT5a>!wJAtM{ zp9lN@)YAEv)eVWuuA2{*LumD2=}&mzQA0bwVasFOJiNt*!eG3VDJz-MzO=wxD~&5( zTr(Y?E%NzHzDPT%^8Qi&r6BgFZDhF=3uLWhT=(Sus+d|zzbe*78JD=Nh%?CZ{*juJ zQbQt*j16Q%Mc-F(OjeZbz z7PR)U5|onU$HVi?pgRDqGMx*%wLQ?QcFl3Bw^L^d57mmZBtsfHu;Hn@rm-`kgMP&v za5IJ=LP>zpG}aD~Ek7+>CTVeA^so4=c-G7a9;c*Iu319GKCg;8(b zvIgL!(wf|WQciHF)lp&Y$4yj}N zL~?IiYSGL2z8a1~l}dwWqZ(IAl9MM-S~_*t#XJBP>$B1dTXv)j<(JH6=B4Ak1adJM zKR#(~@w1uuy$DYINKXR`kzuf$KOBdr4XycB=1)jgFknNQG2}9&aZb 
zCGgyQ@$caLBqA$L2c-@tKTY)UIsNk5w)*RPdcJrSQ_R#e^Z@59=EHP34Lu1E*&A+k zB%o04aP+ZNvM|cOv2^Vh+>XDg+d@Zt5{LdM7iCAn$Y3n6nR%{`MO2Dj8fd2jQ4Da- zru2qTJ(Y;<9}-gY{37T)|8y&kmL4^o1oed%KEk4gyl~Wj`Hi1Fr^7^JFZ|yB?T~-G e89#fs8eP0zI`e_sHD?d~l=o=gQCTBqFZ~xYbx=J3 literal 75345 zcmd?RXIzx$w*_i6#%L6h*hRns78FsWNLPsVU9LB_G%rut9zm!`&Fx+weGt`M*F{Cr?bzk-umC);ftFl z_y3=}k(-jg?b^k6ztY4j-GIgrl3A3zUwP@&!=qQMTQY=g9W5;_jYZcNhW@r+F4X@y z{=dZ~(2het_a;YzQdmqtz^}8jv(-TS;U82Nu@{}`_mmQp)HLIz-&MgM6$#}PJ zw#m7I;rAJ?-EQ*r((`E|cAZIIzF6+`?ak`Ut(Q)1G4xQDa2;*Rw{98FS9YVkzVjPa zWB?CttS{m2yRni9|1OJ9gM*3RzkgQ>$(1VO-**4P6TWz6OQyt8`yIZcB@-$%vp@g( zi&gW}x^)tgDkY_*nz*L0HAAUhX1QeGePn81U*Azy$b(b5TCDc`lSjF?Z%@1wf1!>4 zv!zT&aJF)*m_y$kxkp^-?;|BrU0CgWdIf3D5~-T0302%a`t$_l+G`x_+|p@dW%DXh z9@847?o)k(g9o>7-#+{Ody-vO@o-CK^wJzNb7P@dP0-<;Q6UAHlL~*eWEyFu>*gy> z%ufz|zQ*ZmoTQn0IU&auqxH3TST@;`vdUX0a2ch(=32dwYXb=ZkDCjJO`3 zWvLksi;4!PV5RWwZP6IfiDA9MfwP_!vQnL{(v(12GlIOzWU`wI&T zPME2oqQNNoaC7QOed*kJ#g3L}pQWKJ|07x%Un%Bz@i=C7aJhvGx>tXq!x)gIvGrF{ET=|0aYocj0|dx~i_ zRZRSV&(f^L?c28xA3Ah?baa$YGbL!WXprmSjuU4uUc9*T*vw3ikM*b5cZb{aQ^|6` ztY|-~mV3C1^7Z3Oam;OnS!86S@QtrmnDZyDzWIXZ)Ugsku(-E#4gUXjO}bv82#=1D zH6!grovO^(CXZ3PKBx#ka(_3`Y#o;r=+{UjP)bAD4?3}J&)Mz|wU%tpN zc9WlxC@{0hEHd(%(hM%hz8EVfXKET}uxMy#s58grfd}QZtL>`}%q@xDepLI4u1kgQ zCf~roz~py$zj>km`ppMIYi@XWbQtuEpRK&Nl~q8~FLIDOefs9N_Yz|}d>>DFw7m+5 z^oWiyP|bB8lX9Q5tY~f)rWcDl4+kCW2r-oFIft#5VW87tTQ@e}!6oHxI^_8?JG^y0;-SfwyQMh>3U#eaG~){mY2rGoX z2c);QYAZ*IH`~qK{$)Ehey1cQb3cV8fW_Wba#%f~ueNxG)nPHLlZ^dde69D-snD^G z!c3l_M>0#t`CF1M?mu_#oKCIO!q*D7?t+r9mD`OD@$#OdB2pz*ad{l2k8_CFyv>rY z#@L;ZxBWg|D{#_APi1y)j&IMN)7{K}?z2?V?J-s=Y0$&E(wb%L&P*>1 z3JNj}A7!yvUw0h+IofkvA=u+G-_I&4DtrdTdHVYLBLf(ssf3pE2M;DH)gRW<(o%P; zI6m1Is5<=W_Ad!TycdG9=9TvC+c!CqTW?;x6nIc|IK8mDCQs?iJfe~Cm48;z^KPwK zXSMMC+ib!6>!06H+H%YiDDU6DAD@^wseE#yEEY3;w2L<3v`Dg5@KGI;@BvI_@nv~A zxfBDMi_Se^#LN-jHvdPDT1WzmlyGf-GBY`;fiX%SE1wO!t_!p;z4f`|lqYkuh$!p8Y3H{t*}Bs)&fl<*QZx!gb{sC?v_y{<+<} z_dO8^XXgU=(-*OO8BtOm8X5Xx^o2AYPg5vT=jjb#c1o!onb7Ok@kE=y#kvP$`aQ<{oS>uZ{+(VUDbJM 
z!L9Q3>qCNqmt+>c{>7)2VH1hm+I{2mLrE!fZ-n1SF~>JTwiui+w!0w)&P+zLmXTNd z;{42!y?f6f3Kx4X7Eh0~#RBE1w^iQXE|C_i5-C21;O~UbHYj!1%+Ai9!$gg6Wm)i^ zc`0&NWU=;Q{P^D8yA9Z2 z&a>lY{07Auh%;}Q0~RFuyG8vRX22=#^x<9DN$0sq3MOp0Fq>z0l*Mn8zNxwVrjdPXM^~{ zNL|@48_vpAtItY}ykO0o#j~|{?pMxj z*tAKQpX0aRo+DfiS09YlSe%>Ee13K<0_9seU4843%Nl-coI-Bnro0CaMg;$|Y15NR zlZIkvrV@RueA$~m2Y5=QbR?=o668=_RV6PiElpsK_iTTNQiu*fk^A&Wa*AeZ${%-s zA0HoYjCAcz{$b5eUs|)TvW4tCVe)sf!G5_1ANp%Urm)CT@LmT;M|CXFlLq#3jnn7a zt%3F0%e-h@UXvG}95c}M_V%`-Cp@GWv0Afb0b!}-<>i1OO>qjFF{O0lMa0Js5%y)t zn9yY06o!N(UTk)aOhc(DR)9!O{CergH~PV z-yEUPF_5QaIAgk30jrH};M!#t?(*%^Ir==3xz|LUsC)12?c`Q9GxT*3tjV($IrKNB zXdWpA7AtldqED=I3g#%6iKr+MlAHhzM`tdJV#dYEPNqk)Bmu4Pdj zVaW!xf@E7icJ6{Bj*D~7NFTbjH{9HGW&(|OgiCo8aCwZJ&`i-#0M2o6b=A7PVXMYnfHe&NEsGK5f? z!)!c&wfm>L2bJhH9nnu5dm3DCPL_XenRd^$>k_9ipI4e(#C}rlPf&>r?=9N9_2F2m zS#59_SAy$s(_#Af`t|F>Pg?j-l>(V?d44^^obT&1T|}(YG4z?c`dVb~7&EiHVEVz6 zj;qN?=Vi=uIsF`mHRi_zPQ~ z9J+WGX-@6hwJi6^FUBo_%HdDDH^&*Q>-L&4pig3uTk?o}M}RUML?#y2|UqPRjrF)22(P5z^?Sn3Csm`$JWF))a+WI+kbq)IS;lQQmVD zsc)}07k~Db=o=Y1I`k$#8~HE6(37WX(9|wYRV0YV$H#{`6N3>pywSUs?s>nGYHZSk zpqgG~6(M5Vlt-aU+FHAaU5Ik;f1))m<#r*bClipC;fP5SC)=G;W}D-LMUrRJCb)@P zTA$TXs5R6WpIc(r!K~c>NHT_vQ~Z2!!_{ZDuRbo!PAK24MLY_ZaLp*NX^Sx|b=UD2 znZ&S#p13;F9@XGnhQyR$`t9lT@%5;MJG7r~;`*5s#Rmb&1A>NXTUuJQvCO)12;zUD z2Y`^1%&*Z%ULE!Tp>&d!dlL&o^kkF>dy z?W#jYz&~&BglVrrsna%0& z2gr}8!5iKQ^-S-`mbz>!Q_)-D7ctOb=h$EKBzIA#J)@#v6Q~^&=84JgCWq+C6PZTk zQSpmhJ9b#&NviT}^!1-MWEodP9Al1+*|%ium-pX+gj(-ZBr*D;qBY+-q${VW$h^(l zPB~oY1QMCil|dJC>4mSd>YQVxGf@U%c$$bBN$fg?n?T9ev%%7}<;zPFdaRtTo`Nce zUGjt3EVrv3-M)UfvBbEz*2pRjQM-~M_b8twH)fBqu`wfOVxTTpWd`7kltb~h)puCm zu6lUx^fjKuos6~z>CAIR&hIpk*`40gU_IVft-}|RaLCJO(~jf+dkxkM4{N~|F{qE`SDn3V1%W?oFU>Arwc?fl$a`;%;`Zf*pjSgd(D?A(Stiu%=$|J-^5q-kHA@$#>A5yPm*a*(VR zr&`*qd@h?#aUHTP?-`Lee&E2HFHOmGWhS#VJN7qDPFr1(#F-xpT5dfsENEGnw2_gQ zuQCZou2*k!pBmm-8_a(a>A*xI^>SU(rTBZ-QLk|TO5gpBgCnBr5s%_-FS|2m&M?Z? zvF*0xxAtJYua+{hLzd=9y__ys6Cz-5g-Kepr{xUEs0s)O#9~X4TL#mlNgnPEQY2-! 
zlzUUJdxl}D)eU#|aK>X7fhSa|$pcOahEOK93h)UM(naJT+2Y^!bBQ^;9&N4v5+1Hs zP-TL}_mG4bukOr}@e5^XJ&%EE!x>$r9|NdzF~A?@v%XeA=)} z=$`T|58WmP%-lK*eHNYRIbn`xruSb6K1zv;i!*hT0?v7{*sz8$g1brjL4fOH zv24S+I62b~MtfcYB_j|QtUyXd5u4U4;RAhrJnZc3*nt#WPbZ_+B>OF}$kNhM0v9_E z1!K8O#3ppg3n7URAf(qSZ)0)0D_!pcJpE7|c#yYiM+^_ErO&N*qFzSJ!6B#11%sc8y}W^)T?>l~ z<>83C{tjFh;~Q?=g~&jvA5SkYWhn{3JF71rp0^h{Ch}Cbwdwru!w(6DYvMSn=W5H5>e-3GsU~&RFn5noNw=J(g$Y%4F{}qbDRKJMNuX$} zmW)W3v2NS&(a)c+iobdPR4kU{k-_@t`n2BZrv9QNGH7<@v z;S$1dQ_Q*h`soKtLCYB+bTACLm`%yC_^wO59I)0MTQgU45U5Le%+^EG$rNOXxcg+1 zKetQYxL=Q7B9~3O~sU+Kuf>}W)qswEra@_gHno~o- z<(P&K3=dnmvX_`v|2{R+c9LG4~eh(v^Tf~<9s5e+?9!ek0usxErFq5Mqu@gXs3c(J9xqB*Thkom(@>?7$*SFSw6 zLfMb$%cs!g2H!{KzRyIs`H5|JD1~Nn^=dv)8mEl6K9XZIAb$qC^!L*ss~t`?g=c1F zE-jAvL?k$mbsKqYPJ>1l?~(gZgOME)6$C8L7D-F)_735{}np^l^ar3WfnE?&>^2bf8NT?or%~=isOCz z&Z36Q3ci2m&Ib^e8{!`V(>D`T8$OLP>j)p;g@%R(;ApCW$^G>9d~568SGUEz=JKe4 zfm;!P?r#?u1~6>TzUnu0>njj=nk?&Gw2Topj25z-@>IOCsz-c}Rsr@_qSyTN;bX^C z%9rL+ht4pD=!q!ceRO&<4NG-F8t0b@Xk|RToM}i{BSfz-u2Up3AfvGPJyy)bV~aa| z3CPediUG3h;8pAkLM7upH*^pyU3q?HOq91fp4EXc%qK3cdr&n>i1iu77R|2&mu>dl z#IXgpi$!cu?4+QTX{hPsl=tQ#hXudI4qxx@z3UV%VWASrfcAW`F(J{%%>32g>+kvd zGcZdUpx&8C&6!8QC7|wUMr8`(AsIM1ygkQhsL^I5Kj!e_aYNL51Qe!C5GpraJNwn> z-aF(bftOf4rmf|>)F@}yrqX~6-0g6NtF*agRBDZu1w^<1}y$?=HGlBd&3$H z3uLdEpPSp;e<9?U?XI5^uqSoFKLq>MT!^-OB4XMmv%GM%R2t=C>13l~o1M|+xO^E_ zuTy)#J%j_4`%$A1e-dC(aNklUJyA94;DH2Q+xTow5fK`s zp5{bVZsxo|+4r;1pAzc#ExHWcMs7Oy#~)unoPqc_UdL>R^((OJ3Qzw#9qOO<=GI0` zqkz0EII*j59yB0*g&OZfvZOs zk(zG3%6R>9VG>wbXR{aKvK;!}dKy`nH=4Xu3^}HAb>GmNY-!WCzam~s>B$zwhXe)1 zfzcCJn)ZgeA>=rqq`prHz?JgiA81-u*jNekQ$kQONHxNvfP7s>p=x>Oa$a)&=jIP= zfmP#lLM>qGlA%p7UiSqBZK1R4AOR(VW*VUUnaR_IJ)SUwtLJ;qym|fll#)uFO9)1gWRKueMeXp zFfnJq2NncBynDBrZg?BjREoq{i8^)igDm@D6f#0GUT4qGah;FjC|?D{J#Ut+jS?;m z{7I<0CZgpd16AoEH+OdeT~Z87i{m)hUo?j<-}#MUajr`Xwg zYOKcyG|`3)8yc@Zk}?mE_NMVB1L*-|wS4WZXvUv3CtZ}?%BP`*AV2wX9d~HWK!0{? 
zj+x@7-KW&ZTQ_bL;*B{kx>T_U$~^$|H(*(&wR$*M8#Zr7Tw$>=bYJ5%-t4<1y%bQ_Ryv~1q4bao)Z zgkoR1kWy7oXt4~;K`j!_4Wpq@`QCubF zCq9Mt%f$Al7ua_2biFVikQex)p!9Y;@27sba__}7_I5A{t`B#MHMUFWK~S}CpI0*^ zpl6$a;nV*5kGQIP-(&eLTeq4;V%Izpv&qGa(1}nMhyJ;><{k*@BF8~O2Gh9SVJys2 z=nN!wi$&@4nOIFOrGIZdS5Uge1&d3dd?5#6qJLl@A;i$VI^$qX9@pjg&aTZ{c;usv z0sl$84cV5EpCF7FZdl&Us2L|p4KTL}-A+g%Py4$w2{rC+P*-#0;Ci)h-@fUh<^q@f zDy6DgzuPNeXR{B|L!^`kBWPntH%ck8#~>33S9J`W z*VYbAUYCVW&bRL|?C$O+5MkRfeapfJw!f$EC)fnx+-i=C8^}Vpz$UC$u%ODrylMyb z@jdPpzrvoOH);mZY%c=h5atGP#{fx)4tTuvx8ICsem$02{&Sr*#zVN&zNURQCb~D8yB`}dMgC^%^_iC$j>dC zwP~QuB08U$Ug`4v@ubvztNZs}zpXoVgwh8rhOw=LN(;L(uwufyJC+icz*s4b8KE8$ zw(Ha;UIO3?=kH&Q5xG<#vA8T(M}|~=sw?<>YaQ3|DO1N>){Cw2+;3`K?qtFaK&0he zGTtSWIXsLr0#6c>efWv1zDwUfvqzq~{>pvvypO)(K>g80+1BshZwTAAYaneGgIYpX z7XZ!PZP%Ix1VS^-N3;lkzX|n$dQSh7BU5We zIjux+-hVfWhK&^;hzbJop2@yGj-ymx8q>=bXasR*6LbbW@P2WyPmryDZ|jla_z=UK zX|9NYret*&g?#YjjmRvqz?{`4*_M#ah>k}Dwt)zH-P%ReWh4kGtv|E8G*h0gU;)Ti z*O(Q#mc=UUVaRiPInf;oN~O&Q&8(q2khPp%(OF>Kq5=>~91eu{V=k&fZ*ZR(wcW)g zTG-jyiGaE9iGWeJkQ5?mxTHnLeW$tJpEzeh*rKBPg?>B)NU-p}x=ma65%b_*e*wey z$#2E>a-J&ZzndcZK-YYO$h=q`<_~e&>Paf}hr|ZBx!w zUb+w+8W6br?%{(6@2(lD_`a#(>sZ%CV%ki>UAcnt)vD!PxY1Nh!4o%eS)ASRfTUeRsWLqeWL zMk-g|1MY1hMkdp)oghKfriPoR-f-B5j?&fD)s@#PQv@Xv5odT*RaNPElY^u>SE|_L z3NVE#o5(@nAQIsv=?U7B@3ZV}?a)D_c<`{O1kyZE3g;F+pMBlaV+0%^sGQpsGJE*M ziHn48JrE9zzc?tfd=XI%+JF2stAqJawEH-JpjL%hLb6t76qu59JW?`d9n4V>;tDBz zykJ*I=q=-yhVPUMlF3KB>8oju-x1wLvw%ruF4W&QcfXcK+d}BE4tD{?`$n&!` z3%!4`GVK-@7wyG0{)%;uE7B3zU1S_#8Pqj9)W&rmkI)Fqb;&~Y|aWO?klN|-Hi z*U_iJ{CWupP$b0=07vmrjg~g(@tktMltf8TErAoz#QjQjnk=VeSr<`_DiBRFblY=U%N& zMU1+->!*_^F~OH0U?|Z6kSQ%5s)j47fkl+`V)G)i2{|tQ;#e~gx8{B{@Upq&O$skR#qudUeh}CU#aX;lK~*kY`uJ-mhbag3>vAo zkgxVHm}#f1|Jm)js+!vFsSBnak%eB-jEPHUzQEE17<33l&lI9ck5d?G9lqnok9TBd zWo6mqr=zY&n4y&JFoP>2u&YejN2P7rQzKD@lP2C6twpg7u-n)Z63ml_^$cw`G-C+j z;7R@v#?=^BR*+jA^g(!pvT1f?5fOP_P~Q$Uf!LgqtM3-@)@Tf**T_2JpAsl__~%0K5k zb6!mwvw(n=%B5X}`V=nqCAmiAtuq6_6q=GlocPwd0TT9p 
z76>NAXL$+4)FmthCE0PLe}1}A%J%q)*gfxAw6QY1O}m7$4(xmfpuCMP)p_UJAmxqw1Sr(E@Z|`dlCAI`7P?A>KR|-FEY!sss+q4_o)=XNtetrqojaVmvdfN^ZOpkWy!C#eM2IE!^kU-=?R<8C@LICE^RHX4#B4}y? z&$bzcux6cMnS<wIn4=sFa4kTLnXuJLGC3maHw<^G^i(t< z%PO^`5Wb7I7qziGKzS4Vg`|+7mvt7}M^nDgV@@rYhC_L6-*iv|JNL<9^#ire8@VL4 zNoj^qEoAyOK*`Uy&KkP^u9M~u3|`62aor?Is+hqGQ|aWFh6@S`4S0Y_MCQTjH7#j; zRy|t$x_MbkUIXuT(tnkqc%$B?S6sKS7~JSlo5NZZl&JVFHTNZUxU(p?J&zI#%hBXA5(YQDe&%8gPpombtz+GJ}_!T@7RYkB={63ZAo_cQXxs8%{Q*@kkEWQh8?+nveJ z>MT;nXj1Jw+0%^54M}a(B}kE)hzk?34M&c&QI2weHv$716>sPP@)>3XiF}*3OUtwM zK53;92e3|nX@;AU1M`)s#A4WTKQ#9Fy!33}=C12ioFuQVh+`!&l0ng|tWq*9FeN?5 z6cdY6YGzrE;;J$5d~uyUo6k?rTt{rb-dbyDS5#cwKK1%LK$uSEFHnZ^6?5PdvyLj< ze-e}Nt}yEQ$y|UtOJKW*CHk1$4k60`=-i?9%=C+~zK8dcw-V?%mD^V8QAk8*x}g8>p$?z1N#TZ! zb^7?v4@_ZUkn)=kD1E@Hv*(*<=-IdNekF#PVOS@H-D)Z-WKr4FO<&6y^1DAhG2z(C z!AVO06T}yMeBMPkVTSNq9M?B6fD{$Ior}v(cg1Efi^O@Nt}tCf)X;s1mp1qG z>C=s%M`fwT+1g4Ix`lSO2GhXbM3e6tCn6NobM{lUTVc28yy{8!QvFM{Gi+ts9C_QAI)!` z{{Gn1)(1Yh^g-o0;w*ZA*r_gsM3Q%85I*S{&d`ezxemKI-qd9}Qtma*pybX~n!*-; zo1p~ssANVcX=5*F#6KSfFBciF^$r7`0!ylG*BeJgQMPtFEAkBN)zCt#sJn>(U8Rzk40KoTKD$O z5s9zaRb06pcF-{WdJbdz3Gqinc&x6t=K0Xc)wR&uB~>f4AXv(6JZ{KQ*+(XtF)%rS zalJ|}CVIR^ns$Somw(xe&6wB5&jUSEqJdAwVi5MkCD|Mu>O(XO2J`&<zb6!Q|ePe;ylzU`{V}lEK zySE{zED>-%E_AQ3kSG{+{pNcbX6{y+oPdO2r6oT%Q9T_BqnBWv4@4fN+OjEz9wP@f zTRBMSoZY#Skd|GTQYZ=&TGq-7$fJ^Z2u)3wd1JCce!_eZZ9pFj&ajgXq5vLnpqGfy zO#$Wv14AI0kjd*iuPC8wpiG>YI<~iK2iqZOX`(6Zl5)?Z7|t2JLbW`MvY{aLB&(%x z%}*QEi1Wk12UK!D@n*y1Ck*uiqTZ|C4;RIRorg7OQBvSaU?^m{*IbbKu_8T<)GEZJ zq!1rUZOJajX#pF8njT50O=Kx{7#r0D^QQu5^d%|J z?v8zy=BR*dSOhwZ~2oJGC0zYIkPmBOG(Ja){Af*$IqKJ&4 z6RSY`s5|@+AYp}Q;{{dzre()Yxr^mMW>-HN+IFo1TjfyHN(0@^I1X6>SYwpyB_|%> z-VWmP!5~>f9wy>5R*GJ$avm|&Vh0PM{Q@_ph+*FF^UrD!m?r^1h}$}0e8?{-AdX~^ zIrul5LA??4VRv^oxlz(z28aAe0+StjDH}Qzly<8BdD(Dca9r}wDoXJV;TLdtn3u3WEg^Etx$a%6k$Gz;t3N(1Zfxqr3(aNjo-r?UOwWxuErkv zR1KX@4|{g97?`O2kP&O3Tf#F%y3pok#xhalS;A@T7Ikxw$xOuz48x?a4k-o$ ze%RjL9%}(kOfCFb)3Y;wO@eV__AD_Wn#x?dqkV8c-!SzF9pP{UY6G|UA1>M 
z9m6ySCWcsg(Qz?Kd@xvo{a{200Tw~&=2tmXr}9GlD-&k)gC;GJ1f71 zj5wj?+{%{7Xc5)~GY9LR00ISRX+&~dX~{z;f*0suwew%aXnAX5n6$uK-Zh_#m5^=d%?VrT}LqVi~_ zX&RFxr6!*kuZwPY5QYgfbrQ691u9CY^nE?DhjGab zTvDWU_+v1Y+WT};!sF4W}q>_yK?yLzz?f#p1n+Sv88{I^!FWGdyk5g zsl6O^>iWL*9Ov}s>aDS+cvQROPHneWC16_EEd^^~u6%jE{H)z?xYE6S?xZLA^m7GB zKj>-7(JzApjbvbz)=6$m2FNi* zCpW>LONvin?rR0RFbrX5?TE?o-o6!$Qq7WB#;~GHixyBgH)GSPuhujtE8go{<#vbb z*Q=FJ+FOe)Zp3^3ec5f7RKRon!(W945K@AXcm+C$*mej7f~o~05d%|M5}J662ctYT z(31b_&8rT5`tpD6!|vB;-SpqTCtuurb@uBvbqwypoJwdCvQNY~0G75GAXpFi979O4+F}x0+I;9PEfJ6bvm8RL+LUx^}2zA?* z5<>3SmkXyRxS?SP9Y^PgffqE3Q{UU)NEcJcF@r`7W8dg#2F7L>Nz~Q99a^)F?aLD_!;3&~N)x1a#QIYc z@=PY_Yu8D{d=S7(P!bg#kRy`G+i|(mFOg>NlV3OwHz~lh@IJ)(R|))LRbdpWiFBP@ zIqD=R#8h;PFx&vI=DvS^q*?0j491iRqit=V^Y0R?PJfd{pxL8}`W-+-xMCK{Vh9gF zvku(-ZUu%Yq!FRdQ=QO;O`9d&4LnfVI<6b13Hk!QBhwEsVIKZ&>&o#CF#xl~?TPw0 z2I3GIUIMJpqY6t+y^F^|m8XY50ONXA;5-;ZS{eGJO@g$E5v+#$L@v98T3_zLj^{~9 zNj90tn5zyAQ>oM@G)a)ZLBw?{%eapThGRXShlfYoFAe_t@GkVwm_$aQ57vUo>jF-> zGM=2YamcXo&BhQ2m-UkA60n6*NhwNmL+1$t4rTHU>Er@ms|LCyU$GlSti{|fRLpEL z+PCjLctCm>`|%uPl|y-1Sr|xQ7DbWdfCbo8;?a~o42x<0Pgh`KwQA2xX5*41Pf5-S z0Q*lQJmLWYK1%^*Mh0Wrg`^X;*Ir*=u4{*$<^+=|QA`Jlu1oEsnktb+Q zL*&YQ2M)+#_cX%)LMD^*)V0$P-C95`r`bB1_Pu?KO7}6*D8Prv%KPtb0Idd^G#+Oi zSi(9dYI_wz94s=47!~^0PA0N zlX<$A0X#!2rdqI&3SIf<=KnnDEV42@c9VeFOu|1Ba}X@4_^ct2IqLBIAkoJonfA4b z-{oBO?8aW~8?`IM5kOkTsFRSX`xGKE@}w3a7!>xLC;WPsmvO|#NDVPB;Xy;tc%7OF zyh>^m9ttS6of(OME5x#cqRu*!0UMqaKovC(;lRB8J$PSYp(83Q>VQNrZpoPH1VOZZ z1n_{qc4S;{rhlKEO@BeM_WD=vRX10IOL~q1iELS-25eT9eEhkXI1>Em zdWClBumCh}v5DIbnGgMvDDjoaQp+$Zx8oEwpu(#1Xk*JPgvpw!swz8o{CW!hsG+xy z_wPK$v3YYWwz57p&Lvbm`Eb1h`_^RW>wblg=J%-~x;)(FI~nK&Th+-{HaC34w)4eB zw4K~rvE61y|GWLXb`#gxG5r82sK8J>U>-Q}6VRV@n6$V2{=122iP7cX5SLa`7Y+Pd z3Lc$AcT%M&hY9j(O`$+j#Wq3zGM<+Nb8u;>XXCK~#r4hM2qQJwF~gEj<5EQJnrl(4 zOp!(qz~`yZV1j#d5Gu>bazL$WJ=$5sR&^o7Zeg`)=Zov4Q}{nV<@2bAiP?iF!sxZ@ z6~}|!)I&F)1+n7{`%8>|K#e?Z#)(7=Y{%;D4>4lk1cGEZqM2lKch?)0dmE4vlY~I5 zM@tlyDKMWEFb^U-9siqq)gB|_5oecK$RaMV=5#%+^2O;0be}kC4#Gf{Z(Q*c8AWuo 
zF>vQZe5k`5?`TjF;(kLNZitv@-$?tk;n`v>ZF z?|GIe4)&p7s#a7nGpvsc;&TM^(q7`03nND?_UsUj)EPzt3qB(-)0UiSw4nwA$fH0+ zjMZz_C}*N*yg)^!Gm9+1Hqg%`fCW!I1sxswFS*m5)1*IDLZJ~{`M7B$W?5|^0B29U z-3}SSeI&ao4;~WIAxw6}l`B`u(lc?yfjdqY;Zd+J`Q}UMM!PnT0#*^j$S%%;<9lB| zHg5f{cb*I1s&-*WltZqAfFYXj#WzOJe1`xl2 zuu&w!kE)ZH=xpQxR75ia%m9}+QEH0-933oRbWBD@=Hbqh2NYi_xsyQ1142MVCY<2D zEia%+)E?jHmkEJGk-Bnp!M0ty!Y!Bw{>59I6xnGQ`pzS3U(+^6yrRazq1QiDOak%u ze*p|u=p<};X#Djibm1uFW)h)bT>y?dG9SKm%d zm4imh(LPE{!XmcqJVS4^`&Ff;f-uH7zDAx90i-J!i;Wmrz0jEK*6sfQc(WBN0K9v` zl>wyYhVrE;0g`)&WgSaE6J`>E+i-uH0OIg}7+hIdzjhY?hMDx2wY7=_v*T4@-;)91 zFcqPXI5D6S#p2eqsgvOEQVGy3^9EW!#I+JhlaZg8J)o9a5T8zZf)^03Z|QkqY7$ypFnws*&QFfH{|O&IQ2{q@#%Vld!i?<&H_(%bPd;83yHG7(U)cv}YVva373x zB`1%d(pQ3AX$~TQ(|Df(3DEYXQ^D}L2w;m8o$gqfiH!f638ejej~P2+bjLfJkin64 zrLnPr1ys=0aW`K+zC;d?xAL#b;~QhMILPMwpV#eM1gHO>UjJX8Zq`{q(h1_Y8ro#?nhQ7ri){5RTwEE5%hn^U*E$v)=^@OAiib*zspe2(F-D| z?0aD|nu$oS16j#v*Lw!%3EE@T@q~g;Xr8k^$1Z_JUBJ(&Yhpp>GB22DQRv4 zW9pdC|C%rrhx|9}vwP9Cl0%-Is<9n8&8~6oG!9ik(%L}VxpGfLZL+bkDa*Kuibu@Q z1)MXwlISF*VuP7#j}k`+gl@PBO_iglageVOrsQzpU7~jS8Jm;tkYn=Hc5nTo^BQfX z{yTj=sH64Pk16~1u(bBT49VXYjK5g5HWBsW?XJH!l0#t#o`a8-@Zn~JwEamW>t_>6H#^-WAWkX|v7OsRp=*KwLpc3FQXa2g*%IV!s~jHvdy3xqLBh-uc8TCr zXar%w57Fp_&BUvKE@HOYIAs~so82PB;I;DVTkrdoO!v)Y#DNfW>#5)N^Gii|7v=YX z=Em#+7kN$l4MGNW%jV62@gk;8bzf0sZh?yk9u)IrF1-1pO8}L+55V)49XY$?X2c_E500P61LV$7n4N4)H z2J?_(IhI9CWs--Jp@k4 zRWJGX;&dX?y?_m2EXvyf=x4oH`Gj>OeI^OvvHJK%@QtXfJdpUg_sIr z{NkPt#L9aIP1bc@fKSvJg;k`a+j#H;av?i2JT00eg{swG@NO|Bn-D)e*NOzYg_~Od z_N=u({`eNFMMb6WpIA#$lK}1p1q3{UK?$gFn@iw^KM4N}84J; zdWVOc0e8FY5A4E?ze;?({1|7|A+8YV7n&!a_$3e)8)!PXXeBx33`l^?B$4bOP2KL| z`RDnc{+uha+M!pN5gr97(V%~i^f6Y2;eZD%yxj?=Iy8uGx%*=ZZrciVC3`FB+SahjiVPHag0@jNh075!3aM}odQg?k3_n~uUxIKjh>Huy&!-Rqw;%KIM*;6$6A}-IR2(9pnopgd}*9_Ry74l)(*7=j3ZPw25%0 zB72_OM?lmewv82P_)wn5lDGBb&Q$(qp?y)F^M=1_ztZOl(Eq&t z?;juPW{O=-P!G)|-MJk>2Zgv0lF(UeQZ(F*xxEqj;Uyb=$TC zh$qn3bdCnwG&camF&(hP-)D6KjZEQyxFk>jXe7Q2oeNW$*J(#|fT;60bQ3PE$L{fQ 
zaeeMIghsnkEkzG#T1--Zq0~qMu`>9wn*_moela^l|E*Hav1hNjcO>MM-Dwr#XTXU( zHQ&Y|nvz@z)(?Tkx1V%kL(w_}X8=GBIi3af8$QHOYm{lbBwTF#JMf_ade|~njF>0y zqolN2l}{wAJueZl*T-HN)Nj!&v@>vKwQEv?o<1e6Ov{<|bKyA!lH7?|0{%B_~g! zO)MIMk5Q{Kw&v**q~%%~g=x-8tR>tnO0K8V|5yJGM+;ajh zhm4;8$*}EK8rfswx8P8FK#c$`tSBuQEN~*$RVd9k6KtZXg`D?}aZ&bw@|cF;dTVpn27zK2j6? zu)%?fque5WPn!C+@Vm3*P5hQumiB35{K??^sOG%sM>UnnUp86>KMhZ@@YYcp`D*l< zZ8rAH?r&`{QL4cx;o%ODOnR}>&DO!WiH>P`97k7BLI^)$jrlsJmi$P3}N~*lti@PZABi0q52$1 z#q8A2KsMy$Cjcu=*te5#P!W;pD!Al`<6R282*aI}o22vS@U5N(?LJD^k60_D&7v7i zPdq?%#Oy(uN)bnPGp2xsQ*eyN__uG5!O5aIDY?gcp8ppQLgfWK)qmRGH=^NWxHjP3 zxslTVyV$gj*B$sAG8g3V{WZ*XKWRcvA@wb)H)6#RCjE7%YzpbttYcvU`l zag0`qohs7#00ECMkqip@1e;JCkr<1*iZI3vq{bpW!{n5r!(Z+I6=HK>2X7&)ZD!eA z3K|Cwpl4$dyEefYBc+0YWguxaW6$E8m(#$K?kE(oqk~h8N^s_l64oCz4hF?!Oi4H_ z#l)ObGmRKW91)}~9!5JU93()98qzU|lrr#wfWm)R4_a@nEc&+_OxBtB#hiGr_`_Xr zmiGn9k_haS_1#!RnMGT^B30?&`CSl9NnwE|)erx69xh7Slaoc`5V^H!jmj!243wq$ z*Lu$q`rZiQZN5Hfo`@6&Vev-V z)FBcOzX4JCB4^sIGb!-DgIx+H&BD8FJ3wnrLGxxJ+6Fttdi24Bf1db1QN9qBUP1Kt z`En5LQWNh^5@e*WZ`b*z98Spp674Zk$$%GPE4cBR#2%MDE9G;}pBRsqk4>JOo*MI6 zCO&``gjhHWf+=l)*{InxsgPOvvXoF8l4bzJ94y5Yz*)9La>4{Ud%*;KQf6XQD&@f$ zpc=)2scAS~KuqjQj0g@v_T{N~@r`f8Mp;1~0GIw@t>57+qQ)VYSRt1rxJ5-p5%!Ec zIsf3mzyS<8b6yCP!eh7uQIf}_i}(VpQU?^!VCT~B<>i%5Cr4kQjr|LZpIWH-mCdls zZIwfhpTS`smyq^(6c9uibM%^o^dr2ymE!!Y9w1?&iYK^7qX`;@)w7t@EH=$Wj3_ZU zkTZ19(h@v_vn7sXmc)Rr8HNU-4nZL|_x{8=JQ}hj`d+{m{U7YTXH=Bwwl#{{w!00` zL1n85+LnSKqNpI*Zre~GK|v5nCPcClB^KSb(rva7P@;jHC1(tc2nZ-Sqmm?vl5^d; z3f=qcbI!i^e&-wCk2}V#F&y1Pp{m~ZeV+BKHRoJ&z5kf5>O~oB=af(?VLkb~_da?C z==1o>Ymbr40G4zpR)V<};VUVpOIBtXnaDYxc>|lJX%}RevbG&p39qb}_bro_HmIhr z0P_hK@ck5p?M^Ma6U`?cQ8Nu;4rI>Kv->9YiZV6H?mYKkc+MXTV4=#ieMpjlTAs`T zN)7_WL^?g^(Y!|&*5z;b6~WnH0#b%$sMY|(v?Nb~fQw<7+&<{*@Zi*TZ3ceaCLX1~ zpQ0PO#0zBz6{R%`M>5jV6I?yqK`JvGPchK`MIkLSA7C@!w9q`^2$qggUfQHk%f3cL z3c;u7&GXz2FFjwoR>J|(pP7@C3UtGrR?BF;v^XHArbi*~WH6yB4RZiKZ3=%02YZ5P z*)~-KZN71M^>7~_Hn2K-hF^{}1=hLaVYCzw8jhayM369pcAwxkgWj`%lzLj|Kt=XM 
zItf!CP-ZQ;IVcAEPI#goX0lN$n@+EHamG@UJ^NxrXkeI6}mNeV6ylJ&earq9FhgW+-?sR`WJ2e!ksQK@&Rlur>Zj1umzf zJT_0m;{L=VR@s;5UfGlQey@K8$82pDDA4sxNaCvV))Om8mxP!VPqH@@ z#e}>QQU(>e2EHcsQsS)Gguuuw|NWaEsDUIl+@da#l(wWcC(lbPc9eRkp=t5jyM83x zE^$?`d=8Es^BmKhjTw-whKKgeL!RA}oXmpVq-nhrljJmZtx2l>j*wRXxTj=sMJhys z&2e#0Z~FPc0{urQTSdo7!6xFRpCIY@g73)j~ zmC!I2eO%E46;yE?{(e6(^pUR$CufqlUKnsGf3og;jIdn;Td90rF z#Y>lnT=TYDCsqc%1)HEO&*3oDctG!SEGR&ft4wBN-9Vws?-Q=pZP+c8r<%dXM#S3gXC}!d|?1@v|ir{%6Iw*$tO-&)36kV_Ti# z0jXk$T3}mFd8Sqaa(}EQ6V(WI2-;Bgv!VGh)-i%w6@6*zpkpWT^f4*o@7Y$1W2K>w z#S!lUA=MrNQ$d$(@m_^q*QNooaze;EBduVm3pXIQg9o+q~c-h#7cTLAxT zgV7h5hl_(ytqT|w(aB^N+TtJs-K;$V=$VyP;UIDr^gnlZKEn768`rA^K0Rirk)v{XP2(%FA+9714Op~|} z#LQU8!;i4ry1F94$Wy!cD;Pq=lKT}BqC*D`T%`t9U>;ovZ<-)~2JMCH;Icr}6C{({ zs23bz=P)upVk=RzcMj9Pfrz`gfYTW)#ZnknjJ-Dm)&3-(@qDV!zi8E+rp=H`p z7K|RiElg?2uRIs8_>l>E{_JrnXqI-dZ~OV_*QzEI^`!PTW$7RngkKPBHi?fSyM5|* zg;fM25?R>ooClW^#tp8DcOpOo~R2ScAHYOO28qR!wc>p#(e$BXqB5%esu{|0FG;xlHtSO~ zd?Q96%`}~_RB!}U=8G34AD#d?sEjeT4ODPU-QwpPxv*XA%wWI&AFD6lk9p49xWGhc z5;^uMhS;OsJ*P|TK3KfIvbsno;-0;~^bc}Nch6rY*O;iFvFCqKu1ll(PH}2=UX~JY z{!0GO-_ng-`W+;O^;45!Kk`fO=fd>64!%Ry#r?E36nh(McNEQF?=nL!{>T9OZt zM=W+go3jCafBaVu3UH50zLmy*cqkcOso{zF!L`EiKe-574 zLYK6qt6)EzxdI#R~dty0O%cx7WmXWw_o+=YVLuj@fGYA-_I-;xBJjr_tffg z;J`A`GbMYeV45N=4rGNdw=Bj^FjRNT*;85y98G|ENLiVEddM7F)rGN3C{aMdw65#9 z#Ci&WXaVd-Bg|_^k0{s8GKM0WjKd*zCQsNjesG=I|K8kv!OY?MJ(Dg`@|#(<07USbV+78O<~a=E(sl4O1Xq90wT z3y`I4#|0t!0x3Q277Ww|WU(oV3QDahm+Oh@(n3S-(gpj%l*##JXq$*BCR=aHY9Pu>tn}W32S6FhKT6EE zBbQgd52L`6{}c@<_UPT4KYxBCvLoJNd8$~E^nv)a?7lpz#2LRK=+yQdVtFT155*+8 zv;x8txWU()aC|qJ2otIXy)7xQnTNm}!TXJR8q6T$HiJWtear~HJ+qwj(McxiR3_0k zClwfKdGh8ZHSx1JN2Reh^T=e5{*^R2KVmDM$qSI@Z4dXqk9I2q}oImI>as3fD*`>|nDK8q@(KSA9-3{rI7?pje?;fGG(bQ;Yc_n zl5!eJWCBLkFN}~*BhG{`%4OfKL;_SU@ve@7uq+aH5dH6dU=w4JjFL94&gu%|nxI0x z)luZuwmZIdQk#aLhfMXLKe1SHB4pL12?qXh3(SUu3(_r2WCWe#+>d0a&bwENQWovz5xwm)kpQyk4YFb`ema;S!8r!q!&8?njH zjFEtDBsB*r0aUsb=sc}L;Rx%z_VaS2F|t%aFOU25unr})K#hQ~m|yQH`IB1|QauS| 
z?T+EL(=Tr_pf-DKl>rsTvNLkDxq8<)yl~;30_m&qRdX8FJKW<^P>mx9)nvE_^BiUJ zL005U+2(+n=PlNTN)J#n?0+kW?*b7)c#Ko^a_AcrVBR)?vU>ROJihnjo&)!nftVS8 zoCcQ}rl=}c_yCAX@%MQ>)`yz_)~NO^YnHvf`kmZV8Sc+3uJi5XYrsNP*r4KrOfyV^ zoc2-%NBTs}S;(?79!$bHvj^psL;J&r4+l@>0I6eJU9Aav9d7~ahm&bIf(pHcx#`$k zDsb^(YsCIoZ58}wRje}2`)$9fYDsb<(8SHCh@i&M#`3lMH8m7HKFF40xUHV+)T{lM z?^j~L&1X5RQahM978rvm|#0ywd7L|@rNzg0+lKmo=u7s%p zS|B%7*#8@@`bG(%@`~#m(%nuhm=(isVxZR4kDS{a?mO;ULJ(t zAZt+!9Qq;j!tYUVRhvR8x>CW}E+|G?US1#NHE~C3LGq^39EhGszZ(=oBF^+>-^n#< zG@kGTW7%CI-BJn9ZaXa91I57YwMdyj!m1>bvTitTX#fr-rV54pUT8DP*h*lt99no_ z^Kz7&J!u*MRTB8QL+8(@&-8OawbL{Hb>^Dhv_^DQxA@V%Y;5x)VfDS z*#?Z@lMs<)&)T&o^Ti#`RL9(<>%tn0MuNSNmzV4(o+V<{hR_5RtS3 zXijYFIsV&2dGF4iJ9iL>QzaTFNI?$Zd5bp$)(ODd@Ah!X+Wst%Y!5av*Lj|pGXjD52o$>56wjV(c7vHA%Z*mkj_J-AN6gm<_DTe3M zZO~C~@{I)o0g029G)6iZ7`cy!5_Adb6~ii2xyRKE$*58B(?j-?Urz)iedg1rPf03H zYkt>#07|0ZWdx^28b0S^-_9l}G`#O((;x5R0LXG7)#WxSbbztJgJLc0XQM6y!c#C? z>qIn_BLPN)=Xcnq@wY_hMr9N({FAuXC|pL5A@#k9f+h(C%|&~mUYzs4|Ngr`pfvKz z*T`pfoP55R%Sm(rf2$@yn%D-a(2YSef)N*Hl3xWR3^t7p6jyG32Tu6%Lebg4>7geu zha2Ag*IMx`BrrrRVCPyU8Ap~(BC^B0bLEaiy;8VY<7lbC5n~5WDzZJ{tim^Z!vlgX z`YMZ;07CA|W0Sv(ZP;f0^m`CD6YEjk1@&Mi98?$>CHCTNqG?6N_X+f)O9i-HKC{2} zA{3OiDbfIT2)P+??GABxB+VE=XYE<+;^f36Zay(NH+ikH#p-`x+$VgxaW4ET#vQWQ z^XGY&*}(o#fl@1H_W7;PTBU$C=_o{;Bb;LP)%G)F33#G*nE63wpK$vS>)k-d;ES*S z<5#;6*qrt)xnhDdBmZli=M4=F6OPa{BHQmP`s?{~Wi}0aNXEH6DjN_~z3niF#6&GV z6fcUffg#?B38|!*g2r2)e+BdK&7^bGnV-j5(Koxf4hal02Xg9qfcQlQWJqYyaNpA- zhC_>44mrl^YHumDycdnv9HZai<+>$#Fs#300S z5zD&7JrFZc0YYR544<;=1#mwXBhrass307nPW|odQm6SVQ z#Ia)c1RnqUyOAY(FvF;@daM;Ok&hb_7a-z@zPmJ4poqAoyQk3uN`{jQxcfEz;;GTo z)IhlF(=S|9^Udi$R(WvFiE__#{Qc+GuH)1S@E=}ZCYpLQRHGOk#+{~)WoWJLIxqtf zw8={zB;;#kFY9nqGBA)46&_C9U<5>;u1uP*NkAm@CO%+zsWk^hXM4eE3MTaOkZ=1h znm!d{zyeTOL*0tpl`>E~0$spr6=(o>2(p*@VbQ4dNY4SSQ3PQikl>^f0|l^`#Ky>8 zh~|YRtE0=|AD@)Ho2%yGvC>2RQbEWZR+TRBVXv$#mrKUTL&;1HS(7q3sZ(1tYsQrV zD#=KIqS6Q`1iT6xGDAf3YB(Y&bstmN)0kmPCVkYNg1n#tMd+ZD%9l^xr8YYR0yhz- 
ze3E`DZV}-GO>od~B+{PdG9kSq3*$({)9P{D)*Sd4R7GkEW74E<_=2y#Is*o2;i5$q znDT{+*Du=g%e(W8UzinpzS)4b@xw@~ndPWjRR?`G8;CM#3T2ZGwh(nBQw=vZHa2A~ zGJEq8G^A!Rvo{+k7NcL338nTu(`IO_*E31*-ed!r9Zt<47y#KrKk?uGE_Iz#V;A@y zGjx92J41vB`wvkjbdWab7WG5PUv2vGi5ki2OSyFu&pi>jS=T8B(;0;cNO-G2$A5_O z(z7ApWneFEQ0XBa?pYUFXbz%&2D*PIlu{XwhM!PS0&ZNy)kpGJ*?vAMRuF3s*%f{= z0;@I}T?UX&efa`63qE&?3halT5?ZKbvg~j~r+x_Gsnh7fmj}4( z`mE7EkJQ(T|9%gZEe#}kWlr4?|*)pAjLBLi`VHO)ps5b#NRjjf>3!DJ3azUez&)9H(fEBTo$#^ci1z#angdXPG6K)_O!Vy*+y zj1p>ibZDVSr?C|Hn~rDIc#8M#wL_mWMbEQw#(_0>ZDyilRYh_SCM>*Xua`y3hG5{f zeLFalC-s*u;i@X{)eedn934i-l18-lfT)q-CsZ_@W1nQRoRS!(H73EvJ#(JV3w8^$ zacr^l>*%>B{4h^g-N*6b!l;2)yB!8cI740m7@6GqsURt1$9{?byHkiWJ!Ofn>i2yF zmtr}Z=bnQ)Arw@S2+mog@dr^#fpHJgA!gX)XCMAYyVbgPqV~^p7>W7dJffR5p%!Zt ze+Au}Sw`+MH&QY7n`f8d$Ayb44Pc)p_w&z7S?!ml@+RXp3+-I9ZpUGBBNuVKZ?xnc zU$8!3ZS=&8by4^eW*TZ%`6`+7^}0W|Zmo-oe`(;j@x_KG&#NygaViIL-P(IU#94`H zzrWs55<7T(X5fmC!r;|SYHH37{f6E#+TwaW1st+9Lqsk;R0j^leC}a281(*(EvItb z7mH`tYhQeShn_zaScXcDT@TvE&Sy+cM4k5CTT z28{X)bCymN3fqVud@5U#r?y(vyjFv~)a~8xCnWT5LQ<(4b)%DayzKLo@ulncnnlG0 zDwua4O*^sS1G&H>8Rc;YGA=DS++SznEx;c>zFPMEQzI6W zU24%75uD|4*z6E{WzN9DpTNNIX#9e;bJJTkd+t2@7Juua{mj?p;X&n}+KbmSefb1l zHF^qZpRh|dv+s|SgLPDXHGSn5DnG&7xeaZLe8S~Pfx#<+t2XP7e>)u=5{8 zt(NP)&-6+>T$Yk#;?2Gj@q2_wAXF;0^(hrW8#U1-IN7nPs=L|Xn}Y}a8lbs*85oTl zAtJL`&2hNvVfD%9ZLfW3$GlK+0^S#XJvhhb!(?crVfp#abz#bWJ(CQ zr!a0$b-K&? 
z^Ne;peX13O=HFlTm`l-<<4!I_drGvd}zX)pE9=0;B2t`+?eQCmRR*l26Wbd)o2_ z6G9ylOLYLug{!nm%vB>+>138IN9|49Q$v@ztABG5qUG~lE(V3wnbvptO3jmJjV zX4ER*DC5B$7?5}B7nT>7gj8q!R%a>pV>-1r4zekWKL+4=sUldnW@*0qfJXN3Y3dLdQwc(6|5gl9nIcZevtOL%GNGrGD0 z<(8q9P|)Jw>wLGfBah1P`Imhv;OpY<$*X+zx{ueDD}sRvdOHMJ^84&RHLg9Gk;9j3 zJUN84CxTj~$Tks$`^#htfOv#bDSUi9FYNdX1dhX`r-BHAlz?HJuOm!zIWsP(5~w|> zx@{v9E<~^Ha2(8$$fnCTa$)E&O#6yH(Ni5&5$&59dt@y?A0PA3U)NV>x_&&C+A}Dk zduV;dHB)y5NrZt$3}bi|>2>w?Qr&<+ZvmnB`8O28{29z{ z&XKNmQ)wwX1VN$-+iLJkG=7MuEv2>C*Zc!!X>64}TKeIGv&Dx-0bz9};;VX^#oWV& zpD^whDeZcuiRIi9WZKGXSj(BE>Hn+#k+TY!y@#up-E?N_iY|8l4-Xzl#3nw)KkOVw7v(0+_txJ^J$T#*tBgS*Kda~ zrW;eD)iB5``0}!4xh`o%(w0%t1=meWFYt9wgsSM8D$;e%wL9}7A85h%@b{OfOfgc?cbI<&w~1hd-g3s#RORB;vBrJU8yU*VI}mof66pI#}KEGRb;#yn6L z+lpmDc~0Z>ROK*|+)Qn8CVOAxq3%@IjMJBXe~&aE(`GDT?uT4CPd&iRGr>7ZHxi?= z2cP$;FY&l4x$nC<%NUsM5-mPgdGSjBC*SQiFQ23J{mI@t-!`}h|0O@3e`lESqv+{! zJ2AhV_K)ZM`u8Wd7$<&w_pR{Uh&8_**El75Nw#v*^N{5?MOu62*c+Hv9S#3*yuV-Z zia}-Mpv>qJjX*)>5bc>WMdLQ43W3v@>`p_gN%o*eJ3l;ka=_ z(fD9e+eQBJp~v&KL3C1)j^QbVxoDdGSQ(jtj=*e%lY6!ErXBoNpMP`RsL?P=Z9zn{ z>tKm%k6w#1Ti>bOn~*)@7I1l@LegfHXRZulz%t#Ta3$lC2YYn$PV8BkY89wH z{QlOu%^8eX#qZNw(ri12at2GcpQ)2nRs6oz+Wq}~nRdrKm2;+DhO2_+;?rJl^)>LR z*`W3^(buTdZ@mip^5yMTHJke|srt1M$GLv=gF#=x8FVp!t$vpc3{z^+{vFNMd5isI zcZgxE>v@Ei5lP`|E4_94v$s*Kh;DWk>uGk8xK^L4I{Ab8%SXOOxpn3qJ_cDGy@g&C z39x%Z6PdD6v{jQ$>2AKz5JQExkv9%%L1=lfVUL!l+en=-6k^4xhNU16(9;==8h9Nz zq|@Noaz&?6a=x4+l&y;{@JqLStb%q-4*2E;pjl=e2>pG~-Dj_kbE_!QLQiu!uMke% zk?rn$&5A)dXS#pPTVSnJhQ2!c*QGOp`uc(Sy&Exv@g{WBCMX2Ez`ep~xwHhj$?}i^ zk~XKF52zBd$SbeR9l)~Yz7qCD+0KcA$n!KB@|?hE*g`;If;on!WA_!-N}Gbf8i5px zQ3zfp^?ZzKAL3X{#&2}4)5~L}X6IZj4v3bZtPh!DkT;zXbbfz-yLI!_$pS(9%7aWc zw!Kw#8m=!JToJ4W+-%<8_pH{+n0*7JyGx=p=R}FcrT>^W)h!cx%SgK(QC2WxUkUEavo=uW|3RceTSDIWyQ@jlaI( zt~HK4qsNN@_Q}<_VH5*Ahx%KTAp6%he${pF0&md`)>Nyw;Na6m5(f}{MDwfx=vZa6 z`z&4|$$%bA87)-g21s8;7veqSb$5aIRX(oza_e}qo(itIN8`kB2|~ zq;(jzA)3|dH)FL$1=jFZ74#s4jhr@=(+XA?Pr1MYIf~2GpYPxNK}}v}5r2+BLul`g 
z`o27)pn#G0f|I}Dhf_l|L_#{53Q6z#_M{jT86M|MkCX)mk0sUMxaqZDc=2b&nk19u z;`&GMyURwbYc}7Qb}Do2TeE9E&tt)d4<7u8_YEAha)l@Huvu1sg5mvywT^~%+3Khr z5^B)k_2NMqf4g4VjRVZCpi$4sCg%O@n>~51vD^>v78MmuJoEPHf$44?&G_{6)Z)5_ zmbK|y!>1-{TzbtH%s~z`vJ-E8NWWVLjcRYayl?ybjr`B_4#%hO40U-E#nck|^pBf) zuk`zSgpzPy-J9swXJ7ySKm6-0xu@m=_uT=n;3)?7xdZh?K`MfQT9)?BXm0Vt;Ootx zwwO?{S70=cb2GS!+Bu;qRKMW0%W?u6 z^ENzLEouU+-Fox@qaGktB*y>jARi_OH@2Zq4e>$|5w_#BElAHG*H2fBUZS+H#pLra zAd^m*t|JMakc1H#9e(CfU6&EY`qpCU(W}xFBPn0p+9!DTCyBwvBm zzSzVT!k<^HhozRbwhtP?w=*T-_E|C^odVDON6M4nrXfnOS52UYeRcqU<(#| zOf5kc(JLYR2zs{IA&~P3$&u2OqAJkfyP%Y{OUFZ;aE7U|i(XY#3&0mw&mvgP(TW}& ziV3TVb2u&&5wdp6I+kAEdkx&#p@#O}*HQ|;Sc=i!orxMEy_HU}pVt6Btx{L}^OmpO zODmR*i|;sD5M!7l%Or;|QCq1JsJH60J1t67Byy7TM^wc$p+a!>zdnyg!`Iu_*W^Qh zjA`m=Rpr=SDVjx#F1`%hCQQJ*X;<8hb$z{2I{*9tlFG0*9Z5KLWg6?F6m)$Bp!=dN zgpekJVRwI*J$vqcU}L$&e*2Ga200~ZkAbnWvmuA^A1(`!*~YZN3fDGO>W+9AHN>g~ z$2fo@`ZR3j3-5ah9-Vg>OCtCe52QMH9a}{bMND$GVUsnE|L_fL9;W2kH;+a8aQMcT zbB?21vk1$fBF(x5HZh7-29kIf%l`35kE1&m>}Oo2C`1}@bkI0kWp;%j1CZy}Kl_un z3>L_7yI@r-3#B*3>6&BXne| zvqdez+{X#D;M4VCp{WZ3?7HH_`v9koyIoK{tI2=*>o#sS)I6pjQoLp#QK_oT#i6B(@WVk0SY&e;ZNIirRQ zFtAnzRrlavFh?GxG#6>Y*KmcwZP_=%0SWno0k)+w7&UM-F0(s|47hbMn1ffYH$fjK z@(NkIGsA`uK_`@a-a;O-aFIK{0x8<&yEy*J<0aPa zng+hJM^@#xVsx^(#0pKn-*B$JgH zc(KkF);S#*p{N?J7DzGU#%d0F4KXirt0^!okh4K(8DKE!e~=WN!l-aO6?M#SHI#wb zNOK|j?t-yhAzc!tgM{5C#2MQM38XhbF%vk9(od@z&Yr^w7cAZaHSJBA8C2qIL#L-P zjZB_K9vIykk3Mil;W8clgDZlRoB2){*uCCEal;C3qN27llX)CDG3}TaD@+3D2iWRv z-I^J23iM(?CKz|Ucs5lt^PrSx7dtUAQ6B4o$wu3+Wp!SpC9>qh^*~*w9@&Bg#RjVr zMnn2p^z!R9t;a6It7;c*v^9nQ)7Xi(sMVc40Uv)j_V1!+7zjySG?HL@<){bH#4f5b z;NVq^gDj&;mKRQ};;ChUa?()H6@e8K(V3MffZFVklao`&%Rn#tCDoxJmEC*rX@)NXGHfYJzU}j2B5i4m zBF^C6h1|fnznmt87b0*dI=n>bcn%#y* z^_X(^%7gn3cX1LRUTQ;&%}X?ycNd7ts=`CIkKC^2A(X|6wUp4ks6un6^=+PHu zV)-2j&0%q74o@gq;4*Y!mpd`i=a2Ofo~bhX`%%Br?>}BZWdVcoc}a*eibk-mJHz~~ zl=0|npUmgix@(JGTQ z^DkbxU4+gBNVEbV(E>jzvqHw^6g1{Zi{f#r<5i37$b(eo2w4MO0k}Rg*+sxQq@|Ty z<&@U!Y>p28baFB#v>I}8z(nfq?*bq>E~5}eqZ|>@+R|IE7hAgz`gdm{Uf#R3>2oX= 
z%|jy3#zjy(ewl&BmThQ=wd<)CM>sA))lR%J!C*AsBPZqw%Nwxi=9uq)@c= zSD*)|5kAK&phZ+Q+Q9wZ2|p;CXL+n1eke38f>|cDaWX9v)oGH@{*cWPdSA`3h5gaz zdz#e6O72t3TNWdebQ#E(aNmmO{1U?=??9o}iQQEQI-H8YHb3*Y$S^T1$fyRCT$1n@ zGmwn`TyCXJ>T^GMS0B|{|62)WltsZOzyfVBdRD)$@dBa+FFGuk1E@#Wp@+W+N|zD z{cv!gFRe4D#!w{(^nb2n|Ik<-M>@E>IplmdRun>45& zet)n|Y4FmjROhRm?&HNTIcpeI6Mw9;nNS#Q8xxD$&t3|QN9fWLmGOPrMpF&7kDR9O zd#0Yx*6GicOX|)e^9{LduoXm@8h8EH}2CmXY7ja)uHnVJzZVjetq?o!^+AwMCT{r38^I|fyGyxLb0=#UF$=q*-X z&m@HiN*!v=(9N(hCY3s<`Zh$U$-kVCxs>5sOJabX~Vb8+GGC97=yTJIa;+Tu1H{7f(!NJR141>vUS zSsUX-BKvGc&aYBBuZRh^K5VuD$94I_i$`UErs4J2v+7SCq$kmA26T}VwOdh9P%Y3<|086~on(EB!XTeEy{qDSdma}?jxJZAa5Jy({p zM2;n>_f+c%_%ejA3I;9_)9JYO%@2~sFK!BOyn@@xT4o%IYGRna5-gd5sW%Jc9Dm|| z31n(_Wxu;uZ;*|~YCVgVWXZ!ld^u}*A2;7OG_B&8UAcTEN&Vqo)4B(HR~Ti^yLo4*}TovJCT;s4{~EkAzIj2k zzy{{=rc6<{i^Dx>JFUFf&cg%atE~3FUhw4ia?~ZAae0P>z0&L0qVB)sBL7!PqFR+? zW2ZRLwlHa-grX&Hl$-5Kq(~eyl-TY@wcY zPKquEE!SQQ+L7KP53kEcDRVrL+pmSc@d*Ma1ptBW(AdG_lDNd!Snq>~Xq0QZ)%d`T zz?KB#;^f|Hac1J9Y9*!y=PBiWu2wu`qp%qX`9K%F3{w)kUdaTEB!U?$#%}f9MlMRVI1bj^_}*b~ zV5sZ7Q1uJyorW7S8(8UYew(uFt$pdzb}QSa#$W~KqD08l1s+Y)XO~Qef7;dk4t*|(fBP-EtqarBFnY!B3AnjEqCt*F^rS3f~Y+$R}%}TKBEs&XbcctR(V6ZxS zlS@~q{Mk|&r|tqJHTG>xL94LJ*%7*ArPg&L8J4?G_Q#+71kWG)8kzjcG)sR*U7BY2 z;vm@_OY~BT8n&I*s+;@m?9#XwCl=c`mpfT+5UE7Oz$8q2gqhdTWL}gCZ;wgu=>5Z= zGcVxrUX*!-2iPd1(>fT*MHpaCH2J6^*NYB#h@uYQv7RKR?os9iREc0F?$HFdjnA2v zW^2vD$3dNUiZ>UV{~&z2u2{mPguR}*XV1zsb6*Dq=eGCpEVrcC(bSOiDOO +TIC#zsCZp>AT6&wUTNBt9hJjyCe(QfF?q=1n#UaGLt9DjxBU-Qkq6Pvx;D!YyU7 z5);ffIn9h`+Z3!mTn|xi@e~X_=+9-R!GSVPC&%^OR_)T@;JleO;T^hk5kaD%p{KkH`-rnyWnA=7$RyZD+_vqH?@U?m{xr?z%;-+yo^)jPB%YlZVidky zWmRkD5tZ&foeZmzWVyG$$LMD7#L6w!E}Kkybh0AEAWLBAqJ%~DXm^uic*su|>|}ak zR%b!#c(49&L3axCP_9+eWR_v0yGt`*!AkSSoqaB&YBxBOEa`^fx%cAgxKKVWOjs`6 zC{Lg<-pD(3xj|-|bnvtV`_y!ix1Pe@o@czpd5WKYVYg+hvTiizY-N1e)!#k1@>3^E4<5fEf2dCT zNZMxG4F_KIu*PKgzCmR+c zk61j_)L$OA??%zgUFpVzs+01g#YWnf!jB4_`$a9UGf04yzl_<;cWC-}ZNlw%JssoH zmgm$?|pbQJH(@RFAic&{iA*s 
zjYm!v^gB-Wb=lRM>L#5(w}DqMz`j4nH*WW(wo1NO4=Re?9Zn6`CjuFvvw^WOPDf&L zwTp$ZQ5N6)hs&ov-SEB0^ zs@P#u`uPxkA>Nc3`aolU7m#1u8l&I`CxX+y;_+yzx*lhk_3oN9NL`C6(fsx6y<4Qs z8d3*7xy0ug8V6OG|C;$3L^M0-kCNH7vAs8G@Cu~Mxek^yTTkw79-nNffYdK~I^e%Q zWOE_>iEK$WO$i4072)2H>D+gzDq2j4AC+NfeB168*7?S;*3qo#kr=&4KUz<{<-5D{ z{8{#qoaMD*-)PLy=v=n&B7}^U4l7(Pf@Zkw`rb5r!;SQ&WdKAjlW z6G;1$E*B-Xhit5dNZao}sFyz7V_oC0(dDAUWWTR)xPs^}cuEa#M!;&%6bJ<)&MIRv zZQ|)q?`VmJWyfq{9;#0`e$>9lhv9U#HX+-t!IH^ddqcs8*KLBql#0>ys&VH`G78_B z1jovcAOK04PmM7O-3tW$-Q*lxS+2TulgA@Xf|yq}8%Uz-b-PuA6n1L~x-yNQ{Gfj7 z-J2&et+|tj4#hWbb`k_wW0~|Yx<;y}GIHQky>T(}jj3txXm*VIB&$WJZuH&-DSvaU z#yEh<_tc_6sNsS-vsVQxGyjxc;IQ{I-&Tbcer%QH=kwJtyWK~8B-pNGho0N+6D9BR zy&KNB*81O23X&FL$un*0Yai7))+R6t|JO7HXdKY6bZ{81zXSrvIL(SGfG_zGDz94% z-R*A9?dc;qE;@Hj@SQgU_wj(S)v%}%DaaGV1Z?Y!oq)z|av5)r0d@e1L90U_#R-Aw z;dtwjjy?8g>Qc-pJmC6$w?g{Pq*S zI0H&tUKVgyI_v;uY*@A^F5vsShLc)!&0PCyj+Y4=ytn*cj~qW%#&C5>{b*n1MqkTL z(S!v7XP*{~^o^=>k_`$dd>V2;B%j};bROTSnRPc#)u~9d|7u$r*(-+V8{Xt>uDoh{ zghi6GxJC_^kfcoGu~n3;cD(pzaXPF`p1*(XG@h1Mf^8V&d~H>yhhdJ*Q$F{b>h}GH z#@Vz^_kPCqfl_H!J#w~W|EE(h29aWY{ufpaAF#`ki=@$K0Tujz$8U-v^`Rd>=BOjG z^bn~XjC*$w^h4B&G5rf;_Dwz1wM3Sd4~7Kk4vjph-)tQJh~qxlS*AXv`v)Zjwc@ka z`B)ee8ve=ZMm{bp6}`=GGwmCn@_-nigoat#!^2YJ(9s`vVYj*d*uc^e(qqlbViT6e zef5R_(kvV9y}M@-e>H8->4uczldRNX=>+qC-cPkC`lySGwhc!csphwpBb@Au5=m=1UAzTt5P@EyGm%%eb;Uri979%HGU=83OLpMt?w8NK-Gy#|B zq6Z$vm%*^$>%<(6Vo(w~S9G9}viDl{g(uVm&bt{SXo&p8zk{eT8BpR3Dmg(|LP#0$ z7a4=4a)aOp4wq8WYt!O33Yu=S6L`yQ?o(s(aKKpsyJzS&yd>PZR5rqiH9s1gG?V#- zwvwZPKDr>12*kyk#VZ@34Pu}j)pX&c-jt=yEFcBT2*R48ER>Kz+Q2jiW6foV1lb&c z7&*3Ojwtsbdi2rUJj<0eDE=9R=!^^kjldm!1Wr_Xl{APXMj_gkqvQ8u#ruC*NfzNS zC*A2OV6&g#9B6ftlv`kA$A$t$0TA>HU6Vj>1`^Cpl!^l9egTS|R`)5>u|J661X$NB z9T@zZhhNrOswP?^tg`x2i8(WxDEA#u=M1Tj*RZB*h{PgRr20JY+&cS#v7;Hi3u!|!HkVA6ww4>3@u(m}>0 zS+%SkP@TMQrO2q|aAe

c23l3G-k{6i1wt4CJbVzQAZu)f5pmyckI|Se8E7JZFC2|xlS)QxeU$8|hXvv@?Xu~T0nNrNd` zH&xgmaZX;DCLaKH2p$8IeSue21apT}baiz%s0U@&fvo$?H_WR-H2x>6D;Id~`3h!p z%L8B?6NkNXfoY4@7&scRPeH5a-8Wlx!$O31knto?JAX`XT7!8}aq?J12|e5uZt(<$ zBY|soUqV^p5Q(m1a0Ee6PZQY+2~MqrD*$Nw8Ux=lL>^O6o7BvXA8&)vX5>Y*?t1uD zqsu|xq0X4ehTZ2*+5~VUV8f{+6v#MyI_z^Ns^STu@U|trM1Xy@E&&0GDBexbsWsl% zspt!~EffzR1B{;L(W6K8d@0;;>6gd*NL|qdP7u(XFBIZRL>AHpqw9rrhkGs*WjZVJ zpccSn`<=M#myofQ=dn1X5dgC9dR?aD&eU)`D>hUN2fA4~TK#R%$khRLRSC9nAfCPi zI(2Q-WV=EdojL?U|Ji;8sg zGp=VEf)|HEMU0*}pr*XKbUdN;Oaj*3_B<^ZD@ZCkUC-&~_{yp8Tq`Yjc2``iky757 ziVdTnW#>aRCnaR^7UH$1K&N&3>f7s`hi%**?#&q97F-yd%bAWb%1Hcr_N+Ztlf=SZ zeC#RU`tgZO+s)mQ&|J`C@V9)6f+;N=of6)9cJgq{W3owif;RSs-t%;qF z{mF(7#z|fAE^3RF8C=}T!$6e|pf|hWnr_oznPv_coSbI1Vt{PBW1CbR?k0 zCczU3w+&1#YECHs@*8Q9Zcr&S*v(*Mx$BiaIIM6Pawv)Vav0q=6CtyJL8cwvvwS3& z1Cvb`i882BmmzAMXnig}5NjRU{-?0}yRAVh6*48|Q!ss7<{eul(WW_jYS^Wv7~pr} z`^a#~p4a;-CL$h+cD~GoUbfykPvGo8q5tZBOs7#{PET~DUtCsyO#L7dv+LxrnL!`Df;W;$k*Lsc~twQzwZtepO7cHlczbYH_FcW z8U-Ph@Ba-OE<$mxR>Fda>-;|&_h);B@4Hs?R5q)>t)i35<)_Zh3|+Hky|bhcvP@+?6zj&dt3zjC1erFXPRV-J4;Klgr*uS>}!w947GV_4m|Pkj#V1X z$lmIDerBYF6SyyTDAe`Sr-F)+h=&8!!JL2siS#wAq%B@N;(QX48LK@R-6MJ2S4={p z3=Dr_+>^}Pw~x*@Hd$7*=_G~9j$KJlB6B{TX&jzv5$<+Uw;rb zw!N9fiE--;l(%iN*WKyn%s$nzMA~8p0OM<;4HCh?h6P?l>r~@x4dZZo}o|G(-Xv_6KkfS~l3xp08&eJN4DX22ZzkF&Wd!h@y{`8^W9IG^^u}+|mVa zNnuTqbH(UX5mW@OC4R0QPZ!yiSLx{V@4;y&*ZbcQi46(A*`y(jc8klA8dgl6u#845 zv%an<%S=4coE>ABz4IFEt_VozBNUw&lO=vTitl#@Xv*{p!S*}%*Lg|q?M2{Z_BU|m z@T8pZlxA_B3C-MujG;ZbLaiaDzb0?Sp7ifPZ`v@A>1CPwNRsQIf6D;~nFgbcrpdpu zF7m2O3m8cHr@NQD^NN*2rYx+zUt}4 z!wye&e!L+5Scy4lm=lFFdUW>@p^>*U&nNqQSu;u;;bj+p-Vp3)*gW!ma+O9d!9k3{qO5abpzTEQM(%+-YEUn-#Js$uN&=kMkd2q_1)RRtg&YYvZe-ZgUS8SGIcsP z8Pfow>B?(9jbpxlqV>$_!Z0s5p+PBU9S7 zgaF`enIQ~HRD$w8Jr9Wx0nBq#0`gkk)z;?GErOh!{`NcluEO9hx4i|#+1>B%doDbH zow#1b=2hO53Xz_MN<2I%r#)GVNJuoGt~d6q2RxxSG&dWWHg(3nhr3p7Ikt<^+D-|* zvE=T!v*;uUfmn{7#|W$vNIcVp+?ott1JKVsj+Ov@<0<5|+hA65701t_<(u}S8ns1_ 
zjtPwFk++^4z&=XRj+@8sYq@lCUx0IkDsjBk;zmKA!h`&DTMI}oVfkzhPj3}cHCr>3B#85YwH z0T~gX=sqQ7oj0AvnTRhc;7sN8hq_mUnYvUS>>7BI`WE+Ru)d#_SC_p5V}*0KmG0kj z|0V_&-i|l$pHx$F^H1dB|O3A7a!sfTy+JdR6#HYW<8f#o^5E2`F{-Cm1=q$ws3D&ujqd^~y>-gF!?rVR9# zXo@Tao8#C(&ws^8JidpWdxor=|}d6pbI$0B#QE z7YM26f#K^@+qo98qN9<;`p|?Tz_PzPjT)IIO8gGHb2573)pIT_aU7F{KIs|n0xo%s zlgFw$*VH}|_hjT`r?+cD04Ht;6#-D|{4Pj->1-5rL%|uX?`BpJUg*_^DG91+;Re8XB7udYu-{p5= zxp~2ca_kA;q4Ik_EuRA2Yt-ITy`Cv3AhoGx>9Tge2M=CCM_hC=S$67UQGq5(&Fqmk z7o0};^IpW9YZYy15OlM{%{MmAuzvG+s`ai(`2!W-#eS0OW3f?*@m1yBxn>dI69jOz zw0Hd#6IYs?J9mx>W-3~+k?w9cJ%r*lubntn5*w9a?DEw}pz%6IJTHA~--44h3wfOq;zP&pX&+!906j_sgt~6lF z+SNpSDs=8kd;9mZqfPy+#HkCGJ4>BD{#4vO7wYSzQrYN%;U}pg8#fkW)~;CE;01-M zwe!Aiqx41H$X0c^!mw76&t}zglv9MNWb&?HpWJ)>z-!OuBc}g}yby@SP$HSm`<`HX z45=?OE*D~*V~ebmI_~sh$;;f+iAP)FTJz-+rMAdOij@AQ?G3dE=>7FhtoC|et1NM* zWZR>H^84z+jQ)6cxvoar77kCrKJC1wQ+beWcP&^XKp8c3cX{o@NR^i~Bo{(hQ;}$1 z6M)>g^8?mXdET^mtGb-H5k_5BTG!k0D`GZZKjk8nZP1>*3qh@StQfnk0cs~Yf=W83 zJ~-wT!whO*d?l}&1t3o@=7~A9YY5vQM1|UF{;&4FJSxgN+ZNlWc4DL5Q8c1R)N)|7 z1wjEB5`$VQf=tRJMrDv@I9z86>_d0&i(7??Zxs-@q9T2Q=Z%-{ppp46($=T4p{n$o4Z|Lx@Y5y zGM`yDX4=u>%Yj{865lMYo=6&`NdbOxq-OzH7+7usKO3ho>jt3<%YGZX`1)GI@#OP=vq8}YoAo)!+g28PBN#MLKukK#> ze8b8hhi5@itWgvOVZfM~^!eGhuG?UXJNWYg?bgKfic+jKZu65+I*)&oc|VceCE@^kmH zEhp369Kh~s#!y=pyAr&uAg4-CZ|GZ$#>WSbdfc5cI*yzu?fLU7GG;UiCm*H9atN5_ z0-BXRIoh^S?WvDV{-IhD{@q#Tcq}-uAmq6(6ozQK^q=W)1e`4)(Qm3N-iSdUiU7C{6`Q&k9SGUHq3nQ{5+_iaEh+i?m*3I$vx8(3BhX(xv;SaSUFuOC@nI;$3t5;k{X*C~8WYT>GkOiJY9~>)WI3(B(Cu zmY=*R^Z9qdI>5i{oXpGmZ^`JI8e2`8B>rvb`DSlf6IA26m8%tgRyE198O!H?VE(OZ zzE*;~Y7||SYoxJ5CQwE^on((TsZpq6OwEQ2LQux3ELF65h{aLL8DDFA>W?9d7c zWI=(um3su^(ooGt55r8Y`s-{wRpxb9HsvRA`4)ID(@{knZV_=~K|2Vr_vx52ha`5i zb6yJ!?Ou}b!Wcf@kTpdSL}t00*w{S`_7SQqT3lKUrt@TSHe zs0V_ox@^qtu8kr(4n>B{0(G>@?E}9|52!&wB=m;5NJ0U&r=?KSp z)R8~dn^0lwS_;|8<<<$Ax8P@(8@zPK3FZhD^J+H0R{c6P$JYC-68aS|{`Y0JATYQ+ zJw4|!dNQRinAR)ZNcV8Dc{Gty3Z(Md=Zjnxn|mBbeK&wCu2=uKAP$FCc6VACH`ly{ z4l)As0p!1gxtd|}fv{wJ@=FM++d`q=Q)jwFmEmF9-fy3fW(p`-r3Ly01oYaYApw!b 
zE7^ZcKR=XCG7ZmOSHcENDl2D<#=EfCfQR`v&n5ru<~F}l2&8^k>X5qucGkP8JBAg} zPDu!icIkc?E-{C{CNMe+G&vaJ8})e5fI^>ckJp=@#0f%8U_kkU+P`*~MSjy^7IKdb zGgd<@;+F+uuEpyAD*{kBp@W%j9ynJZtuV?CBV}cGBD%uiYlj{{G%R4c!Xphxw4PUf zrBC1H?!3Ap*K9(-v%)Csc};M{znvqLX?<>{J_eO^wnBnM+oanCIU=@0N8)*U2n~PT7^Pyu7 zvd5`QsmNY|My-E1&Vxx6J`Zkx^$sC|g^n_hGr~pAY*~rwI*! z(b9eST=26|1z#+|{|^GC&wD3&h5jtc9VpX78Av->37Wb=I=k*)j(+$JB&>5{$s!>G!?02JOe$6`QS&%S z=RN5GRZbbdzd#?GbR-8bFJ&N|UN_Prqm@u_Gv>pcj8#-w7Ad*#Q`AOHy>5W)sV(>D z$*;F71kr(}Zew9Kx>Q0=*YOxy9x+v!_aCCcuS32@Jy=plLeSQ*h0Gu5WP)TV=l7rW zwjE#&%mHu!m=jo*sXU2jp+|?<+@V{7GbV4KuW4bGp+=M^rL)Iu5&n z#RhqRaGExN0V1FSy?ePxsqp_KSOY-tKB-j_lJk}6Vwqmv{dDzBv3e^9c8ap+%E41llA}1cra0>DR|tYcE(H`Qbxr_1^Of z@FvrZ&gsk1u>$Mu-Ie`|9~i#8xQCXKt93I}#4fL|y#Ey;fz5O@D4G|M5gTiLuH(aJ z>IY$=(BvS25u}vUue-8YSbsRS4Xwo9|D8~d>GAi1Z2X;$3HFdP2o40O#8rG)xVlJ#PBggnGf)>=sgD`SqO3N)^BFq24Y<()(l91Y>-`R1O2s7;DM1497adz7 zw?H+7K@!GoV^q|pmC5b`GBp5}%`E_pY%Z#cKBZ^hEg0x=@K%Wlb1}vkjoh*%+@F)? zh+?#4kkgGu2#mmD!0YbvXB38E!5R$Bz1ParWL|zO;S!m zgW5`1d=jiC{({*W?^Edaw42wd#f=LSjWLo!QE&{RV094$Nc?!EkuYL~Lz z2GvU23wnyx>K<=iLH{bNy`}%_fPNq?qqT>VK2UMupWz^_KQtB{5C;i)o>uWUBL*Kc zaaI=Xk@}d4qxkYVwFNJ^2bvXnns}ljX{s|wc8Pz!b)FIna}65fC~UdxBQXwz`rVF0>4&Gx8IH$FskbMGJ6N86WBe}Ux ziLB#UPcn ze6fiyylivIno?vbnT#s==_;>RXT*=Ze0*z(f{p$j^@-QzZ!bX?EZK5tiToPS5Eq-B z*bCriZyoCCbM5XeV+Tsh$}l44s%SD0rCR4{vj_`Vdto;kWU6D_HOYCotx%c9+ZI45 zr-x}CRBdPjM6(z}w{;NAnaz~d;fJn!L1wvojyXI%c7QuY#SGON@N8*`R)W989xgOD z*6I0Lb_nx{`75#djbYo~VF6R6QRI$+1lht4qmmLOYQEDm*#-e6thV-D>_aGm9WA5h z1Vl>0<_ZM=WpKYDyNV=u5MWke_M+rJ;|xoh_MABADu*zdXqqjD$G8(q7e=A;Eiq4t z#NzWXx#k_;GhrD97Y#g!z+eDDkCGy>=$>F@JP6se9&2m|3_@J$_}C5#9FVf8tfPFJ z_><(~4%^>BezKD94z9sNCpuI64D{D z_i4IEOPF;OSv7qhNU*N&oN|SlhOB3l@DTi?~uPEAcsN}zGKQp=slnE_*ds}CCBi3~F=ySwK|M9St80Sy`KC^Xh|TdfeUsr4QX zn~62V1EdU%tjU0fv$?q4p4}TZ%}Rg)OtxT9{0|b-zSXHqkr)%3W#j&uFIKl0p)DoIvC?q;h)mA6W0`NOhE+}0EgVn8?X(HqcM<<3o_3VqMAR*y-PBzoW-S#q zbPM6-Nbm!)gZMzH>5M#nYnbV2M&I;O+dCMinI+C6s-CIY2A88j6ORRyW0UO=C9rGQ 
z#8sPmb7_h?d#=tH+o>VU`8VS#FMR4ZaaC|QW$d?ni@o1iE&PaY@mP7(YM#rC5!)fh zmNcmk#0vy5m+zIc{v?n`gBw;I78nIZWIxhslMf4uD_4Rg>=cn6CRbvRb~tPSX_>m((lz$%|bl}z$7#N&@J}icSGT9ty%I?vu3dEoo z78}aWf{yPN?72@1n=+x7joE{;ISA8{mY&1u5n5boYvRDAuof_h{k*nhDrt)V>}hHt z;>f5(4G{t|qQR0WnCe<0K)nQulYH{#CgF?VRlD5TW>&m7^tr@*#;W5Ros?vgslji8 zmj(WkHmygWd&@4+pHX(|y>P~c%GrbO>#e1cB_|HUhhng+8rD=x>KQ)2P z(%l2C@Ek0IK`#;H+K328P!M^ip!PF@1MeT=CTxDA5jU0EuILe+i%v!hG|&Lm+D=T@ z1vzob3nEWJ_36(-8C8zv?lqid`Y)%@Ob7vHphUGjS06$!jB!Q$a_GNG8g!hCl>HGe zKJG{2qnbhY`hOoKanBlJEB;q(GXqBA9{hmvX8bwYiWCz7c8!mBs8>?40B4eDa4yM3 zC>>*gSZo9iwc)EF-x|P!y)jz!kG?D&+*)B^LnoA4pKg@#-c%?87GA}2yBBj9nzgR( z^X#a0N{+??&3>GQYh@OGnln^+9_x#M49w@PZCAn^q6f6DQl{zck#|IDTxR5yzEA$h zyTJ+YX$&?ft#~@TDCFW8q7zzX9|<#EPfgCgy~mp zdet={0g9(O78^SWsBMG15ow493>DxaU`6i@ZaTbqD*(00#AM9Ox`oYDq@-yOD)9#YQEyM5`{klm& z8WdAOftI}!n>GQ|UW>Y3x&1|C85PijZg>TmNuma9#}At{-&wlg9R<9M%(=Az)Q6{C zk=5j4LiSQn6@5Y}BH9?&K%KY1z(2xk+uo2*3r!XW7XbrE!-(Y%ytxvakrKeA_P19M z#k-JPN~T{f+mO(%F2Ndjg3BOkz1w(yu^t>Cdz*4PIOb!# zB{ui)MWr>UpWHC-Atj;g185MxH*BsBKuPABHXQmH=+@?A2gwoVjKPOWoPYgx=5&oo zR}m}op7xI8@4rJpgU|SH7wP=ZXV9a>>vvD^)<-h4h9r1#AWmhL)xLoFM2*WQ-D}>f zUjUB(6_vnR$o<|)w{n5YC7$fd`}%WU#e&H9E4Fqkw&RgU_r4!3FZj#uqsNci1*LB{ zF*y*P-uu?hF6c*l#&3(cl7sX0j7_CA`2JhGZJ(RJe*>X^6GdYL26&&ldtg#zRG(=V zf-6V*Ji4G5vvGN3Qi(*(pX}y9TBNO*eO?l2pxRDih~sr|ttSkcN^k~cDfgPRyfq}^ zF4{92Fu9$vazm}!jxhQFx&`yW?{R%0?^SrUpIq89u!FQf20i&BN3D9nf8o>+G^{ z$BZJ!N1BT*;4na-2PfkIuG{JTc2xQ0ocj1T7!$&8Jo&;8Ibn10MJa@xKltg$)76Xb zdjG}e5cQr4H2TKiw0S?T1t7&_Vo0E23R+;~jnYKw5*i8J#_vC^;V3!>jNJpW38EO8 zh^YgKW7Qo(az%gy|~jk=|I?S;hbd}=a2~i{qH!;JSL`f zNiYwEK@4V(Ae-T}h@Y~FpHs^Y#dky-GR+E#fasa?E}H+RCX-&z`~P@e8!yPoLXYOR zzLNclsu4f~20e5SH)aWnRmV!u_?`z^Lzi*p4!2ZiCzqnPUxCBsT;^NkH(}Di=6kll zhbBxpSp~P`=>o>tD|*xN{2`u$1-$X7)k#Dt(otQ5KLT-uh^DMyz77aP8C8e-Opj}l zMg^*>#7y$7bKx?AJhh@#m$q;lK=>fG)gaB8J^Y*X1jjK_F0i@uKj2_W%?NmW`QvW) z-6$mu4_NLVtH7eEM3?7wxf3<`S$Ll1#TQAy!?krDietHbOf`3Kvg;0~3LfIBjV9@9 zZW;bud$bMQw2Eu~z+Z1=RGCCo{e^+9ywXrzJqS}(5uT?ieakSiUJ2>K5VR$kr}*9+ 
zl{kvDaLWU)~&sW%9`WPqVG-_I3R?}IAhmg8bl>v5!0JP zSe7{z*rg;1psX0*QM(PK$*zJmY}&=8nLr~|*uOf+oqPb4(F)rE=zgK%tXNdRovYMv zx~0rghyHvJ`v>}q@Es`H)m%)GUo6MGaaIX%8pSSgknZhoKRZi!)U>xW?WP(`$#SD^ zu*@)~3}>b}K*Y)B;-HR(YK9gpT?cXh4cZw&1DSVL9~2<)1TtEhP?8bqG4klkrm%LpA;^(j{7$Fi8^(E0|)91-faYW%082EYF-acEc!i{F+~XyAwf7z(x}VI$Fd!h$7;mkJ6j0CrCPe%$1sH+7X>fiV$Y z%A|>f!Z1_W>sOrMD)L@>rtgfZ-u|;#1pzgyR?W4y@kD3U(U1c_p>+NjEO36CuBGzt zfCaXnI3QBdMC#!W7MYg;IO}JQ{PJiO;c>K* z3Fm=Hdj51RtI)Ny<;^xSuL06_JfwqBWPaocRo4lf+Q4r~DfP9UWHVLKsAN=g&}`?< zk#G?ao2~HbR&wfy0tPN2!}IrUKe+hVZ%~+=7@ivD#>L>Mw{>pB*YvPPE$y<(d0_p* z|H?MpODEFspGa{5)ZPgvoxxFatRe8zvzp1Eoy&4<@pL6(8dVhRG~m24%bfu5l1C_N z$yBX(@czLXa><92?Fy2t;=FxAlM(uhmWr#mpmw0Lmc);w#;tBabExz%t@(3kNpv7w z)E;U9L?|_;_uhOL`j4J1~6)HT8BB7TfXh%^X_E<(?>#=nU(>jmNt~_($DNeqyRy2D_-78WTFmy~~etqYgUppR$=`;r;&Cz0LA*BGv|FsEb zm7yE+)^G@rm(nT!-C<{FbMoV}r?1p8cE*3P<^=Lr@e5cmvY4>|{_7eTWS`grG$PTw zKnTr2|W@P&y~F$nOQo${P8<3 z-wFu~={^yT1@ivC@-1z!z;2s9@^5opyE&uFze=`nhuh7s$MtJJ=mJOKy5#OUbd=mfA%|Q_V=e_i~DXTmlrk;YW z_1FUHyf^Ij@ujNg**#4qyPqp{O}it0u2|QSBI|0lsj9;ZV$I$6hhw#m75+- zXUE&`MHqdL$mwIZvWK}1DN7|}W!T5rD7I3!FVMiG< za5C{Me|AD-qnFM;^qsoD`R&zz`kTj%e>3{jW$5J8)&_Vs;Cl@Jop~_F_U>8QpT2PM z*oOS{(oKnFW;Mc#nQ5Iuk$ceFP>S1r=^cOU1$Y!k3f=ow-?J8SU)Gm9v-`U5`CWRZ zWX00m?>l!=3dtdG@!ogFCxap0ORheCXMB>GyTz($r~&gne8irb9j1RsD?KUW_I@9m zwQiF_!=}?UntL3+yH3-T3_)2BCc zmtc-jpFADr@fcr~PnyVD7!=4DNDMeH9SE80Z7Cv2ADefDx#xEeB;L+o6_QI$(Q+*z zLgo(kwI^HAyV2`+D8u_2@((|F2S2!<{q55^Daot`m$}R@+8SKf*>S0P?U83UzNcv= z*TEw~bM1%gh$=o)-tMl;Ee+h-=Z@{$J?l@UEMLDFBHNz}Rh-(m`OwC7H=@G+=9dc1 z0{g;0f4a4%{d0xXo5dj4{AgVzRP?p#mr5RE zH@$oPxBbATrV-gLy$<&5H8Q$(V_mSjD6biqLx?*rdG%K2JL1P$H`3DN%o{vMa;KWz z%T)smlh^r_{oQVs@qea@koR3o9iVc#JQ-A8jN0k8OZIZGNT$Mv(d4P+IRp%E0Fv}P-_W0 zZfLnXY5GxGvLOt6j7*Pg2i!0>r21($D&h6-Z{5Qk)b%1IUI%>nwa_1Bm?+4kOG)*& z3-hXg*g9EOhMRTWCU<5^8mWoiT`)BXmYJCx$gbU>*k9Xx{CTkVN_vR2`tjdIHt3?} zPxLrY`9QTkcqrjy$>W9;p9UJ_qU5D{BAjJ|=9uA?4SyBVQ_*ZCyxXDW=!C-a!kbOD zDJY21C{8vCM=G^7Msj{z-80FWX6mg 
zp+ck%RX{=(s=40LEbp-_YCVJgP$qg+6-)z7=Dz3Ol|vnJH@+U}$ut2I9^^>zVAC>P zAX9GJbv52@K4^}j!K)D#HE9XlUA!MUF`@ki$Zxa<-16aFy?t8j1eecrq#0Dj@Gla$ z&22@EW$d&7i; z>_&+QLk)JKP~iV(jRQb?u-v^n^=t&*Gvy9Rr_i2>=jYihxu`l^d-K$ii;4ZkYQ$|JuzR6lB~{lusr?t_7Q2j7w+Q;hu9VtnXoG#zv zELP4}dvlNmQW4RVkouuvoKLJ0$n5m`QnB>+a6P~oOK|1l+X%0AniC&yvAF_1_qHhg z1q~3PO)PV~zk=+GoIs%IAkFqr?c$(kCkIN_CKRwIK^-CT6$A9a)CH|$-pd#~&5bk5 zj$tX6F<=Rt-FNL>IEsF_@xXx#6pLu-PCd$eD}x5;U@jDq)>yDOY8 zT^wUJR(*W?_kr<_M=hfBUItD&FGhpBM}tIPKy=UM+M>zCIns>1sXi*KS^Yq68GDzx@8R^< zznCs52r^PYW7#5vm-*n%u8wG}>$!%7X{DvM`E}9u9p-bG`-q!3q{mzjuRW=uI&I-O z3+ShO-b%wc;GCGtepvNiBPJYaZ}OC4P(ak72p@d7b$%PhSCpeSXLfiT!Dbg1Sf;JB zHqwkYT<(762}m6n7!urO9z^%cqHZv~lN07GR%QD{7hkTaT6=ok1n1bBylsk+hR9k-x{`QaXiT$4$4AD9VHq}N z1|VBdFh#q`soKb@4Q^Ovz~g^Fp~bjK1Dw=d$ds3~f8t{B^v9=m4o}$|Q(%m}#N0a( zT7yFc`xJBqZNOJ}{j)mBD!1;Nb7GE!{Ge#0s7At3LOJU|kudBE#ffz%p!GAakMG+G zs&N-;rrwK~oEWQ79;W^SER=!(-dTa7g%1u~Sd zas^F%;H)2jU-+D*KO-$3Yw(v{QlC6Yye+HrazzsN+xUrN9v?&gJE*0NipBcNkFNCE zMje@Akd&{oC<6?8VbEZXu|f)pYw>n%mp^)IsyD1l$l;JhKo9*(%$j->S-ceFgRlk~ zp+`P#CU$(m&IgVH5zWil;*}L|%tN_RiAL=y!|S+}i`oPIU0&~0Jneg_r4rm3J;Qdv z_)3L_swz7@-7IIEXzX%DetTjArrJnB5VHS8hnZNgoJz(ojeqa(Z}fbtRqi;+-*Z{o zBu!dwqg&P=v~Re6$D_6mux?pMzmvqx)M znim9(8t5%7xpCuujaUEq_V7~U7iYB9hn7Um>&$!^+Yo)rvJ1)1#XEM!-&O|H;ybcmR4Z01zqOA-de7hhp&fU*7uIK@2URU#rkjA*UhIaRYvNgJ5N;~4(l!7 z;?LhY;&q_-Ew1UhWk~oPUc?7yR$Jyi0d(?ZgS8v8x&c9%lAa=duVSRNN>)rX1TnI5WytsbH@NufU%gWl^VX zl=24xDud*S&DgmtxxP6$Vl`mp$SWl{$uzd!$Da@7jz$_^#LPv^zrz$s<>G`)2TF$i z%BjhKcWXs5f z(O%xEo_CVHu}^`i$8jE8LK?qgsmS z=GPp!2CqQ^*%N^BU6(tbx_9W0YC#`kmmhKZ5c+%Rf57W{n56i zx6t0B@W>3jh5m{^Y=%@E&V$S#49aSWF;)?#Vg(^@B@xC^^31Muo0=F&_A&#ZJFIAn z*?tL`ILj_aunG4?xWe#dsJG^7Fpm_o%*48g+`r2n21b#}{`yOOHBVBtAXnD44;sfs$kGP#q7A<<` zpAU|dK$9tgKqy1k`#kpMMM;{?as!g+Lf{npI8)5;K|8boxXK~)x0pBU$!th^t2%&h zpXiK^ypu1e1i3qOZP5=cr1_aN+occ(-+o-x>z8`uvQidiwKZ8u$83BFtG5q*zt1RIsUv#Scm)Viv-n zDt5y#YB&){H};oU{e{nudTUxYQU#6IVm$srNa1brv}F(E-o7elv6FY043wVJU`JjL 
zF`DRB+`wWf#7(!k5Qb@P_;M%ZVv|t9PxZ&b{KeKO$i&$f>~M*!;z!kA5!){!8f&c(FPp25@tU|3 zeVEQ-JVuTX_zaLb%x;>skznf6^|^G8UXJ<&av@%CKQNR?Yanuu(9>0J(di`K>q53K zw(=o(Bq5GH*igI>s%L5RmexE-*~YI5&O;E!Cx8SI|I=18acRS7OK<4=McBR=td|y> zn3#C{0jTc22r7%&zBE0z!l?&hy+Yjn;8%5DH5uD@F4Fv|S?~Ofb(wXRAvZwMErgkI z6|18O=397cB8(j#0|^~VkcNO77lRyn>+RBzp`*Vj%&}JHP0)SuHMVIgopyFG3jnRq>ASvoa^dLc+_4E@pYPZeZfseD57p zgiE8Js!&8rfH`zfixO-$TeX?6QxM&(W!=>}?b$`>wj|!fE zwq>BH&ubIeD-gEU)|(@zFa~Hsft`ls2_AB=Ln<-0p*X@gMA+DIF8HAn%XWn2R#al5 z?dtF)iJ^h|5q|yT(uH+RQ8cs3rD2WR18f!8nVtAW=kQoiSAi^q35ibbhm)`$IlP+9 z*{5}@xt)eCKHfHLe%YnGmXv+>7s7AhTVa58MPvcH?OI}iYYt20ZL$3RjvJqVk zX}S$LA}GFDyevh|Sl3grXfekU+hGY>oTSnqWQrUuNKd^@wHbb*g9stfh;wzInb=%_ zLC|S7H)6bXtx<;c-l1V+5^c-`N%9qURvXGfwI{0_kp}5WRWCaaJcLV4s@LI z3Y{G!%#*Cdyw7WFE`*cSO5hhj6(M{o7m_$xw&TMu=LqWJP9dOlAmggQ0G2Z2FkVVD z|GVIci9YWLJcUg(I9Rd8V^E~R&BDe7Ku0pa=RDD+j9;MN9Y5LjGcge%Cu*l@gXE=2 zBW~$fA;pCSq;FE8@`)D$bt3!}oI9&nCD>u)@ELmIF`Lu{1LDASiSV9TZy3a?>6`eUuc(^(T@%Y_2c3LL$j+X zrz|r+sUB(1TDBUzoki5DNs>-@HTG0;@7EJIz?ftYr;N6S;gk?iQit73-)H$PMBuZ>_;-L7MeA7oZ0OnI&-U4MyizKDH zow%`d1}9~AVb9-DaJ5VNOZE)w3)Ls-P;drToHnzHm(6jqxIa(-k%neM6$Ao=b5qb6 z{Be%>5u_;8YoTGHR7I7k4kI%>ur=LOm6)(Z!Ibb_30DH`gqqdKx@d0EBzgk;6TFiW zRfBUZ*Mt800FErs71}?nzbDlNA@Tz#LufR;$3&NTB{|aAW!9}>e8u5@L#kr7O`={t zIC!1jEj9@}vLH9SM5;E7KBy!P4rUUl5iP6Q%(Y3N=)F_VsivbeHa-OPPdm+Z|`>McjCNinqdtaokoD`^aO`81LGO?>8!bnst_a29!EOX<#~bD z8kHy1L;*gziL7aOo}@9O?4uF{5`&v_#ez+o1l=TCBV_}n*<|xHTRcln!31S6L>g9b zvUEe5l!l-ajna^iQ5v*Ml?B@-$>4x_r~XN~e5~$k~`s)lyw+1^ES|7)A7E2Ju##drwe( zMjt~HX6fybXnPG!O1YQMFpQ`^<>$Y2okiN2ZjG`kd|cb?$m_;k&U;*_G)rY^ctO^G0(mCg1qy$wY=^77B-rONWIhaw5BaOVCxgX z^c(u)dywiSIFa>L2^wZsN$-WCW60&H8Ws$xNf3v_A#A%@R15v@4=fUqphv)rA$H~8 z>#R0a7$c1;xS(#Ro#f${D>_C5TsD^?03({ml4-*=>_{6>fGJ=B@xVz|PxM!^sirCh zQ$tMhng@ZfB#fA&2k-=$)dR5BXxRLqDK*E)D;dvdTi!ez)jg{G_};*M@@!E9dE9YR zI+@!j^pcwqi>y3g7pYDU2ZeWn^8-NQgnzUm9WICa1>tA``KXYzIXS41zEHuk)W@3| z*b9j!@Xe4A{UCN^j&XeTaJZxj^(1+Qkna&64Qx&pn`E!E#jqGhED(kKlxu)f-!5I; 
zOdtWJtng~k?KmkIrkbIexF_V)#iS=tyi(jk6?>}EV4)`LtO}}BCNWiD_2{_UkHL={ z6~+iO$eF@v6$mSSUeTPJ261($SS&E2p6*u;Dyo9eo@e_jY#M;u`bhx8G~RaZwNA=#AV*C#yZFyokFLO=mk)C^Lc!>@DWRnmE;19xj=uFBKWELzjAxMVSTYC~LxU z?mV=TBFI5qhKBL&3wG$f?y7O{d(eY{1k&7vL8gxEys<4ffHb)`pyw;Z{kOT`iAb+G zHr#U_jjqLz?8j}$gj(?sZYhdF=z3+KxX*n&*o_+Wa@1AWCG1q6y-|NxQXMs8QG1e& zj*TDBw!FFndGC7gCyifYh3rS&@0S`e(QSnU9!FG?b0esGzEttCeQ`)CKCk<7!r0MM zu~pR>BVROgTzY)LZ(WF^78~s#@2I@;Nv{2QMLr7 z6JG_VP9gO6nfIKe`%rd-ReRBn)s)G$Pq;tt%8fu%6nPI98jsPQUC3{}!&|9>bfZEFf7c$g*HuF|3kO}z9yIT%-*Nrxha`lC#cJd-Ige>4az3?sXXJc@D$a0L{uhSjV*R0rN(k z>PC)VNep^yB+-^r9+=RBcsx>7yJ>WM=@sk@SE6*2&=!GDa5%&^jXji=N1kruNaK>1 z!=>>6{XZ6oRGuV-EzS9AS9LHgK@WSo(EbRawhm@V)7G*Tj`)R1%d$0XrgKIh7FvV8D5SvugtESVW^%x zV}C=*JRQ*^bM{XhnPX@y-!fyasI+tA+hjG)^J;H{tY}_eOuiFbeA72$z`}VXC@FA0 z*t!9Js?U*IOHu{SX^A%(a2u_29?GJWPjI)+1lbTnaz#NZnt_IHjRN1C+|^k<{Gm_Iw)939F|>x%;w zrx4nL{V)ZMq{~1KcwJBdB-9qbbA=GVDh4LKDULd6eTfkU17Y#C?F0`$C!t zbgE+$SW(Zf=t&v81%4!YzI9ak~iFJB$V32oGnIC|9CWHD;us>e2Z z_Cmq+^^TM)u9o51cQISp7+X0pG3~}WUy&@M4?W66PtcGUS{(z62A_XN;lcllJaRPh z10!{3;QyN+_!y#4PAt+6QM7xEoR$(bI+OJl7TTyzf#Dv8vk0aVQG1JpF&hkcj2o)) zePy|ZF$gOiINA3=TtXA3ULy?9NlkJo%3>&Jr(#`Aptn%HJDrD#jZa81Ln@`XRMNBZ zCi{-!L6v!4Krv?6M=UDA=Sv}6Aj|-Lsf|NbGlceklJ-;ZT9tACZa~b+_k}s(jjyy3 zIUid`{>l1#;vq0VsyeB}q0enO`&qF$v!Xt`SKkG*c{G)m`zW6UcRK_va-m18u(ovk&jD1cy8Af3J zfCc6$SSu`a*Jg#vM%B7!5oqK2@Xs_peBFB18!1NM&Y2>2jSrzI$tKHX0|y8RcVSJ@ z=s7xv>rnL&)Q)O;UmrCnNbf^VDo8;oD<^F`s_t&b$`NEjtEe7OY(B}j3{o0QIz9_z zrhdUAfJ(Up?_sywJqG)mnluc<1ZO~gb-uajd{4M?P|=wVf(Y?XsJTeKO`V=7yPCUE zCzI5f)-L%%5sB?ghXSZFthkYe1w(Nh)q!}sva!)LvX_Rl=pnigJZ7si!ix9FniN3d zt-XU(X_3_q;NvYws}JXmK?&0Vz8Vw#$ttXK-#1V~<KV-m2 zIfA+#Dra+;CX7~R+aV8Ob8(+mA^HoeCCSok^&!y#^YJChC#<d4KiryLvL=KXZ~;d@F6F7hY9qvrA-+7iWm1h8FUi78WP5$CHvhdpJm75kBIZGhuw)IcSbq=$eSmL$y#@?X^uSS)++e{yYeL z*%gC*bmHMPR0~yELIejY+Vhl}p6bR*lY!*lj3v>GD!(o7X8~cfgdL;n0ytwmc~bI_ zTd4Tb@+{&ESx`MV*!N2r9qup@krh-3GLz(fWN^Asu)l76I4ZQuYUvG)3MRH6jB1#kzb(IjVNp1~ffna74{E_!nWx89HrDW0N*0P^JLftHkPjDyxS+XR=q;D7k+EZm$u=i2E0u?h#G6 
z4&R?KXPFLXjC`S4#FaL@Ejvn@V-~qSg}ne~wvS8WIN}tlfezcI7A{iAgyGs6EWcgc zWw`(Ri*;Bs{;WQbqa8bjG)aC%;1!tz1EC-r=wRe79_TKhdE>>UBpQro`&TavM%6K- z{Rj?A>Kb#t=Nq`0b?Pbb^v1J*bJ$`lY?lj(P=TbJn>?LQta59LFgT0`y!Ffire+CI^QkhqpJJg%jccxAM%F zX7QOczJ^~E|J6@HSjndt$}jkwSSHb!Z8)TMj-zoJu$WI<%OJk04kOi{+QHnJkn+!XdF;#?vQv(@>Qq^yB;!c@$ zx?eQt#DD1y`2|2ptY|$&SBi*>L!=`dW<}~aX0~u?dR5s(&iD=7oK0*w6co%l=%j#0 zjeaZvY`B@=p8FWS%xtjdyvj!_%QVi(qks5L!d5|+<~-$JBB^IR)H_mWGHf{doafq= zcB9`Q^~c#uTM2BUo-|E1_%VyG#l>MucD!sLDAbatDxzA=AbFR;K_1mg^^s0VxKRvK zCF94zEy$Aw^ZGp3Q*RT#Vf9ZR%?JyF?M|X{~mJvsad)8a( z1-#oTpeG$b$<|JCI%=YIcu_t{-4^Wn=pS3|?kCd%Kg$1=g2Z`7=``8eiBkY7^vqe6Ocs6ET)X+dg z;TI)T-%P6SJ5Mb$YIRo+Q$op3!zIQraI?4IR67m_>&BmJez>tNP(=-A21_cx$Bs)l zF^#aKo)lp4W7!<{O+#I2Xh5%JhBD3;XG{VBW|I*dnN)4_y!i{xXAB_#Gt4g-6b_l7 z?i3Fu+WIsyH&!Ua14NIVRX?oQng$rFow_k_2{e82(-;amIhd-_JchMlCv?M#;Z{64 ziUwT5)Hnhw<1oq%1^+~9%M_*NglSWrNSqSmuJ0CWUZyM_Oolh5Tk;Hsc@CaZB-J0f z(_{V`;kN@mQHTHlm8CoqsWV28Q67Q2vnEyzG#z@JSICzf33jqVJ_>P4n~A_9`g%+J zy3X~MuWQt`49D(?>G+2=A^T<+^(s$hJnCeu@Xac2;$d~m9V1{y>xq;|vTfy6& z0<9d%T9NPq1WuA*kQp*sgqw*Xhh3FJw$b9AulG{@O9O6TO;kzfPtwJpgW0qU!1w2Y za_xh2&-U`K`H@pYPfw!RX%Nf^MeRy67KZiY{l(&TL%TtU3ddP$t? 
zFKyJ}n$9^(D`o2oB1myi5{cFC3}x9M zfFE`ED?@le886VA3UUe{zi*sWrPMPcV2Gq#TpxBy`6^h(Qkv&3Y;h{NXV`Ci6Rio&}9*^afC6$cGTDGK8dWH%NhwFP>hSZ{rk5FN^pQF%Uz?7;Mupk-)g5vm`K zxm?{?sKUzj7BE0ufyN`a4xwrF6<|pZ{vg)5R%K&2a+eer?$XY+n>L1H*B`Yg^j%~+ zOURf{f^m58GDiQD>?ZVVq4b$7?0`|yIf1`H;?qeqHHybVoCHOxI1dih(_(~LmKFy} z)nOZ@V%Rt_dKX;If+Ii~0;pQ`5xKn7haaG#GehH(li;GIqYUi;g7^UYzNz&RS+f%5 z$_+nyg73A)+XH7_Im63je)ICj=!DU|5BBKmj6UXn-Fi2c8CUuL>kp>H`}#)$8WO#( R<06IFntQ%Z`s(1f{~JSDA$kA+ From 184331c7bc5cb3a197d91cde4e371c2fe9d42492 Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:27:42 -0700 Subject: [PATCH 42/60] Extend trace events with categorized ID ranges and fix tracing abuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorganize TraceEvent IDs into four ranges: - 0–1023: iris ops (data movement, atomics) - 1024–2047: user data movement (fetch) - 2048–3071: user compute (compute, reduce) - 3072–4095: synchronization (wait, barrier) Replace kernel-specific wg_fetch/wg_gemm/wg_gemm_wait events (14–16) with general-purpose fetch/compute/wait events. Fix HBM buffer kernel to use proper wait start/end pairs instead of orphaned events with wait cycles smuggled through pid_n. 
Co-Authored-By: Claude Opus 4.6 --- iris/ops/all_gather_matmul_hbm_buffer.py | 36 +++++----- iris/tracing/events.py | 85 ++++++++++++++++-------- 2 files changed, 72 insertions(+), 49 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 34e36dc62..2b1afb862 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -16,7 +16,6 @@ import iris import iris.x -from iris.device_utils import read_realtime from iris.tracing.events import TraceEvent from .config import FusedConfig from .workspace import FusedWorkspace @@ -98,7 +97,7 @@ def _hbm_buffer_all_gather_matmul_kernel( if TRACE: _trace_handle = ctx.tracing.record_event_start( - event_id=TraceEvent().wg_fetch, + event_id=TraceEvent().fetch, target_rank=cur_rank, address=flags_ptr + tl.arange(0, 1), pid_m=pid, @@ -178,24 +177,29 @@ def _hbm_buffer_all_gather_matmul_kernel( if TRACE: _trace_handle = ctx.tracing.record_event_start( - event_id=TraceEvent().wg_gemm, + event_id=TraceEvent().compute, target_rank=cur_rank, address=flags_ptr + tl.arange(0, 1), pid_m=pid, pid_n=my_stage, ) - _wt = zero.to(tl.int64) for k_fg in range(NUM_FLAG_GROUPS_K): if TRACE: - _ws = read_realtime() + _wait_handle = ctx.tracing.record_event_start( + event_id=TraceEvent().wait, + target_rank=cur_rank, + address=flags_ptr + tl.arange(0, 1), + pid_m=pid, + pid_n=k_fg, + ) flag_idx = pid_m * NUM_FLAG_GROUPS_K + k_fg while tl.atomic_add(flags_ptr + flag_idx, 0, sem="acquire", scope="gpu") == 0: pass if TRACE: - _wt = _wt + (read_realtime() - _ws) + ctx.tracing.record_event_end(_wait_handle) k_block_base = k_fg * K_PER_FLAG for k_off in range(K_PER_FLAG): @@ -225,13 +229,6 @@ def _hbm_buffer_all_gather_matmul_kernel( if TRACE: ctx.tracing.record_event_end(_trace_handle) - ctx.tracing.record_event_start( - event_id=TraceEvent().wg_gemm_wait, - target_rank=cur_rank, - address=flags_ptr + tl.arange(0, 1), - pid_m=pid, - 
pid_n=_wt.to(tl.int32), - ) # ========================================================================== @@ -302,9 +299,9 @@ def all_gather_matmul_hbm_buffer_preamble( return ws -_WG_FETCH = 14 -_WG_GEMM = 15 -_WG_GEMM_WAIT = 16 +_EID_FETCH = 1024 # TraceEvent().fetch +_EID_COMPUTE = 2048 # TraceEvent().compute +_EID_WAIT = 3072 # TraceEvent().wait def _extract_wg_trace(ctx, grid_size, **metadata): @@ -321,7 +318,6 @@ def _extract_wg_trace(ctx, grid_size, **metadata): # (set by record_event_end). The actual duration is end_ts - start_ts. end_timestamps = bufs["duration_cycles"][:n].cpu().numpy().astype(np.int64) xcc_ids = bufs["xcc_id"][:n].cpu().numpy().astype(np.int32) - pid_ns = bufs["pid_n"][:n].cpu().numpy() starts = torch.zeros(grid_size, dtype=torch.int64) ends = torch.zeros(grid_size, dtype=torch.int64) @@ -333,12 +329,12 @@ def _extract_wg_trace(ctx, grid_size, **metadata): wg = int(pids[i]) if wg >= grid_size: continue - if eid == _WG_FETCH or eid == _WG_GEMM: + if eid == _EID_FETCH or eid == _EID_COMPUTE: starts[wg] = int(timestamps[i]) ends[wg] = int(end_timestamps[i]) xcds[wg] = int(xcc_ids[i]) - elif eid == _WG_GEMM_WAIT: - waits[wg] = int(pid_ns[i]) + elif eid == _EID_WAIT: + waits[wg] += int(end_timestamps[i]) - int(timestamps[i]) return {"start": starts, "end": ends, "wait": waits, "xcd": xcds, "grid_size": grid_size, **metadata} diff --git a/iris/tracing/events.py b/iris/tracing/events.py index 62d7cf8df..b8adb8bf1 100644 --- a/iris/tracing/events.py +++ b/iris/tracing/events.py @@ -2,6 +2,12 @@ Trace event type IDs and Triton-side enumeration. EVENT_NAMES and TraceEvent must stay in sync: same IDs for the same operations. + +Event ID ranges: + 0–1023 iris ops (data movement, atomics) + 1024–2047 user data movement (fetch/prefetch) + 2048–3071 user compute (compute, reduce) + 3072–4095 synchronization (wait, barrier) """ import triton @@ -12,6 +18,7 @@ # Event type IDs to names mapping (used for export / display). 
# Keep in sync with TraceEvent below. EVENT_NAMES = { + # iris ops (0–1023) 0: "load", 1: "store", 2: "get", @@ -26,48 +33,58 @@ 11: "atomic_or", 12: "atomic_min", 13: "atomic_max", - 14: "wg_fetch", - 15: "wg_gemm", - 16: "wg_gemm_wait", + # User data movement (1024–2047) + 1024: "fetch", + # User compute (2048–3071) + 2048: "compute", + 2049: "reduce", + # Synchronization (3072–4095) + 3072: "wait", + 3073: "barrier", } @aggregate class TraceEvent: """ - Trace event type enumeration for iris remote memory operations. + Trace event type enumeration for iris operations and kernel instrumentation. + + Event ID ranges: + 0–1023 iris ops (data movement, atomics) + 1024–2047 user data movement (fetch/prefetch) + 2048–3071 user compute (compute, reduce) + 3072–4095 synchronization (wait, barrier) Usage: >>> ctx.record_event(event_id=TraceEvent().put, target_rank=1, address=ptr) Available event types: - Data Movement: + iris ops (0–1023): - load (0): Remote load operation - store (1): Remote store operation - get (2): Remote read (pull from remote to local) - put (3): Remote write (push from local to remote) - copy (4): Peer-to-peer copy between ranks + - atomic_add (5) .. atomic_max (13): Atomic operations + + User data movement (1024–2047): + - fetch (1024): Prefetching / staging data - Atomic Operations: - - atomic_add (5): Atomic addition - - atomic_sub (6): Atomic subtraction - - atomic_cas (7): Atomic compare-and-swap - - atomic_xchg (8): Atomic exchange - - atomic_xor (9): Atomic XOR - - atomic_and (10): Atomic AND - - atomic_or (11): Atomic OR - - atomic_min (12): Atomic minimum - - atomic_max (13): Atomic maximum + User compute (2048–3071): + - compute (2048): Kernel compute phase (GEMM, FFT, etc.) 
+ - reduce (2049): Reduction operation + + Synchronization (3072–4095): + - wait (3072): Stalled on a dependency + - barrier (3073): Synchronization point """ - # Data movement operations + # iris ops (0–1023) load: tl.constexpr store: tl.constexpr get: tl.constexpr put: tl.constexpr copy: tl.constexpr - - # Atomic operations atomic_add: tl.constexpr atomic_sub: tl.constexpr atomic_cas: tl.constexpr @@ -78,21 +95,25 @@ class TraceEvent: atomic_min: tl.constexpr atomic_max: tl.constexpr - # Workgroup-level profiling events - wg_fetch: tl.constexpr - wg_gemm: tl.constexpr - wg_gemm_wait: tl.constexpr + # User data movement (1024–2047) + fetch: tl.constexpr + + # User compute (2048–3071) + compute: tl.constexpr + reduce: tl.constexpr + + # Synchronization (3072–4095) + wait: tl.constexpr + barrier: tl.constexpr @triton.constexpr_function def __init__(self): - # Data movement + # iris ops (0–1023) self.load = tl.constexpr(0) self.store = tl.constexpr(1) self.get = tl.constexpr(2) self.put = tl.constexpr(3) self.copy = tl.constexpr(4) - - # Atomics self.atomic_add = tl.constexpr(5) self.atomic_sub = tl.constexpr(6) self.atomic_cas = tl.constexpr(7) @@ -103,7 +124,13 @@ def __init__(self): self.atomic_min = tl.constexpr(12) self.atomic_max = tl.constexpr(13) - # Workgroup-level profiling - self.wg_fetch = tl.constexpr(14) - self.wg_gemm = tl.constexpr(15) - self.wg_gemm_wait = tl.constexpr(16) + # User data movement (1024–2047) + self.fetch = tl.constexpr(1024) + + # User compute (2048–3071) + self.compute = tl.constexpr(2048) + self.reduce = tl.constexpr(2049) + + # Synchronization (3072–4095) + self.wait = tl.constexpr(3072) + self.barrier = tl.constexpr(3073) From 1b6df888dfee09481a363e81b81102e553722f71 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 9 Apr 2026 07:28:01 +0000 Subject: [PATCH 43/60] Apply Ruff auto-fixes --- iris/ops/all_gather_matmul_hbm_buffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 2b1afb862..89b6acebd 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -299,9 +299,9 @@ def all_gather_matmul_hbm_buffer_preamble( return ws -_EID_FETCH = 1024 # TraceEvent().fetch +_EID_FETCH = 1024 # TraceEvent().fetch _EID_COMPUTE = 2048 # TraceEvent().compute -_EID_WAIT = 3072 # TraceEvent().wait +_EID_WAIT = 3072 # TraceEvent().wait def _extract_wg_trace(ctx, grid_size, **metadata): From 6b7005900ec7cd33284fdfdc44c3136cf1ebb5df Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:30:46 -0700 Subject: [PATCH 44/60] Bump trace schema version to 1.2 for new event categories --- iris/tracing/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iris/tracing/core.py b/iris/tracing/core.py index 317fc0bbf..57c007625 100644 --- a/iris/tracing/core.py +++ b/iris/tracing/core.py @@ -208,7 +208,7 @@ def export(self, filename="trace.json", merge=False): "traceEvents": trace_events, "displayTimeUnit": "ns", "metadata": { - "schema_version": "1.1", + "schema_version": "1.2", "num_events": num_events, "rank": self.iris.cur_rank, "world_size": self.iris.num_ranks, @@ -285,7 +285,7 @@ def export(self, filename="trace.json", merge=False): "traceEvents": all_events, "displayTimeUnit": "ns", "metadata": { - "schema_version": "1.1", + "schema_version": "1.2", "total_events": len(all_events), "max_events": self.max_events, "time_unit": "cycles (s_memrealtime @ 100MHz)", From 8607e3850b88f3cf38b07bb860e03e509240a216 Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:38:51 -0700 Subject: [PATCH 45/60] Add RCCL baseline and rename algorithms to one_shot/prefetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add rccl_all_gather_matmul as 
separate benchmark function (RCCL all_gather + torch.mm) - Rename baseline → one_shot, hbm_buffer → prefetch Co-Authored-By: Claude Opus 4.6 --- benchmark/ops/bench_all_gather_matmul.py | 38 +++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index d232d4ded..cebf280c8 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -2,9 +2,10 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. -"""Benchmark for fused all-gather + GEMM (iris.ops): baseline vs HBM-buffered.""" +"""Benchmark for all-gather + GEMM: RCCL baseline vs iris one_shot vs iris prefetch.""" import torch +import torch.distributed as dist import iris.bench as bench from iris.ops import FusedConfig, all_gather_matmul_preamble from iris.ops.all_gather_matmul_hbm_buffer import ( @@ -15,7 +16,36 @@ @bench.register @bench.axis("num_ranks", [2, 4, 8]) -@bench.axis("algorithm", ["baseline", "hbm_buffer"]) +@bench.axis("M", [1024, 4096, 16384]) +@bench.axis("N", [3584]) +@bench.axis("K", [8192]) +@bench.axis("dtype", [torch.float16]) +def rccl_all_gather_matmul(state, ctx): + M, N, K = state["M"], state["N"], state["K"] + dtype = state["dtype"] + world_size = ctx.get_num_ranks() + K_local = K // world_size + + A_sharded = ctx.zeros((M, K_local), dtype=dtype) + A_sharded.fill_(1.0) + B = torch.randn((K, N), device="cuda", dtype=dtype) + A_gathered = torch.empty((M, K), device="cuda", dtype=dtype) + C = torch.empty((M, N), device="cuda", dtype=dtype) + + state.set_flops(2 * M * N * K) + state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) + + state.exec( + lambda: ( + dist.all_gather_into_tensor(A_gathered, A_sharded), + torch.mm(A_gathered, B, out=C), + ), + ) + + +@bench.register +@bench.axis("num_ranks", [2, 4, 8]) +@bench.axis("algorithm", ["one_shot", "prefetch"]) 
@bench.axis("M", [1024, 4096, 16384]) @bench.axis("N", [3584]) @bench.axis("K", [8192]) @@ -36,13 +66,13 @@ def all_gather_matmul(state, ctx): state.set_flops(2 * M * N * K) state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) - if algorithm == "baseline": + if algorithm == "one_shot": C = torch.zeros((M, N), device="cuda", dtype=dtype) workspace = all_gather_matmul_preamble(ctx, A_sharded, B, config) state.exec( lambda: ctx.ops.all_gather_matmul(C, A_sharded, B, config=config, workspace=workspace), ) - else: # hbm_buffer + else: # prefetch C = ctx.zeros((M, N), dtype=dtype) workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) state.exec( From 63c978be02be51c1879c6b2472cde9a9d7f68b96 Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Apr 2026 06:56:33 -0700 Subject: [PATCH 46/60] Fix RCCL benchmark: use regular CUDA memory, not iris symmetric heap RCCL's all_gather_into_tensor expects hipMalloc'd memory. Using ctx.zeros() allocates from the iris symmetric heap (fine-grained XGMI-mapped memory), causing 'HIP failure: invalid argument'. 
Co-Authored-By: Claude Opus 4.6 --- benchmark/ops/bench_all_gather_matmul.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index cebf280c8..8102c1700 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -26,8 +26,7 @@ def rccl_all_gather_matmul(state, ctx): world_size = ctx.get_num_ranks() K_local = K // world_size - A_sharded = ctx.zeros((M, K_local), dtype=dtype) - A_sharded.fill_(1.0) + A_sharded = torch.ones((M, K_local), device="cuda", dtype=dtype) B = torch.randn((K, N), device="cuda", dtype=dtype) A_gathered = torch.empty((M, K), device="cuda", dtype=dtype) C = torch.empty((M, N), device="cuda", dtype=dtype) From 6a8ad6b43eee043989d3ede08758bc45e6b3f071 Mon Sep 17 00:00:00 2001 From: Muhammad Awad <112003944+mawad-amd@users.noreply.github.com> Date: Thu, 9 Apr 2026 07:08:20 -0700 Subject: [PATCH 47/60] Fix RCCL benchmark: use dist.get_world_size() instead of ctx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RCCL benchmark shouldn't depend on iris internals — use torch.distributed directly for world size. 
Co-Authored-By: Claude Opus 4.6 --- benchmark/ops/bench_all_gather_matmul.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 8102c1700..911736d5d 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -23,7 +23,7 @@ def rccl_all_gather_matmul(state, ctx): M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] - world_size = ctx.get_num_ranks() + world_size = dist.get_world_size() K_local = K // world_size A_sharded = torch.ones((M, K_local), device="cuda", dtype=dtype) From 292ee11098d18b8a8c864316464e44c067813449 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 21:41:56 +0000 Subject: [PATCH 48/60] Update HBM buffer kernel defaults and benchmark for parameter sweep Updated defaults: k_per_flag=16, num_fetch_sms=32, num_warps=8, num_stages=3, first_stage_fetch_sms=256, block_m=128, block_n=256, block_k=64. Benchmark now compares one_shot, prefetch_prev (old defaults), and prefetch (new tuned defaults). 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/9eb41876-b406-4120-8d3d-84e5b2bb69de Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 29 ++++++++++++++++++++---- iris/ops/all_gather_matmul_hbm_buffer.py | 12 +++++----- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 911736d5d..43225d5f5 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -44,7 +44,7 @@ def rccl_all_gather_matmul(state, ctx): @bench.register @bench.axis("num_ranks", [2, 4, 8]) -@bench.axis("algorithm", ["one_shot", "prefetch"]) +@bench.axis("algorithm", ["one_shot", "prefetch_prev", "prefetch"]) @bench.axis("M", [1024, 4096, 16384]) @bench.axis("N", [3584]) @bench.axis("K", [8192]) @@ -60,18 +60,39 @@ def all_gather_matmul(state, ctx): A_sharded.fill_(1.0) B = torch.randn((K, N), device="cuda", dtype=dtype) - config = FusedConfig() - state.set_flops(2 * M * N * K) state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) if algorithm == "one_shot": + config = FusedConfig() C = torch.zeros((M, N), device="cuda", dtype=dtype) workspace = all_gather_matmul_preamble(ctx, A_sharded, B, config) state.exec( lambda: ctx.ops.all_gather_matmul(C, A_sharded, B, config=config, workspace=workspace), ) - else: # prefetch + elif algorithm == "prefetch_prev": + # Previous defaults: block_m=256, block_n=64, block_k=64, k_per_flag=1 + config = FusedConfig(block_size_m=256, block_size_n=64, block_size_k=64) + C = ctx.zeros((M, N), dtype=dtype) + workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config, k_per_flag=1) + state.exec( + lambda: _hbm_buffer( + ctx, + C, + A_sharded, + B, + config=config, + workspace=workspace, + k_per_flag=1, + num_fetch_sms=None, + num_warps=None, + num_stages=None, + first_stage_fetch_sms=None, + ), + 
preamble_fn=lambda: C.zero_(), + ) + else: # prefetch — new tuned defaults + config = FusedConfig(block_size_m=128, block_size_n=256, block_size_k=64) C = ctx.zeros((M, N), dtype=dtype) workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) state.exec( diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 89b6acebd..40fcfa1db 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -241,7 +241,7 @@ def all_gather_matmul_hbm_buffer_preamble( A_sharded: torch.Tensor, B: torch.Tensor, config: Optional[FusedConfig] = None, - k_per_flag: int = 1, + k_per_flag: int = 16, staged_a_layout: str = "k_contiguous", ) -> FusedWorkspace: """ @@ -348,15 +348,15 @@ def all_gather_matmul_hbm_buffer( async_op: bool = False, config: Optional[FusedConfig] = None, workspace: Optional[FusedWorkspace] = None, - num_fetch_sms: Optional[int] = None, - k_per_flag: int = 1, + num_fetch_sms: Optional[int] = 32, + k_per_flag: int = 16, fetch_block_m: Optional[int] = None, fetch_block_k: Optional[int] = None, staged_a_layout: str = "k_contiguous", - num_warps: Optional[int] = None, - num_stages: Optional[int] = None, + num_warps: Optional[int] = 8, + num_stages: Optional[int] = 3, num_fetch_stages: int = 1, - first_stage_fetch_sms: Optional[int] = None, + first_stage_fetch_sms: Optional[int] = 256, trace: bool = False, ) -> FusedWorkspace: """ From 69797879c48abde21e2b356102113ba87c560410 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 21:46:13 +0000 Subject: [PATCH 49/60] Update benchmark plots with new vs previous defaults comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit num_stages reduced to 2 (from requested 3) due to shared memory limit: block_m=128 × block_n=256 × block_k=64 with num_stages=3 requires 98 KB LDS, exceeding MI325X 64 KB limit. 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/9eb41876-b406-4120-8d3d-84e5b2bb69de Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- docs/benchmark-results/latency_comparison.png | Bin 79912 -> 98348 bytes docs/benchmark-results/tflops_comparison.png | Bin 85572 -> 103053 bytes iris/ops/all_gather_matmul_hbm_buffer.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/benchmark-results/latency_comparison.png b/docs/benchmark-results/latency_comparison.png index f335cb194a704d8d2a9abbb97b0ce630fb68179e..288fad091c6240244152eadc2e37f6ccdf2dcd7a 100644 GIT binary patch literal 98348 zcmeFZXIPh4_cdrVCb1`0q=*Vgks=5PN>LGzF1-mviWEVlOE(r21d-y;f`D}C9Rw*+ zK$;Y(0t!m+O?sVmz|8#T{V-o=uInAH$0v#i_kGSjd#}CL+Q;*fyyT9p)LS=f*sw$D z;svD*8@9S^*szKFkKgc_%23~Z_|IuuaSdB#%d55ySFDXT$X&6$VQy(_ZmNIS-pJa< z)bjdCPVN(&0_=xPY;AAYoaW-P_&>jb)6&|Q>z%e;HNMN18y7WgHf%VsoBaC4IJ8b_ z!!H{)NL@Ima`WZzSBIN>8y9CMYN7)q#{RjhyLlkL`46RE_YSC@{gahV?ud%%#@n|K zU)=CZ;oo5o_73FFw(lNL&HYtrX8sQIVx{3J;UmAaCl6IH+4O$=D4c#KJ*O(a@7c$z z>6-Y#|MTNw>ahQr|M>~>`wPCe75?v^yS4Yx|305e=B1~ne=d8h(l<0T^vZH?RnU$h zxnltvH*E@FY6`qH^iTJ0Dk_ud_O?7n8|(X<5C7)o=H{{3R9adZryv_za#r7KNwafo zEcDBfcOEecp=QIMQ`xk$66zwQ7@VCKO+p#Ut2#P52J<)PNxd<~mT&mh`2F!0@|z7C zc3-UsWX*M7EilS)njVbP&38&Gxii<5>8*KxCmpY4yO5an*|Y!3EQ?K%$0~_4t&3C) z6>-&Vo$$-R13o#fp2X68=Je@`uU~ZpZF(EpEDIBl$-Q`O)%9U7^|z>4-Td4v^OoH` zPRz{AQBnaCo44-0RP}7M_>YOS&kM4=&tjR%UYEHWVnHqD@^w@Jn`VbAON6(Pm$wst$4p*5uQOWqdAR$^ zd}*zO-?w9Qu?Smgz; zss9lL~!bO$Jq2NXPg`k3bS)y0lg-M8<#2gD~dy2CGcmFm}+^Ixldbxte& zs+{$G|L_|%$LL1a4pZ{+ts?7tck9>B1+H&J7Kcw(g$SR?5tG{AmTjqi@!|uc=kJ>X znOImZ<~z@-EldqG#;L}4?__?rapT7Al?QnYFGgR~751eUZEDtYPm~MdAE*&sRl(N< zbPM=xD@EKyM9Dm5a~S=SmYyDPXOlbSrgEarTeYB5mc85LQj6D?aYIT8XU-v_3+`dzx?vcX}6^}AAD$x`R*-p6)uI>*47$&op8iQIRbW4>|I~wft3{@_m#Y+ zWW8j>oaQunBAyGBxuXTmE-o(JqoW~TjzqDvWmzbnJ$viXA>lZqN=mHEQ%-Apd;Y1G zeCOJGZoDo;6eq(?7V 
zkdyJ-cB)q{^OKtT*IP4U?T0@f84y2ryWB8s=|YGICnMv9Z3j42R#%o)RwA)&xC4%hiCT2+4Z)imQ*;v@Qkjxyz2yXZx}-1t%^E|v8q$3`;LeI=*o zGLK=|D+wR!5;L`~mUO@|A9A3u{buu%J(Q92Vq$n8-X|=I z=dWMCE*-?5IM@&yF+J4WCM0j2N5KYXX&s=Vdfip(J5;nwRE5H>**!QIm)-6p6L3t9 z$GafgtdXrTNk@C2E-D}>C`kGVThu^(Oq)w0azWywbR3^}x5jw&Q&R)AVPdvia%oa! zHZfxNck(=!RZ|vQ`B5XP)?9{UWVWcCeciO-wIUw#1&>h$BS{Z(L267gl!jZsY_jdC zc*4?^H7&Gd$BxF2&rhnR>WfFo2W#P>*%ZU?M#~4srKF^oBp3P6oZ@wwGHFcERMt#0 z^h-}qC-1v7|07=f;rjJDS!Vn+Ma*SX0f*R5{G8mg|Wib0_&a%rcLE+ zX*7ij+DN!AOevQ8Gd%MA>{yo@7M4dLJDAe@3==acUe5qohRwnywdg6d34tK1t$ihW&uf2+hfu8Q} zNUTXkbF(@sc&cWgO8A8>MH36NW0pTIhX^?y#p{T5Aa!XsmG(Lw&wqBz5nE)|824nd zx9a$=UAqL&oH>52DIrcfJBeh~`@85{zwpP+nj3QG&qm$tD74zAduzE#+cFk~VPT?_ zKGS{G&8Q~iYpvx_Q{pF)2pr@j1<_>{Ch5RSxwZq{cplm3Jh3@b(^@EicnwEJ$Tqnk|1scjm*Jjt67Eq_-q=xUcF|1)WlF&vWeQ?L8(b zDQQ&wQWMe8=scDmlWI_^Wqtp%bb|9h;p$3#L#(oDj65A4M2C};6Dh#&>C>mu6!zGi zbf^0l#|oQU(v7?J>dBJ4ExIzt(BAg@=FO(~lUc)SKkM$YFoZ!lDjya`9XprlFs2*S z*7#bn=mv*`H`Nsl_wVmF7cC$TwMMYVv>qbJ72dOsce_Gdm&ew!`9<5SB!ON$hDhd@ z@tP>Ds;XM5JlXoCy1cwR&wfNJsQFuK z=C$6z#`w~k!xc}C2PNwjUe8=cDoR{fSX>Mm85!AOfr9aHc8J&f^NFH@y!`wsJ(mgb zz|=7zkoq;C(4zK_oh z?CtD)>}l|NKai>|KG4lO*1dJGv_Zf{d{OTnqW{!g9#B*|_B{y<73OBuOpClCimH;)l59WH z>fxr7Ynz&|IFj8F?y_HUr6u#4`bz`vrMxHRt6c3sOWM7-% zwjS#on*a80lg-2Z9Qs1Z@9%93s(C4B@W2>H&_e{lcIv)|hgQil@+8fIqm$E%P$5Ts zP5OBaztC*FCaRE;4Ib3M`=pr1_Fhy94Ga3V4tfKpG3KciAF0bCd$KJ%t~WMUR#wvP z-+#4#cDUuF4I)C{`;_y{a4!OdCM$EgJ!5*0Tk!6#gQi2XQrh}X9T&1g%WB?zeyOdY zp^-4UPpWt0=B*cguxZP9I}bD^YWJSMx3z?=;QH6x64rv(Nil$FJstP9?&Gz+oUvH;^{<`FA9L_tq!2_%`hNI4>npl) z37AH&HDd~~<};b6J7cVH!A~+}x#y`~g2c9bt1gembRL(tSg-hN%}JjgeO};sR&_O~ zCK)d$kyC{i?Sw3y z3=_XAIe;P7II|PazOO9L>LEMGoN*ZSP8|zA<1jV)cI?BT@ILx2IqrM3Q4v*gtc}Fl z08nry>CV_UICQuk=vqnA&5t{6H`tGp?MZE{;y*bX6FH6qTso-h>0A8fDYEHAolQJm z@$=*ug2WW*-Kup3AR|uaZC2xhxvZt}qFsaS`ESEGCLaX9wIAVesWxxRs!CbG;chK- zFEkRgcDt1F_cSp zASZg7Nc^qFf{6Rd2fC*J*s?|OYpz|U+p<%zh^rP#Z2f(DH>xiCQ~=1H(a|KdYGkcn@LRmN;%Z{@ zI_vt^ICjmnel!G)6J4c=(d8~v{eEf*8doyM0>`ypR9)Rcb>eKPFMTucYV7udJXB4V 
zU*0J0+_`h1H^|a4N>k%x#UH=mI-<#AtivFDzBfByLjRlOml21})#FOS0f zlo7!x2T;R3g_BmGmaLmtAER&$fooP5>3LG$W6! z>fkey&O4L+v2k()kr4nFdGRmTmH9r8qh~EGEvsG%*7s&@apYe@(2*52YH4ZF#Dg^h zG1$%wYmT&L%HYVWqAv*K(B_XfKv$@J>iU-ycH^oSI387XXc2Aa#;?ddJMr!kZw8X3 zz=3bYPtkpHhYJ6ddHrkar8hKa&??@L&uHlXS}IcY3kxe%|OZ1(T1Uno7c z19h6hV@E|}fMnx=i`r`UHyf8@NfV=Ak*0^={PQc+J(KP-KkFMeQWCc9+^L3KZZPev zeVK7;MQ$q~f412f+y2Kr9hDT0lX=IE1xTSZHzA|gE=-v&1MoQwC1$e^(F;2nvcxSS zA?-zIplLk#NHOWMV3Dz5aZSlxAqG7Pay<(g$vwwpQ?w4u{}5gJa`eF8IE?COPMT05 zNh|=xN=HceC?Q8}A1nQQ(c78T+65u?slav7cJzxNozTroSWZ=RWz)b!;(|T)m+gj{ zs>V0(78NbjaQ~&moIf7B>2Y)gmCoe*`+H=|XT~~`&)xa$p#JheOlXExmqevCf(NJN zx~ALGTC)rIh6cPv=IJIJo*k|{S3jBvJW4d3186gT*3KPfF4uR;sA8{JXj%QN zG!K_{*2gpvM#kcoN5pEvt!GaRHu;`xuC24dmQtFgM;UhEIdSZQ1d>R$;{wW z{eeSi@nO0D6N3%n!DxPEdGRVbgrY2*+{^?$v>;b>3S8#H^Nz6wC(fGLilTAr-FIBA zc=6^ZVVdjhd8QibuP+^mkG))U&%@&dy{MbHCcZvRHQ_J4^c#9^^HE_6u}V?fhXTx` zXS)}MJ$-hGF4d;^lKh_{GCB4O36F%fbYtHuI(W4Rp=xwGRe*Rt%H~&XhGV|@ES+?m zG%Rv67gmT^bg2Y94eX=4jxY5PnVFgK=I1n`hb>ic3V!#w?~O zT`!^FMZo{vd?MP-Y{1H9FXnNIUsUxzi?QuM8n_Y+K4Bi)u2yPMAMF#Shjg=jXs9{a zQbW|VjxinI7=+T-ItmhCWg!i($=@yflPAaVzElomJ2|G<)!V(9H zxH_SCt2E>(H{=;euMs&pm2cf6%_BbVjv{0?{_UKYHgfZhh>g-*(Z_s!UcEZJJlEw* zay4Rr8J%BCTbp*$xnjSt$jF@Egq>%2y*&cB4J4uzLfdPZJ5BB^&W>fjc#(^kr#vfi zKdN^JBvq+^T{lXXk)a33d18_B*W1x6gY~Jv$Jzgb#Pm4X;c40*6*3# z178?pQ8zq#$|CYhc+J|_#RJ>*51rXNjpMTTm@hS=c%lgoWdgO z?q;6Jy$xs{l#w#bM?1cL{d&TQ^t_4fXa16$U+P$0G1eey-lQuVUv->fTEFK`j@{XH>&U zkkQkwUj#yG8q^dvYhV!z1qU*K!fI9cr&XZ8i1X2hLWzrjR~!+dx_LJhk+@S6P}O+9 z(A?X0KrK!smc0_)v?6}OREu$_X;Xq`Ynsu+Lqa#PwVfv6QYD5w zy4%fNdMn@@54!`fFTKn7KY(aqIJx4IzK1fs_X42&)wj%y;HK3BS=1OMa{!g`+;O#p z+LDI{IEFg+=&7JBis~$Rs7=A?x+d&0cjCjyQTJ#NduU%O2bbs9x}XwXHsd+o(Z6DB zEISJFjehL~Xk=k$mqqPL z@iO#Fw#tYi_~TiH4I)4zsohKVyz)8a%H)q9GPNRTn7Q2D-KFY~vq)W?9_!Gf;Wtm2 zdJc{R5zT;}Mn+p-h=!n5M4y(|b`Tkc&#G1Zy$*0C9TIkW4cwoW?mV$PI#)3J{{4H~ zAH8x#8QD@tm|SNrs3B#|&&{!%IB}`&VfZHi0quf<0yS|JNV&OKy#mI(WVA$qk`oF)1xALEZ~Fp!HC~>P+xSK 
zD4-E_bMaO-ES)__{@uO1cQt0{bH1HiIxWtAD*?=zN&?cq1f{%7#rBE6IBgg^Cr;>r7*UE;mZYN;i*^Z} z6kkOzigbidD2H|yqu}*7Welm(X>3U;jm*bbdosOg-zL8In|rBe`2F7TzwRB_9%-0d zXTu!r++)<9{eA0x_Sa~kG$^R12{>cdaMGOs;-kJcH=imW1H(5woPgtY4P>I)tVXe- zLR7IMKU9s&*h^*ndLn3ZHK+|_^9Aq(>JWK4Z4+J>Pp!yoXB|jq=7G3^V4`XTMr9cg z53{+DCGlz)eJucLB-oDQXo1v*ir&A^xYo?+I{xl=gtXF2{fE+MZF>69h&us8aq1N$ zfGaDF`>ZUTTl1}~xsd2K9l>fzmt&DP22dHf^Hyp2%nVu0rMH;7&V0>xKGkaB-?ic+ z-|ZI?RXm?2=Ms8QYI;$*yO~YUP14HVz6s!v4>$<>P<{1W7aD|&zu#>H{^hT3J}3P- zKafjL1UW+%9M-)?ldiFYP9Gq56Xt}mm$IR5JPwb-#=6$>}G$z302QYmtRvJpkeMXk(^!X3w0+8C#H>E zn@0XV4uY1#s*C8315jX===c1fcCHNEhCZK@mP*Oa@BX=ebFk)BBpaE3UNA839_uq_A}oQ(vkY#V!f`3$O~AFRP1R!*W?bmsZC=R03Rk4WhO zo1rPUkJ|C{aI-4Oh@3j7#T4J?*z~cl%6~K#^W4qE&fty2uZ(tz5TfWt*_LP8EGce? z)u#|0P{)j?6ZoIYp1wK$cY;>|veO}h(5D=%qR!@9=moiut^G4x=B-H&g$9JTc(2uk z<_CnhKv>8)aa*^e=#hLz_;Z?5*E6PQkYtD+_05mY)x~9ZM*iMqYRO%c~ z5tKHfnc0tiF*W%yIjNMYU&5GwGx+7pms3mgHkAv>1q*CyHB)omE6+4)%3kZnKTzfj z6?W#Yo?!Y7eZA|#Kn!8-r}4$XqVBo}I4{={Y0!1Swvy%H>{v%Lm=s=gtpX#WVY5n* zPk66?5dgN6_dO(BN4gjEZ)6$J@#Qmh2;Llj1~nK+G*>K)jblK$o1!bBWRbMBe`~6~KuC-+)f>4S^DyS?im4sdde#3!B`~V-g@+a(? zb+y>r!T~&mM;4w~@-4k~{SUbpyi#L%KOP~;ut2N^<{Ltf%KZU6y`hHupkE7AWCK3c zIbcGLW!;$Z+sQY;)F|D! 
zaU*b68g;_<+rR%Xvd$$aMtEFttO*fb00OX>aYf>&`jpr1*hg2imjXC>YA?G{MWECU z8h&qemw*2GhbO4Yb0wg5;7i0LgiH;ev-?v2=1dLQ;TQ89OnEw+YFs25(Lvy0zP%Oz zL|R^qI7nANS3_dOar-0^51KU`tycM`Pu%tgzB4(sgbKH}w|m?U0m9eb_X#JnI_@G5 zFK>fcB zXgLXcyv2RJdfa1P1 zai5ysoCO;L6GhJ3x5q(_TL~2Pqpu-dJ|VfHEC8W%ooAC!Aq@{24I}ZK23*_eUZSb(abYbNWblVi=)I{lo-$Ui>* zTDY*o(ZXIZvUNnMjF8w-Nlz&nyMvH3`}mBT(!2`X4`W3x5$aVFG!y}c>#?lt^`1!@ zspr8Vf&Duh?mO9(ZmdxC{A8(8!(l0ql*(W)8Uc{dNa>Pq4m?Nl40iqFpzkChSe&No zVnW@_Odkq#T9I>PnELm5VYj8vJTFL7OA;!S?1nhKVy<5qo6^5Sr~p-YPbFDbI3M}D z!jIwpWgVSq?132&sMGkjf5HmizJ1Hc3Z=?v_NzlC`oPLulr5zLT)GXv)4RL^HedlY z4Dz53h==_BNJpVwk;e|1$@7%Ci;rqTK(t*$mrdzGX`pFpqZS`Qa-Kw@MbcA!Yj1(f zEQ|CN03-+&AeP>3?wwiCgNT|aLv)TrMdzplE2W0Q)qvILsR`8s#h44?8rdNn8owES zSlp$s+dy(xGm}xmV?jkz6!{tS+5k?9uQezmOCuYWUt-U2U9^X4?o5mpK>5_mD938| zz9Bhtd#k+=b$82o^M=Z+mQb{aVOE6@A*}+J9KxR(2u?>V}3asoS-d*l1?!ELr9J=n)GJG+7mRy zpa4%-TODgU$>Wky^Z;=f8LJ$_1|!1c>})pm1W$o^i4ckfTrG7xxnY3O=m(|>SCb?|I}%AG!ZmM92hJ%^`w`OfH@Qt6og$D zRFI$egcf)2*0YuHJ(P&Lp(WR6Aw#HmxF6^Qt&8iLVwIWc1a6!iD_BfQGpdY+iD(ez zp%wfJDBv)BTQi7rYRa!9Ip@4JmZ){l4t)Fe3c3CownHuA7s4i8J>u+QRa2n(lTzP$ zlAM3WJA}w!5?>79yQUVORR%3_$I$&9G*>kWP3w=vyZ-v?uZ%1+;1q1Y!A1Cdg{U{& z-M!bs9BhG)Jw37>5ihA*(fC;IQ$Tn`d{rp6(03Nr&rMogSxnCP``vBs0*_B6B7jR& zdL4I>2cBJ7MebH}CI6;hIZ=x*UIZEC;ml>F!D|PDuF(cv<25XMOr@t6P`-C$e)F%t zYFMO`hcPH&oE+L06bhZU$ef?K-KG!am3x#q9EOf!!QQfDBiY%=7TUMX*8R0%sgJ$` zKk@6`fzbUzSV+kHy3J&F`R6=u))(Nr%-x1ZMyh__Nqb`WICsbn906au?O+0Vpzcvu z6OWwLY2UK3#Ls!jyNA-}j9g}l#liOp0z&i8LL}K2)F$LK#Uek^vhdMUs=Fq#tmfrs zuc|)13nT*7@lKzrlYy7{v$-L{g8Guo(dk}6s^d#2M2-{GD{x7D^a&rmeTv}z4ZiSC z8tZk>nx7x-`23*8IGoW2dHy4oEH$SFY2|~d-o*0P3Um3qj!*>I-k9d{rp06Yg(rc& zn}_@RsYX)Yhk|*MgLvmnt8lrvPuC3TG{FGm-wA?_v>t*QlU?-dd4jV@c4*31QK3(ef>$w zC+0`MU}o@QsWEh408dh0S$W(uLRutw^4dWf(&_mf6%W7M0n7t8*F2NNUQy5KFRWS_ zuc`UW_6~JHlF`8ic}}yq!I$))d2d51H*8vj*6-c>G_?o4B{I8KCiu&9SVifJHY4C(V~mj3AzGt)9$u7qA^m0w*c6d3%u$7zH((4y$d?- zMDwH}i0^r`G}R!|5PSx=XMdy#2nw2lj9U11XP17yoJ!$w^oSRqL<0ShL}m4e>oR@@ 
zd&I*JjHq?Q^3iDf$)6X*Bc!;hUVBsr!jA*}bkP}K$Qtd4{02)2N##feszBBjGtW<1 zhKS5}?};}}iP-)18f`$alY|?xzT(OX+LPBnG9S)-cmQZ01KNPGqTs06*W65;Df+RI zfI(e<-MpOuc&ccxcV~pYCfmy<=_L(9F$0F5}uVb!*YGi zM?yMIZ$7*I#`N20%U!A1pXp-n=Jo#g9tr~~`f!O1Bxbk!wspQ(4$_1ZR1)TSs zreF0J11)FUUACs*R0J^l0=@@=m?K^*BA20Q+Frj2jq?T$NEBU*yAYteHrv8=xG)HObX)_kK6S>SuF!F%L5=_#AI}+^G%&ZMIpQ;^JxsiB#D4_F0em`_gM$Oe?~A}&^?1c;d{Jqx zMh#`|iww?dBecJ1h;f9Lz!`n| zA>AsB<%vi<)1@4)i%KoLsB_;lm@`LAFIKoq9r&})ka`E3)be9C1xz4;*0 zID{0|ril-~%?<}S{LK2#;o1?*xkE6*t>!9pZ3#pJXqA5{;Ag^+^UGOlr-i9l_yQx* zfXO&1cH1MLo2)+GN&W%(*nO;LSK*oqmbDhsbizz zv)Jpr@g~RJyLU}cwJG`vQfBqhJ4!;a3yh43NseCJ2(zwRg8S?{z+Q7E&7Le%s-{ zuX-#b3&YaOFX#UI#Lxd8HnT6M`(Hk{AzX6Zk^O&t{_NKOr%$nio@ERuC;vkn!ezpl zBewBzF`Bk;1(Ep+00b1|g`R_d^E__|6Z>7tA0$=S*V0mEO#?R_bP?6oJ~#}CFsU#) zTwMR57tgJ~6+(e=0LT`mR4=1>kaOxNapp7m*nxtjNf}}Qe}2-Kf-NUS2U05$+R^dU ze~OUQQ|g|W1$jc__4&Q~BYM$8U0FEq{Mj(cCC-?xCT0pm)=;_WUjRwEmIU_%@-@XWRziUy~4D{MkQ#{74_9$x%QZNL-*5b-N6) zSp`Y$f4Z1~DZpQ=W|?aecPcV~dLTD=YZCy+7bmXVrapB&;r8Zj74`MXXspmms{nWt zGd`dvYpcyrZP=YEn}>saXnZ`V2&bblhPZ)iv3TGn4zw@-hdC0A(t#{tXcpv&vvelZcfbwh+<@5<_@Ni8!em zD@*e|cz06sQJ>V>kVP)q+jFgCFQZ0Uw2Q9q&`ldb#{a_W;z5t?2gA`fNO5hQ#BcS} zJCMT~3f6Ay725>baXB7P~F%#pv#E?&tyaBM+1OB(aIayB>-utRv5dbX0 zmJQ&5qZe!@K2^AEGy$fh8-9QstHq9~q!~()X3l>K226yoJb+wgZV2|IInVLOcG7IB zAV==uOS*xU(s4w>Z~OM`4ddU7c39v9$_rn{=lYwcIA@G`V{%;sfaTq*$ z(CC_V6u53(b6=g!+ZNiP0_UAN0G|P^BJXKr)y$YzhGRv6nyV*Ug)Wk#P_J%3C}XI7ehEemH{MqMf zjv9UEw;k)d0ZyYHxYHKk0@+6js9Om=X3xNYsf{Q!1|=YMj!GD1>i|GIX?Lv$p023u zz&Z*T_t>^=TRn@1i zyTgg*3xGt)0>W=k3w=P47u4SYtR_KcV8>N}WZwiEG6Le(;Ye^)0-2Ig1@tDyraxw9 z)j@n1M2+pJyW^xz_sf=OyS7={*gGGaqxN(5^7D!2j-=? 
zg@uJJAS!L4#(<+Nofr5^+}jR`oa|Uw*%aE_I&dxt3&$Xkc9~}#U{$(6Bv=3j7)_aE zh*>0Ip|{LW41BsWfXNQ!m;>w@#3rbOF%2TM6!D|oQCuTcz)M$7o-U@88~PU(7PdPL zaR}cWKU?>(^9y#IoB*vfLr=HWWrreu=(vjbCY`n~-xM2EKKP4)D7BT75N1_CHVTX^ zVto-56f(Jk{ML{4!WKWt%gY0*V1hGC5jY|x$Kvn5UlHDjcpfkhmFv2gMSP(oeH}V< zXcD>&FeNEEPgs>dq$J|0PTLQg*c1|mgdK3D{gSsE@pbTd&app8{AJT`R1!}(^a}F$ zE!)lC{0^V(Ym`PBi3#tIXgjd+wII2f2gDUeiKL7aD^R+Ql5REs)I9)Nz7qnGP!W`E z``99hBYv1B$D{%-zqlJ8OG-*oz}Q%1(gRLB;nHhQ6GAf-#NaByJ`)&;npB!oTTLm8 zc!d3Zi=!M9WB^gssrHCT{}ci&59wH^xQI-JXT_SMAFnAd?S2A=2^ncJxm?4eN ze*}flK!||gCtxxK1qE4=69}jL;Sn88*CG=bTeXPDLm(U>ODF?C>=?md$0uM1eQ&0y z=OV7T-~$segmw+-jT9tgp6T)LV#Ht!${_-xCYc@4^xY|=B6vB+Xx%Uq6Nzi60`$NIL zVxva}9$CLJ9B`H3ocS0kT1Yf!ObM0@EFFDIqHAG@OH3Q*oWO{wfqLy$WXeM$8DoAx z8L_B(lpc_HEWErb@Lzmj*uUEi*l5DLoeT^jyfiYCwKCb>En@CK70nDsCF)iBXh$KJ z)WT45p)Dl4%G_cRF3jecBeH>W?56q^mr-`&U|bxTJg9WbIIEkNLox7v*>g+#Mk{&Q6CzAO!F-&v*U~g9OhxH*7H0H0q)eC7P5J-Fx zued=6M0_&F1TioP?GlofAyu#dAqmSK{0n))w!ghW6}n;*Qw`vqBHw^0Y!{tR#bdHi&EE#3SX(b_iFF@v|nx0DO) z63=j+y^bR$L(IAmC(z@bgu|*mt-Y#B4xq#?@LtI$&!q(?C#Pfmw8)DQsG&oU+CI$$ z3ED)1A))PAfN&IxQP7D;4E!jj#BYj|Ow%d~Y*LvE;-u(e-Y0y2FQUtcX#*hs5g@ggKa}KZJ7Z(Sx69*I#??_g#m0_wI*Wugf6eeF<(1xTOE=SwUm{%HmIk2du&I)6CjTQ&T847r#MrNmJ;~(3ZGmiN0yfeCRATC zmW24*`RU;>18lO8uGiD1B5Qj5Cf0qNBXSe}6mG2S!{Gq(uC zE9vh4Y~0KXYJ$*SKp%lKqhI4ef|CI;54UsY&Vg{z%P6gWeB{nm_%3a9um*fLzWpok zQfg^#Zmx_BHeVC~!9%e_4aHQJXX8q5b#OfJzF1pw;Yu7CT*DYl{h6`62=Ed_@ADAx zOD_5;$PgXIsV~?8bn!vLg^`&uBnNcGLSn0=Zo#hN&j_M~n4K^cr#68!aj^QhHSHEV zwERWZvJq=G0wRZ;KYu=ac#q%**b<6G&#oV)y#4?6iZ_eIIk(E9y$^I16BBDh|3OA8 z;gj%9>i{HL%aQ~2%&oVhZ}>eFDU#eGH>_WSTs{O+$0G|NqP3!f(MDo*Oc6Xir`M1hEB=pw`E zIJnb*He=maS3I(S6yvZG{ENunTIgk(P!33siEbhevvq@?BJQIvF{YYObK+i}K~E9x za|Jda{x&Y%JQLNs%WR*)Twf6c-%Lho!m0piaF}^A)~dqtY-rdB*d%Ijv3vFYSyG^QqB&hfu!{Fi%HbSg%HG6}?h$?p;}EZH~%4foFCEpU7=Rv@c?E3g|< z>-?3>X9H!c@Ba^FKgdKUN_aZTc~K;50FG-wET>)OQVQmKX^{P->mL3!S^zmy!&j|; zXow77h-u?gCrDh9eNa{s1f-#iKW?`GF5qY9y8PyZnt1$o((X|`=j!|;3@aBHtW&qd 
zl|!7ZFtK@LJ(IZR^AJ@%&g4@#rRPCkV}YwP663Mf)T5;2A{7()4FccD=TtE;EIdCJ_i zflRKemAqxu>E75}cCTJB^ZvbiniBnikz~jSb%9W|gj7ZVYWUDnlj)$YgLa*%(<@_; zm7s02{bYO*H%L?gVKq3;x@r&BL-pp4OKTHW8#au4JiNS@D#zqRIP%y*3V8C@pe*9i z&=Q&l6R9y+T$%;h;M05`ZYQAB$D^@Pi|8i0CqRWvU}rY!8PHZ)Em%>(%CdjHi>_J5 zbD++mJ>E0a_2$hR;xAMif!li!ZSK---+SnEH1|bQSQL*c`u+v9bK_V*`Bi8Alh0)< zaQq4GfK@_cpDPkxz1dXL0|GUOX94sRcZM%DpEQJIg<1@#c+re?6w({f0*#-Nou_lu znuh0K2tC9@2chIRz~w<;uSd%NWEu*mSy+RbN;5mI8W(s)5XoV3W`;jQ{M3tU-KL!< z!JyvyV~sl8Fn)TSL<60+<0;J3MU&_o&1VRG(3lI!Uu*i|WkV`j+5|Mo5{^q`QNW2h z&5egKlCAL=$b$Dk(sk;SH<})MMc3_Yl7GySn+SDEs9H&vZZW}mjqNTm6Q2<7}%FQqN3G^2fLiT|YyFYg9 z;7fon{4vC>L{Ov0AzT0pD|DCbxt|!@%Mc>-rmQ3Z*j#b+5O(rUyq!wzkPKr=K$X22m7H>?{O6xpLPb+9?S z>H%zi2S>_c{NSJ#Lwe`V0IhJQz0_A8Y$zEuR{f)|rzZ+&@z`K0MD#=nRynU9Teok| zp#FLy95@;2r{b!MA04PKcuP;^aiMy}quY(eFYXU%m~P#1NJwMXA)%C<71FAlaYzcQ zGURDt_t4F;j&fTVU>4Ix`e}s2r7Bdk5LXV$*c=`3(^zgEfu<#Rf^hGAYWZVdM zXAqMt2EU1-NpiuJGdS=2*fna++==G{gj1rIaFOI9$U$_Ygz03|jP6_k$=@ZkjEBia z4R$}+O4229L+~$Rf;mEiFp4=aHJBs@A!jnp9GbBTQ z-7f){FQ-hy>~o@cWp2WtXy(X~BWhy@;H9f9L==*N;sL;yj11BO{4~tib$!@@zQ63m zV9cF6cktq-KWIT!7-%HHf-*czwJv~c_$K!sSN<T~*y@9)=$lFuiztI-t5Hu1Jn zsF)|Oe!Ms}@TCZ+o9)yg9c4R9mZcxNK z5kw3!2pNBh@L@o|K}dO3Z1q=;*=JMwxz=b=ivru}-Ir1c-GC&Z*-C-|>Hv{BJ{>xh z17K%Lc)-8(P(6}?8(AtWKpq%V-OGZ2>v4Mmp*=m))|?`T5(bn1w21+XkG`0-dW|s^ z?N&I9qKH41!f>DrXHf~goTqgtJV%*qX+A=ou?)1p85It@^?q#PTRgVqDK|;v`LLI6b^QVe=LFnyaeDOHDwpAz$={6()rI^%D_56ahzmA zE=SDK6!dWvx4{PuuiJZ|hG-{9s%>}Y>Z=NBt}TE;I{rL*)5SPaYe*Z6IhOKEZ>T|z zA^lQIgv!PebAT%h0jboQSqSU}@o$+FK`+iITAWT{8&oPH% zM0QOn!x&lk2(VZT$tk_*ivE~g+)?v44z%ms_xqmI$JbXwSMEPD;4HoClwa6u3uIZv z89KOlAO(g&4Iv~rPFMic7Gg^PT!|&v_3PVgGOkDbW0z|85uz&-caIeDjw&9+a*-MY z7+f;{5n&bDQG%wHZVU%H_cUp!= zYBZc+b|-{OE;Parhw+|sD7>m@e#o&V`VpQ8Y`qdHPKH70P7d8X4Gd|hwfWMWQ3F6w zMZ*fq$`Rydgp>kgg&Zu2IbX$96=HlNG6PUMnT9grdA@s5#R5>N3CR&2XVG%xZ^S<7 z@QDWlA{KI_3~Wa}_A1>3e1Qz5^bo}wwg@f;+*(FHB5xeC>2@?;|HHV18R!nTm3d<+ zE(vNO(3|k2zWk{8%Y%=&1m=X-HIy(+AmkZ(|KS;n%Oog&U4e%bJ*pM*`{Cyi#Iy|e 
zsPw<<;}bBHGWwvIK`!;j*c)xnA9pwSH>1*Xtjw zpErwX{0D?LND*I5BW58hCs$WyMT14%Mdmkd{F@0jb!b`HMvnMv)~lx-7{8!$CXP$x zpd@2{HpTGKAM0<$xs!qz0(^?$VX|A+4x zNdKo{{rvR*RyUpfUrX`-zxID?@&DRNw4Wv3`L%|x0XgX#L%BP5sXk0{CdeYy%9@+{8dsUM{7ptLp^tlL^)K=14%^&JE+a zC!cp|H*VbU?{O32`^6BWks#P{a&HxQwkd!GX@3YyugF+G(4qY66jZ6ZC#9YNj+1Qc->LI$?X_7;P(f8O{0sOg<=;IrlKAhx{|b!V z$0OiF_Yr`2EV{c~x8>a7;o)vvF$QNXNyQ*m{L2mNPIb6Mpq73xE$p^~L`X-5E!tUd zeF2CEaw9}MOc{e%s8(Dsk@ZUs3+KojY=pRWu zQ24jdbVXJiX!HAun95_%BTq2y`@#-bCp`Nj=zvNw_AZenICMbC_P3D!4HyI}IZL zIP7Nrk#L1ER=BM_Mjf0lLd&PY)BI2S_z-aI|WdE?s)r z6^zb->>Z3`5YMz)$G*NLd`CPe9^72=1UK3c5erw70FDh{)vQF;24{-ZL3P7($A$t< zu#l7!J5~@M7ipm(r^{Xm0S_NP4Njx(5H0OXlTs$~YCG4RKT7@}-LzJM!HZq^I*Zc6 zEQOzysdWmK9zK6b;swIxZDcZ&V9n?k8rj!%GCFwkFbGfXpu#8PFZvzDt#@izgaXXS z5=S}6U2nUWdalz?;CYJz)-Z$Bk5W7fafb~WS#^l;ao{mRd%|Acgfg88DxEM@Xt+NC zlhb8yTdQ3YQvh^Bz^^YbJBpY0U%H9t#mD0t@V!2U1CZEhaPSEY<}nQx@DqLnQ~tAj zE^fc(UpDC4k!kX?zd}TyFz(~hy$lfGnbm=Zz6SltKXNydv__&O@z4!_4kNRDAcP2Y zj)P0cW-9h-@(amb`hhJinM>)#1r9bYAGd17y2 zG(}qUOsjznoXm%S^$3G-l#FdbPk3Ez*~QhB++Ekwa!~XIh?iuX11W!qWb{2&b>J?b z?y$nHLCFFPCpQZDN5XI^PSZzB8Sj3lCo(o0oXNz72yN%!0$y0cAIXim=i~GG6oL!~ z>!KdQtM}K471_JP+;AC@`#~@b{{lh?M9qLV(BBEKak)J@XV46H`^EEFr8I3g9kQ+kJy8eD;y-| zko)RlD0xb-N<+xEn8>0`V>7)bp8=wW0X`@EOjfAcvDFXLmE*-`?JS&_2)ZH5f>{~y1cqt9_>hA zOiT><3?8Er2?4K`^wi&VAsYcnnLpuP}d0(hBdJl(i9_0{q|4lNZt8g6K40z@Y> zdWZ?aFd$QB=Tt#1XbczM1`33^Vi{p}s>05%@aq9iaFqVUpeUJRfk8wJul)l4gN&Iy z!A)dUZlH$ON(0UyF&|^zF$5wQ85HS9e+)g|Ym$m=+G7pkA4RXj3W5b*J4k{*6RN)& zvJ4-Fejt=fKs-t9)`Fd)5zRU8mG{2_xJ%(+J;}~?csZ+pVsKN3nCZXy%z$7ZEe$q+ zbQT$CPzir+^55lDnP?P%dUdfBD!{tPs_b~inz&Uc6CCATAikF(w`cB@a#dvP~2W4_RCd>3WR$>|009&ng*;Y zU9!Pras<`M5p350R0t)-9~8%N2!~pQZuvO#q5Y+-nA)Xq>aZaVLLrlbvdo{bN`zeS zLd30yjHL$UVD07$fjp`QXL2pK9`iuwOzu`B*BMQf9MHSahH8=rqqiTbT?~9Zghn8A z6{y!ncnUdma(Sf;)`q)LWTgE?VB9w<_b(a$0YOvRAA`>3rnvsi__nXq`YU=Gqr&4{ z5DG9b!U4ULx_<_AnhS!*0ExcYor4APo6lZwBgD&$-imZ$Xz>F{kf%<6d|G#v$%t*UIK9CBj4DX>b0ss97%-a z$}4j5)*8ho5UvElQ)agy(4p9FaMV&*BRLo!R3NSp_dYJL&_I%Gz&Qm*ih}%8nDM{7 
z#haWQQ;;dvHa7J@v?}062#f-+rw*4TNjg$WNJ$t=?7AQ-LgH3cVl8ebaS2{!;igTS zkXs(HxC44W?k|Oo_=XH-N>PiryoquJV?vE_N&-_6uH=>p6uU+!PdHEaFZeQ?{w`%m zRCqi-=%kDOQ;q6+rsLnBmkHv4Bz<8J*Oyx;kgD^(NaCeUU=wWQ&|NQuqZ`~CBKR$Qy`+t(} z|M|)Pk9-PP^H#GQID29Y1uQt3BesUp?;r)2!z}^I`Qqp$CEoQz#HuP?IRsuSk|cP{ zw?QkU+`fGqNoGG8^&tEZafpezi;9ZwpcfIUSSI(rVNd05TwX7^N^y64?e@AHOM_1b z!=M5=MQbZ^9EigSA!mKxuam4TAabmdmhLGiCc=)B7f|EV_d0}NCIa@YT9AU_z;!xE zBpIzqgyF+OPnn^5@{DX0`|49>_%_EHt z248BSEs($^b0nA{smIIz&E!?;{v(}Ew zt#f!`=xfm^F9ByaCjSA$K4eX>$#S6Z%IQdd?SGIwlp`Jr6+_0Kpl5`e>Hoprdxuqd zZtcD#SuR(sX@=O1P9>s9)L6iR(Ih%W#X=E5Fe;)HjUXtB!qTWQ_KBbgVy z8Y>`@C`CcU0-_=o5TvtzV}j9qS^GQteAhYGb^bW4z4O`TobP#A zmQv$jD4kvn48If=2#+m65Rmi;cJ(@Xrx=CMXX+1M!XUcs`ahaCFQ$BJ@!^NcXOp!q z{2Seb3bn`Bv6l}G`$1JAjv93U+KyEExG>>8yoY|hCygcJ&yqO=zD`tmbpmvbx-Z{o zqT1MICyqNx&F{VH%6AG4EVrJ(Y9@Y*yndiE#xNZ0H7Otj9Cac7ZW;de!MW$h)?a0A z^zBJ}V7F%fN-sX7eQ}PKjUMIcuY=XbS03&CL}PLIMTYUVXx&;WP#S(Feez*KchL3{ zS~60{O(M!qdbW+X*f!xxvA4yIV!v!w-V+9({*IOBBppa!v>e&9H|R%o2*|7Bwv2z< zy8}1WVY#J8XA=R>z@}RANEi>EkmEezIE(Yj+33Zy+(w4RAh^Ja_!FDE{pW z6*jz3aFz~%<2Z7>?8YW1}0#Y3U5%yYOE!Qaz-eCp*ePj0E54G;k z#i58BdNUr2RwYu#P`T66wGTl@CckcbHTzIH#E%Vka=jWd6rYG*yI?< z`#p$r{qBfKqDyNG_L4lLRg`DEgU)4f z@qq&}L`{=Ua%_QA@)I8p)D)3BHokjQ#bO+9gG7cF+aD>`4b+KYli~McsVyw1jeY5v zGj719=!kUB?2TAz_Am-sh+SmMQ0fuw(>0Xy)RDnUcmSiq8D60a4!2yz zHha+S1Y$4qiV82&IVXUC;J1YL|1NR#F}fS{QFX|(({pff0d98g>|zW917h08zfF8K zN2eDt-e9QW&g)xvyieju39FzLPfm~6;NFSbcGs@E+IV<@+lH+P&oX_`QiXb_zRxCl z$g~s@bg6!_sYmIkQi%bP76}J@DDk-J$SRVNM5{+HK6I02rEZ^JDgLCb$ISaZ)GG`c z2SfB{?VGI~Z$`~pKG$Fs$Sd0MvN_uFWe+QaGU?n?$!AGd68S%qOFv2Jd7;gqJN1E@J~0uH$>eyNhBC?@7Yu^r(dURZV&2I>+2-r?5q z!@gi*p6T_mbft;6P7Xa_TlYp= z=M#3CCEc`EOYx7@WIOSYYEFhrJ*qBms2LoNw?VP8Jk!GO@%OcLb$Z2OI>mcvviR4o z+|F^muM}S?{%w1<27BDi{LW^qjrNv=XK7c9HJRwFqdu)1NZm5CS?0##zxtkyzZ1gX zunF|q$jZAPhe z5c`NYHQZ<)hZq_Y@L=$sG+d+p(HChba5+X&}2Xd@ibiHTR zAdJBK7v?8MppH+$v*|~yxaXyHp<+4A{CMb!sR-WX@PT4jklI-m70Y;>3MWEO_bAMS-%5-UmTdN|FRL|!ga`{UO*dOh-J;YmS8o6PUl-4kv@oM32@TayW1rbg 
zpZWe0cHzm|VJK`Q$qX9k#A<6?8pWU}3b664s*|ti-;68Fr#P3+%4q&ZNDq}MtzxWe z6H3+Wv0YaxhMm6YRiw)s2?Hu?faDZAonhUqzyquzW0g$05$=XB9W$Y9@|opsQo6U)r}1*{G^ zZ$dnBlugC3lZfoVE}k`i7(H^b+GrwZpJR1${+G?0H)p!~pp?o(+8@d=7)gW54XzW})cTlS zT6Y>c86&owadhx$$GmbG4kt|nY_<-ntQPHbDeZEf+&=KhsN*&>9J0E)}0MHRR8n_6Z%j8@SUoHz8MGon5| ztTB5~ym|)1?OT8*I}BeImD!($iS#eFYJc{bLcFNCP?ic|$NHZfHgxD$1VX>^dr-fQ z?QAO(4_}CrNy(x*OGhQZm4t1rYkew8lG~W1U6I-J!J)5lhC;1pk02{kYinyo@_<2t z3cL@EwXyH$k=O90hRdEhShEZ3$*)fvtC2fHr4d#k8Tk6f{q_3bTefri;D7uQ&}|tV z%@A7R;D+YY0sAm2$P|QV0TnD0;A968i%+CIAhLWtCB29A+tV4Hw5qOeeuqt84gX`q zc?zA=Q?7&4(u6^zh1+qUvnw7@ODc|M~T7^fZsYu4`%h&BiKc zQ2=#3S* z&CE{50~%(1c!*96d5$o4^WiO^Z&Wt5ZGVQZ8q3!|`O3}3Wq_yEHwV1EwUqjilD51H zzLhW8wDm)~t5^Ix0caB0l(QDG)>Jm0#U

p-w3E#Eh(z!Qle8WXL9E+zr;)H?T&S zoh*aBwjBZxgkn1tqJ;G#GesqWLT+wX$h1q@cQT?=j*pQ5Cxs@nqDtqv65p5Z5G@KQ9^`(mx>m%;DOsQ6d%>hDJ-XsPiC;e^^N~XOHVfuFBZ$<0COH5 z!Mn+h5O+9R?^5r?3#WR)SDRs&kZAH1k`kLwTpQB&Kz{2t>E$MdSqL-$^=n-IGWZJ7 zlL~JnsPo!&HQI$dMlx$}Fp;i`*(BZB*YGU#Akuq~2gm%Q#W_cx*y& zD>aA)CI9@83FIhmi)M{EN|U8*oD1CKe~a90>?r`D8t%JIML<_x$NH-wMZ8aaNOvZg zUUBA&>XEY#d_)Xr;TwwXRGjX!uxaH) z&k0i%pB~-IXG1~%4Xp@p5SiKJ1AtW`=b>HPgY?PrRNYsNWyN&sRp6%9FO5H?38-ho zeoc5*{=JR%kVVUF8V#1S*Z$*KIc;3;$pmcL+Z>Mf-1g;{f0%r5|L!)I>wcoel1Gv| zi|gSdecnvx?VD}l&Aw}Vvz;zBauAtT@&T4p@^VkeiTT;h|M3s{UWy9;@#z|nN6)32 zx^LFngnku`r2L`p3fzvp4laUv(*Q7Y)P#-hEAb@SOjXFcc~?SM$p5tY5bcliNuW2sj&o|TR<}UnY?=gPfA} z_@I~|WNXueAkI-hj}`d%hV%hJWGdVqy?r;$uNGH~-}1r|_{Fh+xwUj``D!NGN$ZB) z9w5JTfy)RaY0{QajpX`2060)#GWVP^6m7pFclgc12m)z)f1z z*vkdFW8N5_;aHY7Bp;+=1@Mw~syS!!D1exPa0IgNQq?Z$yodFEkTCN*VD5EvyH8GT z-EsEZxo<)3i|@#E0NIa{ZxI|RNqYVo=%IO^%P?C&=x^z&d-3|ANYE9j38yXMhhlEuUDs~`<1Rf!C)VEdQWeil>M2$AUtHWBYsxE^`RZ2_gKzs+Sg=mNq3mN+!Bd@cHx_%2!Xw zx>9`L2bMUW|}I|DpMf$|BR9%NMFb zrWR@zwG+HwynEg54kiwLhOIxPv-rc0KmR1Ou6F{+X~;FMj<}y%FBdBQJHdaMa#EgKHaqull4k-oj=4>kyf90K>jP5C)*iDu&xz0;1@W)Map>4DL9jePbM?{UdOT z(5p0Alp}VITmG{3c@|?II5g}gS?mz!s@20;lQ_9K-bBWB++SQEcQD zo3ihM(sH7HT+_4Smm(kN2brT!l-^1GFSQgc`yHz(vt?>a7RX#&sIY04!FfZO6@j}# zpaWJB&KX^9NEen}uJi`Nq8+d9`abP)JFsWrxIM7B4mElzIv8O^SL!l0Xw0oe7`}xi z66%BZk+C_bvoYZuZ6*|?v~b2%z-Ab(s0b&r_kb;zmNTi1v~4}bDUu%L#f>RXJHzV2 zg=Cqyzk@%OHa_l1qDN}~^GSy*nXVyA1$&#^3syS<{umjpcYGxjz7UG^Sar*B6t?!e z^`MkV-PJAg_i8*<>h7h6rG^_4`gzt2>91Z9@k3>+plc)UKiaDjV4*y2Nk=XX26E7& zQ1-y9iw@KCnZt3@DTzKHs6SNiz*9fNv~PO~rTWv1M@PI$QTxGR}QpV5uJfTlUdkBhz>Hx#*!imYmO-dq}BEb}>P^N2yfP{bm0#9XZ z5jA5db~c&T!PB9ua}=7?G_<-5o}O^2lGx>}1{kFJ`kF;j2#PHSwNZu*2vB7`%i%>b z+e(@~s*z%4ex`9agy=NxnAEmHu+rA~JGWh2DXDQkuU-3?JAlr!hE5_i-nInkd$I+5K;<6Y@OZ9GYG~tOQ0oF&P3If4iuRK!D>ldro1{&$|f0{D^jC0H{ya5 zu2z6ET@P{wAmhifW#7a6JmHN5KnjZ_;2?(?C^9JApn@nSlb2{o_+s|2k4RcFE=%BV z2q4aosu@8Y6*62_i#yeyDLp3{bUO>JBxp>`QH+$8o=cuHacz%Kd8oj=e4^?z77AzBjpA;mq9 
zJZ9P1EohluJQ62_G&N8ZNGDc-8)cgGF==L*a@ConilZ`hT^Jk|Pfu{LG}EW-9u!u% z=tJnK>u4f_=3eRj{T;=HB5QwZL*9%S<>JuxyqzNr`igYSFJw zo+L0xoRT{hGiYGEEIdx*3Xzr{a)Vz&zXRgRV%JD@ER?Cx$rMu{a+xk(z_>i&;UII- zwf+FE_f+CY$RuEsH#KR8;#}5}9KnIPRnt^^bM`@WPhsMPF7+DTs_WtqP7HzG@)awg zXq-}9!6TK^U6l%4t`MOehi2*~SDxvzgTn7^&%T)NIPHZ8$6s)m-M9RsYFs3(i|Kai zD{jWkE-SE*L(fJXgA{*T(h~QC29b&IQg?hForHOQJHc?g$xzK%qNIL`g9ogGCgK< zW#J*%4w<$+12Pb@*>1ts+7-{BPsHINgGP=Ak(}lurOFWJ0|{Nk@a9qw$Yx|LLA2Dm z@+pigShqS|Mi%n5eyvzq0o)0J2=^NbH>X1Y28X8q{v1eWK?;gydY-WyscS}dlb%mu zUs11hg!QMT7{0>{Ei>@_+fe$O~+!j5KMR28SPc(Orl@d zB^ProVJxHtQajR9kDAxTc%eC`!y4Pfq8W)6CZJ9)X%A1;3e&U1dhho>Xyq}JlLK0L zHQY`;kD1&TUj{|VzzXIf1b}Qpu4JCDGRL3J}d z&kSxBB)&7s8-^dKDcBWNFT1yK?Kq|KO3;TMbEvQt>tFoQxhEs&|M=Q)pWT!9Z!|P` z_}4eQ#1f~cR63!F^k_irE%Gg)i4>6mE!I^(t*FYJL0HAfUbxk;PNhFSt zR2XtSeKzdnNv9;5E(Ix360BR$RgyD%vomAb2=~#QhTV@*iS3q=2q6@pQ}*r&3=D)n zw*PAPRT7(wVuVodP@#Q`RO(CG2^-niL)HI634K|JU=FNgzk29QA@yNR`jxP}$kWyfNMBuhT+AZLiC%h++QCz(Kt zr;TZH0sU+eLWqhTBBf{Cj5<#YZj2{79yS zM)OX+cumS%vtNcyAMf;X&{o-)4ADvy_#zxJ=FGttx{A9~~gZ^n)grJiyh`R8A5q z%>NQzudnTWADd04pX|;4-jut*3#d&)Nk7uvsj7dz^%w?o3F*>L{rLNS?e=ENloeIB z$Zw2^`gOoV)(1xBvoD&@e0;HMdiVMtzn*QQ{W7Nr8Jvfh!3!Czx%I29pU$*d%BT}X zP0d82R3Ve|pv1@S$(AFr(c!O?0-fuB)RwH#d3&~{=ocH+*}-=QhCaUC)3V=Fh#c+0 z3gFCk#SgE6xy#aS(mf2hNsTJa94>Nz(ohHZa^Ej*$GfFYKJ>*c;6|&9UfW|=Vk+A4 z;k;!(I-9~RJGTFDfFYSO+JPfY#M*x{w^_4hmchM#=}U*C`37Sg({ff*Sd5&kV$0!j z4umxgF~15E#G)!%!XgIYE0tq71x!#$ksN~OfBFz*yAK*6HQ9iZV{H;KMiqX+-HgdG zF;!ch!f){eW30$;Vq6qTPwcaNhdBw6ANJ=^b)JwCAKvezAiB3#@ zmyPV&wu_5xkfVp!vla8YBm}-Lir3P*Iqynws46=mP$~(BE*>+@O5>w_vLkZMr@W}j zaa0!`-f1HX!R5CPbxh+D9gOoywO*SaP&Ne|KYsk_sRvu6P>vbhywgu-4b|$$$>Bts zhvffsGmDwgm--hkcM0cN+=90&yV7H8_{-|hId`*zVf{TwrrEdbr+veH!7uZqgPN$J z->zM|uroSd4K?{;3Y&)Ny}o{a6xN2MuW^$M3U}z1)w>SJ!$RO$9%h^NeAVmIDF-{r zU${NH8-HWcu*2*X-w*HWHdM#+sY;cj`pn?qCG0hYWm- zbgC(~ZJ2S`maue!P@b8S_1fs4S$3yOVp6$$->}GdU1q^t4ViM#<&W>R4};ypI?p`L zSclKrwKKb;qow84r@$?Y`_*{=dU@nMDR6Vt!EGLK;I1oLhEhdfig33s(iL|CAA@oA 
z_Ej1<$CsDmo7z=Srau>DdZbZ{`xZ&nW?=WzmvqXqhE#-xMW!&k*$-z_WORP$k&60q zrRWodk*A-f0ORF&Fa0jH%a=U*{mZt)UySnoZn0TkN)%hM4-}O3_>`!b$OX&^bcNp- zN{IZ)#-q^J8vI+l`JPpNwK?_YURJ9SZ)Ee}#>$59Q>TVP#w3q;ecLCf{?!rR5)!Iq zME=gC+QO#{HS;IJQ0N@LGqhf8+q+dNRc6krn%sQz$JvC$+CqIV%ZL&(wC(d+@}~(( z15=YuM33A|&r*%`;*0I7)D6{YUB?8@+eD@eJb3Fptyv#jDdr9e>$uNdm7QE{zCF8c zOoV=Acu2^V&P#7^pPP0$PDd#M2G=>f^6r{A#;XmagE})(U46i+d?K|4TPV4q{&~a0 z-^a8~LvIKoC_QA-$2F4G0MO2D&$2Yf&YT399^hG;Y=Ca&dajxFAtHYjFmC|h=Qe{Z z+jEZBuNya>07L317sfvhPSJVTW+(&#U-6n(UAEU@pmBS{CG#c0uzu-o>tDsu=}F12 z-LHJPzTmpCei{z)z37SI?hz+Xev?npmh1=rh) zFBfhacR(-&RFlU%$icP^)!Es@8sG?B zeQm@=`~OVWanlgnTvR_D^Gg>Yko@qMs zG2`#7m|pI`^^5-9sZ5p$=SEFD1R$UKp-#Xy^{VnPIeLYaV`PW_(4h264~vcsw_VdC zUMk1&cy&Dj6DA396}^Z5tD_DJ&C>Q9`SQZ29zaI1>A9ItYukQ+qV34aS+X`k2BN_4q*KV-ZsECs;-&YMjHm}s9JLh`su=ad;G49oMAQht`B#M_VbT*jr_7Ov~Ka2#t^ zS66rH)TyemT|5*Zjx@LInL8oy4ll3J53T1dZ z1zRDb+tOs|-|=Y6`WX(0i7QYU519Y^zP>gILuEY9x6M177H&8TrChXaOq=9SO@?ph z;^wcp{|p(tz9y+*7+!{Sytg;_>`k4A4Q69nM%Y4*4n+H5lKyb}fW<#BxO1;KPvnZ8 zSR}-1{>%blhf zxI@=@JU+a$v@EE>bIl!nEv@OF@_ZN0EN~NVuEWTt-&lFsSeQ=X$2m;E$bX3i6f?~jYixxF})D=IzG@XU|+ZbK$9b>k`;DVJp2Nk4HYgXI9Xs<0YDjGR#Zk zoi`>#R&399HXz9Fi&14eTNSuPn@qe?>`>k&2#qdm_!hyYwu_``WF%-QW%H0byX4Xj z9Z~=^Q`n=|K?Z4^`>|NfcpgVhCMexT^f5IxRe1D`!mnBoyl0_A@~pUNBL|(ASTAn{ zaZ%Ob(=pu7t{!<=V{I%>OKiMpS{fg*H6hadXm5|15&bFw@2eO>e^CFMdH4g{e?9B$ z!w>(ZHLFjrf9J-_Nrxb^Yc#P<&D$UK9;@+8E7ZL;U6y>H)x`k)&UGdCcl0l{jR+G>3{o9JT7uBmYKIdR?f!noB z0J^51kb7?iHOL=i{PeCYqH~oxiZ4oPt^ZVR6tI6d1@4t@@ zpJ{5k7nnmeaDcwwNS)sm8SeJmahYr7>k4mECv(BHk?PH6=H`CJ z`e(l3Eoy$7gAH|eLv4D)RSNjQV?Z)a8w=dbSWMcoN^%V~crxdEo7e?UX#_oM{(Rq> zHlejOB*ncF`{b5x8D4s2P=zgJH$`vKx2{PfGo=dCja@oO?0v z%q(KD2Nf(Tv}Quim4+|8*fFa{v7P-Watsq(D2WgqwScK-ykX^%3%~Z(YN!f$772*0eGF3PLEw zBC*t$$nSpU`8hW5PNL(X0To_}K_nRG-ve-3Uj1=>XFp6?_Tbxb4fS;_F#~FR$(D-vcl71TiFn+cv?1Y_1zzScWxi zzP!>YHqTv?;O1-{F<8b9mDYnJ52nPUTP3rE&s^Gd2Q>eb#G+I!@~X-84JzZG`YEc? 
z7OnD;-gJV8fR6W?cZ!;El`U+*iYfzWFAseFF-ydl91;JW1_}6s1a@929_%N3Ii@!w zN3Y12C8?Rmn3Zk=G#<<1|=tNl;R)NWK5<}yB7Ld%wzN@)u!>J;V&-? ze{uE$r3y9BtXDTE-85*^KA&!&CeX{acAa?5peR88g~99FuCBN2b`~Bns#`$If6z#Q zbgBXquj*-q{Xb7GGU?l8@r94$N@8+L0e41%ts@gkQ~Fw;`YC#aKa?M*9qjU|iS8v+HC z*QsD9P~4WzUqiW4=wmW686Z>&=2Iw<3U_GREc6ZkeQz-=z=SP}bHXplIvB5d^(3Snt_?$nu^=4gSka{ySHS`;V?)_;ixLy~$lxJl&2xz%(xAOV_$kVu=T;eJRCLPmrLA zs(@3PuE}=SU-{VBw~hxG_eYS0>b85Xv#%GY5~fcX+u4^#u7Ow(2w5HSt25NLNu|lj zM4Cu<>txPd-$O;zA7mclG>l6Pz4`~xSdq*aozWDwHDz16a1&b#lWY6$_)H8X~u#z2kXASUc zJu|$_L<1d){QM9Dlrn#cje$o3-E$mX#|Y+3wXJvAyX^i3MUYYNY~frFe#8ShM(pW9 zYwGj}XQ^)Ww%1ss4=yDa25M-5jZSkJ_-XFBNLaHjI))@vNr7r#Mk#5+?u-z{%bkou@ZLUdIp#mEwyCw@S zLORL)+MOX3v%<{5aFo4<#IswAFL#!KxET?rtBuwa6KHzw7s*4$MrMRrEZSVw53%+% z`=EnbMp;=VEL#i!i@H<{1h=)nD*r9{*T!ghpRkj3VSAlPmOfK4PHx^^#%7wJ@+`K> zPl-(|kI-G&P&LBRw{PTfmlss4lL*_T8*OX5ORSVdh@hz+m_^IxT*|1(>-C5AS1yPy zSV*p>Umn=}N=ds%S$;!Z{sQJ^jUPY$I@8n36L;=8z>3rVH2834t-LaLU9vxG< z2+E2&-zLmONP2ZGIXTZY*E!Y>Yk^=Amq+{82Sb6#ii1B7AiR88#L_d&&L#H1KJU;i z(GW)$qK^e-cWtP=Ec1mmiMzoXG?`BIdvZn+GV5H_d9zCmj!F9WcTAg<-1|s?ifA*m z?TA;CVK`K5s1Xu{UAS_NI{rzhpo7PF+Gf;5EO&Tr8#KOH!Y(;s!tQT+lZk8ZpHq)r-==fv z*$>)M`OMy&To&(11lEKLT5~yx@XO+-xnO!eX28yw?v^qyxwHGtFO{Xy>Xmv@Dq`o9 zBM{uKy_7&Cs0gaO&(;l2{H4*a@3Y54RehnC4W-yZ z$Dc;wtMI6Oc{zLe$@997H31E`+6;TTjkILI^IC$@B|0Y>UwohBq9DMyldebJx~;yA ztj&8`lYPkKp5YR4QnGer_m*vQ=GR|;J)RnQT2OXaBRYcE%x=|iH6 z39-M7y;-Eg5W?S?!!slu7=xrd!Z}bYSCk`oA^Dyc2<_k5vjOk{NAW7G_(gD-=~qYU(s-1!zPnfF6Rv z-HHopv}u$L@=n@|CInNcR~8y$2Q+&^stGKK-5DUUQa0vO zKCt~TcPFyf$;@iPl8pq)0nB%Ux$qiCLtxpS{Mudszf!{@H6#){!&fL4F6A1Fw2Xs0 z9eom`?<%=l5UiY?@}9MCZ-nG znj%L!{|@+&n@Dgv8b9X8&hk(s!xm5M68le~kc6wGkm1=b3|u*uFaAN>;a_7eP|CUl z3e95_qt#ba`6BJ%8P3P-kWO#gO^_EM3p@e^=->P&#^rZ8iZh7i?LO!};4~szHw5(<-<^|AU zs=55)G3En-`qCA0^5l}1-&0a%8rxWuIrq^pAd3s!2HA4g1Y1#h(!HUqFUhahv#m<8 zaAARkpLmWZMI|I+MBj=DDoVq7(4)Qk^htG&&5N;oAag;u;ZF`Q=fPVcv4>cxQjpg} z9?tjHcgTEQV&tWt{m-2?%NN{%VHB2;lZ@}os1ff5nhc1Tfu$+sX3hYz+~NOzUn}GN 
zQp1ou2S@J3sC!q6_qpeO;t?n)9lFfO(0Kg#<9MD>ABKsiH7~wC?qAX8$Q;6vE0p|1 zR9(DF0+&QQkC{aVuYMm>M#-#>evWThUi%cvSnXXC>-U6 zQyL$Eg{Qgd3Id*#!4XTZtt5MFF5ws)8;97mx*9_v$4nhq9%7o6 zd?SO9YQO(r@`;G$_m@(VOa%6Y4hv|l0c*m^2%pUziHL2$jHXh+7|d`ClCK;d?zE<* zNOC_2)@M&nGdF)&X%u27c=-DD>i|@B{Fqy_1Q*eTF}0_59ox%al5x5aROow2E*7Ei zRP$hj7hpw1f7?xWnT}F^!FHI-(vaQ3O^%`Y*BtG-pQo_`UyCE5RRn`8hQXyHx?wGxl+=IA7Df+8k zZj_R$iWm}AwWhgN$T_*kL3-tl_c(ltbDwS>IBeB?TL6zM=Ili;c8Nz^DW_CKR|r;D z-OT$`>uv*WqL~XaCmrtDv!@bcqTgJM9ZD50AdAQOxT0(CYbRYH$qBbuYgB4@*H#dZ z)~t>36gz?&C=DaB67gUvl4%RP0X$PA&s>@(Ro)>8AEinorMlu(7;0x{C&fTJ@$H+Q z;NIF(`AR4%oU|9V`wHdSahQz5*ka5s$&p{m0kAQ8GN@;ivjh+=Z@oy83~f3#528Uy z<~CMl|H#24O$}*8$)6@yz`JB#N~pZQQ90FPLN|hs)it1ZAk8tdqCMF41E?2-t%xhT zHmuLq{a)-@pShba?o~=Q-{I#s4`IiWx!?aBaWxi4clfcDtPhxriHyIB-M*BsALYTp zuoe%M9S=@=PaG@W<)I-V(;oy0YSfTsDKRwGDU>1~VVmfsd`4xc>$;`=KqdlGndrIl zWwcC5IO|I##|koKlbs+46p?x491Y!?zB5jKrMwQc7Jcmo7p6#NgAZABmKR3v=*-&J z3TY=e9@Xn>z%d-pO5Lr^0^mXy#ufVUH6B9lg+Ba>e;6}DD4Wu%1#Q#`rQ_jAj?*1K zLJ)QOs)`e7G0*ty?3;-WMd^IQMKRI5Wbmcnr_cOvLMXzw9$?I~0kf|M;GGtlP$%H0 z(NaW75o(DCU|~{S1s`a>bQc$w8_xcdT4hv03Lk7*by3OUD0t# z02%CT%s34-9(_rG1di(wAl;?CChXcq;UCOL3s!d(X@2Q4-d`aH-3tU}$IXQqQ>q|Y zb(C~|)(zSs3mby@I3%jBtkdZ&#|qCrxd#Ei7ih1zWXPl-`o5!0!@jb1K(V(_%*~j!` zi167TBxhu=X`9-->(!&rCOiDh&hI%bXt}jW6B!*!<1>?s?{1V}*e~J5Zxi+oH((dZ zWr+AFt*0n-Ra+Y>w}P`GQ%?Q&DwD%0AVdykI|}sN_a&J)?I3%3zBm`qY2~%nN2l56 z*4u*``wXbhYDmA~L)3_9x*3b1SjI-4y8r9u4#OIs|GudA-qUhgQ{dLnO+Clzn6uZB z5456^K}35D_o}$bA<=*8DlwhiLIr5O`N^927bWM&JEM*Ft(zu5sp)6(c}=Wr`rZFS z!qNZqjl!{cA=IwVgP61h+-r=cfV4O+VhPr#LRHluR%X2qc~mvtLQJ zR4mpdw1AKU zVm|fl94XioN;HWuYpYD`C#QaE_Rh2K7F{~-wUAQ`-n}X+vuttLW+_*Wvmu{y_e3%{ zLCs}ep$}lzfVx|Rv9PPLP=$xZJwye}oWP*DJe2&|uf<{B_!BiU`uN>4^4dhz(}c2X z2KRpc)P+Y9>0T#J3ZS>&G?Nj7l)4*7P_bTSkl3=#u8h{yr8O;;BZqthr=IB#(>Bu4 zDF~i{I?;#EbAD0BzWsr~vLHXwdH3LcCV?~J7J1$ax2~#M(Vys#;%2dQXp}ksBJl^( z1V8)yLVc5V+hV+H)E6#o?HaUaVSI0a$0DIv|8%aI)D72$KG>81JD^nIGU%yf8pO#g z1B-U6`=L3M{j+W3_u{`Vnq{VR9!dANze1jd>{`#~EAqn<+(Faak2}f85rF_CL;NDo 
z2OGCSy|JBB6B=klpID-0Rx_=T-tg1g{TE#wUvu%|(G#1We{t&gaQ~`H;UXG?Nm9IK zpV%CAx+#=r*hIUmPZx%~ekGXCEK==2H)N?p9o*fG=xdxeb6=4R+Omim+W3X#N?YtV z|J+lOx^kq^ftB#Pp)P0GPD+&&rAkqp6hU2^C51+n*`A>}CM>P>pWkotw2YM1=~Tu> zN{sfZzD!I{DVOsYl8xLe`s(RL^sV-UtqFbQ#RC=aF193bmJPAE`RAKxbv(=k2&u)& zutoCK-Rj(6AC_a@Ze1@WNI<0kQXIYRWoAVp8Z!3O+ZXOR#0z^RZ$$)DB#S%>scIU7 z?Kf4J86Sl}a3>>@z1S-_u2I8@+0|fkbQOvR=b0dZn8~l+zGcVAg>KEGjtP%2y!OR; zjY6jOEbJqjQ+6}1!nwH3(WYEWQO`~s_kss>;c42#`APZZTCja=FqQ(LFysZcpiUX_ zFz60STGP+j-I+P7(-m>htgbK4H7~*|lPt_IMY=%6hKfZEe()s+$2Z_5H$t3B71XD* zcUgC(%h~su7mPKko+5>dUgwc--csxAKaXZM3li;)OLIXyb9%80PCgh4W>0_JKmVEc zZ=usDFaP|BTKGQ{{r{)2`~Uwq?)`stQ2Y;WrtK9eeZ|XQALwQ-lp-|wA%=I~!maLt zUTTMUV0OtG39+}X?u_D~pxOeR%%3DsM2{s&HLZYEkOw zXA~r19eE6Fuae-a6bTRr zmtgoC+54vMpFpI=Jg~a4M4i6STgY{Kwd?fe8H;aU)?1GHu!d{B;7COck6- ze06$Z2n0y)kZYf|o2lhFbLpM5?dHJkID%$Zk{dno#-&2Sbx>J?OJULTi!?q=!`+>C z@9YfUojVs@&wezxKBDOve3k+qw~Z{RNBy6Q-zzbP(`RxDcRNOE3MnRaCs6xWnO-$ncSd~Z%Okz$kR6~*Y55>wM zHb8_Olf9gt)oID^-*!BC=PzZ`g_)$zE+SZ69Ww85-TnI+w=OM>wzk}R)pL$Hy={K+ zZoU_$xH>;+b5gW6&Cg(|nO%WR_LQ7ruA}%%Xx-&BO0w_|N)>$PqR5YkJq{z@Sc}g7 zPy_L*k(Q}n+YkL|R%G16-TK4F(g*LOplay7M9L(q)^A^6NjHtKzX_bj_ZVCK!ZYPr zSTt7N^Po3iTS3{JJAA_Qfq%Y0n)@t?W=My8p~P+U-v%sya#R@XHX~mBF0MY91;-L2Y@xT8+?XhJwL=LSb&A+Mk(5JS4<3 zCu8nm|Mh2Kn_#09S)HZloSe(sg_!+m+p-mLRJS29^o8q0QHDjRlj>-zS1kvHy}IFn zJ4r-NaTjeJQE`P99v7yfAHh**)qRQ7!^Us7ufz)RJq|wA07lr^N1fnO){L7H)jH(= zLpkHr467Hnpk{dJ5#{NO9nK!Q5b@pstH=$iaxtf0?y|y(@g~RD9&8E)+_alNK{0zz zbi1z9k<@{hP9$^xuYVjZsx$;@FUW!4KL6#5#%IWX?`}L= z-YiJmDS*rqcUFn12lOo>(FBfIlzAm8`GuSnE%hpw374?&j`!P)o49YribFNpgJxg& zMUl+>(moqU_#C2QY>y2{>}1HNW)`yOn|>UlIZdhGf8vi1EPng0PQT3XI#eTAnd!LV z9lM1GOIiSgw+bs4@qy6chj!naw#v`}W2u#V6cn(z-TS{Si9A1+%8H(_urrB>cOsua zc!VD>0i{k<4WhVELR?RSDMMzROE=Ih)U&M9&%`==sextPj5??41SP{ByO~Q=WSk4WmQABFpf9w*i{UxkTi^Z+Nx#NAFB8*@cxZ9$ zh^4jwPe3ZS_(b8NrZO*PTSTO$X7{+iK?h}V*wRTVDci-?rn*f!G9$vhArwFB$&(be zTOzvsUXNa44uicPNF5+?S9$c4kZ94ck!c%!AtND-Lz&N&fs~aioqZDJRsi^ zB*Qel*xM@jzyP-_ORI-cQ(vJ54ghwUE@-&(I&mkqpNG%A8dlg)o~Klx04tRA7z-0` 
z5yMfM!gr5nfr#Rq*0pK1ny#5)`J#$;%OW)Y337N0{#A{YMCxooWF{lZP5H7xx0e@3 zgl!hDlq6QpOE(gIR*d|V4w{x3sT75BMEcEim9HBv<_jfytxCa(XT}3?qBj=#uWU6% zzTRhFHr^B`O9_sfHC6<1kdw;N3q-Uay9kjiM5(yf%D^B)nC~1u;2D=}LZFefCJJBg z-n~WelaR55W~>g+$Q#~XgIFyzUGyL@s&6N|@{Gj_;!c-D<|NnYhoQNZD#&Q(KQsf& zS$D=qeVYXdWx5JG#lno6xWMP40k3WSFWzpmRL%Abc@bt*jA`f*BM>_s1*DSUAWm6H z3d=d@3o>t<`ydlzKJRC}3v=MYGsVyT#t{^Bh6Zf&7mmQA%k;M#L5kO!(G!Vd&m7@Q zXLU61BSMYV&E>cf^zm!VvPL6jBmWWDRkfN6rVBZsQr+IP~C5(k2#qfU$SPA^SI@!!QzJdJIJ$tYs)!cm1TY}D|wYlC#7n6+Ojo^m3Nv>}3P^p@MqN>~^6|2}rh z)GL^auOn4_jkH4;u&*Z)=#U#vhz7_j^k#MrGy}tWIY_+(WV53XT_h zm3Tx{$RVZ`l&#}ceD#7~dThA@tKlPb#-z?2H@&3}!_#}U^W4&5kgoR3B~!%e4~H&~ zNWfqjBB=qDUz!lWVipr!8C2v)`8ilj!d+n7PcmkNEB27Wb97$I#(Tzp&zdhd*m5Mt zY44(ZJ%ZjX9$6RSmK`X1`twubgcQoog>EIOZiFf>Wqisar?+5H%j(SLk<-q=GF8D! zZ{wWVK}I#V(g%m!9H^{dFN&6=K2tHH{>6Hup;>Q9CBG0A%i% zL?4mN&+(+sz=V_|6@j*c^3N~t?L3;-G)PkuhjuRocG%vk5UmBUj7XABFVqe=+>uF@ z7xjgy{oKLw?Q0`!crC|cT$G%F$>gY0M-IO!^a`SqLN{qiqDa=qt~^@r?cI*b>@)pb zQ9j*w6y^@mG1AIK@W!@Z)SysS+?=yjG(Ndk`~4bC81i{HpR^0Vdx9QFwrhlE9R)j2 zyvkTgwTz=>TkRyxU`~pioci`=IhD6Ce{qWqC;Wy)4%J*HW21Xt!R{D0BT3LRL3Qm6 z%|T(BZ#65-B5HNI1Vm9~#lqjU_HOg4Ken#gA%cN;1D=X4O3aPWG4_AG#7w=6?=%}* zWB~EO3Oq!X(qTEcF;Bz>?vztB5wzZJ1e2 zx+_BVHHH#x8I6s&>F2DudRDU_aj~ifLq!?7bAK3G+vX-?^aiTu?3bx(f^PAx!>+@4vrjSfkMZc_$`#b&f!7zceivolhnYVO;afzHVfl)D#LQc97quACcmrnj zF$dlVxNJXs*-sU8ZlqMeHcLB#Y^nkDw<1n-_RZ$ZfH!bddU*u1E94g-q3yJri!Gg2 zi@fWxe2vL{GJ0ox2l~Kl8aBHr58q4)TzsiLvbU9aAHeZU_gg*1D5+VfmW-1ujDCYG zXIMVrfUH+9DRclk0CmCUPn>q^?y z=t>$rh24@dl1JSMtA-dz;BB-W#y7slK0hGI5FgJ~PghVp`Y8SztAjRm-6*|jv6DZg zX?~zEe>A>6FJLok8Y9G!IVo^Z_Kq$H?`vl?olPI|r8v7326W1aw_1_>-?@XF{b%uT z9Ja4+hX_DClLOBM1yFU{*yqFEjPBPYRhXU-97LOpm4F+Xz!HwHgsM@h;8Y^qoTR8V zH;*~jEa>l~-q*`zHNGYFEK~P-=5G2Ag{-Gf;tws&Ul@QAq(YFGg5(%P88nbE*gK<1 zWEj=@Ke2RSngbzXbh+&b7_lW)O7Dx9cJ>E1g*=B-pra!8-!)J<^O?5=z+{EZw2hu} z70xd{%Rs=HqN8U(&0n~@-^m=&GERO#*=EZHNvDRo#k`!UXW^$aFn^aXh9nyBS9e+7 zx>@wmVZVRwG%%lTm0FPQI(M50I^vU*?aUH>2r+r|x^zIjF~msW)MSVma!MrR 
zSyt1CnevF~ELyv}iRcWuhmmH-oGk{uWZz4}v$WSBS(sJ3(G)LbEj-*nyluMKl0g(o ze49g7TxID+o3SmEWc?84(~wO~LwYxEOz#d19kZ*tG$e7mBz7@$OF0nT#e}W?j+Y^B zRcWvtA@C@_^( zW7-T_BuO;w92PWgtXVDN0efYajp{+lf8z*EG_5qekBw{(=$%3)GhD?ECFl^WQ~<_- zCVcD{&sy=GafIMWQCs1lX7Jr!a(L|I8*jeUg})Mp)WQmBb-#f-<2Ef(f?)-d;yDw1 z0xgjnE(}qIJ4+L*hu!|*rfljEW!x`L0h^Ni7IpqE#8pf)vvjMs#M{0f-aTb!@_FYa z=fs59@{>{fAHGd4H)mw#-TXJ*b5}_m0YNmdY8ooLG|;49*ifI}u!r(t9tD5Lj&IcL zv*ricmSTJE{KMwSjV-57I18}soXVfPY) zA;U6KM0t}`B8yT64Plg7=7Y4+^k5t(Lt{lY@9NifVfDBJ)Zy&H- zA>VN%`_2<@#VugfaVje9xjN~Cq^{OD$wW$$=8TBwd?vhWy3P^3sU{|BV11*@WR z0FyaVW~VDi#*Too9ZX9RXX0PpbmB3~79(SKNI$J~q)O9+^bbYUiPQ_}*Hg)yLfgXX z&=@d~^IqJ}Z+-@U8=+SjK$BG@?W{sd5m6o))<|!slukVV9Y7AVxI4@bNHKW(I)~ly znVrvGn$Fg{2G1&9I3|tESk%C_9-_>e4g`=131|eumN6}D9`usc^MJo)vf{uGh)R7h zIn}s3-#EG0<4H!&n8tFQlhP*dd^Z5mOVS=)NZ?QJ}Z zegat$e`4;`za?N-%8pT>g#7s9YABzkpRcyfh&^8DJZ@)9)$gDn{@me7CQ&{7 zWxBMIXy_3;D@}_f1jhshX8I_kF!MFIj4Y&hoE!Um-iG#X?!DHmX`h8_SbX_TV(bY3 zL*D|;3ptm7=V0xO**?s>QY05G2JxRbzLJb2cvCE}C$1TW#f#3`^fNsSbR`oRq=QFf z5l_tcSCKO{4hq1hK((^!WR zwV9O~%0RFib0>~nTP5v0QdYCFMAr1ct#N}P5vwo0eTtLk?}@nU>Ps@Wqgd0Bg1It} zQM_H!86=#zklC*5-+7Irv)}4;F-^)BmXQNup{k<4Go5bK?lPS~CdYtdhKbM8g%KVF zG|i`z%<9rUev<%&rr5vmOC6ZdY!_yJ%wt7Yfe&T&2dsk~%Uybe{YAz`hlzHSeIRnL zKNH!$JpcCo-wb8Nl*wEZ9+=d!Qr!y=Jdg<}nogafQ+U^O#QD1aw|8FmL{=KUIMGif zWD;%lFt+FHTPzwsooG>`*S`JMQJK9d^QE;3uGI71VsXCVjA_~K-7j=x(*9mKyWSK- z@>y9@(n_5IC@zI150BPx`!X(tnomFD_}dGzI>TzcgMmO`3_W)zvow=Fq$b7w6ZbbJ zK$L`e^e?9q=VP-*LY}XbSwUzH!GHg^GO32xn!CJ4*kp#_Px@=AFEtlYzp5s`A@R;C zSzfe|PX1d-FP-saFqnma5@MEwag*UMzXQP~{hbq`Q_CN%wh)g-D(jHUBC8Pw;2||} zDt~MF*Y|g8Ip~k0NA~WPwM--8gv9!CwY^k*bnE#j#EmaaZh{!OkyrUfC4c)&dzJJ? 
z$TWn#6q*J25w1}%S$oKUB&o>7{rXoLuh0B*_Gl53KWi&5H}X)dczxw@vHnqp!uj+r zaC!Tfa(`a^xo4!5Ytjb5GRW)n2A}eeoTfc7eXEtBVpx5P#0FI&xPCZI&vNUjEn+UM zqIQrC*COtlKi~YUc^J!u$x+v2PxDWMY0wN@x$)OuTl|eDk+C*q2pLBhDj}F&dX#0D z!rI+3k-hO})*ds$KFRtk_Wd<*h%`%#p7;)Vl@>NFWP_-yYOlKDUz_*>d2x$yt^^zW zB8X&*H&wX&MWq)klsFHDO4I6Iw$|v0P1P2EJ4zM5c6SJDk6e<(28moUYe%Arp<9;q zGzJkDkJfu@!;3P5KeH!6Oa%o)fy%fc9Pze}`P4H6lmnBE-$rz&Og`M>=8x+ShlGSo zzVUW@zG#prz>tVO8K!cTjX$+MhM8}^h!TDt&Um|rAlbG;UELaWTr|Sj_p3`25DoLZ3gigx9weWPOdBtAE9LChN_?jf2u9X9mM15 z5hmolvLd4f17hIwzjIy|Gu_ODThq{-a1;YEvFfgrwj!9?kJ|UQC~=FTv$bx4C*fF(`+uiKhtUZv zR$87fyI-0t^v}GSnphF7Zub`&^-g@cZNvMTAs7IU#kTm`D2QishHhpj{gNgx7} zo8nMCa;8kf3LUnogNzk>RkhESCZ!5zB8FJ;Ppamky-JM@MYLWsk1p9{ z)zm9JS+gN)|BroZWeyL-kR}?jNK9}W(&;8LHC8C__Z-cr#PfMt8J)xsba#z`L3Kb! zDAaEovTY+yFl%{JQhCFP+}@f?&HU<7L%AOGC^oF~vNQD)D`$&0SUTGTreyOhQ7t}m z5fa|vsBpy6yGUoG463t;NoS-2I5-(3xdT?R2qR}m!eAyb(8fPNygTGzIi^`5$ygjD zQY>I787K99VrTCTaJ!8~74HwdADGsE0S$Jtf(q%snJ4Weosc#(ktk%zrS|p(_)9G# zP#tL2+9?9MQYFPSij{9R!=<LS-GwPhE&BZ_?L!=({$`Z#anIj^G zK1ikF&P&B*fd0J4NWNjj1U_j<23&jqkZRFmJ6VT4|0FE?lwJPsYO>q`=YXY7=U3G} z=xU)k2_|>PMBPg3@|Lk?@M5NioqZYKF&B+oSm_&>2_{;iLU>1H{1eVdaj6)-`w(r0VkS^>%WS~k#H#y ze;otCW=g+>7$R;reo~5bz)C-bl(;g8fEQSmEPv~lNq34XSyJ@%GkwC9R`ICXvqC&(n8uZ z-G6Loemd%Wb(R0iwyWk%wsLxs+Md-W_2 zlBaBvh|82U!Q+F-I88lX*N)-H#|dCZz`cF9QOpfT0@ipx8QbRrMjRo!rIa>Z>mH5O zD2XuJwJ*1Ng+Ke6$KVjYdPHz_merz_`*oi2p|fBZWh_qVlQ4BKbIIbE-Py-PRcZep zn!-w||7;5Td;_GVxBfF3kyqlz26UBCmNFF2Pa9@w zyH7VV8d!fWy_LhpiFyBynYE`%8VN)^-yaO*nAIotKe#W8X87EO3JU&T)V+6Hl~=Sa ziYCF;Fx`KdolqMh` zy~7)8V@u9C_nmj|`QyI*QxcHP3k1JJQDg@^LlfIN%fP5IVncmQS-7YWicye(k`9o>7m^x}48mLQ1QOj4 z;sTQuNOW;P975f4YK0()s?cjV7@Qu!TasVv#=i@FP2~PW`sCp6$t6QLc~r`8U_jq! 
zt=Kimf*dBP%>gFH6<~&AB9XNyvp1d0n5S7CQ4%B`Hwq}uf!jILvS~&e;A4QyCop>X z{+qBZ9sLDFn7Bk2+$~RGk`aZ|VpDs@P#>hYmxMQfE%3-Xh=iJ*Ab9E-K#~W1qZzWX zKA+YaV?KEt0bUU%pmGMOlHv$bhe!ZpqAZDW9a5|Y99kdXILWp_a~~g89tA&f&;E~y z;)G=|Z6|GNke32+&I~(y7!hR`EO3gr{X1;_VuBnZDQ<{l@DvO{G7azF?10ZBQYiTz zU`DFwe+1E#ra=UX3E|)%*M8{vZH%UH6F$VIT7JJi=z{=bz?b<#IEp5UouIi1OM~ol zas=Vj2%#X6Y%>lHi)(e@6@?LC0Ncri=vsXB(Nr%IS==94ZBKSapi>Z6v9TGc)TN%s zr%ypRBm$*;!l0U!5qTc6ABiy9XEqn1ogrjRsL+IUj(bgSA3cLq5~+GAsUxDe)SX&c zy9s(rC9Qv_rt1oC6J@urj$h4hs6>O`X_)Y6!KM)p=g;xqCE)I~$JjT-AxIz!qa>*@ zl9km3vjlcSCP+lr2NlN1B(Umxu0LAtC3vz2-DpHj+;1;OA3`x4oNlu z?irE3B9ud-VT%?GA#j>3hMoV1`GH|9%wO(??qDQYdt$9q3WOp;6kMiV-yg8E22jvV z@-Acopz**%ME_9TO4ML*L!>r>1pF~`umCy)y(fVRkxk%hl=XkdlYKCpS`N zM73_8DF4@w_B>4XJ(2JsR8`nU7ky2@!$K&uC8mc6?GRQH$&YwxSSOQgCI^CR%0F|t zvI~p3+>Z~xjj^i|N#Fz_;R(}fPr=zw4ytZwn>3o5Ttn8BXtV-X=W`f_=D|PN!uJrY zEE4rHvcBFb-M&-JyYkzTGh0GQ;*BtH4t3>%l`jPEmShA8U*n!)#BH+qK6CEn*h;KIHNLDn2&5xdFMA^1B6nhszfnwaXQj~Jo? zaere2j2cao<6C-_b=g1Cs~QW6$UE2*jeNX3p&2CVng%q5XCQIZW9T_E>c++Fo2(Cg15ZuwB%=i&0z(QF?&ywxNdU}! 
zMV~i#|MGbNadwjgpCPn2j?Fp2d)_blA@;U2FCpZjnpxdsfpB-BUv?gD79nGhXr;si zM>B;=Pzw}#Cz(e4v+lp4Hl-3*0F78rU@}N=V}eMK z%1~4sSRX`1{zUNZt*~>X9uMU99Lo2^`qi6J zC7hCLVopKe_&>_t!$UoA$JNi=A`B-I-fkAQhlM7&lC#Uc&BEKO|H8wMC5u6Ey$um7Q^{J*v={<{IA`~M){{LeZ3zwzmG-vUXzjvc)R{TSrHSR^iN>&UYHt)b&t#(u?aTVtrvDyr?Ti-yQGbFYoCkT{c5)Y8@CGA`-7z4 zO6Y!z+o1H_g}l`52tr_@Yg>2l3r8H^d5S~JD}*#?Ld9Mp>^ppLWn<%BN~6H(2M3XL zzIXV@5hhmFWVFMgQ13=X)!dWcOd4_F#8f(d{N;)qn(4^OBB~Z&URx>&kxo3F9sSQa zdVn1cZE%q-qL(4ZF%EwX*yp%{I+lCERDpRN8>o$IZ=C9M>I zIm%J3caGnTMn5$4(Bj^=@6^hbE9qCSmNbh%)2ZH(b(b zpIg~m$nY)yd&g3!SdY!lDL+Na-D}40?Cn*V2CHsy2$c?=w7fz&apEQi(3_9ZW@uK< z#LOJjw`k?KO~2L?qE}e*bmB#(k+RG0v z5$ zO@NnvCPU`v(F=%wsL?PyM4da=0t8lSG?J6Y@R%d!7BCY)Tg)+OMyQ5h-tz4;+urYY z88;O+Yj`swFk)fDWgaHsl9B0#HX5VQ=R!NpBME3GhIViXh-u@YQC0Ht5*Qn4bo!=@2=b^enC>sow0e!gqRj$=UF zKFtdWQA$$r-HuX00F_m^xGFkcmPbhGqOqAVn#ll!Pk;^VdZtF+`}p~@{e{a!>X6=B zLn#V$6HrUI$zk+QyU`yE)JT>{BssiW`i%$9e-3GObzsK=Z>`9sPQKN)2LBC)k|Q3> zeKaFRC;9){a`ZdFkOs7mV}X#01N46q_x}W|#J|ZrQ?n-Flv4b`Os$}%ydkIdAr=<_ zRL-G8Zp7^BA4PaF8_O4DhykO$Jbm79j>S&F>AZmH7#>)Nn6Xp6z;Sc6=gUnl7ART1F3 zC_$-$ar5Rx`2M{_yd_nNoBKHRW_Y-3l_c^t96ai!+23|7?Y^{*GrK+MM(9&h;O5eg zj%?sVydpIe$?{(GOdJ$wc+3PN;&(nQqm749PQj>@fd! 
zk@^PG0C;|W-uudx*U%M9?Oy<1%t66#NDO{glS;c-M~$@Rr;yOl1fc#GAd8d(#^M-4 z#bO*VMHx$$EHQ4(NapB4p1Lf<1AGK(G}>rJ6ng$^lq+75jc%U{Emwz8gA9RKj)0%=XUb?eqmXiDrvIGx|W+D=S%InWutNBCO> zUKA&Vo`InnIf^j7T;OK%XsAk!29Vo>fqnz(K|B%$ecNsmD!4CR2`tKZlr{pP;5m;}HQG{T;8A~@dn zueKA~Fy56?3wcB*UFblD$HIJXYvF?EwQtN!+V8iyT6cb*ZKwTEMEHAOlJ+C||JXGC z|BpYKdXMC5Efz`=7&7sQ{j6DJeA~mvvml?C0LYl0o?aP!>5+Ql+NE1CyY%R)ucO+* z-a5O;EITQZ4<)8hz$CYQouGS1I}f&OIfybxby-<&(rOZUS|cpPUZ_Y~;i;j8q;+gL0ifSRnj`%s0q;pbGY;ra8t;&H@BF-q*H9@>h8Yr@XFJZHZ zmUX3F;N&k^!(xk0YO0}sw&tK{uwLb(LkN!KCQTujxOsYyvES<3o#(QcRFydIDG#6pLab@%hHn7 z*JVD>AbkwTcpX_1HeHnu9z0;!xSB3}{l<+O8&d7U5;&(`s=Zur9I%qp%rI9$dr()m zsZES8SL(g6+Gu2MHf!duA+pk%X}Cv0qU&{FvgWT+4BX!B{5l5r@R9M)ZY}tis%1%O zvMFBb^39E?SAdQ16grueMsw&NOK<&Zxc5N=miqTcf~>>XmUa&B8kHU=-&C-CdCru> zUi8#xfEV3aQa}m;{GppN$9G;}_v1I4&0nFKB~NP2+eeg06cU!`wfJ}|&sN-uHLbRJ zy-n>xb(hwkr#~6pFX3zoLF>Iw=9q5-^O#vJLj$`Ow}#e9I|dTB3Dxr!I) zkD>h0iGONx>Q<0D!?Mq2WrfbcgPwp$`6#4KW?Fy59(`|)c%Y2Td%t3#o^tZTPcCY} z&!e`#j6HrsoYkfJCQpuCed^AI2EWbS&FsIfjZBpfJuhl7li6xK2bJORiSDWx4GDu2 zw^QCJKc^0myYTdR3fE-}^yUj7?B}5LO*L6Vn4*m4so?XIp4(_vzB?l7D|t05YnStf zt*jD`C+&V*Q|pU*>i%{8T)Zxw0reSX^RqUCeL1Kl3VA1FwBTNz*SSGII{Iayqm{OM z>8=(xkGWx=g5IVxMuF%8Y zX#)bgcqp4Cr;k$+W2ohWYw&T+PR7@R>Z`fERW%*ly$l>^{w$vdNR!1knuYhJ>{ z$9U^q8sT<3gO(aV!{z+DZ+pcT9p;C{VEOpZV1hNET$a(#W$tt3N^z3Sot5^Hc~X~Baipr>v61`W9m5eJ&4 zq_VZVX4L;I@{i1JA27^N`SOtXuVt|qq63Y&T1{S!)ad(jtqT>Dh}^;j9YIncbJ(P& zN*-d>R+E9I11JN4{1yeY0gcY zm}u?JTIG87`BR!9_;#ml;gkcG2T^quz`?|$qa{2uE@7nU;GAP%KX|`cE{JzBwbXVd z$IkmggQ`r1MO{#xc5Kw;)!K0n7{ti}`Tlk17rry;3HP1aubDPREsU_o{QW|fdf&O1 z$E!906!?1{9%hrD&~v*|+DA1~K5S=EGW&jofk%5O%E|6?svO+hMI)g_QKD`$#xvof z^gpdp_nR6~OK`Sp-ec&y0~#9_`|3<&FR$)Rw~X{&{$oU9Y5s-pi%7_=9NfA=s~~R^ z8bD<9OZjc;+%=h0pAvC6CF}hoH>W1^WVbs7VgB6rc@<4Y@-qATlb!2MyFQ0uRf_6zNtW(i4i?&646=JZlcxY<|0|5=#I zu(;idZ=d8bH4{}J4)lM%{Oh>1fTqGCe`@iv;YSuvCc{z=Kc_dHV+GbzOat3w6fbEC zT~eTSzPm!$JfB~Wdltttd^^7`d$z0A6zLZC<<++C=y!M&?WaLM38@@AcFg^<(iRcl z?d%b9h5^U^?ILC^cWh$ge9m$znOBXIt1znAW9^L_8tYiyiW%K#mHS_=kN-Z7dD{P4 
z=9c-*l)fr%YjKs0Y<|;jLUIOyt&!nEpAXotzizp5$S%914}R*eB1Mw|pKBAo{PW4j z|6Uq&{|7&sf>;0bm9>|pK3j!P=IX0)i$b-kmi%D`dgbE*yb1Or~ zePq;t9x$?R^EG)cui@LWSG7|!U76T5N1TUkytM0a2Y-hIJwDTjH-t9C`&Yeh&u?tH zyvAhV-7{HHR}u9U5q!t4zQVCnp1KPsjM@Bq2D30x6$xg3)$?fv-Qvg@f-i=X|jMog>SzP6gGnC%@s%lJMN@Z=Gg+EtmQLAnZYFv_1XxfFIv&X($ z7ZoKrP?&y%^49pNk;+80LJL=tl4tYK!oyQbCnh5vEPeWPZ@!5-rD{IHy`*aXaj0;p zL-(!Wp$Xb{zwcx-SRhp}bEo33#QM#HKyOFlpLoNt4T36WRxp^NpRAgS@DBGy?TzjTMgUpgBt-~ z*+2XFzytf_XuZH1Lg z?wQ}1X2MO4yvcJJ`+Q4d6xcnI;#RZXbZzrA82b?UX1k0kK0BRVY`l#Br*8NZW*s6G zlm((ZTX=*9;w~+n7?r1eKKbJ5KZ1cjW}V6r-y$&)cdE638V$o|^!%^2)aY9b8{&+E z__GH_CgN3uXLJ{+V!Pzs4;|_qAFJb4dU&p)dt#(IJxZqU;wItq=9#WO8)}53jul%8 zPEZcnq4bd(uf}of&XUd}H1E&S>dtpY)G)@;i~hiDqeYQU#L1gZ@B7|NPHIg|^j&7} zZTx+izIo>Ck;ArVedcXi`!TxTe+}Ckb?uy&P3EGKl9=At>ie6cbr_fvj$3}(sI|5t zlf>8Pz3|y&X{r8WU-8u4^~!7$I$Os-I6k>vhq>@$s@7P#bJ?$M4hwVC5bpr~?!fA2R?kdCr_WEuDD;}c2dd#hg#jlQ$`xy6*-+@79GsLSYA?xX&m+s z;n}`@d*L(gUs#H6`1vWiqnhJuFh|C>E$Pk1y~m07vZp%Kp+za4@BVd;WY97!!i+PU z-n&U;%Wv-2Nj*xn?H=#v9hx}dm)PQX#OLzr09(_L$-wQ$xOZ(m1h=8Z#dIn$WxPN^ z3N5s(x+|p;&!-#rN?P1r3XOOilVkNY>LwFZ&d{2u%Q>!N0j{&N6^&!d=sayI|8+u$ z?*9ak%;PHM=pogwrv;-mv)L0|bs-5QUnYQ3G_tK@#y{3??|ECz8oS5p{n316K?wrt z&B${XLJ7S{7AOIS6F9nJq5{~0Z{A3T7q`BPnC^gsP{67+@uD)laT za_BXNBIp1F>g@gp4NGiWmECMEZ1P$b!7n*tufBZwlk~!an|BGvYw5i)z7*~xd$VNs zzFn+M4-Qcobz*%tHF*>okN4Qf~h+{>|)a6qKWwBHCwMK$=v_ z+-~q=5mly+LZiyjM-p?WA{LmH(&IKJ?G|Bgo>HzSURTuqAn1q0&C|D6On(*f)hiR> zKeBP!to_*3eskW|!;;SoFBL_ZY0}qDI%No5+i*2eq!PWT5>U7)OC>xLGY>RTgKpLZ zu*iI9Nu(|8dCW{Plefea>+f~nl}>nduiUAB5vc9(gSc29c@S`HADw$npT59XF0gdo-&7Q4FiiB)*o zZMPNK>IjoDLpa&`u^q@S-8sn1xFQ149yOZ9M}m`Q_wJK`N4>9IQ#*Y4k3TM6RBSZt zQvPLuG!g(Mvj>cKL$=*?9qTh2la199<5cfxdbwV*aS|_T@xh)R^5VB#cXeY?_MCWv zll0F4kA4$nL$t1j8;HXbti z(D2=1&cW6c^2YBt071oyjRng9f)ec; z5&}Tr;W`z=QQX;=Xs|{i`i^*uUPOl9gWbZc`%bhWA*klpCWZ-g{Td7xNvc8G zavH^w#L}Z;ifG(VvF;%E&3GEan)YAAVF#ByK&R*Yl+8^Jtxo+>L*TI#i<(wmr1{V0dVwjD(Uva{*G8GktYL#h3aKcgp7Wrk#2jYueLkiuNE1h za$WbHTWd!{m7+R|*n`O<(TT*O>?Sf((eWV7N?zaZbq|{C`_F}qm+IL 
zBSjgbB5*;Ayd^dFdYX%@^7QMd(3bQ>1R$N0lgFt91^7GPrSd})`JKg=r6i*_$vqsf zjpH!F8P<@h@8jU|PvoqSRxG@Qr|0n!vycO7RMlultD29E6M5tB6cp>q2Ab1ddX!h_ z+J~6yW-6?OKk;(Be|k8l&vx`xq)_fR-WZ)o5kExHA#sw3d}oUX3fieR^&6onoDVr= zlUUFzPa=V(3fS#$d~=y2)2Xz3sOXeAYuEeX)r)JPRgo8~)~%~(Uh$2E z^?2iO`5;||DkhHTESk++X*_rCT7&PtDYknFy7%Q( zv1t*vuGp0+!)}+ORB->Pc1Q5Lp}5xlY3Bj-e}e`*$Ih4PRp^ z%VO)P>6e}DC4;*^e`Z_Kko>p25$%Tum&kkv2>f6EqwcWQ?^XbAR>VeT4STI3>}q~# zkjVn9cl!q=Ae4<3>Okev()SCQh3}>GwR-3xS}DF2n&4R#!(>&XW(@H@`V?(3^=`Y^ zSd)IS!3*t0?0ugs3X9wA`M9?ozEK{Q*y*v5*f!i**{Q$uVC26B$#Sh6A|O8KB&wT9 zm}eR+^yxOtK4-3*5l12UpeLTlsw?FF<$^jm8qbZ&AUzTWVxaTpvWBwso5o`*%`L(9YB^F&(N(|BsY!^8*&e`9Z z5`dA~mr}r6)LBv*v+Q8x;eXD(`#1YPc#@MmJLIDbWvYK2bzvd2I50An5O_1OgGdPxMKHb+H(WxM> z)~FPf*Qjz%Z%iLfNtS!Qci_%=gGzD##9!&|?7quEqt4LQ9lmfcALrM-fAiU8PRcvG zKI12D6FvQV^!M`aJ{gf|mWW5Yp6H(8F>+0OA7*XT%NNAa0)g5-Jd>m*4^&Bq}`%xmP+ zaY;507e6DE>rgk`9TBAa+a3Fgh!mjJygCo>I+sPHoI88g(8dW_2E9u(Q~l*>@BDK7 znosHQ2vfdGVq>^y&ig^mw-P-)8sfGWZqFD^K3>+l&n1I5!-R_hRIo1f?6Xu>^<)!u zZRCJEL^Ouq>NVh~@ebgLVq@6Aou!p6l*>)WeKZ(N(mefl3+d11uR3(LTjXmd2o~PS zdi2xTxzXOlhFsU|@r6u3 zM0F&i(b}!TriK=1Y?*MKo%1W?S3cTbbWP>PX3ff;oG##2fN$<2mAs*DJ=IuSw{BKTOv~ zrEN6PPmWFA*Zh=fHr2qlUb=P9Hs#l)fhwc45m<%1iONbSdHZZ+AH2kDt7b1sw3mqmY9~SaKfp ziYHv7>s{;hty#KHFEH^rXU1}I7K^iq-cMrqMZjs+s$VBZ)z{At3Al-8m*Q>bren2_ zu>mqMjBNXoYkRWOj?Vh|cX++vGyiwBS0nu=wP~(L&~7k2cv}+!wuS9iAO3yW^w`BU z)~1;z?+#nsFnG8}-?n_S#mk|g*Y7X0pL*YtuQ4=SngI5I_l2g(o84mm?oCEUWUh_p zpEB@nT1SnMQapAkP<%Z2{-%8=omfZIAa97Z3BDIsp>| zK>9QiiX$q&)u+aOjMehl&{DV3W1dF{{2kf1>7}_6+c4 zZ^ScHotajojxWrn-9>h(e`?+(hKnLe!%}$XguX-5suDiwR@;-&o=-{!wnh_4UA21tQ zSBHn^X1sv~k7wI3&CCpv!2&YdM$e==1c$c9Uy(PGU_V723q^!!oDvHfBAPRYe6uUrL>CwoRvkTy@@)i)z>;kF2UVva72&C;G}I{khyWWOSdOG)~L$XB7lb{E=@aeiz`etzU~!nv98Y5h0cgV_Rwoq;(z4Rou$()QUDZ_3(NPvRmui6^E1xN-GDG zW@zT2j$#4s zNMZG;cA9Q6QP|8;XSiOs=+(@D|{jTt)Kofyr3@+qS!DWKT@=9#lWM8_k)3<&KaiL8a>a?CkSOqx5v$(p^;^@>odegIVp-{Z)Q!HBC#F#o`3D8od^AdGqE5%?oWHC6gx@YA~s6^Tu20 zGn(8N_<0)F4Kg*RS0mF^Rl8A$g8~9b_5nkknKQ^yP3wYIE_ 
zbi!ARY|H0Kx+%9AHgxpP(p8+e62~nOdbviy2hs9@tLz>P_Dl_rMD+{L3Y+xa6DW9D zBRc2JRLvye(v?-2c|oaWvaZk?rz)dC8DaYK2TQbi47#z_=VedW)$^r{NRt-hF6Z2YAAmsG3ClYyU zM*AM{i(&K{(p{sC1>c!iT8@E zB@USbu9bIL=a zJ{_2-_OcS~MAPc7q0coXk$uAQ;7aXiLP>ONdtYIMRvuvg`+z65%%O9mNTb{pp5o=J;inBTu+I#hnCrWx*P{ohnZVEL)l7Mp^$$9>C7TRa$pv zmoCdsX^xLSGp^4+G+b#Pt7o7A;ir}e0wwY^s+-m+)VPelc|q|9!TE>Ps^_ckWlOT7 zfr#PPh!@^(8eU@Y-Jg3?4%kQR|Ci*%$s;le<2kYtlhFl5vZUwXXnKk43pru~%W)emYKfwIeLH$j_%1DS4ugFZuP?oxM z`!-c1x(A2I^3IYCApEJ!+GHPQuqI>JDGl(d3wo=h2i~ZXbbGqvyA>4Wh8x>M_d7l) z*u3ZHJg01&L4;)YWHUUsEXSjJ%@Q<&cBz2NM11i8bT^ls98q)wX8}=)f8S-Bis*<> zId(N|^D|@ZJ_52-Uc_YybM28*bcVia7O4lCOZL-Q#-q3PW6_|n&Z+U)PZ#gm_eMXO zbR66_@$OlJ$K(U$)$9nNvyL(?xV+dvY3Xc?(G%Jq!*ZZwF8lA5{R)QNvs2#9si;fI zpsWwbA1+bJ(3LQ_iYMkM>ZCe}Pa`@|sox<#d2tIkkQ;+i{In$R&#rE#o@4Xt`gs$F zgGQ)E;dBCc4;7R`G+guNlF+8ArdjARtz2D^x=<2%BwfE(wO_kHi^QJgLWU)=$JhSL zoON?-pyLW!%<=S?IG@Y|+!{d+E#Ca6s=^MuxOEbDg}ddx%*^i+{0+RPF|W zQ_kn^8jlU8n9S>>AENfQIK?VMdwq1MDBZ1sg^+wt#xec8QVH4RKn}HmHxw%4T*Sh0 z>OQw=BQRepqa%`q3@&tn4&YlpkZsm=Vs5f^A))8NSwkynI*#J@ac-Wj2gYjgyrNUA z@=tzQyD^^g^=n4ArQEgvjjnU;mk*(uQ3{_G&fRlzXyNI3`;kFEY_F(3JJ=4P^--4> zapt&zTMoRv(|w(rf4Ku^k1^NDCPS}QG58LMjPO{eO=sN0#LvKoAru=!D}6ftKdoWp zY8GZ9IE)5|J6rl^Jfd(pzFV@a99c48N`}>s0$15~7^0Pfd6ed69%yoV=&R zE#|bw{$5Y-{nyEBl5QO$XBM8;cRVa&WLz8Bo9}#UYz!-3>nfbb0NsNJ$62|W_Jd1u z>;KiVu?!!gbLtrX5T&rcq)z&GJtP#}Tgwh~ep&wqZ+GmqX@P4yQE*?(@sp9^q!w!J z(QV3~Ll`H&uuE5nN`}+}gKJ&QPO%8bYxL$_158?)atHk^3SNi1&vj&JZF+F_;^73< z;?hn!5gF^;QQd%wTi|QP!zD|HGqlTP+9<&2&2cI5iSUVFXNT5)w|tYKb8c4U!qjLD zR5AIk=Q0qrn7yyv0162;+VjB!mHD$4eOEVziv8w!{@%XrG6@x|4#%m=dU|8+asoZz zU%o+Lp*A_=3N!3Q!8vJEeQoN@zH+{=i zx4hB3R7Dpb#(WVT+&nU?ALCYUrs02437=$EfGFM*bSdv%TPIjQc{Qu z+@LofEL=L%AkHSAq#|RbnaO*{)`)m{Z@@+sw`t5wU3~ zK?`ZzMM)5{yyH4Ir`TW*U!#S~C&hL0?BICfs7>z^^`Yeq_>X1J&1+N7)!tVt zYe*LzSLyZUv+dmO*U=m9)NcrHPge1SYV!P?9aym~{dR+SDzF$D)06EDl7@?g4c!FF zuPC|aCv^&^14n#s`F=j`J;RTRSQ@fk&p1yNF7&(IdSDWCs$e#CHF)UeA!7SWtF-!y zd`mO0>sxu(vADux?!CfAOw1;?>GM-Fw{1;NP2BnGFO~bRFdd#v;59!!BWjfq 
z_(N<+GN+2HR``S+_-A(IVHUE~4HA=gl+Wpr9Zv2@E!TD^!$ynWZhBG=pJq}-R>;Og zFvrLBif&)zTVlBnI6(wQfZ3bl`zynp76TTeLf}9jTIObK+!TWRDk(wpHY}IV0lf6I zF)eSjkn?CQt;z4>&vsRu7k-1$5JuP7!K)u5k*h@vGCkiD+sVG}?hh!{j(qw9{~1QL z^LF>?`4AJgd@Br|VwL^F>6{PZ74r z-BmdU8@q*6-H7P@o0Q$%E5h#Kq0hl3N9}F4@iQLl=c63RGxB%X5duQod7jCe9PCn3 z+G5U+B;%BhblI08=+YK4zR42Qr=^W9c`)9}RsQ~CMd6~<=w*rOE}2DxP4VTVy0m1U zdD0I@E}aki-KMo9V7fo8l7oUU=p@K6ggC^}gL8b7dOVdQ-nFkbXKYY^xuW>sr7P<> zDg07WHJimJqXh>vA0F1V>)8hG)F#PKo*H?d{@~#0f4R1Nv-s)OpEqs;PJeoOjR*r< zrVK^!vRb07S~0j+Pq%E_qbzqfif4K`-4?0uDN?y_7m>yDPQfP$R^~-$$`x-z$_98S z&-qkey+hxJ@tkwd+Zz=fZ%t3^8RC+abUaD!z$O{S8-+`9^NDW&P(+U1L#wg73^Z&J&2Nl}xWm*v;%s9x z1{=Ik`qUvrbG}aWk|*g{u_A)o_H;pSR@#{y-hU*bXI%m-nz;WXCvay`G}g26<<(@q zpZ3*M!&mD|thhB+m!K9}o1JDp)wOwQ#q#ji>2x9zFDjBS(%_Q3sm&Ylofzw3`z7z& z9(!q(svz<9yJI~_N=?cd@*E{@M7LYFR2vg__eJ4yVYA6cfknGigj!Zg=#$3 z7rX7yq51~b>cT<-_i)57@_nvx#8=!oq_ObiM`{SU1zBw?FAf4NIr}BsqwJ&(aWfm= z!0eP^K+-As=I(Np-+HQg1w9H#Dw&WD)0{_7!UR$MB6M%3;9z_>s%yV<4kQNt4V-uH zj(*r!@tbA9wns`JaQ;hyYBSGlHGf)kno>`y9^vkA%_e{L!>zOs8E}2h_YocOF>Ep_9L)tKrNd*1o#h(n+U{hjH zGU>ISbfY;7+PMBCe_Q+vNKv%1!N?;&lF~q4_4``xmw(h^Ot9~vCUPzjQwWcPM|9iP zgRtSUOS}qNSC$udL3EHkDp)5Tm8~|B@v^#uIZJ~CdA1pjmqR2 z2nEOe_18~!Jyp`^N=9n6N5Ng|gBGPKNSq^Lw4Mu(DY(j+@0eUHfpZwJte-w#JcLhm zMFgosCs%`lgArJeD0rlDTmzxbPki_6rCVRvDe zbsPnC&O@DoMeoIxUxo7UhyK0eLr5Sn@hBgK-o+Ga$;PcIdU+C;1nYbR>k_y4W>0wu zwjaI9taFw5%x(jgqF}3SFIEY>Se5dJh|cvoeY=MpcI))aPhHK{!JidYW#s}_nYqs8 zeL9!-@(+|zk`Fa-#2===hu)ixZT9n-%_ zeWj$tG$QurADnwdbQKH1_M<2V@Tv_9ucjyuJ(%^q^Z2K-K|1VV1|O9R1h+iR@o*)IISLoDg}TmEz-ID>i}OIjG~n(k{IL zTsA?2a8DQVzC8K{$3r?nVk-nw{C~Ac`>jmESFo<{J!^A?#0#JG*Xvk|9B-@u1NQ1h zuHVW;d(FKngDeiFQ7OXjZvo_Xyfe+WGf zX4oR|s}NnshV~WL9b9!R@BM1Cq+^BN51x6yl|7#7DjB5Xzj^1*7K*a9wYBezKe%g@ zCr_qyduU~@FMaa}2ZxT|{PRYzCmZ2wZM>C7*EpI9chPto^xOWuw0QQeh@c59%z@l2 zI#Ai9g92KH4Kya#mWh{rJvgM(;TLAK1oDQkV-z$_xVLRPObX{wVV@4hsn_Mp|CtrSN7HN>d4vkBNZ!et zH1P;@GA4r3W8mB-sKzTpRpO$K>yR|zrxre=+4k@soM@UoT1}syzl?2!mzsh; zKMk#E=;Ln{HmN;9`lz^HUVg_DUFfUP#3|i*%-GAzs|OODs%U9CEVh#_>paZQ%UxJ7 
z+0vMP<_%7)o=q@g*EKurxR0~6Kitul^szZto#^&(A>#%9Wdda6WGNISR9cTgls5q# z4yRbRc`kig_Q-MQTt2!>dHec;z0 zusx1`%Qes*g^o=uq(x0&YKfW!DVj$=-VVJ-5Cqz~crSDcUf2M`Yd+teC2xma)~^sA z>NpI|nGDnVyL8($T&9UvdiycMeRUi$b3|;pz2=DWVi;G8W z`_VEInl*iBXr>IN-TmyYOH>Q1{nsbBaixGH38m~e* zG!bn{rWi%(v|O>|I>=Dx;pp*XxSY58pgEZboC#V)$VcHU4ov5F2o#NLc$uKtjAqPG zHJ6OjG|!fXdT9XdAbt1lT@GX&Q&p!xPMxDI_ZoFwqOvk&w~~(k7olpXS>1v2z$YQ0 z?b{v)g4)CHCD|bUTe?9@+LoR{ zF92uwa~8`o;c#*d;=LTO!{IvF@DoBaL(w6PlNZ>$ruG#szX zu`_vIRKzDNJT|Ml+>_xP=Y0lo`luim1TiTZrXGHFR(60v6+ma zG3*SPXkvqY-ni!|1Ws;n2phk7873YYg8F@qcyud+b5jL|eEtk#71%?vd+4lvuwwal ze%@MJGGF%5Kw~cjtT&>|#U|nWveViKdI?ltF87d&pWtI9q2r7kM4Wvg;}Hu*K_f^& zCa9;}hE7k6)coWTYM$b1104GS9B2}ufH{RiBXe&RUAP`-tdIwd82u3C#|y&=0(Ype zNg#)lMF&R@I;TS)+V2gv zhA_3|`|bfR2QgNLI9I5mPsCNwmTMG{qxV|kfd=O6x8B3?^(hq`FV&A!f{yg*z1VSk7S4tV>z+X zFPm?@Bv*SSV2sUArs|>5TV1D4Cg{kTO+&PwlOi?y=}2HZOOcW(fTR1`iReI}jrQ~{ z?dYd;9|O_@uk%yO(Jl_`KIk~dLkl&ft*vdu%89OGt6xzRqz?=>3+*#+io*!^Knm#; zv~Sh!9$g9lSDu>6@yF265GhQ;%=7e2GArXOQbidH?HtrAzXHMLC+QPj{mACOAV@cFfPsFtM?+W8D3}7#$&cNvmR1s1pedG#p5Rgm40m0B6iU zYJGSIJ^So)AuDDFIno|9AyPtN1!UZI^}K!4A0m5`duz{W>4Fd^KS(0S&Cbqti1K>YMvFH(L&vd<_)nqrKj_K3h;PA(uYFmRVI zB!=>5K_&u|{XQDrG1v^z4S$3P7Wg2y)X<9s)=C{s30|R9Mr=pN`9ViC8(Jm-^(J!A zjiRBPF^Jj?`7W6p`q>eG{q@)N#{OEUZgNntvZ>KH^yH}S(>+|Q;*PtExRyW(JQ-4S z3W!aJvQ5Q|-Ll7C@O!ohd|rBVTEAlChyQ>@ChO8esBN7f%w(cQdV)5@j_pzB-B36` zCNMHGl7@K{bDeyUsSSOSi7_aroBOt(#+q~pD{2=MN`w4SsGpzT9JI~PR8(WKZCHx# z=MBqh<~!R3>Xnt*4>TFU^rk|!r@pML$$TQaK$XsrM^NzfKz28q&tHE%fG719K2s8C z*j*yN!WJ#F(M2%We?z0^JlyjZvyg}(%$){wxry)D;sz%x4eF<1?IN`)x>C?l7#b*L z*-tdi_%PY>!MmT7l~q+&S9ZoM75NbYwddg_Jc0yN19}S>L9;AX_`<8lTKqu}B&sb- zZD9xwnS&FB&xYC9q3HqbmEA>FA7SLtc!F_ooIGJ+VWFt69!*%c31><&_FpaR%wT@G zR3!r)v)C8&M+Ug<>6g=-46umrABtM*d|5=#9XfQV7TSVnj&ln>V@OCngpV)yw!ekw zSL&n6yhEFjRltX|y3Ztsu;#8U%i9Q(8>1Gl{2ZjNeuyEZ;moVY`FH`RR4QCnjpN7v zn#fxJ;~>{<$ER51rtnp|=W>>WhoB@6qq7fXPd+|ABkq^jk>kg_5MjC)H=J!DA!CEB zK+^aOA;O*(!aDQO@cjAUkb3C!B*W1`=R9H3hQ<_nX|-60eQzH|X+wdX{J+K05T^q4 
z7&?OwOU7O>c)_fIekjb$L=pc1j+=UNLafxq>JQ*ndg?s>F{J-jzal+I+kxmWn7c4P zljiDlYieo`t_vQQ6u9IgxoZ#2{?jPZN`}aOS6`nA5gZF)-ORkFCzSRPmyoUCED z&m53UcY*F36C-0Zbb;ll;%t5nx^Q+NAS8k@mqsVCAe=4tQVQoa5Fslgl((O_4hRm8)Ag8Bt9mH7-Nq>dZdGp-%9bJLNM!sWg-wCst%Y5MV7VVJwRHj4%N-=P62N4aztBTMKoo9(E=} zzl1p9q?DDE)!_Me^t}BQQfE6e8{KqT80fNIVGhDqeR1_>am=-y|2!?0#szSK=rds= zSN!;6cTX@DPY;R{QOzZ+mUWoxhH>VDK z3$H~BO@6}Re2+r><^xVX5ELcjdDRlmQ%3hJOkhFWY!|_);1<|l^{^v_-1_UgUKlz? zcJ>tb{c@;`}M5~YZpOKM~IcN)nK*D}?i|CzC z6bj`v?4K$c)o8K%iyB-76NGr*P;BiOa_oT6A}r+vSVzn~)jX$X$MY92j1d`KH zhX1$|sjI@-TZ^YArwyg(1-+08)*EWAL`e@s;GIx05zuK8D^QI)d5D^)dkX!|j{Ufr zRjl|<*VF_;FgqZ5o5K!SH=^7KK49(XHui#jO{I*XV!InJv%p-j98r58a{v; zSTl_n=nylF@e>fwtVMQ%VfE??m02`%jZ#ZG{Tib>*i1TzhqQ~>&yCeJLPHK(KWb?h z>QMX$ZDdc|+S=Z(Skg9WI1bS`AsF}8h51Ye=kfK=|Ni?e7EmfHGYl9;;t>YA0r5}% z%))e^fD1au{6Ec|d05Z+_xC>-eC;!gER$u(WJ$gz6s63JWzvigks@0q?V+U2_|9ff zgvgeiRFbr59m|A13emoeA}LByXuBUL^Syra{awHNpZmV9`?|ZXnX6KL-k;ZU&htFa z^PCe-9H{ciC#x_L+G#FfeAU2^h=SE?)@<0RuBn+J2Y_j9)2tHJDcy`_qYD@?Q@VWp zHDxtpL?-h)04`Hnli^IY4y0RA<0%cQ^N8T)wRP!m%bN3!@0seix2$6Hs8Kih%TXG) z13WVK#BY39etYlT>bC(48Ne1pNe{IoTjF1oTlbHan0wpR{v;Q2xMP_Ke2>K zt##a>ItO$gD)qy1#{k&Ht!kAt63zndgws_hl+6(^F797i0&z3O&U_sOMLeD=FRw9lLyAn%LDmfPPl!0!<)v+mLfN`CK( z?cPIE9JU1344wa0$L|ydLgpIxk?JlM0nSqcjXP3h?sw z|NiF^TI=l^X0Sqh40B|qU%N~$8oS`+kNDmYCI!q{*x>mq&5zTSV#E#eY`(bFJQW_C z)|}F=k%6fbLKxsutls6!ny67TXU~=aQA?n~C%cw^>rj%se2$;$eXhO~MQ5eBD4cu$ z6~rsrX0DjSk!kynuYd6h>@(|Z+>dKbuhtaAW^!uR(l9SrjK&nFVm%{zDnIxlixqwW zOA%dlF(HHC)R9j0leFqhUwY~d6&wTtHv=`=Grp<#1wJ+k;$tbc)NgTv#EnwW9^+J^ z@8m0J2^7K5w*bz?mI7=2A+?OWJ_!1;y0cF;LPoM?r}+Bv>W$UR(utz9bQueoWc;bs zA1<|}UlqhUSr(VfJieQz!&ite^4Rk*$cOB6#={QNsh4)JL3yt-7d^Gvj7^n$Tm=KL zobJ~#n0Sq*etr8^!$(AymaYt492(eY`{7|Q&{z?`=@@H&M@a}YWYKPd2Dd8($Bz~v!pR* zOr?GC?Bn-xIqFGlp3LMkW9OMhyDA0>-9FeBxFx2N0^TYnT8NRPYo$j`va+O!a~=)V zDClqtYlZh8Gzd=?yYfk)9R*4^dT_8 zw|o=*7ceRu8;`FzLBDMKX`AtpNM#3SBGl~j3r6nPx$_E~lsOO+GgtGy`B~);GDu4P z&78Q46(oBwJFq1P)?y6}Ov2y2f1gjtZew!xyNQ^0qLhq@bU|%67#tk@{n2TYjeO}P 
zm`yihJN%emw_=X6hMr!bV@it~2O#+F!HL%ay-$i#HdZbiXPKM`-)K&hS&gN*#2AMo zzw37SR6nSj$rmgwQ|`Ahdm#f`vxeCmkqk!VxS=2NKV+7DIFzQo(zvmjkr9_53W8En z+6)hvqpXYuh}|RXpru($Ftczn;neIoic@2JnQ0V8qi7itrlsq1Dh39Vd2Z>|n9cN8 zaoDF+np9SUH)9CvGKziMyk!}Jh8TTm(DK?O)a0DsqY#rc20q!9c-4O&I3YrD#A04! zM-WyN*4C^f**lz#e_B)H1jQCxLt7N3pZa?1w17kym;wez+G32(Y=TfVN(pK^eC(Jb z*X0Z#s99Q!L zUl<>==QWa9_(uVIabQbbz4&{}05@TPKN$;}2k6asJmjOTiSS6y(>|Pt-Et!0YUPad z_83AoYk+)z^xM&IZ;W9aIW_6&_LNP_De!~^NGiCijBzYl4+$hk-Td;}?exM!Z(gF(2An4&1D!OyE%Ps6khR9tsd>Os z2XTh`HOw9mmU_L&jl7PvvX(0gCwVaa$z{%ym9D#WTV-XX8zhGL_P(Q(E~=gk#Yu9Q z*SzqyQcg@1?|MsGh%)mnU`CvLvCHt0Bde))SJpD%ArW0)z_a?0r`$#`$SQg^gH{ zpdoGF9S#CODVJSYx60a@{+O}Q6efQ}aik%WPjF?SDjIlyPjWRuITF&cwpT{Z@nbnW z_cQY*TS8dcVHY>xnXVA0KUe`FCY(fUo6<8#Y4>WgUU5@@g-)>85sCJ6TlHZtrjCpd zGSc9G`0n+aq}JE&-MxFIgp-TU&;K1CqRh<9FCm%1c_WW`_n)(iWsL* zH7%02Ee}5(QUtM~rKi(J9m+ERZ!cO}%sJ?G-0~vt#-awio|%_dVr&snYZ$Ei6p)XW zZflbdhe$<5#aafj2Kw~t*DoAapcu}Q%#@H2FAIa#s0OtN`$9Cu`L!dk2=qLlbb%JR z=@|EC7h#34uv=n5&UeXjtieKg1eR|A2&fDiWDEaU(rPNI+f zi{d16An==*^lI|}VR5;^Avnje<}qQ6km!awU@7Kx0qu&l1ECw9leBsWFLa*F(0N!^ zSc3<|`x6X>n~!?!30!|BWGAy46J0vq%7be~05mNq2tR!W_*`>vLE>f?8Yde|E`>?5 zv==%_Q&!%H1-(UIHT+3KyT>q3WU@H z^qr+WxhWCpG}rQ9r8NdrnxuO;uXp>dd1}+dEbxceo7Yg*cD#9^OkcAQwlGH*n3UX} zIe;aisdl{FW^CFwlI>))CHlkGrP@fKz!c>#3NkZ|-3wiQkq-Q{BArk4;iBw2l;rI%)*<>tTe3UcpaTw z989%0)bA4rEsYyc+E*s-6@g?nBgi>9~Oo?`f!!`9Nu+l%n5M0lyl9Uwp zroVVch>p8T*CVDaoVhFMnh8oKEhwvVKdr}@NVJZ}S{4{1wwQx%#*L6rSPVYdrc3_% z??)Tg^jAc&%=ucGdTVHN{L{O4@62Y-{Bi#nlRp^Uwvt3e#)BB#mnki8Yw9Mp?R%G3 z3~H6M648^D5QfaeyMs$!zUk3olJO@s(wgyzrd**WDA~K!xqY=nNVA+#yD?mcy?>|< z9Jo%-Nn;}O?PBT0L@XN(-DzW+LO=Y^NN5Q(R~b6gvH#dbm$*8h@fWdrDV8%<1qKAH zCd{}3Lv)`2ZWVm2O8@>=47rK{Dq0WvOdK$LtZ!?5a8`pZQX~v-48|>ga*o9s;vpdp zz+ep-Q+7{Hehq+Q%I!MBU~B6b*C%twnVj7zjJ>sWvHC7!i?feunxj2z*svOtZ3M=$ zB?q}u(2--(vu=y|eU&!MoB;9#GvlIoBMIaeXlJIcTbbKcQPJryF73mSP-v<`t^k(X zvq!h8=5L-~YxZk(VSJX5oGWNvkaK3MRx>07R}uYjuRkx^zx2w>tZG!N@}Ty`kB z`iKtAT<3;mfm4FAzF^#Vp6D0qU%iUN@+=G1_lAkKRsj?A(W~7W+rk*%@gqiDV@}Pp 
zB6)+@ZoXy1xw;Ix(>GFDdcFDyAG258IKwrAU?>l7;Uxc@o11H&LzueE99paKz(2Mo zHZ|90zQEC3VFq78a&w)Lc_I=VM(~+JM})EwiC~Ld(s{z7xBrWveTJdO9zT9uLwg^I`&j&^r>{QNT|Adi$P6)wl=U&w{6iT= zPDEw!;Ps@ctlgStyKqt4$s%6ta-p7eOkT9z0o>2gQ|`S-dvOuMFw*TmKxV=o>O?|U z5(5XJX&k2hf<wOUu8|`6sqvelu5WsT?JI_jon6_GAWA zT(q^K$jdFmJ0w;Z8XL!U?x7;SEYZ>nuKI@3uf!+}&kqImg1k@0<)RmE1?Ul0V_Bmh zE7tC)z;c-d{)@^ygl+9U?SJ6FyZ7m?-(wF6tUS8>ti&6qyJkg$ZDgl08e3vp*bUPt z>+ji7XkmFg+Kr7LJ^B`CuGFZ&|SoQqSUA&k`O)zyIhDd-uDM<(-z#0WSgy$O_b zwFC%wgUMJP*&3i|0UME|?JgLVJiIxhDxwN=t$No&JL^xV>9=(aImvax8O2OkO=-d8 z9zA#HyV>d4m;G)57IPV^v|19isMl^Nm4p`# zwDFe*GLNy3B+ZskdG@dd$Mz*djWx=C0yIkq3wWxTUaYidNVh~m@cikow)E}SFSD|# z)9_G%v;-uV9U=gmn_d>?HI=6Xi3kyJB3Oh7Zz&2ifhxc?NfueVRgame6@?qihN97I zBH&Ckt|dwQKq7xEm9@3XkRe!;rLeYPI6za-1k26N-i4Kekai-QakuXhxpjCc^_IHP z!sf^L`gG{5cqg2yB<4gap@{pZP;w-U+jSJ(yLQ)~uxje~*xlfOcQ?Rma`h7&uq0Df zrYaKn3M^)YZZ0K{@u}gQV_BUxSpGx@_v&0Rd&L)8z6%Kn5l~86QHz*XNEP8+nAK2y zA@t&9A-ZH{k5C$Nlz}dXN4+Gp%||&ge%iG6_9fjF=k_oaHB|j6sT0%?_n>i(J*g}K zA>@*tSO-M8EY1NIkYC7`X!+)E{i86qTM8HnDLik~n-(_491%?t+@7_yP!b8GNyBMb z<(9MDJrDcsWa<58zfc$g6TFw{)Ek{4xS+;#Lt@q+opgMGauqGrNAJKq*wY-WJ&i=} zsqrA8MiI?+t<9sXcL~i4RC)ovA+;BVttu{S1UBR6V`Lb!Q3__f{Z~m9;R6Q>eib)| z66IzqOP9~<2^sKDjyb}_x}k@IKGL*^yT`nw9E*|(9cK9aJ?HnqT#M95IFGH@c5KV+ zICg_GDbZ;7+9F{)t(stQ67%pb*627UN0k}oZBMHyNh#59Yu*4L-X(e1m<<*4YLYqB z1!OJI@z_*qa~;PnI=@oF^~TD>5+rc~u!Dz#%*g8zB?kqWK}T1^b4R^>ySV`E2N*M# zM5z;|Pi5FJCu?i*sMu{dqdR@aZOuw={tnhnOtnm49qQZs%HHzXk(X~lJxbcH?Xem( zFsdv#jbw5Gv!vEY=udVfkBb;!m6BF=+r^OZC}t_UD2!0k=}czsSYy;K3;&Y$>YOWr zP8-S0Hg>xXpwAp?i%Q;B5_FB7fi#~H#OE|K1ukfG?>#QizITG8k>o+6Q#2DM9)g2o)=wSET;ulz z2A&m37U5SGCPj2?%qRlAjZV)EcsD4633Vq3s4p;}N2FpTWavZGr%#TkDoGa^*7=!? 
zTXJy``T=+QUzxrK86%>lnx&Gv@?~r;dl(GU;ig$KuQXu8W1DxukT|E!ost%dT?(9F zNgGcKizBEhqPY=duOJ9}8a@zM4~=pG=FBW(jkRmM1r8*5qa?f9=sP6(KrW47@&G^I z;uh@9(Jb^)oWaH1736dywCaluWQGKwUh{yFU)i?-yx2t?hDRo8Xm5`B7-oaCYb@Z= zY@gjbAu=U35C6_vdwgIvdkM2yx~QGmET7O$bCN;f(wlrPtVG6+9?Mt#wq~AoxGP2p z+4Dodt}rX$4yA_OnIys<*X$xHyY7tl-F)&jX>-0#bRbx9Vz07u<I-Jr^x{i#xx$^ z2LokJTmfAzg`)y*52z}Pk3z$eBYdoNURn?Tv@-A2=fHJ@QQl4DXrPN2)+o4~Uel7T z%OeVXfuFd99tcXoD)8Ka4&}QFUc>4ZcbJDc{^$u zq#GU3j3%5*czz+Y8f9bM&=mKY6`>1G{k*9<_tk0Rw+Tg#O@MC)5;K8tU~lmKZF+@_O+eD%!0aLQAzto;6%ZKls7wC%;I1{z z50eNhAa=HA$Q|tgw6{2wi?8a@r%$F^Cy%RdNweGGJYz7rht0Ar)zq)z*M(#;I~!mL z>qx-6ln$WG{3?Qw5WEADind5ol*YlZ<`IEA5DG_wQ-#X$uKm4haZ6C}U8uTya?u_f zzS&QxBf%kH6iDb7MB|rmtmb5eMT6$?TMba1xTID?``FaH6yFpc5nn)3a z6T_)|`srH2#iwOuYaktB0cM{YyC`(5;iTN|PR>~hL8$?}6eMTN@RY_LLH!I)F*Ap9 zWbFd-==MSs!a>{Em0{X}3EVG7Au3w+eaT`;!ZHSUq<2#rXdn?*W8B_dgTR4etWnUu zb}YEMDO7}9_=KtBNP2bt=p(9ztzK}jV@{twt#`A7!Ul=r{|-($pO|0kacphxokRW8 zgb9?8LMU?>e0#a>g(ZLu_=596R!PGoYlQ?kpX0Vp(t>!tW3Pvf#NdGM;9+y&=54qX zD0nVe+^l-FIdP&tL?N`TJyQU!tpg$Ri7W)jfM;TGfu5|2vZSO$M#PtyJP2J95yQ;D z^3A~=6%{?&Srs(=B%F{Q|CL@4@_4vd=MNOj=zolUt7pXiQ&_W_#G*e4VN(MjUx@j{ zrmUt!)i2^{)Q*zP0VG-OIa< z*ZQhlPwuwLf9vpd`$x_GT;-#+duKU+_0^vIZ9M`bz73fXmb?C|!A2o-PtNGPf5flo zhJGJmr1<3HuSR?_dR3nv!fHx}HGk>+`Ha<{IF`QCNi+Gp1yhGhGI#Bu2YeM46yT`h7+8^;pT3vr>WpESk2{@@ zfMXJEujPm_!gs$K=IR+~=`5Zz9H}%tfqv`Wt=j^?G>>x%6R?iu3OL=|?B$_1v;}TV z>p^WG3Qj`RqI}NINti}&EX7z7pPVw$ww-5`-+=c^x>=Y?H+5WcTt&#n#}-5u z7s&2~k(>=Bu-rmcPt;AwOX91f{(w{*MHnW|&X?-y>eLJgE=}gm^CL)K%!|X(i1tH% zX%an@MPj`y!7W_>{j-rWa}~xIt~PMvYA+YnwD5D7Sm?Eba0%xK?JPh(4ayxCGT z@$CFRr+9dH+)*k8Q~un0$OX8OXV@vX9B%(HfcRM=)#q62Y>{XYCeLIco2`6qr&_5I zK1|OUWlMY@N`7nQ!4qWeiTTYSFGXDe5_@@Zz`%j$dDG_^=SpmH2uNPoLTpt@#>CXe zCs{4Nt%<*muUZjs8t!Z?Ad$`8eOfD{?OO2l>(@se)yWjiC0Sl$tX18XBz~ke@2s=( z)}1>K$P*DHYfOLh&;&|-4{Vc~AuC6sV=sS-P5!Sqpz@#JIQ}p$cgWc@hQM1%X$v*MsM>}>MbCM60>wmu1rxX_ zFeu2Dvl2_@stTEyZs<8@m6fZOc(2lKD`{=emTVTJYLiCr)D35$udj=W*V=lRSM3kj za-5e<_$I1wgg`(faGAH6;J;T?UQU89&D_DLB0fE=<8pF36E}sJKy7zfs1pB 
zSf_Y@TVq%>rSdz#N|W$2q`i!LAqhYFD9kL&G5(2)mTvu{K9W-7!KFmPA@A}{T!kI3 zhPuiOor+*GY(n6*G{mMv^IYZ>E|7t^P4R;3AND8elU8?3?MCznYo%j{qHl5dvPWpN z9LO8ItRSwrg0w^?5nE&3Hf5%#4fRy2rn$7OARP+a=je#z5=SNB|Mc3_PT5JG?xfqa zqV`ri#Se#Z$X-SZwfce*qDW|p;OJ~DnmBXrMILb80b5{t;_J?Gx*4>3&!&h2zm(^c zF?S7AAuJP?iMu6G@@mA%F*i1MC|&(|&Yr5R!T+-8TIzLc)6M}3&7rPC;(NcHDrV~! zi?q!h_L)xdQaN$_cxvnWoRnqkr!aw7L^#4_*4Ag6;`6o-7tT~FH8UH8Z>MDTFk?P~ z`nSAYs}+T9G$UZmtWx2&+v6hv1XY^4KFx9B@X4zSM^~}!%B4WawPl<-?-j-q>65{R zvw%f8MhAoB-3+J-Y|gVQoPjgG&zAqiIG>6g5K?r+-Q<`xrlw5PW)4$-uI_&vet-<=4k7EiP zqws(Q)J(0!d&lrz^VuC`b-6NG7&Ezv=+Guc?H%Aj_!f;Srkw~4d*=5RgpcMwvZVQ` zwthNfu05I37;d4NqvJ)1A5}D#u4GaFO3^aJB6HZkV2rZhO70$+TEDYQV#z7C57Y`M zWJu7^)de3FJk1!PaQ+jt_qzl@JN*4)xd{b381?Kk#iMSmfCG%>!Mxj%<&+Raf)dez zB?cWmXqGGDD5!Q~`~oc$X>k@(Re)3-ddN$E5F^==XE5K>o1 zB~oh8&Q$aIN@e5MF4UpLp#0I&of@cW;8UnRzoSk<{d9*^W%d0z!=f4LE+YME3XE(D z%g1bdA0r_1Gsn2!Nml=?l~EGx^Zqu&MjS;6g|jNuZ>rYvWIWB?syF$Fe3jKV?2`)| z6YdJzC;2qMZ!T^2SK``*d8p-8WydCh@23zEbJ1BLXbG^chXcw&j6=oXX8;;@K>T-=>7;Y3UF)M%X z$a{2tT3(*}O`qv&p^ZqbP7H2QS+QGC8O&m7a+j0N1#D^%JI!i}3W`HbU>YkDJR&Ox z#-@uo3pSV{@At6d)sS0DrhfY7mmaA!LXiPLJ`n z;(YWAtZi(}2|7g>@bdDL^aBYI4z*j7B$$+a5`GN5nLR-yfPepfD^YlfNd{q8MU{{m zm*i$)kMFgjAaL`^VXuU2cCG4Io1$Bi)ulU0_X2_65^-kMg@uKUaMBs3@0I(-yd&!n z6}qNvgEe@+^G&fw#SqdsRfASt;acnGj3g8Wx`-Mmrz)Dv(Ne%A@LI%1B7P1$e7MT5 zm!z6ty58ySIbPe;afa=Ti=F28W%2KD_fSOs?JMUqwf~rP0l%jfa{TxVvQhWp!l?)P z!Vo|Za2kYj+eKBzeFR{?)MWk(n13;V3HGb}WfZ?dF%dNXSfoDP!``Q*Vp##9;2D4$KBwk z?sgq((^2u>Nl#~>p3en@)*&RsFMzc1Z=)-3#4p8Sab*_4$GXRAY4LhvrcGNWT2X>* zb;I|MB7DveUy0s)8Hy_o*kp(NN#Olk%6*3qk2CDvtJfoHRYD;NM4iZGJ|Jof?0^X- zeKEY#;&bzcP^~e{5xx{{FiMW-rq$-29cm6(&2_^4T4L<+; zs|UC3_FXH3r!ky5b*kv@Kd6)W!*3upgo=`Hf`b9bEjOgxLPI3~Cn#f%7w zUu_oL4O?K#8)YhXXyz2sH12J8xAwM}CVJhJwiYKUAitIIDWC>fuNMrQ=ZA6-1cXDM zv6}R*Y6zHDxRm24Fqe2hhRQ#Ru(ae9$uh&+RBqS_Co&F_8B5@LKuT%`4H|R^K7l%n z3mm|8pnNGcATH3j?S7;i6Tj!rzc=gcF2#~S@hC|hyoHGU;)~}zQv?!*!c91tjcAmF z)+B~LgjV15q%**(0GEv6GCu7g;-;37IUIL;9aS_NySiQm 
z=A7X_!vIL!Fp2swlL4o>Afvf_;`nuUoOiZAk1cWCuwm2N+Es>iL6Igh)Cz^rLU;)o z5ENf9d*;k_)wz*$`|^K8?-U%~JSo*w25AvTpRsKA?4)x-$N00+sE;Tt#kml3oJwhZ z6T!a$#Dv*FkoHnHeVDb$mghUmoR!Kp!%&}>G zm+5QL`#ioJrOk|;*)lTSNXB;&n)1CNTgQk0hb{i}-~>HQfski#w0 z%fKb`;Gvy|-5*K;6epvK5pRnL^{Dx^O=}3W$Ws(N0vuX!w*ZdP&9d``)O@WS`i^NV`R)` z&7OS$g82~23Hc6|KECN6-MZZehA&3^ZK$qUN(gf?enZ6t#&hR`WTaA-r*c;*4H@9U zeUr<;>}ls$$vRs{9IdkLv}F0F{e&^05N<07ms}%;sM$i{Nhy0Y&#`OZ#A`*C&>NXM zzcRdY`}P73F|7AE!>gp)C2tRIT>?+K1gG~J&E)9u<7xc-o%C(VSms2ubV%E&bw{(N zA~hJ~cccd#M2IYKF#m&}=vul+B1J7Z&+q^&ZZ-xv+OyHo(6wt5()3#{sx4*1+IFkq z_>2PB<}-zb7EtN;i{xZNPUxOFefk-|F$*m78r9W#XhNCxml-;A-ke7L(iAdboRBL} z;4MTFgz!86sRepP|GQX{E%YvUnbPJnMBi&EN$>p|A3uEntbPrtDEIfb?b}@%Juwk? znHVI>6G-<{=wYN@c74{_yQAJ6Q~XwA!Kpox6bol+s2(w9@D%&=T#j`_@d5Mne=2=F z=wEpB5VD*RoWc0tuW+}vKrgDeml&m2B-nJxVc#d67ZyR`%1ndh2GxTBh&VeVu&FeuigLU1)Kz|oW zuEjN-$BuH)E}{-*W>nxyuhK6s+vR8ij}4JSWrJ(alYBEJOqSjepzpJqG@B#&MicWG z*u7TcOuhP@Lb}Lb=h2susQ`zaV@2G9x$jwAW+@HJ$kSIeXXYK7^6}827x_EuNj6CK zP549Y(F_+JJ7Gem_u`#FwjjIw_zmZ{T>Fk54JS1Enjq(1gwLCC;D!9@$8xF`1H>PJ z*$ma4)^$>Nu^#`sQ#3A3S-o(;g2C138gWkdoP3+iIlo_n55%!8hW;~rU}8M|jQD#X zL{8+IG^usrT1r6f?WMPsrU!6cfVk0WYBNP>hl!xIgswDtlJWJ?Yj5`j@hgY)5&aAw zmEisb@Mf=5JN0=+4t_qP1kbdNRcP*}FlZbe9gb@OCJ+Qrk^JQactOU!`*9W~_*dNM zl*8b?Gi0_z@yKrOR}?*g6GDOJt7B77IFEt3mq)% z^YZfY$yTrtiwNdnzJ)g;UPvV8?jkyJ9N})y4r7kSP3-^i$EC%;c69bI17k^p4-3oJNif9-x<6^CClDhGd!;Mh<1^*}jVAZM|7Aj6WjLAI`g~paC{u!$Ro!cdG#YL8UN$Mpd$WZd#ksz@>swdVw3Z2@Mm-4=7C2{l1YR@c+wtH0+|$MX6<|pvJ_F8k!rWdqx+HEf;%CbG@nn* ztbib0h=f9@WgL<%a_|FJ)#NYK!o6#|wzoa!k8W zYH2u3btfP818d-|Dx-5&B#cfQ$1stK=j1yB)TGzXO9m;>f@#?9stkYdKA#5f`S9tI zDY92OH@9}^BsvHRHIBT>LoGxw6iSKbu?@puYNRbuW_$)L#cA@mFOO99r06!_b(%rz z0+4n^Nb`Y^KNrDYn9`JmQ9mNhz%lPT|AoArxPiAe*^p)6)A@sW06!k!xC|CD`LxUF zlPfx-!a9w2kgSPK5v_OI+qy_)6K?y{_zbkIhz!)Hzq--yJ+~zDY6RiPbE=c>kqthM zM^tIjwYW97yS&Z8q_03qPcx2&>eeJi<-U>EPrW(w;<&-^N%Rc0{p!P~!)0r>`s0J2 z!HH*Y>)c~sP*A022{|}Q9v@OZj+*S~rY4g6Y_kzA)P$Uk^w?|iI>s{P<}P(8PBeq) 
zFtxBKoLV&qmkNrYOVa+mdriV>oKM(&i}W45IX)sM6!#bcyL$6N`^D4j>{=8g*IS{O z+wa4tTfde&Wm^m;nJeD>5?jX8s(Mey=lqTTNu@B4QR1n#9HF zNKE@f*P-`}CL}GY-g3<2=^*#)m2Pfsk+BIMPfi2?;v?>nitRsam}T`EdH<pSE#dGDJ2U5gC= zx~!kXzf|L@l`fJEnE#_}fPsOf9A!{nLfC0ZK}bLu@F^)31UnXfW&1W8V9EEUmEDI0 z-3U3p!}L0yG=hTv(Gw;ZQ&I=%=Dp}~zFfJ|g~M}>Lb34a{AGKp=HVF>^Ja;aoAs-| z{}TFpNh2jYsyro?vW#Htq_?WB+jpoSByC2IGu#nGOrHfCgOTz%@~s zwVcfUCxo=fw(n$QWDjvKJb|RKZw}o_X*2rShxaC~;MWhEJ^xo(=iff2y$w{39Y>q! z(|Eg04+yPLPAIP*BvrQ&$?xZiV;2NKB9j!|sT-5~XAJ)C+i&Nanr6=mCvE%{ne-eo z$?R+dj}sW@BQF|v@YW`YllYkXnd{LBtnG}Ga@nH<96-<=CuOi6$?MHxO}r}Qz2~Iy z2^`M%vglw+X`-~fpZZvwA?dmYgil6`j0rm)(kq4fxH)JB>t;o)LXUZsROcq*aJd+K zX|nyfST?%BSyb$3H*&|Xe4g`e1S{TdJT;`Z073UU)(;}YG%^O_+yo2!202d%E*0$G zdWOKfv7YLfbqtD?l)OmJ*o^(>&%!q?!)G7qP_sIW|*0o0$Xk7w-^J2EHBmQ&g7;i+jP{NK_$Rm+%^6IAG8sqw-}16>rCV@ILz- zdGypLN;F0fXURYO?W44M^EF=W?;rn9eD&X7Q1Sl{g!n&stR~;Ow5)Dp+b(KijEOZG5CstxP*4z+CS4_<4agu+Xd0JNsIgZ}0tnb7uc}W}lhkOy1;0;Q5uc?)z%@x+gDtY3DZjZJRc2+DW)iqb*q_OFZ(u_H9b~-(70{j^j-nZH(}lZm#K+IkKJXFSd?V%eJFlp z&xzT0rK&c1rEC)!EQ$V|?QO{^yl(d!YUQ{sr>i(3tIF7YZcMWI1=G~rq zN?2GcNh6CI92`70HpY>%c!1kjZ7Dh?X85n&qIE^{JtueX-d*Zy@Xni-b)S&@W7%+F zb~?IXEit-Voz3&To+-N+1iO0BV z|LSzD*rKR!g<%8!BVts*vNM~r%*s0ZH=*U~P3r3h85#S>$CF$q@-FG=>1mXBtX>&k zHhyq`TXJ=2wy`BiGw|WV!x`;i2Kbw$;bv9cW}A}L*veooIy0FVbq}2q3D(iUL;PE5 zs@l>F0;$xa6!VtE_MJ9Ocy9Nbr^oyIWBU5~gmWrAM0i4EPH1K)ChHVEG4RZ2Pb`?o zuQU&R_3C^=@$K8U1$0!qy1SK=Go2Y@&$p%t{D7+P-~<-$8-r_kKSfubE?wf09oqy|^Y&`-InGzg~$s^>}sb2Z2MSmV60ULixOk$2)UsX9cz8rw6JPbF8~FZ+wx|F1&g5z<~pI z^4Fhdn>8LQ&EiVYZ^HAjXiqnC8P95MioL>+mzPJoFi;qE$Sy_ zCX0)UFU=Zb=|$agJr?`v#64YIUGak7qzfB;pwZ5^A2ybwBRgv1@-9}!jji=;X8Z2l zs)mNqMtRx=jsj~#zSKQTOer_MyjKs}{Ns-t%ru_{vC{J$945L1th)1+Crj3gi&y6~9Y&hibvwCq3e^k;+pdX>;`BYb5$A|8tjJ~SaDU9<@w3hKeueB*|#rtae9z$DUC{{@>q57h=)c+ zMUh*h*xK1WvM)*<@!={yJ%iO7l#`RgQt|FxOr1e#b>&jt@q5o}L+yv^j&N$_)}^nc zc&?tTT{F&TA8|iI`SsUdmpf?|569c!cL#Uc5?DJTCTyMQiew9w$f7wZ%9->#i4b3Yf7u8O_5P^cu-I*UdhN%eUwI;{vG3*u$NgDt?_P)Q(hhq=y??s6sXN}!Cbm6 
zw>NLK|M=>M)j_fKdZEd}DE^+CQS$mhEUK!i?rV#K=8drrVp45;3Y|yVGvWjt$4rNo zasBk(KX-R`^V;+jUSx6*Ud}4*>Cq7t6+IgwBjB;G;|bQ2`crZ1JE0|OO9|m_Qx%$` z1JRU6s6}cqv9a_Wc#^b?j7hqltIB15Osb(t{s z81#kI*jt^9%>UNkzSs5jR|8KdE8tyoY|#yAXl$JQ?87zE^iq+PM*Ew9>s;ce-Px7z z-o2Z8Qmx~56RUl8xbb+a|8%;vlvD#=ZDL1lgjkAEWe}}Yp+Lp`63g~P&Cmx+{QXb!~alYy1h1*>hAb>5{ul9=UY}o4q^31ndIB!%tDB zrl#`s_VfjDYT7auIn+Ow*y8;u%1`tIZ}y*4Gc!uK&c2BW>#PY};YdqTXsk?#!pGIY zXttU!l7)qZwj83p876*`z6YI4$3;X$j7&^c7THiz@%s4g*M)>uS=^NMZ2z+D*I(1} z_n3Wrbz4eCCX<6n!1fRF9Wx<<4(5@)*Y+_mSnU6`v%Out%$Kn-&u-9uZcP0FlUT95 zpOesj5tqy!x9KYh`f^TA^9((kK8=jT9};m<3mU9>nt&xmO9>-8@VZdszwPnxZcfeY zOd4D5VmI|(x9JCZJTC(RXp+=3Vt-=(>HbA=an6+XFv+C**oLU1(?um=-hra4GmS?% zKa}5J|NPFUqr|i1gi4~RT13R3QK5nE_65(L9j<)w)OogGFx+*2^Y1x<_=TK0g>C6w zNt*luLPCRiS9BzvTC^lym655sQkG^?dkBlaw9fJK+h5VOMq=j!%?7GM1o49R{{B1d z@4rXYimhw?@WT&5v&I?fDJ5$zkvaw*3!k<=6?N~Z%f|Z^j=NlTJx$co=-u$7?ssBTuz{Hn{g2@*0UD%f+4KY|KQVlc1jY*PqDm>nqdz_rDk$ zl@u>8EyZhOncwj^sQGDPA{jN_Y^txKBlkv>=*rj9&<@eFXB8%kmy?a!J3BR*+`it{ z*Vhkt{P;zJYRU*6;1Y^FEj|5{FQVzbYYezsCXa>4YsWW}_vG~3cj$3=w{#FkeO;tD z?cTl5N178z`S@mZ$Asn9VakD~B6Jcb_#YK2%?4;*;7!eVnrKQz}Wn$hO|I%)-=82eRF{=7 zV~;9MKP_PC_FP}}`f}&)-N6IP*jG)@CAK@?yjfmaDsI)0SuZEH8ZgMmz;CUTqEn<4 zCAl+@T~X57+B$zUxnQP2nrj$MRlu4XH9=g0`4KRHhK(j^fj!hwp5ON@=vtxX51R5ZgE$7^{3LiSUy~pFRtJ*)g`KD81jWC z)i9qvU6;B(7InLOD9eR8WH4+|wT)Sf((p)5^d;a&SsmG)YBlBMbzOOOyry-BftO^k zbZwoSf*(D4gpKlo&#J?;A)1nZ{nJI?vRq(+ShFuFLOHGX1E!~g{QV=>R|duM zvDi?EbfPE63!9QOTYu~9^<1}V9>lsqnaZ5GVP}^iEQ&%FHn4=5=f z9v-BzFU?P;)QT>1l)b!eb^Gblr;_*gAK~KS5_g?zwUqr3z|K6p6!3P>o;^WxGc)#R z%dPRcS6`~8gonF~u#0QXElf}M)DCl>U+Nv@U!Fa7_H4t5i|N@AbXwz~oNKTD)aqrR zj=Spw?pt4-^yJ05a2RfoaO!pl#^YhTbmB zbf-)|zrBa08u%fY%O+1pY^F|JBlmiwQD!B>xGMGN(W6ayb|#Ap3+3YO=f(~_tzFp+{X>E*$| zXIVbm)}EwOq(R4}lgS}A*Jj{6Cr6>wqiS$gIyySKe0{x@v(n7WY-uDRWy*aeu{B_9 z%V31t)V=<^+-3{8#i{=06kW0L_KZsoqb*sPU_3?mR+U~?8x>`w-E^zTbh_aTLnFaMVpCp3D;Yn zQ7dXO6zU%E<;_QIUtkypN!r!1jM_}S(x0otMKnYCZL%|6Z{DO?Qkn>fIMOaAl-l_Q6=7`Svfpb~7bU^;c?k0NN&8I^_HZ%7VS` 
zvT0{_(x(CMKYf4Mu~E=yxXHf`tD>==mHseY+t}E6aL*pSEUV6JtDUPJYl|kOOJmD< zw16ypIp)nL$`-KR3=Z*Iheamgd%VUyaoG&vaV&NyeAV~v-OCF%u=Yd~=g0E}Ias$e z-TmvYXYP7=4dgzkFg)uyUCpmD_#_~}aH2iKB)kkTj*m+tv)sHB7^JgqGH)=PdwF@8 zp@W{!QtjE`;bns}_CxX+fBp5BvLyiaK!0U$XvyLLL&l>=ioV}!AD<~TU(XYfRbL0O zs#1)VNwc&YYfEzz5mVRoM5*G<+Jy{iSTOQBZIhqk+cmDjVD`LUze*PjqXS7=uqk8Ue-^Zn6 zsRkh5>?Sf%TwFW={L7GCgrbpmHA<4dtEH^0>_js2CEv#kJf;r^9hKvi&P=vvl$m#G z<{2A#_JBhe6F8Dqlruer)<-{g#$)AXf+*Sfk+oz2sq>rGJ#!NYUY;8-c$u3n*3+(A z8Di_gWa8Ot=-CU@*_tXUBGNtLUWi4^a461meJ#k-X|kugVZPI*BrDU^**U~@X2?Iq zuHwpEtK3{`aJwbn?nA1(fc_RhDLI2n2zueQcIQ}J7j zS5BB(ecW%Km^k=pbkw5Qar_DSApqQJG~&GQOR~K}>19Ey+=BdDcO5(pUez&@<;D#h zW6;)vYl@ef8~pZ-1A2447pGvYNjLY|AefA=(`l5D+l5}-MD};{FaS{=UzjpPy7s9vaW- zt(h;DIrd0cSh(-Y7lZvrE>j^zMAk=L(hRB$JQ*hyc%q`dzP>I6@H<}VCAGh{xtX`j zfbcc&f-DyG*)!|wLsFohUUS9n%OjyS#b+|nkLdNUP$*Y{wRSQ$^!H(Fw^lql&Z-^p zXI)Q^Xzd!m!Q5CoC{%`EN@ga{&V%RVvHhBX0dQgZycg?1i14gpwC2Y@etc%z|Dgge z;q+bwNl7m(n3!IVNSbJx#Dcr3ltdzz79=i7Qo zla`Jy4wSC+^p9p=-ai10@nNuD23^uvd+H$d8P}rOQTpah-@t&%KYJKJy04&pFQGVb zTA|9(^O(vhRf(T;i zJ1FWpXIc7BS)O~c(Rx>9S1p=DJf6b=$x0|yDz(@;p|gFDIt=93k_Fg z)nlY+Avg55i!J}-CMUWWO^E8v9jHfE4ikB1fCadf!^2Cih9I)kXk1RmSzb5DT;wVS zEIb$MQ3XnyCa5!5PrVJI=++eF7xEVxW^0$LT89CO)sI|0CKrOPTtf^|Lo zQ&Lilnq~_oi#1DDrhJUtL(UrUm^DO)b_`;b@2qUUBv4xcyy-AAbd^!eLrchUtgb4A zyE#=)92ERS?f5a7$Ik>EM%04jo(dW*tc(E2`^)r)_fA$Vu8vIt<`l25xo+qV$~Bp| zXVm+XweJ1%LeT(&CzRt2XG73ZjL+Q-OFp?J&3)OibjqLk(o8wC-w=v$GydCttnJL| z%5<&EY_n=}?hS*4)~hPjPk1v{ghWMkpmcBr>?id4qoc%krL<2Em1$|xb3LK2&ye-_ z@sr1nNurqwf*lafw_5X5C=TLV;N|-!HDUE85$?vOqrD|15Fa3=J#(HOP^bNDq?%)` zourm_F;4cW0)&jcd-u{XFeDIK1GvshBm4K=yPB~LE9g6fqh?vQH>8$S%~h2o`YAgS zZ1(!Etq%PkXhAmLe-$5jsTeoYyS~<%4m=PEQl}nt5@`9Ke|}+QWgSN0Uiwa)Hr6(nHvc-abB!fa*!!1=~JAGSGFO4Lqe=On6|UJX_deTg?+*X~;2x z&QsB!&J?3}D*L~Fb$fG`QRO~X78W*MUS+ydT6Nfv0k}1&pS=yWmVBdFAi*qn1XoOI z!?T`@h-8S2>gpI*J&yeQ@1Mw4MXAon)GcyRM$M(?H4iwYoge-9@qS!L?QfNSVrw^l z_j_XXK`<%f?%lgO9*ahm`5!-COVTar8M$;57?)Ec^D=>+$=U_6D5Gq6{uByD@W$uA 
z^+tw@T<6In%;GR{uND#zFavof)#j8|?lXL^X>GU&L<(y2^2F+VZ!6wDM_n3MNcbgYvp!}%sez&1km&{IDYIHeXR1%9Xm3G#nhK6 z4d6HE3(;+DYFG@T(9v4JUuK7*{CaGEFr8ypy6>(MJHmTv6*}ijUR(^O;Z0$+SX6OZ0UAb2Qz?HDRPU#fB z$jv>6<)Ci8uOdMGH#YXx+N`k%*N_O4l;Wj$J?~M7TpFSV>ubw4yw+W9>8Fk!eKX=f zeF`01B}FF*g63$sKeNi@r^MD!NIVY?3N)k^>lC?o-@o64x=aHN5`y6{1ad{(B!_$Y zJ_8dmPYSu{ot0=W)!YMP={-%?qb%etEfc{O4AcQZTt44B4tY5G5|evV-Mn$Ed5w!N zp#fN`I~t$u-hcX8-S$at9~o}chLxbX!LhN#A2$CCNv0|!3azWum4DgJIRQ$YJ!H(M zVxB$m|D5CDQ36vheEDNzW1O63QSr~a4{1GXt1`}jsPx>a*PXvr^wgG;}j%~jb*hWpdR^8;o^`+Jl zvNKmlb=S|nebgSl7`)1z`0T9B#qxmGYCq4_*50g5s7V@aN)0?gX%}m8u`Ynb&;ozTdYh+1r=QyVkW8qwkT0j zz!hH#&+qx|w`VFz8pES0B`Uy7P!L4Oo&yghTo-TLbU5_!W3?Aie5ce>&l8TwG8}pL zkOF)G_#yjJjTk>&q4u~=iV%qcs$_ajPAkEE`?IXq*H%@SS&jR_jnD@m&@@x+SGTq# zGFNqZ$A!B{MMg%ZXJjZo6?A9<6f0c<1vek6i?p@3SHhwOl29#ON*v^*fO-XIWMuSs zEL(x4s9d;kVQP9>5yh3)dCE|{_l+B8wsp4#I>sY_K{t2zopTn%Y}0QM>xB> zj)s_oS4<0gYk*{CTa?!5XO@e%Q)$iWCc{8umgjS_#3h|Fl$QE3!|6H=ZY6GF30$3jp zOx!$sd^BqkeP0P;xlwy{^%XP{l~6t_4(1D7a8-@hO6zjWwKp=1GAfI$_|5bpaCuudG5eRdFdz+DYU_07Me{eDd5^cZ)+ z@*(WC+@>tMYTw3apDPNp}R!Sd~5uti_7vqk~_PB^5bSJ&V ztX7>y$O}^rW7rfuL&8KRD7}1}U(l}syS{!(dagF17A!!Snq!q4HUgT0iur}| zP3Dd!kKLJU@8@~y8hiJU%jH9d4#8UpV0n7(#zlf?J2K7eO3!t5bv2(uQW_Wdw1kQ@;#qPxch;WWFce zYID(bAjE_afo`<9pwXMy(@^znS9cKn=Ku#QVGbQ)m=p@YBAbPC0nvaX)|H#|`)cLN9udX4H-C0@oQ0n?0)kNElx8x(cPA23gNjwV zk8+0ZMWu&K>$pu@ZPkY_snaK2^<)PwYlo!MNgDgE1+b4guj6Gbo|+5THlL;Tc+mgR z*r@b~dq8kvqo6KqSHMS=r~%o>r(d6T7=a!zwJKg@C_G7;BHm#jEXSB}=xR&g*_k)L z=@SW&2NdbU4p$c! 
zb66l=&)dLo7=>@j0}Zw2-uTi4M%3*(SroLVbK8mKfP@gw_DfSA{1O%K>%t^GE*n+{ zkZGfC!xzeVN1y*Q=)z;_IYo5S#8`_{G9lk0#5{FWleNysNOMupl*GgDI06 z3XR%N05=!8*yQA7(Vq!cCG^{S!9k9m#)b$v;Dp5gFtE**3MGeaz9`xD1-Iabu zrhWVM?G`*0VV`+JWF$0rCc*om+Qfq#sxbS*PL~Ex`QXrr=0>Q9{8;--#p6)^w~oswGPXWH4k{W**Q72^7pz$p*U7%wzbDGb8)o$9F8vZJYA!)={NIiChLhP6o3lY)X8!^6Yaz%Hw^t<%Eq!H$S+3oc^;ag*rq zcreB{3jxFv;W$dAd)PddiDf9}Pp2E*7fbc98Dxnw>kj7BG-eAZH~Z;*)$?GWCJS^$ z`qS5LO_Z#Alx8^rE<$5%yYfyk3}s~lJaq!E?}*woz&aZ6);Wuo7HS-63@VB%##}s= zCzi7$PV|gQn=KWlOF*zmGU7Q;$ww>AA`3jvRst4AMMcFO+pw|PM;RH@J0s#4uG|P; zbuJ9C=&ip0R^z1ttN0D0KmPc8ZE2JX&fc$j9(<7RqbG&H}bt6_6QZ3KB(F_ z1HSu-M-_&Pn(2o3B5NV?Pp_^ZJ#h&)!kuTc51%M&QDo$ZgNX|u(p<# z?MCmHl}O(xTI+gJPUJtzCFbfoj>&~vvVDbmBYop;Kor!9&eTG1+?cpH29IlqmS`t; znuyiH5UD%OXabNmH`yCuvZFmtta|bvE&c?(QZKJP-J%O0Hzy1;XM89x&;8Vt7;|B6 z(Ddy;d(7KXBaId%Q{H`c@t9(EO*?OIpQ&d)MSD_(#_o?Tj~a(=heY+fd;gwZa+w41 zm}XP&#XOJP+}s!7Qv@8vj1$^9zv8u%fd~3J@$i5_hWAmMNB+?aB z6KVfSYz2eI7n>U!xm$;gt{i*xgx|)R&4WZ|Mp{#B4#ZdjaP!m3-KZSr4UPEm5G*WG z6BPcYcD{&^;9M3kWHdXw?5vf0{Q`8f44MO=wHCm85=?#T88Nj9=w5Uc9zF_lwMByw zo;_dJDrZ)A3 zgLHn_s|rxMgW0xc%zh#ScWRI)I>5I#s@<{$Q0qqHaSDX+fN6M2;gokw(&R= z(e<#8cJgm7{Pvy!kWC@HOios|g{x$R`lYF#I)j^`m)ss`dnjw6bhj8!VNOwDA&*Uu zFi|Xs(eLJ|4Y$l?ZY<WeDCU zkj(C;*I!GLUl7k6R4A+ej@OxRA(1&>!95$zOHm~uX5CC+d~lzxkXwX>@&XCLP_alY z@Gx`8yxVr|YQdZ096$Dm0Y&KUwSu$}rk`MyPRGEdl`D_zL&48^T3gY4+|t3ZDnp zsJ_jDo0!svcVF^nOP+Uxp*GWDkMP(cg_W$(oP>~%(3YWrN5evCHzj%+ibw~6f?6pBFN6es1_olKgHIjsZicVl z^W#$|StQhW97dGHe;5U~C0kWMP!JTiTA^v~^;V|7Tt7OUn-|2rcBGk40v9IxO+Uy) zKnSoi&Bz%oRz>2kb{{_c-y4;qA_d^Z3CQRK^+WDqqwvoLwpT@_Z4*s*8GeW8c6z*eJo)Fv3*~NuqwFG=87GUGV!@u*& zPE9>s>d;VABMZLfi}>KvgMK-#^VSLera34)?2Y4cq6^Q6*11?5iWChDDhvjPwA_ugh5)?Hr;UYkwvR1Yebw9ahr#Ep=;?YA1C$ro_nyt2ts zhDm`(*@Q+deJu5$Ovu@Hr|BQrua><7=7*?CLNQ(r>Yjg0&&)JIa!Z;EQ6F1G_UeGg zFI5x-onmsi8zBvmZT?iqDOrBI)jkfjYrM8Fv0Fd~lDth2e%d~s%lZ;@iDW?|IeB@T z6~+XdCb}3V44{j88KI*he=`JQ;GN(_1sJnQI&#=; zbRx9UC*<{6{@^5`zN7V*n6k`?GGDxnkDIA-OkxLCeck;7+}mgruU?jw%`P;yJ25u~ z{Sl6b{EzC02Ry!-(_3ypi6XF$*W3!f 
z62{Cmqd7BZ?Sq*nfe0^Kd-!9e#D*ykz=A32CY zD*eNnwg@LDXY2eh_}Gr+YzWl(c^s~hnf66Dow{;wsPxXJ%k@tT55GhVr>U+C)-IO% z=_>P7#LlEYImRZw3&4(CJh6*w8-Q8%32IHD+35u6JVQ{Ac{004oXTOZsm15Be{01y&u1QCj;O9o?dEV@o^Dk9rJw07!DjT%WV-^9?NhJ zXCe#w*1vW^U`a^{LR~TFlP|s}-4vjQV@JgTu_@z)vJL;ni7rFp=tvb^g*5CBT@;TY znxGCiYN_9fqp1LTzrWC zOEPHEh10sK}Bvdpaw(epQO~fzLm`#AH zPgwBr2sZ`D7bNQm63%1Z#71TnFeH)X%{zhp7k`?Vp^dRu6iIRwMjGsoVF>J3F(3eB zfwVM0P~?V}Pj%K21mT9-I83ZhsG!fhX-^W@7gF}r{Cq2+)4%?H%oNp0!q(sojCLX& znavET>TopJiyH(gaFVvGT-GZLS)RFAw;4SgFT3in%Xk0$CcU&!g6>)7sL}IXOAGudmK) z6uD%<$fy17x5JRjDtTkzVgn!r5>%Bi+6eUu$o6BhZix;!9~G+w%hqQ~Ng28;TB8{R zVe|UucM#<}3FL-U83hv)B>70LzEte*0FBNRmqQHv&jsjlVKHHL-Y~?D$RZ ztTAu`!X9qqT=n=2y_kpFuacM*y7ea2aa=?E!VoGq@$fY}20?+9OFY~we?2{wncKT(Ea2WIydXc<%b82jN=u5Z{$SMl$Z_uMC9RbgcZ+M?UM?_WhqW2ED8| z!v=8qbcc>`V2vaaNz@xr1S&lqe)IXcQpBmx@2L>fi1qRDi5VJKA;H+wwnONZHqQ9S zC5UF6>*2f>``(4eyuxp=W}hQM25IGi*Z&}&BrkaS&Bl~j+e4zD3fQvruA((_AQ%hb zLnHQa3fB9dfBuQ2wf)zR7oe@bNv(c%;V&Mv(2Eb29Tn;Z@mrqd7Gh6zL8To?TIBvaZ;m!s+vEHnsqw_?>H5-zA zrqiSjWc?vr5g9|m=a9@YHpG^#TZ8S%PqML~#gKPMJR4F#P?4Yq@QZhs)mbz;r&?tYt&yif~b=Sisn>Tgs zAcGW6JAh$9>InN#MT7u(MP^dpkN_j*C?FQ}s_mJRRV*B%5r9-!yClv=WH*k!jExyk zLsUR>Y0-WA_p3c~f7nUBcK%xdJU&UkLrFjHq%ZI8)|Qo(#Xv|rde=)d0~*taJ|_$| z1y*R)eb~2YlT^q927Y3g1@!}cyg=`V_xTu62eu8}gv_sDNT#=`n0qoeLARtB#EgXk zBH2JL4H^%NcUijFm&6n z<;D^UArFkC^3%!=oeWs@|MyYu7rvSL98+U7G|^zP!%(!A(CHo4R-6fp!F3=9&;+?> zd#2}<5ZaCxYTYZwDBwQ;gNGz&rJSf%pE^NsH;v00rrjWR>p$m72rrrpNak9;o;D5svO*5HL*}gZb?Q)CX%rg0OiUP9)A|BvMTdc z=)Ht$i+p{x887G|78=RSg9Z(%yLEh@_{FTvD2Nv6XG&S-%2*HypuO}M@4~pFjbIP% zw*urBpNCvRYG&EHcLd|aW4Vy=3|uDQ6+u`HV2rd(OrDKp8?8ap|H+ehm@e^PkSODh z{)c?Jc9GYyO;t+D+i2S~K#N8aKZ#X0H8ounIna0)h(&?c{;z61t5+fD!CxB6`XtNTRE?HpcjUZn9 z?pQpK?yk)nb5c3i$ix*U(TIIP?z`OYlJ!<{V|U_UiId$Z`&2LnTmK~{fW2m+Q7%Ed zVbPALNIC7_`@<%wlO+E9ah>32Y7;C7Xb$^2JPh>pp>mv}C;%EJwTOLt`#bq?1cDtf zHbmkhxSU3)1x+wrAci&V)s%%6!N_aY@COlmf!Ihme+YOY2+SXzZVZ@0@9FpV@9)@o zNLY>B^ZE0)kRLLq>}NHDm8#?efyjQx%gcPCD~~Ku)Sp-k 
z35rLc2vVrwt`h=_n80X(>ShMuh@z_zUbYcud85x4%6t9ec_LopXMu~k(hdHt6!pW+ z3BenoC6E}5Zz_T7_e0f!KPZRk&L(_tn8_&z?(#_lWH!gzZ{5`Ojig$^w@yOB1)t0% z5wssCN5P9~(~5A#OUOI!xTmf1?c28{ya>pDd{}a$n45w%8Z-+>*fqJ#4f7}c2y8dw zZP~fQ)l$ugcAvxkTgS?RNDb zOV*lyqvn{RH7KCm*keu+$m^uAFfxX!0M&S$#Bs(eC*c@GVBk(gL7pAVtwtH1!P`Co zWv+EqC-?f1R)_YB&q6C>c~lWFdS>G;PCtd;M2rd&j0Yd1sW?;q{r+wiBJ)HZbQ9t)B>79V zBfuUXm5tAe2VF%ZUKJB_{6+s3c=m2I?nIK^#60rGmO>dZ!{&olT^Qp|(!I!{*Q0#S zBD`w>dyEi466MBtS9824Ticy~Y4SsyQYRynd7cv%C^=R)V+^xGMOyTF(p~{;yYg899opa9~L6 zYs;1`X0SLh;mJPpg$LvxpB{)(2-h?QU1Wa?1ZfS7P?MY>A~etO;KrYFEyPUfj z)%K$~okYzfuZv#5)-c@N0<4`Vz@SMT#qI?pNV<G~w6qtu%t9d|CMlm*Ic_qxLgd@n-0;->+bt13luwRwA|?rS2Gwidhcgw}axsiJ z7UGtWEYvaZn0N&APC{M4kZ3?kij6;G3Jwf#LC|NT;NEn4LK{5X(fDnS};#hRYz0k%dCy`~KfEaOy;t8^IXR1!{o5zn&S&r$*#K zzyIoP$mbLKNm#KqRAQ2qQ^8H9Z@vpqU`!xybC;jv@3WNKrrX-~MJXBX-do#JbB=I4$vz;OiU0F=;2mD&Nr$nJPnfI7YF(abrVkLnZHDn1@hK zxAYBvc?AFyH-M2qD{3?fB?-n}1&+cI5zq0Khmhk9L{Jz`a0)5UbzjP_Kr-RH_XY7J zlgkG&S@#T`R0mq2Jm@abr@HYP$Zp3j1t?Pn6iI9QZ#Pnkm>v{P(tqKiB>^}P1A*ij zV2q|<=s=39<;l4l&%yf15e{VbgJf|4fiUZ;GIXPlmHWob+*}K;pA6@SQ^{E-4-ScF zkhngv0`X`@frLm?BxCypRgV>+t)ZJcGZ{=>Ox-|A$V^Aqdp^IOYcs7JypiN%`)A z?nB6$SSlEq(rkzfpvS(!AIqJ2-PO!F1Q=eku;$G%_`CKZ8EycgrS95*i#xEYaVmjQ zgI*s=C6Xa8yu!ZzepV!qw*2(d3k=E;iAH%Hb?JlQ+5V+@-~l=*t$#n@0q3A&w)e)% zxs{aH+~vDRbow2f(UQv!a7;g3hj<1+VM>@FCw-o%8p|C?D;?gCt!pzKo%RXc*I10Wj*YnEO6n zlhNI}WB)UFq~&k_djgT|NgF20Zdaj0RGZ|tpU&_%o3RyxEk(qB8lF?vVl8) z>h|Rg>P&w6*5?b~2aukyI(yOO`Z9MnGTfsez zGxDNKHzhv)S7(T-gXemIO2$UORY@H-N79-Md%-?vY)FBX<~0sWV+Lb@Lj+KX%y&hR zFaTHsNiL+l-;Gb`{KI;yrenbXS-a=;m~;N(JUP*o+_cY4HbPXJARG9x_TaxrFjXQo zfhOEc<}f!-8}LLt50ywOndEZcT)|LojiTwH3UrE$n1Q9NC6)!yh z89NG#x#IQfo8H)I@@i^oXC$Gw+-f+?$ESkqB(ZPc*7aj3y!=2a1z#a)56tA_G${f) zL2!cY)g+GE*;hY){Mc<4Nuo#g?oY@&?+Pg&mw!SI=Ryc3=&BEiEHxY<3z6>&Iy4eH z!O*M$r1h0I|J>&Nd2KwosrYwLI9p4mvxfhW(<2DSgvNp*bK%S-?2%gy04gW(sHr%V zh8E&M@by2=PY^~iG(-+qU=(ptLQz7efTP38s1fON|9$`qs=>6EJED+y%clsr1hyie z01NbP!+SW0fJpUZ+DuWY$9IEpUeAD;3SN0^*u`V8uTSaCo*(i~PV`QUc^js%RzdWj 
z-poM3J*-Q{UV1}%S%1(Bp3QnP%d7Ro663$*2pFW?B1PbC6V(VqtK=Y(x;lAE>!qbI zh}2-(J6E}o(jwsl1cv|~Q}WRWEs2!Asq+FZjLk0KXhyr1xg9~J&r*$6J*vvX**LvN za|${q7}HGak1`6{U&hqVwr$(eHJC742(3ySxltkt5I+0ykJYBa`tN40wRt=8Tl!a!aQkpD~!h>^2)t zaf2K5j+T%7bl10zY^wyIx05(d2F{`~CeTR8WB>jaIN)cfKUfzVCyQeh6ZFJlBgaXR z0WNUuV>pkCXoNT#jYxasoT9zL%;a}&{CPGMu=Hmw$;v_G2av7`4lrp-34FjYHd zEqDx8bGap-=C*Tx{rP88wv}cA(@s2wq&>`%LrBFCd`IM3lW=EA7_8(V7?nh|NKmDK zI(70}at}aB2|%?v%FN7ZMJ!fZ+Xkc!q)%@&zVjZW$rzpmFS!zwjnap+BiMz73*Ua; zKvI27dD`Dj%rd(fPoX=3xf26m@0*A$f0s}wc?RsgZYcz52ZjB!Vt{c_|{1qsUpa_NfJ?h&=RVNdYh zv*>@>=&*@T-c$S0MaDcv>%MIEg({SS7uab;`61cIo!doO+1c1GQ!(A5-Si3N=UD&= z3;I0d6SRAuUCh0-sN6D_6uLd;8JMTbNPW~@7Ix?;WIzCla~94VAu&osGZ#3SqSz2t zk%0bg=^8EQupe1o?kaEEM?E68p%*l;Ve`&u#L2_UkKk%=0}+ zsYe1qW4c}Xj^n&Od~0&PjCd!B#}mc@DJ5s|Z5rfHGDIOIxUoC=s-Tm|%p4HmTUBot zXnTKo;egrj(ps}bpX2&>2Knm2<7#7x2!W_OPY(r(AL4#So0EBe$Lyk+QDmP}pJVxh z@xssBiC@h^V$J+x7JQ+A7`7Az2%p|Or*N489*&N?(lc^$9+c>_30pP2@ON`_<5298 zZa@l(96Lv53juIRBS9`=V`8{kNu3M`k?A}%g(JXwc)2=Qn`E^mE*-)d07!Ra8kIng zCPFWIjcutKzf}j6m}rzL=}xd0qm6@iVo_*Bpx59WB2~QYnJ&9pcHo0DcVfN6qzJ&a zLjs5-J>lc?G5PJsHQ#D@Rr6ZMan3ROo9d1`aea<`juKY?2fVDC+=)%J5&RmQhJjy? z;^TeBmfXGZE}XkEuNqK3UOpbJpz$@&VN`|8<7}Mz06P`CMH*m|ilhWOIjc6)upIz| z3YRz0eeTfd)PdoVSOwHOx=sQ7#!WG=5esqn^kz$G^;NX=CY<+x?wbg=_afOTBw0xE zjwGshqyp!BDO|q%56*sThI&J0wOkOLAfurW_`9wrg6AWMqXauw2~>+5T2%q4g?gxn z)Q8Qmbvf8DQM#znfU2V?H!Vo0HDg#Rp%@ybG9uulJOZeVz;__?`0$mF1ItFg+R($! zQ;y^?=bSXhYG}Y4BLN3;t`&*NBBhauqK|F^eMVWOa|e^C7FKN|2?FDc4ziCh$D@jn zSte35pT!8fc=g`D)4!7vV=pyddUrQGA`e7?3xQbrzkG=T*dVqG(yn0PSD_gKh#wnX z@^L_hi;Q=Wu``^lrH;4Xf#c}l;T*QHu_2)af~G!k6M`gFgOBG_6epkr^pqSdND4Ge zYPBWO|DbJ?lRS5HasjRrb)tA_zc?r zoSd{#p9kg}At#X{nvsN2-1zf@*}&)IbrG-?J<7bgajp|S7NJch>4~r46y{hDBaLx> zumz59y*tX@h;s&shDbQEpUB+)ybWd-Dq(%eT6HfuPk}UW91d8whWOa%Q#UFDUR2R~&1 zdJ_kR=%^SuBbpv@1AtKDhy;iebQA#qIRC2yRqa^J0GA>*JTY_^H>>-gVNg*SBprLZ!@J&RLfXoXKNaLejf^tF-nJFa$>O}Gf0wYI|NG}sF6A%!;y^_i! 
zMGpHQLs>)cK5=55!R;@&DhSk)8;=nnGtpaw)vWyCk$9X4i1!xcsOHFg=+ti|JhP3>R>-!XE8xQ(Q3UP&pISTHDuL`cii4=&RrmS-67ow`<3tT>GP=X98?dsIWA4-nfa3ugA>~(fL5>oC{n1zc7$?p(J>|`NF3RCj=!jA+cTmF*An5S(mWwgO> zZ#^gSA<6IoF9Ginq%jM$h=-v=P8-1LXM-9{LUY7}McHF9!w4rxL)uswjtPWB)<}}- zXb$E8hty~=Gjo()IAMV-?3j9>&P1%Q9B+M$r0_O3(eUta6{`@+b@b2}PW&x^7ommz zZwZaq^zL4lZ&z|ODfOss3YZo*VyVR~vx5y0F%5+$DI4dR++#7r80I_rllYly38Yk~ zKi@be@_hIcKC5^D77~I3E(sWGnJUM|``qCwwN>IU}z3!LCab3j-w?#9iBW zO6vuL<#?p0Trfu3kC8$k%D%^)n6oSOz!=-chQ(wkDp@m<_|FJo#1FKf6)53eG3P{~ zLYgI}S?RaT zR_*mqSMx>t|Dc$6WWK~~V=M^h-Ul1iavMV)qjxDhp{#4Ke>6*)3Wj(`T{y}I!Eh?oMn_|>&Kl03d78Ad2uz0oT7r1xdfG? zx0gH4k&Au4$CWEjd)N2K&-r8=1(J!6qom7xk;g&F#6?9$Ko#tna}~;|an>pxhfJk7lUx44C8+L{7)Iqa0N1U=xID5k1@+ z;q!Ki0@^mA+?Y-yw+ZLPtIHbGbgj%djdQtzGpEumvAe5_Kyh$5QxJa=padzkFYP*I zmnj7}Uzv8cwin%~A*eO4zsVDo7{^-|L9u?P;NNLvXjpD|7OKPraEn@pziTxQbSr+} zvai-VVTZr>Wjr~&#Iu_m;)6a#!W*Q!efJG;)B!Q_5VU_zP!psXSep@`8z&*em5L%9 z8izC5UPAqj*DlBw77>|6?=Z!UkTa^udxAp-3^E|K?M;IP^xre zf=EXY>7df2E4_2)+8flA^L^+0?|;V~M?lblZ%&~O0-2vW)dFJ50>ige*_Z`o!-}oV1rN~X5kI0%* z>urWnV_N`%qE;zb+GkApO`8{r0}F~K{a|?tyD@1+80fX1#1220_IP~w3N+~VL7P!mA8JN z>rOp!K53A(_?MgajK50FxL98zxpmt%0dfbK8V-WkJPI+O0N+n`(NU{YnaF9;#R=(% z5bpA|z?>2iGtMzaZ6zTZVc11wNjuuH9$~UYvFqF}H&tMWZd-2yU zh<{?f+p{V}`U`{lm#z7w@d+t*4eo_IXKoGVmv%{hkycRLycuf+zej&aU9xSI*i-Yf zGMU@Qw1z8&jZ&`EHJs@EvECNyLRX;MWJB(O{u?Yu(WGbr>kkB0kxU5(X8=%;i#U}z zxD)9S#{SRqP<_2nalX};kJZq!?&hhRrPbQD8<>7mlS|V)WX2kJ`E$k^V%s*JXT#Ag zC$akD^n~;>i~!1C*+iAOf*f3RttLe4p7eMhTFS$-kx7{fCgM&x$ZKdoCmrihUj*~T zGB4yhEOO~d+TV>}$? 
zYO92ulp$bg7)wCQ?wlXOF0`xpKisZh=^msKWDpD<(-P4gtEC@65NbQUwR=exOvzCn zT&;8xGb>Vq6R$W7-RP*0u+bbvN^%RK>*~V*I$!Ue4>wg%I03gF1DMbXm8Ae$0*NeG z?@e!eA!(*l&Q3h&r`gw+cAU2Oxu5a>|JU|a=to>NK#C+eBry(h z88|sn`HW61B^K;2BN1v~CbAo%8eHT$c5dWdkAYvq#x1<-P71da8IOAJzgfNp<% z^&%x4VHV*?MZ#@>r*{}}(rejb8WRxR4d?AZ!{vCDw*CU)*v>e05BdFD_=LTIRG zqoU_-Pn~14Q|T8ruA(bDkiWR5PUh;dt*mhL7kWrPNGmdP_&HY%;Pj$@ZBi&7%6JX3 zBNo&MF>hpb0e$1AF*eXBlRp)zC&*A76At7cZ$R3~FD%@1yV(UCed~)Ovc+kDT&ahM zdN!b|4}lcndtIG>0;Ka{Od(GPbk2&HqEd`nSgC+V&LZ+=nuh37S)fO+6Q8b}&W%ZM z=C8hFcS=}R*g(uxbwg_54{U}{!4S#q&C=kf~qPe#)k zg-?Le&B}QL&-LuE!9Z?8Fl_xvoq@bk3$PL(Xs?QiBHWCjrv{1Z&$}d3{&vq`c=52L z#+$?3l&j+x-tgHeQCU2HHFLn}J5T3{cIOn0SG1bFNKpelPId;Uk2KH`S?P-0LUy1? zjB0yiWS@4SAM8uih1C0W>hp6jWJFGc+`-_aE?9fAjk1}Q*eY#Vy=ILFrU(_I2_dkE z#Ha-fZBa9WW;>xSq`M|HO_(~=So?t!Gq{Y{-rm*Nti-gA(qzz|sQ|G1C4||m-crfA zah1BDKch}C$JpX)oH^I2PJEEGorLb9*`uu*)u$Zw6tn#`l0G+be|w4q=J=|mib9yy zwV5u7sDq&>&p=2HJw;_?XAUqKSy4%X3M+eh+=}KJ zq+43JQ(DlpkzfI>Jyg9o1n)XTI>R)FNYb=`yCp5g{IkTkaoNsC_CYE<@49(o4LyEM zoN=LmcKOmF+azSyE$IzlXoV8?X=M;Y#KI0!e}JOeXmA zWfv)5+w{%i#Z?u-$Y?Dg3EJj5c!%mQG>c>?OjrX+#Fd7lBJKcypnVDWbr9v(bVc_d znsXo>9-3=ofN@cV!@^buapZqSW?|#QJu#>eRt={@<=E4hr3ap5{@E3{iiaWR>O}x; z(xoYW(VtP@&@o1m9=zZw6#?QRsIeD>6@d;niW?gZ0iyHVs#E-uEHYNpAnn1AmwN0`65l~U{VCNjyrv5l=EePTzX=efs}En6?jQykD(f%#>QpnIy4 zXWl>9_ifsZ zXaZ)aNkC?!YnL#hb9ULV*IE=4tZ)zwt+g9AZp@F4MYP}m;#@5Y(i&zsus?tClfLP8 zr~JOz;_C*~OHm9eid=@f0H$4BBO47$DF!LIwgzLVsSv^(W{~#O7oUH=u=N4>Vpcdt zVyo+dHauVJWXyH}-)35S9>#tRT#Wuk0{aY)ei=^Iu{6C%rUigbfA`2}^QA&;5kR#E z1gv^@?;nR39wxK)6pDk99n!XD2?y#c_TdoVy{Rtyi=#y&=RS;4%nBzhQB5j-B^vj>ufB$mEfivp^l6hBC-OIICtxQ z`ck#(Cp~eTZ&jHeN_i61K}w7Es?g77bC!lMIL96tAVe&18SXsW+H(x_elA%&3*KZaNa8jr9lpN z0WGH5l`OSrsz5OQ46AAsfX2gbU*!TB%4{{JF!eC@%hbZz<)!?`NfBp^D$br)N4_Yp z&`{d;cW%9(T4$%DDZ1r#saqJO3xYX60GFj!+jaZiI2<`b`#m7elO-22Ri1GtIPYVE zKEu*6gj|_6fmpfu(2u~mZjiT&;&!+sf;cTlQ#z9CDEy9^=jh6c01LoF zOFSbAivnh5Vy6!lsjD_?$^u>|=JFa|pA?v|tZVxRQhnqh{AOb=4Q5~Kq`7Q2!>1!u zJsmPGr+jr*ZWYxqQ3-aYHW%s@ZJifM^OlMW*&9&mbWLCWCs-Vf&*^QPY{hv) 
zft{ywjLSmKwliY~Q4`n%tu?V4Q9|7Nd>6ie>&XT%sO)317n+D`s6#v(E#1;WzV$qF zlk*%dt1`^#y=vpL`s9w?NcXjR2X@eJKX^S{KP#?z|GWQD$Ip>GSSw%kGB+{tJ;+d) zHuZ=#^H@YP$63H$#&;LtN?tzfW{N7)Dcj&78L@+fAj3J+ll@`ZWP$+~Rlg(R3ezUp z+9AQgqx%Sa`)3-jVkC+-ChO$MdF)5{W(I*2Sr}ug569`Eg14#R=j`KN4guc;Sq|x1 z$Yq?-!aOY`iU>z`9+%g45H}IPU$uAN`|RV8ezDl$@ZR zhTdo$FM-wv#h}EfeE@GFOapf4Q;?(%Hso`_;T1`La&{n9hGUdI9g0yvy}{IgYAso4 zE$M%3kM}Z=us-<^mf>(*l4!1om_J5y;d1I0(*k^1X#HZz*ar@rr{TT&%`I+vqp#;| zdkzhrnO}W->6ah8=C-_3_&ESp91uXnLEb|8ZcKDYrHK|nzUANLggrtiy$6#Aibq5YMp$>)G-Vqg z>0pH;{>7k!m{JG;k{j&o`d-N`n>%Z+usxeO&6i=n{rKaL`Lr>#Q1 z!%4^r4OoLnpmjl6M3JZotwadqw0YIdC9B^Zir;Lu61Apg@4o{0fLA5 z+&L8uS9LHBjB0!*sY%YaxM1_I2oPL5PA#NSQYp#q@_~Xqb-yuHP@LD;fR3@zX~fceon)&fq{+nWpd#BKzrLdOj7!bB=U zSORaDU`m`3ZDRkmmc6u@keH5|I)Ec<0Dypq zCI~(w3>xuJ@PruqxM&jock;T#Cl^E;BHb3py^6qNKjCT=Gy9DH0$x(8M*Myu5}wiK29{KLdILR1FXB?#I3mZaIj93@7C zOhK*=Bhyo2QD%pLtXr#>WN`x}svvCJaRP$l57{?DYF4cQb^sky>MZIEVWMW&yjX{o zd?VhBY&qhC9WPTu)b-v3tUy7>B!wQM18FTf#V#|3UO)Xc+RIF@?b1(la^5U|=@3_t zYwXZV;sg90wYW{C7r0Co;h;;6F0(I#{!rMQ*0WF!+Du~1AQySEh1Cun6s@>ehwot7 z)UJaw{&@@tQd#PlXWY6?Vt(&mt+cUw--JY6=tz zN-R+4K3$%O5*RC?@jt+azq7U4v*pQWuyaP6&9IpY`#GM~1Vh()daIs2`p3`;rV89% z2Ua4`f{=AS%At*@4B=$}GYcK4I)v)5z2G+}`YMn&qv>R=?C6V=WVrG?-bfV41#-y3 zy1I-O6?X3@`&9YAj__0*;fSRKK?PzLAo7PMh64httH&(pUEbT1fWsn4Y6DjxWjQ%H z(M=<52-FXU!Hdv7fNWf_3|a>*vib)uNK`C|(LvXP7l<8IWzP!E7k?b#`D5u`a21y5 z2O{Thgc1t?P6VEUKNNs90h81&_6>3*P3)Hlar;nLYW6EFLEc{*pS$IzsE!Pg+da_` z)-_017fOA}{tEiz;c36|&*TyvKz6}04bTjb@r_J00r)d;EdqHqSbj*63p1*=m=9(I z`v75E8=U5cme1^}KL7L2R~Q6{z-g3xdoc@R$FX(JZd$uezT*D;-50X|@@1%P;SZ(% zvw_V1V2e?d&}cnm0^}+Jes8B67)EO1K^!4#2;|pS9m&0(arn`XpP)Lp7lBdm;V^t9 z{cerroRfFZ3$^l(mi~s|Ofp^41Ysaj!URbmL6)yyTckaBKqW8vC0L_DaRJ7|ULqj? 
z@eabgnQXd^4xM+K_$4BtunuVO(LnqAD_AO698b=^0**m{ckH5BSjpE|WsNa*g7 zdGFT%_IvqvIL+=C`u?L_{kzy?#|OXp-?Y^)|79%S*QjHte~f|pK=3etMp5*an`C0h z8!eJ;ph&`sX#2%04F?5i1sM%1oWddSEy17P z(7Rrw?D*iOdhe)0|0`1+4Z$Ei)gC>iwU z!}=Tp0^68{fIil*|DGPGLe#9{pAN(2y+aD2mRSySN+I-G#0+6Y-Xe4fBZW9gfOPO~ zGZfm){@~%?pA{)CAwX3H-|E^P;{0hB?p*``5MXL5Ag$F9=_TzN??QY<#xz=3XrW`i z-e(N1?RSY&>M;>I6Se)&l$+Yw8NKM)_`xR7YA|~mP#x-R&;zkTF$jzAqAnohHv63% z94f?0x8FiqVML~^v-;9I*bOC@hpo}A-u^r49R z%i@XK8^wc)VcwR;h=PRw6z_UEysztrgbx2@>;y>IF_H{51co6<`XQa!SVasy*^Q8r zyN9-+;10 zbnX1N!s~;be@b$Vm$FA)N|8<+w zx1D#A*D`xnD_#7q&eCVMag~bTzN2mpuO5rfdm9%O_0!oEn^xSr9OP{kH#YHWPwi)6 z`s=OY3fsj#{#813#3gh@hUcD1+?YXW5GJKmdm+x;V%bu*kvN8Wth$j=)8OdTtMh*3AaMyx?40hnIIDCsxXyN$&NL!%J z!7&aew!<%10#;UI!`z6xWv8{SzMtz{Tzo}~fizl6o!aqL?xFkjn`Xy`ek$lNeJn@n zB33uajY}8uNKRfB(oecqzppk{*0tw~v{!J?6sp>Ll?zYpkdw9Ezg&2*F;3sL=l<^A zAhA3*nUnx3*6O~yzrX0wsdM)Ax--EAiZk8TDi--2sYU>TSaqK7_=^*DvQnHg@;%9Ze5OxPeFS5R z5+0aL;b2YT*-&(x$EI80c8;<^Oa$Pzi{8^dKbt4AYP~v1WdRhu_byrYLh6A18K^yks?Qb5oU1YZia&Z;k*6Mwe zhEf4%widKPAu8k`UZ?7>BqStwwjMbslGehyivWAl0sMk5rn2&M`sLkB*M6 zKgyrDm1r5iWquLxAJcW|Yd!$RG+pH}!*VFh>hs8U~l-))7IYF-52s zD$XCqC#&_bfzN?oi0%4%g&K-Dv z%Cl8=nOB-G%7_dN4Fzoaeg~X9GmGU?!8-BCI9b!(Li4s&B6ldH(?z@TnwrKEkxmDN ze8d$dY-wTfBy&Z3E1#g8UqL~EecoZ`mFF%5jJ(P`*4HRoSsB~EmRIpf?{$1nQd(b0 zX3j(*a`{A)eAbpoT-eU;XCZ>kwLqlm|Wo{$}}rs~)<+p&-adW!pyb zfxd|TYkl-9={W{H!n%(kBhB1${LKsXJkILjonWPLsuW{=ZI!-*GI0gi<>^?KF%qdl zFfL;DEl=EA*77kuJUp=7z_k96U*@O5Azg!iX`tNd$xXm|s$Y+yjwLT%A;Zk$K}OtM z3RXJ&OV#W8jBAo_U&gJDJZ!XbRntV4QG!Xn_Of;hH$=B{lZk5lOYI@QHbVi&Bj9f_$I2C&KNPmcGnxivsT3Bo}WlzGy~NFAPjx55+(K{%u6bmHW&*?}p{b5x(w@Y0o>s7HH`Df&tSw!7G4F2xlKxXWSa zuj2LcYA?mNSq~3=#8n%8`t<1{x1*kxI$9A4^LvG=uSzUuxrmmiBE``#Xjng;Un*(& zbB5b`dL$LiOVXPkL9kyA$brT&+WEf61`5Aqg2M}d4-qDM|Ir(u+<>kz^)dtyj;({XO^vEs)4fZH6boinn23%O=kg zoT(c&KSs#e+vYx=&H2^RTd=LQHI#WFXu#eEBvvo8WFn&go#DRQzUI6ie?FA7NCs1{>BZ2`?! 
zf2=|od!IM2Z?CGXY>kwkI*f|pe&Z;D4xZEUmwEX7 zL1%PKpi^r%3a#wnTVwV8{B|54||~e@sSoPYcLa2qvcq zTy|@aq9p#hSO#9d>ya50t@GDs0pPOjuP0(2IExAyfFlKILvW%5Cup-QW^&ifxop$g z5C13vgH-!CYMenaJOT<=hE7HTC_@nc$c9ek3CdtCZ|VoO0nSkL1`uzI{Mp&esqDE zB4l=j0IDA`p=N3fhwJYB`IL|#Ptc*Y6lX=tfiAQ#x`t?2S5UhB{`6FA?Vl8_99mm2G znN%7w$@NT%42b^>CK-Dd1xg4q?F%+WS|2HyFivfBwGxX!P_U4FS01t4KFGTynElXI znM0;rW0TrY4zbXp5}xhHkFZ+MRAZSPMmOolxtpYB5Azl#YqfLz(GE~EtQNE`D6=kH zmGz-Vim9Z8)eE!s>Gp3BVbWXnghn zXOSN~@TUmGT``XC>>5>nUF((2vnt9wJO`nLRn zca_Y#0X(%<;MS_Bu9oTOU%?%_Dg(GuA?V59fJhB+&@ac(BCKt;df+>PD&b$Ivk*!050$p-?2q zfY25T(RWYJCgHD_=2b;?m|hitcb;MSL@rtVVifD}RfhBn?|CV1W;Aei%?+GtO9(}Z znHxfUvA-EVy!@==;83XCc>72muXE97yH{Ufq`f@7N^&dgrw1ANpKJ)eH}y9}jwJQU z2z_e|78gvX0ZhK|xHu7zUrN|DBB9PUgWOUfO|T%Yr9cV!5WP&*|1u?bJFk?oz23}$ z5`Cu{Qy%EgEUm137_c48$Ka}fjPd*`sW^5&qWCwQCQu1-=-FF)?xJ0?<1;K+k+>%~)+!cFI#1 z+;$b@1}w&IUPn#JIU$x=2YcTFbT);cnlZ&{f#iCuRn#j0-y~6`gln;oNQrtQ4c8j? zk&LEdN?ehC!KXqD+h-h+n|^!A964gKicF6$;7b`Kv}1)v(=58l7+t&RS_gX@WYGV1 zxCF_pde&@x6~YegATT}!$EycDpVfcQpyX>GIvJotS!8$b3Hr*%s&rE)|wJ}$0=>Js>KOU?|c z&LruYmNIH!&?ANAV3!ITL#kvsuQ1`n zEH)y>{!xLuO6XK6-7DIsk9QQ3xwdW{vjKr3IhYNp+5x_z*_ zMl5u)WdqXhP(pzs(iwBdN9L!756w(?&4^0XAK8g974&#|vR$649YSX?Je`m^;?weI zDXCYFE}R{wI6{)kIze|icpLFxi*Fbgm5W23nRI(*x>uiI`3iAOJP}-!tPD1B%+C-5{0BO0S3DVM{UXE0m=o z_6xPaA%zTX7Z*xpzr8>iKk@Q%6E&rh z#Te!;aA;3N>ydwF0{f!af7lp0MlBx?of+_IyQ*iKk33xr?#!Cn+HGeA>*@x;rog&?6m27NZ??WapHZtY0ALEc}v3g3wgfKKQ$Wy@DA@{9gG`Fr{yt^ zc{UwXC;As9xhB9Z9zrH;am1J7J zL!@%hRn*s8;V7PuJv{><#-?52Z66PE?n_xFPG*ua1`zcvc!xkG3;&YzJr*JZntb|h zJ;_ng(FKHEU~sjA1Svs(Z}3+;Pq)OTHrBq#1U6ufl*I+|7Kdm1c2g!+g3EHK{{D<9 zvfARA9h|QUG`kUe;WmrRv%5qwi2GG4@z0`{{Pk*@YpHiD3yuy%2(Jb)n@0KdZWbjU~_-3xs-f zD}Cc`?8bPFp$m&o+Mg2@VQYwUV?JUOixC-YU+{4|{@6H{ujk-GpG+7FrwR$$rfYGT%OOp9N>-sRqyR`BWZiDZTjBFw1Sdm zT8k)ICG{{g2F1?V10k#JpL|s(L8`MLTK*LQIy7PRuVUn*JPE}NF-vW5QbH5Hx%=SdY}>q^z%hhWc{bD26s!K%olgC0X+;fp?>R`d>( zBJX#=#Y*sIpz4E)%$EvipC+#v#hXvf$=VN`bFOhCHL6;Umj45tTIId(5w(Kdy?Z1w+ 
zL@@5b*C?}uoM&UB#b>yW?Ws9aJ=@{TD~(=i!av7XLr>dOwASTq9d+?!Vw2SV`SJwjXSNn`Yaz6nKBXbW;2y}sk96^jT>JL7zX=$-5u2w5L8Mz^Y@^r z`_@REF!vlfdMPzXUn{qjtihoh+vlyK;k5H8n2rASPct4hU|@pa*7rSJi&Bf>2YJ0g_aJkhsful^yU! z%*PYCJK~PI#|0>#Qu3r(5Ee343uZEHp2`Z}nD@BgkHh-SFJIzP#9$(0O2{E4RzN9A zP)TJymDxos0j&mvM?GF;7YLUt?E#PkO@@~^icw}yDY#ep9){-(y z3U|=Hd_|dM1wkf046ufcEos2UQHs`>Et6}?Np5NilK1jN7ef%t1MI$Y#r=;^;}1`1 z%1f|P{;GGpDsv9e`?)UbN=sJGCE=mcM*vJ6%9$Hlv5|ZM{1eG$+S5lD-^a_nOe5{pS-~wwUOcql zP4|BuW=$~s8UiG&v}sWZvQ6IzXn69W0~(hvGty84ZGj^3_FKdT6Et!XItRytR0D@u zpitV^`b??4E4KKs#~IPufzGriD1=iR-$HB7U;=>1%9ns1-j5OFC^Lc{Q?pymeSMQP zxb4LSi@3_JN{EY_G@cvK$cPs36Fz4COG@l2vQMd@VIVptt^h61 z!7W)M`-~=Pt0cGCVYDdsC0^Xtc_-Mp(S_m|Bw)VC6t{uZEkqF2K)gT+umyv}D$bw% z@W5o4#mG;2z|U3E#cH5U-~4}65Nc6Dc(Cqw1;c^91znz#&3xvlztVt&ZNY*^A%RMp z^(w*Fb}p7S$uOH8JN7l5dg**e9)2W^?e+$Mz}Zxe5&x6@=9H*)MQjypb^i*%KIwu) z%Z@tV($mvv(yv<09+;UUfw(M-H7Fqz@9&>1GXi1J@S+}dLpIyI9nSgG6-r{wNChv`HcR9L zFrs2&3X#D0QN4x->fnT|HMTb2K?5|_d!STenVe5cp8WJtnhI2TBL&zC(=W;`O|kQu zbj#(PidRu$&c5KBr9L|8meG@<$K<~5g`J+50Z>J1hsYo~277ut+-2^qbc_BUhf4L4 z?KMp_j`V|C^|zc&JLwFjm6+V2BtCG!fNE))nQE$leNZ&zoU^amWR!HG8Qlrp$yJ7F zXr6OGJBvBc!Xi}d0|*zsoq(R^UOnpcP1D1vV%Tt|9whD<%G6LGc`e{cJfZRRCsqtj zxW+42u2gh%gM~>S9Hd%v$;_fvr3}kP;!#};hwPouy{q67Ppo&g|3FZLh{V180f$DvA zW;M&9M0x`pUfK!&x}63QQX$aHtKcNt@Wq0rpU&NKE^o;Q+&YjH&@dMPJSrlrCf;)> zhAPMQi`luholD`0F#iS1_1Zki;1STy8nhlVwv~*8WML81E;%$9Io}zZU zr=I)nYq%^NP?ovF@7@6u*Y>Roof1+u#bunZMQ;o%rc_%`V;JlC?CZs^C~l;&_JIrA z2K*}p?J_;10OKpYi_3$ar`DZ{aDgP;lumWTG|w)CnAN3eSm#%#R&R=Os42y%?H zv9=&FU^MRiKA}ZWc@5mZJjx0bSZd^HMbZrQ9^FJN2f-p zBJ}N&|B{DSg3@Q0Q(?s$bzh1xJtyj#+Pr4^WT16|=fNlh7#x3jsue0Et}+qdB+PLn$R^eIzURR<^27UQ zr5X6wFoPPAIo!j0$rg+@ss~^@qzuO-AL6dDAP70*d3!I9!6^{4M=BGr{h^TIngae^ z6{53m-xZqwLhO&tTdw~KXjb;%pT9!?GLY8I0RMsE4w%JD{ZSjC zv<&;MXbCJ`IJV!$<~f|+R|*Igk~TtIvduU-Sr@826kr9sXSN6~U065R9)=}gW+-P` z#}Idn%4Gtghr!q%!?kG^&Q4LAhbW}78wUW%1OqXof+-Q(1DFF6A^2Mi`SzA|n7bAM zguu|!!Cv(-VG=-c`C97Yy73C*eGLwQD$abL4ebDEYP%n`g9u7|`gX*5nh4AMEq~)m 
z+O=rWq5ye2!)-l@M$tWQYPwghTqWk!IICk=7t{4rk!XZ<6RR>op3@N4oLe|j6xp>c zW~r$Pa_X{v)Fx?krf7|Y8OSDpQ88)Ox59g1oU0SZt-MT0@{u;dL20y=5eZIYjb`j~~?91gUTS!17wk-4@ zgoy4ao9Y>BlgbIeVso3)7g^V1JQ_IFq`kmfp~05I)TPn@*}i>^slScAEh;*8&^ydI zXxLuL6-h&*TneWhe+I$071fBUV(4D}&pBQ*FJg{`4KNzd@H*`m5D-vIp2eHl=WUAS z*;VIfdiH}NW`#q?ng+^!0DxE#{LCca{EV6ejF_bLxRmLa zOT73i)j8*^Ah?;6z(6&QN5;`1F2a2-+w}(>;$0IN_vtJMV@>hIh*YGp*yG5-+=9%tknij?;HziO{?JM{Y&OVHD%0_`kmEwE%r}uc84I|q3oLl5$WmU^V%Izu7EH3 zQ=Dlh#f9#7C4@F1vHEjdlSb^LaDn8st|ge>M#T(3sXrt8*Bzhh>cT2ZMRw>J$y!b} zgS&#w>||>u(i3TyJ?y=9htn6^BCMRO`1c@AYcvmVc=J8zv-P-peDg=#PL-C2sSF;KFt>#nAbom=TM zn;E>}Kn`@gY;>DAI?y#N~h-!&t7nZeNtmuVQj?ll~G0|_@amisJI1zIog&O%qRNvXpNxWEeeW?AOYmR2pAjDO* zHzRf5)ehOiW{iQ|Dv0aBkSEmH*Vw#D@xr;pR{ggjt@uD6M0+)_M<-;whF?E5t^@Cz zWKY2fqPj`r+(^(fy1iR_RE$A#1GvNb7n$xFBv*c8(Y=ff_>Q{4h739 zr5Oz(N`lPQO{GEe(PW$&5LHMC^C@o~s8zP}n{_m9O47+&2|sINkVO zhxWktDeip)``+^v#z~y2{`+kg3CZjcw;Z%xW_VpD$g1aM>5kvUL!1*=1O+@wD%6FO z4SdJhHzP?1>kYHfEoZjCu~9};=p#~w${9*hNRM!TfBiuslmMgb?^Lq$i%sik`d##9 zip@pukyn$?;-VEzWvX@Wtvf8l!0icT-=6fAnU=_9mP<0+mY>VLogw+@n(YsY<|5mm z;&?|Nq<;2rx-o}lU)dPr0`YLwcDs$IN`K=_o0Gs?*n$)u^WZBK2QFZ%-7QU%ZBoR@ z5gm${AYx2PSG=m_h@c|;n}o*zWXFb6b3(9qp__zFuCohHkgZf4mVmxq4Y6=Ux;67b z4$MuE#^E$$XwM@#uds(&A+9Wf46}*)gqlbZd#=B5DWt4oBB4f`y$J z$+28f&nwX2WN7*~nAuL?xv0s3TnvDf{m0E-GTJ&i{*3=rf|b(Ez7zJf=(M-5|_ioV!`LX;-BBv#dO4$#M^C9oyo5(h?|1-h-%B5}?`wWtf z*`qF$ydVm22{OWxzxoUb0O$c#3;@1kU{KJkVuvA&4w349svOW!wEe<`3nT>pQzho|n8&;Y=zo#?jEr;U7T5d4rq zX+_UzL+Gw@ud91GaPk<;7D=0je;-j-EG~VScc|&($s8<(hKSr`gvf8pg zs7GTeyXOiTy6GwB=LAdm;eWYVK_@GDgEq3gEwjr59XaS;<*Cv`_a9WQh=pRmlZ@(S zU$sGi?yy1J)!)5cP4@$31eg1!56;qixyqax2ZsndrSU;mmBq_Bxep!vz}EloeXQCN zo>vwkIg|Z_grH`p3TiwR$-GaX|6*tZ#4n&p2x{ zSCVeTa(XZDw|kYO9Tg7r!nP9YnP6)B#QkJ_M$X0B-N{-EufoVpLVE?Jj`VA98PtvU*c#}P#mQEham)Y(FvUixt!_7wY!ax6mtE^2N6w!31OGi?vXB5cjL@NvHqDhwl zQrl?yr~M3+o!dL1jy7%>sT0kCWQ>{M{>u+{;W+-R8{7C*y>==4X4QGwnT#e0 zS-BV8iGH%FYyE6Ew=GQf50sCd^kxWSrWu{?`7}F^#)}U{!iEm~T zb|WYd17pG(v2B%m)18hJyX76WkulM;TjV$6u1 
zN^YnV2OpHadD&4NQMf#0#@}lq^{!s_c4m2Bi%*5=C0WJ5OGEE8(8Xo|s+p`pkqjYt zJXoy^6*E`C=9htx3YbDw+ud-~ibO2JnbxgRt){09)(?-ouOnXwnASxHa$B9%+6#8`0(oZrx zz#~S}{r<#5CT0U4wy-nwdU2v&+*0$ou+jB`&Q5oLWff2ur5?YK9(i4nb&PM*mu}Gc z)~yxr+BtWRsx5z?*Q7SyC)Aw~+_?14=%Qe0F|eRTB%=5k`@CGIH!l$waBh+`Bu6@7qV3<^AU(b1YdU68f#G`aa zti-1c+>g?icP-goHW-7QeHr;7_&R%J$vT=Xl>iP-CQrK$RrbaDm^|b$4>yLkZA`sZ zSmt-LwY(=aazB6ONcjGusTKP7ehE%A^putv>6tjuCBA&2OaF=-3s)O($(3#UDv|MS$`Qqo6&Z9}(pHeiEf+fq|;H{fyru$~D13QgC^~$$(2GmAA zh8C?JggggqHr_?8BlTW0Q>4^F+k7GfCvBh1)%bTRd*2*hfh+NT-uH-GwEpIpj-iVw zt3{@1Oogp)gn`;sqCk*>s*+H=WmVJYE~W!#nNvthuH}{MA^mPyHb%7!aqX-Ybi38O z80ARB6Wcg%^=>PBck&Z|5{6$~vS#~MWJa5<72!3R)K~aMm}^~QqtyFrI`se z@=r#qNbakle{#~(rje-cqlUoq&p_ zp{mnOw%FHbkSq}5)F_>?X!W(JybDS*6)F>D64T?O%`Vf|1v z9bUmsc@E?daK|Lp({6C|0||OBskYW>kF@xV720_wHdf1kp{zs`T<2X~-Ry(k$c@F? zdAwMusvHuy0j;547jn-Hl-s1Xw#xKA`9!Mzvd6AVA}&xTd0!3NX=)n1Os-~E&&@Zw zodyTHx#n@bdD&Pu-;CWDRldsKtaASz+@PHAy18~1mT5bkAWicJqIMFUR>@4GxLC)e zlE(hGc_FzPZ5VnlDe(OY z*;$f}8=sf^Fg7k8dW`s39546wXGVVdGC?FR)6ijk2ASbvbh}@0Y_iLl#Flf6{Qmje z37xU2>WLG*5!{5%+w3t-n`k02fDlLEauqpj6yqsS&GD9owqr*NA*GxWtR_KLtC3Yen^!ljdMb9iBvuh&=o->Kk7 ziZF$}V(#4CYwGA&Ui~1qJ$vl+t$?AS_zg@7Q9tob-FfyF-yNoW7+q@b!+uV~{5B72 z{6#Y?YjQ+n6YWb39g01K(B`-+0n!d;XLD~F9K>e}4x+<{5dXvw@b;#DF4t-qL$ zrxG@VhB;LlWD12kBkl6-Hz0lSLXL6wKoWmn+yOronZDqLj~5qBH$RS;IUH$&onkJo z^z-j~aI0giI%jfS14vZ$#~3IFzi^Uy;e9(V`PT0CNC0u)7Y9xbUD=&5cxpvLm)wN= z>y`2K9?oXL?)EFS<5l1^BK^`iM=8fgrS>|>v%-`uJ~)y8YJS@cyOvJ3Np<;=q}gIk zU-p2?w(bYbrxyqa1g?oQNa@~L72rHjoYn)$`}Ym$$L>Vso7<(WRz`nzyv0v)**gE!&8`Ya}D*L8G;+O>`*-j7|-1Y2= z-TpQ)G~#85GmB9v(|fIVYGMPkceU91(#ftIR9Q#*vTCe;Ln=CerKnCtRw8%DS5bGK z+&d>q_k*5OU|fa6lgpaUozF5wX*wSGgqDJX88s85&50v#^Wvpalx%D~hkjONFrUZn zzh7h3Ami!YsGGTUL9mCD;n<~_Cz#TTfw1rLR(5l)jNb0ilk75HYbE76S>dxG(zq&) z@4;iG)TU`lqR-~md;KzhRgrX8tACDjvN+3r;av>5w{xOHDqoq2P{rB!~~*ew@T`GQgsB#r3Qkq)b>nV23VtH}MMAN)w%WB-7-C6BcEWzj6o9-tlHW2IpTxqJ?yd!C# zq)u=>Zd=f%(Rv3l{3m7A`{Z!N7W>+y;x!AnB&qQV-!Tw#WkYy+LPGt~TEEke9|s?8 
zk9E?SAJ$Dshqsx|KHUK1&f)jp?rfKPqdMbDK`pe@OAdKe6c5K9KS{sR&kvPunF_l! zsB74qGx{A>TDey7tv`p(Z09yFEjfo)!`!a;cBLiqnfU`(w~y$r$?1&lzE>zB-5)xW z#`DE2tB@o_HSacqH7N5<%-e4F<1dfk0@;BrR#iF0f4h7H7o8i>ZGG;I_n17-<&u-K7>q}nsnxpl7=f%pW zn)yVThFlisKsuaWB+uYpA&gAH$?B4!n@I_uxuD12^w4#38*W^1O<8x!1GnJ7O3tj2 zevOW9C8&eemC6NGIyy4%UkgM+wdU8;re!)gfrtnGGGC+726!fpTEx`x+~`-SC|DeO z|Hfs#^^>)WV;*WB77}M?WZCnEl?4hyZclYoChANdYYO$HD!^hhd(8Bhy-91Q|7@Sv zU#Yll+LClQNMfX0$FOEx4P%7uZxWqQznQCgNA4lxl8Mw$qI$!QRR=FzbZT;RP;l8W z9ctek(01TnbZ4|?O$2eSwX*W%@8xO#1Ah}8Hu$8_@jmgymd#h#+HV>qc-8)s?vjP^ z%2nLDDem7{j>vq;{0?3%N^XOl0W~9~>f^IKydt|-uQu>d`L#nTQ)4Hi;(GV@iub78 zcw-wUV{QQ@V%javA-QaRBDgbYpH?NWx^7SCN8d^J-t|&3jhHbNk9_?2biLzHRZrZUaiGHI^hTt!jsiVfK-+R&rW*fy@!qf+ zY?#KDHBF+Xb@s!=0f49e9^#xBBC^sT{aDSxOhE0Xd+@-^?B5|LreZGrErYO%#Vo)x z-_G`3nLXD(V$ARUtpMEwaqvMT!CBXDErJG00S67`p->g}Df+g-1VBlrnHK*~6k{NDwkAE;~^nCVlBd zsC<|f=_knJMR`pQ>SqV2j3tcmEVvXq$Yg=6V@R6?QiR%-`Ljv$O$`#0b3sPHQd z@>iMY_P3N7l!!qHEDDtpg?p8%V{-JO*h}$zrq9bRx6kzZcPCl;(Ip^0Pp&Ys-e;7P z>KOU->RW-ppO?(XqE&m5biSW#Y8^dSvCd|(yhp21a5kjj_gqp?PoJ&Jic-Y*qQOQ?dNOnV|z%SWP z%tEVH>t&)Q-@gUE+HeQ!odN($<7}{eNP&hxC*=5|SU?MTD0!h;`< zKV=Ka?uB}`U5C2^U~(LZ>RD-F;h&tQCLg)G`U)VnQYU2EwsS{S2 zLnuRg(0JNFN(jLfB1NI`Y3oIsT+CIbDnb*eU|K83Rd;9ZE7+F~Ya*;3!+cUASrB$x zb#{rhyEPS-w}-0UzrWe*+NPH~xkD4Pr-yDQw99*r71oS!8$CIPMjpUb#XwL7Lffwg zJxy(RzFKs?x9~IDIW`T?AaDq#SLYw}7aV(CpLY1}gy%%7&dHD0F|Ug5zpMqhd?0Y) z*JEAUaYewwu^~ezdTwxg>Hk#wZl8eis{sZ!<-E=&V^;B|kq}$-#xL*|{xDN`c&5wY z^$pb4J|_OPwL{lurrzcx+m7{z@88SPbi!6X7kQi9^bJBfRYDqrG?AMGORw9zNP%rf z$a(Y%f8rGC9HMI1KOFx zwZq(>KGb$lB0VVNM`8aN^4O@GF22O*=ywEpNbz}YUDTK9!{9m67Qh2n1Sd}-JSVfH zpv|WH&0wO$=6QRI&*Q~pL3H$|Yvkn|z}++Qi{NR3|7s2z0%_Kk>E*MH z?GRK?@H#a}EC_bgzBn@!KQIP_{u4-8hyNvcw`0^8xcZs6eKq%u$5%V;U!48==47*f zPNr-#a)dV@8G=RLD?eOK43<{Nw$|S|Dm2{s^Mf31zcV#G4Q=HZABMUT_$g8O5&aTq zSch!qH+ekt?pem`Z8Pga;_Y;>52iPbd3-sI)gsBsW0f1^t}t{{59OYTdup6B)t@`j ztHj#=R&<6H8K0Fh_zhO+QH259Z0S^wx92wA)$?C0Au-l1oZ7N>_nNfaE{s>{nN9F< z8L?e7Q_DAfnNf4!ZKQkiF^i3B=c#=?JH&hTVuHjpx{F~-){3XJzdYl=`7_+spyshv 
zwohy#uLmBwaamK-q&jy+BxtqF>4)=_9b6alurL(u+pleQX)~IEgUl(nzGB0*yBN$i z;ELC?hil!tfBd2xJf1qxE%V-y93nwkCQs(C2zNch8#tBx(Hl}YcrO{MLdWg^C@e!b z=Li|y`!c4hlSXX?O61k3;)$==UDSdAWPfR>pPW&Q)+2 zU{*x>Oa6o z0;Q9kI!qQ2pHH_>bjL11xO;fUbtJ^O{QEp!sqJs1QaklBtaS(GV}e1?Lj~?VEyD+I zAK%UcwV|3PVvzEBGH0?bhr$Gt<+&j=7)aQ@a(BG@ZeJ5x1rfHC`*toVR)MWfB^GL; zg2j>7v%+2Tcq_8JX4sTvOMurD`r=pgW=XkS_}3MNi!*de$MSgpuj<}Btmpju|9@eI z83r?Bo3Uj~#-1gzR(8q|p;RhMLK~q%>zH9?Y$Z#QWXX~wN(-rsVY0N4_C=W@T9Hbn z`rXcpd4JxY@8|Qret-O~>-W2UUDq(L)a&(pKAw-qIrnov=iHBucT`{Z8|x~>?9{hh z@3TDRyXj%ov$ax}edwgh=}7pue0S{sNj0zCow)@z$9miQ-&mDY3?y>hjQi7#X zPwt1{joTK5Ha+dUwW7289Pi$B56mq~XaN~3`%3Z3NzTdeHx0CCon3f346m@*z_3I; z(>&mKyS~wPtkX1W7JCOadbG-}ejM@Py(Sz#KDhg$@PxkE ztFkVC(NJpmc+I<48)qxgzLw};UCtH@mN>q?c+5pJ{|lV#TU2qo4fMZyrz|TU2!Eo! zqgGws7yOrWbP}}Qo?{xeKfm@zmvY?hJ6{hZ=3HA3Awh=VexK8(7tWubLwU!wRf?;h zVPl=~&|AQLEl0#y4Ag7k8+8;ukw>&(ifF+UBkf?Iq>k z79B!-WvWf+N0uaV@hBb4p*lqweGYc|uPoW4{2vtFQfT7$^i1x0cac85v$8V2aC^t+ z$7Yo?8`o;LW^(yUNFw z69iwZ*SXhW1b+`t(v3;8_SD>1aBKo=qqqtlm3LI+F1)hzN5al)tFF#7#_E+f(kn0UdshGap3bV1qnH8cSlj3Fdi{lvPI{roW8a)> zYz}<;`IOy@OpSts0kHX4DCJRceyL63))tNL(q+{aojmEK65w{m^oCr+TJ?{lQ)IZ% zYfFZHC;%`?as9A9@-uKz)CElGuw+>G$Bg-e12<4|;N@{2pNrz^7hKxI)?+oghuPHc zN~?65;=K!{PoLTs>FVKC0cYNFIm$2PhI{yRw*!B5-Bg;Q@}q-80y33O?RPkB%AYWu zQ0ze;0Vwj-?{}WZ)wLfuGxtpNHPL2JnKMWpz$Et%pS=3aRJqKMDI(|c^jC|gr)aNA zRjYVp?U`6#WOD{fsE9Pm4`?~|E6o+}Yf#X!dUWbWjMWm!#R^H>=-JDs`J8|4N$d9v zwyb{7xyq#D=0T5PcT2Bd-<-3x9X6#N zGS^)lk-B1&mCc~y!=J}T#th+O2Q2)5oQ}OeSHg3J@&5Zf83Q^}+OF2W&}Uh9TCn@l z#}ZC8MfH4B(;<^K&xxW>t_i6irAy9=D06fhz;9M$c0!M)P4dK{u7u5Fk-By?o ztcMTBWMn<`x`S|%{3l__W6KokweL7?#rP^J@FNL5(kbVc+uJA>I7WW*ljGk(L5r#%c6RgHfegX3TRYad)a9S;{Hgo6l{3tuU!Lu3 zy6R4F=<}0wZIF>KP)`-UFuy!3M*{-ud4S>pB7u!~+ z6vw8%C}?Ob;@rA9zWc?|!eo8ff{*yLc6_myGA~9ND(PTvTu|VcNzoTw8!OHxw3~m# z%M-ab@8Xn9GrxGPH*>rL-;IlLXP;koeQ){V;jp8V>f*`jh1H#3IH49@5v$l9b~moP zv=4at=i@7LeQfuBMoBFkV8x8VT3ySp02=;wts9_o{GE@x*k#39OV>`Cn}5DCJpYf? 
zDPMgrMFKkBvuTT5LN{Xe`}!cDv1(PUVV>)&!mkHN4s+9z#Rg}A?(1yw{@zsJ^7c_; zLqp~PRiO0h+n#QVO93)h73~8a&OfM((iO(X<-Gxu7WLad-MTI3>Wzuk28-&|b!sOS z?Qvighw9m2&6CD%PM0WbRLvCT#_L!<OzQ+ZU8eggNT z&?~#@?c?ftq#t7c?4;u@YkWBF)h)}A*Z1?C)-+s6umrONm8K0II6vTc&H=qAl8PPl z{Un#jHvcn+^Hm0aXY7d2@O z479ix{9b8w3;#N}`T3srn@Z-#+_KM8NxAcv*_-jX+KoR}SHct}$Ggkd#l5~fQOhjva(^eYjJRG;oKu?W&-$3@Un*R_BmMlzjeRCrHNAVStMg;p z`VKuV7!<@A2G`4*Y?$t65Ot})U7XQOWS8o74=%X1|9fLbuzIH)+pk*xEIDM_p&>fn zapg};{m%ZlU)90v#?k>tH4dpZHQXC`lxnQ7gI(|0r!BwxWqsRE))sWujxPB;)KJf@ zRjbU}Ke?B}3oCDyI3y=-F1$H4;$=;wi_5zd*O0A~?-{>+;$c(sOZuMoXZp#7^ebEa z_H(s0`}#V#&)?1P(>__~$@<5Bj-_losjYH7K>xi?Gi$c&6aR4DoK3OBviWm$9S z;E_8nEPk{i@AYla-L@SUhwG_yvrZcn><2FQu-Vlr;AY4E>dpEi2Y)v;e)zIMI+f;~ zbW?*go!^$73%H)}+3&lq&k9gdYIehXUw`7Wv!yxex#Np|b9Ej)Ss$Oye>KMDr)GAT z8DVwEH;FrdBbZvz+_WUdz2!kpWJNob^Zd80RvMX?6@2sR^Spkei$;9fWnb?{H|G1N z4r_6}_jOa8#qX*?NsU!Q>B{_Zt>iNlg;s+$HC9}&YOIRbRy?N(8+*@=i;FYLCoAF6dvB``h)wyU9%aV65>BJ$aKX6#jq+^Fun6ov!aVte z1YTb(ze%o-ubTs2bv<(N&o0a5g+JLiK@{h%9 zzj*h_rO`aTvR6La<;O?=Jn-53MazdQ4(d0-DF0|Ly%}46IqaVD@x3~^tEklPAHmOg zO_zn(HTihgJI8k|x^_SPPCFz-YrhK(MrPl?^b=BCE*>+9r) zJs!-66b$Ea%+p3`P7I&#?D=Xd!_gUUxQv{|E+!p1K;&OQy=xoF3No09{*?7b`7`AA zZw~OPkiYwte6O4hdh<}s$+j+JA%f9uI@5E^+7mK!mIeZu2uZWepgXs3pGR8sKmYoQ zwk|*X@Ixu7gZ-*|`%E%mh-2k-(on|qhG!y>7|I~q3BgA>a140;B#g>3o}7z?a$L!r zfL@k2OWr(tAY*wC4PG>hQD?g0%~X-@S7pRcmGL-%I*JXxcd4@Du~qN?aPh8oM_L6i zall8F;n$6)$`<)B;Lbc;1IwjQT>2tgX*D)#efY4zl#c3!n<%gI4n*|EbhB&e8%^ih zW&i1?JFJ~T?B0QrQ)M!KPiNQe-7&>im>|FL(KFcULyXU5cGHn$14}LS z$G@+dJ*K3>%=_3yyX1~{0|V8Mrk(C&sNLbV%B1r{mRz`NAKmbBX2*NFX`UBnb-Xv; zYL3CFofZ-02GeP0IKy0r)eh>TXIffL8r<%A%#oso=z(W_cC#36H7Db~-B{Jr*_AuLeYJJ?_S${M6HX?0FR<&z{~B&VOh0 zb5HM@Fq4k=4h{8b^jRG}Ia{M(=Ap#WAM(8P%dGyG{ApP9fN^JAFI-laIHl?CL@t4g z@BQ&gvO{)yRa{!;nB=%{z+6j*E_QQ*#*RI);G}%o&|TI6gFn1~#Ca9ppBwf&@M>sG zy4N1d7`DwChS{kO{_eY{r^?Vm8m`m6Q>VD%E2x?t+}iO^hv5#h2DhXrqV>}{E54oD z@sFBe>f!#kUAI4&Zu+U$S7~2<_~tH~UoTFX_pfhEwG{KWZ0G>pWA3+-<6Ilnu4_4L zUEEdgq(n;|<-0x{xd|#CHdty23uknCR!5ywMtyC3R1@?7T{yR-9AzjQJ^I5Qzgz!t 
z^Jc|7*B1He06lzZ{V@^ya0-ySwY?qtdat z;q5x7e0r$6F$4^~`}4Hx8KwWPr+Z4nyVVYDE}fxw+z5e*y>9Pc&!%N~{@$KrQ)%4e zv$Y408M9avc1nMcdsL|X^xyCC(y>*Rhtu~)A4XsBG-8tL(1>WDd3Oovnuv9eXNkpO z@a^IBqz_l$_P<|!^yqPU9uiId+n2|&zXk?VQ}o}yJmM4n$Isd9`*-)D{PFDGZKP1< z-@n2pwEeH*`w#zMu?ffU?>__Tdj4;A_`^SWCE*0_e!}g|Q+~tZ%l~FKF0FAh%2)I| zmlL*ncjWqlOGNNv*FNkQ?ANAWyZV|;^9V+d)5-8-$DU&zie^yyyw+aRZ~ygIDFe{b z5b;_PyKrcc=RRX^IdR?9`f z8+j@YezW9AnbGX14ZL2s}lD0d5rY;TZTvs($Fwiv0}x<4TT=>o0`TlO->W- z52Gw&#oev!($w8H?Fw~q5QU{5uhV1b%(&@}w>kBC~nt?C;NPuUE!(cfa>s zOTxS~?^-zW&-0l;U681ucoC+Jf}Wt6s2VKn@^@1}AxWTfcPvDRl9l6$QT zH^roLO~y~Zq)s6nA)5Z?=H{`8d-NP@bx=7ajf`Fglg|BuZJ*pQdi3bgkY&-AjU#Pd zM%ba2SNiBQ+S4z-bEwE!^3_*g?eg@@BnNtmF@tM!BW)=ZyMO9%n<=$8OD{9d;kQ#G z+ju0-{O6V2)E#cBrIDKOv728k2r&wu_0vy3U0`rD0ym%M)6L1y^@FN=`)V3<7mP^^ z&b-@1$Gi+fXebX)NkcL9V#8;Ou-|;wy?A)Cr@G}H=1mrl-^b`WSyx0K7J6@fp7^aR zql{>PkJCLgk>)&($l3sHZEYD1%{(#h7*qQA87(+2>YHJ^G*i$4GS^Bn3SW@aLyJBLyLLpBAogioOx@e+wo zEK*Y-aK{??Q|~}7BKoICiHw!)ufsVyzkU058A#{UN0Im0$NkzbmjCe*8!NlF-}iT& z?!Nry)$e7-ysuW%+eHqgX*yb3TJnGbJNz^v;_`eyjd{OzR>?q4wFdc<3#hiptXKMq zPa?vzh<;!{)obc)7uDC->o7n3^t18%_&56yt)3pi!&p2vat2i}r+@qHvzfDISv|S2 z)w|d+L-pm|{ogkrt(6|A(?wtqEtYAdP(!HlH)Py+Pt9#tRR<$LXr}I@@1A^Mpyy#V z(+jU&y&5yN?UdvQ_ZGK#zN2S?d1PEOr6#BU{IzpT*`mgNR5SRJX+MLg>kFQ}AyTq3fl%Q9~#mD~2cqjuiDb7!!cT5`qD@7_I(ZMVVopF_N-Uv8@@&;RwR zf5Fpjw_27&tG&0sdLyA||K?_UlrNjF)AM*i#qHfx@2i-$(5BuAkLb|4>ejUF&@dx? 
zn#mmMQhY`P(C2O@fYNTw>y0p{&MHM^IKw7oSS>ps^nOX3 z4Gw*@T$VCDu(B#>tOX5o+erF5FgADQj2VM|_+b{{^w8$G0`HzZJ0qLQ#B#Y>@4R!J zt}@HqJ1_e$_rpCq&EDJd3`1M<=>hO%|n{!&!{Sp5t!@iNVILa7y<0!k3ZbJ>`OlpA zW8(w+-ZIRZLESxUAD}f5!4MT0_>0|kY`LuPa+SM7;CKl>c`+_SoGRM3X&nFjo(u4@ zV$~6beTb&OL3-wGWfXSZuwybtKCx?$9_NaSE!e8}ie3*wO_UMZ2F#t7iIlf*-yTyj zhmqP$5m|x+7SOsh$7PG++geI1f{?P5(ZWpEW%ljBSh|IE=giqqHl1X)vr__HlWU(fqSLodv%u8z(lw4Mg&u%ne|TE|=F`%)ugwz;-|+NQU(Ta@ztOr5`W zsiCB;M=*GTL+Pj(y&YiT`|CficDel8YhSAg4-v~;!N}?eT7J`!Z?#6R#e-Edp-iuW z6I$uWmf7U)R(Y)?(Z2U2#$CoF7QH^=n~j~YCC-eLom$P4e$qSP3V?W8tbhMo42fHh7~ENhswesUu8G~XXCvyE zaUaKeB>esN-+8Np>RgzTJ8amnL_lTnrQQ4XMIx8(S2VsBvmxdsMlE6=vR>J(+q9_w z5{*+24-5=Ue1BeXX*(*kCQJdTKnW)T9jsiQK?tY#c~Q|SWC2I2SH*4j&8Dd;$-Z|l z>R^r*A;y1?%K&rcFbgvO8D{Y6r=NUsgiphYoXM_23(o+kX`LGQdv0BQ_10GX26Cze zOL+Xx<2~!PhhFjcfqKWUo;cCZI_GN8%-OSZY4S@-v|+f*^YQyQeJR$BY5TH}&RDar zC<~Lm+OlMlOOSsY0izLeufs-^5c`Ds1! zZ{ytE-AzEGOIq?2qZ3-1>$ErZPQXK~Vz*anHWeSxJ6bdz?4%W1(WOh5q{NHJP8nQX zyRebpO)7U8&hFGu59fYO_V=GD1MW077SCZlF4AZQePU}(m0Y-Sgw?e_M%B3RyAePk zV+3rH_KZ00ajc()Mn=xnb*rd*ia@^l+IJ%c556~T+O&&QE3T`3y=8KD6_xdriC)2N zZ*dLsH|{Lcz4sL@We`h6JOkck1o{xeOEze;p5O>&vjhM{InENqGvqq8 zezw}&9(|vh(c~;ZiS11uA))9J-sCvtnwk>)dTQ1lkqDe&-obb!8J7+IE7LkVVaevr zSIyj;9Ac3!xnLH1J!Ud4{eqjD!e>N8r}vKtpaPFK!3)9q#_A@78#3|a&Avf+p+oWi-p~WEnQ&w zLxR1k4i4d@3@t{e=TA*NeSQ6-)ot!hbay*5_xR+kR~;*i89i-ITh-zzQ#@2f;CbSv z=X^5Za!?W}JHot#ouTxUC$ZD^Q>>Xoq_e)6oC~nq^ZsT)^_XpEI zD%J&IOI3gOibJ8Hp#r)YAP|J0iY!H7^;n|4sm!G{d}ge;nLZ`5&4z3L7#2gG$jxVX zxVwYi^x1Hn5MD8EUv%{NPusOKf276MNw11;xrBOj@RXlgdk1=W$e`qP?4Aq85f_Fm zK7ZoKk#7WhS~0O4IMiRqu4GtE!NP?L3EvOmsY@d!V?&b`;|lyIZ`FsTS$JjTovvSh zeTsFVL*0K7Pc*i+tROyqSNGu;(8dt}lM}T*Ktg898;PGRplZrt5SA|n`^k>Yt zz1lXQ;m$+K4!eE%<%#D>enUr&T##BnVt?z_t(8_75qq%qL&CS9y}ibjQct>oqj{sX zS!p~f%tI6G#Xg5VORayHi3;;ro2&)cL_u!MN>|n4G3zg1exi4ReVqg>{PRd`i7SD` zZ-OWa+#2Tz-sF*|F&ep>+OqLoF7@^RoLMn={DMJ9&g!ZI#?y&d&jPjQ@Qe=v7UJXM zS(89;ofYWxIy$bK0kxo=A2DGIIa)1mj-c#*0M+tP{Fn?MPp!{cVcEW8$8?A-38DNa 
zY?|c_BGig{a&XYXY2JAwt`MUHqmGEEOW#y>+f`V1+w8EJa(UoV2gF&cBXlK}i0*S^)3)oA>!XmOCIjvG)sgy$cL zc4KIbM6OuarI=|k&4-5?s&(kpDUykF-z61gE*hY!nuVf|wcwhxX1eS0Pn|kd&R~(s zFxD^=Tg@1gBub~X7}#)(V~(jaKsdsLebLRo%@pIHx((>VbV zKASxP!^H`)#p8#e^dD2!Sw+i6PqXGAd(vpej9-|;nZo;DzC2QX)qlMG-=`{m-@7+L z|BSAvbTmiCy33k(=+GgYm?6*g&M&?qKZ6}#3Re}vQGQ-kwI0xW7)^9h+^Yba`ov99 zsI5b@5bHl--=;^6rAU-H@;0G{;eoX}`sRbl&?=YMoO zu4}@+RcydWKuhy#6V9_G@2;^XxwraU6LmF%=}0+@t*)*PhdaFX0F1PXSgQLs->jYI zrx79VP&A$q+*-l(OS_8LtMgVx;VubO+KE-=e?EjQ^2)LwkMd{3>qQqZ;NF3scIYlK ztG&H_rHxACpG7$g4LK{PiwRVfquKxVMYCkQyo4jfjm3TSycrr{#I??s-H({Z*llmu z*1UW?8@Kxq<=nGalQX`bADQH2x8%ZNo!75l6ODKp_G(E~f++;{mIAgGpsc<)aL!l6 zKl8n>+U9ELl$dvZjih8SzQy?R@}B^{5vw0`&qrzM+Z)>yy%QE&c8 zFd_-?{FVu1Wh`qZ+M{_*s_PI{H;#9UHQV-bK(|2)ZiY-EbdqqFI@loKViW4f8Hj?_ z&;@+Wb4C0SFRw2_F}jce_9~B>OKNpX%4Rb}9vQTg=mjihS+1jnm6xrFyEv3kv=AHo z{{8#6w3be18md0yz2_erf0BWPrUbFR8$SK1tl*5z=AkEUtl$f1+YS^=Ej@dpXC*@bKoWK&~IIizqj|wzMcu~yA81*Z2WOiSz;opgf_?} zo!4x5aaxuW*}5YV!_~4ln5sBH@tRckR;>zKzR%rX;jyQ`^3Q(!sj%o2)+f8u-9aDz zeAB1ClhMzu0-}cn&8*;;qqy~@78ZfZJ7)|AxV7Tg8QtMKkxsk9zh&X|Fx}3`8Q#b^ z$Hn?btbKHgvt`4UsO9>x_A)?gcjOvQi(9k<@?(efzEq@Eo* zc8tJ#-tEL5B@uEBqUkTwcpQ-2%8s2o$+|o%XZuC5dCnE>YBi^$ZV(Z4$37F&01{SW zKFQb)R64w~kPi7{;);FZdNL;_&Zi?M3es+D_v&LM!FF{d;sAt7Pbo^MtNti`Ek z)m9a_KdMOZR9XLN`>vM>Mvr9gNX{Zq3L!;Q3zucKIi8@-1SQA{W{iYUA(cVL)7oAd zHY3BUpKb)u;~&TJ{EtEcB*nddGdFa^6~(f+-8J1naLaL^;V9M^VQ`1_>vt10wOsrC ztDk*MKU-s2^0O%zzJmQH(V4NIhV6}gehpQDfvV0#*TYASd?)}Mn;g-9=B_M)x{AKl zDYENn|8aEJ3;7Ou;|B3yOayVqNhaYKFo}rnWlvk=_BEVqZRXCI!$tlSje?b&3+wE# zGcb>-m}0-5hJL2#A%^43T}O*a5`;no8scPb9{R9P`eR1CTt^mmxpe#X3=>$kSkhny z=jQj3F_L-DNXR&rUxqaZHjRlCzZD5flZ%03O3qD~D1=N_nHACpIZ+`W0Lx_b5N z*Junm51aRTUQ-jhQ&k;B}P zU(+udJ@(IEW7)k+msLX8%e{c*c=kh$!U8eQ##qoBIbk5$F@4Mw?js`HL=ZLO>bga8 z{+M^gu}DtCwPTtv-X%6T%EMk>%RzK9cP&Yvk>!&<7ujwm0C*edq*^?dpQoqiJm!Tb zU0dWCaPXi79T=Cf>>La9U7Az!pL?3C`TA<2Bip@i-^z@TG-o&7QJi{|n(DxTChU7g zStXN=uzi*SNucIn>2D`-q z^-dVY&rV|6?#w7zP^h^anp0K~1CI&s;Ogs6dz2hg<~sPtH1Pq-J&9+`zkhgmSzqml 
z&;t!d9K76sRv*m>5Ysfelf|UMH(nKl8JGHsB>`^c?_2P%jt zE*BOSj!8i1%%j_2{oA;w*u$Rex?<~&KPVLAe$$yTbuqDk6*OA}bgjwSwdXaN$k@YG zb(>Yk3ESSDhy2R{t-?oTk&}vz)Cu_xEzVJFff12>#PCt09zEGsHk_>_5v)@@XH7OH zTUr)i?7Gz30*4si6PKIa#!IW{uf3aETsnbmVW^GB+EyT&8IDiBmMa_y^m+wjEXRg- zX!?ol%Thczoor^+6YcP6hty1Snp&4J0F95gL7K9M?d;OmjN4fbcf~S{warNjUVp%N z4Pmu4qEy;K1Yz5rspVeVbPnPp3DZ;6ZT#uer-ws-a91#tM!b(LlRF}j=ljBRlH#4Z z(xrU%&z}kl<<7YK;TwdXUh3~YANPwyGsHVXIJQLP7M9dScwgQ4Ot*YcD3pzTExO2SOoh~jeZwp$Q4Y|}M zJkJ3-l6*Qfb2kK-7H<}ld#106cSfjrD3U64HOz@GiLMP-G5LdZw((?c>?4MmDM=`JN~he<3*YpHswV+6S{*~TRESIKIdLxa zn?XiP&M%nC=Pbc@93t2dREa~|==rp)EEvEw7%GCz@Ca3p&cs?&qjWTm-f&V7z?|@Q5l<9qEjH)KwopCDae&BUAy6TtJN6#Gfbca0 z?fGEZ4W;%kBqg$7A+y?Ca9E?;D?6;9BDk{!U@FDIk&B92E z-$~JGOw3nv(3sDHhv1tk-ylf|cd6l2xlBELx0aS945I$Nv)jbR24ClX&;>Ih5fFtY zMG+NZj)CJ5uU>5+s8z?12QLXe?8=82ZVpidMyyHc>x`s&2<01M!#T_b1ZVOy!;Qjj zTjw$kVkrX`8hiR`&yCHDvdLaa*5?Xzv;dBBh9i#Vnqyv`^|L{R-mDS0nX2ofW=mT% z*dg@jw;0WCBh2#HHBCOY$yg-;R+?4tGQ-D=NfSWYLS%L$vf#$naQgm~u$+L(;M9!I zdtop${<@ z{FDih4OD0;#HbiajA7m2Pl9TmldruB7h+5l=j%%ZbAMOY!7G`M!K+C{?>s}#zGE!K zY6zdo#GB&I0isEnnVA5)lCg6qV0I-=hrCy=JL^p%A(7bzr{gc~Q2Z+?`eC1ZNR57_ zOq9y@X!z43r7Vqof05b(0T;a{Z@HXwWYnKR?sCYZNV9}`jpykK;2~(vA|zcwh<%wr zcbn~jhC#*V?oH_u2yuM|RJh0vgZsHDsDPi3jsbX-4afWKD56=10Dl|_uQv?L~sgy%3KDd21R z;)}}Ms1&=H3Jt%B4i^mDjdJh<&E`PAtG~S8>NkiS7FA}FTT1>x@IhR2gLN;s&w7D>yq`l4n?5QzMyk5KLjB#f0N>F24y`xXlQ@i|!sTrJG|S?<`wx>a z<2%P=Y}mMRA|)Mw6)_@a8Lp~&Tln(%CRQ6JlaI%zJR}jlQV^qh_%FYFB1SH$pr8P( zW_R!2Jx3IFm%`4GGJ6p0_Kr(}nz6a*(WWJcP@HCM;nLTFh6^1f@#b@lIGu z(;IgA6Jzirw$RRzcm&z$6u{h)HIT;&BYi#BjRVsC%P-kx5`uy;a?wX#Z)wS0TqR4A z%aB7GNwds5HvSPakk!R`Vdk!f?_3nWOz3)&&ktS4O!lxY0lZblzH^f8{J?)wl(+;c zJn~$@6{{K7=OgjX%9SgH5|%g)(@h9dQ5jE=C`lbS$Aq$I7`lwpvpYW^-|y=mx(;6+ zNj@tZ8s3&UBJMW8*IGD23BQCH$R9WMour*3f$a|}^;+Fb|BnSMZ7#Wg@h-8geuoSU zzAe4*G8-ZEna>;8uN>6gYIk2N6DWxEOel@X@ zR8a`givdcoPKT3d?q5kx5Flk`Y5Du_zaKtwWUZwoXeS)3oDH-rXT($_a|z&qex^P& zi*F?++7#aWiESzNEI34QX<4aeHe ztPANVyFQGhNWgtXH8D9vQjtcu>|DKi$~5?{GYPM}l#UADz0jGOlCZ+KDn@t4!fq2r 
zv-_Jq9&=xR6(-4*qVY$ufaC^35R@4_apJ@zM}|&4#4h9)EYjD{3n)qQI|m7OFC|6K zySQjmvvzRNcuAWvv9D!t|Nhf3nvW^RlR|-KNq(961(}(KlAL3IgqBH8YQ^^akP|}QgTJ)!%pF6uRl-?!T?Aulc3<7_J#KY zGxiZY1aZf7hwZ?Eo#ULpU;{cWObUdW^l~CgY+w{&!B?FIe3FI73ri>>^9qPjc6?xYdkrmpnO3{UJgsxNv zSkJ`SFM<3aeR3`0UO|Dm6cO}wc{P_GJq12^P17R5HLf#G??;W~$}C?jw3W4IQX z1!C)moCj6^P!&#ucOYkE9Yt+J7M$?N+?x0!7it{4EyC{l%O4Odnns#x5tJuGVg|mv z)ubYZ<`@8nBA{zucj*%0+UV%(n;XBB1ObWC;-aGOC4(xyfx_XxkH@pEBm`smimR$d z=nzyKJMPx7F8%z%=@^z){lbN5kX>=t@#Y~Vi@D5I3FG_9pYT{t#gqC?zf|+t<6e)> z^gVP8FDP{a;vR8K@Hgp+H)e+tujUHO!iQYQT-zYfaQX7(M`x?HJOXnm*>Rv?7D>f* zj3o#oo0kP3sxa~YE37eDO0aCzd)kQS2q zk;z*&9JQ5+ctYhv5$1pyDjl<=^ab{L-s6Qg7B^ln47HO`fTQT=Tc^^=J?)&H3gEbI zo5g#o-c||bq*NrB`bUtOBq3oQ--P8V3PZs!ncM0rZCN$RCrSazJc?dS1Ga_E_Z@wQ zlvcG*pIh1!p@Kfv(h%zqoi8v^S+XsT|e9sL|USPmy<^UR7e3KoU$%C8Ew zx(jb5nH=JVyw$?_N+JoHBnVLs5_hgaN#`UWP`8LS3N>Zc@?$dUgCUpFON*}cq%T+9 zT=T=6;f~}nBzJ-G_HNKexHrzN3tjAc%vQv%jWM;ef?1_&RbRXt{{_}1MQJio~r(}W?xUZjx1c6cw4HxM&-u2Iv=fowX0yqil;qF z<%F2`Bn=iul3dxyzzWWA!$?<|BiA+Pt4%ac+M z>lUzej=&|$lK3whIBC|BFc+f4%>S}>^LuD46OO<|;E(|}T7VS3;LN&&JAug~^Poj2 zCe0*0P^cz{2aaVD7`xBpEe|1$?Ldt&z~wDFo-8IW5e3PgC(DJei%uNcwqH7cFfX8G zi>I7R_*v_DN5@=|piS_(JY!-DiQRm+*7d+ouiVkTTNuksYPD$?MR4xf?R4hMxr|vS zOOtV7$xw++rvBX4)WDeq%ze%*_am$lZ+b5=kz8y!}7I%}Nhf~aw=!29>1}nci z`7q6CA)vxnr<%S|d?5MebpA*MLkQnYpQ_|mlz%F@l~$Qwx6umq0CB*6KcSR!m1G}L zPv9Z}o z$NGJF^H6KZ_E+hW_Eg1~xYZCLqia>mPnj>1$YkBM6v5}j4E#nb zzM4v8zmy299Hey>Ct*Ymjylfw&w5xY)B;#D$m?82Vy<8>MNl#=^$s>*2T8p+#Yj*_ ziO4X{B`&yRNgD|vFpi6kt`f=`PiA4+!>n8N1KKfcodx-GbcI47DyV$(AmlsDAAyZB zv!EW1DK=uqTqNqq;1mhAB&JNEC}`881@8>Q+qY8I!Y}(=$@G3<5i?h%i{esO$sdVt z_DGx-7xQ*ht7WEIx5U@^O8G3iaEN)FOHXaL8>JZ`!B7-)y(J?rwenanr0`=tR)yUM z!a*At02RgE>FDd~?h<;JXx|#v`+`Z-CDkA{Z4R^~faH6f(ds(hQqGS#dpvJkzN*ed zpfjn;K>U2^h}?>@G9=EoOcvUap#IW`xOdZpQx)VFX&9tGQ;-hK%lX!ywqJYux39Lb zQ62m!J7^h1Y9{FXy*dmlB#?(3ujY)EXAQB&baQ6?Vkck{Us7|ih-#Lg0r?t zzi@~urGv+g4b!>KMwE(m@Y{o;*Dv6$ckS7ehSQV&F=;T6{ApZMt%+0_a9yXkKDC!F zvG%9ZF`YBxC3~umU`n9_^9m=ukm9ZmqD9%|-v@yp1?->}J^W;;7T7@c0#{oJ` 
zf@-wQi(Ys}_mDmY|5-^%N#FIXzExiI6%d^2y{0+>*$-6^i{V{Z`?;SNYgn)`T9ihsyf*E#netnk>G@V zr$vphfeM1L#Oxe0jij3TRkoih8$+UdE1@5OIwhXL0EEJgN_aSHf7M;5DUq8(W*@OM zB;1dH4a*^gw_e5lz5UUzUf}tJ^AWmp*u;rBQf~n5Zg6tir&0(y$$6kdt$FF3`n0ba zm=5ngT<`c~4GMWepHZ3=2}6rRX-rHk2gGX-$(6 zHI|fTMyqc|!qJWpJ`%haGd7SwO88KxwkuYyJmu@FjLN$i_lih zTkR~x%t`?iPdtZ+rW|m6MZ#9Ghdd+FRduD1Ss}3Wb_J3mHJ}jGx$viU&0!&pbyZPP z*vj@O=j>=uoilXtc`qtL|DuSlDx!i1EN&JlL(lA0o|=}DNtP=k722qKbTJ)nzQ0R_ zj0BdF+g_-xu%L^K>W42NcwSD8JH1npo!?Qr@G*o6Fw9oUODH#D4g0cZc6)j{W%~Jd zUM{gVTJT#cw7CQ!amcs(X5Ufy-NsVFb0u6%nntQua2`2=r)rXE6<^Fk+_`gStHb0) zxv7{^Axkr@2b;OouNGQ_pNuoOm1skf$Zo$Hu0RsDW?P?i3r%>?=5BPFb3DgVGN=3Z z?-$VK)WqQ(&h$TJb$11M@Og4In5BfoWcPbC*)%qCt%>E+vIKTuwEgfL6p>P!opEK= zZ1w8qze^>bfKhSQ?Ea+O<<}7qxwzt@jDwWRN9~;a_Th9h2P-KJi_)1MiY(`RD7oRt<=1CvQwzzf5tTtwbn8!bnS{iZ! zg7L|KQJQuU!cGGN80zfAcu_f$QS)ynB;@vS%3FaQ2`8DF%a({uUSlcsHNGGc!}Cpy zh?LB(5Gi0E>j0@8ky=4{WHFW3A8=cmjhuG%ZP|*#QyByPG)zd9JJw|WrZ~w9 zQAuM-X~;$&DV!oPGS10T;Ss!sXi`~Hj$@IOlcb29@}*3kcZ;N~SUqNe2^BU3A}d|IZjYr**W8&$jkI`DRP8lpsoQ(1%Pz_JPur#%7SO zBhzKG=v4E5?xqF%=YDsPfDpIc=pGmj{cN!k*_YS4bb;t#dnQ3O3M_7*q1hnLQiD{OJ-kX znPhIOcj{8r6m2%L@Nr@%6|M5-$&^(~!UNO+*PlJv+^zl$TU0VqzNUkALjOnz-r=*) zE;ILMOP_HOMGmBsmXdYN%hV36jM?yXnxu1tbjKMI5p`(aUdraAa7&Zf%8{!QI)!Ra zkz2-MUVkyPw_5G5UwH(Ns4-JPdIAjjK)^kitCqa7Uv|CoCaUNlhAl{knl&Eh`OEK8 z^H>QTe)w9b^($73+An3*-p`%2B(*5TJxPhfzWeSE3G_KpLsFu~ z`X|cBilXC0+a4Z?PC@6s48+oU$wnfUDUO&dZNcUbMN4g1> z47xk?tjERa|Gb(N*JeZP>#w?Y@BV{8u?rwd7~-o=Rc1Qg9B)vWHPBp2pPlj_z*@K`^a*f}EdFK*mEez5>Ls6ls4lpPckb%UoBFa=TIsn3x zyWOXveMz$40(v!UvQ7}2DDklBI>=b*l_Yn55_q$aoo&do#G{-fo}sVN$U9HFs_FfE z0o5Ee6EOV}F#T1sM4HIWnw6+vw*x!49!aYfeFTnx(0QuTatgT=I|d30!g{In^r3Gp zZE8^(w1Qkq=tE9eI`pV62-W&5;*%+k)BbtN|JyTje;?&l@s(iNpmG02Xl30m3d#D z-+5@(p45Xwl$|H~ajsZI_+PhX*BCEX`u^5YQ!1quwDm5bLbr)iB0ZPj3_lL9l zPfM`7oL$YQp9i>q{nb||p*#oicpxn+-%w;BPn9a^10cH}bSlgJ;Gsy|$AouSG&@=E z$jGUf@W;fQ7e7|KNOvg#dfvCwFonV-69ML(GNT8N#?Xw8eu}Voj@!^7Lv%&5Srt0V zh&<7*eftipQiiZhT4rc8%lDlY6w!a!upeP302c>UwNY7H0u%FpKA( 
zNuUxz4n?O&=?)T6RI5!FlyZ_!eL?k*ll5Fa>g}B#=4;73dIypn6cTish>P6VVlSyY zO?S3mZ0qH?jR?F*(s50bub+N7C3zNW4R@{hu-3Bj%2)rj*1daQB)Kt^wTAoIpmGNv zVW*T8pwL=kK60cGUU#9JJMO?IdvA_=97XO=*xn1|z$p(J%YZ9Var227(y`YO+iI)` z!YfbNzIy$7A-#T=o+Y&PnjfUTR_ay(o+fynLz~@xxQs!TqXb@1#1v?d;-yH#Y;K-=7PC$b$#nd`JUiz+u@MviNsq0Z{vm?~%>flI!sdq;Z)!a!KHjDEHD#BU;kB8#DHw>25>iN%VZS<;A6mMp=`on9Sk?SIfBAKqnBefhw zYXaY%6E~liwZQb2VSTodvrIoZy`5z5HD|H!g-;Meg_l(SzVfHCEC7UfbxCegw-80I zMR&lpE=Ec^HjukWUJC9B?7S|H^>V?(Q> z)VC=t+}&%sZiju~f&D1xjtAiYofGr)GT@oSnWOo)t1KG`C8RD8OGO~F$vDpC8dW6c z#hVLv>Z}FcktYQnI2_o)d~=(ddug~(QeN--h(ZN_d|a#INs0;nN*$29b|e}CNj{VX zojWegj@^3o3WiZSPvx6;AP48=X-}eEnR8XV@LE;38?OD-oo$5)}M!oCe!7uDwa`t^q;-n(zQ#M z(}ckJ_6dEqA$64Rab#EXXNqG|EI;18KZaT&xv#QD`X%c%4v`i;@-^L4UZu@XP6nupCh5RnRlff{es8@~qVOT^mEU zD@^Go@pDwiKX?54Ottv}fgk2`T1CgX)}$L+VV2NU2qHVMHAG@i$$LnpJs$puTht7-g*ZGf*_xT^tt`~x#buq3J`;TdBBhs(<@(c4 ztHy0;Z8s7NwSY1mv8rU;wrTDtl!pfF5q$?N*ihOx33?vEuNSqQr6c-DnN{_g%k07srY zUzzQA=4GuyH}fi5mqMYgRDDw3<8zL=5r}nm0y*%vM_HTtPB&{j2L4YC8ZG2w{*SNs zn*UFJh9W3xSKoi7^{GV2fme0+I8IhAdsG0Q}_ieYL~*lE8Jo#5J$aJFU9Vur@&~VJC0a-IMf(mN9urES&QLSVwzI4O&0%C! z`JaA>o6!PPI7_)4&D-%y67`T(8Zu$Rgro)#s7TdZ08yU};3(%AU*8-AL7j`({j)95 z9dAjb4WVIlL>Uh&B$;v8Sh3FLBEZ7D&SUf&? 
zrg^JTwmDMqk2#O0DFLyP3tMpV)&PoC_{#3{ z-&a$id?f1yp#qXJsHZ$hp}JE&nclPb<`vNzTUDlH!4(0O?dX!TrugE_XARtV~Z!W`gZ}R4?9SbGNg^DFi$X*f>5V?^TDu_ zPuv=9r43oa3c~LJI%MqANf^GR{`x%C_3qbNJ|f_?Y-Iwgrvq}g5}=hy7I|Oq38#?X znc+=Hn@0Pcbc9R3gqk{_`CGcZ+@BT;dz?1Y{AwV;OO+1r@2q1f7sSy3(ID{ly)#N3YgMG8ZUw z4m7fsFu*782}MVM;Z5c`qmWi2oS7qiK)n;{A4p&6ARQ44qR(OLy=u9LDf8h^`1^yR z;m2ip@Q`XRkpByC0rkg^^H%$urgK~RA*d=Or%^*j+5|o@zI+q1FN;%m82#*{PfLZg za0$L5{VBmciS*_QEdZSX5sL!_DpF_wB>lOg5_1>Q;)1=LKf@j?>G z5`=sxjBifM-VQHy$hWB?^7~nP9&*mpsMM}I)RjtswJ(i~bQ%u?RL$$P~&)pM}-d6R)Jh8In2^;VU_y5^}-b z5KfeZrSJ>Z(n2ZR0+dygPp+gG{>&I z{h7dOxpZCK`>O-A9U==+98$a9%)=7hE_T9Ohyan&Bp><^%o0T0U`jE^wTsnV;bz$5 z5wzfhiV>qh>LJCDBVznzb-euXB64l`lU545#Y&Sh)`)ZtUlT-ka6>HmL<)uBADGj(UE%@gs2@?b4AGiQ z4s$R;HzA0gKuiiTKU~C=`SMu)bxMzqx3o0MS8967m3{nrKlb=PBlQ0HmQrNzpBEqg m{{IZP{I|>a|JQ{zx(B^H^YC<-D{6r`xw4oH<=mEMuw!KgtI1wHgGP3gT$i6S6f zkX{w(RX}>X>jCq=@16P0Z|+}r?#%r$i2}kopQr7;_F8K{t`cI`X*ciPykWxzTFQ+p zQX4jGcG$4tKkUE%itki}dhEhqycSpGEpC};TUg&Q)7l_<$KtM`iG`t_`e7?AGjlx? zV~*48XHK7IIjn18ao3!ejm_x4e&V!=nGV}qCCh5O%BH(F6wEhlpxaLV|4ApbPHMwX z8#Yj`T#~j48tJgM+1ao-J5lpWXfx~gi`(ryI}LvM<>-m45?AbYZ1fI4dN}_v=OyV# z%7&i`IhpJ1$7Sq9B4<^PGA}N@^<36U4GejHc)ZDSl+nDel4rJZ?50^|e!pL(cA5g- z`G0=g$QY60{h!|;pTB?lQ2hV?y?fH`|Nr^c>LvjJfwTTMeVSfgdO)*y&ZN_@+2pMM z&Ye5Gj;6f&Re5Wq*UOiO<7ztlsseBEvTTt2X1*(ia`EE5hJM#gt= z*Q<+ko#3JFotvMhS2t|SG!lOFOP+Ll8kyT~L#8(d*C&~xu{1Z4>%3Z^)mBp@?&Rde zynE|*_t~>{__Q@uOC-GYOkfo?GSjF{zpv7Nf_J7f-(EOWAYZFZcHdur{WUo;;cBhi zmSxfue$_4AanVX8*Mc%T*5T>q=5|)AJaRjuaI|(+fPO=)ci3w5&p-cMpQ!Ricebsi zrL3bvCC98+3jf!KU;U#YQRPDGLReT>fS_~1ZWj5MMUS=)V~m?=X=Pk!=)IV4G4+0U zAyn)}Cs#&gl*zZVw6(V{UCg`XcIm;ddfz_3<(~Dw$&{0weL~2ErabUmQmkx3Y-55_ zoJ81VHXh-;#QXQ}_Y4p3(>Nd}C#O|Ly~-%!BmK4H+4S4r4t*#q8)Urt*tbmfU0i;C z{{908dWVK$pVD)bx3wvmxoG#_ic*`_35;ym#H9Sm>0`5F?kQx|c)yW}b%KF25>me*2l57}%$V zIJpd?Hg3n&<=MUL>JRN^$Aas={6vjBE*^5?%ozzR`@&R>AeUkD8CKOi`KxZbMF&5A z+y3zHznNKBZoYl{HrKr0y*h+H4o^D#=#Z1d+xr{2?!5b@AwlV!g2q5oQkU~-)0=iL z_;&NSricb|Gk6pig$X_SUbwm}UKMywd{9Da*H}wW5&s{Uo6m*rj2(zg_>lTAGukHe 
z>Lz_)Sx6?I-S-=6&dV8{&Z~}pKYDbWLK7#OAeE+FMWc1+&K9u%8 zSC?9m3#~|_6V_X!+4$qjzufopsN2@$T91dsFCUv8>MAU}EF^S`_o>{kvn=G{4l`AL z+eALupi1w&H1X*Eg9i!Nh9<_-*M}c6{9@FW$yySb5uJ9MsU%9ow?5lcgWqB9qlD@k zD>XD&cwfEUxu5t;JFYo>yYRgKx=DwfaKrrIEhb$hCk&2 zv>1^1`{6_Rp?H-yvUrosN`^OPy}}20Ek1k{!!BsZvo-In^uP7x%a>fIS5*yO?weZ!TNm3zK^^{l&-*Vt)4 z&R-FqD;~mEi<94wq$a5H#;QuHzvS7zg^AKbT)H1$?tF4qFFPlvtg4DNs^~_>N$Kd9 zWcy(ma_wgmw;bS&^ykpg@2?K_;YAL>)|K;NQK-)`@7Hwwihb`g$|M>PZ8OnbR!67G ziXZz_A9HKE?09IT#fg}of6Oks-3)bN;EL`itF~t$w{dIPQ9M) z?(lepv{<|8p}l8r-#e|E=T{fjYO%WFI6c;(g!7a2z8zCO+MFUp#mJ6%mTB5v70vyf zmbM{?$GkpHj;$e9Mg)r>jUd-MFmS@!+Pb&Q^ZK)W=Nj?4-|Hh0qWuq zoDHnv_W0xX-;?S4SC$vo76~D3%a$#!iggYjUtV&VIY#yJVN*-QGB7(Xd>8gT9ULo{ zl!y(Z=>Eqae|*BZm5G&R&dJR^i700`oRGyLB7fa``*<%RWx~wHjT;A>Q#2O76>arq zRpO2k4U{F(3rnTrr=nN3s8P#ip^hN-D3 zj(R=hikl{)d0G`dqKIFzY;<&Vak-g8#Xc|ldV6<`n&O+OX(G9+-gSk}&K(7gZ4!+! zw~i+$Wr@-5y zc6curN2HnnUdvb{iKxqeZ;rX~;%KZw+AF*717W*P$quw;80dH8S~`fWdjB~;C7WZW zwego<H?O+T z?yHZLIkoGg^o_BxF*6GbFRa;Sy3_H$((cto@=#3E7ELeh%Co(NV}A=L(2G%IHxCaF zGp6~ZN^UwrU_ym2Tj?xS{>to#Ydns#%ks9G2^`(QVCzmvvTH_0;&HIBUaCl^PEQ>L z9Ofi&q~-AJ-u(7fJQprh6cq_!6>eo2cUTP7$NUm3;&G6l#?%M7tu>cRzy3_UQCC5} zR%nZ1V|TGzL%QBAlE@Pi6Y;>(*oQLl^6XrCbytpEcTZn(dM3v()z2F@ zM+-gL()z|aNv7+%`~H-n@$tgdOj`jC?aDLtQ{TRQ^gJ%kaCP;~eO9FPPXo0PG6-2@ zZ&X)TV~fkJEY0KW#O)dP9}q};QQVux#<;Tb%;WJdBV;y|bIgX?bJDCvn!`-LmMAog zwxqo_?#$Cm3r!4rQ#mP&-PUs-M1(}@5WY2*XxiUciyyo8RAPFl!JErK7PEW`xps19Ms|L( zw;{(|2MLB%wdQkVu5Tn=+I6e(P8DnoSwM=Ofq`g?!8&HO0*A?|sZU6!F$fg<+$oze z9}UO?I~*J&B%UvPAB@<%>*R+|pKghXiIsaXdt$T2BVP?w2VamaT|y?5ZDJkrOLbmd zaal$JIy=&peCNTP!nnA&FCrY9-tsSY73z=-c38oG?XSO)IX|*6H~Qiy<~;;gl3QG| zva-U)02yc(&l$C9HmKgdeLGx3(0VM0$9k-t`)fv88i&UrL4n5Rv5q%%ucv%9oxVOh zSBc!tzW-2@#<8tG8L4=zE{{87A9A$uRhN~OFn&3kd3(Z?3Y6E|NORvGrf0o)M#a4I4Rw}&iprF?g@)} zB~R&Pjh3gI)MTf5t;Y;ox)5KA3J^*C@qXP7Dyml{Lw95f1U4nAq{U8sd2~oIW4dUo z;QK)VZvUHofM%^vU*jB_>KC=NwDe72EziBiK4{dypW0J%k#jkFr&pq769VSbY6RT4 zLx5ca4)NP*a)jW(1#)PW@BBL7+|uIzh=yK$YR+#zzg>N7!mZb03Jq^82B|HYXX%G< 
zQacQ}1FMi?`v7o@2l-FQC*RJ~NS_{gy5PVSSk+hRaY(7*l*G$>VPf^m*iu}J3kxN4 zPGhYZ6+;|%h8vzYoXoV)|1hTsAWnRe#DeH2xOJ{@h3sW-c%3kJ*u7+nS$A5;wc!vR2|H zJ;mZjBZl(YPHGB2I!alX?Q)(TdozCR!j!I!O*YV0;wiaA28B@P)#WUKxz7(64A@eA zSe1qV?73(8omZ5DFIdaU%lm~by?yW-0drOU2P0Yw9N*w4_PuVo&ucZJ+aM>EYcmmk z?b+U4qbtZf1l%cQ8E52l8FI(r6Il_{((7ggzCgd}JP)E75)?CpP4cIGwCHauy%G;i z#KvJ1F|@gZ%pe;&u~lI1qTZ)h&u07{`}ZGp%gwV%(^uv7q{P|1E1^S6Lz3%KGK_}%d=hb|fc?`ThTJl(6 zTGrLw2M;Fx^q*gK2R?>PJQ;p=u2Z5VRf|^3yuUgl7PTZ_5zPmJa=cHvam2cEGuXo zevmE@i6BNHO*^G}=c#;~Uz-4=%5=_nZFgQ}$}lOIFNrlozH=GHJC&olyzX)ESw_C# z>e4ODsc2)@r=wS9pWdtjXp1svN@NsibItb+5p+%z3+BDLctPFlgjzw~mw8^h@46jt ztl3nJz5`QqIV~pK@v&1H>ncnFuu%$jt_t9yZF`}^4#<_$ApEPO4vz=#!N;wtQeQ(H zbEs1x8?oi?^;~{;ocz7h7h9Ffpz$W)^e2QvOVmZdJm%NlSdTYmn|}4ezlwD9*Mu6p zd$@_2&X9rpnCp(?G+LOkI;2mF$sUTR-{*cetT!+zGd!s_X4{%3}6?d8Rp(NFFN zE=WgQqb*u-UYWms!Fr6-i+2C{kI(N7kBuc_a|{A@Vl(?OiH8{8;N-FU9$lpw5{(i> z=C#;?1hqm{9Q!(qXBxq<#7h)UeoS}GCxbfu?*OJUlz zqNfIGkFhFcR>qFje^U*p(k5`3l#wW{7{$fKi&lVu%iq4egh4f69ym2hXaI}ohmdEE zmk6xHA$2GCI6Sqn9Pi{WTS)Qtl1Ws~+2c_b#FN%Q{f^`PwCT{d@%Eeu>{Jz;ptM%G z1Vyfj!hPI^x*5*no%utx5s%7!+1LQM5eF6dQP!pQ#@ve7D!3$3P*5;AJuTB)?ww&b ztzWbP5+c@is!z5Qj}U{PyZ4-NkZinsSib%2&`3)fug$lY_-+)U`Q9NQtJdTLTse_Q z@j!Ra#6krUfW{0E(Dw4%rFS|l8i6d>Go+et*D#c(n54>2&OMn$Kgsku1_`v^5IL zx!0YZ&n6jF)sCcQUWZdc{?p3Y z$tPuEclJ*?PxiPX*Ty5Ri}P=sZxa#z--mBh^J!qX0`7TKfJ;+-DgSifqM3 zHu>a#Vv~?-=!Z0`gIYd%Bt$Kuw!XcEmiF1KOoptIy>9=W&# z@bmN2E~2uluqmB9+gX9MM?ds^q~%krSbfw#`?&Nqo7GUB6|FRBQD*&A)f^_S2DLt} zNbB_!7f9hXM&L?0C z?Hc7u>qQFhL#eQ*wHY;w)+#7Z+Qt3*_g4}yIO1U^z6x5Rc4pT!znj|}%iDePy?uS! 
z1%Pc~3DK4#BO{fh$Y~izETOSHaNt0dFPmCsY!jGHA?!W-n&edgNzRc~ECelvA;v}a z>PoZEd@z@RPs7UEa^NHexb0+CkM=~xQ8dm#Nr|*qAM46~(&~J(LMX7Ru}N;BF!X@$ zX_eGgy7{z2U)oFxx$2p5GU83E{Me6O>Zfbts55lpsSqlCE%xw3&9<#uw@wwVuIw}F zc$l~9n&Z1hI|0Gw>n_?Nt<59OOt`ZRWjvWPyr!z%SzKH^x!=`@G7EBL&kT-US8NS| zo2&;8+3L*>Hzn#0@lRhInF zxQG5T*(@2#cTq2?kM;x3k-+(6C-Zir-~z|R^3F~HfiI;>p!8_kkOf?3IRUR+q5?)o zN4<}WA0(uMl(xaOJS44F-mGZFEMueQ(}3D{7PTs89@U3|O;XG>^l#ulXEh>^hq{SS z=5EQ{51c$ub*`(vHsX3h$OHCGTefglN_cxAmQl5sihoDml}0(8q2F+_BDM2Hu%NSQ z;qpu%Nt{4XgV=epX^*d3%};7+RRzdUrNKoO6%e=u*6O5cUiRjlCtkk$`;R!hy_uOA z_05|%VaE0Tu`4T!W!^r^zS^uVCndnSG{ftTHOS!6Aj9KyYr{`s;F!8_ zEzpQZKmlw-?tZ* z74O$-F`@W$&4;UA7%V>9p2A}>@cf{l6U(`CS-%u}`RQot%VLgDE2&VwfBPboKT)!e zpmZ`%K%TLfCm;Xz9g{1^`&3U#MLw>Jq(tECQ>cVS1056tI>={E*oP$s5vZ)TR?^+w zeN*nQnNRogNC6d)#D|Rs5FHtF>os6ceNF$9U8iWidJhvoM@W`99R<{?RL7^=w`o8iDLa-kzNS>&*uvmI;`+w!5mR%>x`p%lrb7)HgXj!hVEbF!*hX$eei2H-?^!uDuZ2nBD zF@~kl?4HNB?0rgZdwUaVJoTG%EMW5Px4FAvWD2Y@{@`eetj5}N5>{UB`!xMrs>a?Q z8@dI@R_wwrM(B4Hhr*IIlPe9ROkk zXLf_|$>5;A9#B(0&&wMN@=(7$J0;g@^rX0Wjl75JppNn7q*c&JyO4G$xrx|*cvt9Y z41-s@$3QeeE-~@GsZ{9X2w8|xby)?7Fgf^?-08rYjA+3_$J|BBAJ&lF&N#HFtD~bc z-}2()?0G&uWlYQsiAj0&&=)Z+A6^`rZd5W+pq>O#62YNUy=SPWCR7cKyac8 zQkhTqHc~=UL%`!3Q>;8y9k>Zw4pL`$dA7?#&X65R;%NZ4Q7n+$4XSpf-?rCXbkrf8 zi^$8Yn?mc{x@Pxs>b}I8yg^OXEDJ~c9{_RiqQx+i!S0&q+|9Ax_G5_ad}WS5O6_Ek zb)he>6V=P^B+Z(7meXlf&2<>K4dd^^nL3V?_?!M`6}%anXt=jIOSp+aEucnq{hq0P zI6W=REkn#us=}V0YbLs1J5KB}L>>k2t06y4%k=d2G9Ef~C@|?EgY31}2t%@3sAJqF z{BX=pojLPSLN@#E7y0o|2(|cfO*DcMX3<}LfKZcx+(uFW+v>q@E2^u-kl#On{URv8 zBgg#u;_P@9PsBL*H4VdI86S3yYsfd!giG*JgS-e%D$_9Zyy@jorv<${&sPxwf9z&c zjmI%0u|3^i`Op3PlwjjJSjW`hpqx59+kjofE~HC?R}wG*IoKM)!>zy}o-ojh;)=96USf=;(udHis!t zMsb+RHRmt4lEMQ?Pkvc->g;xb938d9o`Q9*lIKj(dcnMFdaEb>ShJ3)$R00GJBU*OQc*WqcToxU*E}2)?rp@+iF4uJuC-J@+9J9 zSzFrL_N_GM-+ue^v;91EeNP`zInDcS#tDWrff;Np;N$1t!T(81lgX0;gi#CPIbEZ4 zOHrjAWGhOllhi3C4i2z_jg#XzS9?0;E?l@k3SGz*L_1S$0p8w2tBdV2i{S)FwdsMYzC>UlL}>O;By z{)4U&xLAY#^bx`9N4LlPO0(xSbKnK*#NOE#zk?l3nZIdaWz|Th<`4!h;4((q1UbwC 
z4{?CU>2gsZM1XAnZbRG81q(vr(C3l9r? zB#AnLL2s}witLFyjrsd|%szB=2?~5^dpcDYC1M94d8bc>!o9-?e4p{Wb^R>YPz?`7 zmZPwR`f>3bj|22xSuK;o<;>nL$)w;i%{q{3U%wYWf2`&H^;k^E3(B-A<0FUp$w3_C zMihPVSnajK+kPgV+oUrd+)UME8h)Uc*R1EUu-A!gNi#d_+2-fx6||FpAX6a&5i(E@ z@tRbmQJvqu6g2<b{wdRmoFom>Qpk7mjtq_Y9pN=*wQca*A+AWS| z$wW&WnV*~c0AvJW)}8BBG=;W|Ur7m)O1#fzgeSf9v||hS0nM|6J!PJ?OzHXMs=c#k zPa7_8<1v%+|9MB(K3R{ROS|Amv70spE#O1wUT#Cp^JDdOD!2`lpp^O@=U@HV+7(p|Atjld1g>M^SES^7qt{Yl` zVbuNgpWmd6-s#j&@mLOdC93A{9i6waw;x8_a$(bae;uKOwoH_ehpQ{IfoLPVU!KB9HI91 z_WQI;?U&|K8ZrRcEbmh-4eF!sG=o(py6c*jFI(t~#9W*lWM3a@32LqhXOOk+$dMyp zLiC@eF!Zg6>I*J?djPK1_=LHoWrI$rqjrnC-6(1wNU^)V+AsB#?NG_HVQ-`lH=;bz ziPT(Hy_)Y11edX(u5h`ye>U}l+WqIM4wQJ{O+p(GoeT=xzM11a%i;t1nEY4B7JJH3 zClHd3>>9wV7k9d_X)LGus%%Q>JTYr~OSZ1AESfx>ju3dzn5c3}POeE;Fc63x0XxtO zW0YJLXD1tdogyLQ>PkH(I8CIi;BsGE*#sO?O zhZy!KS(MxErp_<`5LJ@)0IxI%eoh7%B0J^Y{c<0$gh-MD@C6Xo4#U+_u;RRov3awp z6=?E20%5QB&%K}ap4EH|3Oxa5hp-dCm4R4pAcoh6oaQbOaK3-GFVKWZgpauwnb4;t zaJ=KqYXp4Vo)0f%r;j@Sba*K3GxYSmf=+qRVj~bi46rc74fAx_7zGO+S{edsZ-4)5 z>9h!f#3W(@$e`lN!P9tQRzjv23G*S8Q45Ng4XNSEn`EoXxPv!Db^ z#7ITOw+@5qY)n!!X%RqRxKo&**_!=$F6fGw_Tord4KZ6Fg(^X?tOban$wy*||OCW|oq;PqOAYWY(Et2gZeTEqBY;u!`ICL}IwVuq*b1W~#R znK^+^{$71yTo+lzCl$#_mOR~Wha6r)T4n|x0j;YZg?WaRxWc6O_r>SveHZ z#4B>{+&O|+u}|w!lF%42eqR>^1jCuHRb-3^Nt@yNIz5=;dDXMiyhD;`!|r$H8w-}2k% zOBGgE!b8C%XqA*aelJduE&R&RYc|%NmejT8kJ>;fry-GoSr;2io5Eg@irg-<+Z&4V zoz?$Vl&=v*S$!+`4mxHjhLYw{5MhwB*-MZlQ}(naXJwrO5#1uu_)2tNoDHGcG%mdG za2=k0JerG8-J~2X5hmYNpkb2|0Q>Im+S07XD*M!#E1(Qvl20@cN(9}8|qjpP<|CL>U=%iu8 zhJ8#o%%16#c(q_FJTwQB_Jj)r9v2PRIX?Axp+)>jXq>5SVo6jmQFuWkeYF&DwM|?a zcV2bN>_DDv3n$pwvQe0&W+n8 zduhK?s-|lTD%5Ir9ML-1C^dxkq1bbqR*qN+a$E2h9a-K-H<4c$MOWyFfLX6?k2&Z= zT4UlJ%XZiTPlFx?g(~RD^m693>eXFs<5t?{BeT{$I!f4k-43)J9UWC$5Ar=8OmLUI zJQjucP&~|^IbG>4T0g88_YBO@)qQUghP!~nY@9od6mra>st`dNx$38;r!y4BL0(T3 z9Nde@ZMt67~eDsUjyJl7p>QxieOQ` zW*F+hDuq&TNcs(njTtl086HI4h7B=34qNfM{Y@NWF6>qYw=l>(vdfl(byra_cuZ9q z!eJ!r$s|fG_Npg!GK#?Ww%QtC3gV(PzD9rvR@rf@n=Lw;hi0;Qv 
z_(a-WgW2^=>(mQ^e|3@BHa%fS@K$6-9HrFBJ_OUPi#W1r#gUC?oN=%?bpkm>)Z#)W zd0z-TuI9_*1&d;k);u2wweOC1Gme*p!WJkJa6P2qQ)A|Lb=j(=`Ki*F)yRMN1JB1y ztcw?sSAOWB#I^)xXb3aXw9h2(2sT2_n>TS73bFUya{O5mP|~vJ!?v|T4!TcJP!KUk zaT&MEgIJQlh*&E&UV9v{K`~L8=K!CL1QL}bI0Ry^A(lLdm#vvbfxMPOJ73F#c}a&H zN<>c{^S<=?->Q0^!y9GPoP24xG2zx5t5NCfyLW7DYzV$aa6AcpDuB!26{N?LAQ3Q1 zduG$9hjPpLPoD(_2Erbjh_ohUJK6IB*b_x9gR`@9f^v=&Hi2*>!~4}(jNJezh1B!~ zbKQRs+nYkF=8K4k2m)0AkzozfhuBLj0>G;vMKqYMfVsDr9+E)>7Qt#uLi_cCa7+Aj zR#sL}+-_k`8ojB)*w`7L-1c+0@&JP$Z$l1Lno1e^QIpEc{b!78785oSwY z#BmhDUV_d}WIA~r=I+M3nq&GKk^W(&JdATGhvLy^y(;wvL?R9_yv--jrRh$~c7bI9 zpt9uPOM;3NLwtge1kJ3hd@%e}t%_HKGzEi-fA9uI)R#Za%|(zMbiS8hODNdja!|^8 zyStAPpImj20#KYZij54L3H6R_(___?GnwZvT!;hVCff^dmQ%V&P9``+08L+<&Il|< z?8U!+E`NUH0m^1bE3XLOjgUjoniC30&B8~XiYV+YA(U4I@!S#(;Cz8B0qy+MZ@>LU zoOvjGB*BSr>QwK?I||{EJTh68%5*V#`Z!YIUmeEtXDztjmz=7zh@PgO97EeMYK;{FbU;x40 z;6l&8O<|g$r@sToL;#15J9rxgJR#X1pyZoS)DlhzRU*Urk`Ji7Uqgr?0s_QrZ%@x# z>6WXrg{y;ztG6%@(l|D~)xk>e>yupxDucvE%!)Yb%S;x=V2O!*Uu6}Q4C`^#Wq7aj zThn8n(6J>VepG-F2VX^CHi58U@`?CTwqU+YY)wQu8} z;o0N`_X&Zq!G$+7}3i3@fFKZT&CeN);i6^L>G)Pq+lKb<_%f6eLf| z;Ci~n`#ZrLb4d$`-iYX@gjxXDzj9U_Bi~;CxN^IdmV9>OA+qZ*)WurR)zd&tBJxqUzb$45(`KrgM&vvHAX~098_u{6@>L= zlnu-~ng9Le4fo{eH-(a~RP6~aL{|8!EQqBp55ABkgRqhfWN$R(M?x)v>PbX`%S8^O zuu=~oPE7DJ+0TriU{N`HmDuvvKl1pPAYqCP9wuKNW(1j`hnQ!YsD?t2==W3$$U&^i z*%CO2wGbmQDpq2~$5fK4grpUO7}6d9%3SyrhYkNP8#k6A%czzDu?az}77e*aUVH81 zp6_FNfi;)lHdJHv`S#YtSrH|)?tmvh^6gLw&Ds#xe_`#?D5-~Pwfe$(+r|@6BSm~q>j5PBj6x1q!%56im__VZ zfIC4ZLtlmnYS_y}Hbd|OCQm{IGK4~cjd}9nDTUO?Bd)C7;Neo_J*FonQfx<{K}7JE zz`}*IyXLAsQ%_4Q!8LezsI)Qw4J4Pqq!THh~%4%wAv|(&}`gkHctJAKc z&lJrOsj>Sq+(Sb{WrB%!2dkprnU@3l>IfSfTNK6f8aZPyw2+5_C*&rw)D!MkA3sXK z%zB+z^O2SaEkx25-khW@XjuU9#B*3|V~i!vur;pDhcIUNI1FWr%Q7|xFGP&I1tkzh zPuZF35%i_u>|O@-OO#NErvWU>c{icJ0>?c6LbRnl9y|_n$xf*Ffk!SO(D*)hAQZk` znH09{8YR&nao~oh!g5Z67PdIZKQ$1%`3U$Lmdc?5j+#2N-Og=-=sW0J{?FR3xYy(> z^zY8OS0AQU=(Ksu79Vg_N%)j|STyLoWWL?B)QuaD8wx-z44}X!Rt&<9<6tVnhMuKS<4QliH3H_$8 
zvF?l9uwejM!^NNZluT?SR9(e}wjb~XoiCEGc9G0TLT^OE6gm??2A(gTL$WT3QB}Sg zEXk1yY;}T@lk?Ij%%&m)h2*g1_7p|JqU;2JmI4a;8E;ib-ibkn1hoFDnen*qnqWmT0fZH%O=NhaGo8wz`g0{`WF1h`KR@ey?Ws9W$0v@w?7@V&X3lo$eq683ulQ1wc%h74-*ZM|B69_E< zf-t@E8fa~eI6Kk+1PHRtfb`v^FJ9DcAZBapS$PfN=noY&B2qYaq>qAd&84N+QP`9xGj&&SBKEE3P!5^oPATx12r%LSJ%Y$X&aY!1>NX4IZtE7cE1Lv|F77?R-# z2#gwmI*^&ZqXp|Con@>-a^cF#isBoq1VD@Zg`Q8<0S{57`e>mGPyuSnM}$gNEu0=8 z)k{HboM?W0-$2~#2qjOCD2$`JBKyVn0S9j16WXk_2O{O_bBzm)}J4(NJJH z)~*06hG$xudETNci&;F$$%M3_ydpl15I$RVnR6)xt1I?_y{PPD!0!<|3Bf*6PRk3H zak+&5#@U~)y?%(^M~n?%9^f(xG$hegTr@HbTf&cCee!-!ps%lQujAuWm?Sh?5MP5G z`0Yr#vK&lu9b`pLgm-kuHQ&7nNj&fM;9sy zG4O*N`}h6%=S$>+7Q-fdBzJ(+5jwko_-B;O^dfYlX#=uhQG6<}e?34M?d)wh*LwQ< zb^MAsPJ)sr{#X)OBk3rRrK%fB4>|82_xll~y59W@Kaz5ACP{ma&nOc4DKMnm!`DOu zP>o~??>9F!WjDlYDP?^9{P`t>L&Z#xF`DMDYCe>OP*F}*+Z=Cg*>2fbOVk455b|PG zD^Lb=`^-EL58(Bn$ZcE>C4&K=t&bz1;MwmANE~y&9iWALhX~TB-&lcwV*nuMYD{Wx zQj}<2m}Ch>2wT8WV{N0@s;0Q+D%)e>vAc?h`#oX!b*HPBD zp?^`778+f%nq#GKyvLCjp9ZXg{R#U-~kT>KrOg)`-Z^ z6jha>=GToInIWcQJ{nMH$-s!KQsB@ck%U}4Vcgbe7%Ax)U_=bhginWI8Y+$n)=DUn#!PauiC>J1R4qKtwWEqI_sK`oQGcCot}u#)=>I^Z5Ac6A?nh62b1 zYmITi=NZ?i^k{D2)z5ulV^xXsNHK7lXbyApiUw7PM0#nh+GWzv{kaTGK8c;{A zVJ@i?bxJkQCf!oR>qHok**(Ram{c=?^ao8_KhM$Ve`^k~%*>6YWvhOA<94JLQJlN=QBfg+#??Ui{hra|Nsk^~53l|LE>*udIc&@R^M7jOqoz59MxENRL+9_l zL`d@fe7^hWdWI*JXV{V(TS4;3A2c+amGAd?l5zoB52sfY_$jc6jqn1es~2y^9x6$8 zMwgas+DG31XZG#ManQho_JP8d_QV9K)MbjG%b@C0iS7sa^_#cQ&`9IEge4bJpFpVM z)co)#q1C~&)@Jr2F%xrDCIQG$yqWYW*H_MF`uUH_=v#nWUeKFJU0-vLk8my#P5>0V zTc#Ll%nUdP;8(X*I2zn9Xeaa33`?q&=7&XER{F2Zw2mnJ+l}WcAi9lL4AsrHIfI~oEPg6`_ z*azKG)>i=ZI1F2mYpFlOQx6XhWQTa_Llna%E&UKQ=;(aUXPkqRf8%dJvb#?!M+4Rq z&mz&cGY~**?d&2!f06I*VWI2M7rxBJC0DW#y zf6!@&!wSXyy}erJlcG%-8i5&U+ES`Y;9|8Ux)j>^pJj^G??HVL@{Qa%fsb#Y7uU$h z4IBP3IIS;r8{J>|2S$L$Xa zILO1(%kG6$DxVxj+&u+bhRgCqa4*MXY9HF8xhw4kuAc^1 zNT8lrgmB>Z&5(8=yn;t&RrIEISd(rB-2O4Wnyqh~zkY>5}Rpu<#+79~}I7 z#)NC|e*Jw%yWj)ILqHA?HDyzUJ!pTogg0ME!7weO0Pa`>X{;KF?@V{)cB+i 
zfIm=mcA;a5h`Q)HAs)-HYA{I&+BzAG$d1H&iprO^H`HO$6(!UDQBz4@evjNFL_67D zJh%IdA5ywLXa(-sjzVWOoV+yQPSMeDrtE8?LGFoor}nc#uNh$8xr(G%_juc}wM!}r zojUaUK1QP=5BewC(>>LG6{%o8cWFdi>a5_*`R-%H~d?vB;`a? z{*v_k^%%B((49AUoV@}L*+iir7hZrnA-(%y)u0OZ7+h{dq$h1UaSD^fBTe{23bpI< z<;%opE!2V)WaaqnS|0I!M4JPJCaC7~RxcCRB&dGbUiANxKCpN9JszOOQDA)Kb5iCc zW$`gzh0ZVNV^8CY0Vv0jCAxEffMc#q9U$<91Z|UNBPdu%yy0|Qun?L@x88R|W(Iw3 zX{EAf$i*Jq?nJutS#r;fCoNs1c3?aoB1am1zN3CGIrUaCHKc``5jQ3%c&`QU(ZNAx z6n#PpH`8*isW1ruGr2zgb3LrCZd|v^ZrG652P#^-8K)t$$__e4N~;_y?KnG0Pa2`i zmo1R+bQimjkBCneizrOR8Pb$Dwg=LKN^2hOnG03&PKJY{aQWK)QcL%%k`SFsN{6po-X7) zTZku@_aL+lKl*Wr&Ald2K)p0=Ks*@=HMO<1u_IVsS*=W;)J!ntEq!Qmp(zVN)PNWp z$cYXXqP0qypiu=AbSs=H@HH@2wKqF5yru)lxT&YhZEZ|-OUAuFb+|6Mf~PCCKhl+vA3*8cGP5~*^pA9CDj zz=Dh~2?Br=!-0!vM2LmGa!)zv*DB2cXcEIFuKng`ZAM> z$z>GrRo;;afhivB!k%)X)AEc%N8d98aCnn+ZR1Rm+8sr1vqRsNU6w5tgT>P~#ZWJz zis1GlO}5F76Wv!yg(e{zeq~eODEcSq1LWS?bx+F?&j!VCv7{eZE#wkSz5v3wQ6p z2}2l8N-9e+zvWQ94O^ijYA&GHItgw#6VQkMUVHiT=i9p=|HF(#jU-)zzzw7~HE3}O zF`2dE9Qr{;iDW8QnaSt0B#*6z1lWL}n}Lp7(O_N~ND$}h)3UOfkku2mf26)c@oY@V zvotk*QCR3qolQYHcAP9bPW%W!zA||>ra0^VZGx0!-;TX{#cT+1X*(hOe8ZCm1 zQPa@6nq$34FIW$Zq+Fj~F(BQPefsn%br~ESp>qOYJ|bWSdf6@(c?q-}5n>N9C>AqE z-)aLWkbpFXBaFnSc>;QJ16uM9Q%JJ`aplZCB^@GI!;klV`VUPhRt21PlLWLPCgS%C zuBoV7H^*GYOZ%jPNzenjiIqJaD@Q~Toaj26o?i4O!@MOe1{MJlN>oq?#n=!)k7BbNkvr}3r^%D zx|W_Uzv4vIhm&4@=Ib!<3hq_t*Y$`&U*MhHt*D~*Z7&fL5+br%5j)b^T8G0^Ml|XX z+MKYT%z<$4W4EfexCb0=!|gOiEvbyH+)2tg>fW0BEK9y1f{~_K9|bzk7zi8wpk(b$RAV zYtX!a!rovxbv^ly(d;?%Pu(>zgWbU+%nEG%s$g^Wc=9vGfQ@q*g>h&AC}?7AQH$(S zK$$4=zr_hQU}`;1_4mJvNZpl!zwp^q<&$lpy|B5`mm|{~c}KTl;3!->BvAZ*hcB9b zyBwJg=wd}9o-xFa_zF;(S&X9sRGl)Sq8F6KsCd9SXqDFKr9IWbZTb`&CE`8CF3H{) z{E$%i2&O~m;C`GE0(!7uc}6L)<{D-5AAfN0J?Lm}Cyf|PdhzQU*M{rAlpSzjEzj$@ zk#=>5QJ+W% zq&E@(c=x#Z1xODN_0>_HkONAA;tho3p+=;SiD<;)DNsWZ$Ko*0d z5#zOmy>>5`ek25+VO*6VhbRSu;3;T#@N`$;4lop4pDv8&MG)T}tX(%6dkP@TfP+^7 zRqwghZbI7FQT=GaHGy4RjypmitTX^A5SbP6xBc!CQW9;Ns<0HHXn@ZHm&x@<#7G@B zg2?J({*+J2=pm{>#_u~liQY{N-?V$qT*OI2hmwEK=g)`nNV`re%L8-PgVEtcvpiy^ 
z!U4hmoqV6%z!F$H2-2UR3z;}sQLu!-WWqzYGRkmw7$NJx4KpFgqEu6XLP2g!0qtB5 z|IGn@JI)e?7YJlYs8JL%;b9emgp~L6+xiZFUhZEh*%Z9~#XklyczMG4Gv@p0eZxkD z#&l!z^^J#P!9NzUBx18PBC6avbCiMDR~5A%T4MKu`y?dEPIq#rAUp0_ zT@|-97`LpDjva7?QOXxL#6JXujAZ3Ain8W)KyUjC8#uRukM8aT4gC?Gv|Hzou0KvB z6U*Sb*t=mv(qkOr7@R6Y0KM^c#gZ(IMdd%l1FFD?N5_gb*^pTVC}JpVq}|M}Mc znThp1@c+u5xR{R11%VgwpA_#-^2QhUv-^;gbx&@q2G53z%7k#bM~farh^Y0rKQICm zF3GGU6r&s>)nS!C$93&Ao=Zm8Y3h#_f;H;x?OhWdpq5Y^xXG)2vqPIfl-3fQ{hlwm z_f(M3aB-3#lZBblIIyP`RaK%0@Q{J);O9yL&O9D1T4II^2KRdMgteU9uwm&D@mhhA z%rxu=eNA9AZl=H$c&8|_YJ~+bzcA?N^xI?{m`KQjPQy&PeIkjJiPGP0X2hRi5NzK^ zxJ*1h-`sv2ye+zOsa~Idkt5G{g94x-ix6xH$WEc6n8CbscWl8Fkbf0Gwv+*gut3Kk z$s3Thpp`=h&jYW*03`*{Di&4IguZXeeq8+LLYxrEZwqx0=jjFzAE+oK0iADB zXMW6M-Edu$8}YR(CSNQ`z|o72tYkR^v9WD&NH zNCazr3P}jmK38hsL4fm`kFsC^zah|TJo1$YwXY^Llwp(xmhau70-%8BmtUCdJNQ)`f9iPZpIG&}e1jfFtMKnZ=?i?Dfe?J3@((m{Hu z9h(Jh!$6cFXeGj_PUvtaDmO(6tK6^x|2esnfygUZ9PBaDMux{_E$O*Mx=L;`;cB%3 z0J?=9PjS-V2R9&5IS`p;pqb}=g&jPcc%{F53BzJ2mJ@0fH39&Wx^N(O0=YL3yB3?9 zNO|NMw7f+!)_ZfmZ`ve@wIHT{;&CWzrTeidvT?OfQ|d!o=!*$6fj5~%Y|*1kovx7I zvfh?phTCOP}(t7qVxiJX^aWoXF z52*S9Pi~H0m1LJD?lTV#m2R&R|)dK21ffY6Qt)M*egiK zr}{H?umd>LwHPug!vugG`Cq-Mgdiff;6S>_dY=iS8GeL9ju`j`@IJ48Q9iIX)IbpW z`nVblO@QeFF#Z4cP^%%EX#xE<$f@Z8HRi5g01%7bt|6!kq=>4=6$`}KR<@vtU2e0B z0M?6q2hQKUSh%tf$1n)KnnLA1B;XJYMkgAOoH*fOSBnRnZ;lNC(+c^R<-&!p9^Z(4 z3sI9AiTdnnn>h4T!lc2?*dm4N%xDQKL51ejyEFvJP@hX6ScByy))x?uEcW(`n!|2C*7X_a&l?Z` zZUOQ(paGF!JJhD+1{2~}p?ESCpErK8l>`YndXe1!Z1{64dW9w6NhN0&$14sx)F$I$ z(ygkXiYH#4tYC9_tELH3u8A~Nk=+82`U$}X3@Pj4)&smdjY5Ht!=MHvl%K-(guWvQ z#=&Ez8CxQTm~3(B9A+pUvnhhGo*YZZ?nf@xK-0o?Y=LWSOk`XP8F)IvwJxA$ltBT+ zU=sRcR33nDg9wvobGZc`14fWA@*U{An}7dZ0@X}{kG;pK|Bif*Bajz}#|Tb4Y53K5 z{`u$g=ou)zuYj}R5*)@rNpJ^ew0Sp6G#0IyPaCB-OrHKQi5)O{gB|8-rB-jUeDy;B z7q$RcjoO2YXxkYANCe~21X-DQU<0SYO(Y>Z=%er`M^7Lj4iRKvXw-*?FY+eSt#n5+ zx{K;y!>+;=EeX#PZrMxL9DML&bomJFL%8El65Jax9#zZ@p;FxoNsHWc1Bsn<-s0Lo zck4wGR#$fsWO{KwVQnB<5sg+FccQ`eeGNIwRp0zat|4cV%o2L32o@%U40r{_24_SO 
zEs1$t{6O5txu8+N_Y$^y_iiT2ng~)eP|!e^vh!+>3BxtiY&;YxIPAg?NDE70RnkEZ zInI4~gldmCe~EZZ?kXg$4_dC|THFmc^g|tI{Gol2olx7ya+31bkC7%IPv`?a16$PN zcWE~H!SqxM!c-c8o^-O1Hj4Xexn?fuFr+gKpJ?%n@p?-SWw|fC&+zK zphig`+;Lp29{*0eh+79^hyw_D4+K&OX(mFKQQ6xs#Sc5!9Uq)wMqa*#u=@!`$~3MI zE0sS-7E9pzVH3jb9VfwA6WoJV zJdw;5C`7LTbZ$@!=KJ;)6(A{6q_DkNyO!KohAwi(K!|^PHhVvAiNdv?2Zq^^R|z9P zdUnXcBw7T+Ah3XFqdY-id7Wtcxid{}Xl6jhZOG zA1C(1cU-Qk2eyvLsDzQk=4G@m#2)uV!H)?DMbIn(wZ{UOzsQ- zghLdzKxr)<^sDa+k_D*ntT*)hz7@{|Ar06Jcfa%*pwg!@VEiKfp`mKj=_({vd~9b@ z-!zNxG6a$=ML`FQ4>b~-g-Ne#4OHx10~JSboin+O6B9=6lLbE|38%d+vm+huQ6(9P zLGQ!!BR-ShO+~2W&Tc?4`Ep#O7>NQ}ALa!%#sbjPYqj88t$Gta=*n_Hy(x<2-S{1_ zf1wPdVcV8DWepKky!-KR@K5lTQyJYbNA>Xv97s(ispB}V3$XiU+$s8%B86@N&vcWu z)utt@-F$}lZQ#PCl0GiHPaG6kZZ)Jq;mMW7>EYvmmq?oTf` zfG|8+(x-`=q<}Ia89a{hLI6yoISrs-U-Wqr?BoK>TI32Ff`>3pX`EKC3)aTO0}7H6 z^3R-z8@Zr=gX(TP7C!!TMou6uEn?m_=@KC10;W$AFr{a3Fb3L;2o)B#SP(2swySH? zzI|=lzrZVzu|#ed0NQvw0}B2D!+0SsJ0?zk26ETA&m;&JebQkWDp*MPRG5Rny{MqE zRt|!=C!f3D6`Io!e3JlVNtehAhUxJxH3H+nzmVIE(b#!|#BIVw;Mz#icHY_9$;Ks1 zp4^812PS@p+uD_oUt|C#rC(nBNd?V{#862trvYdsH+1^YpYq&`WBE_qd(xuCeBi&G z)8wT#YzQy?*DtjGHU5m*|F4er|FLfc5i=|^ICMLpG&Ag#x-jyDHln0jz-j{4*rm>J z7`LA%SK0u%X5o^o7>EF1O+|^5O-{`*irjP6X9_#p0-8ZBy5Daix77iStT~;LeRLOF zQ#2mAY(10+@^2U*?f+u$&Es;;-@osS8NRlevG2@KRLV|5G4``8Y0pvyNuo%WA~j>| z`>AD!Z0%8sP)cSjZMHU9B7;y-iX>6@^Efl}^}T-gbzk>&J?{H){c+#T@8?T$o}csc zdB2bMalDS#>vafQnB*)3;JWvRksVSbs7!L~=hUUju6{#|HZp!`eM4@qqD$+?Vw_LI zLCnRCHp#wuG(^xWOc-FFPE5PEi>wpE(^2ZdK49z0AgGQdKtpmK@V(r_KQ+8GfVa;3 ztx0QPnvuaw6!@R~0Zxq20l@cdDo-uU?BV(PkH$^E`_78EQpl2}UV1^8W$XGARrZFx znbSq;l^=qWjxWpiYEmDZwRB_V0mPX70&&&IqkQ62enM?hlYUOKVqsK}O->Li(F);I zau!&i_*g-HtUoN#diPvHpO{)h1MNjbGUnd+tIKb2qLK1~j9187#xsH7eWRnJ<@948 zOTwiLq7y43Gx6V{tErCZ?bnAIKN%+PuC-&C;^VUG^4p-(mvGt-;^s&`YZ<^Qi#Hz=XZQcf%Ej5jLrDOfIsb2KzkHPhCk+@FMAXzEUeS|zJl2qtm z3#rRu&z|*HmmQ$QQr}34Yq6lyw21&dCjw~ZF{(j=REJ%39OWFYq}&xl_W@UJ$e98P 
z$y5EIp-9pf`bK5P8PpD%Z^u!x($#FgQ}vC1ux0fDMH+2t@WW1WMv^+CO+ODLFqlp#=yu!S7Uacu@%ZaOTXZaG#^0fj9Gd3ktvT)4KBkMej&OqbV-wq{uGylcSw z*PbViMhE&=>gk`K?HtmKB$we~ z^JJLH?qc+Dku5=wmFXBn1lOgH@+Lj9Xh|(XGuHY-ivz@TpKsH8L2Qy8KYZyUZ*ifg z8WI(vGKfm~{>W!63{^=sKXs1c;oH}*O*jSK(jJDcXG7m0W_aJD1}-jT^M`k);Ljl& zeO>Hfp*jjDd2BEAw#v@vlu|y9n=pDiPa4{FUxg$->Z&R97Lz~UWriDj{Zpw&f+GqoQkkSN8@`nuqUuWhHata5%@qd$bv zC)?Du=wK;>NC;$vi84qy0=ahQmL5?T&-Vd7;=6ZpRIC(SJki$^NsTH z*L?o?!{y`O&*+fyxk zUqvHIXn36DVmEpuiTo-Z#7C40gmm3Pu_7WP)m5;bt(1qlbC>JCb|0AZjS5prY$NC! z(iO=)Xk==Vhb2&!;7#X39G*f@mNJjKtCJ$9KE{w6dQJZQFx1sz@wb6mL~nCA;seoI z5C!$#?nMeNWJ3?s2;!qg-&Z^iXRvJ~Qt}Zn{VgjD9QpUfsO_HgnIdAlm3SFOk5BG1 zT)qejij1^01q3W-U%<(d5ttd1`3!&wKxnRxf^Tr9oCp%UJ6j#nrY|*T?{s*fk|~>DpDe;K;3J zgUcCjjBKOF8BXgq0pptBYk-x2C2NafgzApuXg`u09F>Yh-5L`pZHw?I?6QKbo9nyI&x`6hlO9_4=qE@wrVfdKGyGI)$F_UreC%?TL(ZGTpFGlN8oy&$^TyokJxs z7M*=vn*m8#w5YVg#M4^3EsfS6v7EtkZs!WY}PeKkN8^?2wVv$+jxy z^Y+ht`LfG=2ba1EJH^P~D+z4q1b3=#YoY0}sGUO}B2&VIyD!J-FaGlkeRIZI!pWr1w%;ydSF2Xo5}_fGK-LnOQJ1K&Jpj&^w^K<5mAD)s+7q5^sY0Xc;Oec@B90p6mWzJ zBNSuZ0zYC>Ad4~=g&@O+EZOI$a{A$GBpyyGcNv2Bg6e0pO=s=pWGDd#6dSa+csS2U zzkk}Xby>-fITzIg)Q5o>^L8d0H9iEW^zZYf1Nnzs>_Kq*`m?DR3eDGjY$+DX?u0-auP?1r$%hMc;?zH8nLeYhRJ~ za5qah$CW|s069ZGJX-&q`lkZDUOc2oFgQmJi2-LA#xO*v+38|e%|2Gqhbu@;M6b)A zcMLTwx-$9=D@_jeprH6(B+E(?Z^60bpl;f<>DDvOlk-Z1K88~(Tfw=*HB0siwZG-_ zb_X{*y&Jx6ukbln+pwc|#pE|rMNRLVO)<0H5;ZSc_pPT$U33zQ2=N=0UI`-^76BQ{ zYvNh?qh#QFpkN)No^YaFAT$!WtCTfRmhNQT08U|fPlQ;bK-eHYIl`sNaBV7i3^$qy zObb_w3Q37y_z1^#i&f(L9{2o|&-SBThA1(W6T}owBSe%))LbtpR|e;7d*vYvVp754{2n*- zisQ$p&~sDj7;ofus7>m&y>Rh7ohq--n~=6HoA6WjtFFr~DNFf*i*;6zz7(R(Qctq} z>Jg8Vz2yIMxOZ5?##}pnPa_&fSv;M#h_f0rY$U0;FrM)!4+DBDrd4tFt_x&D<=c@I zSpCGL5Z~^KC4Z*&Xf!%A4fQ)|m5b)K4w}dtADDC_f2Ws8*mbKk;140jbh%*zlpnQ$$!MACP|P6he73UAQoesNF8w zQ2Am1o%{C%Z|;PVxV$R7=qv7Xia7D9%*9Pm=+@MXLGA12{RN_#NsE16c*)bJ^WV2~ zoR1nrX*)~R*6H`1>T`YG{TiyzoMPSg!sk=ViF8Y8yS6iHdAZy_G!3MYofW)`uc(30 z8=J!uLzZ7PnD`_9PktMA2Z_`R0-H3Lr~t$^3wULkNJQ0DtQ}y4x6;*seJ%FVm8gwH 
zKp+HSBHP}N`M?38k`R+Z3O`;9z<@Irk|qkkN7d8rRg4@7+`1@2kT$(xrET+k@pF?) zB3Ce{Hg-eWS_#4mmTkuCKW=MB_ym2Hp1MR7VcH`3C_&CA2%b#pT76{@b*%_97Gau8 z#6J$I6hU4t?<=m_;Z|tqfuSc$&v?_OO-P75HiKF*+?n50@z!}3+_VK3R<8wz|VLwUH+pf|B-18@LQ9V;!2(@t`xSb zxNW}bT}Q1OxuGZd!AY3_1M+L^F+gM8(+1o*0xP!*Crp>m$lG$p8rWk0` z4Drc2lGsYXOzyGFd_W0lT(tD~Ww-7V=gev~YF0lE&U?YnV!Avz_w zQ8umF@B`*$G9QwR$@IZtX3?sh)Cf*--Tp+$HhurIs#dpZGB3Uk~f`HIXE1S9Cq#estdKP&vHOBd6dOM9V1h1@(B9Elga+>w9Yi+@hZ&mW^ZU3IOS zvc2%}^A+7Qf&*7c=o=k@6KG_cR|T1w1Fn7|STNJnUH^~El8$P1m7h?XG*_p6chyO# z7SxV^UCO_%ujPNRpfzc?y=M8-)N*AHc}KO&ZR>&1)s_AI)~u+Pn*CrQqdSZ?$=|DO z?p@y?cwPLnPF3;W2FD#BDa|Qgd+pV_mq)~Gy~Inb!YS)c&<9C}OGKHHM>kLTrj1Dm6XalFE8{yoh1J}B?dxBdU8fx~nTunlsU;$Lt>aWt|jV|`7cSG5&$uLRUHB(}&sA?1E?Hryxj-$mK-;fj5F+CK~ z*XcE4nXHRm*jLA9;Ej~7h{8zPvw$X_Ja8a=T2ebe!%(!`zcg&zD`H2^Me+lFU~APf zytplC&HI36K!DlNeUxF^yf@|L(}}*i_X&r$Ngy6SxPf7XioHT@tmxQ{i?eO0rSE5) zI&3P7qw`f)3h?zP3nYee9;a2g1MTY+njD1|n3Q$rpfd5b&84WYPC(5Uk z>O0&oCmKvbhJd`(s;lu@>E?2I*_>2%i;~0BL_$Rij#O6@w;cA0;+-LB+i`TQV$4}S zJc^0C(y40nZI>`_Z||o~WaHK8YZ>?%zfj$*uBw}5jFYt-u8|eEWRxI#u%zQAHdKP) zKw@iB>OkHoT~(^XFruh+I?-5M1_bZ##864}W3BQ@=`Tj^tj|spI%Hhwu4*k!>dzq& znHLrD*E!4B<(ums^^N&+?4{GEpD)HNE+k(&~X9LkFNJ9j$$|x0SFxMDPXuLaVFM0sJk*VJnYb z`{KSwq}Uke`ws?jraxz+e=D(-DY{~?5+8K11A{-7jK5ZzBSQ#0`Ij;;6obuZ(pj)t z^>g5OFi7UbzmRjaDmDmIu^zF^eA(rty@N{?ki{?+Wbw;a--u_3jrafqYNvR#07wd= zb7Vbt*IGj%ds78ipyc5L3Z^0T(~&^xMsahc1doJDCbI=T{_(7y1cG#|NK!|#pkjE3 ze)GtAb%50(>!vT|6LZzq5022gddI8gm7erwY2Qv~B?&b=pi*uo=XwV7HDDx|QBGkN zsFAUbZZL416LG=MsXLx7G)BVc;mO!Y$o@WXAUtVq3VLzG6Fzt4jthjdjQIm?F2r}D zfwq1yQ*+6)Z9Z&&|L!qbgB+w3WP_s9C&9NOu!zxpMmbrNjD{G(PU#RWZ07GRwh}1b z?@~(Psn}jHA9ry0(WjS4YgS3V<{0V8u8RX9Ik8-b&8yvW7-S_ePyk$BQRk`{P;w&KKHwsf-duRjFtCw9w-&z0wv8q628bMws zcDOSxtc@3&D$*_|?wm4+mdDISN^y7wkP!Zm!j#lZpbOwE1t+dKRhcwIgzKNR&8Clt z6BJpjuG2DOf`(nzHkW}6DmEtc2@&x$!-*w-5*LR985F-D%p+7T%D_G1S^`g|gq(YT z?Rs{Bu^8dcj}!lYLAxQE4U*2jQ2aP0#DQgO5~}NPue#b|G%s`zk5O$oAruvdGiIVm zfd#r$1Y|BqWt6RF%*#|kDydgsaAel<0tY=HiFza-li|1gVEY3OvdC$Y^b%JgH(=z#n7FydcBQVcAuZ=*eKJ 
zMXa;!d4v1P%ofh#RI2SP0$#0_0uEBBoX`LUl68qF3&NoIBxRJNZ-HB*0!E4?vmWg!Y*vTz`IAbHz5@?YP%~?bAR9XImqs88} z8dr@_X|GSHyjYt*Kd+l3)>%UCi#~yRNs+}emj^42ejMdvUSeNhg$gVerkwFz4nna- z6jTubke|AAj>Tq;-k)fY`-tENhe&l1ZTq=wSn?pj0AMJ3XgjFbhGMK%%IQAx!n31?TrttpmGaMHaVI9*C zJ9Tdrtv@2Ov4EWFMf53@w1t@)_Var`v569wn!=lh3QR4T?<+kdkwaoE^5f?J1gfOL z4H(*2IM_gkFb| zAwr9vL870VUzt+*qK)p1oCVcHTk(hoU`q*S33&VK z0`vje((ko`_W1H;txMnJ%`v;L)zwuF{8U#98|QJ-@ikNw>Nd(}QY-*Mmk`c{MaN%? zO*)t(-d*>1=jbsV{?n*;r6YBUUF+)JJ9QX3{`&@P+qB))%+Wc%YkQNyW^Mfcd^Nw< zxfR7-Y8JeR$?XyMrsQ|OGxz6q>ocTbPODXEVL3)qrd-z;vdX^Y_|a#6Ugymr{NJhgrZ7Bq(3nu&095~PvB>0fTJodOk$th#M z1=q{RZQ8aCCeTZOROXy=m_`&RE~u+L+oMMhp~o55V7+Qnd-Y~o8io2cnIa+hB;g?1 zek!?<)zqmwgPh9pK&{$z?ku^l%~eNo9kCWg&KY+r&z`+5wKQ!_ohP$LJ!A1pHmt~- z8v__kb!g*vP~-Z7Q%a?>P5br^Z8K!=vg`6Fd>ywG68=S!ne^_S(+97*vN}NdC`qOa ztDm+uZmz8m(jQf&4vB80{%jX&Q*gGuwK#tpDGTGaY~H+^Ue?<+C`M@~FOF-yfO=0R zUrVt4z)#6I_Eb(oq)ifAgJ*NDW4EnS@)CP&{3ni3k69DlPi-xXN)CCOT(CDke^x-= zklwu~vORqP6V+{I58_X(n!JMAcY5&2@0l7BGK@VhKwy!PqnfXWq-SL8y}Q^iZN90g zsWP|hb>;G*os2Iy^&KH@_J@stpRbJE^< z4R!csh4Ag0G>I9xikpo{_${;AXT-g!sye%+P!a~|iz-*>ROKbtT0ntF3UZ3cr?_a- zwd<0$8Db>NsNFe7mNU^=&e_ATUT@n;oZ zUY?rnfK+HH+g`5e0~~#r6to0R`iWb_^v`Rr(+&}dG;g$CszG7!l)7e+!aBSOwiz?# zS7D1bZoKKMlqm{DOn%#U4o#XgF`70FD~#p8R5Dk4{0(~0cZ(Np`;%>z5ppdpEj;zG zm2%*qLCr?~Iwhgqxem=6MaD|9N{%tqKJPqa50J?~&v!NkFy@CoK?0gz@&3rBZdn{; zabR2G8zlytKz3U{nhqW?U^<&K!Yp*BMfIUWz17vNqo&{T-?<^lp*oOxM@azr9;NZm zML|f(MtGR*fs3`v`-F#Eso~AY&@vfv7KxU320J!)R9*EngR1=HmaD#gRu_-}^ZA$a zA6N=%tD4T$pLyAJLr=dxo*>cF!GYMUZCbYc`BYD*=~%`LM&&LCBK?R{FSmPqx;JRx zKxRWM$kwY%oycg5fP~u|(am&j7F55nT)B!?l%_E`uwREuupx@eCT0}8+c;{SbLgn3 z=hQQ;*FV^txiIl(9f!f4?B#Cjt(!adD2vzf#nji1j*bkniQDe$wf?Z1Zad>?)B3%2 zNriTj!Ar$vKd$r)c3LLvux2}Y20N8I&}~|LZy26kBSxH0y>R#LT{@#@9QSt59zJ~d z3}1}4UvTsN-ZfhPlQcFywQki))cl;9Qw|wEc($K#d-im9%;f~XICpLsPe8^C$UP^w zZsxfQy9nQafIdjdBTJs8Bu+Rsou;>uLa|2-ujEW!aDbD>=ES8r{jGN}vHkRzdGW-_ z$TQV>*ZfDVB1MRF1>wwiQ&RAzM4lLhg{p7#JaA3aE$hB@U;jGl=JTm>fx{bBJkB>U&VC8;sy6E+~_KnlC($c4 
zRDYjg!@BJfGb>Yt+^cOB>rxdDg+AX`tL);Z7f^@w;hX~5)jd7C_J$7r_*kYJ$|w%< z06fad9#5-n1aTk^hxP!qfnI%SO5UcnQ=dJ;=w|{6-7;<0$pFDZ36W;KEuP(-Z~P@& z7f?jbe?od2_pcATz54bX=juX=Cy!_Q9CQk&PoHl2sx+abujG($kWI&pyWHY=xkJZ} z9fz(m#mVSShzvs(R&ZqOojZ3r?<{-zR1|Or4jdR$_HHo>3EOqm_5u(O%^SvCAP9#Z z8LPgQqhOWxT>ZK6YcIQM?Dg{tVF$|e4sn&Br*877eTAMfIeS{|&SBMJQTqPEl5S+4Pkdq{xUNvAv_!olDt-ubD_Eq>dK?>#@u*;npXtTs*q=pqm_QWV%?Z`HbWW&ZM;50>WV z*{+!a$dJQF7Q75d?F9+e2g*mm(xmm2wGuGP2vt_v_Tx@Y5Bc#cs? zmm(9ejU#30!>uGCI1@1wuUQTv$u}drZjl#z-qTP1J2hT~3pm@@Mun%(%7Z{GH3I2O zn7^%uLg<-rhNayT=j-*eBn6ZYv(c5r*slKT8v$ylgXAdxdJL;6o5lZD3U=g6K56q2 zM*4}P3J@Tx$CiF<0?!LD_5`_^U-~^%qv0Hf+Czul?sf9*+qXGu%927iZ~m7s&_%`% z_U=ZmUa&M@L<6kRt=qTf(3psL`_|2!KnhSbbX8Mq$V`M%O_^CqRLRdt#7RiZ0mL$X z>y|Ck1wn1y`WV~LI<7N~9`^WPJjNwnlk$Noh*hS%S&!&Fb%v9fKTlMbB?;eHdU=s6 zF)%!?+`D%OgXQGjf1bsyyJ}9m1&A3v9&hO_o}O}uOF}M!e_6Sl+}R-M_RI(`D~qNs zi14bb^}9e4q!N94F_cHI(nG-P>hzu=+R^X#SnuS&sQDxX-PXSTiBY~~xj>?&+dM|+mD)h2bDbsf={+1(h##hmo@_YcK5P?50m^u@|m4y1)E zTD%N7_56lk-dppVp@C;S&&Qc|@v`hVlH7rnaS#@dqgJ!;zRP35hRSfSE)E<2Ok}Z( zLw_Yb1Dj9lY@@>po~*sBaph78*?@M!)-D+>Q+y5Q#;1o4o*RF^rT?`|j`KEsl8p!5Yd~Ah9nL@Sg;*xfJ7irE!a;L ztS6`$$c;@hTV`13UUv1pxr28uKlnNOyS#nEF+Q3_q5F4x{+RQ_ zNuBX6TecKVO567BL*c#nSGg^1NP;I(2SZ+)7wv!J$z43;yGmzb7po7O3$^{z+hha> z1zC4{-aW;3cWUY+f&H?x&88Vt`iSsY0Q8dEPE8ufr-4WoxVA#U{!P9(Kxj?GV}S@L zo~+1Ngmfj5PDkf>PF^D}`tJ7`58UNVwzjJ8FL}=#GiQ}UIiHfI``?ajr_6PAZRK!`}z(cX~KEut(nGB)mWx{M?sCw{qu5T}vN zb?QIIoBVpl(@L@671AN!1kv9pO%pP8|95gSefibYT&uOAKLM$6x7DM?I3;tTpiOP@ z+!C%jvN%zA`|RF5C))+$k#q%wXaYqn3{@Ux%*UXj90A&CxB7&PHhy{6jC9tF)`QUD zsn#?hlE5a=ed4QX^!@w2&9i=+Mv-k$k=j%E(Bkw+H%z2V!g3)2?5;q;^$#c_zC}S0O=j7c=UmiRx-(yVJ({L%|EI%rl7~Ku^z?Mu ze(FWwUR$?q+p7+Ye=TI%y1U3+>@B)=?mTnhA&wwwY9|6v`n791N$mUo`1+XE)&I3S zkgXzbr)Smc*Spmv?%UYd2wfU7!IBS9TWyu6SC!dDR&cUIu*MRN2{v)23l@YRl%5sg z^dB!s<`U@XKJD;nx#LQnq7H2eMHW!6M)*aP9Ma^(5SdcO4KNnrtM-r~77U&18+!<7 zPIj^zy`ozSMP*{naWHWa^b~Daf=U+XWRTMTneUM+zow9Yjl|TA5Tsl!> zZ4^y38D7GidKp}5>^@`0e)Vu_hz;+s`pRU1=rlQ0?YXu1In}#@342D4lWG1O&Kdy! 
zSg*gy^P7Qv&g!qudz1OhC0XO5BnsXE)HFG#oU9IxSaW7pRGC?H;Dx(qb_`QJ&#k{$ zf9vhmSYs{d?|%ap_*MBVehGj2V#UXUqmmL6ou9k$YR*Qswt<{}8VV{M%MlGbHE(oq z)T*i58aG#ZHW<;!FD^fEzS7|P61)&NEm>Cn*5t@Ps=kev9*<``m>##Q?q3=fK zg~k5f|L0%DjaWc|JD$cL);%~*oRzpqVO&2)0`3PXTT*!IhEaJG!a_@ySpZYwZUQ|0 zF=sb4K5+D=`G4)LJIpf;8;;`O#zBL~E1z7vbo{%g_|Bi$X>R<{mV{dsg>|Vx{h!ud zb}fAJ&a01IBVhYul&yq%%^p3*OdDDLn}sKble&T&e#6c^fq{J$QnD7CnJ15`lDCL> z`RZl*tI!}hVV2wvffpt0R+$+nKocE8Q}-!=l#vAQMO0f`ckRj}rn_c)0)?NDG%P$k zJb0l0V^N*R9MdU*BdgEYogeuiy~e_nW=@uIzoa0N@pk9SXhBEUeSAKOid1X;KHmIr zMA+hG2gd1Gb)Ys7vkmjEmtD_rkhGs0Khvy;(h?LxJVIFe3#lwcl{h5;?88V&SvC7n zhoYh)+H6V?5qrf$3@gmI+ak+4h)kX?(di=pQRvin7&*y{pWDID&UMcxxkY1b;*wat z5wz&SSN9$4?LkP2Lax>#4AfAdk`Jq`@K4VFFyCb_4FeG@l7LydPb)kVvAE1ix)n4j zg$#hl<9zpN(lI@ELWOi$?5x_RVr*D+j5(mo$yUD3;A-CNva7@5eps-t3T_SJHt4T|nHY0Z9pONF@VND@=vrAkeFD_GaII zR`BqlEseVAB#OdHAVy44N>dd|fGw2>n4u{N$!X}l8P;Mke!rl$mqN-Uu%sS6duq(C z8R5#wCjy2pUAsz5k?hZ}XMYMJ@isA1!a$OXt<-Z~v?xL{1E~+WYT+^mf2tH8d*b$Z zDiJG?Jo4G)m+~Hw4^ix&vM3v-L=iH0rST)4SVeQvxm;&hWk90k&EUiYzlB1!r*vVGso zg*qpfzzGr`aYYtT!_Q{j+qTtE2!G20dyvf*pDKk{aqZeg_jDJRA7|Heeov1}rH3G;6E8c- zz2ppvjAQ(i6T41@zmtmyhoRGexxWdkzK7j<)|oCaQ4Vs6mvp<;lAx_*-@{JPg ziEmN1N+B5>8fvVRYKkB*W9P^_Sb&(Cnfdzq2GdYAN?18BkA`Wp=C>>C7c3~gP*a*M zHm+2UGF|Tja}H%nmrdOH+$!m`GCMay1nuH9t_8A$p?E*l1{oBqS)r8ZDV_sVspNQP ziwDu{O!`=nAqd0VxpT>3v=v5^CY_wWXWHKdkN5o_>u$vT_S@H@O`@F5qn)l(NoFoU z0F&y58QvEA9!6rJR9wQS+xT~qnga*O7)1G@;I&{3mL(ETg?_+8nR%6Ft|hfv zVcDHuKfY#$7Y!nih@a(&)7jZ8dM0+$f+87b&cq@oCTw`Tj7ez&HGwok+7dtkqLmU3 z2KyKL#PMFAO6P1%oH(?k_`b#jLsg&7K5?*-fai;@dG;As8r%pz`_S*~R_6f}b+3EB37XfEbu_$m;AIva>m9gy z49A415K5El*2&GMm1^Sb9A${@9GLdZ*(n4s$@Z~95DgrKnGAw;=t-l(rkaSfT?Iir|?v}Av=%{OF{w_c#ksS)DW@H=#{6=l}KQgRa?|)}xxW>e$h-a@zpB4Ozcag`te^geJOA+2Eha8cn;*qp>Xc z3A<9bDj}O0xlHHrpF1IWj!Qw`^?w{@V}`^2qHSFJ<_^)g*Dbc^Ex^FS zM~_S%ozv3Nf+ePPvM)FVc?RHD-;;*=`thmyWbQM&E}8y2FL2j~T>C-)%@h$njL6$^ zb@HT1yVa$|=JDdlrp5Pc>-Ilwyr+6qo2j%*c@K{#JM?@40wTG&Vz)j1MkUWg0Q@QL zlxQb@y)e@~S(1Iq6QM+lbq+iebOpk(e(*Xju`SO-u;7P~z(ik0w%@Q(bD??Yt{Q6; 
z`*DP{O7>lsdM0e4>dt~VOHQv1zChpPHh0!Gc=G%0+qav~pFe-ghi7HAHeJYa^Vp)s zZU+3M`79ooPVB=u{qOi->=^bBir@5n>wd}644Du+&^e#%NRoOye*y|Ziqih>Ghl<& zFGw0SO8Y3NVl8OyEiT6KQ)d_1Nw|0q$q`2ErwrDlMg^@B_oPfBO=W7&U2WVAXk0+P z$PM`jI6Cx=c2P|n6BwjS0|4`@bL&r0K|*LTf=I%{z5lju`T55k=&Q+0N!FVaofZ=H zTfn-+e;3ESUOi_`SCwI<<5Zs<^j=Q&wyjk$!0EE9UbJa1ReNo2+VzacfqAbu!XA3n zdE$~Bbn$$NbIvN*G3jY((`i>VTjKAfS8%AzvV>qMY<@a0a0br5-<#Zk*0&tR3r@vf zJe;5@n~7Fx5I~hyj~+);&+yFXEQ?Er5Qs$Fq!hI1b7`Og?6)V^Rz}Nkc1dCM4STTY z1g*j`$jNb4pE9cn6E;%AoUq+SxqbKRJ?uAgB0On2YeQ4Ps_*wUb<^4nNjLK%=|D&p z!$WZ*xLU42nCI{IN=0@zIy!Q0M8($bTMC;>+#_-IPTyo*`mCmvfa>S>&aJniG?_~a z%CNH7@!GxU^d;Z&yS5|#mC%-mEA~r*VsLhta`a?!*2{oLG2sGew>{#Cy#53Z)eB|B zBxkksGRa}_427m}LcF}@5AG!LDMgm>b>aJ^lfAOu{%7F!ZC#JktxWnK;NK<+Q`Un} ziQP}n%-*3s}Z<;ZegJrSm?+G~fxt{D$XhfVF_*QdE4>augc``kIP81WhS z(pWIKQ|0wDO9lp9FC5xfRokz=A0-V4;%hPJQjQc|B+3~ee0=LRV>z7fcrhp51A1U3 z$yLdQ_O`oWZPK8LAlA@|R454mp*WKjp_j>HUurRb^weFMx+&u#C2O?x`m{o$SFdzD z4SGG&3`Zy}dH2>GJ9IN2IKmw?oi*zKsZcPukHlU&Xu=aCO4{cRJpLi& z>eXVJ184J-WJGBG@rs>=&a1lgR>9FbRr$YfL!L)|<>tZ&XB~(B{?E_8*{z;oc^1#$ ze$V&uZ|=wzhjK+pcGPgx`#%mA#Xk?sX2qZ8ZNWdcGU4qoj1VW5jo!!wV-w;F<~>$E z{;QHKv~AazS-E3$tn__W>P(zvx;AkES;EntevvaMhBrOF?0WXe2z^79O^@}sapRmB7}ZDE#&!*Q&i~YrD^s|BHXJ7=+|S1kPBattmZ` zN+@1_RfFXpntu=K%tz$6-X(z0-?r8DJJqjD8lP%m)<|~DADUfk!JGS3sjNcH`(Ji# z+r9qhM{%X^wP)z0e0xZK{EM%)^_DrBO27^NPaRr}-yGSnCS)~u=B{zxpBwiKX&8J*;9bLHM{NrJU!dzWCmY!%fIGgXjec{4Csd7{E@y=Xm4h=DVO((g6 z9h(ci(|5@|J6sI#rF}Mb=aXS7cA=gab&|?)1Z?T5;cax43qtPCvh{&jQD6C=ZOL1^ z*T3~_d22V`dgXhtu8ULfx4+FX*(43zzXkp3-{JjtKXQ|`w_AT3c3^WMR}e9D zuKwYQ_}R-o->r5R#Fax8n^mxwc6ZPbud91X<-1>eeZIS2d`Inn-?zN)115}x`;!CR znWj=dPH8FiDN04@ot2ff_`|pf6Vm7OZ}Z^|zBTPVlF)_OlD*&@xk)%&Bmz=F*vF6i z=Mx>icE`VpTgjKjB|IGlFMB+$p4AJJIXEO_5wN%iv_?@hd(zQSrBVf)%8@`0Lkj_I zHDAwQRz)YW1X5ptAIR<<3F~4}M}jT`*|TQLs;~btxmw?uSIch)qUiB-&&0uu4`lH7 zpQh_@yA#q2?LrYI$IVY`6>tHF0p0!kDYKu$sRVcF?p0! 
zjVvHIMuL3(*P}C!RI6O=;*ys1gvF&p1M>{1Y7f`dEKx|0jN{#{;Qr8pnpJ z3$qu&u%9~o@hZP4NoeZ7@|{1sRHV;_%%`DX+ZauqdLcMw?!0*#3i`jN!kaZ|vWHf> zpp@6HU7MMpT3xDty>!H~pb_%B!}*-Xle6OH6&Q>hxw!brkTpu4F^Z8!#CYpVy(I*^> zNeorpigP>ax2~E=l%OAycJd%)FpkA+(0-nG7ct_6x+7($khE!Y*|9_f9XfF@G&tC% z8yRoX#S#|pBqhcors;kDeUsC_Rq?UZ)Y7k$HVh#zU~eZOMu9AS!r~Kw`9$FPbLSRP zOM`{@(}Fw!bRzv>oS2OFU2_3F{KoHj*~g;7LW^E`lBauj{(03j`6OW;cj|4IO#1#8 z829p&D&_}gLekSv2(!t@&MR>ZVD>MbOvlhr0GVm*Q<*Kx3#pm({d*O!6t^_!hAS}i zV)&pXDl@Hj9mqWQssq2=FntD{9OigEpgIS-2#8bLn*FL*kzYo;e;(4ebY^gHuyEI< ztpr(J>`9@-ybjkxqaRhKdCzUYLCvO zrKe}n+7WhL1SA;An6^)t7n3p|;JS=>Q4i-8ELW%|l+bVrSO<(_No$2lyeCf2t&ab5 zg=(7JKd*b|W>8UtPO@y-G8}Ww*+BrB&6XVc{r4~?Sj3O>hLd3TpbQCx~ zF5(c#J(^j8NUvPJY@H7AikTCG6dAfvnxky(tI@l62H+4R8)5fbctNxla)!(z!3GA*M4il& zJVqfBh;ie7!)G$xBmUoS99}Q!YlMxN;p~L4H>Vvd zS~O4mt3z2$wN~ZjEj{jUfB(U0hv!J8RW=9meoQ1&fp zV*0|t5di);r{ysfESy}W{jjUpMF^NiV(7>sW}uKI5HIzSP|AEi-23P9ykjTGqt&!p z(_ZIR;5W^Vr$_bDD7sUeJN&|e9gRH8H|(z~3VW#RKYUnzMF%zSnXNl^3{Oo>4R9_) zBx7ak9(*~k;zI~SV?Z4}mvvEu7=pAKGdZNTQQM?N*>+~syT#uL50p(M#r1UAm z-HvS5eOn(@t82ux$NiFDq`+Wpzp~W)L;Uh|#Wa8V2oVp?oS1u{8vu6xgxH~p_ z*7R3HmpB(sa+k{qGip`!GBX;QoW;r^`Zm3vJa!w{cWO8izv!DI&7O8>vgX}Fn_b<~ zr%s9U38uwlN1#jr(p+EQ6}^AmI{N`CXIk%|hP;9=BNW2AyiAvQ_lFm~X~BHscRoQg zuJ5-os9XMN$NbPxr@F^iP)Dy>6X(fwdbD?O`Bb^jVgAs4ZAL8eSyyoKC#75YtslpK z)40!wAr`egCxor5D!npWb^PR)A21l1v*7W#7Vn?7Zc%VrvEHwr6}Gp!mFFL>ezI}i zWrNyt8h1}I<*ZYJ^TB(ypURX`8ta?`2d9PHEUzr7%t-0pe%QnC_p3iKyhy`F^_u4T z9TkcX9u@P`=IqYQjJf>e?Ybu|KE1l{a$?<#U7a1WOZ|6^y3Ids*5%2gSo`6zbH{Ia zPKMD!V!L+@fbYWeR>9hlynZs;P%*@dH;c)8ECNwH9UEVTqxW422t2-hJP6Py( z-nkby1JYRYl`A`)_e#zW7_5~!QthzzkJ|{y=92*=4NUK6PqS3d6a(Vrm%j zxT~!C`KV?~+7EWvDeM2hTelP4^i8d%9#nEi8cm!yvugwB80_@l*?o=! 
zoXF6?Qw{}L)=u3#CPk0XFS?(KN+?HI?sQv!y+x)+{%Cno)Rs>$R@n_InmxkvqA zb?q8Y{l&jmxVq-l*46HsKTYkNLV~|{(@+tVfMS^h~R%az?slWyd-E@m1mc`3nn!W^y0b+ZH4Ozj#v zd5_kJnUQ10u2@>_>bb1I^FWS#U^P809(L#Mo+vkDh-Qq9g8kOVY~`6)x}#fmbgtwh z-#fc-&y@2QXdif8{+B(LtA7-n(inP;u_)>XduKXP;xX|L4c`^un4_{ouf1N1k2U z6j`+UM~zOGZY(rkZ<~};b6t;_2s2vDN-$qBVr8PG7b8Qg{&+D5hLq`A1vwi-KxjFeq=_kVnrFuVSIKJ(#1 z-GvFxcaJupt>N(?A#L%%qO-nz+74QHt3}PqHGh5csq(Iaz1M>#&7!F1&M$Y!Sl)Y8 z)mJsH_1mkBUBw^kKUAMM`*c>MUn!+Tt6fhzcL>|FY}ukwj>=BhPedhunsSL@=`Y*u z=d;{!ey}L}=8H#%*A%S26x(E5(y3D(*8@@%E!0dV`@S=y(^UHi!U=&$R2JqwNYoxtW0OzHf+CHKO9x*51g-{bT~mwe4N z4i1W}4NQbCfBjdFnociYws~KX@xj4V+3g`i=fCZib9r9M@yEk%o-aF7B%hKmlYvDb zOb7f!ulck|DtWb}`i3rtmvO+ZnWt}g-H+?@?#_+UQdgJT*D5|PntOCg!1f^Z6CDPQ zTd-Hr=-6})>lp)9_b*6kFnYjUN<5=QNJD{&a;KdAu357_r~3{aYN=2tNaJ))UZ69H zxpU~wtgL%K@yL?s zUcF0Ro>?Nf(WTqb{+p7M)zjiFbz5*488`gOtvt`px8lA_#G0E1SL{@XM*bgc9NpCu zua`Kd-lPo{a@94=cJ_+wyIn`r%!nGS+(g~OZ$EPeYFY`I@d+yop7=^_pv=_h5-v6z^9XO|w?&oy_dEY>Rbw|jdjqaTZ*x1xCP zlKu86CgG3vb{hHd(-61oopej?nkkY$9u0dmXUFz!dyhqyELl55-ukd>Snla?kEGg! zDb;5>4)WQj?4kHjGU%rD)R~$(f%locCL!{{ysKd=1BOc8b?Dg*3!<8O_i9jmZ>CDgTXQ3lu@y8ARVRUG6^#oiDqkimTnfl_9iC@14YWXd-m;Cu0 z(xZj;leNGG!W3f<=DbXJ72H7W;;T%r%pR4=X`TCQU3A|q_4_G90$e^MRG;@~cR7g} zt9s6Bv*$@IGulGAH!#W**MovTH+*xu>cF7q8x{WU`uf9LwLaDpO6-d3Xzo(9%#NoI z3s)6C&O0KbqlZrpTx@n>`NNU8zGlFbb*i zn{sZY$Nkh$5~98efsDgKV+fFpScZ2=T3KQZD^c9!GVV? 
zMQnz9FS)sVnUt?Mz^uj#AB@r(f2-MZ!1RFug{IMg#VcM#tlVYLtKj0r&O86QdvwG8 z7F{FWjQSL`y;=MDZmyw@ab#+C$lc5K-D+!_?$}{5WZJa&A&VDUJm;l1M%nc{w!UFnpxV;C%@-^m zaOTg5i!d&w{r_0!IWP*2EqiGhg8lRs;QfPi#;Ct(d;|yw5-%hG8!h+`BB?obs*i7c z!Kaj!cLa9zEtd~@_}pgWt()@XY#Z&~v#*w+?I8Jf;Va80O6{ae&ryotIU}PR@2jt| zb)k5^m#`wz!iL|p?b>zr@p+Hl6dWHl;vZPS?ejBsq)9ul(~d#|Rj`wnR8!lac~Doo zPk869&uvlW(#WpM0t4|qaNs!CDKI(K_L==y>Tt*f$2hw@z5XBm{p;bT5wRoImLfG5 z14Rgb0<6vnSW_B5Q5yIbHo)J~91rIfs7T{CMwGqS=MWel?$qu~3#Zx}hcHXNqVn%o z^XazoGc+czu6;)P{3VCl(m{wl$7d>(F-0N3JdtluQ|tz zdaZrd{kb9d2nS!Q9WA?d)m7jc<{5rnm4y8JhY$bouF&9@`?`W1O7q)ie{dV)t9qa1{^s_5QLakSd_kl5{l-X@twVIpj0?7~VFPz$gNRsWI z4ng%ANwZ+xDor|mpCaAZ`ir@IdE6&B6+;X2{AsG(%dzu1P_qNyvGJXUF22QT zUGeB*14JsRIJg2^->&;qP{;L5J$sTOtW*ac`STlrxsPkt&v$XKdYG+c74Poe>D0SE zrN`y`G5M=y#_X{woAbl5KkTd#0NG-C@Raw>5ysP}jj5qu`7Go<)P*M=Xl%E6vdSX{ zt{HJ&yZbEOKy9==SiD@nZ?k1x)Xb->tx#1V@}2rG^QE6rm%qQk1Nqp5g)}X5 zwZ&o!q!V8=nbjfu8W6t??`G3s71lQG*;L3H(mJQ%zb)}0E_Trk8a9$|0z{gSzM+GI0~#71f?fn9h?i9PcCXHp($`k z-A03!3j_`OJoci~_=yuE;Q)wAfsU83HpMdo#~>P~i#kTDauV&vUs@{N5s#FfyG9u$573jK@Fn}y?sTV8M?T|{(( zXVe`LLIj^5CbvB7x6cTs{6Wn`hc1GzI*b7=eSOuy5DNq-3UQiVklj1^C4%Bu?Oik? zPfvqq$N3@yq`<8G9?Du)JKXA@QS$t0G@9Cf5N>sa=-ezV{lw&fOg+GmzB%sw56Bm3K8<{-(n1ZE8y_Khqxuw$W7cy>y~<(yX{q)&Ic2 zYWcX(-Oyyv4Yx%Y8?C2}{Mo_|Qmw)iO#8^ZAmI#huTFeulUsIEg+v{W_>X6}2Q?x! 
za!oP40g+EoJ^ z^E{gR{!cJ;#q-O;9l*P~G7kgrOs+&)g&Z3G^(JYDv;O09e?jm)mQVtoSoa^xCZ%t4# ziElS8^?6$Cn_CR2RaZci&^lqS%5sD4?H;yPy@%=-lk!BAhbnMBjbW>G?^8xKU93kx zg#=BZ{F@>N>%fXGfE&}AM8Sq1qz{UJoMm)SC`Q%_IfAe$1Qi~Z7PJ77pV07^hlRY3 zf!w}@rt84Gh8DlfcAxh5 zCH?)syAUcgk%{%-+A4Y^;az4|;&6sQR_k;cXze8VUtoceq^p(haBJO-j!0L*4ea~kV ze8Y?H{zu*Axi|kE_v%;2Fp5wI7-hmg7iTJI6X9w>h;Y8+!oH*dlN|2!h49MF?KB-X z4Fz+iO)x$YZ9C*@ZB5M=?JQLX+`q5b3Ih_Q z(ubb^nV6e?J z6nH3ov(wjZ9X9K+2M5cQ=QkKQaG=Vo3pl2>ww82mUPa_zROG=19-3*p_>i*P*B8?p&f z^ZAPvWbh1?m!pkX=L-Ca(3v`pk^8Eb~CEj449 z$Ud(Hx;C-n(7*VolWmEB-4YE3YQzWx2GeDb2JRWh$T_Vm14g>Xl7vLdks?;O*RJJ| zZ_3;;(z_xdH^p1Bl7uV?Ih%n4W2q%Yq9NHFBtA?tLMP{mg=_tn&l&B#ul1?mBA<(I z4c;&azk69>uCZjSrsYI;AshqelM+&jDJ_XvHaYj1aD)%$DL zGqN*#PGstc)|wjay65e@jUKOjIjIXw4d`tFu25CQAVXjZ=wvXybs@?rgm~UdA$9?} zX@SZd5Ih5@>q7dK%-0dOI==+mu}siRiF!oiu9=coh!oEUoSguTHNH6Ve2sE%Tzx;b z)_)__$SEH_LpbcZ`uyfjzvM({aP}j|%1t5Mo6yxb&@=6vqJ`&G9c~sjC1Et6anTB%|5JzvwJUA{U zzU-|cgsnP>J`@r5%kxlo)Qjj53z7z*49132w%E`sMe{Yom+5yY5w#wi&8$jaifH@| z|1zEZ?^@EtFXCgzV^p(J{LinkpF{pmozfP+D+G=+b3Z3Q;+iB0(EYU%Hy%xuD4`G! 
zVCetq3Yh~G^wnO2U?;s-5b^tHXpH}CZwzk+Jy>Cn8cu;lgp=XR9{)zRVfHk!6sgHq z8E}-fnIMc5^1*5V1M@lTWnbCV!oyB4`ZZT!ey0g z+86pr5rjt7OkH#eVx*Qk4eB?}AY`JV1JNnOnMVe3smpS(PgX7BbL)jZ{NGVpULO_y z=TGZ&Nqs~g(b zbY!X>w_wrT*kGYvoIH6l)<#9?GH>u77C}eQaQqo@0g8(<2m<_=lu4gM!i>tavggm9 zt-NIT`Q{1$Vgjss>C&aP-MXzaTQYn0?8@{$(LV2I{LPim?=C+(=M_1Fa7?Ej7$#=5 z;+r^h2^&vSF&yKbU!;^_Lbow_wx5Q3Hp)FNIERBwW*{?}B`d_Pb~W+}?Gr)-{QPAf zRUDY9+9R6nfh|O4BSLNtZkclh1~&ASOpaj!10a%2nG9J+I-v~W$yxh8|L-dY>tFty z?%(8~q5lu6*y>;V(aO(d&(fL9EHHTgXgxetGlqDx@*!(l&p$+tuMi&^x2LHsjjIt& z36Q|1RhfBtViNI#o}M0wmJ!wxeiZ;IDm~+1#%*TJP=PV3H(EOL3tk|@X@ao`oum{G z%3WR`k6l~|&1nzJ9QAOlbN;r#5b21x>w({)Yn=dZOWfs{M|QTj3=1MLF~&1iVkYO2 zjGN+}abGp2KsDjZrmu>Bt9pCQ6`uhi=WYLQlqIV7@V@_BeToJ@u%0W^sbTcdK?>oPGZ@n6osM=Qle2Z@${jyY4{e{UN{~!I9v>Da%-Uly78*r0Mm49dcDs{Yr01z<{y6fze=GSkOR&+F^Cp-&Tpn#6I!g zIg;E_O)nOM=Ai#B?`hM2ZGdAc*Iv^<3@q$fMW+H)6$@l^8sY(d{5Q8)hk>+r0TmwI~-%%1yp;WtU|_IHV} zI_u+(t;dyD_Y_K)B_?eDA zDE92q=NFp%Q8hMmm*k;>l(Xy2c-;*nQ$7C3xsD?5ZbAnc(@;+`=~s!e3M)P4mva!= z#rWh1*k^UUx3^h>Rx-7Gxgn1ACJRey4slcc@Je&V41Ph)e|}JO&rpk+Dh%h!TQ!$K zgN%Z+P=DRPm5p!f^n$!d^_H*hDsyK`dSYj~N2c8`4t6=yk(*nZcURbz`Eg$BEBNHN zFO*$@pZRRn@$=MF6WcgPhwQF54^z+ADU}8v6qy@H6%v;^K|B3q6 z>`|!SecU+~2S!m*=p#m6x^&Pl-Cn;Qh~cRa4@Z5U3&jI$^(d{>#-{O`ug&u3Guv#A za4!7Z`_`~V`tpMZHJwx4+o@sEyJJ?ZQO4y~48oJFQV8+Gg{;dZ-aXG;)~6gx-R){m%HD=Us@;dD!e>NJO1 zrV^Ti-v){49DT^g&SOVt*6ZSHSSoe8RE+wQ0d(;Bt(P>uE8};)(4EY!_s(^IK=kvm zn{Mmkw_hB4m0yFWIVu59xJah4pTA*kb^LhW9_vl}>moy6u?gxM9B}3Vf%7C}T9!lU z;T<VRrrrJWyCKA$g(k6J_Q%3~I4~F7rejVJh@9^@=e?&TrHzo_%=P%cgkxik#xi~UW8}G{I zg=Vi5!q5Eh61uQoq;a$!HY{!ehU*#y$LVc*+@ox4i_7(s#Nr%VVuLYcQ+w;I|M(o! 
zZExjhr%P{>GCgN5MG#h9%{Nyk2mWwST(5%XidXX{OgJZWr+H6>%-`71a#9T|5%YBy zu3F16>+#*c+qD;%n>MGZw0|<2D3(nHh_0tkw$muMfxl`xWj~t`?&%$I=1|YMK)6?H-M=?pzw6% z=F11cOMZ0o`QY;-d)(`e`#;32IXyVlJx&%){mFdv>B}?j@3>lH84sEkqFW^A@3rO3p5zoV zRT`Z~`{*|(js}48_Q#D4n6!#cT6K0%w-2^(*DYt}x3AiPNn;x|vZp)z6`PQu>gnL} zq56Ao$GSUVeA%-*=@S&6aSY9T;38isC@=&`S7f+|GsAv14Z}l0^1A~qn)Y%ab)^~( zy&8(EtXEMSd+Vu|e^tpcaL+0IH`C*MBKvRkPkmB%C7XQsdTI2VtJ!gKTG~vg;=cwz z)+onRlPk40)^9GzpHo(A$2Huq=H# zcqOzKc{1=)>TO7lf)9{dtNbo&sBwGzjGKO*UX(?>d-H`R%pNbp?v&}mS4ru{%oPGD$EIm?wTQVG1bY7fa}k zcTF*7xaOp&jl8|#%33uIF>03c;X$5uwUXY172J`3#(cKU@6v1QlR5FFn|=8nUu%|BWET? zWk6%UCp!OWbV=(#?Y>G99Hkp3qer#LsfG@LgEu|mrCdBKV)ez7#T7(Bhm<9N$&N$x z!Z=Y_{!k(+dEMY!G7h7p^&+hV_)uPXeFUa8cEmk|;H*ekNhdFyKNU^$(#NlmJt+PT zN4APs4a>p)hS5OjD=!y~vkNjM_VTO;4@IA=7yMLp%%VNOl~yO+P5tzblI&c*llhYxCyM(e~g#65>66uGZ1A zu;1!b*q1>-rW65s!Ip!x9EB3|7({8jB}X@?VN6xSU3d=KLXo3~2?^}PR-0s_h+Ky; zSBe4MZpdrrt16-I^1}L|_9z^k40M{WisV6bXMAVB9L|BcElqAb(=xG|{VyTL)4(;@ z9LO*S&173LTcHR?TUAH-x-{4k;C^xf&4*1N&^~Dzk9u0LvojOnPk^PNaNQ9Z-a&Pbk)BExy~xTS><-$MYvwYcR3-yx2`q&F)h?`or} zKKC&TKdpH+W)E2;`)OhzT(8zSdM-8!@9e2W9U^SFkl&7n5QsSxvheAPy8^;^V^8`;OK zvx>A|?_=QPCwTsalJXclD%u8n$TV}{irnkpDgcfaEq2~cim-!ID@4EiVA>G$&U=(J z0){;T%HU_N{5&ZJ72*!~{8NcY7HD>KFUCd$G07oaBey@YKy8f}rxQmL;z$4kXwuZl zlwM7C*ZjG>lD8Y60V-d8Ls9OnZCwbcdPfE`i_^^y=|Q`xGkb|jYrD0zSI~NJ;4Tjt zB1~Sg5bBfmxp1NaE>4_ZVJIc_i!r^wVH;uTWy=IWj{?9tl&CL(!Gi7}<6FMS98dU$ z(>@U5Yg8?{;_Vma=ddyms|UpvFp|(Vz`6%6F*|!OUx9*wvZa@&X9$|hYC(zlQ06~k zIu3qh^XHPj^5sNPSy>G!##Cp8_D2nuJAmpYV%zaMdIad0YQ1(p@zU){_mhQT)z3U* zP^MDZ0?TlpH$!ih1vy5RN?LUpAx~%BIC@1s=Ow+h>hXp>&?OMXG-)yY<=lr#!V{V8_U64?P>v{wuv^HF zusiT-GI}z|WkS^~DtypuvgMepY|6@lQFI=VqE*MAk%AXQQ#HEhlE%Q>vyCF4C4Y{q zn~ZiHc#8FlI7DZf!RdKOjGAcZA32?Hhb*AZd`S#pPSf02(4myFpQT^8Y?UJC@B^!t zpKrwa%PY8NioFs7cf7m1|5q~Cy&CH36sq#?PswmnFx%d0j&4MTB4|w_V>3S(@WsHq zZ(E$$KAo}AQPL;{k8C8o$w~J$1)#KN-94P1%)lg1+RlaSz4*lA!4{HMQHrvuYXw+X zc#@2j3?uKO^%D|t#Y|&0W-J(xd=Ra{x_$b$uEuI`U;F0CBGIEc0tE)+@n$rzg=-&a 
zbz7Zbd7gN+66aYWmM~)R&sf2f=3&;`x^SU)%ZdFG-1Fi09h9042eq%al_B;ZO^ZaF z=Cc&1Fqi85@GT_Y?w`+)-air&65F%;_t*jSC311nK&-DW&L=a|?s@?Vv$x;oimoIV z!85@B=4y6O;qE=$xgB6(s2GtoOkrN7P)ks$T#TT7oEBPiql4NUn?}!PrY(!UVaNas#|t;ge35QG8uAT&(Chr=<*Ydd9_sVIbNoFqJ>d$u zlzHsEk1HhmpkV9+SVC~c!)Na!8AC1U=@*x zmsOqK83W=H>6%Sk#Sx&pi%+Pi%92U&z%HWKGGD0Q(-FIZLqTAT`;SewK#KSUm1@Mq zJ14>?|42NvidiF9^YDrli|cAI)4#=s3%ooy$Jf|@`TW|HNCPp!l-Qpw~DE3_) z9s1x-gV&%0=K|9vb#iTkzvnW09FBRKScYT1f^wO7Ps7`$5}Y{n(n|V#CJQ!sq`wGw zqt`k8d7-HR?=DHKqP*Oj0>{RuCk63AcpB(qk+}=G1tkLdUt4(jt=eosh$v{OA{GQH zHtAT3DsUA3N@xXy;SP_xlJas{;-U$Yp9>}D$uXTsf^_t*po=n87ToLm0I7pmLsW&cOAc4!KE-7UVHMGxJ8FV)+|^H3GFoa{oaaziQ_ z6A}q`&zE1CgWFxXH|tbNbz9EVVOr8mHXX8N-%kKAeap3NkCM+PqoLiV{(gR|1;=zD z1M^i;QfG+5+C^1HMOnw>X&Oh z{chi@UAyC9cYi~uW0|N10{^l9VquzZyX+*I@W|n@CSGO9c3sGSiKPf>YE5iMq;+UZ zmmCU%@}cI4G)Y;-eT$WZk`9sOqN2=`Z{>AD!+PnKeQ#@qU(qNFUvI0n{^M7^^sazd z}dloLgNUvbc}5`S>{@}NUI^QN1JskB8YB$w{kiEr?aJ4aZ8 z&^Rz1b`0ZazkoPU(0i0PHK6kSf^>sI`IZ<#aeG~V4`BwIPoa9q0-U|+2ssGP z1Az0MJp%h^w4}4ozVemXfgN`0>KZ{DTN3_fV^Zm<2X#t>+Lu+1GG%b*`8B_+{V7Sl!S!dbc( z2>qLqSNS@{<=(58TRMy!LRIAAbmq`*$#hj!T1fJ%S9j0-pzroau!mZTO2Mq1@8KJ*bAkBtu zVxx-Q58tkg5&CvD+C67`9VL5vNLo|;jSPq4K!#M>Hb6r^&u!e35t0^~QB@&V7`L*E4<0!UBPOHNz(uA!;Hi+-_K5p0%%=sA}b zS@MR(S921HJCOT^?q_Jl6t4h#^Eyr}px(WlU%ysUTKZw}D=@&n!h*31-$)?pjYNcU zoTwuZ1$V$%9nE|Y=KPG`JO?!9dHc2;tIzxAWN~v3&HGI)ok8@HCXPhtUI)>XVhd(M zH22*_NK{0cLI|O7A{tiWWw5wD9MFkS+^BBiuY|P)<{%NEcM-xLvF`)7@MqdL-+V(t zO9%MU5~Cktfa-u|eYQwwkLrwm*bw78#e=wv2)$yUY#{c2;GLg9_eB{t*d&6X-g})h zu#%Y1;?IU8%2>S`%+aAeG7QQNV+%SJ+I$WEqe_ZIIQ?GN73_kDr#s7Lj7TwvD{mf( zCU#If5(NU9nGnZdTeu@1wKaHuM9s^==n~Jgs=;l9>)HBWk7wIX>oy6}7CV)9-X{8_KDsf?vL}z@_&U(Jv9j@J99RdfBkG0st-6xpL;YzXuYI+ zUk`&x&*VkAi~j{ry^`X7^JY@gt~cF1;Xyn}Lw~P9N>Cw#^ThW0riKyur)xDC=p}Hs)qN*Y7AlLJs2Luf*mTW=!idwGhz}9gp8AgN-WzOa*Nt6yTN*RpnN| zMntU$tx+TFn4z7J?-wQ;SbTNH;GzKaZG;u?qnUx$Y&C>X+)JmTP#3!plr8F>8W~G3 zTxjgxyu)|J*Pj14w7K#tv!4%q`DGFRwHs2ZEDx*868QHFC|PB?RN}L6eB?yBTCePzzF;*s|hg6A56KcJ* 
z4Mx>kJn0=Y`{}Bna`fMj%=212!@Mwh&XD+C?Ps^LCSTu3Z+8=|wJT@0ZoUnrxei#} zL-ZuP(I~83xZB?!xT!FJUIJ*ln!82CW^jRBR0Iu^W?jE8Ma?)_ktgHLeM-@GAN2&e z@^@-cwyGJE$=Sxb9Pi%VUjGY^zqEPz%Zl3c1tR0?e6sfVF&P<|oavgloVF@F$$6|T zHr)YK+5A=FK)8 zx6{#O>;*#6j1jP^I{=}}(k}MZ8YdTm4yy(d*sWb?y@uP~Cq|FW9L#7O0ccHbz3<+0 z1=E_#nfQP1G#3`P8!vv%G9a%uo>RA?zTeAd&(R9aY(Cr_1!ZMrk|kq~JUl;7&pkhZ z+52?%Kzu$+I)=y?+C*q0HMD`J!)S@tJYSPhqKrw-RM%#w*^%)Ho!&NifZIMcejuf(>;4U%;zJuXJTaJb;ZH`TA*~9>s2iWi?u*Uj zgVQt_ENri<$vyfRL(**A(FER?67=+nPUmGT&yGk=KX0~bGR8$bl7LPWCQ080+I0EW zxW%bVB|;y{HqguzzpJ{oR(CXnYLIensG9$^*Nmaho{|o##|xkh)zwC~Qp;XRNd>Az z-0NH!MT*A!5^Py~q{;$qq(7#rxEC1NFe}lN73m3Kb~uHDVZ9C=Uu}&SMvZmLq^Rjo z&gnmXttk(XbaJGHz^t2X?~iyFUiHJQbmhG`^ZMOn+Mx0w6U}%;NTonUsWgBWmdRsE zHz9g**mi`NZQ{k|ef2oL2#P|a1ruYzp`j!o#+=kGxVnm6yEQWS(I(>}c_zpeXeCUV9Z`x|#zTzEs>)l6L z{edQQBlN9+u({8*0(NBp-wd~)1r5pip!bVQbwEUnf|ra5dVVyUm+Y}@M<;FFa#+Vv zid6Lil1JV>6-Kc%c0Pu3Z-dka06MFkJC zy_)xLWW>L?a}Aj1n$Z%Vtu6}}DZ5P`!zq3f+upN3VUv$EXCPG&fm!aW8HqY%;1Ps#=&xo$btB5H%H>Q0Y4E=N&rM9j@Ax(qK++8G9}-Tf`GI(`?JG*SpQVBM#49 zgu58+N>zAxB-JCSuVJ6)X0x9rglGEsjazKDX)O*b`!pOD8IDX6_FD#M0Ulqh>RUf}QDkg?aO-apf({afF^4~6ct6zoq z>;>u27l+dq`393eMB3)A1sv5uZYUccBk1IcMuDtn>uBB0)iYhJGj}>uR zRBX(-)vH@({x;35K{wAi!+0UBFLrkRx3woDBa5jOVjfoFb~y`+92)1c`Wekv7RE03 zf0kRv)?P`jvC;QW6LfrqEDZxQMvF5Zxo^IZC(`YpJ^V5wS5naSq9#m##il%hw5R2d zZ>Ne(34y1RQlmxVX%9+Tv5*PNhjsMy#g>%NTYP+mGUH6t zhSthHD|DL7S6Jwq8021^Ia<8icOPfPLYL4cQ6m)zv#HK>y^d$qJSw-`-8TEWYh8T4 zJX4VDhN*tDvFCb5d^vexm!e{5WY`YX6r+F{B_ZRx6QTp2T?36V3C6|Z^p0_BL0V4x zSjOsJ2NuX0NWRl=-6XP2TfC)Mz-P{n6Hns-lnrN%SPAuqF&e2qR^BH=K4+ucbl z+g+6Bh&!9l-L5hZiYZp;RLx6gVk{OW5`${y=FYWE$=*#d2`JAT3e?QM#z`$c>fOPS zyD-N!wxRUSZM8L$Vu#ZGPg+_UZroEGbB*15JJ!cMZbp{qTvoNha1xqqqZC+UE6#gL zhYFZ9;SyBA=5)>GNyH`It9Y}1z+`+J-6&6TdEsBJ@K6SB7bam3v_21&s@TDMkBRB_ z0QIMX3fEYKK|CMB>=tKPF11G76YALgq<Ba%G9Rl*8C2^*NK9ehwJ)eBL8pXIzcQGlZ%(uA92V%~s0+y( z2}UG`lgYi|UnkU`tei2JX`0etnw!*M)wKK4Secm3)AbQ^%K17nH#YR7mGDq$nTKya z+Xur#TE`v9xm`^xTl^_Kct&l;u5;2Tbo-GT4SmsxqZ5+zx4TUS<*!m(@AmGH5&Yu;IVL71 
z=86v!_oWKR0fSIX(?+SH9DQa2(2`dh7Qg447w{8Y8P1W2AUJx8^Q5ome?=@QWkuvBVE z`Bz!QY7Pl8A9eHVjP>WG)VVlQ`)vzp?QW&^6AioXNQm!pNouz3BdfzS!jz1G+yxQ$ z=VQoj^oDXJow6riAG7PYjUl{EQN}OKAx^WQ=gzr=I$yoo%w@-}1y~giiw+&T;l^I2 z+hp9w9@UP3_nGAE)*6|+`Xt^(Q9;hGjX4N%CI>f(={K``en|cpaP^%`rvM5Dz zN7$2#Cy=Lb`Dj?F4&u;knS)45!rHyr{Ml9_$|jM??o&xSVHeZ(GegE=Br+!}x(ELm zXHU#kTh?{T*J-2zSX;gMj`4C9^`SA7eW&wSQ!N`TD#j%k298YjDvH@{*Lkzwo&<)x@c8M6`Q=q&$NI z>a8}Azq)LQYHV!0@qUC{|ynQfZ4qpTVd zjs^1XA}-D{uIp9~QI1Dbl=FIv)^ZCx5j$ha2yB>`^y?)mId72uWpEhEq6^UN+Gbi; z2#?#5tc+3wn5j0c0$oKsC@6}=3h6z~LIfXev3PUH<;*J>{)JV7WQ$@j85%N>m0P#V zOWH5#l?B2{8Cn)9#8E90PV=)QNQIOu?tH5ef@suF{J8RQo|8raK-P*htXsVN zOfq)q`I{Zi9smF)L)(ldF5?Ajggb7D!R}=fRw%ML+a^am+hqEldK)fDQee~SNwP|} zx|^r_-`~l_e)#eUEK=m||3EGk(@Te$1I$5w9=Lh?&>TdWMAaloc|YmF|5k_8K;&hG zLtFhpn(6v=70)v)8-W<@BnvrCBki6Ms3v+)FoV>FWq*b3I**^=@fjPLpWA2LTP=O< z#djB5Z2Oue9kap~@3NqppK(4?kyc;iySani+mCuvs1vwa8ILR~#DxgXK+^`?Aq>tG zT7=zsJJ2Kq=FVxfQl_EQ7rN6$-EJV+0NS!_3nWlm>(nh|+>nTDwyc)hBzEb7h*kd` z0#aa|E{92#FXn91x97=fcqz{1{?O+4x8emrh}G;O{OJ8)0=0g8;~2pY z2%5qNB{oTAR1>tR7)k+Em}4ODdwn%~CpPcQ932)J`CN#!cfn}A_J?lMu<+R@zu)|J z?^%yhw*41Q1A>U9+b>dL5VX~A$Qf6fpU&sd?ag1)J)S!+ki2RmT-n4&p4j(h^wsu# zdtA#}V^E?eM8HJyP2>5*rkTl`Td<#Q2O)bNc;!gF7gJ=dvCwi9c1j<9EPV~Oa)(nI zVj0mJIT8w)nVW(SHrVDlGuCT|G69Y^m#(#5ls{$FbJ8?=&P_Bnq3yv&%{yCyX0=lq z&$%n6h+Fl9_Dnx=*Xf&|lRN8Ho5KpKJNIP_+klh7LlDz4!!cK!(D#qJ$cLab$cn(C>8GP zey~IyqSAip4hU~p%XE6#V#rE1RqZ>|8%5&s4dr)kZ#2yn&t={J1FtKavS|=v2lM(e zq41{p8NI<0UcLTbR&+H0V97^Gj62!dQo=qs?zLoXFCOy}hMQbK}isZ=dht)yOD%{SPMn-TaHg6xhKwI>L;on`MDw!mIR3AI_wA zDyh{pNzjw)M*Z$d?OPbHtD4JOW-)utJ@inx>{KCuhS;^TKR+^RIPA{G6uj5$C~2KR zUiCNkAUj%^oEt*GE-~^`%cTD<`O(?>M^qXQLa~&V%}}qV4-QdizFfW zrr82AYAL0+t=j_H07{Z9)Sxj;=BkEGXF-lVlk;Jz_jk^yU@6a0EYO-73mPj%HbW7{ zBUNSf=wp%;c8(i3l9+t~eeQ zlPXKUCpGqHVba|;%5F!|k>$+AB%+O(Xx>-Wl&q%H^fe}7x6J==i@AP*ily!SOjO+FW^u@ZnG@^9(*JiRr(5 zzwM~;`g87`k5}=Qf(%!Mcmo+A{#lj-`RyYKTL?-T#AJX)V)CUJ`~lAgj8#gl>3n&4 zjV#E;Bx31DKl`btypE6V@nHEmo%rF2cD~y4{~!eus$7qG(kSP{In?AjEnorT#Cm8w 
zf#e2F&bQq_^M&?#R4q)=U)95nT~rs&bUkoqplRb9vVszPb@djp{jg4m@*4w&*$E32 z*>v@CRLT8ckwh9E=&t>Um{7(zI@)vGNdyT}>m47!n3XTnLt$M(Uh$`pdB~DTj)W{I z0+fi*L7Tlu>&R@IGto%YOUN>NdsJ)sB>Z0jkcDKl`v0p8WU-F1l?V9V>BO+5K{lBO zpMX2SWO=Csgp5gWcv=rAR?ZYp;*ff~KZ#~YxP{Q)@6e46#&2%n@(6M-jcw;O{qO;) zfK)!Q(_1Y}E!8B-Xs9kui>8f~cD#T@k+w~35daVyj8j_@^q=3)8h*TrZL1+BgmC8&H$bFTKtpC&I4wrRSR5?E%{5y!Ipgm*FANfaJ@K9)#iH+hEc5aX_-!)k{5{m zO92c5RP$n4!MfV!>~@>aYmdbeWZlI)efF2v`ppYM?^jeN!DUKIkAOrxaEtBK4ZBt1 z3mjnE5=0@#kuA*K1@AiD0E^mAL{zE+lP+G8qL#f$nuWheKa7bfBOty}VQYjzN0eC& zkD7N8^lyk&BO;zRGI2j!%U!fKUf2AoH1i&1`%1;bZ1e@0#SwD)ay^O$9u&mgy@sh3 zL6VXP+GNquq3^Cx6YRGj#E-RXbr~6K$u#}T%`*xQu#!faM9LFc6OHGijDX7*dQk9V zgb;i*GV7Y=WkJj@1FMI`!+M1n8~>EUq-P}Y{t4kwK~~0xk{hK+MyMsJcmkRK4k`;Y z6t?Xo63reMdW+G@LvlAf>s6@pGP8rzo~L_gX=;f-=#Nqdl}4OTP^_?Q2MMhL3>ROk zfyPcxdD#ddAaef}tN-#o#p=xHa7k>ERKwfWT5l#S`vokOCo~NPBdaGT?&gIM`1YX-!bH3 zU%Veol|RS?Nr;0K{f4W3XtSS$gKf4kpl}1SH#8qDB+w8^X953rk-~xVXUqNrTt0py zroVXK_mNk0?eAZ2<3rdh!)N+3@0WBWv8`hSW@lZMdhs2EkDor8jPcW-)}*wKCu@~6W7K)NudM9$sM|%pU zJWZV$$R76LOk7zxK78sf|6R3=xH6X-amj_Lif{!k#r>d@IIjsW++w#IWrs6Y*Kh>F z4lY!ID@K{w3R~3)a+sh32z-GHyBRL(HsBA`i6D;LQjCr^{-~Cd)dsaqhtV5avuC9= z-W80D%w2V_R0#N@-bD&Qlp@gLv|j1sP$aU-B^PEhppyysZy|h(%L}%h7gqI#O~N$2 z)4LRptX==3BP3~VbRb}i7)suxGz;9q`#9oppeN!&W+C{LUn*Q0|E_AzO3%e=L{hzr zkY3^GIS#f7jY1wHl*cVzP&r9?i6ne2{^)PRO**|7cbXGWk|0ymQScVJ zO;=$J8!pIFSiB+iFIgS(yIq>nkPS}RD_=7uCq9ciyYO=OL2(0SZnje$up zC8~gvl>!3HcK^cfRJyt>YPSh#QZv;`jNoDyubOvYwyh4x2@&8lUff?9Os2Q>@wTH+ zq?z{~N7UlFr3BTivA~JHUFvu;`xDgBEL#}pW^amyWsM@#RW`y_#gX*9JSwF!&1a$?169D+x%4He9PVO?Spg_l9Bhi zYh&Khh#M2fAw{ukJsQ=6d2ws|`n)iCH8nr7Ff_Pj%UAub+(8004^A0r7$C{ZS3Q*9 z8U16R3N_Zb#)DJI@OPX0_oDeT9+Q$d9AWiJ_M{#I%&yeX8m9%zd~k{TTqObVjHl$v z1HegaGRf2dYljpN2{PvTmGI&@7;R>5_jtNeH3;`#79)s^rq0(lc*}t|WVbTn&FCO> z-xQ`QHTC=MUieXy)Hxw`DGd-6B7R-|!WN?wO{Q^{ApN%Roq7Bl%#d}mStXS14-U0J zBI8fMV+2xt7Q!dK{;2%l^o(twq{KTEi!+(7FQ30~P1=Lo+k^9T*sa^9`oi?fED>vja88ibQU0YL_hs; zZy@&Ndi+6j>5co(z|_3ck=<7A@N^bdHv-fbxG~JEgosWiG8zT^DwTE#kTs9fk}%h` 
zB9^hEzk}22)%lCBR$%G+0mhz1WUy|Px*Ba2^sel!)hXzN0a*h4^e!Zv{KOa-LAcPN zXJr4yAbt5BJ}PeG!>X6 z`(jnt3f`;57DURV#!mowvZd9V_g|2|0&Yy@LqLM}2i;Z+wOp*?YErJYytc(wKYIi* zZV2`~-7@6pgBX|agaCI>U1ebu9g%+h#?g^RM(2o&m*AYK_Dyh6B8mm{VKie^%{(lF*Z9CK#rpR830Wn?iQ18+T|7Hf$MXHS&h7y?M*w%&Hk>) zkf+Olr;NT=6sU=ajE%hb?fR-aej4w>e~E_$GX_MJy7w*ruJmD9p!z9Pnkp;8fP z_je{-s8t(@d=~^d%anD~xu<65^N-^6?3oV9Y4;{Y2;09MiHMBAD!yJZ^OAw1Wpk(( z%R5ZO`S7!kK4HZ4;o{ZQYyI%8on27L?8#klJ{BMdz(qk_Zk_7>zL+=A z^Pjd{3PYGq?ESY_&Uv}?`_Z#rSy4V9vKJV;oSbX+tMyFI$>*Enyx$EwTqeO~OL;iC zqyKA+i7`mX7MPgjmd@`!&d;M@juayfT)y}26klo}W}F_9{7ybxCg$SK^@{5bZ78?p z&LBWn@F=Tdem4m=7v|`3W;3Mfa+yU$-)jV^36HY6*`Yv?j7j%Jq?M*QGc7~9dCs4) z?Nev$IQYl^;VHazxOO=G@$Dm9e34zp@a2>VHbwJU6>QSeMx;q()o_gWCX;t_ni=Zu zc7$g|f&jno&MxlYn@Gb*c|{Gyo*%fv_6lWUsyHB3wUbQ(^y$}Fug(R`v;g5DWqeZn zz~mjiG>hvYau)6h@!$jB5@kh`_T}%EL~NMf(3Q>|(RT)0pwfm=KC&eBM<7P_q%;g6 zz6Duhxkv!A_zJ#VIh?+H)kdCDP{({I!o3e4C^h7q?MMBoKE&+9two5C6*^O+tQCOZ z7WED;tcB;?_P_q!D03iXe8L7!q={GyjgUa})W$S%6~0+QUHmAzOFLr<9x?P}4gv=> zl0GJsGvexxek8&OlllrY9j7`es>X?Wv?QYjKA==3uG&I8t&_iBCGE;p~k)wU@G>+0;d~udij8 zWT&5$SVtscR|>iUz$A%)*|mv?n8-*>A@YANvFbBnz}XH|au3|7-L;3CV3SC^yvWLx!5R#M$2%Owm62#`A>F?Ph zy-dF5_aHXvNx#O~xdy!|qag?Rs%OfB9^99N($5OpoDIEthckMR5QFeyR%@S2n$mzt zuNTEv(D)Qh`=d%M3b9M!Tkc{ozW5=gp=(zwoO}h&E73w2#MJ<+#$C{tmjKHgYdjwp zld!NuGfqt7Yz5h#FwN(jB~2F>;BjdziH zA_~N|1eJwze^=hJHKhBHtvhrZcT|wzv87*p^qd`jU@$pbUnt=XgAM_pHv03^ zb-9GWOL7E)*??&&La3u)_dF`jI?9{P4;S1rsE*oivI+4(@Kgp+%K~H{GM}wa`*0SJA2W`-nn&@`vH=ZJqW-8=YMVPLK|D zX$&AbTT%U19DLY6Arq><(wwO{lcy>7q;!eMoK{i5^%u z9PBaMaJqi>HQo&4BEGXaKe)Hz0 zeLKidL(^P>6qCz~x#~C>Zh+~|uKfuoRC&Q*^0)C5e_9x^>0z9K7vl^DkekZF|F<^1 zjiA;bmWKdH-6ZK~&v=5!H{jL!d&r9H0{#Szk^h7eD-ev^+(ipEIIo6 z&5ZW*_(c6q!qg-fv!bG6gIOWF#gng(kOw;^wc^hiyYOMq_GuKDg*Q|7;t2T_meze| z*RyCyJ?=PPAe3{Pz(fR>X{kp;BY#Si@x(FGN-hj+D>4tj-@^|i{J3#{QhqTkgp6e= z^B(H_rv4xQVyZ1?F!2>hU>uocq-s0S>qVBNj0`i$4a2gF>H0L8j)f+?I4=ps?NS=O zi}}l&ySQ_7h06moJx81^UZ*d!@#~+6&7GeJd~*2Q?AQ zH?3os7z8KNgFa!<&-&Z{KxXK-uS5EkSnot0&N-!FY&b^O3zA99rpw4|`%?n%TDx53 
zUP_7}fmjmR?ZU$zbsw|&)q3Ra+3h$@r1P&Y$MQ9%E$A>t4f2{&GIY@;M+fU3VLyOm z1H)SyN&%-`#~8JMzN`S0cJi+IE{&Jd9*cD&m{(J2sF4U#N6DzNS^$|`51TQ7YX^C6 zupA>sJ#bkNVF+PT&Zi@gQWpJ`E%|{VY5MvNX1PnL=+uKQlPHQ%8Vte%h(Rw&?c%a0 z8o6d9(IkpW1C7rp<2$^WHMqFVljxdj^ATvNTKnZ%HxkzPCQ+zK_<${vMHFIs2JaqI zDv(}r26qNwiOg5TDsTw0^S5H)Xi{km4i{lW0+#3?GYM(~p8Lx8KWO!TUmuWR>|}=87jA!L5V8rK#>Wo#Lakr+&lGy4Jd)&ETku$-;`lo2p5p}#yXkJQCt6@a$r?*Z0R~I-| z+Wnnm71=B?J*iNm5NG%Jn!>>A(5V_TA0?61Ok6)PG8n%rgcV6-518Pr0N0Rk99&l8 zzJW0mZuP(xZGh;auZ8u(l_kPo8ucuxrrflGd`I2v>H4AbgYU{5%xqsVad3S|AsP8M zBT3BD{clZyr#-lkz%@iEZ?M)6YK$zH+4?ih^JDGN%!%JRE8I0j-%8Kix0EiC_F@UY zU%X4NyES{ki<@<5Oe*lSWrs`WU%<(NITJ}d9n7u42QQlgUep8G5TWEj@JpuDFZ}TZ zx&{={bLiPv0K-h$KF|iY#J!gsH7`kymL)K-A?8Nt%9Mw?agX;BL0BSYolXz%igFg_ zhPp|CgTyO;RLo)6M_buHEe?W9NaM@y-YzWyCp`0krE?`j&A^E@c$;L6=Y&;!N{9vWe8x(z*#BOPpg(6%}VRG<81IU`j)ZG z>c?;S??tsn-wAlJkd4=N{P*UGPomnq83u*Sd3oPCju{nM*u{|)W(qAW?_wo zNa|d;V=rl09D%?HIG$0xGvh5x!9Pa6uZ5D|od0)(mQRD&_aF&A_Fn?P{vELMZ+1sY zqv;s8B$vVJAfJG;Zzvv2k`!2>IzzOhs1~(yS&-)q<;j?l3P#OT{awz6GpMX*)J!{3 zS}Wa{Gkk?ogvu3-^3$qmqrE!kqQO!;Zb3M)JV_X*K9(sozl~k`02r4Ze z6u!sA;srlmoMJBiGyb^%3ae;~&_SMd5WxEV-+$kz;NW?$e4O8dXZXJ1~vFmvCjCFh};Bm)7je5jyU6B3z`0buu- zqAK@h4##;f&Z$Ds8MUX3+e9zEi7%cYj1@w1-mvGCCrTro@P_-I)4g9@~bzGtXvCF42RlDEs$`H;-9ZU9$9Q}Y72`5AI zBOaO;H>6t+E%L=(=}q5#Od@feRNEU(=-Py_J#1qB)ck#^bNSj;_9s3@1P!tlV$W~n zEsJx@OnPyKTwBzxetuv&IXgGx6hxL(*hLnBbfab3#3I^%>jrPm{-EjV@E369E4?eg zP3Y1@!q{v-Rz{Q+kWh24BbB^(!9T5@WXmt}AIb^Y&tuaSNI`t=?;OqNnEwGaXuIqI z0-aZ=yP{R{?ryLH8cde-H&(mS>_O#VsbmT(tLk)MAb~%O2IJD zDBdq1itTf^exMr4`{ha^a~#MvHDn!eQ3qjI2O?G?TQoFewOrY$g(Z{xTiGhJxVUWX z`jVkv#UUQZuho|{b{t5qGe`YY2bsAr>Y^5ao8sur1dIytkd>xEzSF|S{S!GS#><)F zn)GTc2DU#4$@$P^!vEq6D$&@qjqP0mQW~#*u=8LFCgmkzBp>#@@UrI~e>!oo=n*X2?wGn*Gg3LEs3;Q|8O6iKP!*68P<=@>J zb2=F_lgncVL*VP&?CD_Dy09C5KrKSiM5V4=6&R_i@j_3%69py>Ob z%(H0=*QE|IW6XT>AZMdrO!2s z4D;T7(*8sS_#yQ>@e$g`AE9#&A$Gdx>ahLglMipcptuiky*uz| z&7~>_sp62kpzwi^>%9CS!O8Lt`ifXWen{$Ie0GD;KMip+x!zey+L`dKPw#`8n4u3( 
zx$6T-HAq8(@3YdQmTsH#^4O2>z$ta6JX6Nt)BlMKh5Kf03$ZCFDKEA%jxI}Y5!wm9 zjp5oOdEo>cAC!7da7FHPDGe0mHfVN;Bk#-8J-j$_`M)8HN40_Qn^g-Qk;qwUPY=9! znr`-1?_KpW&+f&gwi+Vca6bHDD4dNfZe8rZASn5#8#AV8g*Q*$`s?GOVrnRCxfy{2 zFXPc1-pZ&Lz6{2?57rf_clnB2E4@gQvaR-0knab(0R#NpyhaIy+~~}qu3T~P7?I?W6YGj*H(9($*NO6E z9XThruIT2KRS7bwCc6Z>R(ubx?C4MIHuv4sANh-o$eJJe#^yCUWO|bzJo+Md)dl;< zi=}f})2-y@u9Jn^M@<|?O}??A<2|i0>(bADK|Xr>r@%{`Hm?IZTqvVYMER4{N6tjH zf0OK=#ob+d+q#cGDNAh^{!p}@+Hu|b7EuE#-Qw(hw%!uvB$L6(>SfE_x}|g=outzC zKnR%JM4~AQN7^kOFKMjy29Z%wl9gtHAzQ>Vltq5K`rurjOQ0jb0V*v}+)jz#jY9R4 z^>Pb7sIS_?#QIhGYb`E+PTF!$?vCpsj383d zTRAE=wes3*B;TuWn17Ka8MrS?Ym_IRXY?QO5*-K(Bc)eV-h!F;A|y~_5=0;YSd0`~ zU&Szt_y6;ld;~h&4yq=oel;dqdt0+Jx8KI_VqM92(z=c?hNB6qMBP~gQO9Xw05hXn za?WPKUB+)W$pUI+|2$&91x=L*6=6ElUBnrrt}}xW)Pn*V(3>4@s4h4QFrGvsN5(VV z7xv0x4=HyL0UCAGE6#p9V2QXUK@8qw{`>b5Hg6&TcO|aBKFewmJHbpj$gfQbRH%xN zp<#Cz8hbS$)7yt|l;q?D{u(26Kd^%07y)9|<&UocBA~?y=#Mpyx7bXx_Qqw6O2fJ9 z7V7FdyMvq@39#8Sj5AeX{`i{cE>#Ec7bey;Tz2x!;w!K5zm;}7Bx7sc_F?aN zU6ElgK09!ZFEH%lHq#xYnMqvq;qg=I#|xYby8La$Zi##DuOp1_zQ5 z1epx^rEy9_*o!lt**}cvbhgB8>BDqa|F+sZukm3!eo^@Rgy%{vtzTsfd#fYTAxa%1 z?mghbQAtxxl#htPQTj*$TVjopnxe>0!H>Z(AOyA9H%BCkCr8?=;2uM84#L7>+5gg+ zP)d7XF&Ucw55Y(GcfyLG*AaHuy0KZK#f0TbIC8{A2c_OHR4$0+QaNCqTNK?t|Cx1`Uk)f@8=jC0h{Ih*n;830HbJkFg8XJP@jS};dblReQ2-`h=SXt2 znw9_INgq(PVDyta-g3{3tG#xL<#ZK(IRC(HS?x6uw=y}Xy`0NrjEx5j3xnWW zVBg8D1rwm7qlrOtyP@r;8X6kn`h|Cp`c2aihNHO~Dl8e4+}D@LQ~@a5A3FC-|EIY# zkLx-A;(Z2VF!=dR$r5JC+G4B~$ubkN7j2TrlC715O3g54)F`qhM1*!!N>PSMC^cx) zB3eYLrbs1npXY0S_wl=r`^Wv`{&BBA%t-ovKJVq6*Lj`SIj7vF(?#R!?eA)c&Dz%| zSVMOA!|VG^ep?_>>{W7EkLWV>JB$rwZ68wAja*$_r8+n1rc0XJSSgt1eGRxLB%kdk z+)q;)kVaXz1i85m()(DN`K$&iw6Bq{Sij zuR=NnL~4FvuKsF(L*h4d4W6-sJY2;tnyjGfmzwd_ob2pSdXR|l+Q_7N?@bj}N|DZ- z8@BkKDBi=gdlXC+Rw=c;DW+9&nG&YbUtiI?P2F|W+kgrRr!q(FXz=ymr)1Oc_9T-9vRncLr{B+!GyUoq76dUAfb-m;OFP1Car9ym!}2g*k8dT9m4!8BzR(c4XfbsZuk|C2j;6*Uu2ygI`BQqa@z?X{^mHK7V`n&5`(>@|Zp!o3QSVExsMsdVe}t-c6^m z=5ha6^QO~&XXt}tbjB2sJnU+vFq$0T15`UyO`-!0jpG&`Ii67|H47;?;tEKP)uQ-9 
zr!~wn}R1%7FiP&2i9ua*H@y5XzuC3_BN=AKi zs!?k2-sgLLFc(f&I79}z&e=iIm2H|#fpeYIFL?{mg--8(*F0aQ!3bdA$3G; zug3Lkr3&yE);_25Cv3r|Tgzf9s2llIX}w@%Qs49g)|!@xuhG;GGh#+hfI1Q|s{n7lxw7%`WjKj4 z@=Th*SzoZ!g(usj`iqOALYDz^>V+2M1i^-l#zNU5U(;x>Ep3ki9}!{>cxUocvose; z<2o+cj66rTHtvxRI~7Mg|BuEa5BZRQh;)8};a~KMd_-D_n&EXGwC-R4C zT?=5s-iH)9T9Ysf?~B@Ov~$^?wkW%Zozlc+$y(w*XI}qce(krm$Jsl~Qepu`q&4-o3iu_xgkEH zYdT}2^Y;_9HqpZm%}%44jTwDClIyOzqpeitowp)jtf@_U@ zrdL*eUS1y_Qgrv1z&9#%z8ERxG-b5j8^t&ImkBfPe4|4bWeI()ov)U8&}H7`63IU1 zTlCWQxjA6rU;E@Ubzhv?=_9C5LNrVBFEQM4YcXQZprpt#b{M)e*iat=sgRax^VB8v z^IoZBx2^QkJ10+0oNE3alj`V9`&w1VaP9fJ&vu2JJ9n-xnod^f^&dX(h26fmmeb}; zJ5k418J}P1i>c;5d~$YTx3xbxyNdxejscx;u*zE#`mBtedZXvQ{8C9z!Bg8sQU_rPTJXBC*NXI>=1#trw;pLDD5Cf}DP zK(85Zc9t%bC}%ta&XV&av$n}0@!4*P2;@Pf+EofY`%R$T8&5gRsHA(c%18R)y7L{$ z(h{cWwmBg|N7;>QAKn&O(k4l&@hGN!PM=QWKCu2MRdRyd1Zn}F?jloY#A{cQy}mCM zi(;##Noy=ZcuIcgjooVG_(dIId zP0m8TdzR1AhMj}oe@!;gRy;g=9ZK{g?Nu95S5~~Ex8VxfzOl@eU8b~5lZ2W8{SErh z(+%EUQXF@#i}cu4&)D>%v9coQy&`R?O*wf6OU9ZKHb7=E=regWIdT+Rw{;jRq1k zmmMsd+>TKwSn}|$Mx@1#)+z1fQ+#OBswx??>$MV3KOm+E;pC(Za(`tkNOCQ>rh(Pp za459M91+*FhQMr(b9}YQ-RI+@;P%ZAFf?T>cFce_oc9tj$|AyO;M!k#fwbWP7gKCE zwY8f@qhxB{+;eTFnZ7xnc|*R=_4sI+u&ZqnHN)4p;(E;kb==>+3PI(XM&EH8>45B7 zR0>QfBZ0sD0cazH5Ymt(^Dzlqycr1El1&=f7UvVOSM*|OWUS4l{&mCN<`d$f72iWV zWac&ikLx3cK%6%nhE0$HpI=LmZVZqo5ft4*L}V%bh-!9tdrw@T-=OfnCSh@fRoO(# zK5TMs^1KY6{Jt6v-!$<+NyB{(B^*vdXzh!AIA6fw^1;0H0S(D#P*lBGGUywAucTI3 z;Z{LWbRRO^bgSAWt+VRVg*iDn%EEJ&6fk>D;io}wqtItA(&6o?)UHXv z^fmerxvmT~86ll#rIu6;$0W2&jo%UVQi4JO&~kyYCA|vtsmfWj;J#qs1w=L!B0e8k zs=N^dDI%pz9v03aOtn0jd=0zvJU_MFweH&MERSd1ZSI&^*&m|xa+r~HSys<*9Nd6y zsyMpR@2((9^aPBcKAAyTX9&bw$&JwZe&sT#y~AKreTfYR?lA^>EUzz6kNX zOuq*Q@0tHH(9d&?*cJS~Vi*Q)UO)veL|vc6OLTJLJhjd%u^@iMYQ?{Q+GWa}-ZRzq zOP4t#lSQ!}m)=VUN9i2A{q3tj3Fy%6-4R6uHHZ_^)i%wQU{c+(Y5Xn8U6P8yPpC&k zFT7Y*B`iB`3=e1J@<(UY3Ve-6NkZ2Hl@UWX!YqiSHWpK1hj34d+ykXK8C|NEawy~K zy7XM$aS?(34_J*;hxxxAFmDPx{PV>517~(NJ$3isjE#oo?-3^XNX2u@J@XV350bpv 
z($>*dN=M&XKKrT5py37$dlo25bFBwZuw??HL7CP^KC*m1qDrTk&j}wOKp!5!y@aad zTfR|O3d4^=2dtme=*9afi^2C{`Em!=Dt_`0GrFDfVK zH8`mceT_Xpq(?cR4R8%9Ex@aVqx^q|&X%W@dKeH&%h0F&?2a36jL$VogEd%^V4wD$ zqpyJ6B12MGw$Uib6|sS+DMN;v?f5~YHfc1fn{wyY7lXIU&N;Kqij^LXyDJmfZ-XrN zp{hn%YXT=_ppXU;bG9-p+wEsu)20xDM%V0@ZmH3I z=>X?UMqHJ6A}Yb257R^>5I?)P(&efJVci2HtG0BYh^UCegvzp~NvqK^xitb$mGKiA zQh{EEofzR`Te;x?_2MZXyZmeUFWaMMTK9fPAH%#FNul&r86jc+e9-wLquUEopn zl_AaN8*r{M@wiT+%Ecv6D-4F3MQ33~SnvlsBPC!R8e{#9j)gYu$5O^LXi=VTicjN0MS`rsmeB zqT3NV&nLuUnXafxw57R)EwpVK`sk(C2tfxw?cHgs7hdq#KPQQ^b!rTMuD(#7=U_D9d#+*qGs!B$)fy1X_^P$?@ zmnu6ICUGpkUaqz5T4G<6`_A|vc2b#bN)wVSCLqWB{WSnwSy1WwH4lKt0%$l>BT#o) zI+m&!eZWJE!0T8or7N~XvVtK(EZQ@k(tKY7I>f5EHkJE?*-@2VO3pl!>WN8i+W0~I zu75~UQj)@2jXEq1R1T?T+;Guo{8O|~v{p)d0bXwaZJh;#jJhqYEaU-%>y#6!LWRL_ z$sH*G*#oM9LWWnTPIZ47dPEUIO7hZ&A#kY|D6W&L1ko~~=rSe6`_co^89d)=TzL4m@K zsm2%IOD86465BSlQgI3=R3{$t-#M{o^`i8!@`7vYS}HmQO>%P>4IyO<39KdD5Mo7T zAqXT$ND8t4MGUE|Q}Tyf-5`l=uvpbvvCCL#DD*y%ZIM>bJ!vp7QWR3TJ8spVNY$S| z<-;4`R32fIYgIWS!}6i6~if`N0PS51r{}*yfN#cDdDQB@B}_mkK{V4&rk8mWj-3! 
zJ4&|zNeV!*zN0)}&PijZm(Zy8yhAGbymNA98>EREd!AZqLz6?rS_%HHPviwOq(U-D zCsx|s$R~jiE$!7b{M;Cd(TD0Gxe22wJJ}v%YF2Ro;H4scogNUPmW}diol{akh^Dak zX{@M4%=af)F$E0;eEHvzdF_%1L(@GPomge$hASmiF-6^T1U z%iLtUwBdNWR@R$n=3x3;1CH(#fvl=U?8D0VkN^5!g*H8KxpF zpzcWcPiNb_fyo#W$to%;W4QArw|6A2h>e4AmE8O`EbnE)GTDfj0Wy{P^~neZOybZ zI3TeU&?^JE?J)__C8u2DClmatNLOWvj{8{uQRzSJU7&UT@J_PRio=T=UjAs2{J*{8 zAgj6_pXk@?l281xhfk?%ypTvZ;wx-!8f3b)baYWL>w6Y>gtXvV7CDw;7F30EC107p zJrmBPkIX<9I-2}Tn1UIXraR*KoWY2BwFzZG`cz$LFp|SqM<%3R6IjS1n89=xhUSZf zRUM4_$OmRv)$o!>zjs;*#g~N?aR&3{6uIGG(j?0ig6lKnH5Eo$s9K8X-j_|DZ1&gD zd4Xd>Ti`9NUJ=(Sa3n{wkIY9Rd+M&t3kBEIx>LR=usbICL}qf-*7?$nM*%r#xOQ4W z?fQGZTB!hem;5~=Tevfe9kUomVzQCGQ)%S2ER@q4t+M3&rOh;G++Rk@aLLL{v3m2F ztdaEk$(EvzrB6I(nwy3?s(EEy?>a)=q~q!4udnM282?#6?&Uo_Vi*%9uHjmh6&Y;0^W_V8C+rzGbjQ?hdm z7!c^rb=CUz+nWsh_MBb;OXe`hKOJ|r0;GF|L2reeVb46HwRA?&ts>1s(K4Wo!y!v?|KI5I9~DgR&u9Z zgy-S&XU|G2?kIACZ?W!YBdUfWwd!~e&zvTefmJ+O!*coh_*{mSu!Q92Pu;)x z;`=F6rqnzP^}T;`a?$x{uHTaV-x@}5oi>7BC6`N=9sJC@i2YP-?3kZrV|)Iyv8y_< zv2CNDeifZDdG|9Ha-|M_;Zu*DKb6c7*mCW|0>$bwwP(D6}Te>+2mQZBXng z#s^bZ_r=F?UAuPe)4%`v1(TZ&hMqfD2#c#pHs&xydxUAkVMdle!o#kvymu&e_Mwr| zUCg7}fGO_RVVV039QYx1hl*14Q&^^c$L1Y5c5FRFN-P=FY`Y16b7N|8FtH>i3K0U- z@_76UduETtK7G2}D#hb!avjs0*LLX8;pN*8_(jWw3-`@C`u!i7nT*AC=-s=wS@ITp zSyQQ)0Ys10Fgg31YyFdw6O)f`+5eK3Qxm+}J(}|xl^m|oia)ukI5(1ZbwB_7vzUv+ z2M<;_&{47Ph6f?P*cT^0@EbIF{iREnCTwe5CA;VO#@MZm4CWF>-A0l$Fld{M*>a~08e-I2l+FEGOuKU*4I18b6g>y__Enw>FE}Q zMZT=&{rlG$9d1M!9`5KgzCu^2-!#9Db@RS#xi;tQ_+88}y@-mgm~!a4*!N3la~ZMDw>7mZSFW@d;~2ASvipnEzi!0z*16xkd-n>nk~!i`UgUctiQ2_{y6w&a z39V=AIXsz}%*1}c!0G#$a!C91>$i^V*A>bDIno&$xjkaYh_xl@a>6ZW~ z?{d05z4tJyGAC!T)%ET1*MT^1;gioL-`EsYJEgVas|}CN?af}fB2)XsufygazX(fb z0o4*ol4-*78%n{=ixw>!)%M$Aq72!-$qYiM+jt70n&nlO%2yc4HCxs_&mwBm{QmpD zPI$J^GoT2uLoF7>VZi2RH#=`AxOn=IrsYH1l`F@#Xmyx%fI}3_MB=8GkJ2Z189Q>s zh`Ue_8x-1=W)l+=O+CHGtd2cJFYzSYmT)rdaT#@2)*M;Wt$m4$%VqI=Y+;G-9cO3f z=em9j-@eVVXbl;XW*-Ajz0R*ZvB|OE;vaA8N;Z&kb8Z_51Q?(0ypfDWUDfB_R(tWU z>&2ROvFxejFpw?P_$IwW;vjZ2_1Bq+g`=7eUZ 
ztYR`dY0R})UlJ~3ve6at7jTcbeY#<6*a&7A+_xa}@r+TGI zLe!xR3HVz2FBM+hblyjK#J6KN|diG4&Z!^p(zokj0lFE+pnq7)_P%`L@9Vp0^X9m>^`)a; z+D_Vk{J5r(pnrO1N2%$#w5X+LRy1slTXVlZo@5Tn$%nik!@FacQ!*=Q^T{rqI-Q+# zF2BZ7!dHSfBJXn&gw=ugvFfM;&PCIrECOff=3);g!6t# za{UzckC{zcO2s2r>0MTH8n5_WPk#3<^4FYsxMTD#wV8e$wSSf09TKf3-&9V`j(7Ou zIG$aA0bViV$f#iEzvpIp%uFMeEy7g2LK+&8J^X_x9hWa(o@6!ZBklQvHd0=HWlQZ6 ztBcEjYI^bD7Ne}~h;yTovm9qWJwI?Id5AuJ`>rL~8k!t~d$JH{E69h2orTZ^VtL2n zU)qR#$FxK#wZyZziLt+V)sVn2i(NC%_qX3J;@%#UMO!5(gP)XUcfF-$Yt5tc{++vY zDFR!R1WsPF1|5_RAeT@o$+ia#!sF4e7C^^s1f2?4jj*^BBIY&oVZv7w!q-KfB!ef{ zk{L)JVgdu9@XY)4>9d9u%H`Yvs&kw0(J{11 z2j0)Ld#_#z?9dV{;Qd!+t;e?X9A%SI>{69wk3qgnGsncD!4o&%0RXJTw_Pur`|;pG zWuEUu+Mjv!}Es*ZSt0XFf$o0|`bl-@kufoSuF(Pz($<5CB#{gVT+C zHyuVdy>4z!V8V)Zl};xmok0qBtAO0tfyr!igxCQ=-9dXg1PA62$JOGv;xAknuJD%C zxf!S+UD9^H!#VZo-Frp(?Y(0$ECAG(d3*NmjR$`}xwb3@dDuz^yBL;)Ux%X)Y>Rp~ z(kyZxykfLNAO52Z|7 zTiZx_o%Ich>}O`gysJk0Y)de@H^6j*3jTs&nsV@6U2*Si)uaQwfA%oco2SP}?wq$O!i3JKks z@Q#W3q8gTP2tN}m@R9%8({QZL@R&_-5cVQ3#IU7Q8XY2G*gL`lDY&Bm$p{X5X;?D!Cl>6OhdP29M9=|78q?bb(gEfmP?geUBZfS?#hnMm*WDjrluq+6Lo;x67;t<7T`o{39m20^8Cf)p>)l#Yr{k= zA-3XX{Bl|X__r`pMMMpr6P;7r+sMjZ#4=i7x*|PV8_mwn>RoholU>q8?P2zP$^c_g z08z8^^9QSNpP?Sk8u3&XErx~1fE=8de!AIouVYRWV!M* z-i|0MFONdPif`tK9b-vIJK0@$0tQKV&L_%OPzRP~X5#ZS0RZ!Z4eTKiFh0g#c622-W7*mZ|i$%rv7^Cot*mz6(`)zY(&%O-VWkrxs%a`ij zts8g^2Wv??V47B61NKaE+^T=|(DZd8*B zB8RW%@&mi6jI;ncMA~Ja6tvg9d-vCY_Fp`0+yQ$4^*dpG&hUt(S1-HwUJGJ2sPP~K zWWmIHR5^bS=8PwLDZ1{SkQ@o?oEN{rT#iy@)`=o9k*80e?xUu*ky!m)_R4Q}OxD;+ z%q#W{ReY>Y%#>f8vB?YsbWd?brGkAT4x8uuVzs`KZc-^{|H{n?dq z5i|IaNwy^*yMTjC+rpJ@Ko)&$l3TREuv4eLPIwDfHG&!Nf1v#5uvNeof`(QfM{~{nAc)BWDhwjFe%!}W@e}A_x zzE}tjRu2PHfZlT83=Z+>V8`Kf8x-Xuq~RHYIbK98G^0 zHNBp9zL4J&zbmfpea~O6BuOB)d_$Tp1&7uuUmmbfK z9I9l_{*A*V@Nrt&+F8~qcG-ivfB9t`MR8dt!jj2eXplW-@YERxs|Va5jfJPYCuMw3 zoSB(PJGA;I_dejAw!D31DgKko%#JjLt$=?qLvgtw=feKLP%q6txw#u~3QQ=sBMOPb z9tOb^$`IA6cMw?{g(5Py<9sh+Y*DmD4QD0_zbP@aP+nrJqL*Hc{Bh5}-%`2q zF4DcR!bl!e=n;u1xjYv!mPPp87X~MX_NeXTZM8=c5)eV&IJB>F?J%X{B@!y^GbV^M 
zpYC_&gi_s)kOCb{7A>kJ%lWHmq{V~JI(B4xYf4j7<$QRSzOtR#y<4}-@K^ zHP8*(LS>P3C`kutGF$bCLwT^!Jxk)lrK-sw;arz}&g-hSA~4UaXke`{vD?LQ!5` zbywb-pJY#l)}ZPIp}E{h2X`lquRG$QDE_qN@2AtO=oFjXkeNxvMk-jhscS03>jhRH zE7!f^(drUxuQ;bHm+TpBx>lh5NJRzwEVqdZ1TABdK6Zdp~u>2v3CEJ$xJ6QX9`LmDn8J9 z$D8+r9QVr`3eeenY}K5?ZI@u0yXa_SWOT)|Z3{{JXbBWPv5W)8uRoaGD;>>_EwsOs z6V+{{{3CF0-_bW=TuN3$|6SqIyWQ^K{jo%d89E1EBAn46-zE97I(GrrV4*@TUFIv3 z0gZ@kmXJ;{G&Br+oM!o}iiTO7AUvFmfLvk!g-sr?^ig3DrqcpCm~VZI8}oQubbi5L z31n&uF5R+Y#Nj|qNDKM+v9D|AW)Tk)4A`zYQn-%|Y}qwW>7{i|bjLMor0z`kdPxRM zg1t-XK4smG33d-TS&^n>%ALAvLbjbMhgA%t^v8exOi)90?Z5v&8z2{U8Ym z^b4006Ls5I`&WoW6Nqb$rk9WQPhW|PV**dgrmFd30x_TGB5Lq!6P(L_Oqg=;%6r~$ z>E_S&g-H&k7X*Ogw}4BfRR;Kheul#FRp`2?Iq9stQPqQddGF?W>b&HBV%_d$YTxg) zgixNtDO^+RoV^=uZ9^mk{p_>PUST>6+UC{7Nq__%s5BV#t$h)OE-1bHl2GbDeb$*9 zEtI-Ys}lI&PZO|dZSTLA_m%!;83tv~T&B1vIUJAMw4EVlW}&lH}v>lc%@2YYSp zIUjO$`t<1=!3_pQL)FzS@EckSPU_cPW)~CvH~+o;`r#pS`UnHyNx|3ihOHYx3TxK% z89dnWkcRn{ix80zp5;V}~ zU;kRZEG8dcybAgr@GezCMKf$u(h_Uu>Rc72(ViG{^cwe$zXHA9U2%4Y?@_L!!4 zY#TCk=z4%%odMKlWy97tSp4-Et?j7U1dIszm#J<TnZ^3-OW*WzN)y`gGXUlhL@gx?j>R2QDpnihKNA4;j)2hg^z_cT7mR;k zxATG5>el*QZr!|z=35UD#?6wf%!6#mA#Lj=#L}-ZAR|D1#PH#>@QJxd9^a7ct_A~4-0?n<6zo@+$vJpvfmOiKkwzmIo%{W;wsp9; z`>5_W>Z^49Qr{Bk$DLrEmD_)K8GI@sM^# z4=LashPVC!e4Gy_AYX)(pRMOo;Z*cB0NpJW@xoukSf&KB!3Le}Klh&0^;3xN?fw-U z?U+%V0TQ8{0k|3WpGxD+|7iMqtNuuvlxxr}XlufNjS1gmBPZu>o(|se&S=k`J@*2J z_zoCEI6I0DfiJS;kZ(zt1g=GB5YBQ(a9Qg{Kg2N@LO}24uF5(?82YE5!AOEh<_b)u z4#eZi2pE$#oxM|>YGlDAi#3ky1YB(duwltnm(ZSSx~GV)gs6q@%p-zdoZw(>q&jLG zhY{{!Dr|;mya}725!6+*ms;S;-|FFFCn>9`MSg4OlbAtiPY6-KOH5Zjctx4x2fn@l z6zH$#a#p?cEjCw^_sY;}nc?kz3!bK8*>47})dvx#Ux4E?eD>^F-NZls_`}E7_YuJ2 z0`c33Z@--m5PyIf$_<^PI#7S>LYPt)uhgAC|C|$LrPCZ=u6u0EY9BEprs0c?22R|( zFEv$Wx158YctJqYtyeF%?54Qx3%L;o)O!rlx7e{`2QRs$S*ou{A-_Zm^nj8)IOWoe zW8)*NCV6&Ka+|2wwg22)2X3zU5C7a;w@Gz>Z~Lkuj}pIfh;4Ty!{jv;28 zmhM0Jh`QVJ`M_v4r8>)C?tuMMu+_E2R~MmTe1IbEJSCphJf0BRlahz)>o4U=2q z?>l_N2qO|Jht-2EC$F2ZvEUSUTd4N!Sj-apAAa~@{Hi`jnlDX~pEhi*(DfTo 
z#{_L>k`420JQ{u}+AXs-C5j}mI7GB9kKj9-n38s3;pyqZu(`h{tMIN<+Gm}+tp&dw zo9uo?eY(1L@&M3i?!L-@F3l8{UlJRNOOrg%t>s!*R00xQ%3UDx*z8z7NLl$TvcDIS zy_R4PR6JtH5JR@tVQnS}&a=J)Z{v(=2wE4xHYxY*djSS#JqV}p;){i>`RhN5kp8Y2EQLJ{6BF;IM65A z|Hh4})`?EjNWBxH9!MY^Cf1BUnv-!L_bwOQZTHidNk6x@x69IFfZS>fbd?61C)Z5H zJe#7%{{X7$|NV0BBJ+J`xiT+F=;v}JCGK?dg5WTS?K)%xxfhP{BF>e%5k5Aba*i;7 zYZy76k;WmrNR>nq-+{wtkfJn?)cOAVg}BdaBT73ft(w3YKJvrij$eZez8?EsMu!$H z7d53lYF(bzr42N2{46Fv0fK>LJ)9R%@7*Pvm+K-#~^dKzrFnIp87=0Xxn-G}6?i*_|Bv{o{Iz(6>c$0?+Q-_piN! zrz~Udu4x5-btB%oHoD`wv2CPKN4_)}Tn4T%KRx{yMSIvIbu<1hdb~@L6D(N>Z444m zW5JKTgUHE=wG$p?9*mYTPG=n84PLB0&tt7pVn@MIB($gY7q>m1g-ZoR?c2>-IsD05 z1&lNee*xexBt3gG>r^kx?oo>}9u{c<2jCIPvvJ%v1Q<&tkZU%^Lqj20@m41A zqy6soD(>8Kut6kDzy_*uwlk55I?E|90V9pCUq8a_<|5bu$Qwh(=OWp)yH4ewWVlE! zp5xiq@vr1=f}4sx%~*NVEM{=(rgoh=+|iHToc|MRl^bJYgR9787dX)xIzPhTEy5qx zm9Na4OZ1Qrn#4$j@7mSL6Roz?eA7qen;vSqe@#9~Vu~j<1+N($(caim=^@7^QH@2} zL--Kbk1Oj)_%MsO(QR0I=Bq8L&N>_(Q!w~Rul8-S5MTocqhBHZw2b=eZ@nnKUYvMU zcCa~t-gvAc-lX7qn!a~(Hf{}UFcX$598(ugf*UY(2GV;b98?ba!^J?7gn?UO%1|$i z`v9Ah1OxO2C1f>{E)(9pwtcu#`%D4@Wa($5sd#RrrQ&tZoFN@^#x~7o9mtZL=aYS& z2m->5&+HcW15q?9fcP_nF%7f0Z4w1XU>d(V(vd?)o;mlu(_^;a`^N4 zIJ|^Q>y;8_!^>}}e-icAUkkx5bFdQP{%80Pic+=lFh-5N)lG=3)ZjYnhpR)m@?kr0*6F1-T4j0n1=Ky5${=&K69Y1(^QfeJ7KdHqgf#< z+P=j@St(INPJ564oOW$-b%4}Qr~TG*dU=>>);r%(pZ;RYmydo9RoB+<;1&jW7@@BIqo5KZ9gn(+ zf1{-rlO``n;$9j43wC=QhQQ(C!Lt~@ch^2wdcQ*+2)p%Z&5@cxT9hk&T63pgvj<3o zEw=!wUp&+WenjK;>j}x4N=o~TmS$UbVZqO16%Bv<(dl$oC5JH_U!}zVd9#(AhRE^N z_*|x`7f%PG7`f&D1k{T_Tv}a{oIqse5u_o_h~RGlbo{E*RQ!`cFZq*GcZ5Il*va+z z(F5HSlUltek>vTW@2mT-fm8BjrG0m|Ik?ep>VNr_I8pHk|NXU9E?@p<^zVPZe5a!C z|NhfZ+x+M6KmGUX11oPZrtH6eO&yxBpVQHGqnjFam)TMHeovws6z>9Q>+3JMCWOl}?Wn_`!xw)$>> z`0!)o{^o@#7AbnSnp!$MWwGZCnI;WA zz1G8nUN4cxvk^F`+4JV*VybhA@{6oxf{M4hnqFP?KR@le%f-3)!>3Pf%Hl*Hz~Dh? 
zj>9Qre4ZqnIJxWaCGpi-VZ(@BsQP?xaKs{qyA5NcDt0@yQA3PG4%Vh4&sN;CB5}ia z>GI5XHr{c`pK-XOw|g&H&z6Bqe!2iSmi8bop9GPq3kV3{vCc=-tt5b3f(J6q*Q5WI zv<-=feiUIo%IG15JLdRX>obi(i*Ci-KRWscfwy3@(JOa@#@(&Q%SYX~Kh-aJ46dA4 z%kexMEn|!C^nd696NN!OK)OL9W4NEyi89kcDKYSl^AVH< z{O`WN{gvAOSjT2PzQXaW@mcM@EZ&ZLrfO(TLnqNIW^Ww!?NfLnd&5I3S0@>{F ztl%jz1Rm>gI0p5DCvBNa2$KVlABVP*T->twlSJ3^{L0s&FCcZ0`IlYdgYk0-?nEC( zBqfpyoSfYS#nnqxx#Rp2-^8y~TUK3t1Lx)BsJIlV=<`m9tszWK%mBvJrMPL4C(g(3 zy27^!r7QqZ{Uy;kQ>gY!@?WHR??|!ixbG=p2GQ+8cv{fs!Drb{XVxEOLxp)G8}Je# z{vxTqUnISfy)9f ze1893@487uJnyAzUokJm)|g3qdkoDB&H8RM~0J~;6Z zdxD{qXBctEt#iyc);Uu3yIB|iL42HAclLa^%@yp_-W#^#Y z&eJgC1qh_dHFps%SRTDguR%G|_QATSJ!ZVjMFx*&BFaDP!+QM@|8hK_jv@lbUmeBX8|UMh97T<7g_`jgK^P#Z>MBB+nOjUyzk zi5@JU>oC;i_^3~}eo_a^ELA*CowZ_vy>iH>3y#3#nGr6Gy=D!eM-nQOkfeqY)cMq6 zA^kc}_ZzuPkWR(~)tf2zstG^O0NwS>g%O*8RH?MFzc3{ZOrGP3_O%ybA5f{HOuk3erGU25qMeoVT35sfy&za)8dcaU+(eGws*bblA^ff zwsOc4rg?Z=LzNZ7p56}}J5ySUE$8vo}9YySAi{aD`Oq2GNk-)m$z|HtU*%YXhK D*bqk? literal 85572 zcmeFZcT`nb_bo^TwaWOZh!Ic;3Ia+NNhZQ2N{)g8l7l3XY#9m#$w`8sP8%6S> zA{EtU7b>b>hyK`zSJd09#qq}(J27=TB`ZTahpRT%sAR9&T{pL~GdIyYeB+vpt%;Q- zHwWh_j?-+1jqU8N+nzaj(&B&p1c#N4(aBd@_Vsv|Kd(z_*iupL-9`TU#VEQ-k?I#J zD$2!k$~PmwbvxXoR_Uyq>`mAkaQ6J4`<(XpB^+itd(l&Bg7puDos&B^oI9tWlYCV6 zEcJ#hV*l|!DjCR1n|$=PiY^aJ2*IanJqrO;9)o3CE#U$ttO~?%x^lr*L|(~=Jw0l+IF8` z-|O1`Ct0ScJ)1=kLR3^5hV_vGxT3q0rya(%Ej{iUyng+fZPhhkFZ(OanwX~k9Y#h* zdsq}N&W^M+e|mMt{@cf_uI+mllxu$N{NNd?L=*N%hB?*D{K}OpDVZ8-YHACMi#5Hy zx^Fyow3U19ZFW5rSRL1<>n_}4`!Uxnw9b`F^Zw&0!(073BqSvEu&H126`p(a>u&CvrS*iH)z6}>spPvc{I0?+}{n?umnzDy!mH7>ibq#R!Ydp$+0TN-KTS3w%&H= z^rcPP4@ugt1x?KM?{Ic@c64?gtv(k0;n~$UO>AzS-rg6+h>6*zmC;Mz}+)(}N z*A2?g4Ja*X8cHXNzu#t-dGusuY2N6^p}@QM?wKtAn5>w>W4UH!6>aLA-JaGIFRk@h zt+&**SxD}Nj+Oak`ETF8^?&=uW@TlilBTY}u9ZhV%dWr5e@2)yS#qp{*Rtz{^8CVN zCB4OX;8d=FQGKK{&Atqs63vO?X`ci9mTAwg)z&mOEBW~Nuqr3{nYLx5^Y^-Nx3n3_ z$UMxm>dk13l}N=+vtBOD>FVn09~w$}eQ!%DC6GD%v~`SScVX~ILELPI-0E@lXHUxA zBKRzxFV45;Swz(79};#_uCK4B85^vN2sM=nyUG_a`z^b{^h1)u8Jqt5M;REhbJikH 
zAt=2j7O!5tnw%$W-FLTiapc5?jT=WACFpZsWF3iZs}4S)mLlOx$EuZ=My}xV=chIU zHOVTes*5wt^7gYMDqeH)!m~jmt?4Hyo<2UURsIazwuL1AzQ4Dr8YACOQSI;%u>ER0 z+L~TrbM`D1LvQ~Pd?Jl#QV7Ro&EGa{e^+vAD(lgsefR$Uo0W%0d1-E}Ime`#$M&mC z$>K9l&-?e^$58^=S_RIW zsp;(03g{mn=b)T%UCjB~5N*|1-`?JS+|R!dwxgAjKKBp zpU%lf@X1~-v}<(ebv2rH4G6h=_xJ8%MZA|m3nFc8{o zUS>b48X=bAwfAJ}%y9F-w{MY8zZkX6R+#ghz`JmAauzyHC&)cM6Tj^spK;dCMhVLFd z{rtwm46#9D;kGiXW8I@GEOl+Rq=WOGN(QcZVWvhiq zFf1&Sy@M@kx-GiyD@^?S{IQo^T#B*j_w3%SpJvva;O#U!5_~O7vgvDGMEYjAK+nDL zZyWw9wjBA8e8zpXIP1{9ea0%r5&ePkB8^=IwiBZ_pFVv$(iHC{wi?=B|5)&h^ITeM zrfw#yN=niNkL@&_9$WY870-RGf1G;c!rw`V#rZuuNQeos?fbl&d_#4Z_7E*CHYDes z6RPiy|B%{6PoIfhr4-6>xh8(droJUrjh(KlNLyR`VVikP-1eAAZ>0_3@r>7@Zh;-2Lr)F&#`!g5Xbev@a zW(K3(HNUAP%F*7tcW?h#l;hVhHedF#to(fb)op>IiaHkpb#((pBO@ct=OQ$h;v_U< z6%60r+p>4&?QVJXJ#r+A|1k}@aFGr&4;#eJ}4?G+G$mD{`vj;f0WEL zNvpl3Vusxtib;rp*a;!V+9^s|EU_{!MQm>{cnXGtpo8kgRlo&-{^C zL=ZzO4Xd(bwqczVx6eCCM_H@Ry!6*^-guii<(jrUUw*XEjvybHn6Og$7RD2vn%;H= zk2yCmG&D5s?L;HD;k&9lbyB!KoxAYr*!oJIJ;P)E@%@j+H3Y&Uw$nAPu!m!fjg5q8K<6it+xHF7M>@-D=pZ1FA0 zrn@%9;-BYfozI`Yea2xt?a7nF`j2B{W0~cn(w;p#)&0Wy0>4$y2a-uP^ZC*Vhi5cK z^}5bqX6Qj+nNIeV(}+b_TU*mKr$mdmcJy&4;9@k(+)DKH^z2p^?Zw2zhQGCEhYLDn zkgYh}_=j4W`qTICFXP_5hEupP2Ra z>(`$ly@Y<{`MCd@8DqoA(jP}_7enK)%}p1k1~zZs&Zo1(VWOucM$GGBBTHbKc0rS= zJ3xv3c$Wr}vXX&89BN5?woyY05>VImc9q$aJ2hsR`|GcM~;ccnO$Z9L2WZ<>_-eVCU5E}KRv%;cDPxh*Jaw9q__(2 zgGR<{sPdZ25h4ryfgLZbV^M8QOf*lLfB87Jb992aKM4t(Y7{J;Hh_2ZX`;ORAz=^ zeQV0g#Y#&{M;c>2(X4ZMQ0wiV+giLl-r_-*P!Xz>WXP^X^D%A3yM_+ za;ph|AKy|hmP6#nr{8kT+ZBUZRRVl{eUSqJJ>y4OQb%*!bec?9 z%(C!sr)jd7*Rq69oq9jgJKkR%==AyCUgbE+oqLX7I=k%v&-;^Q%X;5zr}`)Ybu60ha?y;u!LxCaP_-IvSUy1#NFAZFW*qLWRU6IeBK zl8`Kw@uRpODenWkO@b4}v4BGDUHqtf;4un_T$PxMf9j_Ulji{Nx_QTrh7XR3bK(Bx zYeh_qj6bai)o4_2G=jUt1q z7Tp+APElsT`q#9#YcNR#T|$PM9ZAg`iLffoV%=AL0+5gA=JzZ5Jn4)qy4iV*vgu0` zWoxF>U+XFsfBd-VD+`i^V^K?)C?Eb?m6SEOhPT8pCsiRM;4HN-F z#EJ@R2NM>1{HA&a_cZ0%4J&#nY>0n*KIhz>jr+yyqN1V>2;F=pvbJoS>Z;^MlUUx< zqeIZfXl9vZmK!iqMX&qHY-oAjUR@UjcFmk;`@}M3!nm;Dp>7KUd+VY_MY}8IITqaR 
zZQi3uaD{o9UioIPhCu8^E$uBxl7S;ek?aE2C7fY0DVn({B%z^Xstb7li8mu2xV!Ti zy>==923Sox+G4A&{P^NqVWXpci&>yr948G4C})8_x5h4VM{`m8|`-Jm7NWD|N3sf>s0lzk*!LBMg0sXPQB254Su_CUELs^w{MT7Sv zt1lPdRKVhKhomYe$9Ny)9~n-J=CgEpmY=VN%{zmoPeF9?Em3s7JASFymadV_+|5af z)5|ySj^zJFzPq*j>9IqP0+Mjc;z52(cHL5Kpkrl@i;s>UJ$k-D zX!;>1t51;n+XWCilA8~lu`>=|T0?Dfnkbn!w=4oAf?S) z)|KVu!}zH-q;R%YtKQPff;Yas$6r3=nyGnN`1|{lXE_U~sWt1c{KJ}7)J>b@-qMvh zt=Vtw+;v}caih6rZLLV2<7)V^z`*xvW?`ucBW!= z6G>pTWn6B)^~-N;i%Wf8C)1*iQLxF{z)I|Ah7?@aR^|x{P+G}<_Yj*t8A+L@+}wS6 z(gSrMZn!y-uq~(YfFfF9cM&ys@Y$3&r1CsjjQo+wVdNOrIBbNTY zd-pgaqZPyVx-pLkAv!h0~o7++AH*DBIVDjSX(nQOXb9a=t9JJ8_5>R>N zR@0WoPIzPK(zqbyL2L`wOh!@fsajLrb{!oZ)5aLFJh=&u?j_;Sy#t{QC)A~M-Z4@J zYJ!zOAuaM;S7zIIufG1>>uAQp^k6FL@BTA}_*eFjz`arlE@M_b#Tw_%olEwydlbgf zvwPPrHcLszz6Zy~HGU77a@@Rit1>YyAX>wVhP}ysbvoLv+^@MUqBULX#Hmx4 z@VHB|ED=Qqcuj-qUXRTjM^XjNd;;D=5?*R%DVL|D5egiDij)*v9-B__58*_OhKHbH z0QxmCFXg2U<#H8-^O_}ec%TTSC@1SF6uV>&8X)(kXys>shYfuFnm`yKwjBw@30`@Nc=XzG=(DkMC>d}3-tPim!s-GqP=#R1w6dz~zO589XJ zS&WR6iN$@N((v)4I^+U5l8``^KRms#iNNwU@xv?F4D6-yLTk&@;hg$^lZBohY^cTl z2iR;nky#u=!>$?E(SY=Cl2v@ImImquXJ;SL?{00{z~J&LzB#*5Ku|FC`sdfGPCo6}?YYn)n-@Lg1W>#S=N?HB!Wd|tN zJN3D_In^w^^0b5Oy|=K3WlEfpWQV+krTcL$BEq=lML=C~JHB z-21#|&S;Xq?mJQwf(p`eJc>pA*_Dvhn~UXpb&qp!)Q=SR_4Unkm}Mpdx_Lf$;P2(- z)#5uuxB(cg;#lXaSLb?qdZuP(a*kI?N=m**l-hrK`3vO%!#C;^sxsKjH)XKLcp`cnGMdYo_ zEUfZq!C+J~aBK6!1-EbAMQz9D-g(d8usK;W6Hq>FXhz)){#&hw*wGCMIvsCbqbCnt zDx$5jcm$kr@zb3hY@T_bC+T>KUADbQ;LSglAUpATst3qRgFha3aF}O%_)!s(37=Ju zIVV@tojVs&Hrx#R+}|HZ=nse$%_uD`Eqg{Fb0DSMa}T%YUx6=Py!cog^>uw?iWDX= z3NL-jPNpdelVK*`(Ts8_y`66mT~lXU`tf6*dm`ILVV57zq(V8^-QC?w=6jq8AF;=N z-Ad100E(gRGLZ!S5I&Z9xhMfR^k6Nh6rS51gt}fM2San=V8tx+z7LSzKEl6b<8G&5cJIotm8u28h$Xe7U8-*2pVjAM}-orw5s!4W@h6 zD*2d4H32>3xUad9WLW7---a|dTQ4|47_ln%FN|tHPN~V=N^-xD;$l>*JicKw^(7Lg zI!fc?;}x^TAceytqwm7FNrF!E6jpMZ@bd9dEpuCW_}W;DNLx@Y!TwSwTCs?2h>wQ@ zN5wpo5KF4Pja&9|m`@f2sq|{x80)x}CFHhza~tg$Wm1*E9A**GBTxwxq1VkWj%Icq z8q<|p9@i#tO<=%&FDaW>4sM7hfBJ22PLOSed=Q_MbR4{f1qC4As#g{Dw9sL~GH1*7 
z?X9TrHcS;K)G~$$4pW~;S!QKplfkB4950$kb7PYkhGNw+HBf6m+{9E~Ud{r65kdku zea75Gua&|}EOc#KdPtYzC* zNw=`i(CLV%<1pUUShlumrf@uRrb{kbclfNXW5{T#>}z_5k6GcpfRy+<#lg%nd6`p9M832Y-_=Q#q{3f)!;32lS;MZlNpL=oujGrh zK`i>qOL37>jv$rLUM?9MC&k3X*o}N(sf!d)0{IT;(!=WydjYqPwlg$(W3=xA!Z zbm+)~V^LMl^?A)dUc!dW4d>uD0mVm-Ep(bq5zF#sw4dnd9&rkf7Rj6n>~H%Lg(&T>yzO; zlO+Up z06*2cWE7Gv9r30U)1Tmw)|6G*hJV*>~Ix>N)gKAIhDp z&=Np;ZUMNY-n%yuk_gz`Wg<5qeY1;?%O3bsO)8T16W*Anw;?c7-YIUm{*eaDmcN<{ zKU))c!f&aa(iy006woWVfn)?p+J7?xwnB4zw$W_GL8~O45+_!*j7ZPDCo=$O)MZs7 zc~bP`w6YDo%Xj<@eotV4kdP43$klWl#gMyQ!d)4?sdJ4RkJ=BtKi86?qEsEgL?~mf zWw$23W!I^Wo>EsqX$gx+(0E{*qZFdhqG!XNckJX9X_40vL8nvRDai{n1y(bnp-?j0 zsznpq3L*KeY24d>Bxmy8DoK~lDeC$6l!)g_Wr9TNV8ufpKYomhHCIV%%rQ}#o#@qt z!ap@VtpwQ0mUm3liY4pOYcNcD_vNf`K8qBfhLnpBcCn)S)8d<~v7l+*^&)HI)_sT3 zNkP{|le(VrK(y88Aio?32L~&(ejS&|a-If;qF@#!aa50+T~?nfN+O{z87icfl$0QG zW}uZpP@w(D2Zb=MYs$qpt+8+oDYp(Ul!bYCcueNw8pS4k0s{kyD7Zu9$Jw&g#Z)nR zmR|ZTG;EhpCYdN8p^Hf0?4D&nZ4#*peA$YYrTI!y2eP;+Qwk!XCv`KBwDkobAsqi( zX4UEl6U`x}6=(E~f=}w|V2`&zCU$Cwc~;y9wF)|sQeo#Y`t)&<#gE@G?_qDl^ScBDB-qP{CbUX56v-$%$-b1mhxtIEh*tVCIK$4t^=c^Cm=8jV}sK)=Lw!Z zZ46nPW(lBG9gt5dyZ6qf9jv%v#7yn?Twxxb4|?sLoq~09EE412I&xFv-tOG3M3Z;z zMo>fKi=Mbc>1xkc4pjYtb@du9G=c&SxuZ#a94Zl*=1tv7$;=3Ts}TT=7RX#-qZ_|fnZEgL{;WNht5lH2vyY>nr4L!q!N{MO0D`>#t39_?5`}BaZGalP>l_P3W&;-<=mRMLi zqXW@DIvU~c*PJfqMMG1|(va#n{S|*Es>m;V83Mdcgi;ni`VoO%+~&w**7~kbV#WEf zgc~-m@$zw6^bRIP{@k*q89#cAEYPnHNXZ7STm)xpwtv{aW=MO+?v%KNH~K*8m3-3iSOYQ4G>WX)KM-~%>N?gHMwFc!#ib&5uSd&6)V6aF* zL>Mc#8)puzjv0cM%M9I(&$EXJ9awaUJ*>)?AVew0if>8aZ>yUa>@) z0p&DGy2Np9XJ~p~DI*p+`vv8Q$(eaZqA&qQc@N+x%+Sdk_GC!R!O|h3QEK^z6c99)r#_R4eBN4c@!j~Nb`x( zPkVx}XzaPtvAC*@MHmB*Pl{5lNqh?nGc4 zw(cWNeQH`-3$`8mP94V21`IeZEiIX$NYXIe>MpRAK!^MN?G0OajNcz7iZWms&s^<0 z=L=FxE7qbmOC=VgAFCyzh0$5NG0~Gw)F|-9ip9hzCn!8Nq`YE2f?3lF z-jFFXZz8{X^5d#&- ztLdWoKGK}%+p-9Nnu5jN@55da&{uY2EF2J#Jt00c4!}bNJ4oK%U8^lBvbeNAz9H4m z?>`$hZ8BW>SZ&uZFJ7b9ofgzNVWDL%J6dW`y~``+Pc4R1e8B32<3hsYweGt} z8cW1;P@X%FH9BzW%GtmBswt2Wn^5`KT34Wg4o8XP>Xx~wmUrc~UfT;jfl&4o#dy#1 
z#eRnP?$M5;1K(zuL|t`WDK_1ArnF{Uju@%ig3!kE%T;ef0pM9$!@7GrF3!_?GNbV0HYpC{3F^BHIS zx&^H3LQP`_R@O-2Q#N2giH}(q@Jwh#^U`(8L|RsqqKv?Shz_kU08TF9 zbBMydQBbgUY4evaUncfD7=B38XztrI%A%A&(zH~rE0Isl7ZhsTV_)6blnQmfVyZ;P zOJH^)kd7tMbnE8L&mV;gE$}>Z3`_5ej-ww%bC=!o^5#9P2E5^VP8sy5vrcpEM#Ro? zU=+07a5AbF{lvxjiIdpAK%PB%n)9f=2^vB{p*{2hlRelEc_2F`GPxu;M!$c4Q*lFH zzCk(hG(UPDx@Dzelg?;@!Um85K}$+%Ilrn_(YY?cXz_z$yi_uXnfMV~)7bCV@`DEb ziqA3+IBJ9AAmGIt6<|k!;2OGeZdFZZu^1auJ8x#`iPLa=D4OuB`U_B4Pz^(L+*j;V z^SF)bc`|;Xpti|ysmuDaB# z*f%?eiLz#DipUoz&01-r5Xr;&j~QIVhGP@4^`v1BANep8wv&d&P{sI>c1@$S z63u6+2fdrnDM}>*qx#_3SUQSnGWts@z#BT@hO0?Orb!*P? z_Y>%xqI26 zbr@D>2(3j9nu{z9XKNx*5=sWCbK98)cznN9csC;{ppK^_=8d5d&bO=q9P;Enl)Qtm|J?4Q=58T@Uh3Np7fn@lGXjljy>$uLUpoP?>7nDbO zi#xY5)v^c&mWe`AChZQ~i^jtw`-Q1%!0R%ZrHd(e_0eizy`jKtkDSQuK~QqG3>EOv zeAWX($QvMFv7pnVU>-CL1IT7}5UunD@)Oi#eSr~&?(7hGrmhC{U=N4(>2BApbVBO# zqANK>pV+pa>#eD2$2W3Uf9_`97cEOc^)Oz({NnmmFSE<$=4ll^v}O;V{2C>kx z_1x4&GwFT@o>X{z#-6Q}kmp70*oVpWXkVo_h%CvVhUbYDVFQzDpm9JRWh2UYTtFpL z=Q63kNDh1U?mfmbsR_I$=eoKuNV6m~`F8jIGl(i)HgSJh3!bVV278+0mkzh=Gz;{V zbj9s>9qn+g&KUKE00s=ld*vk+F*C*7FSZ+QBEm*?|7nWI;t<0Zq1B1C)st@+loGGO zj6}=?q>%$TwNafO@*3jO_}wE;nlU9gCi*>l;z^^4+L;*EkY$UFvJx3B(1k<4R2l6Y zVv+j7{T+EA{M6Ok00CBwrC1$Ru?H-PHs~-tQ;<0gh4hPaa8Q2Y4M&?8{M1LHJHY+R3#@1GP-tI*^F-MMc=4X~0pU`!gya#L85jYc|%f<_6Dj?Uu z0eF>~^Y=}gQl)}f(R@&baLL#`tqqWB!Wqy2b})k;{(hxtA|viw7;poKbw9#1Me5{WaJ;`Ey zF&-YUDrkTI2D&0w{YoYkZ{Mm@FEE*a z#B$oG!1CD-2arJCAr(hlYlf6Z`8qc8m_H=C#X z)a^aK%LmR^;H23e=ZWgtTIqbt?&nUjs>zDwF*i9Q+!pl2BnZ(u(k@r3+sVy5h5*IuVuh~~$-~E#$sPaO@1-2q-LjscJehgXm z;Fp(|myJB#n9>lZ*O1@iB#kO4JN*N?u(-8R0CLsw)2G!TPHAWwa|4^C`ZuF`z?Xbg zMJUjwqjEU~4Pau!DK7*gjQt2>P1yXmShS#MYEfR%iB&@*rjC9vTN-`%bRBtw^*_zX zjXUTf^|RvCkozY!^HjcRq^h!~-fqVoe{_*&WMm|Yt?C+`$pcT%=i$scQiooR!6(5P zQc_%;K_CKLo9HqD}laWL-zK+buL1Gkwz(vIa#~Frf@S~U{q~T`9U#H>PA#7 z?mbF3VE^UP$jf;j9&g+ftA*< zd!+V>L2_hQe|A`jp`sKP%!*z-k^BP>oDkF%4i{J^T|c5Pt-J&*QB*z!oR_w_)zn({q z1)WfnBh)3lPh?6uvGmaIEyReXeqYWBzkTslkD 
zObv}_38Y8832LhjYJTJJ?V|#ul4m!!F?Rna>@AoM6R&Xh^z(0aa&}JCJiVQS z?z4+OeVSbtk`?1~942(2T1sOl1w&H?TaNwZw~fXytiho#Y3X5n-NB(PK{kTt#y52$ z)e-ByYT6^Gxo^opQTt<`Gfu*Ist)&Y+bk?jaN~)iBqyOVKq@6&l!szaiO#`4DSXQg3?$S-1Wr$i3V6KX&|8wfS(J&GenJl3cNpp$I*)BTe_zdYI`cw4`Z1AfLQ`j zftk?J|H9jhnzRjQ4WZnMUc&Q%h3d3OC#%i_)!dNg4Lc?1U6o;aeUCEG^6ct`V4xT@ zfo-l|_rty!Y0Jz6nN`Pis9*)TX_aoEeMTNWs_6%mYxc{9%A;+W`A%c~*C^2@7>6Nx zDY16^z3rf8F50q@L~SB2>&N z%QnHMIa%qL*MtWa`1!{un=|ux*6nQKvVAD?oU^uoMk>()<*;tRE&)ca4Aa#t6yJkw z_ps&~Z^Lwpr)o|t!rVj{ERlYYZv1gwSQ`}#hoHlAo`6yKA!gylxVA7!1b+hkNYy00 z8{nE+IN-<#2jW>X$M`AnPN3aEcx8Dh0O0)GTq|Pm+VtsYm#GJcPXo5Z$PQQJ(~4e% zX@S!P9dcLbSQ*2CVG0R;^Ii_NO`A5spqK)BOniW(JB?mMw`s4u$ihXCR8^EuOarjN z{fcr&SjKSS{t+_oQfNQ=q1e$D6F8^p+G(G^#(sZ_F0>XtgRCeqq+-jEJ!n-t`8RRr z^7q_)^R*PB22E`3+)yKfwBc>Czi)Z`RUO$py>Fd>*~r4aJOlIH{#s&}3K!prpV*O! zHo6I>4#1}_LTiYDsoq9_^{_qsi0^@Bm?)U;_yDh$jlj{aT0AK%BR}PVSRqsb1?cJc zS@JbZWq0oOIB8Da_-dIT6FtRL3|hqTv(?^&JQ zyLZPT8&ISo&$!JDH5MG&*`yC0x}vwlX*TBL`a}TNt)1%=QHOVsAt0$O`s3P0(>|9;J{^8dW_^J)B_C-HAo{PQFfF-IVP%@Zb*(6xQNyXYs^Z$o~_Z_Tgv z7pkW9csD_0Xe|C;5eDRFV@|JmSq1yB29plA{hXySUUV3*Iv-u z2#_nS5;-Oxr3}q%4>dI-W-mzR9_@89Zf4nA%8pS=Vx<8~Y;)Ohy6e}WzJkff#i`npEg1KJMdOH;&pD3uR5U+`mfIrkfr+yFn4B`K z8{QV6U1aEy7+4|dVO2P!eJJ?xs?6g*ey4i55j+TEAPI#lU|GS8-59JuO4PWa4KCJ_ ztRxX&HwT6?2Da0J)Qy=>Zi0fq@W|jL!83-FCLo>A>cN0wKYhAVGFfpDmW{yd>};^t zL=-v}R#w(Glf=h$p-^oBtLlg2@ zpVZH@;%_S|MlgvH7!eUs`lHXwT$tvc5Y{2e=RHRNlk~g@8D@P~b93_$+EDH7RDSdV z{WtK-{SvU#@UGslx36m|Wnf@nqF7p5s&+zgOC`Nt{DoB^<}L*8WHgubH+#Z=nCQ3( z#d9w)7~H#iH%T+s6ri#d(TVq#HFI!q7-OwfRc?@_N>;K9XuoY0rwN*2-JC2%x#^Uo`xc|dtS4qc}>nad!KpemxrZ>537q|WSXCpKb zCJGeHypA4NAji52l;8?%#U{xG(|9cGtcs418XAMSHz)NN__V}*>Cyq%htRxUgj{Zq zExUs5lMPr0_GAv0&$;3e*dF{@CeZbWu7rl531e^o0X4&j36er~cH(30C;P;Rhlv>I z0Z+LmzmYpVyLbrTfFL+P?^rPH6P>;i|As4+?BHsHCSq-XuA>S$SOqK|#*b9kBU!+} zf!bjn@P7X75!u|!_Xz^v8iGho{54Dzpar*(nf=fm5!fl{H*DUrMG5_(8T_kOt~0tF z1h_|joN)uX@%@4WZF$Sk?LkmI$ zkDmxGP#AKIZ80H0SvN_eyP;t|4Il(?hgs-q9^ds;8%BC}fV(Wv2GOzzDk*WzqxC{W 
z;_Eq{wD1f#2^kV@G0QATAP5-7YXH+Y0#hMGv(^=}FjOH)EbLb*{Ef=*7YGo9uz>J? zKw2gqp0M%J128-u+WZLI z>8-ovh5QFr0#Tr_xO1oNM}iQh%p^5q;)tYXyaH1V51gZ@S6Z_3Q9A31c>{5$#Ss(s z4D@V5QV7iA#Z<%6NG66whzA5&8zKNw1xf5kAE|iIeDg@;Uc?6S{{4I9r4nc$DV>Tv zv!QXY)CR;ss);L{fZHq{n0W|EtOY6nz`9{}H=(r%d^3PdveB`bnczakPA1N$fNBG& z*?nYXu8WevJ>ul@6`Hz_Z5_y7c^LH%kTY!_jwMsPL|prX|1y*jZ>-4b4}FM73S_u7?K z6SBpGv=1@)V+>VuON$DmkN2e6gyaUV1&7l5V-Y$|3dbipxD{Fo?afFrN>R^>;j`>I zPEQ}rCMt*hLXrz)Q4^5!MHq|N@-j8D#bGQ4I*6j-&`v@pf}BIJefxvoZY0`ox~i(4 zZ~3^mxR}Rr$^b1B#=1NxFz=B%lR(W)VeKOQ0Za@&!477DSq)>|^{wkuuR@XA>4c6` z78~4G88FP`RS9{Xk%CSZPb;otwA7c>I(*F=!L5U0j7!x^iFLrFpke@g;Hzu`%L13b+uC7)*cCU_? zxtQHS<|PU=^Z;0n9C@X_ zclYipNLHDi*ZIQxGaWS!L?bZY`EbLt0I!BRJunAis2Fz43_!?vW3c89x9{oHd zS6jx0_FMf;v(ZAf^5#!%5TziF(3y)LMExz-O)oJ z<^&Ax*7^TH3Qh#l+E;z8Mv%^9+rb2lY(ov%Yt{obd)Aw(aj*mnN$3&4d@J(&&(#gX zn2R)ElrlOpBQPz>MV`KDPSo$ex9*R@DpCTFV1nZ-nel}C`Kbv)d;{Ge4IU`T0FZAo zSqDRI@7FJQ_1)ALcgS22b2^G0vr&k!$+HzCM-u$67Z3Z! zVt@XRumA79WQ*fF4kI*JF}eu!UNd%yT)5C(4(%6*VYDE9{QL{mYNXBsqcYnRJY-mh z4}P8F{Gs}H8-xzy7ZJz>wCiJ69?IwxVE*1l@S%&xdYqiKJQ`Yidg?U!{em$!TLJMO z?|usyKPrS)2!{kARiCaBhfLS@Bf3Y|bBbR?)c}`|5CI}*EsJ`!eSRuCtnhOrC z``OQKd|O$0;-Bdj>{UyT-{NYZN@7rou;6cFW43~X)E-vDJCyw_+^fe}S*4KM6l7$! 
zZiudGw&4o>89R#l#0aE>_sG3-C;tJk`Xwz^vKvH~Xe6T7I{;fS|^@IGe07RnVLAF%3 z^dQNqf3uB1|K9ao@^TH08Zq+Q*4B+MT^`-Esiw=1f548TQjpYX$xZMdViE@bBO8XVsS}i&uj99Mk#FOqOWA+mD4rZLoxcLATj;W2^Y=VXGx7mq zwQD%qh>4UGKwvU2N!-N*Y@KnOigTJDPsec!{Mnz$L#Z-EU(O!{1M>Jod#yZk@_nR$ ztqnT|`INIGKlk9_wIiXm0^18{&l>U?zds8RkhtMhQ1gk}0DX73gCFq+qr56Q3HD$C zq)BgBgr?F^u$qnnj&4D3HwAEv5Q9JS##&Pk%DIJ+kvE z342JOWtk%X$e5aG;L3rSnVBi7=_zE;7oD1tO#SsMv^*U86BQ6i$iG%NjXY&Q4DON% z;?%)76bu(bc;s@q-T2j1Ahlep-k#%gL1cu_1PxHaA26aWv)*$e$Z(@~_)tyoldkRM zQV({O=Q>)=aVZvgCMb#0!1ldh{slt znp~gbS={>p9?PT?=hSQjsZTI@hQ1#kE2Ve z4L{LGb%vJHYK6P(#KuIODL1yyujy;6Qto@Td7cyXt6kZ|6>WLyJ%5mqvh zkMUX@N#F!2h>XXjig4%@Cqdp$LDs;NPyp#C!HEgF>Xb2*<=DMCH(w3q!{0@YEsKeb zo!CE$h{w4bgoNqC06mEBsrT(m!YM|?7*6(HaY@N6IZpsZnQv(kts$r}7szQG@Y9-N zjE0!=e2j=#aaKov@XAR+XFTnyzxvRV_?czOY@cP<+WLMI=aAr-zqH3?wRdeLnuIhl z5TH$IlD7>r@4%ys6rdfxPR4XmSl(MVk|fm6K==l3o!v5sO!k3-GQ(|34pt%VCb&go zFerN-047`VSB9fUal#SU!@b~3;sh9&cJVOdF zRTWFW%};PM1yRzbpqRVOfKL$ZX#nvA=pL!aL-XeDpnPAf(DsfXXJat2wAAzk;j;{F zS3tN`5@hz{l&*wSV#Vt(h8+P&vKpRjl=A=-$RelgUXmZ<^pZ$3}$$i)Ug7 zNyZf_$VhKlnb;CE--E8=5)!FsSrQ!%6q6j=5!+4l7ul2O2&3woNo zHvN)hS`Y(OZMF@W1?|WRUn}&Hn?QDn(p!QdRGP6sU!Fjr z&`umBA;!TEJ1|k$h;ayjJUZDHCpHiraZAa<*NA{!wguRQCb*MAG~xVf!A3NRcDHgp2M1U-dH?5$m=<@eFP!wixv56-_onaFs=iZRvTGP%Fn*wl_5po^y{G{6y z_%le<7;u>&@rQ}zqdz?)y0+k(WzH4$0moiIpMdMVN_`FInN`F006jgwW}wG$kd~;) zSf3iZ^`?VgOVvQtfVtyw@gI@!o~|xW%e%r^NDK*ub1X1JILe zY8ganu*i@vnziQAYlP>F!&LV5Bk7#vX4%k!w9e{d@!7tV@H?Zper9dcZii1 zEvu4vB(4#tNh%Pxtp{}iQ2eC=$dL=+GMeQ>Fqj7(lRvP+?5#cwDxW6pSqDs`9y-%o zN#`ts>__9u%=8R{%$yzMPz^L4$q_&*ThYu23OeoSuDH|{ep#SW2PC$zS-Mah~0A9~RY_;n4>kQk+^k|H@RPl$R$G<*m_ zfJHC`U_?vE%L4f@m=U6<3PgSbED887dxV@?+?@VQ#{NRJXn}LTDTj8VsSKO71B36JQ_e(;-ctceLzZ| zK@}odoR|g~lQF(UE*`$alN6Gm8HX{7Ez6q_7xA3yO{=U9g<{h*^POV-7SmtI_3gXX zbsmFb|I{a{g&hTl3bX?}Vm>JaV5{KXLq_Hx`CbS>B{WV;)yPi3YYA9b%{?v^=rUk) zkm?G`_MFKOcXYTc3926+c_J~WVf&D?Mh_i2WQWrtFunT9*oZHJ97U44f(CXbNxw+j z#EgOQ5MpLXg;RnIfv8}HRNr6)J#x4_#RYCyvY^aXRaREMF#8W#i>VX;2rTYZ19X;o 
zgVWA0iH#lesb3jP?CPO))tK{XQM;hdX$ItAx+7IYEnSn0{gatG^mhbo=nT}T}7DTy!@Mc2^u`IKQlEt;Q zK-I!5dqAo+xvG~dKgrmwWe@G~&vTckT?|#BcJ~htCqoQ3h9+CFNP1jVen$f0AY%@y z-Xs?h^&g1w031>QoOKsq$g;;dSWwjJV!9#BrOmBj(igI(X|9NgE($(H(PKL;nX&cW z&n*sfLzXE$hUwwa>t6e_`wcNPpg@o_umF%$JLktbjckR<#q~4w+l2f>pf}-Lx(aQM zUb*2?r&&`>aCfNt;M@;JV&hcjclmK$KdAoQcbG}A7MZx=WSo5oVD5)8Mu?V47>6SE zL`Yp>;Lz6GZ?S6TDB3Q3Bq|y)mW< z$U$b2Xlic|rF z!(!20r~;X+2?sBQr#GC#BbveK3PikDLvRw43OZHn(Pu z{twR3PN4RM^XJXC?~ib~th#le1AQS-3|ak(hmDw|eu`sjh=GTcICyI&irn!gLh8GA z_1OsizD0w!0U4MaTcl6s=E+s!I3FIEurVbpEwxOy0`bupJOfRr5MWAZJrd-M1JV@| zi-ri%1m`FzakQtepM`T7d^iYbF98u#3k+o3iJ2*KC=EFfDHoa|nF%I-BXp~YF`J04 z8cwCjVSEe3c&I<(B4s@=oF}I4} z0uUBCSSslhN2A*ME3dm=w!!MuQR4cOm;gAFzZN?47aXFKB046{E56&mKJP}rDOvw2 zq>=B>o(o$ZCx82|uc@v74@*4%Z@m32HgYa0PN5N+ht}LMmiYClKiZR=`V~pwKQ=Zt z=+owuCrqr zGPU&3i@#{CJJyk>fd?Mev4-+U&O;+71R&E_8%7|R=n{t;F)ZOsME#O#gXhLZTw$bE zld}!Lf(lXkj-WA?$`^pa?o_0m_J>QbC@I6;$wVQC7+|c=x?%c{KmK?RM9tpn=;)Y? zc?$vz$y63bt(@lqL!gkD{P_Nv^iJ^c9O%%HQ=%bU5o%A$F{U7pl4TIfg_-MJ^B3;W zmP|k7_}|XhIB?^*s^=0-TYb7_frT38f1J<6@n+Hm;=0XsNtt`ZjzN9|8RP@(Tr&{%_jPjV z!%z{hJ_8H0Fn4S8Y3uW3&It$X%m83sDO*JvkKt$66)36)xVZ|~aeL2;eBbt; z>mM96vQO*<3YZ1>hNNFg<|6w?MohCx;7B730a(B&SRL*0GE(M|($m4Tj7~@4fJ9<% zudo5iCtWI}QZq>KPK(2d++9WiscO3=Zpua}C*-cz>7FYSSE3U)i%kFeFO|-;&B+eD zz%0?^atRvGj{@x;7HdL1zrX*E!!Lk{a4c{vnX9mHxPJXT>9634pFjoI>qJ}VzWL-`Vj&bJmJ6s(B2o5OLUl}% zFKZR4ose!2&INp+2C9*hNqX7G-*7_a`u)HzNZLpcK-Uyo*~JJJS&Di`J>SS0U{~Cb z6Y^XmI!bozvZPdm!nXz(qIzpGs3Ejq<-kVFxE17Pl}|NSp)#Q(ze+N zpKo<8HpTp9TLaTd7hTRfxv`lCHd$rP`~+9WCv)=fxm|00<+tC)(V}gM8)xzE!LLZ$ z{QC;fDMZvYBqc{3M{-aSDnmmJ76x-8-o#eaDB_+;mm3jIpSX@73nIoE1TDp#EA=M$FW2ZU+<~bFowkd8 zE1QTv%@frK1rk=khDQ;&i{E4VVRxQ zLq=i_dQPXFo!d2D}RL-XoMNoHTz9BRPS0+Hs%2xrU#MlUl(@|{($OGcI zOZ%sxB=9v~*xnGm4YIU8Tv#pAIMV5rF^o?Cx_nnBl-uOHQKY93tq9;Q(gtj<&`u6+ zwIy&a0*}&Y(f%QuSnJawQ7uI^C6WCA@DR!MpkUKcetb9VDGY9q^XX*3_qUSP;d1%% zQR3R^*QXy$XD{T|dBoj`B=?G|=?+ScQYHzdU=sTw;MPPb3`MYpwIwizM~9Z9;+Q7Cqo=1bSU(W=V5ENcFr9@ 
zFZ@9Xzy!&ippiRqwn1S@t%jRy+6c*7ysj_LZn@%Hx+o^zR0#Ly^AOz8jO{jLbz;Ck zIw}ytlaMJ7sawNQeKKw{q0At{MtgX^8tDV)zzZC*wGbX4Sy1Uq85jKnU8pZC=Fk@n z6ft|dEC(5A5x33$rPDU30}uxu9Ffhvr_;Sq9RLUE6(R0DBuEllDiX0!lZ_)A z!zQ3l5FZd1cI}0n9QrmVEb4WkyQ&B9D9u<(y$78Z;PoMosq+9_L*h7Eo06g6AmWrJ z{_VKwcUA9|Mh)#R{!D%j~~K<>;~1* zT&|70yeDy*$U0GTpC$U3Lktp1fr)_kPe4Ag6(%#J_XK1lagIr&gk$rK2Wdm3E(FrX z7Ei>!_*Vlg0iTfEqnK0TM2abhDP$c?xx4P;=0GK$Y`wRlFk{746XhF0@r~ReEWxX| zl?oCrEjt-uj9eSGnwa?aX&`!}8&JughO^0u^XJbCRNzv%WJ&ddoD$3w=}be1gy2)} zTeo~EZ{8Lvfv)}iyvBfM%r>7pcl>32FTHsyu#g#e_wE^17$}}|6+f+8XIA%COAN21 zEV&P)CIeNQFu60N91fb)YCArDe$UEGs{64_yKG2b&GD9S?usEP5wTAvBj(l?G11~3 zi@KWZvs#5Jdk&XeuD+Q8Hj(7s0?NUWsE~2Nkd%;qC}6&|je?2YO=N_`qDo2x{#AVr zArv^wA#a}=5SJ3LGd0vNlAuxvl4vK3fk9l60)MOYn);Tz5E4t;JQ^gH9jAU9iq~gI zw`+g&V)Ys%Gbo*upjTta$L>x=2?knzPOA@JHWwb!)sxE_qM*=YPUuzOwE&v6e)g+5 zDEr9DI&kpd1X9*GIBLAjBrc5ij_}>{n?7h=-J@7cpmG4i#Fl0p{RnMim#2iMTTngc z7~v(_SD|$Ds2a3zptng!V@S5%t!QNjsconp3*Owad{v*=Y0Lc(*G)nnLLB7Br=Dbo zqURJpAsJS>^Q9nJ!QbHjWmq!0j<+WUh8-7JMcqmscH&rN^D*Yf0t=F2?=#54t%UZia|E)(X|02k5YrNzK$IzW`0 z{p?vRGPR~P9GbXu`C_3>4v6wvg5Q1o9_e$#caRMjO|9pH2ceu7rILN7E)zK{B+ti? 
zga+QarIh)4Ir*^}j{K4QiF{IDqq&DHVM0_9d7|c%TKOqNZ6Cnj8Sop2GeOxYPfRR; z8txYwtkd6H-C9q6a&-9@Aq6+_M{xq)H!$$b>3e5_{C#rzJD2a*V4IC3#1id2Z*8Fm zpu~Yac$o5T{!xuei*>1++DYo&Bx2nuPS!kZF9`$ z<7O)UbIB^je;y-ai=ZrCik(QCOnpI!+e!u8`NSI6KqLZqvhbW%-h;_HXwXMt7K0?8 z_|_o|SUo8OfE9ziES`Y5fRqT-iE%kNslVfV?q&IDP-t~ifFX^-gj3!+(hnM&L3wWYFL~#OHoTaV3ePqunXDyX( z+r=f_ivsL1GI%l<0~K5L#+7$b$BKcME*{*mf^5K$xSj;pOgJ#IUqEwTo(cYmG^(Kt zPKHBP$uk}agauUXq%5j^e1sd}q~xRLIPm3z#Qp@D7}6qwT$@Tv8@5A(OC}H@g*&@v zA4zTg1Jaepc>$cNT{aO0yCfOHZ$j?x%lN?6OI!6t3bY5HBOK?|b71v`_K(E-_wPfm zbCh(>B2B}^Y;kp+Ygt*i17)Ip7Cb=mFa*aYqYVo1&o*X{K4)E|UGV8V7zIu2m8j-z zR=XbKI0_=~B4>&UBAQ6f-f!M7cAZ5@?jq{8eJUuK0Qd$>3|F0s4dmNDi1Qd7q z_paa+a(90ueF~aSRy*^KriWI1&P60{@SpQ)@dfgQU0sRzJc?#QZD>e~g#7x{VE3Z@ zz!E?cd5K69{QL`q(oL$H7d57=t9w^Y^XD#Vvbel%lb{F&CYm@EgB^G~3n2+2fD(g*$WUqlQJH6ucnUEj%OUjA%Gd(jvOVn7w_xEK07Zrkp6$$mI&|3bUgay1i1Ej zpIIz9_7(gC=Jq&qGU9`DkD$0aUl&b0!P>zY5SMR@RV4P{RXy5p2@-q@JNN3M2(bGq z+KS|Ocm03j@NQgu3ZE?nipH?3)uPHKSTriGYP=bEgcfxbi!2x|k|22lBIfRpKcGq~u$5IS|0&$)qmf zNasMxSrYvYGES@zG8X{n(-%I6=Wqm@?&+uJu0khbnm|X(6Dr%Esc4zKnE7VKpI2nO z?DK8|XaVY)4D>KqZ!diP`X~+RW}5^aw80P*jC7)34I()>1g6%LO);wc&|C170=nP~ z06_ODs)s|MScu{UkjR>~Yon3R3vJr8iKl|YPcL4(!sngV_BhHF)CEQv3Ntwf;v7B` zBp$^aTJUDX?{bADUw^&*dxfoWkhK!IHsRX;%YJ}b)vT`f&uih5i*n0OwVv) zUH*oD_k%wkY`x(*f<`)my|@E7l~FIQH_Nx*lqawSmyWoIZT2e^@5w=uqI2K=P#2^bZ|_*XAwxg! 
zlt1F|Xe0Ty_hI#qF0vcWC$(>ojKpCz1yQ4QH0M7GDP`A=`&rP^XtUL z+Fd5LPeOv{@dX?4gPoB-zHT{O71pZFYtGn8-H9-go7|4J=+<{?bv+F2?)pR}hdnp` zfUB})cg4+HH)&}xF~SZ*R#wXuF00Ygt>f=AvNR(P{}BBT4iLhV3PL8`O!XVL$spd= zmfH>r5CRz_o~A{PjCHTMG#K0h2Zqn`jwzAH&iBcfgtHk#(~l0lZI_yz%=qbpub!A$ zC&(J^|8F27zVEv*2`}6B%{k#4G*x;3B=^dN$q;YRQxCp|2YB!Az#|-{ChC0o-8R{| z{r-9Du&+`< z7>D@a8d5zxmNNdwjX&O1vFpp4WOv7!wim2r6fRLsUPoeNLQ24I4+85tJhs3-t?vD``?7A(XUOL%>i{Jw_6F zKKtc;4chO*<=MJtCF1n@>n$c)ou*o3x+*qW_cW-|YTe$hkacx#wTqbL_lQOBM~3I9 zabiy`HL-wgpxf@Q?R4XC^8JyERQ0Rg8#N#N@?E?%s1EV=wQkb?R_-ay2tF=A91M^L7lgwR z(}?m84!g+oRLKA(Y$C*HT~>g{bP}>oB~hfJQl&*g4}$1tNjwQKIEAFk^KN(v zy_WeGSEx9yT&aHGr95M3dgS2M2*u=M`ngXI%uV(x9DW{2y}5FEO?$h047|sbjvY5< zi0WJHtmCZSP3{u3(kT0h=U-WS^ovqy`DtliM<0LM6Sz|-ptOhHjg z=8U4!mbodvH4fl%@JM^y7ARLjubL5*dGGY;ir}ar-OQeBeh;-9*M2bZ_GyS?mS!Zt z9epZyVVFKUc1yfdJ@#&RxPttvLhC)39)<3ezr&1)R`GuvZ+UHwge0%)Crs_K{lo?v zA4Y^oX^bc8rrKRp3%R0xc4N;;F#~G$x^lJCD)|?$RX-`#AUimwQNNQ_VFr2b(nXWvuRAxdplF zWwu8v>{ILJe@-&+m)*So1X5)Hcghe?f7=uElR3{Gs^p z2cMP8OGi827QXFk5Yx9eXGlByZI5fa&K-Ywem2KpGGT4j`G>g;$!0SR5u|YM)@y z@dus0SIJ+!GsAMvE~9H77v?%1UWt!)<8wbrOPAiEc7(KLI?T;G`G5bd+QaYHjKlgu zYkr7N>KGozFE!W{Nh2!Ek-^SraC#+M{Sh5-7$>`tjnN8 zSyXslq)X7QM+KB_otX=GJt5ckh+h&J;Qd{R{02L-$4gDx6jgD2Y z=lQ>*5!>)Z?dKdica&%-O~R5oG?gY_+1Wx*ss)-2qL<(#58M?m*0#P$>y0xDuH-9( z5%JT`10SlY)a}#Zy>xp5=0e9l<}_JP{w;>O}R$Q$ppv2vIvf#Z|?(%;OkKWn-vT^%Y#j}H=qem-C9X3P?E3t zw@Or%q>e%h1o0yP1{??+0N2l*-UscVSdwl`$|}Lt4)Zz2(xv4M+J`XNS@Y{9nhE=_ zY*&m{`tZ^s`|{IAHZ3W_J@(+XMFY22h22`m-**mOX5e;9R}B}$OG2YKtH|CaLHR`@ zgb8)|F%Sl)(P~ErjLf=!0kus3>;$>2Iv=kgj516;d*x+iSjVyh%uEJ<4EAQPgr#5k%V^f+3j{LzMm41xkTBJyJ&9NKmJ(-rJyw zGOr`b_zZL@%!8ssxV4Tpf@pCJel|fvWRv2OxD`sVyWB?ph@KzX?G9P;pGnV;_|wvo zGox3qd6FU@nkpdW8R@W_jz6bLMFq3#dj)ZhxpCwK_LzzSFLE3I`aC~4{7_`ttv@jf zh}{9wa)lRD13ZO>HKsf$Q%R3$VkWf2e;2rk=>2s7@+YtB@$bjTpf|$tLKAG=n6e*& zJ>@ia9BeBU(^a1~XTp~iKtUXcsXz7je?Lm(_1O1z|D$q)bF@uVPA3K_lc6G%4lp`* zjzYyX0w&gQSBT&1n z{U(A|`~Rq&O6#-#>%g2psry5GwFGm0fBNZuQYH>^_=TD2F1XC#F_(`oeEDvcqAQy( 
z%0Xw?hh2Gg>EVi^7ivGA*neP!f34at#qU=9a^lzOm46%y%1*mGxG)&ze%jxCO69f2 z>1s2P(_Nd@PVQTA{Oa-T{(D8f68YcRpw6 zs!d+EN5uq54!x|OmM(=6g_pJVw$+)vfsY$q@NCiiku)TMztz7xWm{M13PI5sOJ{Ly zcgL71+piRR zqc_O;4R7EzF*?bb31SV}#9Rp5Q>U+znCjS?6FSalACews{s%7kL4b%@QpldIZ5wbO z29DiSy7>+9Ida?y11M_og^MMWx+8iVJc9Fh9$Z_+>G)cFRkJ zcKaAgUzkYl1sw>xnQ$<1OlWZ`hCQZo2Q$`?b!k9d9D{{BUV9uW^h!It@itYV{H*uO zvWIf|iB2fgZM0YiLKU<|^g^=;%&GcBbr2+Jx;|9U7t# zDxpw3O)zx|`2+0vf)I9NV4FfL3z?y41Q+iPb27jSKu#hQH-<_hjcSD(o%Emry9WaU zDVSZf+|xyh9s6l-HAj<%B_GS4_F)%QdQ7H`Lr;P5w@~{DjrtC>PSCx{D_jNmmmWbk zz{~9z@CSQ+JAYaB9GxA4ouUhpO7acGW(s3Ldd^ILDh;YuD<2}e7^wcTjJ>S2w6p?_ z@Htbc)pSqx53>zfAX1Z#&Het}Drs3{FA7ZX_EX-7T4tYHx@7u<$b^41( z!R-!_bGksY+BFbFAuc=M%10ryGK0c{*rWjx4~9R(`&Hcp3I|gNN;}9GnZBFFMs&ot zou7jbSPf9X?cieUGO@8Tlpm@U?T23294HeBYPJ^s6Up;HD=lAJ8Y1GwtrX;_CvgG{ zE3l1J8s_9#*&YX><4YlaffC)mUGx2jT3*d1-y!mm&+`i%uwrjMKR4^E2qq$15|Fi* zUtplwWP*-)s;$W}i|0bV8C2(ocbn2o$)XSvid%TrnHet8w+L4voQ9yia2quh(^Ztc zbx>u~BH@^++2vN9=NX0*SJVCO&+|e(l}V*xyR}aDgcc6H{?y49yULOgQe|{`NQbwa*p|b$i{JtijEN9K zMTw|$2G+va_-+J7T==)cYD0iyzg3dS&z>&ykFlTZs}l*-v!y2G4(0G-*1c?Rq;D!D z2;^2_LtQUrangV4BDETluNK=<(e=A8o&ld0ful!b4Lc*qrNN$e1?=2icjJBdmMqYe zRr7_OtirW-=x{&qr34y!IGZ#|&g|Gt3`n|=16RV{SdSSQNKCfM!N}S{45@uh-nUaS zV&-@f+#^X0Tn}9#_i_XS=~pN)$Fk`2)>kHdvVfeY9BYT`QwXG?du3WK9web0AvX%4 zWag8PXDHUJ;aoE~X7XpbSQi|m^%a&< z2>RHZV#@k~feEODK6}%E8xbqochZSu1Sva#)fYiaR|8xgFQ@611wq>#RT@JHKjmx1F~U*@DM#=1k# zN5-b~i3wgk@bGXguJ(CFhakD=&mXDKRU|8l1;AqAN6~>kL!dC>)YG4-L_#ZUS6>Mm zlH!<(nO)HAR3BzEM6hsLrRArQi)UVk@lRQ>yhyx~cFeQaz20&f5A*Koq?vt*pFWMD zLjcXy=5~KT3h1?gOn3ipL`9t8^OakO3Si$Im?-_9MMWIsOyNIBM)2S;{7F4jV5mLJyCGStB(`Qc6))bUlvj(7*y#V2XoF*>%rCx* zHOKGo*bKHVo<&{&N?f%xoUcnhA>fp34b-EdJF*+ww< zpFTfm!;|irXHB}2XOr?Ln@Nbt{&0;)0&1;VbfS{;@bP4I@WRcRC%;0+EX*HD;I5k@ zNky=w!LO+AynAqEG3NJ@Jj}XXd2wMqSJ_0K+H)0>wsm3OYZV#;$Q*uSmEeU>8Jtt$ zdLYnj!r!}q4z*5CY*w}yp$6btn#eCsoT!Sa$c5g+4!ckc8n4W8nP~D2!(8Q$tGZbe z$)3u{I%^R0ND@jmG-R3W*2#&BhbI4Yd!#@4dnF00+3<+ZI$y!*!Esnvl_cz#FktOlEV74yV6pus}^Nz^~W){+(3Y4IuXt(Ts5}(wpB1dlF 
z;-~SMzVwLme{0X6k5p>RY{83=-G?oFPDp# zfmV*dEvpIcxgrZ`9eUft1qeITttm&54G_wWg;d};j&l`vS z3xN)wub#`%A$Q=r2;Fla5V1u&Kpp1~p`O*db`X1%V)R@YM$IGi$5*p20bedf9uW*V z-X0RXYmj7bRFmbZ)%!s=tD?I?QE`sXNlz=E?GZgc9^-8D_V&pH-N3?YE6ZQ-%WZTu zQsI#lV6sP*Rt@LRS_u5Ge)Pkv>u^~85q|wB_JrcRP-%B%W%p9nSjfat z=g;@kOP`wfts5lfd#Tm$jUM`W!hr7;YLiHbk!+6&!E$0cN+X+6h$dj)Fqvir;4WmW z0a~=16CAi6=MjL3L zd)9gKC>^uynW9H{vZg%EShC%nN?GmU?)2$5ZaXZs1js2W1xD-BJ%k1utLSklK`6lX z@YMv*ry49vFJuK<3g4i6t#G6sT>h{bSGKwRkDVYqX*o%e^k!QK;=PA9SHr zJ?q3{(O`D-{hc~#7c6cEJu3~t|sP&N+TEM(=g6{ zU@l6WIc><^bI5@;%)w)E_>{T0HG*!^v)eAFjvB?>osUNY4}}UpO+Ax}S!Zx=Gqti2 zUH_@rz!tqvd8`7VZkRn`NTq$Ot`4<-^@A>JxcT??`J~Z)@f90$if0V7KfSHh$=@U1 z;$|1UVm*ywUi0`!#ReHMhyI54l8yTYPZ}762c;P*Wzpm{Vj{Kjq+)m0uW_Gl&caaE zty1qZoN_+bcC~I9AHPk2FiL=mG(W3q0q2N3aBCB?Ady!gI7uoJWyMQvvy&)6JY54< zaOK{1_E8L148MF%%X4IZpPyf6t=#bO{z^}eTMehCSz}b%+AujY`YM&_*PjGe@Q1tb z&zam7H*if%`TRS%Qgq|7#)%(ddipMCb;?j}Wb9`{vEWOFEXGxPG@iY#*~@;0*Ru;- zMj2)V+{1#uyzJx;TvF{kq}&grhmxoMEPYqZo%Oye%|f5P>EAooZ>a6z{(7ol>V{E6 zfIqXqV?GseY_+@n&#g=5=gekUGrDeH>QOKKLs-So4k=={I7c%EBg!-GvtAO3HQY3X z2O*Y=LqI_PS0$3YYGidJQzXT4>?c#Gte?8iMY}OeYONc0n5o>qI~fyE@TNZj^AIG5 zvjwT@$y8t7(Q=y$wpM2coAdv+;@1;3pe4BQWo0um#aCDX&J1jGDN-`vV_QT!6B zq;zmn0>$Vi^`&U*T{;Bim`;-dbX2!_WhKDql>)*rG^zMzo!(t&Wjj$oS7LsfZc_nd zj(UzwCR7?cX6*$}a&t+Q-mZE)27f@I1RdYiYnb;VSd=$WQ9)DF9sZsr+Q}-k<9nxC z^fXWU#bb2D)YC%gQ~g)jbDoNNg4Avscaez=Qq4V5BK3)-SK3*o2A(4?YHjL-mk6k; zlwv2>;a0dG5HY8>md>($8)|#3e!FhwtI01*UrqnmlU%PvbWZ=yLJ&=6d6Xa7e-CnH z&CL1fLdB%LQEAAWvO8%?wt!Zc$XqI^xD`kfj6tc!#l{x>OXjkIclQotE~@Z9tU+P5 z7ey8+mcL{!Bl%zB&1wGqySQzu6>$U{YF=T9F-U59ZW$ zL;EU$!T?c&(3zU(C}i>D?3#$`JflKNF`w&D&{w_!%!&^0QrF}>+LT#&iJABAbEPtb z6eSF$ ztOJLW?do;R$T+3!k2gg|Vd0N7PD&P8ku9d7n7w$z$2=gX67%4JUP^oBC9pZuFIJD@~RMQ zH(Z_i10IX#Znt(BZ%lA#i;xnj0a{gt8p==fwBORi(@d{qx2=T><1yxW!v%7e*Kt{? 
z=34y)l{#uy`tSyIANEeD*2+A8!hlCnAz5+y%&a$G*cG4g)YY$L5+rF=iH0mcC}b0 z->EQ6b93&JU${(*d}fdzIv&za{yo6Pe!x^@ZMF;)ue8Lg5@k~mB!HKnqVuUj&iQ>p zntFkS=CP1n#V^dlG_%`sYhui-+6#2jtaMI0^rklX_$@m#cZnnK=A29I4r_0@q+^Ye z)X9N&4@;)l&Fo8-0b4KXeRUeGx|(~L=1Jp)baQ{Z4Ol7StZvWf zM`}gC+f@`JZf>Qvh3wp7q$0bO{B+^3OVVtnh8TH@T$vnjp7Xo72EJZ!u9WvT%<8S$ zLV`eN<5HCK$3mo)X$ksIf3Cd48=*N8y7pK0wOpx3DK|IGf#gURSUx zuVKuy-TIy*S4xHj4KJTfvTWSJ{R5w#iQd_|CqHye=Jh>s8)A*6Ay2r??$b^iD3_^; zKH?MVSSc@*r0c|xS@K-#$1lJxYDufM4xerx)H`+jZg=-=-6}g#Ay+dCM7Y+ua3O;2 zE6!^0jEeM_FUfoheegup5p55{=7grh-QhdjF84{9tIy1@!^`cUn7NWm&X5EDSn&h0 z0jg4`e>hPCWBE9B^J3-X)mn@D%q)%d6h%KN^mNxU_W?7DyGN)#0-b)TO#0h%HtET(ek;oW7kQKusl_dLy;An-0YE)jrc51cry9aK} zdR?axBH6Qu2{I})Wi=E8ybP9#!@$njR!`4Kc6DIp<*=L|Hgrk?{FoW38nIEBJVlH2 zxV5$^MmI~?XgT)nUl-;jZTf0=wb{@^C*B>LshR2S9u$ly>+w}7X^2))o?k1P>1@Mg zB@1??@dxx7880w~J3~Xlk6*gf+9wuMzioAWm-q5qS)Dbwq)%%Vrt}PEUhzLk_ebE) z#t>ujg$UKToIMi}@$7ACw%yni{v`VFjr1keTREo9vohcI=WZIm?mvT`7 z+@8r&*VB`6IK~>o&E2Qzy2~b9?BKP?aPbWf_oQXgBJ11TYbgdj{m0LDBq#tEn zxpL!Zu8%^fF2Jmqq^i-o2S+o6G-#B{Hv5{Gr3&lz&RL-*B5Cc4`+H^Q_*|__+AR^| zUm)MW+hIJSBp%hYf0=b}01n-_LUfPAvFm9SC2~;8`2!cl&!zWOZ;i-(|C{|neU>^_ zjfvY8twf14)scR%G~WLp?QDO2ucEVUK*6VLDT4c?J_mwW@&rq7VuFC(mLpo3gHvzB zFHH3hH~=SwRyS`1WQyzDIG&g5c2~S3ZL|G`WBb?(I$4g^-m_Dl>n4E;HU6I5;$m4~ z+n>nTmFZmQE$d!(Uo>ZERzqGUy6LNBk@&Kza^bR41z5V&n z(ZxOVL4fcR4MB%v$7l}yj*YYVHgx&;K(TTjZh5~;OO{fodsgz^_f>R!$?=02>HQ2> z0yO3oWKU+z^cnd?O$~cSxO>Y>ueo}_wAIOQXprG_T|W&yWNy8){Te6rLMdc%yM1c5 z)Hqq_6GH%+GfLAp2!YDAPqgZZQs98o%kN}6dD@G-ihVUw3;Doqp-LW}AB&Ld3>H}@ zTGCjfs)~^H=;KFHNj{AU3qbKWc(-o^J#Y;IOla-lZ)v@u=A%DXpgH+>OL#K)A zsO)Y(jnOudk~jJO z3YGQ^5*=nUpo!w!JB?_RM4vsv%2Vy^SD#Ew)f~2Pu zciK+dhtIjYHzw_4m{YYeeLmB}jyw`IB@Qr%N8IJjE4@AW4o~14aeAo#@^u1h)|fx5 zRA}nHZ)7{|L=OQFV>M+dJ>0mH)G4!QrKd77HD^!T_xYtEVO!mhC^7&LZa^J%GpO|^ zxwlUv1K)-&-a7rI`!YV0q>becFlw>KSNVpyFB#>P$>_mmlg70kvDw2<3ooQ5PjoBN z4_V@QpUgV&@UFPyaClaCW%7`^MO6M&lb{cH-vbf*5(WNh*opw<^s`N6L_{&5h`#zb zjaFu@uw_GHDV08-SflytY1O}yNNJ5$G_ig3qjEj@)li1qgvHPQF<*}&g>wY2=2*3D 
zkvNl_(pY~8@(`u+Vt7F)!$#WhqMpP^+kk)38b^OqKo1(A7EV82X4<;UOI}i6+i9R1VC6B z2y-<^5)iH1;2%^Bl6VvheHXrx-fkq%R`D-hRT+LJVmUpJ@C(R+ieUZa3kpjPCSgYj zE3ylIV@^oT(ZpL-BAeA>t3*1KVY1eVuF3NIJB>ygjeJZ{dxKCgDZu(5ZGIb^kRYnf z|4DTYc9CyJ7iBfT{ZP9KSzu`Tei5JrAx(;KsCED%_|!YXH){1!DsUVq*7py%~jyd!9`xCLkgt2p3WeK)?7> z;5p7(nFOl&f$3*x9goVZ^$6N=FXRqphPw2{&CE8?!@=H1qSZnt<;?zt(Lg!}HHV*n zFwhx6DqJW~7=nR6+@Ra80NAfQ$KziBwrbI@_R%7-N*-W@O*#}&>(*&I$!3(yau6^B zS4Zj)e_c#;tK#=pUrN_){_-kXX+w}vN#>E^B8*{o7{p0vl-@ohob+SK;k7B{K8pF z;2)0S5jfH(0av6~O-=}UqIuso*Kn0r^TWKny=i9U#{dp$s4r2u5wBdb{AyXaln6Mi zqtqMER^{?d`SfYj4DmBuO3qIVleY$+M(eUrSxFd~mteAwKa4mCAwVU5e#Wql3sUs- zAf%1%h3u{E^FvM|bLeBvu9`qg?L7+g2cIgMxCXeE8(!W_z~!+a$x7ol3>&k-n`=n< zo48VLdxRXDWxuC8R8_y@e~o+({2qn{7xsA{9x;c)JdPk3Na9udpk2eljDp)7p=A+r zrGJ84!M0qgq0u53-2`h(`^iJXO*S*Z?3O^EXa{!tQiLi~#xm?$qHYYs$HOm8K;?LZ+|W_v<-U|P{q@YJ z4-d-x#GZ{<@LPBIPIJ#`84`!@ zp8_B=J9Febh$31i{3_edv!Mg>rw~?_*6Fom_<+-7d`2M=mJHUsK$4L&0do~R?bScgl4=Ik7 zvCxog>(H8$ow@%?%WQTf^AkV&a+<>V|KRdcCd`ws+jP-nJF81ZbC#?vJ-c!#Dh+PF z+L_$*J;pCf#d5ckSq4^g2(QMTFX1MpB>w#`lakK0<`8BGK!r|IX8RF((JZx@*GbQx z&|T-pI~3a5Qylu;J}9y!kqZli+NreN-z7vHZM(gGJ>D*onv-;6nW~FzFPk0d)N2@e+;@p)&2ovk86yZy09Gt@%!_X>7}pu3Zu++5JNyy3(10kyIYFc0{#?)FY`FB6@ybbyw0X$&my~K4XayV+FSsw5{Osq) zstQE6A$Jye&VV~`BDT`#o#z}3APo^-C==}6}z{< z0;bGNajuDM>V~Y3>uMnUBKZRBxN`-iQgo*eW?fi+LN7FAvQ?(C)m}sWq~8+g(+n|7 zI`Q;y#8ivMj6(>Yyn6D9$HMdfo8V3(5~ie=>}+fZYDOFXhg+p%!oPc!-7X(1GvW`s zJx*Qt6mMvv?~Dy;@P=Sl0<=HLAYdF6W{0~7&HzYFJ80}>>mCAW- z`Q#j7vVjl)tpoYkcGt1;bl__-t#+z6Wk$yU+gOOvmqIJC?~W!M#uyiJMbUR{K{6uZ zxI}`?T2Od&7E_!98bk0E4b>QxMyNt6iuSasiTbQcSp0NbaXEUav`-&)F>rVAFC5R< z-FcTaH1f)qLW-T6NTN_(00-#fR)}TIjvc(-zzoMX!Pq%5R`SlIw5#9D_=@pLo^Z7O#pwId5(pYd49Z6mNQ(i9rL-`-TLCn4a^8S*QVf3c$!)v zq2f5FwxD{j@dAYrfZt4S^M!%t5ug@a9?Xc-r7uViFXQqGQzA8i*M>wG3!v@d82R1D zBNXIo6!$o!{F{X3Md5E3`8mRvV$k$(LjM;%VOqm)gJ*pzJ*4>)^nw!f5H>DU%E#Z< z$+0ylQTQ-ydJ6*8mLn*5_s*0WngZCrKW8ZD!q zeH(0!2m8(8)n|%m6JpFAY51V3qPuU$-?^|g>tcWhgAMB*DCAdt4CipMIlK|wBfcbw 
zvPg&Zx+D=wz>Sm+fwgBTjBCa)c}Wr~y9-3cirHX0L54v?{_u49dc}g#{!07)fSfxU z+&@kq6ffAl5o>n!nswcSs4;w(tt|6}7Qg|3-~$vX%%?^o{KExpPm$%e_Qsf%FXK0^ zMw-*{>$1@i*=~Y>*iXNevF%j#NIf_S4SR~*DP671Pu`Cnm56JL>x{r$= zKmf^#&>bIEy$ZjYVMzd+AQ=aX4Z~T6OXb;alSB2Q{CNv5ha>FO0dBQbL-e6`$Fb+T z2$GZ%Eggh!ChhK1%8-hU`6s)X95H}%j8|(BtaXX9Yyjdk^2Tp)Uwy70@84l>-XP?$ zW~}K?P}1m*whnt_rFt?8RTNmyzEHb)O;ULf3-VYSdXAbwVk+Y*+W2D|0s?EEz92#+ zD4j1TA`P~I77)+jS1U{NuJbj&f0ge95k;_0TB>1;3Qw6F0WakT)AK&PJLo*X8uO)W z#Ep#0Y7Ka_$0PZt%wJG+nU;t9*G0u^=zX-$42x4UC`xEbBZ@;mzsujQ{WC*WnfhJ7 zbnpmY5Mdk(3O5D{z#f|U+c(aW4Rn>6Bm3W>>Z$qocSE;P>G{B^MFf1PwgZejvSWz041N_3?a0X5W|iB2DT zuz#i`F>C1kMl7e?-2SB8RH$M@p%V2!Tj9fljg-^{M6HzDK7Zj~bLnS~&I$&DG|kRZ zjY!bR*#!{JwlB;n8c+v2ZY+GdS5&U%?bJs{IeFCb$zRcXAA zjc2K}b!fgvYfS4I))&UVX(;1m#pK&a>iRLJxEbL-#6-)@jowDST5o1m-?9#Jq(s*2 za?mEg(n9l5jBTvgD+$AZG=XIK>Gw;=w8T#PJ&lP7zjPZw>w!R7 zMQ$*iNNh=Jku_QU+_9Eqn}o3%zEo&Z2gQ4{s%P+&Y1z>?IS$TNJ@0r~RoC4^)ME_t z(;+1rup4-)lYQoefTzf(4ltC$cP5(zm$}W9@{H%ZH0a~bv<`3!eZ|$74^Ol`=pCz3 zwMl0~DSV8UKG)NEbIHoxn}|6JK{UQwB@l@WRrBqivWATsI|e9{h2G`U$?l?8r7YF6 z&b}JYWiJrI(t47Wdgzug-5}S^Ikmxn-(&vi`>tkYX?&SZzPL}K^l1PkpEYxMH)qnnTUZim8PQZ0uTC#EwtpoiPHM6mg6()k0hr3JrURzN-Zx$Yu6@K~A zu|Elmj!FeNCDY2b9EcfHbU76Z1F=&XV%Y=dvX{9vB+rE`S84%+ErZ!%|ao!?+) zqB%8{`&AgAIC(kyqim{g^~bb`a$w0d=iA*SZA$!Z^X%Ak^-gHQq8QJLUw7lQ9`X(- zd(-#N&1z_qDCo^IWBm{!oi5CPZEY^S;0stwu#ng8I1ECS zEbyCG-4kca`ma#_$m3P}LozJ`K_J9{ApL^mJ3jYDm4v`=%=2{wIv*?HStxVf(9@;+ zM9A*mWSo@HLeG+`VQ~P%*6G`y0Q$Tza$T`A2#hWsNEm760jB6iTBuM5Zh?=?d1fd< zw+MoZ)iw%uW5oKM&QL8$z%VmGFI$52ZeYekLCw_Ha@C5%*)gO2AO|Ab4*%B5=<7r? 
z#HIJo6AV%*Mk)%jl6YYG?z4XjJ5$=s9>0YK`Pc2j9j&7{;rmHSs2P3X_I5k zQ~iOY;!~kgkHM=y9=XvaP_GrdiVA2c^X~}3CMPL)oRCj7k?3-_%;>2VGMn#JB_TU_ z04#SK=|F)*euJ)iWbtOa%799l7#mKSWkndnfThvg!i1?sdruz9CR4nGZCKbDK#!$> zBpgL|FRmbc;%ZUGyHREg-or0`)Mbzo5)~sB~LA+4H1T z!LjY|nPo2BL1v>x^m$!~Leh81E4u9Mn$$WOqvF!;;i@IBA1uS?vJG6&QJOsQ}QDQ2OU4wF)*oX-#yAR1t`6xudA3L0il)DuQN%Xy%nHP;OO%`$U2piTz_=1zMq zjD_SAq8_P@L+0%7mC$q>6J60nt;QI^>@gX*2y(c5h(hBDmw=uR`ioF_ubBe2kd58I zSEqOSbdF938R9{4hPxEu5g>+`1?X0}w4;95W)TLDJv_$exz`ht_=JV`S3VbN*#{L< zM3Qu6409G{OOAXSF!1yUsndj>q}631(Odj@wbpA1#^V%*nSX5YF2a^ygX)2!AMcG0 zfwur?gZR+d>^9QKl3v7%$^#9-l?^TJC*B2ABEBAeUvjq4)u=l1_RMG?5frTs4CqSVX;gi7xr%D@hW~q} z@*RhC<(&7x&CER`I#6&CA9T`dk_MD1CNh$;jQ8Tc1$5uZvD?{GY-{@pq6333FC_*k zrPjK<3>0s64C|~Yv~_S0ebi24GtH1tkitnHV+o0B7r7%DqNzIi;c>TbB;@Qo`Zn%! z@K$rISbAnzkgTH>k=~910mlX%Dtq=oGaTtx=6Cg>$)-{$rW0g5mAoO@fQw8A8Qmz{ zEArjvOs#dAR^}^wx%bbLqK$P0WB_2pUe}#8!jx}dqXpBdl$ogmmnBKqy>a3qp#Esp zi15x=BYi}Nw}g9ditPXjKVJ&`;Tn5RlYTwgcE)ct-@=nQuWAxgL}Ez4Q~LQTXvY|@ zc&6GTgxY6RgulNU8M_SwT?3dVCeG%p>5`qIKD&R0>~N+OD!0j2CqZg~Tf2J`=C)F( zDB*&pMqW{A(BWxT=3pxsxm*>)k{`J0tN`tdu&zpXs_eK3{N_JtbyJYhRusw z4P?6J!$6uZMFgYKfehMi7|OL#Dd-L-ic0j)STjQRnjR-g=0iYTQVB%U?w;jv>+?CD z_)BdB#Y5-1yI*l-zLfq(0h4~m80`zv7_em6<~CN2X2584q|H<~75EPEhb?Gp{ErwB z(}t01C2{L?_xa)ocvZFxWSnsYStqaHa2$w!O}a^`uo2es>6xjFp(8OLJZKQ7YkS*3 z_g&#a#G+L{mjgqVNCHVjLNw@d<#;juOk)wXet+~+b`3OCCmL7Q3dL#CD^Da6eR2** zpKRddxPNFPhoWL#MA60-YmMFS-}IEzXR}v>p>6I{T?;Th1dS%L$iEUtbydmEIP|Q; zn*~a(f?%VA>YbAp8}yw_U=WW)RzvtzD8CFi*4zAw3M6ME9gOMoJ-@snotXCd6N6wP z*xn)=98>-#`alu4KJzMl=GlMYx^arn1D-&%$^bernne%GaRBiH%B>kK$kjHU{q~w% zUWqAnzgEJDT|p!II2BcqBky~VI*BA2go_4zM1(t#Uxib8KdwGNZq&ryr75$QS8T$<76qB^`%f}; zv+MqqaZ5VklUAT)2@JH%vQ91i(jYqn7k-B zl?v_){J|gp`S2A7BFSif*4+Dqb3(P>lbjd#o@suc{e;tQ28&Xg!BK`j){>-EV~U6mW@Ay}?n3BufJcS~ z=~E}vu#iwwUmzI+)p%!h;Oga{dw=DQhgk>dojf^yV_s^|{wge6yTPWF7YjxZ7EKUo zTsh{5gsxKAy_pc`K%aC>+)&w(WDY$zKNBjEAQxmTgjtw_R=CG(uiJ;^hOyD09>07t zilsNIW@GGT8A4k8Y(vD?IcZ_Ygk?K!OG>-CfqfKAh9GlDQv{C$$2;95*+9NrO@<;M_DTCNLXWsJISQScmP5e7i 
z$(JKMUz*u_Z|M1?CfLt`PlBdU0G@Ob%BWUw&8k?X!T;kdWLJwH3#cYy_XDYrU=KRk zRw%1RKYMlo5k7wP_m*~rS6f4q7IQy*ckNNe))5`_6%h;_Uf*R{>mh*pwo)-ECWthf z=EMC}1_%dfqIA_;>FE0Aje|zaFMK>-ew!D!wcR5tWIlxLOFdup=m(Xz0PJPPFqhjD zi7eEF9yCtVE9&l?#K1pKlPgv;Q)t8)*-EEw`>YnslL9f_R{|iHiUNEL%#!mbkI@Lb zh9=ZXekAmJ@v;%p&x1YJ-VKVOP3+BIH?npqd1+*94B*>H!-up0fZuLDa~zo}IzD{# zY>T(f%tP0;`QWwA-4y`fnY5Rp~2_RgUr~pj8*u>&7_eHv}Cf(hDnw-;nl`M z+F1f7CMKj=Idd}g%TGYMR-v^_fju6h7=_G4b~tZL+O-0m%W3T%b0!~O-_Ui{`SW|d zPMcv3z@V`l)m`?RG4DNnv1F}TihpVJ!rR!70F(y2J2KMvR`;59K%gI;``2&4?{bFm zTh`?^^2EqU%YXSEvI&==?w5e^e|fkh{z-N?gtg}8(l1(gtbjeZZ!9p(5fJ9O_3O5m zAbKe}usR^4g?J$fSsVn%?97Zn$V@(eVjak$Z2|>Ax-Q^Da3%Ee#~YlyZ5iL?O>khX zM93wR;>$oSa_=44SD`RBFh0;E3E!MzQXjY>dbNhTemNlC_C$ZH(oRBCB3k`l?7ewh z&UxGaf7-@iFf+EX#Sn(<*%F0e$QDIZ%9c{eUZKr0#*8II$WoSwQjzww%$O01sFXHa z5^b_nlNFm?$x(<=b7lH{+lt!4+muQ{DC}Lf?(I9Y;bC`vYxlms+|k7awXl=VVUfh?zCzkg_tDu_r*?sd+0f7cCA0PFyONlKj7s~Y|{Ww~Np%RZmm$Bw+^0?W)8=3D>o+fO3{ntr@%!0Ul zX2o5;-(f(%ewhUkpMJt-z0GE&(|=j9VLhmmpB)o-t1MYcQ^wiK-;eA4_xb^a;=LIg zWeodUa``KgjQ)Dl2S59_a-_kl|3+GfjLe)zKt`FMi{GxlcG=_FjBod@59B#cn)HY3 ze5FoO`35|`Z$GJFYhF9A$xKgcV-;=mp||K@{#$ zME>Y$42n0Ki2ikVJ?ol`ts`KHJx(#P4<}j}tt*VGlGq+1%X%m|(|)K81oBOzs&g@8D|;#MwU=L!7M!#o2&v_+%k zUL%~)+!!y=0p;+Ac*8M_Mg=Dz%ajsK2+{cGh%pw=B(~$Yza1Z?AXG( zw^!J;(Z({1y++otr}K|AA`s2mn`+y1lfwiD3{?sr%9f_Qel^t!-IjqU#P+@MTUc$i zBb_*Umj3;W%wIV0i{!X2bx5m!+(l1s-MS^Arw>_p0)rmPC{Y|`_c4v-d3Uz)8tY(($vBL+ zw7KW}V660kN{w+kG-CNaWr>52fnjatAk(W4tdj=DN1v$-x4-1KuJ*jv@ZuVJLWN#% z^>_Bxq-a3$8E%EI|<+n|vu7|)Yp~?_C=JhwdI!XdMSSL?unUi;T;ho{>3l4y| z=5=%_4oO#TuBlU}N{yd1RN>?(2^6+PKav$|h8N@D7gYeaBUN=sy`&1@&D%<84ax*c=QbsYo96Q>k9cU^5#?Fa2T^kwyn)Sn&HpQ%H) z3z6Q442K6UnyL@Z6ghVsv10zzY15PpkTf=O`$)M1ars%Or|BviBRn2=1gy7 za_$RTtBl#Ot%iO^7I5m4RCj;PG>dF>-3%0R8@4iggTo;p)}?6~=i>5_Iv$pE@UG{; zY!VY>EYv%2uZJ&evcMXOy9R{b0JZrm|XhMje2_A&Or*kkB!sf;yeZqA2z?V9~`WeK?To)e`m_b69N1!mEY zV%$=vLnDkK;#4}4Kyf4|cS;F4Dc}TDW0H`t06BS#*|=7k-%!jtivE&`oJ@TUXXaYZZ@ zortH8A6psOR}vu0s>r3k-L>#zk6yicNpx$cr<5qCfKh&R}j!0KY)$R 
z3(Z=YAQUou#|R*@G(VDlU|)0`s>&6f@A3%e`$adcF{C+WfV$Tv>-0H`^u%MBrJlG7 z+@4Gnz8~I`BUYf?YUB2hu={TqkXa_n@H&^iBCjQTT7B)a8)M2JER}v2;^j}ga_plF z0zOvu(U2=}0YbW|il|bajR=V~qa69vPLu0YGJgNf^L@o?1KGC`nK3obl4v!sGHrBG z%&mk3b?fqr7YCZ0c!JY`vQppw^hHLI2J0|` zu?Y5y=JLd&0{orR;^EoqA(!1zY?&wlUeZHXl&{ilioDK;3YXf7J+9rTmY+H5$&W&? zN-;Xszv>hfpI9&)rBv}1w61+Vlzvr=2tM;C82( zkL~Q507|mw3e*uuEqWHNMY^czV&2~nfak$DWGwn9jfqlbY7WpH^Lk<=9Z7_|bsOjJ06~_C8IFyP3_F`6?BAK_TsH@+Mne2o2=2EvC$dZl77s zljM+WAzve>;TG2iMWdlArcip@aMJR4776>l{a#eaw_YP>O=PIu70$3EjiCT$!p^Lv z(ZDkLLWr_81(#@*{grY}6e}XAHCkjLB0uTN)CC4cw4QW?I&zcZYr5oV5F)DAS82HC z9`0|bRN`HzTpm{@(kYo(NB<`2=ak~DbKpcMB}|($&8gvdNoOgM5>XZ2pTw3(r(-zP zW&oScyKCh^GE`BCCK49bxT$K8e^{5X>~LPHzHUxNUL89SH^OiDmZX2!bJ;t7?`qRs zs^_gl!+D(eSx2v<^pYE!!p7MXkO46uyHKu5ibuN6A1h}F9nwMQ12blP9~oXGVt1j*q|=%dWC*$PudwWj)r;(iw?Aaj zDDeayeHld;#zDR8zK7NB+G_ORJqeF=Pwb@e6zHscb8i#=Vn83VsXX{=kf6l#s zw2fA0g!doH;52CN1n(~??Y{cw$X-j&zJLf&hN=$RY^2{$(h=uOaiMMv+aS-}|5}M3 z8Ci9U*h4FOIh>oR7c_rCHi5ow6zyEq5a+*GEU18@0S1xT=un@A{px;!gSDC-wZB(UC&g z2v30JLm16l2pye9(4uaN+XD2k?#0r2PBur%jFEXTm={-gk!ly54CkZD4{y%&RZ+g1 z@_MEpb7<0AlwImj88R{v|q416O0BEA%l5B!ti_kw5ptzN57yPsPn_i+`u+JDk^Zb_fHvSJQ!tphtOSUp~fH<9AK(T`91cTFFQjnt%W!rNPE zIX2(61%{jBB)vwLG}V4TUOIXTV}#5~89NI8fT1APn@&_2mb*)y?pY=GMmFM>G|u7K z>UNDK>!YdzI2xgG4%SMV<T@;86lUe`T=c=46h{fn+H=jAMms~@MgldQjtj9ucy}| zC@$a+CHb{N`>(`&?q9r(e~p9&aXu)6U5*lM50H#6wE9Sm0OABGjIHMlD_%X)7GcJp zWJDuLM@y3@WIOZEI)O5E+z>*m(FAb1TQ+FhlDGMrLU{oyw?iOLiPo>kMqROhz97xw zyBw&mx;{Yw(7*cewuui8*{DjyJ#J4q0vV|&5xE1Ii2Z~ZOAjoy_)j`K`!A3xi>^zB zo*-@fLlw-$+>9`-#+k&ve``D(L?B||p$_y`j_0&nbe}LUmQTJq*2USm!ab%h5sS2W zH*i0kzgyE~Va~EkozFwh3(rjQ9j0_~tI;n`CURB|EJY8RrcB!lrOBB*i)u-^MPPwT ze%}e>X?2+Nc%G!YC{z|XmUF@Tf7dWC)UeOBi!RR|#UA~O+N5-@=pkq_vsvF7DqI_{9<0m*bD<;Lq*bm+9hX%h z9vBLLARPqrySkBR(Rr&3P71|Ef`FBuqpMhIH;MZw52A z4zW*?_z2mBthk+Co*b=fkkso??&tqV!xNZgV;kFlR6f|7Yov}NVeB#7!NA~fz|1u| zw>1}A+8cZ~$SkeM&=UJLb*feI-iQa2BJAgMQ{w!(vSh2LFFmtP|F7GFIjV3oshdLH z{Fnr2bj;U$q}2Rj>YScFsFUhRVS%(Gh6BCHk&<4y&q(1uJXI#9{RbK^R!EQJ?DZ)# 
zh<%jh!bsy*u1#d*xSQD(m3&eHad*~15i0{Xe46wqmZjnHJVx7EVSwl5pgAF_F8xSk-_Vi0J0f)gz%-p=prQF^SM!HYs;rJ*qDh0eu9D07!R zs{C>}*=Bus2O*s)MZXnORbssC&Y#--EpzK*((-wJDlcOQ$&{V6AwZ-9v~E{z`;u(c zjOEb5(yo~0&V%Dmq7W<8+Fv%33F)&RYOjLgb=*jFoeyFpBRmdEn@_fPx*O+e)3)L9 zyE`6GmC*D~Mt!z@MW2v@ACneV7tB*OOkgGvYP6^N@si%sNrpC@;Yzk+vDilxNKOU0 ziht3r`K0|`;!j@s+W4mD)_kbCpg((ow4W%{+Em;@+959q9ILFPc_KU^CjT#9JJH-SFE z8SDOIM`o?E^+~FtrT>e~v$HAk6gF>Nx%EqfAd7;7sYh$aSpR4{sas6B)Yboz?rE|Q z_r6+k5D)8e5cZ>?^8Oc%?N-|&+1VYZG)Qsf~EADkRRdQ0^t?H%1oL-y7o z*A@^=)dKX?47WrE)}FBrpo6reyrgM@p)yYz%HuEAn?3vWh1@$d)Yjcl_x6jzW)lPJ z1OW%9mq^^Mn`TN?;02&r?2j7v(dQ6Z}q9nRW@fA5Gfo93aV-JC0-}vDPR+X>BX@6 zMos_pGAdMimqv7yPRkO*N<9b3LQx;dTj4WjKwkMqjrrT}&(q6kGDrITpFMq=Q*eUy zlwD@;IR4LHQq=zTY080o?>}Vb=M{E)Stli45kayP|6MTfOd2$w?$siGdzRX#P}W$Q zOB_*&N3QeMkNGdCIeM_t?jP9gn~kPCKCtJ_v5fm?xH0G9O4{}O%R1rC2aW22Rd6OE z&Gb7MXnO3CNvWFlRQN;X%UUAL?2=ObKGCaII;<3mglk(~LT(>hx!SmZlf1H^ZTf|h zp;ql081t?3(T%k-#(~J6C%I_)eDPs!0b?`FEcz{s8f|Z?K0o__$mTF;W{dDipt?^u7f%K>L4 zUquZaPqzZW%|aqy(3Yki9Ydy`*cw((i7BFX{?$ZAe%)*Or-+At{BKOE38;%hsr^kSH%pPeXwZ4?7l{` zLT_cBo4d==w{_Y0UJ3I~nf2Xya0(p}+Wu4&WcJAX$dK!a$2V5jlpdSrbJ_LD@gaMw zipwL0q}x5rcZ@V$H+FBjZS+7wLpRDgV_6x)fUyi$$ z-|l*TOGW8$=Qf{qNd0fT*XZ;qxBlrhAG@@ap&5Vwa(4ZR(!GCtX;1#g=kT-5US}j9 z{r9i9qt0zy)Zw>xdw#l_4!b{HSodQ?)r&KtCWq}Q^4)xL+``se$Y)R3CdpL3Ht+f)EWGZofBxz0THgX_wR`D?F+-QG ze5^L0SFi1TJw7dD+_FzAF|Ps7@UuOgtdbu7`3#G`8T;pAJ`Orvw?LcyHhWm|OOZBa z^O`hnQn_hV#?)Ik&72+-cJ+Ky@BI{tfokAF7`^V4trUw!B;-jscMU;f)Y>J6);zOZ%AEMzmU%R~&idDlPo zDxCl z_e|dy6|p_5XaD1FyESkA`N=15o9DjehvN)>pIa~ZkxQ5^)5TGNoQIqDoqzc>Q@h=P zDfcfL>LXWlVdSYxnv?eX$Bpg>E?l^H@#227PHrat^jqlsA+NcslF|Lx;qTWr^ftKA z7X9OLC<^8%&!NCgX4Q+whi)+-&{<|Q@+P(r$d*Mg8`rF|dcdl8hR%ze29_vj2D53CCjdkac2b^S>kuRPv^U~sN)Wr-5bONGk zMpM+)Z~mo|;9P%haXC5?qpKS(H*D1Cezb!JPteZnLHRpkS7YD zaWsCO2UQS#Xfl)!Ni#QZ0wK)TO%v+Vr|2Yu)lX6)5tY!HiG<{I)R-ZNs^@CMGfF^I zJE_hR#H||Avb((N0u!sWi+y!QZ+zq3s+(4S9U0nAQoE^6vWb0;`%16iWoU4sC!A64 z-P(!&@@fF`ARV7Krdt{|QPn|kluEGJ?yIj(fg63Y=pi%O&HKfR7iuGC$%mDGpKC<;3^a)5D 
zC$EcZ-Q>e2#k^ZQ(K5=lr@Hl<7jTi>R3U(?5}t0@hSEiJ^_pA>S=g>)$Cn|GNS2>N zuVl<}&jFqtJaXhbN*tU;(Aq#l6CSLKZliD7wX+?cqHtxzCU<_H>b<}h z8$F@x2~HRx~H@Lpq~4+zs@L5yvo z0b|dn?r7EZR@{q0t+6;kc?pGPTRu*0c4I_R!Rr<}Dtr4G1YBGiu}*F9;Je};#;EMQ zbx6(U+K$$5Gc3Lgxn_U9Ys~k>H@c*@ess$Dy=&}!-esLm?u`)^15*^@g5&OQ|FodQ z72D`Y)I&W5(T#RRZ)L?z-1dYKy6+v)GQ5*+G{&AIS}vlLHOEr1?%9oRUw*8+Rkp~p z3reu@xRJ+@mXAMpHdosB@b%2?FrZScU7I#%=tw?+ioWydm6t9J*3p|V;Wxv*Yd*eT zqZ4_uO9hd<=bDX~2eezX>$iSdE6Z-J`xw4; zYtU^*w{CB{cd~KVirE~24@K%fXFmBpwUyT?JF`{=J6g}ycD(+mApWe@veb%Kajgf} zZC0}Z^Q=0J>svUz$&1$;=EN{rENr#L1=qkG2mCiYo1Hh+z1^mv6St_us7AYu@totX zR;aE2+H6v4t4F8o-nRzt-GJgXjjHY?G`g$HmY*9Wuu;Jpfnn0z1jDa7SGfESh|gvP-(CxyLQRX&L<~+momq* z{C_S@_;%eoJZO-P5&c96*8(AmFW>;?EqO2N^EfQ1EVf{1{XS^f@x*7Bm=A4Q=uu}J z2gbr86r#{Ru-?kxM{;MLYobW8MqTMm_^sTuBnWMwE~im5G)GU*EvCBM*u&$YORWiBxn@D&eAgIqhxJqK-Fp<&s(?wF5D+jq}W(d#9T z?ml9aoV@ydGw(W#WPf$dSG7~!`?;@t1kqv1#J17SAH$a{St3vRo^(B62AbEaZoSrR zuQ&Tz|wQCo^x3(2rEFU-hy7e2tA~>CL*cp!`+kAKm?K zjcDoh_uo@~JIHrMO6i9;34;nn=6E*-FDZ3GLv8g7SuI(C`mg_Xl47U3tKE9AlLkgPf zGt7`#hyzTcUL&vYTP3kkv%S9(FdMcuRMJm(_WYFeb_2C7WmHT0Qm+H6dH`+8DXNcV zAUU~9PWk?s$*mW*R%*3M2bpQe$KTj=Lvn|T)hmZNdVY`JLDMrpr<1z*?%#gw8KvDxD`_zquhE;{oxjD1P}EsI#x7(< zbliZR4rg~L*_}}>{9n|qPPML26i;w6n?;RJh|DOx=fdy&>8GTT--5}GC zy9-kep>WCFxku?eRB1<5%yY~2cU(b@gzue2Y zVt%W2&6QgQiQW^{hdbwYg{v^m?9d^9WZ5t^UfU|Qr1DgvB2+?R;VDZ;dkDL!RJTZX>r+kiEQnlOcy7w38QC-@yV8^Nt`qkm7{U=|` z*Z6V!b{)5`zyA78c+LCQxo+O2rKK%KKlE~`@z7{=ZLV6etA34fgVmSMcT)`Anz_ZV zonfnaoyyvd`(c)*!mMpsqj`RbiEEE{3;!}zU(s=Vv0~rs)YfK)6boC&dAWbLrr;M% z`0&xWmBD$vroEZiPj*ykhyS=oWY3d0?xn-vzxa;nJ_Bgmc)eX`CGqq)yY6J{E2}%G zk3TMdKW0bgmEq%2#@+`@gO7aI7cv?~Uq_G63MjoHAEL1M{*VBK%@=pn9ngHaS9*JV zkCOhdDm_bdTk=sf6SqPZP@x3vu1c{nNa5v*1YX^C%K10_j#tX zQF1zsRRW#3j{jmC0-11;xyCOjxP4c) zPhr-8@L0n0;I(;E+x43>1qAFzKJ$6u;s)amO+3_S-&bs&4!gQERe0>`-}Ld4{kaOW z#)hq89)A-ytij>iBx;7Oc{-7-;0#T>qFH8U-e0$AgPj>J3r-5KyzI*c@4r=-RP;M? 
zsa8y*yXB$oBj=ThhVvvxkB(Y%x_P^P&k7eeENx`oJAO^JS8J}35U&tT^Q;vsF81iz zGsGfErAgDKGAd4Jpw^RHC=8n#wi@>X&uziyYr7~GXw_fl;|cro9_zAj1Ml$`vI^}6 zE|@=i_H5Ha@}n0}JtHre8Ce{f)wX^6bI8A6kfhXu5EMWzd+L7X#@f<(FpwSJF+z7a+FPT`OSZ$l%|+2M77ds5&~DeMQ$L+f8f(3K z^yra#c3OwPnYG9vm(#eV64^I7>yaM{1}I7!sHdQEmid;w`t(Uy)1_Tsy@}8c`fwqt zI(=pyzl*@&6#K`IQofQH0;EW4=561-nC(>aA%ExY-S6&aDIPbX%;zqp%BN{$_?=75 zN2;Op$WjeocB^fe%17k!YrFKxnWIj+E0v(zj~v@PkptLw=%Fy{!%h90TeOEsRyj8(Ais}L22H+xgLuC;p{j2C_+hJWLGO+&(t(!Yugq*7;<9F#y4+b znZUcl`F-Z~N(=+s!=OGra2pkCBZY@6?$xBt&t^dLi$~Xkoy7^Lz&SIVX7Ah_*PyTroM5EijLmpC4b0ngVu~FR6Mr&;olxB z!>%p8;Z1z^aXBA9mWD@Lk`&IKs8FjL?_QzQ;qiNIwF+*_V3%X#X(gVp4QcY~O+rgfbTLnfrCf9hX%^S${l=+i( zw$+)pP+>L$CMu15BQ&u3_U+p*kl)UXb%~AAD$dL$@RwPq@5r7O@)YN?Ny-pzy@d9- zPr;Kfoi`S^RfO!w9ksIhXGl6UfDXl9eL3z6#cEsd;f>FOx@_LvJ-)%X=lm;tK8K>? zq(j&4|5)q0Z$R~xD6Qec=O$l>4~nINvLN~FF6WvL7COmpy{26)U)Nh0G}4l%Of zVRl9H^OC$4);qsmH`P>CH0`F#PoWT1Pt+v)l4G{Idvc3ZY6X;hOo5e>UVCM)9@Pdh zE9-C)ZNqHgnkw>xD~C9L++h3ap-EN6t9e)f*+3$j8EqTw)_!0`ah6YM>1tsn-LieY zVg%+)x9q6>wF3rBbY+Svg&X&6XkYVTRVzh6cjdy!3Ss=rRQT7fNv@6d%{J38YdXxa zhcRiE$|pOzy`$0Dg&)YKaDC_)H~P=N{M&#?mA#YsAI~BM#ZRj5T(tovk#mwSOpfrm zd2D2$x4y{-^XDbqI(72p*=x9-VJqt3B`mrmP#G8ORAnd59Lj3FON63Sms?54qQGCx z5dVRaF%51uWK)?{#m7M;0FI8Z@n!KWIZC%qXoew9=y|is^#r@(RwBa&d=4IL+oVa8 zzC1}(Sq4+5?sPkFAY}bZJxajNcAm_?W#~0CST7*UnD_j4qi|KCfe-=OJtplu&%G5q zxP0cJQ-w({Rn_p2QxN-d7DLQ#em$0IGovf!dzATl)AWuw0rDyW)m8hB8a3Z-4y}H4 zk|E`7Nzki5>@^A%tP_I!O+*v!Jtw;+)WkICu8xN_YpZlMxr*-V@Hb|SHE=xp^Y#^g zv#+HiSF9AKh-dcJ2&fS7mYNmqFR^+KUL8EOMj~oI%)1lU-=*51&2p3d<36t!~5>FARv*w1khGsDK!=Hm=NBKr3 zY^)8A*C5Kyrp4!{>qsvHf2ch)Q9F9z%pY}aLn+izc`H&M8JvV$GRyEfXK2!33B`NkT>(4GBv6#e{@ z?t(H`m9bW&k2pklZVl^HQz!eo@>#85sR(qAb zxOdHW&V?le%CVfH3_OsHxsTOEquU=I)z(;?I(2FUx}SvV&<5R8>Nj%SA3f^rsn`yz zYr?ltOQzZ{D69GSRu@d*hX?aDjgF+WrI!w2{65L@9Y5axWopl(XH)iW8ZS0BrqO|dmGAKy%qnU-Bgub;JK$?2;3`NPIm^7HCF~%$J$@+Afpb-eCyI1I*Y& zPnMi?otZOdR#Yk+b06u)kR+N$-SQkvp@hTi*C}pz_;X}k&hrc`#>~XE3&@h)J$mi4 zpS|@%`uc0_Q5kDQdNPew4+LLZ0#!t;hP(a_N}U2mozcU)8Z26La>S~K7g4|ZV&e<$ 
zp4!H>e+RNu*_$8a*j};EgFtH_SQ-?Yt9bkd4>zitJdqv=4SX7tON-0+o$j<_#(X?o zo)NLmr>3TA)!YkulV~b^Sd~ltZs<@wpzK`Yu{m5~S3m<@sA4dcXORd`b94CWaJi`I z=SHr##9u$K-guPV=*!xR37mO+l;i@{PzcJp*kY!{W?9E#SNve`W$cLEtB0bllWJ=0 z?P0n&ngaOxFYVS=F^I02?y87sOzxIjF6<;I1jr;D4%?;IiOQ{gNg0aOL$%n8Uwetf zGMP6i1B?3dWKF*vT%3egHgL^dP9$oYa901qjM%1fMx=QaYOYZML%j4B!J zgd{ZcipAcYZTt3kIVWeZIsw891mL0tdV_R>7Sn1OlNIuwhHu-Kq#}CECOm^K))#ysI$!@|oy|X*-2bg3L zSS^0?b=UaRw6@_{{8ToqB$M zSBdukK9pqgh3hfO}sPu5MiLv zxN)0_V6zgF(|4)g-;4PE_SUUirQJnI5F35|^5y4Q5hLvNMdl@QFL^f)#P^%)YevE_ zcf0c##p+*`Pf?CAadG#z5{-Vr5uQc}H?*mK)`_DwRhRSLD} z_=YH929x9L4}-AaIAI%SOQbZojuL*?vh zRcRI@$XXDk>_mT!{rmSz6f6RKSl?>;RoH9?xzcn@2B=G;C{jd_=9o z)#MAA`_Yncgz4L%BE|fOJqK3IJjuB|d+uC6jK}!#<0I*tAH3+-hAdp)-o1ZVb@la6 z*!!VR+B6PWnSZR`V#1(X2m0()wZwkEOmhMDS!D#BWd`=kfi1rHA{EkkI7eD^1Sm

_Jfs^-Jj?6v@>MWp+N$=s+jrVCni;|1(6VU>+_Dp})7*Szg)7UmS_>D{;g5B4jnA0jIZrKW=yuPfq%inHCGp3`%I1~9a-;N zR}BLsr$wx@s3K;Jwn_keVtlF~0HZ^7quP{G^?3zwivO>E9xyFSTTAO9Vp>bKe{g&^ zmu&p-a&}rMwBON;mr?um^!1BjSr5M%(l8?#S_5&u=@kl_GI{iisCEh>cJ|hc)z{Y_ z@a?z1axRu)7Q@{88z!4Ne^_R3l35nVoE-6j0&WZLCq(C#B47!_xpe z>Y7e1*HNXv+hwyNXlzgy!&c#K%{1@_@IYcuCLijV(4gZa*PikG$=`n+x%~a_%k~>I zhQOUHpGr8ND)$Z)AsWcu6+X=Jk9C+7i3W^`fq3MrVm_QpuN)>d&zybZS+n-Zu8^mW z%^$(+8g*9-I!Ltd(q#o&gg2v2iM^NMg6l_F^F)kV`;V{JE;mBXG21}q>}qE zhDsgKqsMRP1XeMhdp8bdY$LC4_C=rFeqq>!3l~P1-S|>q^Ki@L-j#zuvgnsVj*v%7bV(H02`;QMrz_u_z(L?5UECL7*xuV}-J&YK_GFv|| zuIxDK2-N}yg)0^uQ8(!cmW-2|;CY9z#d45|Tm+q$6F?oyc=@G;T|Y&d@7x61skDg# za{6P$P2*=&scUFtNC?`ZWy=u!!vi=J2?)xmNC=Pr7zPnIpHq|Ll~NGT#KWb({`%`m zlMfXR%kiApoOtnB#|AInDz#iB1`>_Rm=rTl3X84-i#G-fG)7gXsJ`UJ; z8$^PK_U&6l?jcCP%bWb9Rpt*nEae@-7DTMg;Jg-N3W8zIAYP|Nrq9%LUb{A^4|L$mt(qr_mpc8Pzz$ve^`gQBATRPU7Z%;Bsr zhkF>E+ts=Db`0`mf2oj_>F& zKl9+%-2H|P;lph*Q$K6fWjR!(t;={t_$XO#`UTx&|9~{q;AlfuJ$guG^Kd8vyD)ib zG#f}`ho*F0O5+K^e`G@|jI`kQc_2p#NpHx$|LUu+%4r$v`TF%s%d%YJ0}i$t;FC12 zHRR8`%-;UaiGaf{@oQq=P33KAP2fOJH#WZ9MRqmJbP>jLMEn}L>$LDSOC5N;#mIiv z5Bj3Pf~y36k+Fw+_U!3TsaRT+{VM3FBp}_=5<4o2eIVX{SK0L9TTFk~OUn}4s}X8T z7;758XYbxH&_wvc)uLu(p>gE7yK|>#KE#vJ&+r!aPU>reNs9OVBLJo-U{Zg+jaz#@ zFpIqB+(6SD{O#j*)6bka<4=P--UGMvP&_rZHdwp%<6Ac^O~sV&>)eX+PbCL4zW%Ru zex^~-Pu&|>(6yvNaZc2{vs2v#BrU6psmZn%U|G}XjwjIP{FMDYMD8oc$M%c(HEr9r z^@iH$r=?{IgF_sjicJ)b5N5`}?I0{G$5TmRP`-AKH<;o^i-nWqW0uplsGpwq&rv3s zmwQ>_a*vWd5hfhE?*&cTI0d_gnJ0cD(Q4Oe{JEZ}#){9C#1J6&3^b87EezefA(%4@ z!}*P5XwEsY&QA8n6?d>nPCH#)4|%`_t<*z1oKv~KkFJW`ARaE2KD-BU;AqcuH@qjiJbMM~tqKqrHG=KaKe2VqE2i04~=I+OARX0^ae@+VVgfT4^=i&J! 
zYce-rftUe3uo8iB^z|clp1hSI+0z5~q?0&8YXr`f&`HDPp86Z=!OXg)!g!%Kns z!6^a6?3N^Hj*hnVB_M#6-V_nvs#Plk%F$EVSndN&D%ZMX$Ib0YatcJSLtX=WcYzY4 zk~c(AayxGCB=iQ1NXnkG?1NS`Ft;t~AEdlO(g#C1$^|rQ)=WM~vR7~*8*RFE?dp#U zD#WlHRIRYDAv-wd^u%A*=YH||=XVJRB(Dcyv`I1&WjGvPi0Vz?Q{~Dapo4R5-mKYd zu63A8jbjeI(}jdb0(^n+MV|I1x+1ql5v&r0N07{ngytu^54_jFtV^AzlRp>q7biFF zd5?tsc|vx}7M1P#q~DV(2Kw*aIS7JD{IL`U!OLg2Xx%zY91OitY(S-^Sv*Q{I(Y08 zZ;#x(c{A+u&K z(Kx}5>8tZeO?sTz^vn@dBvFugl4o&9F$lSc=Gmd_*7;;ZNm?4i{}sc5UOk`Gd~+B; z^#NM&MQqC;o{TWL07Lm58+Nv7mh?-J>#*PH;WAA#iDtj6xQdX5O`6Zg_(~vig_A>( zf8h!YCNK<`EVS-1EvHD26GwkCXFJK&JwSS$H2OJZx5lh|cKQi*3s=~S`RvPLkWE>p zGq@lv*2Tqd)~T)XgI~VxwE}w?3X#04<|@*|BS)4mbbP%%hub|wSK$^dT3nF@<)(lS zH~nT7WrifMpvuzVu_YK#NQP|jTO@-+08J87@Iaz;`%Lj0E(B$WWy*E*;)#jyKg+}} zi#NZWn62qntFiqL?Bu6Q1GQ`*-giCs-^iXxXkk-LxOlMW!TVM&_9V^y^$sjCqCgEkuhT7Od4G_HQtq z)PB!##!TV6mx+=#O70GW8?yY~ZzVBRE33d>(bvbUdp?~_B<~)Wo*iW$m_zFUJ6iD@ zPoF+LvLcdn37*ZPYFFd9vmRt4O&p3tls*F&#f*SSG}arYhE;?fxTf}4D)(w7f)5rso zNPvQkN1VE%;b(Vy7{W0N-(g423g`4B+&X<=6k25)m@?B22Q0ISvLjMlyJ>sscy-wq zO=W};H6WTjI=i(GoI{v`?S(fK2EjcH#)?HtaL-aVz15htzp_$sLGgVPzd?E1J^P0gqEo zk}ZQQRsSJF?vc@3j-{3uEUIlEq7JbBh)wU7ONa=0`v*>70J{tVy$E~CP=rGf9UQPm z2B#)8u|}(&zTMPlzi5({eK@Z0Mv6PSigaa)lOZEWS0|7^lZe|ycCVmqxSxcb9R|q- zUM3(WDin%#PJ&igR!EtyeV?sNDO|W!D(5CNvyEbUh zz<>ho&Vv_ij*9ij7!><)34E>$Y(DY2`uF&Fi-P<{kBeAYGqM=d!>$6fsm0N(&}m?9 zPC@WsmInlads#{J4J0bUy9VKQj%8fXYGta&o|thC@Cb8 zo+t_=h4e`|L5$oeVBj!EbA5}WKRua_tuP1nGduj*z^uzlcaVY2Aqre7UYN|*uM`C> z5&{8x?0_%IwqZ!wN?^;%eCMOIw=n{T<{%zAbo8hdQiChNRzs2tMPiGE;ev3oBvJ^0 zw#+2>h`#OwHQX5`25HrIdfLqu9i9N(I;7ThbwQi<#8&9QW`LDncj~sJmzh&qgf%t+BYS zizK7dkOfq@J`En^N#=7Iy~ypm^=$q4)&s1LgNkQ@>}auE6w1#hE{aNG=28)jhA&C9 zgW^Tl;Sn@Y#9Wa}$Sa9;s8~t6LeH|7De;Bj(2HVi65Q1edz|pK*xtNd5VU?4bCWFT z%j{wUX)S@HMD3Dz%NaSKdrj%J)>1UxBF44h=7cTvuSuYVycl|zRDNtW81u&3EVw+W!H zx*o%MN$4VZ|LD&7XdXDgD|jPw^Fs#@nkjR6LIPOB7xLs2Nlq`NU5l_m4_E?W&dkk6 ztbRO>MZluH7;d2G(5Lw0%o#I6IIQmkQs0C?r6X3MeiauKD+T% z6)@rmD}(3W85eK>jqAynM+CZ4wQa*#wzDklRJ?Oh${+6v8>u 
zz8m6^`x!Lf*7R0_$ak|aoA|RG!jV^*`OEzviMbg}#WROIE-p2W_ zH}vn->kr}qb7I9%XfI=0UKW9e*H#SIbn$;zoa^K8Yu&M*>Tk8?R%(Ien|O z8caBZn4(|&nqh2BSdPi9wcekET~4v-K*r3L%X(ZRdt$wKB~-X zOYSHjVo3MuQ~Mq(|P6RDF+ zJBwL@IB^nu&B+Kz{5WchU#50ap^K1Tk}?s_npP3hBFo@jrA&vysc&D-DW0OEvtM-v z6WQVO^f;K&rtyhwzx?tfi(iQGN+E^|0NJaNlY~rhTVI*ox>+-wilaC`$>b5>3IXon z;b1a^YywKACXE~4Skpx(8TW9SjT;%b(9d9iJPnLx^H!}6sS;(HDz(#6yI2yVSbd?M zVSwk3+ul8%EXe{>AvRYJxv>Vu0kIxG=oOLN_1vh~w_}YoAksWHJZ&bxoQE&?#$%&> zR1(D4+UJRuH2w0OS5XbL9WZRzu$-RmwGtqf^L!o?u={ll9U+?vIW-~#Tm;YvLE=SO~cJCV3_`mTy&iIsV6C|S74qAWtU;5 zL*%kHRg~DEm&q|~Q3`i-5x+WuQ-y&cX?%8lR#8@1l8Vd6HRN)FvAFl6FGbN*yFX_p zr%B$2>+{b1&{CFEa*v5uT|7SOducgcgoiNQd#Dzq*neS+WBc~)%?axT2}`8KSzK89 zjpBF)H|;O>nPn5aIecaI0}&Gk=xXZr(e+$`SsY#XN3OPTMUo4FivkoE0bYV&h7Ae$ zY+okloxWZH>u1cr(zn0xkgF9eMow$}VX?1tU|WK-2$9bwV8L!#6H*Q^40?9@dV}uK zWOEEz_W_WOf+{7h5LjQ*VPx%?g-e$%Ef$wd`~Pc_8550KqzyHwK#J6Uuxo74)hN4H0loq=^8;9_#!GLxnG6^=s z>@Pzh?ZRM{ABeBTAf;j8*Ju5H{dy+ZBnt{VE}?_SqPQ^WL#E>D*&P3>q(R4*uxzx3 zk(@GR*88FnW!a%+6cG}KXjAc{b2B!fT>WDW9xHA9SDy4(!S`yVsga(5|5aa;A1gXg07K5gy04qASAW|wwnGr z>NcxiL=+Z=0@`T$^u#94OB9KTNG_h#RD0!DNHR|Vv`~g*HEcL;cGQeWz)ZMz)T6@o zHw=wGF<3b!+KA>Y!Z2DymjeW4Vm-eoi9k6Q$WKh}ZXkV?Gh{CjE9L4oq>;?cIl?;A zX3hi|xhgRoIeZC2NUdCk=s59uWr2V!_^YH1)eS|DOMZzkniKpfhE`hUKg48VZqWS+4>RDuK;4DnC4_?I@N_eCkUpZKbD=`2;Ji|>h+PqHE+@9`ltTc9P zbA}TfcIfcNqu_Usl6MG#F9|V&Kp=Y=r0Nw4bh1}Q6_2pa489;4IpV83mCHJ9ZgGIK z1ATLaaR4b2^sxA$Z0Jl(r{uFr?1<0G(WlJkc-laTndZBY3=+0yntc&g%NXDQM4U5J z3N|$2)mLVZ5;lMf>gHIwCF9xs~GD>auwV zASQ#NQ^=zF!Cg*sLobmkI(7gIO|N$d$by!~mx{`ss-qOrOyBAV1qfLvk^&N01?U{z zRXIiDV0zMtd{lMJd+V(25??K&$0D(Jsygt7v!NBr2!6e6vpN56-efY!HqFOoriexz z52S`6D|rCD>PygBfOnq_$SD)8F?eY%e^;+M(>TCLP#7^v&N_q9whuY5Mxu8oJ+WRI zPq5%tS)u;Uwbl0aQMh>Daa$USih#`DLE5S4h}sCHnm1-ZG^!M;^}~?y;XZ7c^BjE) zg?mhGjRU7aPoz2E$KEgJ_^;^_-&Gfhujs6}tCy7F8xYKvab*b2@A{9lOcqJuY=~iL zIRR{$MdDi&?@=b~+56;?u+0lFAZduG{oqtz%E+uS-G;7q2Vw4WcsMPYZSKqRS;2M- z!^#HBd%YnQPESI$oPl6p{Nr3oF6LgGKZIuz5ub&QY-Qxj-e5VMwY48PL+h2rYEut> 
zcdCoV+7}#EcklHf-HY?_S~@;=KA@y8z@pwGL}`aCi)RpsoN6-A2N34Nfm%)@a$isd z)6)9tNCp7gd5$QU(t;eC_E)lb2&($C;Roi>T`rPnSn5a|1s5WM0t6}!ivlWQU+#@H zBCEZzcJm{_gY$N=RhX{arU()u1cGOjNql5{?q#U!EK+3_!u)jWwh9{lGT2WUz zRM3~tsvf)2TuiMIBw@>rLw^@2AsGgVVC~3;34FvbZRF6!8nM%ItlfYrbbHS$lESUR z`JgN*?-$L55pgW5eW!W*{>udz5k(|hXB|26jnIEWVw6J^AXd12k(8DA?Qs1x?^AY) z8(%0R20{tNz_uE$kqty>Mc8nb!hrB^%rvZ;K7O_Q{naT54a!9~$I)HnmVdhA>QsN;4B@IyInXqv|a|?CqlU z-Raus4Pg8o6^kgadDuwRe@(m^n<^=k0RUXnz57OL+OGKqoJ&X5HHA7{zp*wWE*k`P z8cyIXo(OjECcWW|L}n*RgOYHCSvJT?2m=F@qlF~SL}4Mk%>#~CQlxI?M$IL^f6nuV z8hadIjMj?+n|mPd{-Z`c5RD->j`%~C&@{i8@ZBF9w{p+VU^eW6G>5UltqFSB6z%CD zlcvnE;igY2*qxV9ryk246}sW|l0YZ9Qa^nkSBZ-lh|>yiS5X8*6_dKRrQ)jS>G}z6 z`-$CK-4}uQ!1f6448~ov_)Plu8ld7AvJH2XM6H3KvEf%z>_0j|Hy*<393%08Lga;z z`ob#vJ)NHG2+<=Xuk?VK0^4k@7U22vMdauIg5rZBC(S2p-J<$u$x7gf{M!R zrR3j55drAvn^P!52S=bV!;N`G6(LM7$9_iGf2{}~BjgQTgbzppgG}5q;BjcwN>S0D z9M?d^Djp{S)Fg-#NQ`F8+T5pP^u^g{eiB+#_)3TJqRTAS1JvD5KT8kp^bW^ih#*nk z5c0jmR*IaA;MRrJGEHDyxt0WgOp;k>{@m%wC~U6K<%vb>O>|IEH$SErnE1hS<|1=C zDeW~R3nX-Y`!;P3lq8%SR{K!-UQ;*m zu`nkpj0`}gQ0B7bMR(t@VM9rzNOfn~qz~MS97a#3zpu{~y(i@ZQRy=*6ZUr1yxw89 zB0x%frok93`oUFQGzmd+%$tzu>!g%H8Qm@QXdpCLtrmt$K##v^GzaBT*;{&O2xMly z?U9U^Pd%Wn;8a2o!)nqiDSb`Kz`GJUiLy*{?8M9%dA6j^B+w!xOvF6k<3voCcl%hn z%Eo73$qq>$bX?SPlmOfr^~W_2=Zy`*uO{DlOD=k%*weyREv&+be#$SYt4{+t0vsfggqgcl7>=(C zB?lYX!PDGafFh3~lVg>$rooK``3|GUALZ5zFX)|Jfx9(LXhZez`zB>vRgrKTr0>FQ zpP);g8kQrRAYSg}Rx2Xbs{#7lCMzO%mjE=1xqQs^w8D@m6@duv2omfHf2V-wBB6w- z?%*j7v5Lpm!l>6hlSYb#%0etuXv6sKykm+dr;+qdb;BkhJ)bMGb7gUQlg~bzDty+n zQLZRPRdrl*r?{a`ky=n_I!Rxb3?WWw2mgfO*Tc*wOW6>}1fkrmL{%LQ+ARFjTrwe$ zI@6?h5KB=;MAVP8Hc5tA;G%Y4uCA<2gLyC(={tVg;rKAir43i-2>}PeCbSP(hyWsl z3wRNuu$3pg_4Ajp&K#NmbRFSU9v&F5Q|y6$Fm!n4L#o0dO9`NOEGJHoXj_?!b_Tl8 z!vkiumk!o?4rx?|Fn^18nTn0M4=R`LJ(lE!rI>l@;er03BON`EddTW06GU#*wZrB_ z%(-|5NQ#NVkJ}$_(Y5z54vv5dYj4XnlYP8;o1>{h;sH`WJJz$XmUVX~9YJ2ssak`p7` zI^PVyJed&RLMh|U9kPkHt%$e#;bqc8n%Xwrtc^5C@??r%;O1k55seesFlmXk_XQvb-Gy69A;Xw43&=kaA3YRlK`NAMC&Yr;DyhIOu4wODFi7J+hU-z1N 
ze&R1*n+9H9lludYY8pOxy>tkl0S}jq@a#0Y*O}0LcfqkD!%)t$x4(Up+hW_SX`=O2 z_o_;8O3t8*YzSW|Qznn96s*kP3fj_^3WKRHPYL!dHPY4^e76wfp8~*>+DD!yGj%Rd zI3202>3RmM;m3CJ1p;BNwYaxJ&OL>;so2@M*qVxux>b5qk9_au%?a)M-FH)jwBKup zo3{DmMHNHLQvT{uAbEbA zofHtWr{=Mvs-~UfX-gB0&ObDGA4bS@0R{!S>WJzEoFIE%%A!?Yg2ZQo9zs9>6#=J% zM}PJIw0GuTRozzp-*fgk zXYc*x?~`@uua8T}tE=?hixypNYv{)``Bs-NK5^p2T^xd|=+F8A^u^H0=dcGISW3j{ z^Z4;A@t2kjNp$6+5&X!C4=8#UEI1DzYiXrblyGd(JXS)2Z`n%H0K<*thrueDj9MO1 zG*{$WVcsk0tAXC7n&N;$eVs(Y;+1C1*q`zE5q+zBEmm^r5_zhu@&5RC#&joB!0}C! zc(92XI$!??Vx|LME&aIrqQLEh5#Y^83KsHTxXXpDO!1^M5eHkfAB}GH)~V07?WX6G z;5(~B-M70(ihxZUKAn0RGD095$x63kKQSBN7eGVB>*_DawIv+s-s|CkCc(olRtyYU ze0M$Ya|d+@qSB3FP$)nmnbay~UCV%(keW(MKu#%jeW_5^IRDyXCi& zIP<+2a4~?U9oKdLwPUe?4cc=|$V042U$6i0s!?bU7~lOA<7hy{4tQQ6uukecmUU5p zQV15Q76I&K&p?{oWfdPMsDvgty77b(l4xO7L-!~F((D3d%%T`5r6$`!5)1)RX83J8 zKE0ql`qOqEApskTni^Sc00|b2mAVm+ZaL_GK*F$r%1F($qP5$z13l?1zs-njU6+HE z3u;QpLG$Q_B_xpny{(|iELNEqg`R~l!Zh2E@Rh&3m(Eo-@I0NgAss_mn^sT9yy_M2 zF=)+>s;YTIh`MFy(tw@rTDT0_Hbm4|(kK*UeSjcBTl&Ltz8*p@ zi9zSk!0vbJ-FqChrG(In>xVW8K67ZRZ+z6;%e2e7CLTGDJ}E&AmnXb>g0I*9%F7F! 
z7#*yYj8*)M<>zS(+UYEo(fZm+*pjTaCpuBO*GYIdUq7Aqo^h-5^Sd>WyOmQfK=q^A zzzB{Ua;3i${_is(TbI&2m<%uOet4apIdRMbqBhShh~R4@{+OFiCpW6Ho71M(lyMXp z6UZ`=1ajLp$AIK}brqR}$STRZc$B*&djLx#)wjZmEur74rO!Ca%5l1Bv}mT&jhV`{ zNyECu2yNRTE0X2T9CpGi_Vz@AXkqkyN8@SKfmA2FQql<{k2vK{Gqb@r^K1^E3Mh~l z2g6mV1|pwhcF?9%rz|G*A>gyk5UE4+ZjbSNcx_#s9aK`oy`0J$>YCWtw1ct7>G{q1 z&!sdyan8Hn+lL27vV0qpo!*|da ze)wpYk<$_gn4*L&#fDG;oa6;7(+L5xX3&CY+SJ!Gi3vpPRD4ngpe&c(C&7%;l12j1 zBs*vDl>vB+L2z8|x@oz+m2sgXp=lYEm_hC3oDr=?Aj9~|02a!ge(i?3A0sJ6!uou_ zBG+G2TfsOqVcIOLTN{OMW$0nz5blXHsrLQhPcIr2VjPK}=?ukuGy*hPsq$;cs9$q% z85$rZAcrCBYTXgRNlfq!Y{&cXl@8KRrs(NalOBPXRGgDQQp^sr;t9aS96n zOj;*D-{M9kO2fhOL10{q!LFY;Wjq zVD2ptGj&$I9a_vu&r1-H3v*uQZF%zk+`)X^;^tUl=34xe_38=)uhr(m!OozYkfF||uUbHFmEFAlu=69tu^kp=I&*JI zf~TRrGuzc3VVA)gxs)krS6k3Q7FWWeKaRZ;*QA+@Wsv*oz?O`O$*0@8n5K~dE&lyQ zO*89F*QmJGA>-WA?H%5~c9je+LK6$F7NgH{g*yV1A;ONbuRDP}gM#i&XmGiAHF;)# zCIjBL^!>JRCV-T<*qxsTUqskbX~7qSW9aGto1*@cf;ak;md2Bb>2`f3;W~xE?YwLU z-Nwe{8hY`M`2FfY8}W%UwH($>N7&0Wr<%Ow$Lk;dfd_6yyDGct9%%Bb?@40_UtGC| zS?@%~sbOU(V<%I7uix|@Lp_4v?A#|E3yLXpyLeOe)<+jO;{(HNq$d@WjPWSJ$|L2R zL`m0%lz|@!X3JeF8sAL;f~j`P8xmu;RF*odA`*FN2Xe)#zXYeWP0X#hf; z1s7OT<&kqaNy<6@PT^qXDHRvg;hdCmuQpRm7IK(BQ^BI#ShAg{vBDgiy(lFLqRH-d z+A1Ynou)gc&ObM*HbY>B9v0)Ii_Z5u!WC@1`LuC7PXQjys>x*TI~e%z@u0tA)3Y^d z42v?otv6M1G}%Nd_VzuJCtc~s`NR+ry{;eOpwKwc`M)Jzr^b7{y zoS$0ZJ!s*rN=rLCiRUl!t`c?xWJug8$r3AJJ^h(GmKf8I{?=R) z0w1Sl@NDC5tTeaW1lnd1(DU&L!XF%D6zxy4un|xPP;1=@bpZ?VFmr8FIKOxlq@NOA(Y+wNF!!?7uQ#>*#si@h zF_+2|1dP~cD=lv4GRo^yS&G!I>)1o1;rufv+nwT@SkvclE3#Y}gF%TT5a>!wJAtM{ zp9lN@)YAEv)eVWuuA2{*LumD2=}&mzQA0bwVasFOJiNt*!eG3VDJz-MzO=wxD~&5( zTr(Y?E%NzHzDPT%^8Qi&r6BgFZDhF=3uLWhT=(Sus+d|zzbe*78JD=Nh%?CZ{*juJ zQbQt*j16Q%Mc-F(OjeZbz z7PR)U5|onU$HVi?pgRDqGMx*%wLQ?QcFl3Bw^L^d57mmZBtsfHu;Hn@rm-`kgMP&v za5IJ=LP>zpG}aD~Ek7+>CTVeA^so4=c-G7a9;c*Iu319GKCg;8(b zvIgL!(wf|WQciHF)lp&Y$4yj}N zL~?IiYSGL2z8a1~l}dwWqZ(IAl9MM-S~_*t#XJBP>$B1dTXv)j<(JH6=B4Ak1adJM zKR#(~@w1uuy$DYINKXR`kzuf$KOBdr4XycB=1)jgFknNQG2}9&aZb 
zCGgyQ@$caLBqA$L2c-@tKTY)UIsNk5w)*RPdcJrSQ_R#e^Z@59=EHP34Lu1E*&A+k zB%o04aP+ZNvM|cOv2^Vh+>XDg+d@Zt5{LdM7iCAn$Y3n6nR%{`MO2Dj8fd2jQ4Da- zru2qTJ(Y;<9}-gY{37T)|8y&kmL4^o1oed%KEk4gyl~Wj`Hi1Fr^7^JFZ|yB?T~-G e89#fs8eP0zI`e_sHD?d~l=o=gQCTBqFZ~xYbx=J3 diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 40fcfa1db..3afe0dd1b 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -354,7 +354,7 @@ def all_gather_matmul_hbm_buffer( fetch_block_k: Optional[int] = None, staged_a_layout: str = "k_contiguous", num_warps: Optional[int] = 8, - num_stages: Optional[int] = 3, + num_stages: Optional[int] = 2, num_fetch_stages: int = 1, first_stage_fetch_sms: Optional[int] = 256, trace: bool = False, From 02ea2b656ef99aadbbef1bb667cf07efe14ef3c2 Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Sat, 11 Apr 2026 00:40:42 -0500 Subject: [PATCH 50/60] Fix preamble FusedConfig() defaults and add shape-adaptive auto-config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0 bug fix: all_gather_matmul_hbm_buffer_preamble() used bare FusedConfig() which defaulted to 256x64x64 (tuned for one_shot baseline). Callers pre-allocating workspace got wrong tile sizes — assertion crashes for M%256!=0 and severe perf degradation otherwise (same as old defaults). 
Changes: - Fix preamble to use _auto_config() instead of bare FusedConfig() - Change k_per_flag default from 16 to 8 (IRIS-0018: +4.3% faster, avoids kpf=16 validation failures on 2/8 ranks at M=262144) - Add _auto_config() with champion configs from K-021 sweep (1076 trials) and shape-heuristic fallback for unseen shapes - Make all tuning params Optional[int]=None with auto-selection cascade: champion config → heuristic → safety defaults - Add R2 code comments documenting kpf=8 rationale Resolves: K-017 R1 (P0 preamble bug), R2 (safety comment) Data: IRIS-0018 (934 trials), K-021 (1076 trials, 7 champion shapes) Co-Authored-By: Claude Opus 4.6 (1M context) --- iris/ops/all_gather_matmul_hbm_buffer.py | 171 +++++++++++++++++++++-- 1 file changed, 160 insertions(+), 11 deletions(-) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 3afe0dd1b..56c3f4e27 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -21,6 +21,119 @@ from .workspace import FusedWorkspace +# ────────────────────────────────────────────────────────────────────── +# Auto-config: shape-adaptive parameter selection for HBM buffer kernel +# Source: K-021 sweep data (1076+ trials, 7 verified champion shapes) +# ────────────────────────────────────────────────────────────────────── + +# Verified champion configs from IRIS-0018/0019 sweeps + optimize-loop iter3. +# Key: (M, N, K) -> dict of kernel params that beat PyTorch. 
+_CHAMPION_CONFIGS = { + (262144, 8192, 8192): dict( + bm=256, bn=256, bk=64, gm=24, kpf=64, fs=52, nfs=128, fsf=304, + ), + (131072, 16384, 16384): dict( + bm=256, bn=256, bk=64, gm=24, kpf=32, fs=4, nfs=64, fsf=52, + ), + (147456, 28672, 4096): dict( + bm=256, bn=256, bk=64, gm=24, kpf=16, fs=59, nfs=36, fsf=52, + ), + (229376, 28672, 4096): dict( + bm=256, bn=256, bk=64, gm=24, kpf=16, fs=4, nfs=56, fsf=52, + ), + (327680, 28672, 4096): dict( + bm=256, bn=256, bk=64, gm=24, kpf=16, fs=4, nfs=32, fsf=52, + ), + (8192, 8192, 262144): dict( + bm=128, bn=256, bk=64, gm=8, kpf=32, fs=4, nfs=8, fsf=52, + ), + (16384, 16384, 131072): dict( + bm=128, bn=256, bk=64, gm=16, kpf=16, fs=16, nfs=8, fsf=52, + ), +} + + +def _auto_config(M: int, N: int, K: int, world_size: int = 8): + """ + Select optimal HBM buffer kernel parameters for a given shape. + + Returns (FusedConfig, k_per_flag, num_fetch_sms, num_fetch_stages, + first_stage_fetch_sms) — ready to pass to the kernel. + + Priority order: + 1. Exact match in champion configs (verified 1.12-1.44x vs PyTorch) + 2. Shape-heuristic derivation from 1076-trial sweep principles + + Heuristics (from K-021 sweep analysis): + - k_per_flag is the #1 knob (52% of perf range). Maximize it. 
+ - bm=256 for M%256==0 and M>=8K; bm=128 otherwise + - bn=256 always (bn=128 is 15-35% worse) + - bk=64 always (bk=128 exceeds 64KB LDS on MI300X) + - num_stages=2 always (num_stages=3 crashes — 98KB LDS needed) + - num_warps=8 always (fewer warps = 22% worse) + - group_size_m: 1 for small M, 24 for large M (L2 locality) + """ + key = (M, N, K) + if key in _CHAMPION_CONFIGS: + c = _CHAMPION_CONFIGS[key] + # Validate kpf for this world_size + num_k_blocks = K // c["bk"] + kpf = c["kpf"] + while num_k_blocks % kpf != 0 and kpf > 1: + kpf //= 2 + config = FusedConfig( + block_size_m=c["bm"], block_size_n=c["bn"], + block_size_k=c["bk"], group_size_m=c["gm"], + ) + return config, kpf, c["fs"], c["nfs"], c["fsf"] + + # Derive from heuristics + num_k_blocks = K // 64 + + # Block sizes + bm = 256 if (M % 256 == 0 and M >= 8192) else 128 + num_m_tiles = M // bm + + # k_per_flag: maximize for throughput + if num_k_blocks >= 512: + kpf = 64 + elif num_k_blocks >= 128: + kpf = 16 + elif num_k_blocks >= 64: + kpf = 8 + else: + kpf = 4 + while num_k_blocks % kpf != 0 and kpf > 1: + kpf //= 2 + + # num_fetch_sms: scale with M-tiles (more tiles → more fetchers) + if num_m_tiles <= 8: + fs = 4 + elif num_m_tiles <= 32: + fs = 16 + elif num_m_tiles <= 128: + fs = 32 + else: + fs = 52 + + # num_fetch_stages + if num_m_tiles >= 512: + nfs = 4 + elif num_m_tiles >= 64: + nfs = 2 + else: + nfs = 1 + + # group_size_m + gm = 24 if num_m_tiles >= 64 else (8 if num_m_tiles >= 16 else 1) + + config = FusedConfig( + block_size_m=bm, block_size_n=256, block_size_k=64, + group_size_m=gm, + ) + return config, kpf, fs, nfs, 64 + + @triton.jit def _hbm_buffer_all_gather_matmul_kernel( A_sharded, @@ -241,7 +354,7 @@ def all_gather_matmul_hbm_buffer_preamble( A_sharded: torch.Tensor, B: torch.Tensor, config: Optional[FusedConfig] = None, - k_per_flag: int = 16, + k_per_flag: Optional[int] = None, staged_a_layout: str = "k_contiguous", ) -> FusedWorkspace: """ @@ -251,13 +364,18 @@ def 
all_gather_matmul_hbm_buffer_preamble( staged_a_layout: "k_contiguous" (default, row-major (M,K)) or "m_contiguous" (col-major, stored as (K,M) transposed). """ - if config is None: - config = FusedConfig() - M, K_local = A_sharded.shape K, N = B.shape world_size = ctx.get_num_ranks() + if config is None: + auto_cfg, auto_kpf, _, _, _ = _auto_config(M, N, K, world_size) + config = auto_cfg + if k_per_flag is None: + k_per_flag = auto_kpf + if k_per_flag is None: + k_per_flag = 8 # Safety default; see K-021 best_configs.json for peak perf + assert world_size * K_local == K assert K_local % config.block_size_k == 0 assert K % config.block_size_k == 0 @@ -348,31 +466,62 @@ def all_gather_matmul_hbm_buffer( async_op: bool = False, config: Optional[FusedConfig] = None, workspace: Optional[FusedWorkspace] = None, - num_fetch_sms: Optional[int] = 32, - k_per_flag: int = 16, + num_fetch_sms: Optional[int] = None, + k_per_flag: Optional[int] = None, fetch_block_m: Optional[int] = None, fetch_block_k: Optional[int] = None, staged_a_layout: str = "k_contiguous", num_warps: Optional[int] = 8, num_stages: Optional[int] = 2, - num_fetch_stages: int = 1, - first_stage_fetch_sms: Optional[int] = 256, + num_fetch_stages: Optional[int] = None, + first_stage_fetch_sms: Optional[int] = None, trace: bool = False, ) -> FusedWorkspace: """ All-gather + matmul with dedicated fetcher/GEMM workgroups. + When ``config`` is None, uses ``_auto_config()`` to select shape-optimal + parameters from verified sweep data (K-021). This gives up to 1.44× + speedup over PyTorch on champion shapes without any manual tuning. + Args: staged_a_layout: Buffer layout for gathered A. "k_contiguous" — (M,K) row-major, K is fast dim. Matches NN convention. "m_contiguous" — (M,K) with M as fast dim. Matches TN convention (best for tritonblas). 
""" - if config is None: - config = FusedConfig() - M, K_local = A_sharded.shape K, N = B.shape world_size = ctx.get_num_ranks() + + if config is None: + # Shape-adaptive auto-config from K-021 sweep data + auto_cfg, auto_kpf, auto_fs, auto_nfs, auto_fsf = _auto_config( + M, N, K, world_size + ) + config = auto_cfg + if k_per_flag is None: + k_per_flag = auto_kpf + if num_fetch_sms is None: + num_fetch_sms = auto_fs + if num_fetch_stages is None: + num_fetch_stages = auto_nfs + if first_stage_fetch_sms is None: + first_stage_fetch_sms = auto_fsf + + # Apply defaults for any remaining None values (when config is explicit + # but some params are left at None). + # kpf=8 is the safety default: +4.3% vs kpf=16 on g6 (IRIS-0018, 934 trials) + # and avoids kpf=16 validation failures on 2/8 ranks at M=262144. + # For peak performance on known shapes, use best_configs.json from K-021. + if k_per_flag is None: + k_per_flag = 8 + if num_fetch_sms is None: + num_fetch_sms = 32 + if num_fetch_stages is None: + num_fetch_stages = 1 + if first_stage_fetch_sms is None: + first_stage_fetch_sms = 256 + rank = ctx.get_rank() assert world_size * K_local == K From 64a631f28b07767b6adb28f7b46696817e27c7af Mon Sep 17 00:00:00 2001 From: Ryan Swann Date: Sat, 11 Apr 2026 10:39:44 -0500 Subject: [PATCH 51/60] Fix collective ordering deadlock in fd_passing at ws<8 Replace dist.all_gather_object (which internally issues 2 NCCL all_gathers for size+data) with a fixed-size tensor all_gather in setup_fd_infrastructure(). The extra collectives from all_gather_object could interleave with data-plane all_gather_into_tensor calls on the same process group, causing a rank-asymmetric ordering deadlock at world_size < 8. New helper _allgather_paths_tensor() encodes AF_UNIX paths into a 256-byte uint8 tensor and uses a single dist.all_gather call, which is always order-compatible with the data plane. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- iris/fd_passing.py | 61 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/iris/fd_passing.py b/iris/fd_passing.py index 4e8c13f44..e62239b11 100644 --- a/iris/fd_passing.py +++ b/iris/fd_passing.py @@ -140,6 +140,57 @@ def setup_fd_mesh(rank: int, world_size: int, all_paths: Dict[int, str]) -> Dict return conns +def _allgather_paths_tensor(my_path: str, num_ranks: int): + """ + Exchange socket paths across ranks using a fixed-size tensor all_gather. + + Uses ``dist.all_gather`` with a fixed-size int8 tensor instead of + ``dist.all_gather_object`` to avoid injecting extra NCCL collective + calls (``all_gather_object`` internally issues two NCCL all_gathers for + size+data). At ws<8 the additional collectives can interleave with + data-plane ``all_gather_into_tensor`` calls on the same process group, + causing a rank-asymmetric collective ordering deadlock. + + AF_UNIX paths are at most 108 bytes; we use a 256-byte buffer for safety. + """ + import torch + import torch.distributed as dist + + _PATH_BUF_LEN = 256 + path_bytes = my_path.encode("utf-8") + if len(path_bytes) >= _PATH_BUF_LEN: + raise ValueError( + f"Socket path too long ({len(path_bytes)} bytes, max {_PATH_BUF_LEN - 1}): {my_path}" + ) + + # Encode into a fixed-size uint8 tensor (CPU for gloo, GPU for nccl). + # uint8 matches the [0,255] byte range; NCCL supports it natively. 
+ buf = torch.zeros(_PATH_BUF_LEN, dtype=torch.uint8) + for i, b in enumerate(path_bytes): + buf[i] = b + + backend = str(dist.get_backend()).lower() + if backend == "nccl" and torch.cuda.is_available(): + device = torch.device("cuda", torch.cuda.current_device()) + buf = buf.to(device) + # else: keep on CPU (gloo) + + gathered = [torch.zeros_like(buf) for _ in range(num_ranks)] + dist.all_gather(gathered, buf) + + all_paths = {} + for r in range(num_ranks): + raw = gathered[r].cpu().tolist() + # Find null terminator (first 0) + try: + end = raw.index(0) + except ValueError: + end = _PATH_BUF_LEN + all_paths[r] = bytes(raw[:end]).decode("utf-8") + + return all_paths + + def setup_fd_infrastructure(cur_rank: int, num_ranks: int): """ Setup FD passing infrastructure for multi-rank communication. @@ -156,15 +207,17 @@ def setup_fd_infrastructure(cur_rank: int, num_ranks: int): if num_ranks <= 1: return None - import torch.distributed as dist from iris._distributed_helpers import distributed_barrier # Setup socket mesh for FD passing prefix = "iris-dmabuf" my_path = make_rank_sock_path(prefix, cur_rank) - obj_list = [None for _ in range(num_ranks)] - dist.all_gather_object(obj_list, my_path) - all_paths = {r: obj_list[r] for r in range(num_ranks)} + + # Use tensor-based all_gather instead of all_gather_object to avoid + # injecting extra NCCL collectives that can deadlock with data-plane + # all_gather_into_tensor at ws<8 (see _allgather_paths_tensor docstring). 
+ all_paths = _allgather_paths_tensor(my_path, num_ranks) + distributed_barrier() fd_conns = setup_fd_mesh(cur_rank, num_ranks, all_paths) distributed_barrier() From 7d3f4767041c60a1f2d5571e4573c9deca3f982b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 11 Apr 2026 15:53:07 +0000 Subject: [PATCH 52/60] Apply Ruff auto-fixes --- iris/fd_passing.py | 4 +- iris/ops/all_gather_matmul_hbm_buffer.py | 77 ++++++++++++++++++++---- 2 files changed, 65 insertions(+), 16 deletions(-) diff --git a/iris/fd_passing.py b/iris/fd_passing.py index e62239b11..1f71290fa 100644 --- a/iris/fd_passing.py +++ b/iris/fd_passing.py @@ -159,9 +159,7 @@ def _allgather_paths_tensor(my_path: str, num_ranks: int): _PATH_BUF_LEN = 256 path_bytes = my_path.encode("utf-8") if len(path_bytes) >= _PATH_BUF_LEN: - raise ValueError( - f"Socket path too long ({len(path_bytes)} bytes, max {_PATH_BUF_LEN - 1}): {my_path}" - ) + raise ValueError(f"Socket path too long ({len(path_bytes)} bytes, max {_PATH_BUF_LEN - 1}): {my_path}") # Encode into a fixed-size uint8 tensor (CPU for gloo, GPU for nccl). # uint8 matches the [0,255] byte range; NCCL supports it natively. diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index 56c3f4e27..b8356539d 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -30,25 +30,74 @@ # Key: (M, N, K) -> dict of kernel params that beat PyTorch. 
_CHAMPION_CONFIGS = { (262144, 8192, 8192): dict( - bm=256, bn=256, bk=64, gm=24, kpf=64, fs=52, nfs=128, fsf=304, + bm=256, + bn=256, + bk=64, + gm=24, + kpf=64, + fs=52, + nfs=128, + fsf=304, ), (131072, 16384, 16384): dict( - bm=256, bn=256, bk=64, gm=24, kpf=32, fs=4, nfs=64, fsf=52, + bm=256, + bn=256, + bk=64, + gm=24, + kpf=32, + fs=4, + nfs=64, + fsf=52, ), (147456, 28672, 4096): dict( - bm=256, bn=256, bk=64, gm=24, kpf=16, fs=59, nfs=36, fsf=52, + bm=256, + bn=256, + bk=64, + gm=24, + kpf=16, + fs=59, + nfs=36, + fsf=52, ), (229376, 28672, 4096): dict( - bm=256, bn=256, bk=64, gm=24, kpf=16, fs=4, nfs=56, fsf=52, + bm=256, + bn=256, + bk=64, + gm=24, + kpf=16, + fs=4, + nfs=56, + fsf=52, ), (327680, 28672, 4096): dict( - bm=256, bn=256, bk=64, gm=24, kpf=16, fs=4, nfs=32, fsf=52, + bm=256, + bn=256, + bk=64, + gm=24, + kpf=16, + fs=4, + nfs=32, + fsf=52, ), (8192, 8192, 262144): dict( - bm=128, bn=256, bk=64, gm=8, kpf=32, fs=4, nfs=8, fsf=52, + bm=128, + bn=256, + bk=64, + gm=8, + kpf=32, + fs=4, + nfs=8, + fsf=52, ), (16384, 16384, 131072): dict( - bm=128, bn=256, bk=64, gm=16, kpf=16, fs=16, nfs=8, fsf=52, + bm=128, + bn=256, + bk=64, + gm=16, + kpf=16, + fs=16, + nfs=8, + fsf=52, ), } @@ -82,8 +131,10 @@ def _auto_config(M: int, N: int, K: int, world_size: int = 8): while num_k_blocks % kpf != 0 and kpf > 1: kpf //= 2 config = FusedConfig( - block_size_m=c["bm"], block_size_n=c["bn"], - block_size_k=c["bk"], group_size_m=c["gm"], + block_size_m=c["bm"], + block_size_n=c["bn"], + block_size_k=c["bk"], + group_size_m=c["gm"], ) return config, kpf, c["fs"], c["nfs"], c["fsf"] @@ -128,7 +179,9 @@ def _auto_config(M: int, N: int, K: int, world_size: int = 8): gm = 24 if num_m_tiles >= 64 else (8 if num_m_tiles >= 16 else 1) config = FusedConfig( - block_size_m=bm, block_size_n=256, block_size_k=64, + block_size_m=bm, + block_size_n=256, + block_size_k=64, group_size_m=gm, ) return config, kpf, fs, nfs, 64 @@ -495,9 +548,7 @@ def 
all_gather_matmul_hbm_buffer( if config is None: # Shape-adaptive auto-config from K-021 sweep data - auto_cfg, auto_kpf, auto_fs, auto_nfs, auto_fsf = _auto_config( - M, N, K, world_size - ) + auto_cfg, auto_kpf, auto_fs, auto_nfs, auto_fsf = _auto_config(M, N, K, world_size) config = auto_cfg if k_per_flag is None: k_per_flag = auto_kpf From ef0a173a401262438549805543b0652579c927eb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:58:59 +0000 Subject: [PATCH 53/60] Port auto-config system from ryanswann-amd/iris feature/auto-config-xops-perf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picked: - benchmark/ops/all_gather_matmul/auto_config.py — auto-selection of kernel configs - benchmark/ops/all_gather_matmul/configs/ — all JSON configs for mi300x and mi355x - benchmark/ops/bench_all_gather_matmul.py — updated to use auto-config iris/ops/__init__.py and tests/ops/ left untouched. 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/e61a7d71-7247-4dfb-907e-37befa0bbf63 Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- benchmark/ops/all_gather_matmul/__init__.py | 0 .../ops/all_gather_matmul/auto_config.py | 582 ++++++++++++++++++ .../configs/default_config.json | 27 + .../configs/mi300x/NN/ws2.json | 159 +++++ .../configs/mi300x/NN/ws4.json | 201 ++++++ .../configs/mi300x/NN/ws8.json | 290 +++++++++ .../configs/mi300x/NT/ws2.json | 10 + .../configs/mi300x/NT/ws4.json | 10 + .../configs/mi300x/NT/ws8.json | 31 + .../configs/mi300x/TN/ws2.json | 10 + .../configs/mi300x/TN/ws4.json | 10 + .../configs/mi300x/TN/ws8.json | 31 + .../configs/mi300x/TT/ws2.json | 10 + .../configs/mi300x/TT/ws4.json | 10 + .../configs/mi300x/TT/ws8.json | 31 + .../configs/mi355x/NN/ws2.json | 17 + .../configs/mi355x/NN/ws4.json | 17 + .../configs/mi355x/NN/ws8.json | 246 ++++++++ .../configs/regression_sizes.json | 101 +++ benchmark/ops/bench_all_gather_matmul.py | 88 +-- 20 files changed, 1840 insertions(+), 41 deletions(-) create mode 100644 benchmark/ops/all_gather_matmul/__init__.py create mode 100644 benchmark/ops/all_gather_matmul/auto_config.py create mode 100644 benchmark/ops/all_gather_matmul/configs/default_config.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws2.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws4.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws8.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws2.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws4.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws8.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws2.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws4.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws8.json create mode 
100644 benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws2.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws4.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws8.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws2.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws4.json create mode 100644 benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws8.json create mode 100644 benchmark/ops/all_gather_matmul/configs/regression_sizes.json diff --git a/benchmark/ops/all_gather_matmul/__init__.py b/benchmark/ops/all_gather_matmul/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmark/ops/all_gather_matmul/auto_config.py b/benchmark/ops/all_gather_matmul/auto_config.py new file mode 100644 index 000000000..0e8990886 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/auto_config.py @@ -0,0 +1,582 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +""" +Auto-selection mechanism for fused AG+MM kernel configurations. + +Given problem dimensions (M, N, K), transpose mode, world_size, and GPU +architecture, this module selects the best known configuration or returns +a sensible default. For world sizes where iris AG+MM is known to lose +against PyTorch (ws<8), the default disables iris and signals fallback. + +Config files live under: + configs/{arch}/{transpose}/ws{N}.json + +Each config file contains: + - Per-shape champion configs with all kernel parameters in a flat "params" dict + - A "default_params" dict with architecture-appropriate defaults + - Params include FusedConfig fields (block_size_m, etc.) and HBM buffer + kernel params (k_per_flag, num_fetch_sms, num_warps, num_stages, etc.) 
+ +Transpose coverage: + The iris AG+MM kernel (`_fused_all_gather_matmul_kernel`) uses stride-based + addressing (`stride_am, stride_ak, stride_bk, stride_bn`), so transpose + layouts are handled implicitly by tensor strides. Config files exist for + all four layouts (NN, TN, NT, TT) under each architecture directory. + Only NN has per-shape champion configs from benchmarking (3,489 trials). + TN/NT/TT files contain heuristic defaults only (empty shapes dict) and are + marked enabled at ws>=8 to allow heuristic fallback. All transposes at ws<8 + are disabled (NO-GO based on NN benchmarks). + +Usage: + >>> from auto_config import select_ag_mm_config + >>> result = select_ag_mm_config(M=131072, N=16384, K=16384, world_size=8) + >>> if result.enabled: + ... config = result.to_fused_config() + ... hbm_params = result.hbm_buffer_params # k_per_flag, num_fetch_sms, etc. + ... shmem.ops.all_gather_matmul(output, A, B, config=config) + ... else: + ... # Fallback to PyTorch all_gather + matmul + ... ... 
+ + >>> # List all regression test sizes + >>> from auto_config import load_regression_sizes + >>> sizes = load_regression_sizes() +""" + +import json +import os +import subprocess +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +from iris.ops.config import FusedConfig + +# Config files live alongside this module +_CONFIGS_DIR = Path(__file__).parent / "configs" + +# FusedConfig field names — everything else in "params" is an HBM buffer param +_FUSED_CONFIG_FIELDS = {f.name for f in FusedConfig.__dataclass_fields__.values()} + +# HBM buffer param names (kernel launch params, not FusedConfig fields) +_HBM_BUFFER_FIELDS = { + "k_per_flag", + "num_fetch_sms", + "num_fetch_stages", + "first_stage_fetch_sms", + "fetch_block_m", + "fetch_block_k", + "num_warps", + "num_stages", +} + +# In-memory cache: (arch, transpose, world_size) -> loaded JSON data +_config_cache: Dict[Tuple[str, str, int], dict] = {} + +# Cached GPU architecture detection result +_detected_arch: Optional[str] = None + +# Supported transpose modes. The AG+MM kernel only supports NN layout. +# TN/NT/TT would require kernel-level changes to permute strides. +SUPPORTED_TRANSPOSES = ("NN",) + +# Supported GPU architectures with tuned configs +SUPPORTED_ARCHITECTURES = ("mi300x", "mi355x") + +# Map gfx target IDs to architecture names used in config paths +_GFX_TO_ARCH = { + "gfx942": "mi300x", # MI300X, MI300A + "gfx950": "mi355x", # MI355X +} + + +def detect_gpu_arch() -> str: + """Auto-detect GPU architecture from the current system. + + Detection order: + 1. IRIS_GPU_ARCH environment variable (override) + 2. rocm-smi --showproductname parsing + 3. rocminfo gfx target parsing + 4. Falls back to "mi300x" (most common deployment target) + + Returns: + Architecture string (e.g., "mi300x") suitable for config lookup. + """ + global _detected_arch + if _detected_arch is not None: + return _detected_arch + + # 1. 
Environment variable override + env_arch = os.environ.get("IRIS_GPU_ARCH", "").strip().lower() + if env_arch: + _detected_arch = env_arch + return _detected_arch + + # 2. Try rocminfo for gfx target + try: + result = subprocess.run( + ["rocminfo"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + for line in result.stdout.splitlines(): + line_stripped = line.strip().lower() + if "name:" in line_stripped and "gfx" in line_stripped: + for gfx_id, arch_name in _GFX_TO_ARCH.items(): + if gfx_id in line_stripped: + _detected_arch = arch_name + return _detected_arch + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + pass + + # 3. Fallback to MI300X (most common deployment target) + _detected_arch = "mi300x" + return _detected_arch + + +@dataclass +class AutoConfigResult: + """Result of auto-config lookup. + + Attributes: + enabled: If False, iris AG+MM should NOT be used; fallback to PyTorch. + config_params: Dict of FusedConfig parameters (only valid if enabled=True). + hbm_buffer_params: Dict of HBM buffer-specific kernel params + (k_per_flag, num_fetch_sms, num_fetch_stages, first_stage_fetch_sms). + source: Human-readable description of where this config came from. + shape_key: The MxNxK key that matched (None if heuristic/default). + expected_iris_ms: Expected kernel time in ms on target GPU (None if unknown). + """ + + enabled: bool = False + config_params: Dict = field(default_factory=dict) + hbm_buffer_params: Dict = field(default_factory=dict) + source: str = "default" + shape_key: Optional[str] = None + expected_iris_ms: Optional[float] = None + + def to_fused_config(self) -> FusedConfig: + """Convert to FusedConfig for use with iris.ops functions. + + Raises: + RuntimeError: If this config is disabled (enabled=False). + """ + if not self.enabled: + raise RuntimeError( + f"Cannot create FusedConfig: iris AG+MM is disabled for this " + f"configuration. Reason: {self.source}. 
" + f"Use PyTorch all_gather + matmul instead." + ) + # Filter to only fields FusedConfig accepts + valid_fields = {f.name for f in FusedConfig.__dataclass_fields__.values()} + filtered = {k: v for k, v in self.config_params.items() if k in valid_fields} + return FusedConfig(**filtered) + + +def _split_params(params: Dict) -> Tuple[Dict, Dict]: + """Split a flat params dict into (config_params, hbm_buffer_params). + + FusedConfig fields go into config_params. + Everything else (num_warps, num_stages, k_per_flag, etc.) goes into hbm_buffer_params. + """ + config_params = {} + hbm_params = {} + for k, v in params.items(): + if k in _FUSED_CONFIG_FIELDS: + config_params[k] = v + else: + hbm_params[k] = v + return config_params, hbm_params + + +def _extract_shape_params(shape_data: Dict) -> Tuple[Dict, Dict]: + """Extract config_params and hbm_buffer_params from shape data. + + Supports both the new flat "params" format and the legacy split + "config" + "hbm_buffer_params" format for backward compatibility. + """ + if "params" in shape_data: + return _split_params(shape_data["params"]) + return shape_data.get("config", {}), shape_data.get("hbm_buffer_params", {}) + + +def _extract_default_params(data: Dict) -> Optional[Tuple[Dict, Dict]]: + """Extract default config_params and hbm_buffer_params from file-level defaults. + + Supports both "default_params" (flat) and legacy "default_config" + "default_hbm_buffer_params". + Returns None if no defaults are available. + """ + if "default_params" in data and data["default_params"] is not None: + return _split_params(data["default_params"]) + default_config = data.get("default_config") + if default_config: + return default_config, data.get("default_hbm_buffer_params", {}) + return None + + +def _load_config_file(arch: str, transpose: str, world_size: int) -> Optional[dict]: + """Load and cache a config JSON file. + + Args: + arch: GPU architecture identifier (e.g., "mi300x"). 
+ transpose: Transpose mode (e.g., "NN", "NT", "TN", "TT"). + world_size: Number of ranks. + + Returns: + Parsed JSON dict, or None if file doesn't exist. + """ + cache_key = (arch, transpose, world_size) + if cache_key in _config_cache: + return _config_cache[cache_key] + + config_path = _CONFIGS_DIR / arch / transpose / f"ws{world_size}.json" + if not config_path.exists(): + _config_cache[cache_key] = None + return None + + with open(config_path, "r") as f: + data = json.load(f) + + _config_cache[cache_key] = data + return data + + +def _load_default_config() -> dict: + """Load the global default config.""" + default_path = _CONFIGS_DIR / "default_config.json" + if default_path.exists(): + with open(default_path, "r") as f: + return json.load(f) + return {} + + +def _find_nearest_shape(M: int, N: int, K: int, shapes: dict, tolerance: float = 0.15) -> Optional[str]: + """Find the nearest matching shape in the config database. + + Uses log-space geometric distance to find shapes that are structurally + similar (within `tolerance` ratio per dimension). This avoids falling + back to heuristic when the user's problem is close to a champion shape. + + Args: + M, N, K: Target dimensions. + shapes: Dict of shape_key -> shape_data from the config file. + tolerance: Max fractional distance per dimension (default 15%). + + Returns: + The shape_key of the nearest match, or None if no shape is close enough. 
+ """ + import math + + best_key = None + best_dist = float("inf") + + for shape_key, shape_data in shapes.items(): + sm, sn, sk = shape_data["M"], shape_data["N"], shape_data["K"] + + # Check per-dimension ratio tolerance + if sm == 0 or sn == 0 or sk == 0: + continue + rm = abs(M - sm) / sm + rn = abs(N - sn) / sn + rk = abs(K - sk) / sk + + if rm > tolerance or rn > tolerance or rk > tolerance: + continue + + # Geometric distance in log space + dist = math.sqrt( + math.log(max(M, 1) / max(sm, 1)) ** 2 + + math.log(max(N, 1) / max(sn, 1)) ** 2 + + math.log(max(K, 1) / max(sk, 1)) ** 2 + ) + if dist < best_dist: + best_dist = dist + best_key = shape_key + + return best_key + + +def _apply_heuristic(M: int, N: int, K: int, arch: str = "mi300x") -> Tuple[Dict, Dict]: + """Apply heuristic rules to generate config + HBM buffer params. + + Based on optimization data: + - MI300X: 3,489 measured trials + - MI355X: Optuna TPE + broad sweep + + Args: + M: Rows dimension. + N: Columns dimension. + K: Reduction dimension. + arch: GPU architecture for arch-specific heuristics. + + Returns: + Tuple of (config_params dict, hbm_buffer_params dict). 
+ """ + bk = 64 + num_k_blocks = K // bk + + if arch == "mi355x": + bm = 256 + num_m_tiles = M // bm + gm = 4 if M <= 32768 else 8 + config_params = { + "block_size_m": bm, + "block_size_n": 256, + "block_size_k": bk, + "group_size_m": gm, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": True, + } + kpf = 8 if num_k_blocks <= 512 else 16 + while num_k_blocks % kpf != 0 and kpf > 1: + kpf //= 2 + hbm_params = { + "k_per_flag": kpf, + "num_fetch_sms": 16, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52, + } + return config_params, hbm_params + + # MI300X heuristics + if M <= 16384: + bm = 128 + else: + bm = 256 + + num_m_tiles = M // bm + + if M <= 8192: + gm = 8 + elif M <= 16384: + gm = 16 + else: + gm = 24 + + config_params = { + "block_size_m": bm, + "block_size_n": 256, + "block_size_k": bk, + "group_size_m": gm, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": True, + } + + if num_k_blocks >= 512: + kpf = 64 + elif num_k_blocks >= 128: + kpf = 16 + elif num_k_blocks >= 64: + kpf = 8 + else: + kpf = 4 + while num_k_blocks % kpf != 0 and kpf > 1: + kpf //= 2 + + if num_m_tiles <= 8: + fs = 4 + elif num_m_tiles <= 32: + fs = 16 + elif num_m_tiles <= 128: + fs = 32 + else: + fs = 52 + + if num_m_tiles >= 512: + nfs = 4 + elif num_m_tiles >= 64: + nfs = 2 + else: + nfs = 1 + + hbm_params = { + "k_per_flag": kpf, + "num_fetch_sms": fs, + "num_fetch_stages": nfs, + "first_stage_fetch_sms": 64, + } + + return config_params, hbm_params + + +def select_ag_mm_config( + M: int, + N: int, + K: int, + world_size: int, + transpose: str = "NN", + arch: str = "auto", +) -> AutoConfigResult: + """Select the best AG+MM config for the given problem. + + Lookup order: + 1. Exact shape match in benchmark/ops/all_gather_matmul/configs/{arch}/{transpose}/ws{world_size}.json + 2. Heuristic-based config from the same file's defaults + 3. 
Global default from benchmark/ops/all_gather_matmul/configs/default_config.json + + For world sizes where iris is known to lose (ws<8 on MI300X), returns + a disabled result signaling fallback to PyTorch. + + Args: + M: Number of rows (or M_local * world_size for AG+MM). + N: Number of columns. + K: Reduction dimension. + world_size: Number of ranks in the communicator. + transpose: Transpose mode ("NN", "NT", "TN", "TT"). Default "NN". + arch: GPU architecture ("mi300x", etc.) or "auto" to auto-detect. + Default "auto". Set IRIS_GPU_ARCH env var to override. + + Returns: + AutoConfigResult with .enabled indicating whether to use iris, + .to_fused_config() to get the FusedConfig if enabled, and + .hbm_buffer_params with kernel-specific parameters. + + Example: + >>> result = select_ag_mm_config(131072, 16384, 16384, world_size=8) + >>> result.enabled + True + >>> config = result.to_fused_config() + >>> result.hbm_buffer_params + {'k_per_flag': 32, 'num_fetch_sms': 4, 'num_fetch_stages': 64, 'first_stage_fetch_sms': 52} + + >>> result = select_ag_mm_config(4096, 4096, 4096, world_size=2) + >>> result.enabled + False + """ + transpose = transpose.upper() + if arch == "auto": + arch = detect_gpu_arch() + else: + arch = arch.lower() + + # Step 1: Try to load the specific config file + data = _load_config_file(arch, transpose, world_size) + + if data is not None: + # Check if this world_size is enabled + if not data.get("enabled", True): + return AutoConfigResult( + enabled=False, + source=f"Disabled by config: {arch}/{transpose}/ws{world_size}.json — {data.get('reason', 'no reason given')}", + ) + + # Look for exact shape match + shape_key = f"{M}x{N}x{K}" + shapes = data.get("shapes", {}) + if shape_key in shapes: + shape_data = shapes[shape_key] + cfg, hbm = _extract_shape_params(shape_data) + return AutoConfigResult( + enabled=True, + config_params=cfg, + hbm_buffer_params=hbm, + source=f"Exact match: {arch}/{transpose}/ws{world_size}.json [{shape_data.get('label', 
shape_key)}]", + shape_key=shape_key, + expected_iris_ms=shape_data.get("expected_iris_ms"), + ) + + # No exact match — try nearest champion shape (within 15% per dim) + nearest_key = _find_nearest_shape(M, N, K, shapes) + if nearest_key is not None: + nearest_data = shapes[nearest_key] + cfg, hbm = _extract_shape_params(nearest_data) + return AutoConfigResult( + enabled=True, + config_params=cfg, + hbm_buffer_params=hbm, + source=f"Nearest match: {arch}/{transpose}/ws{world_size}.json [{nearest_data.get('label', nearest_key)}] (target {M}x{N}x{K} ≈ {nearest_key})", + shape_key=nearest_key, + expected_iris_ms=nearest_data.get("expected_iris_ms"), + ) + + # No nearby match — use heuristic + file defaults + defaults = _extract_default_params(data) + if defaults is not None: + file_default_config, file_default_hbm = defaults + heuristic_config, heuristic_hbm = _apply_heuristic(M, N, K, arch=arch) + merged_config = {**file_default_config, **heuristic_config} + merged_hbm = {**file_default_hbm, **heuristic_hbm} + return AutoConfigResult( + enabled=True, + config_params=merged_config, + hbm_buffer_params=merged_hbm, + source=f"Heuristic (no exact shape match in {arch}/{transpose}/ws{world_size}.json)", + ) + + # Step 2: No config file found — check global default + default_data = _load_default_config() + ws_gate = default_data.get("world_size_gate", {}) + min_ws = ws_gate.get("min_world_size", 8) + + if world_size < min_ws: + return AutoConfigResult( + enabled=False, + source=f"world_size={world_size} < min_world_size={min_ws} (global default). 
{ws_gate.get('reason', '')}", + ) + + # World size OK but no specific config — apply heuristic + heuristic_config, heuristic_hbm = _apply_heuristic(M, N, K, arch=arch) + return AutoConfigResult( + enabled=True, + config_params=heuristic_config, + hbm_buffer_params=heuristic_hbm, + source=f"Heuristic fallback (no config file for {arch}/{transpose}/ws{world_size})", + ) + + +def list_known_shapes( + world_size: int, + transpose: str = "NN", + arch: str = "mi300x", +) -> list: + """List all known shape configurations for a given world_size/transpose/arch. + + Returns: + List of dicts with keys: shape_key, label, M, N, K, expected_iris_ms. + """ + data = _load_config_file(arch, transpose.upper(), world_size) + if data is None or not data.get("enabled", True): + return [] + + result = [] + for shape_key, shape_data in data.get("shapes", {}).items(): + result.append( + { + "shape_key": shape_key, + "label": shape_data.get("label", ""), + "M": shape_data["M"], + "N": shape_data["N"], + "K": shape_data["K"], + "expected_iris_ms": shape_data.get("expected_iris_ms"), + } + ) + + result.sort(key=lambda x: x.get("expected_iris_ms") or float("inf")) + return result + + +def load_regression_sizes() -> List[Dict]: + """Load regression test sizes from the JSON config file. + + Returns: + List of regression size dicts, each with: name, M, N, K, tier, + description, world_sizes, expected, regression_threshold_pct. + """ + reg_path = _CONFIGS_DIR / "regression_sizes.json" + if not reg_path.exists(): + return [] + with open(reg_path, "r") as f: + data = json.load(f) + return data.get("sizes", []) + + +def clear_config_cache(): + """Clear the in-memory config cache. 
Useful after modifying config files.""" + _config_cache.clear() diff --git a/benchmark/ops/all_gather_matmul/configs/default_config.json b/benchmark/ops/all_gather_matmul/configs/default_config.json new file mode 100644 index 000000000..ff96ac1f0 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/default_config.json @@ -0,0 +1,27 @@ +{ + "_meta": { + "description": "Global default fallback config for AG+MM operations. Disables iris AG+MM for ws<8 (fallback to PyTorch).", + "source": "benchmarking on MI300X (gfx942), 3489 measured trials", + "date": "2026-04-13" + }, + "world_size_gate": { + "min_world_size": 8, + "reason": "ws=2 best 0.89x, ws=4 best 0.86x vs PyTorch. Only ws>=8 is production-ready." + }, + "config": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": true + }, + "hbm_buffer_params": { + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws2.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws2.json new file mode 100644 index 000000000..f5f0fd87e --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws2.json @@ -0,0 +1,159 @@ +{ + "_meta": { + "description": "AG+MM ws=2 on MI300X \u2014 DISABLED (loses vs PyTorch on all shapes)", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=2 AG transfers from 1 peer only. GEMM dominates latency. Fetch SM overhead exceeds overlap benefit. 
LDS overflow forces ns=1, imposing 15-35% penalty.", + "shapes": { + "8192x8192x262144": { + "label": "g5", + "M": 8192, + "N": 8192, + "K": 262144, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 4, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 186.062 + }, + "16384x16384x131072": { + "label": "g1", + "M": 16384, + "N": 16384, + "K": 131072, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 16, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 8, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 153.042 + }, + "4096x14336x4096": { + "label": "mixtral_gate", + "M": 4096, + "N": 14336, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 2.334 + }, + "4096x11008x4096": { + "label": "llama7b_gate", + "M": 4096, + "N": 11008, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 1.784 + }, + "4096x4096x4096": { + "label": "pow2_4k", + "M": 4096, + "N": 4096, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 
1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 1.109 + }, + "5120x13824x5120": { + "label": "llama13b_gate", + "M": 5120, + "N": 13824, + "K": 5120, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 4.144 + }, + "4096x4096x11008": { + "label": "llama7b_down", + "M": 4096, + "N": 4096, + "K": 11008, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 2.477 + } + }, + "default_params": null +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws4.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws4.json new file mode 100644 index 000000000..30b7a6bef --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws4.json @@ -0,0 +1,201 @@ +{ + "_meta": { + "description": "AG+MM ws=4 on MI300X \u2014 DISABLED (loses vs PyTorch on all shapes)", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=4 loses on all tested shapes. K=4096 shapes crash at ns=2 due to LDS overflow (65540>65536). 
ns=1 workaround constrains pipelining depth below break-even.", + "shapes": { + "262144x8192x8192": { + "label": "g6", + "M": 262144, + "N": 8192, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 64, + "num_fetch_sms": 52, + "num_fetch_stages": 4, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 161.027 + }, + "8192x8192x262144": { + "label": "g5", + "M": 8192, + "N": 8192, + "K": 262144, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 4, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 167.944 + }, + "131072x16384x16384": { + "label": "g2", + "M": 131072, + "N": 16384, + "K": 16384, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 4, + "num_fetch_stages": 64, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 209.556 + }, + "16384x16384x131072": { + "label": "g1", + "M": 16384, + "N": 16384, + "K": 131072, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 16, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 16, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 239.757 + }, + "4096x14336x4096": { + "label": "mixtral_gate", + "M": 4096, + "N": 14336, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + 
"num_fetch_stages": 1, + "first_stage_fetch_sms": 16 + }, + "expected_iris_ms": 2.192 + }, + "4096x11008x4096": { + "label": "llama7b_gate", + "M": 4096, + "N": 11008, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 16 + }, + "expected_iris_ms": 2.163 + }, + "4096x4096x4096": { + "label": "pow2_4k", + "M": 4096, + "N": 4096, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 16 + }, + "expected_iris_ms": 1.494 + }, + "5120x13824x5120": { + "label": "llama13b_gate", + "M": 5120, + "N": 13824, + "K": 5120, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 16 + }, + "expected_iris_ms": 3.257 + }, + "4096x4096x11008": { + "label": "llama7b_down", + "M": 4096, + "N": 4096, + "K": 11008, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 16 + }, + "expected_iris_ms": 2.578 + } + }, + "default_params": null +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws8.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws8.json new file mode 100644 index 000000000..2c518f1df --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NN/ws8.json @@ -0,0 
+1,290 @@ +{ + "_meta": { + "description": "Champion configs for HBM buffer AG+MM ws=8 on MI300X (gfx942)", + "source": "sweep (3489 trials), optimize-loop iter3", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13", + "convention": "Shapes are (M, N, K) for col-parallel (M-sharded) AG+MM" + }, + "enabled": true, + "shapes": { + "262144x8192x8192": { + "label": "g6", + "description": "Llama-70B MLP hidden x hidden \u2014 M-dominant", + "M": 262144, + "N": 8192, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 64, + "num_fetch_sms": 52, + "num_fetch_stages": 4, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 139.069 + }, + "131072x16384x16384": { + "label": "g2", + "description": "Llama MLP variant \u2014 balanced large", + "M": 131072, + "N": 16384, + "K": 16384, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 4, + "num_fetch_stages": 64, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 167.345 + }, + "147456x28672x4096": { + "label": "g14", + "description": "Llama-70B up-projection medium batch", + "M": 147456, + "N": 28672, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 59, + "num_fetch_stages": 36, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 74.244 + }, + "229376x28672x4096": { + "label": "g16", + "description": "Llama-70B up-projection mid batch", + "M": 229376, + "N": 28672, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + 
"allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 56, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 114.265 + }, + "327680x28672x4096": { + "label": "g15", + "description": "Llama-70B up-projection large batch", + "M": 327680, + "N": 28672, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 32, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 162.136 + }, + "8192x8192x262144": { + "label": "g5", + "description": "K-dominant square", + "M": 8192, + "N": 8192, + "K": 262144, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 4, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 217.725 + }, + "16384x16384x131072": { + "label": "g1", + "description": "K-dominant large", + "M": 16384, + "N": 16384, + "K": 131072, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 16, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 16, + "num_fetch_stages": 8, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 223.748 + }, + "196608x18432x16384": { + "label": "g9", + "description": "Large balanced shape", + "M": 196608, + "N": 18432, + "K": 16384, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 32, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 32 + }, + "expected_iris_ms": 266.608 + }, + "262144x28672x8192": { 
+ "label": "g8", + "description": "Large wide shape", + "M": 262144, + "N": 28672, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 128, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 32 + }, + "expected_iris_ms": 278.546 + }, + "4096x14336x4096": { + "label": "mixtral_gate", + "description": "Mixtral gate projection", + "M": 4096, + "N": 14336, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 16, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "expected_iris_ms": 1.933 + }, + "4096x11008x4096": { + "label": "llama7b_gate", + "description": "Llama-7B gate projection", + "M": 4096, + "N": 11008, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 128, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 16, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "expected_iris_ms": 1.946 + }, + "4096x4096x4096": { + "label": "pow2_4k", + "description": "Small power-of-2 square shape", + "M": 4096, + "N": 4096, + "K": 4096, + "params": { + "block_size_m": 128, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 8, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "expected_iris_ms": 1.512 + } + }, + "default_params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 32, + 
"num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws2.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws2.json new file mode 100644 index 000000000..897be3f2c --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws2.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=2 NT transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=2 loses vs PyTorch on all tested shapes. LDS overflow forces ns=1, imposing 15-35% perf penalty." +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws4.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws4.json new file mode 100644 index 000000000..cc1f9d297 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws4.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=4 NT transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=4 loses vs PyTorch on all tested shapes. Best measured: 0.856x. LDS overflow at K=4096." 
+} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws8.json b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws8.json new file mode 100644 index 000000000..873cb76e1 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/NT/ws8.json @@ -0,0 +1,31 @@ +{ + "_meta": { + "description": "AG+MM ws=8 NT transpose on MI300X — heuristic defaults (no per-shape benchmarks yet)", + "source": "heuristic extrapolation from NN transpose champion data", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13", + "data_tag": "heuristic", + "convention": "Shapes are (M, N, K) for col-parallel (M-sharded) AG+MM, B transposed (K×N → N×K)" + }, + "enabled": true, + "shapes": {}, + "default_config": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": true + }, + "default_hbm_buffer_params": { + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "heuristic_rules": { + "note": "Uses same heuristic as NN transpose. Shape-specific tuning pending." + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws2.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws2.json new file mode 100644 index 000000000..2fe67e154 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws2.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=2 TN transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=2 loses vs PyTorch on all tested shapes. LDS overflow forces ns=1, imposing 15-35% perf penalty." 
+} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws4.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws4.json new file mode 100644 index 000000000..c8977d5f0 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws4.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=4 TN transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=4 loses vs PyTorch on all tested shapes. Best measured: 0.856x. LDS overflow at K=4096." +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws8.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws8.json new file mode 100644 index 000000000..df9a5b3f9 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TN/ws8.json @@ -0,0 +1,31 @@ +{ + "_meta": { + "description": "AG+MM ws=8 TN transpose on MI300X — heuristic defaults (no per-shape benchmarks yet)", + "source": "heuristic extrapolation from NN transpose champion data", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13", + "data_tag": "heuristic", + "convention": "Shapes are (M, N, K) for col-parallel (M-sharded) AG+MM, A transposed (M×K → K×M)" + }, + "enabled": true, + "shapes": {}, + "default_config": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": true + }, + "default_hbm_buffer_params": { + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "heuristic_rules": { + "note": "Uses same heuristic as NN transpose. Shape-specific tuning pending." 
+ } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws2.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws2.json new file mode 100644 index 000000000..cc2c2497c --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws2.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=2 TT transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=2 loses vs PyTorch on all tested shapes. LDS overflow forces ns=1, imposing 15-35% perf penalty." +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws4.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws4.json new file mode 100644 index 000000000..55ee5f423 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws4.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "description": "AG+MM ws=4 TT transpose on MI300X — DISABLED", + "source": "ws<8 is NO-GO across all transposes", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13" + }, + "enabled": false, + "reason": "ws=4 loses vs PyTorch on all tested shapes. Best measured: 0.856x. LDS overflow at K=4096." 
+} diff --git a/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws8.json b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws8.json new file mode 100644 index 000000000..a184b41a4 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi300x/TT/ws8.json @@ -0,0 +1,31 @@ +{ + "_meta": { + "description": "AG+MM ws=8 TT transpose on MI300X — heuristic defaults (no per-shape benchmarks yet)", + "source": "heuristic extrapolation from NN transpose champion data", + "gpu": "AMD Instinct MI300X (gfx942)", + "date": "2026-04-13", + "data_tag": "heuristic", + "convention": "Shapes are (M, N, K) for col-parallel (M-sharded) AG+MM, both A and B transposed" + }, + "enabled": true, + "shapes": {}, + "default_config": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 24, + "num_warps": 8, + "num_stages": 2, + "num_xcds": 8, + "allow_tf32": true + }, + "default_hbm_buffer_params": { + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 64 + }, + "heuristic_rules": { + "note": "Uses same heuristic as NN transpose. Shape-specific tuning pending." 
+ } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws2.json b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws2.json new file mode 100644 index 000000000..9c07592f1 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws2.json @@ -0,0 +1,17 @@ +{ + "_meta": { + "description": "AG+MM ws=2 on MI355X (gfx950) — defaults only, needs tuning", + "gpu": "AMD Instinct MI355X (gfx950)", + "date": "2026-04-15", + "validated": "unvalidated — no shape-specific tuning yet" + }, + "enabled": true, + "shapes": {}, + "default_params": { + "block_size_m": 256, "block_size_n": 256, "block_size_k": 64, + "group_size_m": 4, "num_xcds": 8, "allow_tf32": true, + "num_warps": 8, "num_stages": 2, + "k_per_flag": 16, "num_fetch_sms": 4, + "num_fetch_stages": 1, "first_stage_fetch_sms": 52 + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws4.json b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws4.json new file mode 100644 index 000000000..3d64610a2 --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws4.json @@ -0,0 +1,17 @@ +{ + "_meta": { + "description": "AG+MM ws=4 on MI355X (gfx950) — defaults only, needs tuning", + "gpu": "AMD Instinct MI355X (gfx950)", + "date": "2026-04-15", + "validated": "unvalidated — no shape-specific tuning yet" + }, + "enabled": true, + "shapes": {}, + "default_params": { + "block_size_m": 256, "block_size_n": 256, "block_size_k": 64, + "group_size_m": 4, "num_xcds": 8, "allow_tf32": true, + "num_warps": 8, "num_stages": 2, + "k_per_flag": 16, "num_fetch_sms": 4, + "num_fetch_stages": 1, "first_stage_fetch_sms": 52 + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws8.json b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws8.json new file mode 100644 index 000000000..17fa7051a --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/mi355x/NN/ws8.json @@ -0,0 +1,246 @@ +{ + "_meta": { + "description": "Champion configs for HBM buffer AG+MM ws=8 on 
MI355X (gfx950)", + "source": "Optuna TPE + broad sweep", + "gpu": "AMD Instinct MI355X (gfx950)", + "date": "2026-04-15", + "convention": "Shapes are (M, N, K) for col-parallel (M-sharded) AG+MM" + }, + "enabled": true, + "shapes": { + "262144x8192x8192": { + "label": "g6", + "description": "Llama-70B MLP hidden x hidden \u2014 M-dominant", + "M": 262144, + "N": 8192, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 2, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 16, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 37.558 + }, + "131072x4096x4096": { + "label": "a3", + "description": "Output proj, 128K seq, 4K hidden", + "M": 131072, + "N": 4096, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 16, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 6.304 + }, + "65536x8192x28672": { + "label": "f4", + "description": "Llama 70B down, 64K seq", + "M": 65536, + "N": 8192, + "K": 28672, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 35.959 + }, + "32768x8192x8192": { + "label": "l1", + "description": "Training batch 32K", + "M": 32768, + "N": 8192, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 2, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + 
"expected_iris_ms": 5.445 + }, + "65536x8192x4096": { + "label": "a2", + "description": "QKV proj, 64K seq, 8K hidden", + "M": 65536, + "N": 8192, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 2, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 32, + "num_fetch_stages": 2, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 5.775 + }, + "32768x4096x14336": { + "label": "f2", + "description": "Llama 7B down, 32K seq", + "M": 32768, + "N": 4096, + "K": 14336, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 1, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 5.961 + }, + "32768x4096x4096": { + "label": "a1", + "description": "QKV proj, 32K seq, 4K hidden", + "M": 32768, + "N": 4096, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 4, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 16, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 256 + }, + "expected_iris_ms": 1.864 + }, + "65536x28672x8192": { + "label": "f3", + "description": "Llama 70B gate/up, 64K seq", + "M": 65536, + "N": 28672, + "K": 8192, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 32.303 + }, + "16384x28672x4096": { + "label": "m2", + "description": "Large FFN up (Llama 70B-like)", + "M": 16384, + "N": 28672, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + 
"block_size_k": 64, + "group_size_m": 2, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 8, + "num_fetch_sms": 2, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 256 + }, + "expected_iris_ms": 4.658 + }, + "32768x14336x4096": { + "label": "f1", + "description": "Llama 7B gate/up, 32K seq", + "M": 32768, + "N": 14336, + "K": 4096, + "params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 8, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + }, + "expected_iris_ms": 4.86 + } + }, + "default_params": { + "block_size_m": 256, + "block_size_n": 256, + "block_size_k": 64, + "group_size_m": 4, + "num_xcds": 8, + "allow_tf32": true, + "num_warps": 8, + "num_stages": 2, + "k_per_flag": 16, + "num_fetch_sms": 4, + "num_fetch_stages": 1, + "first_stage_fetch_sms": 52 + } +} diff --git a/benchmark/ops/all_gather_matmul/configs/regression_sizes.json b/benchmark/ops/all_gather_matmul/configs/regression_sizes.json new file mode 100644 index 000000000..40a497b0a --- /dev/null +++ b/benchmark/ops/all_gather_matmul/configs/regression_sizes.json @@ -0,0 +1,101 @@ +{ + "_meta": { + "description": "Regression test sizes for HBM buffer AG+MM kernel across ws=2/4/8", + "source": "28 measured shapes (12 ws=8, 9 ws=4, 7 ws=2) from 3489 trials", + "gpu_target": "MI300X (gfx942)", + "date": "2026-04-13", + "usage": "from iris.ops import load_regression_sizes" + }, + "sizes": [ + { + "name": "g2_ws8", + "label": "g2", + "description": "Llama MLP variant — balanced large (highest speedup)", + "M": 131072, "N": 16384, "K": 16384, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.343, "ws8_tflops": 420.5}, + "regression_threshold_pct": 10 + }, + { + "name": "g15_ws8", + "label": "g15", + "description": "Llama-70B up-projection large batch — highest TFLOPS", + 
"M": 327680, "N": 28672, "K": 4096, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.284, "ws8_tflops": 474.7}, + "regression_threshold_pct": 10 + }, + { + "name": "g14_ws8", + "label": "g14", + "description": "Llama-70B up-projection medium batch", + "M": 147456, "N": 28672, "K": 4096, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.288, "ws8_tflops": 466.5}, + "regression_threshold_pct": 10 + }, + { + "name": "g16_ws8", + "label": "g16", + "description": "Llama-70B up-projection mid batch", + "M": 229376, "N": 28672, "K": 4096, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.277, "ws8_tflops": 471.5}, + "regression_threshold_pct": 10 + }, + { + "name": "g5_ws8", + "label": "g5", + "description": "K-dominant square — M-small, needs bm=128", + "M": 8192, "N": 8192, "K": 262144, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.224, "ws8_tflops": 161.6}, + "regression_threshold_pct": 10 + }, + { + "name": "g6_ws8", + "label": "g6", + "description": "Llama-70B MLP hidden x hidden — M-dominant", + "M": 262144, "N": 8192, "K": 8192, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.200, "ws8_tflops": 253.0}, + "regression_threshold_pct": 10 + }, + { + "name": "g1_ws8", + "label": "g1", + "description": "K-dominant large — parity shape", + "M": 16384, "N": 16384, "K": 131072, + "tier": "champion", + "world_sizes": [8], + "expected": {"ws8_speedup": 1.136, "ws8_tflops": 314.5}, + "regression_threshold_pct": 10 + }, + { + "name": "g5_ws2_disabled", + "label": "g5", + "description": "Best ws=2 shape — still loses vs PyTorch (0.887x). 
Verifies fallback.", + "M": 8192, "N": 8192, "K": 262144, + "tier": "disabled", + "world_sizes": [2], + "expected": {"ws2_speedup": 0.887, "ws2_disabled": true}, + "regression_threshold_pct": null + }, + { + "name": "g6_ws4_disabled", + "label": "g6", + "description": "Best ws=4 shape — still loses vs PyTorch (0.856x). Verifies fallback.", + "M": 262144, "N": 8192, "K": 8192, + "tier": "disabled", + "world_sizes": [4], + "expected": {"ws4_speedup": 0.856, "ws4_disabled": true}, + "regression_threshold_pct": null + } + ] +} diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 43225d5f5..3ede49de8 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -2,17 +2,27 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. -"""Benchmark for all-gather + GEMM: RCCL baseline vs iris one_shot vs iris prefetch.""" +"""Benchmark for all-gather + GEMM: RCCL baseline vs iris HBM-buffer prefetch. + +The HBM-buffer benchmark automatically loads tuned kernel parameters from +configs/{arch}/{transpose}/ws{N}.json when available. Run with --list-configs +to see which shapes have tuned configs for the current GPU. 
+""" + +import sys +import os import torch import torch.distributed as dist import iris.bench as bench -from iris.ops import FusedConfig, all_gather_matmul_preamble from iris.ops.all_gather_matmul_hbm_buffer import ( all_gather_matmul_hbm_buffer as _hbm_buffer, all_gather_matmul_hbm_buffer_preamble, ) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "all_gather_matmul")) +from auto_config import select_ag_mm_config + @bench.register @bench.axis("num_ranks", [2, 4, 8]) @@ -21,6 +31,7 @@ @bench.axis("K", [8192]) @bench.axis("dtype", [torch.float16]) def rccl_all_gather_matmul(state, ctx): + """PyTorch/RCCL baseline: all_gather_into_tensor + torch.mm""" M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] world_size = dist.get_world_size() @@ -44,61 +55,56 @@ def rccl_all_gather_matmul(state, ctx): @bench.register @bench.axis("num_ranks", [2, 4, 8]) -@bench.axis("algorithm", ["one_shot", "prefetch_prev", "prefetch"]) @bench.axis("M", [1024, 4096, 16384]) @bench.axis("N", [3584]) @bench.axis("K", [8192]) @bench.axis("dtype", [torch.float16]) -def all_gather_matmul(state, ctx): +def all_gather_matmul_hbm_buffer(state, ctx): + """Iris HBM-buffer AG+MM with auto-tuned config from configs/ JSON files.""" M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] - algorithm = state["algorithm"] world_size = ctx.get_num_ranks() K_local = K // world_size + result = select_ag_mm_config(M, N, K, world_size=world_size) + config = result.to_fused_config() + hbm = result.hbm_buffer_params + A_sharded = ctx.zeros((M, K_local), dtype=dtype) A_sharded.fill_(1.0) B = torch.randn((K, N), device="cuda", dtype=dtype) + C = ctx.zeros((M, N), dtype=dtype) + + workspace = all_gather_matmul_hbm_buffer_preamble( + ctx, + A_sharded, + B, + config, + k_per_flag=hbm.get("k_per_flag", 8), + ) state.set_flops(2 * M * N * K) state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) - if algorithm == "one_shot": - config = FusedConfig() - C = 
torch.zeros((M, N), device="cuda", dtype=dtype) - workspace = all_gather_matmul_preamble(ctx, A_sharded, B, config) - state.exec( - lambda: ctx.ops.all_gather_matmul(C, A_sharded, B, config=config, workspace=workspace), - ) - elif algorithm == "prefetch_prev": - # Previous defaults: block_m=256, block_n=64, block_k=64, k_per_flag=1 - config = FusedConfig(block_size_m=256, block_size_n=64, block_size_k=64) - C = ctx.zeros((M, N), dtype=dtype) - workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config, k_per_flag=1) - state.exec( - lambda: _hbm_buffer( - ctx, - C, - A_sharded, - B, - config=config, - workspace=workspace, - k_per_flag=1, - num_fetch_sms=None, - num_warps=None, - num_stages=None, - first_stage_fetch_sms=None, - ), - preamble_fn=lambda: C.zero_(), - ) - else: # prefetch — new tuned defaults - config = FusedConfig(block_size_m=128, block_size_n=256, block_size_k=64) - C = ctx.zeros((M, N), dtype=dtype) - workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) - state.exec( - lambda: _hbm_buffer(ctx, C, A_sharded, B, config=config, workspace=workspace), - preamble_fn=lambda: C.zero_(), - ) + state.exec( + lambda: _hbm_buffer( + ctx, + C, + A_sharded, + B, + config=config, + workspace=workspace, + num_fetch_sms=hbm.get("num_fetch_sms", 16), + k_per_flag=hbm.get("k_per_flag", 8), + fetch_block_m=hbm.get("fetch_block_m"), + fetch_block_k=hbm.get("fetch_block_k"), + num_warps=hbm.get("num_warps", 8), + num_stages=hbm.get("num_stages", 2), + num_fetch_stages=hbm.get("num_fetch_stages"), + first_stage_fetch_sms=hbm.get("first_stage_fetch_sms"), + ), + preamble_fn=lambda: (C.zero_(), workspace.locks.zero_()), + ) if __name__ == "__main__": From 2528e8e58272c67113a198539cc6c0668f66bc08 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:47:36 +0000 Subject: [PATCH 54/60] Add docs/benchmark-results/ to .gitignore Agent-Logs-Url: 
https://github.com/ROCm/iris/sessions/e61a7d71-7247-4dfb-907e-37befa0bbf63 Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- .github/agents/skills/accordo/SKILL.md | 68 +++++++++ .github/agents/skills/kerncap/SKILL.md | 131 ++++++++++++++++++ .github/agents/skills/linex/SKILL.md | 98 +++++++++++++ .github/agents/skills/metrix/SKILL.md | 76 ++++++++++ .github/agents/skills/nexus/SKILL.md | 74 ++++++++++ .gitignore | 2 +- .../mi300x_autoconfig_bar.png | Bin 0 -> 85774 bytes 7 files changed, 448 insertions(+), 1 deletion(-) create mode 100644 .github/agents/skills/accordo/SKILL.md create mode 100644 .github/agents/skills/kerncap/SKILL.md create mode 100644 .github/agents/skills/linex/SKILL.md create mode 100644 .github/agents/skills/metrix/SKILL.md create mode 100644 .github/agents/skills/nexus/SKILL.md create mode 100644 docs/benchmark-results/mi300x_autoconfig_bar.png diff --git a/.github/agents/skills/accordo/SKILL.md b/.github/agents/skills/accordo/SKILL.md new file mode 100644 index 000000000..1917a5d24 --- /dev/null +++ b/.github/agents/skills/accordo/SKILL.md @@ -0,0 +1,68 @@ +--- +name: accordo-validation +description: Validate GPU kernel correctness by comparing reference and optimized outputs. Use when verifying that an optimized or modified kernel matches a reference implementation. +--- + +# Accordo: GPU Kernel Validation + +Capture and compare kernel outputs from reference and optimized binaries to validate correctness. Uses kernelDB for automatic kernel extraction; supports configurable tolerance and execution-time comparison. + +## When to Use + +- User has a reference and an optimized (or modified) GPU kernel and wants to check they produce the same results +- Regression testing after kernel or build changes +- Validating multiple optimization variants against one baseline + +## Instructions + +1. **Require two or more binaries:** one reference (e.g. `./app_ref`) and one or more to validate (e.g. `./app_opt`). 
All must expose the same kernel by name. +2. **Ensure binaries are built with debug symbols** (`-g`) so kernel arguments can be extracted. +3. **Choose execution path:** + - If an Accordo MCP server is available, call its `validate_kernel_correctness` tool, which performs capture-and-compare with the same semantics described below. + - Otherwise use the Python API or the `accordo validate` CLI (`accordo validate --help` for flags: `--kernel-name`, `--ref-binary`, `--opt-binary`, `--tolerance`, `--timeout`, `--working-dir`, `--kernel-args`, `--log-level`). + +### Python API + +```python +from accordo import Accordo + +# Validator for the kernel to validate (binary used to extract signature) +validator = Accordo(binary="./app_ref", kernel_name="reduce_sum") + +# Optional: set working directory if binaries expect it +validator = Accordo(binary="./app_ref", kernel_name="reduce_sum", working_directory="./run") + +# Capture snapshots +ref = validator.capture_snapshot(binary="./app_ref") +opt = validator.capture_snapshot(binary="./app_opt") + +# Compare with tolerance (default 1e-6) +result = validator.compare_snapshots(ref, opt, tolerance=1e-6) + +if result.is_valid: + print("PASS:", result.num_arrays_validated, "arrays matched") +else: + print(result.summary()) +``` + +For multiple optimizations, capture the reference once and compare each optimized snapshot against it. + +### Snapshot and result attributes + +- **Snapshot:** `arrays`, `execution_time_ms`, `grid_size`, `block_size` +- **ValidationResult:** `is_valid`, `num_arrays_validated`, `num_mismatches`, `mismatches`, `success_rate`; use `summary()` for a human-readable report. + +## Workflow + +1. Build reference and optimized binaries with the same kernel name and `-g`. +2. Create an `Accordo(binary=ref_binary, kernel_name="...")` validator; set `working_directory` if needed. +3. Capture reference snapshot with `capture_snapshot(binary=ref_binary)`. +4. 
For each variant, capture with `capture_snapshot(binary=opt_binary)` and compare with `compare_snapshots(ref, opt, tolerance=...)`. +5. If `result.is_valid` is false, use `result.summary()` and `result.mismatches` to diagnose. +6. Use relative paths for binaries and working directory so the skill is portable. + +## Notes + +- kernelDB is used automatically; no separate kernelDB setup is required when using the Python API. +- Increase `tolerance` for floating-point comparisons when appropriate (e.g. 1e-4 or 1e-5 for single precision). +- Use `timeout_seconds` in `capture_snapshot` if the run may hang. diff --git a/.github/agents/skills/kerncap/SKILL.md b/.github/agents/skills/kerncap/SKILL.md new file mode 100644 index 000000000..f7d007143 --- /dev/null +++ b/.github/agents/skills/kerncap/SKILL.md @@ -0,0 +1,131 @@ +--- +name: test-kerncap +description: Test local kerncap changes end-to-end by profiling an application, extracting a kernel, and validating the reproducer. Use when the user asks to test kerncap against any HIP or Triton workload, or wants to validate extraction on a real GPU application. +--- + +# Test kerncap Against an Application + +Test local kerncap changes end-to-end by extracting and validating a kernel from any application. + +## Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `app_cmd` | **Yes** | Full command to run the application (binary + arguments), e.g. `$WORK/dev/llama.cpp/build/bin/llama-bench -m model.gguf -p 512 -n 32` | +| `conda_env` | No | Conda environment to activate before running commands (e.g. `llama_cpp`). If not provided, use the current environment. | +| `kernel_name` | No | Name of the kernel to extract (e.g. `mul_mat_q`). If not provided, profile the application first and select the top kernel by execution time. 
| 
+
+## Paths
+
+| Item | Path |
+|------|------|
+| kerncap source | `kerncap/` (relative to IntelliKit repo root) |
+| Output directory | `/tmp/kerncap-test/` |
+
+## Environment Setup
+
+If `conda_env` is provided, activate it before any other step:
+
+```bash
+conda activate <conda_env>
+```
+
+If already in a different environment, switch explicitly. Do not assume the current shell environment is correct.
+
+If `conda_env` is not provided, proceed with the current environment as-is.
+
+## Workflow
+
+### Step 1: Reinstall kerncap
+
+Ensure the correct environment is active (if applicable), then uninstall and reinstall to pick up local changes:
+
+```bash
+pip uninstall kerncap -y && pip install kerncap/
+```
+
+### Step 2: Profile to identify target kernel
+
+**If `kernel_name` was provided**: Skip this step and proceed to Step 3.
+
+**If `kernel_name` was not provided**: Run profiling to discover the top bottleneck kernel:
+
+```bash
+kerncap profile -- <app_cmd>
+```
+
+Select the kernel with the highest total execution time from the profile output. Use its name as `kernel_name` for all subsequent steps. Tell the user which kernel was selected and why.
+
+**Important**: Use a sufficiently long substring from the profile output as `kernel_name` so that `kerncap extract` matches the intended kernel, not a different instantiation. For example, templated kernels like `mul_mat_q` have many instantiations differing only by template parameters; passing just `mul_mat_q` will capture the first dispatch that matches, which may not be the top-ranked one. Prefer including template parameters in the substring (e.g. `mul_mat_q<(ggml_type)39` instead of `mul_mat_q`).
+
+### Step 3: Extract the kernel
+
+```bash
+kerncap extract --help
+```
+
+Use the help output to construct the appropriate `kerncap extract` command for the application. 
Key flags to determine:
+
+- `--cmd` — the application command (`app_cmd`)
+- `--source-dir` — where the kernel source lives (ask the user if unclear)
+- `--output` — `/tmp/kerncap-test/<kernel_name>`
+- `--language` — `hip` or `triton` depending on the workload
+- Any additional flags (`-D` defines, `--dispatch`, etc.)
+
+**If extraction fails or produces errors**: Stop here and report the full error output. This indicates the local kerncap changes have a bug that needs fixing.
+
+**If extraction succeeds**: Inspect the output directory for expected files (metadata.json, argument dumps, source files). If the output looks reasonable, proceed to compile and run.
+
+### Step 4: Compile and run the reproducer
+
+Navigate to the output directory and build/run the reproducer:
+
+```bash
+cd /tmp/kerncap-test/<kernel_name>
+make run
+```
+
+**If `make run` fails**: Stop here and report the full compiler or runtime error output. This is the primary signal that kerncap generated an incorrect reproducer.
+
+**If `make run` succeeds**: Proceed to validation.
+
+### Step 5: Validate the reproducer
+
+**5a. Smoke test** — confirm baseline replay works:
+
+```bash
+kerncap validate /tmp/kerncap-test/<kernel_name>
+```
+
+This is a smoke test only (VA-faithful captures). It confirms the replay runs without crashing but does not check numerical correctness.
+
+**5b. Recompile** — build a baseline HSACO from the unmodified kernel source:
+
+```bash
+cd /tmp/kerncap-test/<kernel_name>
+make recompile
+```
+
+This confirms the VFS-overlay recompile pipeline works. It produces `optimized.hsaco` from the unmodified `kernel_variant.cpp`.
+
+**If `make recompile` fails**: Stop here and report the error. This indicates an issue with the source finder or VFS overlay generation.
+
+**5c. 
Correctness validation** — compare recompiled HSACO against captured baseline:
+
+```bash
+kerncap validate /tmp/kerncap-test/<kernel_name> --hsaco /tmp/kerncap-test/<kernel_name>/optimized.hsaco
+```
+
+This runs replay twice (captured HSACO vs recompiled HSACO) and compares outputs byte-for-byte. Since the kernel source is unmodified, they should match exactly. A failure here indicates a recompilation fidelity issue.
+
+### Step 6: Report results
+
+Summarize:
+- Whether reinstall succeeded
+- Whether profiling identified a kernel (if applicable, and which one)
+- Whether extraction completed (and any warnings)
+- Whether `make run` compiled and executed successfully
+- Whether smoke test passed (Step 5a)
+- Whether recompile succeeded (Step 5b)
+- Whether correctness validation passed (Step 5c)
+- Any errors or warnings encountered at each step
diff --git a/.github/agents/skills/linex/SKILL.md b/.github/agents/skills/linex/SKILL.md
new file mode 100644
index 000000000..dca5b7d6d
--- /dev/null
+++ b/.github/agents/skills/linex/SKILL.md
@@ -0,0 +1,98 @@
+---
+name: linex-profiling
+description: Profile GPU kernels at source-line granularity with cycle-level timing and stall analysis. Use when identifying performance hotspots at the source code level or analyzing instruction-level metrics mapped to source lines.
+---
+
+# Linex: Source-Level GPU Performance Profiling
+
+Map GPU performance metrics to your source code lines. Get cycle-level timing, stall analysis, and instruction-level metrics for each line of source code.
+
+## When to Use
+
+- User asks to profile a GPU application at source-line granularity
+- Need to identify which specific lines of code are performance bottlenecks
+- Analyzing stall patterns and execution bottlenecks at the source level
+- Understanding cycle-level timing for each line of code
+- Instruction-level analysis mapped to source lines
+
+## Instructions
+
+1. **Ensure the target runs on AMD ROCm 7.0+** with `rocprofv3` available.
+2. 
**Kernels must be compiled with `-g`** (debug symbols) for source mapping. +3. **Choose execution path:** + - If a Linex MCP server is available, use its MCP tools: + - `profile_application` to run and profile a target application with the options below. + - `analyze_instruction_hotspots` to perform instruction-level hotspot analysis on collected profiles. + - Otherwise use the Python API from the environment where Linex is installed. + +### Python API + +```python +from linex import Linex + +profiler = Linex( + target_cu=0, # Target compute unit + shader_engine_mask="0xFFFFFFFF", # All shader engines + activity=10, # Activity counter polling +) + +profiler.profile("./my_app", kernel_filter="my_kernel") + +# Show hotspots (sorted by total_cycles) +for line in profiler.source_lines[:5]: + print(f"{line.file}:{line.line_number}") + print(f" {line.total_cycles:,} cycles ({line.stall_percent:.1f}% stalled)") + print(f" Executed {line.execution_count} times") + +# Find memory-bound lines +memory_bound = [ + l for l in profiler.source_lines + if l.stall_percent > 50 +] + +# Instruction-level analysis +for line in profiler.source_lines[:1]: + for inst in line.instructions: + print(f"{inst.isa}: {inst.latency_cycles} cycles") +``` + +### SourceLine Properties + +- `file` - Source file path +- `line_number` - Line number +- `total_cycles` - Sum of all instruction cycles +- `stall_cycles` - Cycles spent waiting +- `idle_cycles` - Cycles slot was idle +- `execution_count` - Total executions +- `instructions` - List of ISA instructions +- `stall_percent` - Convenience: stall_cycles / total_cycles * 100 + +### InstructionData Properties + +- `isa` - ISA instruction text +- `latency_cycles` - Total cycles for this instruction +- `stall_cycles` - Cycles spent waiting +- `idle_cycles` - Cycles slot was idle +- `execution_count` - How many times it ran +- `instruction_address` - Virtual address in GPU memory +- `file` - Parsed from source_location +- `line` - Parsed from 
source_location +- `stall_percent` - Convenience: stall_cycles / latency_cycles * 100 + +## Workflow + +1. Ensure the target binary is built with `-g` (debug symbols) for source mapping. +2. Create a `Linex()` profiler; optionally set `target_cu`, `shader_engine_mask`, or `activity`. +3. Call `profiler.profile(command, kernel_filter=...)` to run profiling. +4. Access `profiler.source_lines` (sorted by total_cycles) to find hotspots. +5. Use `line.stall_percent` to identify memory-bound or dependency-bound lines. +6. Drill down into `line.instructions` for instruction-level analysis. +7. Use relative paths for the target binary so the skill is portable. + +## Notes + +- Requires ROCm 7.0+ with `rocprofv3` support. +- Source mapping requires kernels compiled with `-g` (debug symbols). +- `source_lines` are automatically sorted by `total_cycles` (descending). +- Use `kernel_filter` to profile specific kernels by name (regex pattern). +- For Triton or other frameworks, ensure debug symbols are available in the compiled output. diff --git a/.github/agents/skills/metrix/SKILL.md b/.github/agents/skills/metrix/SKILL.md new file mode 100644 index 000000000..969ef6eef --- /dev/null +++ b/.github/agents/skills/metrix/SKILL.md @@ -0,0 +1,76 @@ +--- +name: metrix-profiling +description: Profile GPU kernels when performance analysis or optimization is required. Use for AMD ROCm GPU metrics, bandwidth, cache hit rates, coalescing, or kernel timing. +--- + +# Metrix: GPU Profiling + +Profile AMD GPU kernels and get human-readable metrics (bandwidth, cache, coalescing, FLOPS). Architecture is auto-detected. + +## When to Use + +- User asks to profile a GPU application or kernel +- Performance analysis, optimization, or bottleneck investigation +- Need HBM/L2/L1 bandwidth, hit rates, or compute metrics +- Need timing-only runs (fast, no hardware counters) + +## Instructions + +1. **Ensure the target runs on AMD ROCm** (e.g. 
`hipcc`-built binary or Python script that launches HIP/ROCm kernels).
+2. **Choose execution path:**
+   - If a Metrix MCP server is available, use its profile tool with the same options below.
+   - Otherwise run the CLI or Python API from the environment where Metrix is installed.
+
+### CLI
+
+From the project or install prefix:
+
+```bash
+# Profile with all metrics (auto-detected arch)
+metrix ./my_app
+
+# Time only (fast, no counters)
+metrix --time-only -n 10 ./my_app
+
+# Filter kernels by name
+metrix --kernel matmul ./my_app
+
+# Specific metrics
+metrix --metrics memory.l2_hit_rate,memory.coalescing_efficiency,compute.total_flops ./my_app
+
+# Save to JSON/CSV
+metrix -o results.json ./my_app
+```
+
+Options: `--profile`/`-p` (run `metrix list profiles` for names: `quick`, `memory`, `memory_bandwidth`, `memory_cache`, `compute`), `--metrics`/`-m`, `--time-only`, `--kernel`/`-k` (regular expression), `--num-replays`/`-n`, `--output`/`-o`, `--top`, `--aggregate`, `--timeout`, `--no-counters`, `--log`/`-l`, `--quiet`/`-q`. Discovery: `metrix list <category>`, `metrix info <name>`. Note: `metrix list counters` and `metrix info counter <name>` are not implemented yet (CLI reports “not yet implemented”).
+
+### Python API
+
+```python
+from metrix import Metrix
+
+profiler = Metrix()
+results = profiler.profile("./my_app", num_replays=5)
+
+for kernel in results.kernels:
+    print(kernel.name, kernel.duration_us.avg)
+    for metric, stats in kernel.metrics.items():
+        print(f"  {metric}: {stats.avg}")
+```
+
+Use `metrics=[...]` for a subset; omit for all metrics. Use `cwd` when the binary expects a specific working directory.
+
+## Workflow
+
+1. Identify the executable or script to profile (e.g. `./app` or `python run_kernels.py`).
+2. If only timing is needed, use `--time-only` for speed.
+3. If full metrics are needed, run `metrix ./app` (or MCP equivalent); optionally restrict with `--kernel` or `--metrics`.
+4. 
Interpret results: low L2 hit rate, low coalescing, or low HBM utilization suggest optimization targets. +5. For automation or tooling, use `-o results.json` and parse the JSON output. + +## Key Metrics (reference) + +- **Memory:** `memory.hbm_bandwidth_utilization`, `memory.l2_hit_rate`, `memory.l1_hit_rate`, `memory.coalescing_efficiency`, `memory.global_load_efficiency`, `memory.lds_bank_conflicts`, `memory.atomic_latency` +- **Compute:** `compute.total_flops`, `compute.hbm_gflops`, `compute.hbm_arithmetic_intensity`, `compute.l2_arithmetic_intensity`, `compute.l1_arithmetic_intensity` + +Use relative paths for the target binary and output files so the skill is portable across environments. diff --git a/.github/agents/skills/nexus/SKILL.md b/.github/agents/skills/nexus/SKILL.md new file mode 100644 index 000000000..ad714bc4d --- /dev/null +++ b/.github/agents/skills/nexus/SKILL.md @@ -0,0 +1,74 @@ +--- +name: nexus-trace +description: Extract GPU kernel assembly and HIP source from HSA packet traces. Use when analyzing what code ran on the GPU, debugging kernel dispatch, or inspecting assembly and source mapping. +--- + +# Nexus: HSA Packet Source Code Extractor + +Intercepts HSA packets from a running process and extracts, per kernel, assembly and HIP source into a structured trace (e.g. JSON). Use for kernel-level inspection and assembly/source correlation. + +## When to Use + +- User needs to see which kernels ran and their assembly or HIP source +- Debugging or analyzing GPU dispatch and code generation +- Inspecting assembly-to-source mapping for a HIP (or ROCm) application + +## Instructions + +1. **Ensure the target runs on AMD ROCm** and uses HSA (e.g. HIP application or ROCm runtime). +2. **Choose execution path:** + - If a Nexus MCP server is available, use its tools: `list_kernels` to enumerate kernels in a trace, and `extract_kernel_code` to get assembly and HIP/source mapping (signature, files, lines). 
See `nexus/nexus/mcp/server.py` for tool parameters and schemas. + - Otherwise use the Python API from the environment where Nexus is installed. + +### Python API (recommended when no MCP) + +```python +from nexus import Nexus + +nexus = Nexus(log_level=1) +trace = nexus.run(["python", "my_gpu_script.py"]) + +# Or run a binary: +# trace = nexus.run(["./my_hip_app"]) + +for kernel in trace: + print(kernel.name, len(kernel.assembly), "instructions") + for i, asm_line in enumerate(kernel.assembly, 1): + print(f" {i}. {asm_line}") + for line_no, hip_line in zip(kernel.lines or range(1, len(kernel.hip)+1), kernel.hip): + print(f" {line_no}: {hip_line}") + +# Access by kernel name +k = trace["vector_add(float const*, float const*, float*, int)"] +print(k.assembly, k.hip, k.signature, k.files, k.lines) + +# Save/load trace +trace.save("trace.json") +loaded = Nexus.load("trace.json") +``` + +Set `log_level` (0–4) to control verbosity. Use relative paths for the run command and output file so the skill is portable. + +### Environment-based usage (no Python API) + +When the process cannot be launched via `nexus.run()`: + +1. Set `HSA_TOOLS_LIB` to the Nexus shared library path (e.g. `build/lib/libnexus.so` or the installed path). +2. Set `NEXUS_OUTPUT_FILE` to the output JSON path. +3. Set `NEXUS_LOG_LEVEL` (0–4) if needed. +4. Run the application as usual; it will be traced and the output file will contain the kernel data. + +Optional: `NEXUS_EXTRA_SEARCH_PREFIX` (colon-separated) for HIP source search; `TRITON_DISABLE_LINE_INFO=0` for Triton kernel line info. + +## Workflow + +1. Identify the command that runs the GPU workload (e.g. `python script.py` or `./app`). +2. If using the Python API: create `Nexus(log_level=...)`, call `nexus.run([...])`, then iterate `trace` and optionally `trace.save(...)`. +3. If using the env method: set `HSA_TOOLS_LIB` and `NEXUS_OUTPUT_FILE`, then run the app; open the JSON and parse the `kernels` structure. +4. 
Use kernel `signature`, `assembly`, `hip`, `files`, and `lines` to analyze what ran and map assembly back to source. +5. Use relative paths for commands and output files. + +## Notes + +- Nexus is intended for research/analysis; ensure the target environment has the Nexus library and compatible ROCm/HSA stack. +- For Triton kernels, enable line info via `TRITON_DISABLE_LINE_INFO=0` when using the Python API. diff --git a/.gitignore b/.gitignore index 0c1773ddf..8bab3e791 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,4 @@ logs/ hsakmt_counters.csv core .intellikit/ -.github/agents/ \ No newline at end of file +.github/agents/docs/benchmark-results/ diff --git a/docs/benchmark-results/mi300x_autoconfig_bar.png b/docs/benchmark-results/mi300x_autoconfig_bar.png new file mode 100644 index 0000000000000000000000000000000000000000..79acf7e65bf5c286b9144f17d4f4503ea7347227 GIT binary patch literal 85774 zcmeFZc{G>p`vv--B$+Brk|7N!V@b$NBxNi^B9+W!N@huDK)o4DBos1~F*2p1%*l|s z5|Tv54C(B<_xt;v-&$v_^Vd0loVAYC`Td3AK^-x7J#XjeYG-|#=YoZ^i?yS} z9$_(2;r+XLtXy5syGVEWl;Ouz?;^*2sw?SAU6`ZRju3GFk??^KemZWnuJsC{Pa zX(7Xm>4Vb-Yk+y3_#1Xce3FZ_RMJ?7s`eQRAM9?P^@(qZ@4hfQ9ti@%)mY+4yr zud!cBOyoT)zNs_c)}X0gP@C}zt*NQ0=)Qebqg{ooiltPhZh6g3Y(E&rd9^v^@XMAK zcDxM!WSf=KQ^R9TO-;1+mgbc{EGjB0>I=MBmcF)JyAJU;XT_PeNbl`y`aVAU->2MVBbT+$?oEN_lAo_S+L`~eR-~LqNJyw_;@!J<8)Yu+u89_5b#ZaozkmOI^Or$}1$LVP**I$L zdP|Htf83LC|CZtRXX&?YZ~31knXVZt=TDW(Gfot;MD8*;7%hCVr8#oJXvggry}eul z0s_(_UNhe}-3Sd$5ict%v++<;R(9*X-<5B>p(=Fq3VC_?2;q|)`}XY<6%$i@_dxaM z>kVc+`}c3$v}u!Z*ZJvh$Livw)(RQtDLg&>`qb&uw6?ak`dKEM_|(E);X+&;E@x+F z*9>XBe|+)hXO3Dy`RDQR@gd>i?M3lFdY&ueDq36DiHV6xPY&kqv23{S)L-@Cxx2{e zGiN@1{77xsu;D{j*HDLa7`J>_*2(7&@FJoT5?69__uP@N4VoTlA4)IdRSr^L=zITO z&$g^OoV!0FxC?z;U})Mwr`|G=DEPfeP;tf|!W{IsoRg4~w(kF=s`4Y{w~ z-l=u^%bQ!N$1^#uu?x_iIdca0lw{MIzVX0CQ5J5wjOk)y?Jv$!clKHb^!Y9@vvM8q zzj}40iHXU_&!1`OE(esmJs6!oclPYEojZ5#wfn&4GSr}Y`t)f<$w)b+l`B`qh+8mf z9yuar-JBfY(BL)u^&Q_bdin>(`8H1q3WAGxM>Uz~JD=1C!(9Y*b)* 
zdAV_CoRo`x&E1$71{oQdNQ3~iYR3=C8s=H=zJ z8OClGbTd40V%45Kdm=3)Y+BeF<7M@BJwI2^5$R0@W@cvASk}CHrLxV$!y4abZX;2q1MzY`g2n}QX}ZUE++KO& z#0hh2>$IHirY7yFA3Y~Vmg*0G`t(U~WS^)gn~si-bZtrLmiG^j=+)jfYtM|U8U0z4 znX#}(q%&ayd7V3wj#(GtMn^}FH3-y?_cBIX@8V2 ze5{~}9J_Vt(j{c5V{@h+9v)YBYQgEPW|Z7 zqfhekUVVBQ>^9STqSRC3i*r!}mr_TX;nS_>>S8Y4yvg>es%oo*#JX(LQYB|+@dEp< zAmnL2F|pOMo>K}JFFrG#!x|z$=@Dq>U0hx!DhJ<;i3v$g-e&jV2~AsD+cWPuafDoo z({IP#62+0W%+;GWUuRq=_;GP=5=lqx^y#g>e}20o@Ag+*qT4886HzsRI1IRcebs}c zBr&@WqM;rKZ*07TTd8>acIEXAA`Pv+(k_EcVOwPA4GawO9Q#ZyO$UB5u&}hnIu6uC z3eLGC={-97Yb3jzZTxXtD1}0Yo{t|t-hAXHVwAHC zPc`aW__b@5j}5ZfP*(63mCem-x(XfJp9h=j>aL+@;bh?B<7-JjcG%Li$b^M=8OcG% z9v}bov%gwhQ&UAriMFz`GTEo}^lQdY*=qkf(?GfvEX%0H#mPobhw0!k6quK5n}u83JR@x<+SqD8H__Aw9Uz7pKz=l z{W%|jjFfad(|G^IaUSv(u3xp|WWCl~TU%d5h1b>7bNaL7v)k|qh1~xN8O7=M{B(dr zfr>|brfv$Ox3~A+!6LVj^$tD7SESv(l{7v~ySQMy0c#W!vyl`u6msk}BDoW8-US66XKYn~D^(u5Ny>b2eON5yD*|UL9o`^DS5?491 z<({-#!0p>>m6Vjuf2zDPTe2MKbu9HhUj+W1GS``THI96WxS;j_^(mM;Z&1V_D~{rM zsFe^u!WGz2liyf#<3PSQc^`lRm0Gu2`1kg_W) zDijWQ|I$9wivp!l$~-?ce4D&;?0%;Q`b(p|N{FR32;(;6nbyY!m94GED7GVa_P&cz z>$t3~*SOUuADe+%^ea{jS(hR)R=$3%`fWfU zy0&#<5v7*aUsPOt&FolTV?hHhX(e^KIr&htBzCjA54&Pc}Vfh&y4Q6*X26RQ#Uel{OFg#i=~fQ z(^i;kXk0^;zC($7-3ks0`c{_^A20my$lVj>B05qXSd;XvZPSvz3({6c2cJ|xfeKJ!yF z6aXm`KqDnSB%v?o)LrW3?)t6e&}@~>>J1x$kKElyLwQbpy=CvaYu7H(|ReH%kU7CH~sxX`k}&p~eu0pPwID@$soC6<622Gv7Os^1E{^YLV0F zd2%gN3rw|*q@<-!x27Kh{H(uQHIjzV=04U zDXR#1(}1sT!}d~(+V#KY=PO^lpaJY7D0lsCL+XlBVs0ozr4Mzlc=;_H`*$Xkdv{!RD;4`o9nQh-fXAy3GMd{L)FK0tNA{HD_jf~5cQ3$w`1H!b| ztX};tc)P6@(-kFNjcDOaH-~J0e~Lz7+4ALSQ{uHq%1DgS-{SXJhG`g|KK%fF3mf&S zQT|Uxei$GSBSn(4#T*texmeP^GdX{8gPAuk6;kXrVpBi)t#y}9hM^dZLY{rsaR%O3 z24+6nDF#W&Lial|4tVa(-O9y-jJ2J$gXlkL{rQijM`-i23(g^5UCGbSCvOJ0rJH?s z@vGu~hwl9}XB#ffAP_mDx%ZsAkvDqhiz=%UZ{UX#kKDX_T?O_)eXA?0st!_Ym+6^3 z9~Eoelir?@nVB*VAe=JiU}tCd3ORMJuU~|Vrht4u5VfF-7QiwiZy*CB;~IrQ?@;up zx<-X2!qGE2udl5;7Vq0AT}ax9{DShM_oQjx#L?;8jfh~^d3suv;O$=Jj%l8j;E7yT 
zg*bUXQ`<6b-(QLpo7=aRx6{RP>@{;Yu&H(m={2ynK0LBHb3jl(m1oP);==`IR#vGs-DTLzf|EL(U0oxs{rZ~+4&1k>jXL(c_sgp!wQv<+#gq%1 zHgA4hxBMK6!Fpkn)TbHF^1o-g96#qa%H6K9ymvx#$L*`Q%f}I$rJR$`J2~B&34Z$Y zX-!$({Bz~@{BZ!kW|NmxU|L$5D>GVV9jWMnLF4q@k4KCpPgPLE7j3pi*S>ypi!Y^k z{7-3aZo^xnGH(w&jl|=}J>+I*XQMq2Z`!yqwSH62BQ5UCQDE@=8qU`{cI>Ep_fE&6 zVW==$wPpZ;yVjy6Vl@>Q8yoxN$rJ6gA*5QHk=pq!UBzyXTaItuygAa|;lqmpm(;dLvyp4M&SmRtg+CMxmddUPGyz@ zChw6G1Z&YFr{*L8`L)U&9-DuDl-tN}g+=@2Cc*;90tGf(dX4p#&b1hC-TI)Un&QPO zq`mP>Iu`P$$JI~fN%YN|JU|zL=wHy5)lE|7#>RKh->=zakj0KSp!Gk0{ydwAX^Hk* zcW@`xrNv9UW_eb(B1CNO3y+K^NIG;k883A?mPa}W4(B-b$)%OCY>`$Ub06!?Y`F&Z;H+GRlf*?io%k4j|oaPMh0a0qMfTHt#`QyCW#e&a@jLy1AU{vospN*Xd3 zzQ^#Jm6f=L`T&`jcEz?-CASG+-8+DY8(?4wWFl8botFq z89fE|cc(-f>+edcBFP_XnDp2}nu>L#f1;I@U;33)B^_~z1%%yGt9M(6XSES53@cZz z1}xs-Kgjzia>xa=fjMa|e@`0>$8oM*o3w_19_?RiTdCKq!|<#EQsFD~R4H!B4ua?BsGkk* z#_nZQt8TjgeqYNJprs%cl78%QdWrz|nErrxjW5~7ruYs9jNu>Ba77ReDX!`+~m;VxtvvUKoZ(1qck)$(vu@epi61}H;SDO zZ18sMQ%Mjz6IAuF8y%A{6_Rh;&d58lL*e55lyzuz19+EZplyB=t(WRxOHF|$gzD}d)d1i|2+jsoVq_zj2^z)*;oFt~>F|n~%ksi}i$}j#@ zO4+IHKpUuZcBaJ1)sk9#kGF} z_fh%gjXDUb)VlY-vS=z-uyU!FN`ZUM+%pEeoxG1r%6Xf?$O0Rh;Z;brXT@WUMdIn1 zIT)zZZ5c*31IS6+?Ee4;OqAa9Sk1%!+3!U@FsAz4=l=c_DgDUFsKIRtF$V2IK7|63 z)gz<7Zza%F5;~P;{gD?nsj2+KGIHL(&Sy?rx%?V8Puo&stKo1~w$Jt(a}+wGwY7)OP#MU>y7A}g?^u7Q zdA!KnR_^Cl+Lf97-99bNuB|s11n)cJ;D=-Lc;LxrrqPqyGI@@isfiFUF4xmA$TGPl z*N6JrJQ2*w&B&Ac$UU4(cG=Aiy*;}IoEqmh^GQn^cgCXu|B_l-Gv?u7+F90RkZl@C z8>Mir(a@sN0%7|u&sr~3wz==&fwly{C6BNIhyLzBHg$Ta z^4yCFjayq)3FV6lYi$%Mcx)U6A`t|%&0Ig8*DTPr@ksrhsQkL;QSD#AI7Ri|y?eL6 z-OR5w){I*#{s7I*B0uNKzCLd3&ks2k*IRyoi0S|JvtMv>VPH853PPy@%?~Pm^TdnG zE6#N0Sp%6j>iD?-=)N`RIQ8@AtwCAOyV=kd-K+LQJ02M2p5r$y zxfnL$ozR}y&6Pc?%D_8%8GE7v7+Xk-WHXo1Am+`^B`iMjUL76JocO5w=t!)3ONxw( zZ{1pr+)K~n68{Wn$U5T@8roFt#DLC;t*Y15C)v@ZNVJc(fQI@52JPJwbLD(jaMj)m zqlF$hSzxAB)YJ}VXOtg;{_%5aSTA&PAk(C1jdKyF(1`*Sv}O`*_V)InXjy0|7S8?m z#k8)j*`C}u)74pEze>U9Pf%bW&w&G+l&E9xfyh3Fd5*nSY;pJQwML;n9iDXhCY=Q= z+P1&l2Bc^5?AjfgP5XD|ifcK@N!EDm(~8@lC=q_?5=~V&ceeXIWM#FNLCjF4_*}m> 
z(IEMNK(`O;qW?O(kb|f^@9G-JMho7(*?C|AQc)758_FNLkNPl5e?-{xgoK2=?#@o* zVpGd|blTiCf7UurHec|an3<`1m!$SC!)Ol`>%v^-F?rB40fZpB`?Fp9AaPLKvyN$Pt27}EBV(2CC2^&~mgef2D3K%LyP%{$r zqH^1m7`!LzC7b&Qd-wdps7(9g60uwF?`DIrR70A&84mX1l3NaQ$vVT!g+A^~opq@&+7!3smLj@4sX}Ebie)$s` z=9oQ}EIW?IF+f*JtLv&ZN9KR#JithHnDEKsFK;hE>begi<=MrbTY+kxUKlkd7iqZ( zX=<20O-^z`P6bLSa``d}H=!m}iXIxx^-C~O=NEtdG|B8oAr*YFd|(%vej!IlV-oah zbLnIAQbrcW6(Rf(9tN1EzqizjT5`|SG4*op_c1B#t^Vn002Y-nvebR+_YaLH1srcw zS5;|9PW2T6c2Z3}N1;n?%$(T zd2_%FEcY&huHRYJ`RE}zqIYrsLnB^z#2_B2+gj6a{38uwI2bUv5{fM9(apDK@#oh> ztq2W;{NLPJAZ}4(#pG*4YSUiFUQSRERZWS?Pdz5jgY(95Ol@FEyoF5uMG8g{Joa^u-#o}Zr_Y@Mbwq^r~Pk2V##SmXNvx+ zAH2#luh}fu$y88m9vr7U?_R!4hcpV@c*xMm$Q)qGu;k)X4K8T4!#=vY|Fjk7JhTv+ z%o^yMSr{wl+qMn*A${}OA+$$h-Y<`NB#B5$Udowi(&1+}Bu(SQA_z@?tj9-S+-it{ zgwV&n_%@yVYjUXZUGkx|-Po+c!ot7s11_!C87S}?p&+j5-gtY;+iFD@z8!G>wXndq zefx5dqz943#-W-5sEHbk(2F}BuNiQrnl^nDJf2~g)&8@f$Ym(Kr4S;-aN30nKfhiT`r;MBQXss*)rU?LWZKu>QW^g`mlklJPJrp| zEMH2)N7|lqX3q{_7d3mG9E!~t`jBB!HaB?J?((a7-Q32AFgA)C#4+`nx6qzkH3B7{b68~E5xxJ0YYQ-=Sale z-wQ}vRFg;9+lJ>{2KEDy7SnH&(w3l4Ch4YV{2GWhJCC$c;xS2&WkDMm1kKg8UjK)| zktt>t+KL#lGdKVFCxGNOXy{Gz;}K<3zkSjKsF?f}=4rYqA3uG%gk-BxeIhnKfA)Se z_)+bAn`0#xf6~e?j?;Z>%Up$k`t|GADUfa+T(8;((a3JKBn!CpQNop6`n(sDm5p5SNgMboeNIqCk^TwDj;;SR53S zo1RIF)9q%0)<%w1XkZL&w-~+8iF#}80nmilwytx5HA6E-lty8YInKVTAklTWnJ8JP z3A}+##(ozfBv0kbmS)w023wA*7$@zY+qP}nDe!dC zlX8AboZyR*9wa{k0inWKaS96Wj8Lvw$?HmQV_ARa3byNu4<9x{XRAWyqM@+wB0X1@ zuCkXEp6JBJ(3h)42CgrFwB&LguzzZ_YXih;ew_!)2%)-Z&$*9~VRgf|01QVaehOmc z4g;U7d1voRG_&gJ>e?OxUW<&p$ri;9AF(wEC3|%R)Vqx_H)%(y`wn@=HNz*o<>k`*! 
z*3IiSM{yw$&w56*|AWT%45(GgpG(%WL@zsL)~Ux6jsH_fmPZN{KW8MA)<%nLP}w8D zacty3_7T7>?WBXuR0TQ>{pJYP$2Yj;O-E+;7507l&OYV)Rs<~8*RDbl!~r3t1A~IL z3J4sU?>*o(qsifWe8Ygmaqd#jY1*4}U0+6n2c4Ds`l4%iRUkiv0%dCAIyA!It@Z5K#4(6B<_a&~rncy*TxCwIKAj5%UMm%5z^zof)K4fLk@Rw4lwLwDqvK z1ISn-s)1Z}yf*2vciZMfBLQ|s@)*&Tr$WK@{q<>up`jtc^2nWF2Rt5e9j_@ecb-(q z2Vu82JAPtE^#bD$+E`h+h=s+`{^(X@%1=W>!C(Od_NUtW4do+zw(|2cP|z>V>Svp7 zfvcv%pN5V)+i)^@mw!emV6JPwR=ajeQQC>yNLp$2HBvHuOLLZ`4mlE5P1K7QFVKEV zZ{_22n)z--q6ZbGS=$###Xum5>Dke^{i`98+lUN|1F7C>&VhDkZBm{y^bsMS{P9$K z*2(GK8F19YX}{y@S|Qz~wbxH{m*myoO*>>~w;K(1qWcn1YUJGKYwLvsm%n!(?M!i( zbP^-_ZJ*(b(4OQ_cP!^NI){6m#Bp&7EbU74L?B!p~Zi7>W# z+XWYw+YPNi!dh9}K{{Cm1~PiOy2RGmfkDB z?o09Hdjtf;s}l)+>c*0tei#}Vj7v-klLu@)6osSF zRB(NG=9I+F`5o>sT!WH{Fe+m%z@TL4hI!^HF#s%gt=}|OO zBf(RzhR5QRl~o8TkBwblgn%AJq1V%XZaoKg+ylFg98wV#6cn;N4T$OqZCK+iwvo+8 zpucJmHeztk92oJq)$^22JUCl_@V)C|4$H1@pFan{q*HqNa>r=x`1twZ;d!w4wsF_T zg1LPwu%%DI+=AWncV3YgHCRRhjDxAGkpjz06|CNCrgW%wwPJtQJsnH2>BZM9k!>2Ugvg2(k)h*^-=iBfm6aVF>D| z?$`=yqv$FBsh&-+-W`oUFt;SREgFQH&`C2e36Yl5gY9m+M8cv|Qc_xMEzoCiwC`~l z)XcO8>v^l!cGcFJv*Kwxe-Pek=A7rmmz7{!X{qKX8B(p_m@ayG(Y;|>&n0s~)6y6H z%KiI1c$#X)54*JKvp6o=%Nnc-If~5TGxaJ&Z85ayYIy-6KFnzIQ zEH*82R<%!UFA=xz+*^l5rbKO8_!s9#vWwEy<`yP^K{@>BEF&c-bX|mBjqGznJ3xfg z)m&V{zfGI>mRMb=mbVqDD;zSc`{Ay3G`Rj4{Vb%A)M?2J=g*6xh|?@Oe(*dJQLc=T zNp@Kv?k@fS{Gb0inmpLcUcTg08-VX+88l2#laEuppl47Zrp$yg_0x1eN1OSsMjy-W zTx4R*wPlOY(yxA2qNq^}fL)~5$)6y$k*=;26dNvXEoiv)N({j8uR*Q^hK7bxJY~N= zuL`yI`#qn3-py?tr9!x1FhqfDN`znsWv?6qcOrEf9tDo}laG%J(J0`ZRq;(5lj^H$ zcM|1?;_+P=l??T8EpqA)%JWN*^?E3g;4H6ocb|~j^uofzD(V5^%*NU%+#V94Mkho> zRb;DINO7vJu6cRTC$IlmTF@f&vS8PJMLOUR{Tg|&pb+=53?Ph1*sF~${jBCzg(Kw^ z3fF5S)#L%}uhp;!{}c8YJ7vGq=Ro5*^uxr9MWav$XXA(`SX`_JJb=U%%$ffa^akTVNnzLxVu;54TdHhB5M9Lt`&o7q8v> ze0B<(mhgj!y}}k-JqEk+1N2a|{-7sne?DFP*)L(2{m^Z|q<65E&?q46uEHELaB+(U z{8#HK6-ctqC)E_C+<(NmE{1bT{bTP7wYs~f2cE!lbsHn|0C`tRN=gp9$iwbMsO?A> zJ@n`My=F2tLe89;YaEtR?&ytUU}bIh6n-0)<#NLJWMSUh+I|VZonb?O9d&pPR`Ld- 
z{bI*8Zvd+d^O@4fY&mKPTv7;PRZq|2s%bS<)s--w(kNh^uM+c;HMwsP+w4@}<;%)p zjgCg8zdK;Wj@ggPee>qc%?X6zb)cQA=vfgJ;4E~FK{o#Sp4*u00z$)96_O5{QV6Pc z2%Iw-3qGiHHY1Hc4hX#Z({SjH?{$88wL4jtH9F*z2-H>uqD1Df1bgk^f% zkjwmBW3j`LzDrY6uEc9RY)H$7chak!1rvUF7)sFbxg(Jv&V|j&c9;EG^3r*rveF6e z^~jv!nR~kD&p&A!`-~8|0-=Tq3=a?A@4NVH>aK`{#4%W0NizReG0L<}08ORdU-s6s z;oZ!ZMo+Fc!xwsrT-1uo;K@;(Um{xo&B`@!jtpvQYMIj`ld-i9GRSb-Q|jYoPgu%! z?w65SMV$t{Cp`&Ux`FWqN(ZfU{(&0XrxW>3{Ri_8fY>8Dkfe^!XnmT86cV*V$YPfd zLwm?_UV6hi)R+)07flRcSZlVcSFd7|f;;662nb02Lx=71X!^hcSWN4?E|Xq_0KYH# zx3Lps+^P^WeJ}tR6eJ=$AgN$2Ve<*2>Gm6pQ6*&D8b@<;Ua)e><=8R8re)q~`HR@$ z`VOCo!53uL@E9z?*P&kJJEQ%)?7D~4U+LL*Xk(XAkc|XLz1fq!g9xG2D>J{}DH4qf zENp^~cLdCUiz{LHD2{#LE#c5@2q8MX_1^S3r2Q1=?(gQV-MFDIkiA%PPS$~ig{7HU zW*~GB++BL6%=Yc7#ff<4C+|rne*XM9+2^rPQ${VMy(^KCku^hlbA|Wr-6LLzh#A~@ z{zS2Zce&@Z2BTb_LyvL){3FfStrP<&Une-{BlCbV9)DK`kd16C15E^q?sQn8QOI|? zK){E{wlj8iaic%mFHGtp)qH+^9ghr}S&@ycqHeFY8!mL=TpYzn?Q->iOnxcf18@DoYNSLWO<+T0mEZiHRxsR6G)q zVYzQDtZV`JPO|83!~}(i0s{~|Dm%pJG-@m_ldZv1+cSYv` z^x%7~n^gvKEn#bbCeV_#D#2$awljYmByY3FCV$Kgz{rr3wS3ijD-(A`dWI`(pYi0H zl?7`06T647%TJ^6(Cq}k%Fp6^?pE%Txm!0mq^3*)Ho@|x`Nvh#?2K0c>#wo1(eTvO z*I!9WN}9TPbz*u(Spt$0yHW@q@k*4ANS`ktX=GC=h0($YxuF{YjBJd&uW;kXX6`~T zc4Zba%1I{nxVHJptqS~s#7H06Y}pW(mMeS&gbd6EQ((KhO15i6+-#d3E$mBAafIq; z;dj>OqNc!zoI(6Z?-J*S4K?zjy5J}dxNfek(bzz9ytmS$ofGpt(m5_LhRz|JW7(W93zlEDdJ2=5X+A}h#B zi2wOLo3OXLgIKIGjV11NNx>!+vCBZXT{#q7=|<`=Vv4NQ!o^mCvLNgjvgr%Zl#tT` zFn$1&j6agm8Bpv5Xs38Q$*Gb4qYvv?!DKaD?2!}ThpWIz6C$5@5nJF@^c5`BNqs~Jkr2WUVnaK@ zdTSTl^&8%cC#PP}f~a1F!2-fb5Gs3LgcaN~=Rv)oCs>av7pRoz&qj-Xyn;oXd>u0( zqC1}}C1KeD2WeX zTO&UNPq;<||0O*4ex#e`k}~c)#61rl1vPvdqLVk!KQ?w#vFmUEhIQ0kC|W7`M$74$ zd~sk@$cnHIRE4bOJ3mmfj@Wk27-9AaDA=Uba~*1pGTzY~Z3#9?1!j*xOH>#3`CdnO zq~%FwLwi_(r&viA6M{r*D z$>;m_*>?uQN2k&L2oO@4!G4!pFMjL2j2@aX6@4Bk?K&kFonBQ}^Ff%y-7N3fR>KE? 
z{~9efG5Us1vj5`%Zvq1g3mF&)Pt$v}-qj@}qy=^k26TAASZ@@XQ+9SHO-Hu=v)7?} zBZAE^-zKutyC?aCipnyudCRe#4tP#6pp@eEw{G29`g8^HJiSw^?Cj(~?}vavb1VFH zP@4;c)vfqNMg*R8t(spNtBt*sb`&{vcU4pBeSHgW+*m`$9?tPO}28cP1p zqDz?CGyd=A8snLHmqBfM^O0xuY9H5sXD8=F@US_MG%HYnSFc-ltf`(?)*cfXfD2W4 z$`Cr}DZsD_)Qlz&fI!7`pvs9Uh4EPmcLL1otHBY($oWXa9mt45J$>~nfq|#MR6INX zDPS*W4jV1hK^1`SaZDU2!Otry`o#Z)MsgOE*RL&UYk?D=cuWpr{7H>ZP>^xg@eKdx z&tqDFu_hYW?ntfaR+wiV(F&j~>*}VDzt> z`!m>Z?>Z(R)}7sRHSZ1tI&jN3)3uUX;*@AlP3vzzSCZM8*@SfayFU zDc{&AMvIX`!LR4wrCGiD(Enw?b^qVkAm97@p~GAGmkj-%&sYE7d_hN8RYgT%$89)f zG5x4CmN(n8-cu4&2A4H=yo4IIs#qZ6FwFUv@F{`;pOS5$!oBS8m?T=nzLmUqQs*#8 zgT26ooD&=wxenqGGdDLkrXA=poT3Il0A}0NRWW_?P&1Ym^$c9*A)!(b!n&t08ubZ5 z+0kYM4Ixr|`7qjQ%oRm+=D;g&S8LZ*z>HSH9}|LDUG1E+hnP_AF#Y%5)`#q2aN_3X z<}_%mS~85*l5tF!mQ3xB9y#(F>61j~v*KdRuCY5fIHa1D%k9vN@dpQ$FAmCVi>9Wg zsM|Mvv?;^+T-aq>A82i&3;cVB5!oQ9n4xAuZ&(foo5I<%+aTH!N(3ry+yRd@Fi2qL zWy=$Y5&;gvx2{RK41R=`1U`bo&_ulb(#Mad?hS*}K&>=1l^DCnz$*${XDQt|B?kDA z?gK!OX%{#H$PiIT2m{eUQjPNkXMXfN&R;~^@^N%D6!qp)!#x=iH1JM901iaSc>uS~ z#fdLh8}^x$`))zvga2t68g%I9s}3JN4AVhBvM<&K#qHkL7&kKLUS-*Hsr?tWsHoSBoj#z&{ks~;Tam@l z+%EIC3*O-7?jDTDKtJI&6R1beh9W$fajytB;>Qcs5xJs$*B|hW$usD}nb9S`K*ffa zTBs%4j2nsTYjesf&wdJ8&Nw1^qGG|0cN5l;jxgboBVZK9>fqz_n*=}urc@`%^{ z&m(puW`9g8(okex4UmjbX`-TU77boOvV*Zi5fB*pA^Q2@4$!J#}jQ z;|r1@VOymr6uWA86ELEB4PX**@mQv}wl*8CniME#k7PEAqELO|66ZuX`a>#$BhVkI zK0R`SQUy#x0Q{t~g2%mqO!cCviH$a+YG`ta`lnn{;*}2}}r3sEF@QMsJI5`nqNa82c0-EGyv}wfu2N{u| z7#RGu(a6=s%Wg*C7}p98Jz2%f{2Fyq^zmFR<*N)sr4Q{(eJ91ZSk-4BkMx3!k z=+_8s`U|)MrR`yUH{zL$MH6oofE#gqpt`;*us^ZeIFA8iZyi|+_9*$wDYO^L*^Ag) zboYH?z_b*8MkI_+Ppcv@T)N{+_Ff!6g#kiyc$2`GH|+ZZJdc5JRw@QVS;ob1zO*Pi zf|dlFiD-t6YG$mDmB+=*=G&*Cfq^SPxNH0ef4h%=RE(4J;Q$^p%sG3NePf-#z|%#b zJm+Kae>d`NUh)?n;ad%p3!B3XZmwu;BbBp)>%)$(!ydp0#QF|U}|HD z?{DLc8#gjgpONm2C+Gf$3$E9e^j$SPrhM%mI+9DUDvJDjLSbH%PsMi~YBx{$)61 z4v&OfzwY&@`0wh=o%^mxWNsCexrd;yVK$+7>g8ij!0fE%?R@}US_t;`un$auMs`v5aaEgjMa1k6hI27PQ;O^-dIi{V zV)sD)e2rb)QiGn1;Y5L*BAf+iMvaFJQC2^}EJ?m_9?}(RUTTDl)8}PCVdk)#pjHxt 
z7ibHjULczgDGhU52*s~YhmMUL<7X%R6uN_H!86E$sC|%tk)+a-k-B(LUf|XJigFd! z`A;;N_QeDgz`i-Qo@exP=%&LKF zZ7L8#tHQQiN7GDBkbuz zX2{AbCuy+`=Q|i#mSxBiCI>_^9OE$F%-Lbk2P2}G4+bM+=fx%{KU?G>(DmRDUVAz9 z6UviD?bNVh>VXS*{B1?@NWaS{B6V3Cfr@Rp4MnppO~eGa0G5N|V^booieGr= zrhnC$mpE&Jn17|!?(5>J$Pp3w13-&6dsz{TVPRp$lilThW)=>6eQ=_Ph9LYjxtx*p zLLi%|3hWb?W4_op978UPpRbD~$Gd{Js@1uNu2va2f;X@Ny<6VkNO#d5_?A)&y1@~D z^Ffb(FftDXn~w6F`3@kjVUSNsVRBh7)1`K$jl3In?!RbV`g%!&VkXeb7t3cPoWQ6DC7PJ(oC z=}teor81|%5UKS$LzNr$0S%U0KM9{#{?xBu7Xi9f5Th=TI&d{c?eIsRcI%y>sybFw zMtTbVurpY0g6I&~Q^J2bEEB9v9007Bmk>TUDhp%cI1B=FZR^RPprmB<4BC*}Vc}W7 z0p$Db-UBNWRZGZ#Wf~^~IHP#j_-w(fJvEN`!Zf?vJC2aCJ;XRc1%_qbMH)Rt#MDJK zaoD|2A!E7zpjjviR(3Pejo}j&Op;8e_rv9I5jic(6l{;)V|f49qq*>hcQ#7+BqJYR z5+_FNV-9$T0DD42r~r6@k5cz&c8DJjAmE4-ybh~ zHJo`d4&L}8s7Hi8DlSfE*%~kJ7ly={wyfWItyK*(QV6G%^D<(pMw0+vHiaEzQgU&% zqM{k4Dh?>_A_3%E2KF=>HQgjBuMUueaP@d-k%kRTJ2lwaBn$5Rvr- z`as;0&;$c;5Kn+eO2m+d7*K-cxhYkLFOqlHbRuW^zLD{$c|jPKjq|L?94gL0*@z}7 z7>xl0@O%Bm6O|zB|C(Tmip(nmSN^q~SuoFIK$_qqAqEp*d6+Wej%sN9pi1>Pq700weyfy8);d`|>9_=``b1RGy)WqzF=4uaVndzbi%nwy(J z5!)hPXayH7QutLBJ;>*f1E6>We zYs<@cQI-KJm5{`t*645PDbn#k^4eYx!HwhE@w*w9Yym|j5+-5>U40LcUH?F9r572E zLK41|o0IcT@6b8*>h2m2|1>DTRR?N3#u#BC-30geuI0Yi^e`D-J@DA9NF;AG7*qkA z7$RMsuFRvUnl9(hD**DO4E$?L>McDnHgoO~Sc_l7j}n@a)L5wsaG}onrX8Dw3MV%q zPpwB`gb{;|0wVj8x`;?~WhqP_iR`-nP#EqzItrtkYeA@SO!9mExpVjK;Y;>DV0nP) zs95E^vP?uWB~DlF$Qu6G6;766pSH&Ku88V944X;W@R56Z_b8A?iQMMa8cAqQ&s>I% z7R*JIuAsS(&UCJ0+d7PpH!l#aT}W0=jteB*MaNMZf2h?MRL>l)nR`!FRPc-~rRe=w zb!35t0%>#KKo(2e_F>gtFw;4M?$ty*UcnRWe1vsc1oKm_%rPwV5xgwXM zP9#4K1Jb2?6Zhx|rusN0(R%eGLsR}(ihyBreX0EY1 zq@`62v_WQ>t-n^o!~Esh(7WfP9fw#?b9QFD_~Oq85f?DxhN1mI8 zHu0NylDsJ=!LJZYCIy$&ptXMzMwuyp*i=s;|2BKReEAZ&g4M1wkN8tN+RmVgf{rBO zL-WmWq+>Odiq&h^(qZT4pE?8iq6wEnRp9#sbsxAmuArd>#w~V_C5jLn0jvk$n$Xw` zR*FaqL}}fvrLJCq^7jI3tgJ>&V?nSXL~S?LEn{Gvo-G zx3_oFQCRI8&>XDeE5Rw;hlv5>d;(bc)$Ht8TO1eg5zP^ttf6_|(OttRxDjw3tzQtR zGwZnWC-8|7W}2c1K_DX* zLi8RKVYP6CE-}ht&YhgcuEwf*pnr&5}QG_qPm7bnN7Wf+omzAQ~$wDkxNX%=U(#g#61M 
zQE!78CqnGO-Yx34=rJgZFELj%V6ujd&0j$8p;hJ?Oh!=R z;X)*t=$QYv8E_=E4&>iySoXgD3(JYc2s8i(hUrMzCE>=rc^^5&gu{zvpb*|3GOvc)_04?+j%RGdon1xzCqXnLXJ>$>h6RmiR;2Xds0KC6XiT`4 zg5QRGK|4P?CbDG(B!4&nI99N5R-k2?PJe{+wa95zM5u#L1Bd%DgYY8L18|3A>jBmR@cd%^cWnSceI31^BBi5Pi96_ix6Gos=}<&{k0T=|o`k+t2K{nSU7Z2^ zWu!fIUCs{fk06ZkEiNvehZi=$f#vTrzVz?eIb`1nook3&N&P}Xe=%e$Z;DYS+~pqs zx=+r36%|IA|K!S$ki1lbY%XL(az>MNyz=}noR_!>Y5)lhOfQ^(Y(-vB@chC59vk|A zwF8(8Zg*Q7l@n~vjRN??O(!|5s$pzLAd5ug%BBjF_70B5>heJ-tcnM*4 zVJRh%4CqJbKHE(&VgiA@zzo;)FC2!nwj_G)oNt*6C{Q8G79~rQ@9og(D#l0)Aj0hs4V+#q5mS#qW@XN=>#8g0D20TQ)*!^|zU-(Ye(q~-m zEmvSMVWB}j5SK5JuyJ(Fobh#*eneQ|MJ#wi*F97aJ*2hd|aqks#_uGw`gJrECw z*z}-K+|#%RkzpN4x+Gr#jE&{V&L0fN5j`-8K*^anI>NVgUK_`iyy)qI0W+TV}) zZBJI1bEQKf^RN483f>M=c&>we2yb@AYNnzH$(qQ%M2IZWB70f0ldVu>-#yp;(R}Ck`=95W z=l?wCJm)#jIX~yjObtGt&-?v;z3%(Euj{(+*RD~X@L--0iQgW{tp^*5D)&d-*7)i; z&BK}fTA}31FFuM)&hUM8H|wxz@D{jq)-ed=t}HhM6$^fWycQRA(TlPe5BQEQBjSF( zDJV}&QN}3L+S;ttp=Cd}Z>82v|KYa$U+_HkFg*jF zoD63mDZ@XM`JFIv?~qr%ti%F?k|Wa6g(HbF8$(7UZA?%ZAKkD@*zZ5Pd|Tc^MfO}Q z?em}^DfzRDO$ICbUb{Az;q2<_`u1}BNdsN>%zE#9H*^1*)P`)Qbu`!&6FgOwa1-F( zp&rV@_6!@D--1@VO0aC0i~I$5V9tj7%(zVN^!f2tpF%i79tc=+JvInEWFvs*y8gv(Lj;AFKHi;GzQ0%AX92eDrEocnKox~2mxwpt# ziIO9p83yqF1((|+%rNVXy>a-TM|XSW~Z&W5|_55yaLfIWduy0WR9$ z50nRU8PEl?Pxes0 z7IA=*JN(ht@Huio$aN0QWuPE%0gX)a?)tue&w;~;S~9Tb2uWNVb3zLdWK>oLb=G%b zYQ#n)Ul)kE+9>8u0M>(E9K1OHpG)`<&Yx||&g!~ed=zR2b)=&0`ZsV9&}{2hds$ms z&u73BazZ=ZJnhDfyhC0{ZnjNX)8mF;9&sb3XBL3KqrqrkAQ;YIcQ>IE8Gi<9jhAW! 
zQK1bjfQrIh7y*%-PJ<~CgwKQ6D@#OnZa2~X&tf@Y8g!ZK7S@J6WH2yqY-s&=hHVQ@ zjrNG8?Q)?oP?bH1Q`ZQST9GVB2njx34PDW>;K{*At(%11zq4k4p^l)J4>&X+ulMjV zpd(;M^}xR0(bP~G&`?-iEmWGq7mfjmOHmw~dx{EBk10g1QAD$SaRS|ipyPqLK37I_ z!7U^fFa#7%{Zxq2M_!=w7T?KmY+t@lkTm^wb1d-$*b7B7~G8j@aFdr-B)r z(QS%rk2}nvRrP0D8U>r70@>5=&1=gAw{QOmmV%?O51jTELP$qx8eefr&H%ItWbyfM zUWFneQcO29&s442~ zjR;kYklSE=XU6#?y2fyqf%_^RXrSNobhOeEMqfwqD)r}=STN;)O!*0r@UQ~aV~emv z==Q-#^|7X=1(U5n2F?KwYX(IJLiA#|Y+hp>hin;^NGSqS0U1h&hU&*Wynk2yHyiv?!O)N4-5fSU1cC!+P4=@8Ls?`dI?* zpe(Hi$PGufg?K5xxle1Qw57Kd#LoXA` zmjmW)3y>df!m1J3P-I^RM}r$~*BaDmd9q?tJUguc`>_CsEu;C0us=KBlkYHD1Z)g? z1UDdl*@3yvs5P~kUtYzX`wdcL*dShx(g=M3n%XMT*k6BY+wv>ZgFlOO96zdeLH&L4 z2dQ`E-!I)?w0z;QK(+-<--N9{zu#a#*6u?t0n!g}0i6+|SjTrVzuP-F{Jdt(9hNz4 zG^#PpwC{K`s9;R~SVSYvQB_;x#)XG|FoW&BLDa=^CI z`ra)*N=&qKm!>aZ&d~cXX)an^`0KlwB}nBV>~CRIOt{n{LG8M*apZ+cdhUXf zV^lbWB9S_q`!KD9z#-Px>9Luw=sbR~>l`a6H#aQB3+E2fPPRu346qyu( zlrrqcmci&lJ_tBgA(DEU*8M?S_4HlQd`1gBcRX_xqZJocWf_qwZ*I=Yc>VfyGpGab z3X!T==%{IhltC&CS!O{q(?kb~ae$HEK}$z@{R9&$u*Fjh4xsXkMpQt)bX$9PArJ=j zsx92>ctYp;+jW6i zJ0AHQ2717eOV}}D5E$Y@&m)8L=l?|a5Mqmj?v*CulJWE~X)+f>SG3NZd!&>CW)?Q} zJ?II&SAJ6QL$tEM4vY-O>V1n!vMG{3g#823>UW4ck3n2qcpuDyMlDj0SVx=gh8b4H zL!7-xvlLQJ{GxpX_5r&(pX5g%aGq-ELnx8 zhu-o1@QI-VX#w4_pp;ogS_!!k>sa6_b5kKzl?@w{perl*1wB{>3 z$?u3}pyHNcb0S>1sM2T*stvOz`ximG`5RHH6-^>U;gN(^Dmt;FG{vX?I3f`X14Awu z&e)ciWiKg|+vI)!{#*ckz=ln5H0O2S!u)02NQ5R9Kuz>rJcg96Q0gCZ-=cMFe0(EB z5gIrTO7@(Q(QN>B$uWdW40Gp!U{20%sMfI6{*Do$Z?z6Ei0VchafMA06wwFp``8k^ z$Ll;WfdNEDFuXcty8_3}EiGY_-^6I9p#%N!5D|w+yLT_!{3P&b5B8b{tMlhUGGge6 zE)U**;DMLByR2L7L<$HNGCJb%9FdYLI8=FTaKl3gS*X8}V^*Q0(dsDjrM`XVqFz%J z!!U#1{OOl1>mlvK>f{}lN&>x<6e|NBS!|qzy!kqU$hjx9-H&1?k3Vz3Z3n@-6txF6 z@%&d5LT-4nTxQx@0-8wLKuo`mrTYjR2s|=xz{}8j!w3+Wf>X#Aq+pOE0+uc|_O-Xk zMT+{iVj_!$Akv4t4$s9Luu80vfL2n#kf;)Tc-?ib;0;AJ_`9fbfG!7X9td9XPBDI< zRjP<6U_czf4%Y%ryV|O6)1$@xz7HQ_&@hN@cz7xwkor!uyWtgQGc@d89ovni0n;)Y zML*0krRPi35-L77T7+kNCe3^^0^F8)!;EGiE*h5VIQjiJ-Yysf3eAMN 
z#zFI3N&*i@D>WhJkwFRJh`vj&iR^lP%gKk$cwJl5QgBdU0U4T_O{>4oUvF_C7sWe6 zjyv5No_%KTep^HqD414sR-n(O;z0O0jZ6ivZ}f;cnL!@t379Fn^!ym|(%Xu@L< z1Wu_mVgSe_A_tu|pjw65u5kAr?6#(XK@z$;%cwXDk+URqLG!UcPcAvB2n`SPI?L!&@`Wd>+GGM8G#u9oD`^wfBzz zCP)W2!;oD#v_~^zJU=@ixV!lIt%51upivC%z3``iy0Ze>~6dP%61!#al zkGJqKXkaKquoRNqBiQds>>4;}$BbeCo%n)B{Z32ojydbAs;-0qlE&s5q^a{C$6v{C zh?!X@TTk<#3;~>`cnD7HFBk-+*|nIWk;BE&o1*fZfZH(j9K$ojzF2~AM9ikRp8I=D zBism17yN<3c5868MVFlM9(30kemD z>QdR6f<7pPNRibE5RWJ_kV*c9)lP_oU~#YZW=$+*XyFJA8po`RhsIz~mr}*8MidS3 z0p!9Xwac(-2b^#mD0<(bw`G^($$1vQ0f! z&fyR81*1yeAJl)er66ril3guL@W}&(8=qK&jf*aotEs7}jolOPw{>oI`2Oh_$h;G& zAV4wfTV>o1G!bnh5M}_vvo$)Jt?uWc;=pMCL%)*jZuThaJ`IE-%sSFJ_TyVt?1 zJPxA+jqd_d*Me=7swV`_>BbbumLLcmU^iXhOk>=NoHiO88Ii>q5=X?m=iqYQgJw2@ z-y6Mh$55o*_4X!vKb`o3;;86*dJws3crDKBg{e;i9wzSvX1Jpp5)#4?*A_T(mPXZ^ zrD1q&?Jv*MkqLk24VMO~Nc~a^HuS-ELA^3~LHbOzUvML~$0MmyT!b9Zw(Uyu=g$k8 zr`WpDwZLE?`#G9+^As$>61!u;;DsGFDl8QT6G0j;@EpP+La^`!9%cipu}yi4Y~XEB zTv7vTxcHP7(*ndJV@GR|R|f3a48h|kF(>jj$j)St@|w9yeIn!R1vIdVkF^; zo|!UU&P;c9Wa4**-K{IY8q)K}6ysH-AAk`#2dfi>P5{V?>*Ga>9@+}=8azDScebr^ zwL?MRZ%Eh&2S9Bt1)%ep#;V^CuPEdHL|8=$mUc1hkD zcg!VanPrRzB|ojc_M318W+uYr#)j@n>XL3WxFXM>32UHPvi9PxG477-jmZL4$ie{j zb&RVWi~|qL5>YyFF91xfWKJziOn_SR6&@z$l~HMXS|13l1s!)_E@8D=e_Yq18xNGx z*qd>KS&+MWAwZ)yBUo-5HC><{>MC-=jb+pxH~S#OK8qu6vq*%j_Q==y6~pu(t#g2M zhJ4FKW+04pHCNOG7-wPNXLroUP5!?8TU| z>tX`jo0sjrf0{>LBE-b?U?f1Fq?Ds?#gExv5o3!56XYJTyJbq{#tNX=!bB z!Fy9}rx1wrz5xsgB(k><)xN)C7t@004h#N9g96oVurjW3fJ@=TF*Qnd;es#5S`?>tof5N2H_?kF6 zRO16wo&X#0BYgHlmCEBn-n7y_|MHTtA(EqiuxHPTVD?DzXuk#ihLAu1z`=uAr_Mr{ ze17@MGU9bGIzSr;flf|?18SGqr{AD_ZOyYp!NE;@JIWUq+7bw^bd(tS#Ax2-p5~!A zEY9~hAA=qjc^E)2Td3#3n})WvrPORgRS$|s27~l^Y1NpVG*_BlMpop`4!1$sgsivJdW_%$;R#kLZ(p(RVi7R7HsQ; z`3b)rJJtgF;JUA`id(9m=+cF*A>!A2!!SDfD$C-<2hPl)=tN%*S9Bz*H`xy64D|Q2 z-W6VbYdgGdEiHA`s|9!7L9(0sM;{=J-7P$aBwPgGQ8A=U!b}&TY3I#ky}NnmhZ!J z@+;RARvT&G{h@;1ylH+c=vO%tfF722{x4c2Prp9?Vbj}}FN0sYxZq~dxlNE%-!unv z(#ppFfr`Bl_J(7+?@DE9+BdmAFy!o3PLAvYaP37&Cycn;7apBcB6;*-{tDKBj^h%K 
z8bxsdj|t~xPtglvQ-i-?fYntv%(-Cz z-1)hq?FAWasFz3n<<3VVIq${UHXUViJmJSab9EKfDD=gf0lQ_vRg~82(O3w2nHcc{ z_Z;@qKtoJ3{?p{G-JV&SM0T(r9ijH)EA$s6H({)G`S&N8{*a9dm=7ZQ_m_#33RzjX19pTOiX-1^tv z@^QQN-o+#OJcmbl!R3eResY!g@pAw6{EffkUinT-=Up?qL2ipaDK971%-GF-Ao>9K z`hV>R%utDw%ud1f=k?%T(eQzT@&+?ZPjf&C{Xm$1rtj!Jd76CyMfqWPhBnnCCR*mdPV~3@{%3^vyTwbiv<6$U8jtr0BEKSA;MhiZVqc*?qkcymZl;aS zS;n){Bbaf29p;}oxY5@ETv4zzLkIxt5ZeK-sz zc{AHCFndcsJ80wlrJb0FLo6D3XyMF~z&~21rtwLCfJ1qT>JmI2nZt46IS}e-t_|95 z2vOL^s&U8k4-HlN#-mzO;7EXl64ro-y`2Hc$@|i+d)<(r(4fY7z+rqZ+Sg6V$Oj4w zeLlQ(wU|W<^cW8ofayW8A`G z3yT5k#{K_p(vS?~>3f`b5J&M~$+QL)NY_sct?y{{xq9Qqe3XAQlVx;y-ea2X4at&aH$Re~P-zy-eFZ#8eG4}M?ACKg;0M;JtH(p7# z7{L4q19YiU3lu>jr2HtOLV&A&qjyTic_*O@Vb8#`SsOk*e70;Im%<=U$LD=!{3zOxQ0%JQNF~% z0MWt1pxI7R_zSf$gVM_EvYDO~M&A-F-hUa{1r!H~X)avKN=JfG2P~||=t;@C`=<*7 zJJAK43kw4ZLsViGbXfexGHhd|pDI+;)U-*^hiwpvFf!2u9m`(L>lw&MG=_ECth+7J z<4L_3oRf-mZWwLQCfR#r^T|A8*vo+dRuWz%#v)8e$5&9dk0x#M`$RAPDIGbS)*)_- z0zNj4n#M2-*oQtF9d}*0f}t}ULz4-9e2r834s}KO1uzz={uel-YL&Z5})_ zmCl2`P3yod9fMDohD9J~LGn>3P{Y&?rXBTXxOmMpUS$M-eZB_N4}%cpopxw{3)Uyi z4=F=?3yXN``vkBoD@V+fFbjw=&9!>-KNkUZj)Qp_FJ9QW^<&Pe7H|?kCAoz`jZrHc zO|d&*(;wP6MC}>kdcB9w!lnh!1Nwss3KlZta5Ph_0nL<3L}ko)2{i|aa&H#AKW`&t+)XbuX_;lPV3X#q3Q_=n|fx82=+`kgR4p4f5dD;WMrLovw;fMtF+MOfg% zA(Miy&_l!D8lN71efzJ!?iFlv$pDI3*$wkoMGYr9I~Xs4>@5;z%aU2}GIk5aiNQHN zJ?~aQkhA3GB{OO>ifeK|kr0bgn@UBhI}&3zbx%5_>#W(hN9c1%*v z#E|y^9RC7IXID6=5Sp_Ry7nfO!Ke{7T)SIP`|)ElPA$S+;-Osw!jZ~Nbh6|^YrRJH z2$UAW7n8Qsy7?vk0cc-&{rBPmjBkqH|M@8QXod9yobvO}SIEd!lrxzxgVeZgiXF{cx=7RUD^#Y5hX@vw<@pDc04N@5<#_zt*oiHkk3a;I1(mTLxvWkQ zlsx9$97AU~`k?w>{Dsyr7g#T?dK>Rx)L3T0wmF&w!D>nhBH2+ zbYVk%J%fSZFV6?qy9Q%vx)hz1n7)0KmS=kf3v7P!x>^_mwq|>=VMH@vj9G>_8rlYUu*{j`|>yqen0KgqWf8+}WS5G~uge8%JaB#&6D3LTS@bnWHNNCGq7Ks~u3vFJs zzwvSUu)l**=Brj)<1{7sh8Cd4_U_R8uj_zf9q%=DmSb;kOf2j7A6adu3ES)~v;_o1 zlOfq*ziH!bWKqH*nAztl#h%ea^Dxl9ftl-PMN>wdXb2PtDuTWs;JL}kg6nC<4I*nM zO~s?LWfdIJdO-&~5EKDAqNNUk?GrZeTNID5^2DUL(R_D&(UgAF)=x2<3C3}4kVAC* z0cj;!$5in_QIT&1COmCSQ-PJp 
zN6Ht>m^SDua-b1PLxXEDauy*c44QasdA2}X2ojZgve&;*eOvasYaAiLtB$0hFMpo+ zNWB%pVybBq8-qLwY;0X-=isM2|LqOO)Nk2otqVP*<+()M>=z$B`U=%Z)=CUCps`?d zD@pBYSX}^a90P~WwXX;T`S60QT?M2b>FSHFw=d? zNbHJRCpKY$@}#juO-nDyZXxbIqPv6ZKM5RVrQXm_D0$S2zYb^NSUCIsoFg=5U?V9K zL5|};0kCbyyr8Af)aT%%zeO?|V?Xe3xN^93rSl;ViZIuWeZx;_z%LDRLQE&AoLPx& zxwrNj2mJmL*!D11HvKSf&%VUHm2P4ZgDiefui?gYaas2>S};hh#BYxsEZY3<8_An| z|F-Ynzamg>_}?E`^W*pazx)+RTpu2i=%|mxqjyQh-uXH;SvY{!PrsY5h*WS~H||>J zb0f$>n8kOA2B1Deete8a4@a6ah7yH`cfJtver5~87fX5$=4|5W!CiL_cb0Kie?km{ zOh5^eZNDjv|DRk4dO`p6`z68bF5pmRG}dqo>_`mu^Vqpn*?Na&(h@c{8?L6|k&)_$ z9pH5JyT|UL)WQ%5kOLvXej%q|1|zNv*dL^)+UBq5ksxm!@aTJ-_yu5(GczNdRaFlV z$O6?SfvW61lwas@k3i^1nrvaPZoRf4I^4ZxJ5hoq}x+^@$wJRM$IKt>8ztCLWSz=fTpG+}n&4 z1}}yG*6x*pItxH#5(nsy;Wn~e|33ULD`lIbqy0s?|P2_7NLHJ-up4tbR9$;EnQtYlz`6L0h?cz zo`qo~AxPV3GjRdNpg0D%*Lp#P3AqIF>!BN~1qVg}Wsz)-%<>lb2;+JA#TnG)SYUq( z+yJj7uB#>Sez z#wS3CMr?eEsSxtBnLQZUfoyS*uNlqLQxnwp0xbv3Jp?E1-mJ$lQQ!76viIy|IrOK( zZ{kN5v#mIm6CCpP49B5gJtHnujB+oLTBf$+quQUF->zKpcG;qch>wOlg3oio%{Gs3 zTS!sSNM7+`w>>YNcWoQ7sY<_P0c3q@wG^lRt-DL~h8&X~Nef$&T&Zv17;h(?5sA zBv+{#p!QZ$H3*v=PR%aH6zwyyZoT7`@s3_RX`e%Gb+nwC!@!r8NzwXiHkWdL+S(TP z;#+}b^;w@n{ueQ|NsA!jG*Q=;(^#SAMF*1>`WD)^#kyltHPCkygfn&a3)Npb`Ym&$ zQKawX92LXZ z)7@!O8v?F3s&9C$4uSSAwd>9F!3RBnmyoe)wCRjVsl8ZZQF_>-M%O0{)iC~ZHfnfg zcyR&@6h=dEV;g zD{P@x1>YD@()Lo3fj{;)UTH;-!-YVxUNw`n&5{@A`gnEry+6v{o?NX_=&fer>!qX` zd9g0_tYxL*Sf|*G?@*PRfSPHhrH0+t2JQOFZEkbKD@s(<>N+_ET!g!JAJ_VKY_i? 
z2976-oOxWK*vP04r4?tIroLIz15SLsOj;+Yc)v859&>0Do$ofe7ZnRN`D38rA)w1> zGkIqaW?!&g114Q@G zVq7i*ME6H&u_anCn`bCO2yx**OZkI5XaRjh;ETVr4JQ*^f=N6PZQ=t+!8$X%G^F#QS7 z*o)RTEQuxbe(*){NQ#a&yBNLrnj$<>Z&40KJ}USEf>8|~IOez)soQ=DqxZY$-D3Zp+D_OsRlg1d%}nZpRq~{DD0%KnG1loJiXdy9Cco3g z3XipU;*1NAzyJQbC#^Fu4F{E2(EY>Wk1kJxN#z^Tg+1HS)eofk(|GWYPGyQvv4pJVL(vb7;USVq>=(ah@kmvudFwW zOVjhn?WqW`s@*PxD>WK9d;YJ@Wmq;M-(UaYd%)~Gh@dm!+N`u@cIkWn>Ka-r=-1@X zaHaK5<#^oFw=3zBJVgD{fj6>5OZMHPONDu`stZ>rtKYl6#IUphq3KoBrG)+mTm3Id z;B)vONu}2&hK2{)UMwgmFvq4jy=cB?8}7ex3NBJ){PW{Vi8r3IqQa7bIFIvUiH|3_WgC8?xt_9klcI+Qh&QNuX4K?kQX)!?&5=jQYVc4my? zNe|@AU#HXVr7Qz}qtl?t6p2jp4t}*nxYM7;)@o*2fGwULEHH!1eFS1Tj3TGZB@vpV(d0LOSXaGDhJyzec;Ep~;f|y~roX#8fi~me(ztRiR{G92T&H4-md9SqKsdhI>^G0L?3pRk zfUW?~Mf}g#OPasiaO1SAi)0y!Xn%_m;pCF{lPLu%v0zFDN8;wuvv6i**}y{~S%2ap z3g|o26zNd;_`r@Hio$jbTiCqAS6@l3|N9F^B#x>J=sl9JTo%MGI;!Onnvo%b%_9qO z+k0nfePY$q{k0b!^SG|xF4>4}1W(8YBp;*L(&PTh){Qp$9y&MHSVio68EL=eT>JOz zvorN2NpYem&3W<>1BGNK9ICDAjFlgMjZJMVKoIjnebK>{J$mfUS6a5-XRT^7V1~EB zf;KdYHx2^a+`N6qjaoP=Ra0S=j+EJlM(hU3g)I#V(mVI%Wi{CkoUHt#Omk+udS43c zY==?7mZC(zxTNOdA)f-VJj+;XnYB2NhbJGNRI#r&0mnJMd3O$LZYYvx!Lj2$xBfwGC33Vr{uy7njXNjo3Toy70#4h zmVw#WgU1HEo9FUBBMN*CoAOqfX58b!t(^AAo@^IL`a1HJ+rVq6^s7jB7k0h0M;e=s z#VNIyhf-CgrKA!Ic%$1e?WGt|UTtV~mJ~RG*r&4XJFIDySCGxw?f}n%yJlF*ah7%{RR`KU zWlOAP2(w7`L$7q~1rd#}Z5$lA_Q+X&QLKqR8n)YUMbS@}w@q#n5Rg?^;6?ddc=&5o zY~6@xqyI%5-6b0!5#@T$SJzuo;Yv!&$W!F2qH0Hrg{qy;c68Kx7pX7r7*Tgz!0Yk} zY2WGtCo>o%{Q5KA<_bPdRN$fKl}c*um>5>NZo2OV#owfcPRelFc8FpgJbW<;65eOd z|8?Kny8};DIY4i&Qb~y|GUh3H`R|$fKP#2+)}6btraw4F)2G@~YW3#w8+m01#Upd5PhgVNMsOGxtadH)k&r$7F|4c0M?l zdmy^n|K^K3K_D^SX^IaK)&HwwwT7K>thVAe+%UWba#%U4T^bRH7;o2g`V=me5Eb*G z1EML1JD@Y{K9uRIbmLn)Q%-nL8nIl-=R2hSIxRr|ER244e&^j>Abon4|Y`&#+Ml~u279unZ;K!7cG2d(-GNTqRFK_>grYVywAGIk_x-FX} z2_z$cKZ=6D{blUcx@oy&e`8~2lMiC4icPy0nMhnU_-Gf3KTUYOjfWc%-Y7L0PZi~@ zy@w8pWI_i?xV;@7()^KK1ic7iF~bUhDYrx4BAym?&P+AU=z=@TcjeM}ps0Ly9}s{M zEG$8^-O;udUzG~9G=>dx4|T^b0)AEM7RBJK|eV4Yt^Xa-jr?gPOc<>KGeP_f12}$?3G@&ZOPZ#KQzr+*#ZG$k076sTH~(;q 
zOSgDmb&;N*u{Y?Kb~F;wjw^>BF-q*!kn!U?`wM^?{F6!!@?pB(yC;4o59z*B>Rbta z1GQ`?hKI;t0-|Bh1TaP?0K6m&a-2un>De=G3G7)A`6MP|Y=C?zl&uDasT>b9FW4@O+K~yMEsFO6w@VW~kNQf+)=lmr9>5`6#m4ew z`zub#>b}1DGd=QhJPl_3R30>a+PDbRfGCPpO~T9~NVHUN5Vj(uxys~ui^x>qX-7T! zNO9?1Q8ta%CUkLFyW6SwL67`>dVqvCl+X-5*}87#9V|);s_CDn|AqzFm;m-_{9}N? zOjTVyFvl36ti?-~`8svRsj*cvxVRz;BLKMvsm3)&w{JLljD)|0dYboUjy!>l?$+l& z7ADOXc;5UcZ5R5$ROU^7;>vyp**#h>2T3L5t)o|V@5HwkrkPck`^{h^Jm9~JNCbHL@g>Dz8 z-7VvS{@1_AwqdyxQwW0U7X}FYc27&!d9->fPLzBZ>UElVQR@hv;Gy;mkgwK}fG*Zk zs;UnWX{Vbtb8|kG!E-o?tz80RUJ%x~iM-D%`vahP%JB`O#QZH|?E+4|D9AeLh9NJ` zghIim5F0%N>yE7q5Hl0*p{nMTM?V0*ZpFW`Vr%(nt3Pckhh8%!1U0JDl1+#{GDnTQ zMG{-_lwlB%u^?sWJHtp_Q+CyCZ$3THcFW(Lw>l}JP)qbHNF_qF19X{B^VUjxfF|6H z7m;0hc<}wo(sdY}WPsrMY?~Alvu|5?%UB#FW-rcoJq+I(xlhU=Qy?j52a--&YV<0b zh#eGz+LXJ)a4pHh!hqR_t>fM;Stlxj$gf$U`*P#JEzQY|*`sd+I4!xE7d2UWeig@B ztB>avP5r8@Zg$K=ht~DyJSJ9SIM2&@Ro_t#FU0`JhftLL+-SH7;h)d^T}GGA(|{w- z6BJ9<_MkWd<&su2`~qDXd%!i^_wowEGzJ0(I3z|@Q+M_i#rLu1j-9S}Wv@~4PruHzs+%K)xf z51$zEOhu~5Y@)cE^MXOr88l^0No4ccg5LKX{N`sM-*34x{llL%4^-p> zC3|J0a3F4%v7{N}Z%@NtI^J6T#CYjmojVy89BQ~Vr3CUS{8C_k@Oa3AgH+Nd}*yBzreU( z)1+Qj-YAL|kL7wVD#GbC5SXcSuH5qzi(`?LloX8ji4ld&yD=*NGe?OF@T4xvR>=;r z_5LWDAoB+BeEIM=k2Ts@V@zQU^CyE3*of!M_=B?f0(Qaez^9uv!)aK(beo@`6jm&} zR@H&ppa_2tgp7IG5G^C-R1Z=h&G}-_SI%L?8l!Ex?|2jC_x_sT(wn$Wi=`_1bpHo za4yXL(!(R__!N&ZV20;x`ejKtsrR76Vn=Gdxuw`_AYhyk=d5$Sgp@(3D`0|s&B48> zZv(y#qaxDBTCiLhjWi+*gxzhAh8FsY5C}Z7VD^v4eiQ$nKmE@=VEo@+2Yq5d!-XKk z?tu${?{BkkSH$Cv^ioU%MC?^)wIpyqxhM}e=U*5F&Q0dvm`!Yx zrAOwkxsS?mt0d-|?2?@@NyuR{_?R&KISd9DN=TkeVFS{T>SL5(4ua^VinB{y^bc#|fkszS71*A$3IKhlP_2HBgZ;QoxQ zOhOh#Wv7f8GTr``RVs@j6t?Yh;b6Y27E2w-ih;p$+0v0|Dp^dx!lSS#=sCi9V9$ct zhvX8%a_e48^E1w2t9D#-R7HchC<4-;ay=!y*2>kP2gSWtQO zpHHd?*QP&J50d=|-pnA5f}jj0I|6k!!1hVF`VL|oXyx&cE|rMS9gauki+n+xEV$d< zX~c%zIeh;0O`d#|YQ9MI5nWUJ>x?{6{^??#3*e9zj!*FdV{`y$C$`oack?7%Xx*)w zf+0cAUfHz%#he5ZEkd2_$wOC( z6rHN$XPgqbe>gR7YIp*ZS)L#!+YEjpRM7*lN;zmY%L;00TY`1CI8&=3kTrTfb^_C3+5xPJWSz##(bcvWcAcV^SLf^j 
z^6#~{IJ-D^HF4JIa|A-7l(1nBQePiAPyrxD53CyXx(;wmB~Al*?xd?$dXh+ z{6&S9@oZ4kAKi}h_(wfU8hk2*u z*nt@Lv`?i9eronA3x1}93W)u*ip?=A;qKV`K0b>UaP4Mk5jlW06rCy7cK0aBt%rdi ztdoaR8+q$Dpqt@^YF?y~6Lt%igctKg6E5uCwo~#I+n?+7@|HalTh5w$@FTWzBDRSh zKnE>+V#7LVV__b90L0y`J5`^(1OK2|>4ZBW>4A+?c~Aqr1)e+riLn+-Ffu(lc?w#X zG{|d$NC(~Ly_|##LApC7y6y4boCb2=&imAQDc%patb0q)u?$NWp@DI0clDN{JKf6| zUAMZIb>$}KXMh+{yAb5*oUoLbi;KMg``+18Urh<%;WK2fMxj&5Dlxm1MjtKXaAc=% zTR_5X2cg2GzfczNNP4TA=kw%4oKxv`)U^MOvM4glJ8Ix5m^4j8yY`}7Q0|XPv{`n| zpqJiPvNy|@bF*gnBg-=1JH9L00h>dKdt9AlMj8UNytg;a-V+^D>Ulh35 zTxYejp?vX3!U~{in*8IPHm;AMV&n;W423`4+!U4E^g|Lh9CX6!+5=Yg^y$+&U>GHl zx1){MK}GgxgFMJIC{#KE(RYIODuBxD4V3J-0n;~`Uqy{5G~qcv2S*M z#PH^Q$X86h4M}n{aB4AR#16(Qss>bmp0s2FJyEh4c4i42V3_bgz?e#B)ZbK!9@;#)t zu#1T}Sqhx6`{pY$qHbO$ncwcS6*d)hOGWc4m0Rh8bkCjFJ$qxd5NA)j2!4K%bh1MW zk(>MAfdw;@E)VQE(`NI`8gAwg1i%1;s??)e_IVb+Srl&NU8Cb&7dP^GjMa z3JW(ow)tm}3{gOiYZ-sI@o{0+a-?jxIJm-K4sA)f59 zlS9=rdWxOZ*@2b4-fA8Yk9O_(e(e_z%_&}Fe3iO&w%gLiI*1fwoZ`!^O8Ot9ROS z4;$}Pf$f!K%XrEZ4 zCt3a^DXX4OAigI4=nb%xAgQ<6KdO3Wa86059jb22(HmQ$V-!XKRru|F&)9IyeE#5C zbI(gtuu2ynN=_~U(gqy zQn_S$vNN|QtJPf=Y*?4}&VtG%{8?A3Ch%X!i8@>>Zd*(GajNnr#doGi-)5Z*UgL1N zKU{1xU+n1xIc|sI`rg!z3{6j1XvPgjhA-UkNpo5uGgH5`E#%h1BX!RVTm_nWeR?z6 zJaT3JHf`3~(!&ix&<`FuSk*@Evgo^e zbfH6P%@Ypq{iYgcE)E4)d+aIQF*s#%b0MRsF;!V;w63YcxT(R4um1JGS0_8f1{Li) z5)-LDV)4c`#RblntfE)(xUPyKxud_+D|%JN=qKFhY+hcE_$*%aJQy|{{3@5wp5X@I z?5cZimcLATxgLvS-;oXC6NBXv2ZkL)MzvZ>hR#|S6L}7mUMIOyNgC(QxgG3^P??I$ zNbKuQ&32YvsM)A^8sWGzZHdqyuSS|=UksG+T{CeSlQtj!;wxgT`o9)3JHr6#a1fZJ zWOvneUlHNWLG~V5BMr(6j;(WizN;{PKT3uLTV#d9IF=26RZEf``IeM*rsu=sOE;#m zoS}g|Gge!$kk3b9sD2$IVpa62iXMGS2z6S!x2&HVVS4lir{3cICjQS1Ja=SXZgHE^ zb`O}@EjF3ZSM(CS>ImKF?Dt1*fSX1XCLmqCAwB`wxU7SKpt) z_yja9JZ#Yk!~5+_^`9waVLT9P(rCc3b-CFyi*jd_gdVgOOXIn~IZ-(htKQI@*4bab zLOHa>SN}=F)CgC~l&Pglz6)FNP)8UHN&-$``I9;x8a}Z}(o;#c8(BU6ZqwNLcX!*G zT^1ES2z0ReI;0$DlJJJpRn##4Y*MMrp<>lct<;8v(oX^{m0H-MldJh-zR9KCr^tt= 
zz5ollBK**|6X}+Imf;iW-JFhRv4Z3dn5efb9kM||>H4BBTxREvI*guc`8Eck$TY_I%llBOCx@c3ANjL0INXZAHT9%N;C4hjj%vu* z+y0{TfPZ-xo0_rL<=#LS&u14ZB6dkXHMevDcI$PVG^6B0@*LXoVv=v8I1_|kSAyEh zXDy@{WjK89LANSa)`X0u?%9*351H(1XJ1!zK9cPi+Y`*AuQt1}TF}ta1;&FdLG}un z*c8q4UoMt#9#@dR=WnHI{`%*n+N{jmRZsIc^_G>~iqGb+PoC~c%8!uGe`vS`@__z8 z^@Nx5mawp-z=l)W&*KNe#iq8CYv&=h*_rV{q3rGUHT!HYOQ}8ER?!r#++QA4V%u2q zr{D|yUsxH7WDm!fT!I0SN%4XPNa{%qDp$Tyxx{4k{vGkD#(tSM7Dql!KQdA(Ryq+K z31pQDQbt!RbF($~0%Qv%Z5O;umO{Y87 z)6{AEeyx`=L3Jr7)2&W^dl7Ha>gBT)B_TX6Eoq1Sj08KU_vrC#I?ky#m~3#$(1)Zw zs&R0mA7)INZ*UeFYs-B*(kQK(8d#ZTP@elB$qYt`un)JnRi>k&sVK7tph(*9*wneR zL`E_{mz#^hscfmzprzqpXXzsU*kiMg{}glW7i|fiNDtZQG@d7B-qkeiy+wM*y&`{= zGY!o>NuKUvG6pl#uKG(gtO>vruUWt}T|9yi>U(ak76PIXwsZ z1t=a5n0kuS9y6owoBR44tQ9u~N@a&@qxEZ+5f$9L6md8$7}ClbxcJeVz~3_GYFZX)l%r;zq`T3#8NtlJkgK7l;6BR{#p%Joo} zA(#?Z&Ai*&GqManz3yUZxy|kRqGw`A+FRwx?;~JnL`kY4HcM91Q5X-^KDpgH^4C3E z(3xRC)~vS_rC%Rvy9-OOhnI5Tz89CQR8wV8OpA$q4&E{La*j<`dUxVb(2V-%vcA;p zfPt)mR_8ejgNn1Kz5H$THr!m!!76?d@wnbBFD8Ysz;mstqO@1+0`;p;`#2{c;w%zS zUmv~d^a~pUljv1@jSl2hF5y))%CsClIMHLD2^Xfxg~W7}BXZZ~USlj$8|ZL7RNq*e_i|JN*5Q0LR^!{1VyWc?59#t@%(1$^=T za8pOUXT+cQHNsIZBw2mV7=n0>XKDVlKL8`ghkXDm-W zW;0NR8^ZLMKDx113i4p|&=lAoFQ{R~6F=bvC*S=<4BeRsPHbINLd#fD3fkXDZp8D! 
zSMbe$2)zIEKkZdVaGQKjN@qrfy}VBjdj)u7Sks>V(4rog;scw4l#s3)HXg{aTekxg z4~v?v>-ETp3z_(#;04dk{z@|&SO$}<7`9210_ZUI#)d8>oY)46|oZy z_>%p>DxU|x%>KfwnOVJ%A^tN$5c3NE=T8X8|3AMQDn@g1w+3F>$lSEG2I0@p0|nk6 z^@MMyndsDDIeD&Xj8hxR0P)G}PY$d7AGKWk!OkJPm|ko!1!7msi}`8^OSOFyP7~fC6f=mo9a-B6PotsC!7rg`p3ng)S;B^b0G;KXP<%_^@bdUZ?osX>iGf3BcfH zIWUzX*u!ef^44@3%wT$LB7fw_M$tbYQxMToqoqe_P_i8u(eh|ui80DGp(#NCg;jDF+~W_Z3;zG z)Gd$r@`Q>ZzGMqyK~v6-9n!c$I(&g?J%>!4>f5!fQgm#!0cXui2m6h4wjjF&9V)B+Y@YAdO%MVq_7=5)Ugna`8J5x47$P1LZ8{ZLLGc)l6_=`NLnurSFDO+!|Z z@xU0Z=%}yY)Mem8^x(l!QTfsWnWtORoT+JynxCoF0E%mosnbNAs=JW=XkSzirrl5V z;EK6Lu3Vzlqh~m&6T3(w5Zel@ojol4y_gfOnEiqADh7?Hw&Y z{0CY&XRzmy&UI<6d=o=wBp%$e-zQKtM!Py$;&5%h-XVX9*$;8%jK~i;lJPePyV?Kw z|KLw2UX6=sB0cP)K6}+zn-vuXNB`6xZi|39%%RD ze6}ah4atJbY>2};!JHL)Zx{;6Q;X#*D+v9KyxD2!H6kNa7`h0ATdw^=o=^UwWwW~x zAL^Mi4rgGt6_In2Jd(_4iC(S}%WSk399Gt_(WU3pgXc-!57KZ2Vg&zvfqf!4-{1i? zAHi|_j+Wm+>Og_Pb2Xl|@2+zaXf!hLmH+V!oH=V3=Yp5$w{w5m>UKyEzEgF^tiO)I z$d~Z{$4@fZH@TiQw-oJ5WDh_kkR2gC`=-usR5RyYBeUl)IC+twic^)fWG2%O^HM6H zj0D4AJGzM?cle8fd2R#LyDZU7q{HYcF_Vc^q3(9$Entl z>lnK7ero#gZ#&eJa8xk}qB^x~!6tCZ0f$;LzmA2Sb6E44FgtFD*YR}@d&vSv zzR$PZ`k`1#iVoB>`bVIMFx6=P<@Xv{a}oVI(Z-oiZgKhv#5^my>9Mjv*wk9|#A#;w zzuz;@&}Yx~!WRa!5o((fvqQ5Vqi?FB?jQBv`B=P$KF1bnmc2{DJ}RFA?7@p)$)JZ; zxX?g8W@`e&`Hd!S65UEG)UG6v!!GwgTl0r1&e*Yfh?8S2I;Cu=#Scx<)T%&NPm4ws);{`WufE$VDGKN zvP{?R;YS_YQOCd-1sySvQV^BK7L`!x5R{Tekdk!N(NQrTO4=f&q@)B!1*AktDG@0F z2|>Dk>jZT6{NDF_zy1F6{qY@}WA9@WV%rZnpy_{S=jQ9{NAJQKJ zP+VN!P10pVMiouI{{6F9zxUW_ zzz7VqIl_H22_Lm;k+Cm?&71s3*AL-SL*-dWahe!9swi;MU<~xwW#x5K?_8oKVFO`s zFj_K-3^a%`B*ZoO3SHds;taF(7=qgceXP%%*^|Gc`AgU{=V&~0q7fa|E}ZY~jA?4z6tAD4#f>QQ#lC%<^y!dk(iAj{ zna zUtVmT%7E9UUtSUDf&$nTw?llh-}w_`C={Ean#NgRlCYGz=dO545H*6Dhl)WcB~eNC2r7)U6EqM)O>z*uC6O_HRap%HEHyvj5fjF-|MCU0sX0ZgADR)BH>TG|O+fmuji}ljX;mChT0eP# zTw*Me5(7}A(J$452j*Ta*Vye+DuF#TYQNYNHL^+1$T5jO`&R}O6*%?9*50Rc;c z?Zu8HhNl7lD{7NkUBE+-?D^JHSKH-KQnlm}4eipniJDb~PHcNDMGoiy@|PUt`uaRT*aj+L zZK!o?4R0Pn-A(fK9z4Pc=`>~&Ku|jC|%&}&)0lQ0~YA4AI 
zYzls(%AQsUHoVitlBum@DR0U@hYf+6=2}Q+s9U86C{Y@k^KMlAj-BAE6*ex^!zTFzPo;?S9F4gSDyppSbSail{Qlv?Oe*+)1i$+kQ z(nZ9e+}I>Z?iV7cr{-sk0mw!E}* z3^g#Z)bMmX;+H*L*Ax$RLVT929q3k8E4pmtnwz_a&V_P;p5q66c5F3`4#}&V9Txsj zB%*5P8>}xVZAS#BHQk;NYR$E%PyPvfB8&&hj1_YZE$1$}Up>{0V>FYcvRMufi%9{O z&>kAvr$&q1EuJKBqs_vc-#&9Fam)0kAWc-#0_$+(QNi&;Yuy0?O>_--u72z^QbEI# zgh%u$OZ#zQLQRs1B^*=M{zKU{bZR)iVBl-Lp>dS2a^j15oF%qgntcesJ~WP|ukkL& zZ>YpTcQ8M-`%{np@gg3Vqiwwb#~t_UD!AztrMO~l;l>W5&;JEep&_fH6w{ysIpz9F ztwZjWNjtrE&A0Gq{B7{tuNd;3(&60r)YU_C5t2;Gdu8qJ?Fc!!{gqRjxb@OXCm!_k z5$rk8fGM~2<>E|@l=An@G`I^qRtaa0dje%j8qaZmZm{ z79b}isTnWN6P$cgi9keV8p5Y(O5AKkc&Kd8P6GtlD_6=zvsVOWkb?^fq?2KlNCSRU zbx~)zaQ8=Nddl)`pC~<2Crz!H!C$AD3s&*5(-<9fD@G7=%2z=KIz$#U%Es}fF`z2S zYv)epT8Y-}>qt233%#Boa4k_1i}Gs_f+X%)2KfB?*wDQE>Ctq9W=pU{}1`GzvsmQ-xi zm;5a~o4h+EDHo20bl}6>m39gEh^wUvW$&*c25?#JjQf1pK*NKb(g6!SE>kv2d+7 z#&EY&TqWfynWMa>nh3pQLY5c??j~kl(dbXeEFs4xz)6pUs`Re{MjAHvI1tf|uWjiF zCo}-#2vJY@QJ6}g2g`*ZthL%{QVjOl=o$7xH`J0A=Fm)**yckLKT)!PL-*^SjM^B` ziA!7f4a1r{kf}od+2Rn=NxdGy4gcB&82|Z>RrCVA6jeD?i5NZ2WSNi>A2lr80=1)t zM&RV@OT0t$jGhv~pn8x-M@mp{Y`ZGDJ8Ysd#qYK14vLTHQD`@wvW zj*|q+7FlRMb@M)#7^3w-$eUs!lC~_I%A*IKeDGLl3~UDKx7s)HwPauf$+)D++{s_P z@f=}~bL&<_yW`Dili$nu9WBObbyRJ7fGu+#@=UEymZS46xeM`po>UGa&6b{hLgRX* z%}qjGL!$^h`QIPsSs;B=TOssI{`tyXD7~VFk`Vj>+gcKkf5mpHao1dkHgLcglO_28RFwa@ zxwb-S_%E9a%jpQ!chvoZV&?P`vNVA4fx$PhPB{o`>a|llJL60gdN&a0%|PUGfZLxc zRe;47%y3qB&YWt_qd&Trm0#?@4JM;ADH7BHKOhy8q&e)fn&??ldxl%kkTGLXK25&= zb6WxPP^J8a8`OAtsq|qujJr-G58A>wS4j-yg&BQVY99W%q^YGKuomT!LPQlondxJ_ zQFJ$NiT1t!dpQ&GMhFc2Q=WXlupPT34KONax_Ri56UC)PVoLwGCNt`|;oYFtn+6Ye zNhVTLl1ln*@v+^AEK&cm&Q&FF=0`c*MEOoKPM}YTFd>65$V7$1nnO(t8Ih#re(x;N zB%h|jADfp$LR7g*4*=2#G#r1rp^+kjc2P#DDx|9Syv|TZfy9<+_Z(K+*{i zlVJ~Me_PDbOqd5D_zo7BI-dOdGwCa7%vOOoFb2Saf}tUlYbNQVedIqvbF(yWOnunJX1S}<*4{I=dLP3!`E0>pTiWzZ^*(H0ReyW)uO>JT^VIvXZ^iSYB{a{~O$`~1L?82x(*LOwJX%eZQc3xT!3d^w;vgIoM2YPiFAcE5+( zIXd)6@o7qQf6dPE(%Xb7Q#6|9A8d2F>TV?L{5V_Z0r&Pvc(3P6{q6buA6cD5FuXg+ 
zrdJsU-S~X|76Xg-r*m3@H)FD70jn>3=P<;vxjuWdb@lcqwE<&p)lN?4kQ_dV+BDpu zI%oN=>ged_4}XIN2@}}2kd^QFg=QRKBl*(Qp$2i0y|kA zgc%147VMq4EXS3%#FC>QHy`0#J{_pjQ4EdQvNc10%0K&A#ipdb%CIJKM*)o}nSG4bNXM*v3{$E^)*PsFZ ziMe}(Yues4?RI{ow&)RR>v)f<#@NE{qNmxLI3tyY2dnK&>qPKDqUFwBT_`X-(qVn2 zd5gcj`RSZu$kyr|z8*NOfc3WtD&z;9UvAA<_VW}Ve&a*pxhoat+alqtvuFLsl#%X+ zKu>0=V$|^NM~5~e=fY|+z6pha9=wQNgRu%wg`7%e4Ak~Vr;!bL-@bU&jIZcw{A>37 z^t54eB*O4YN9bh9!t5{|eu-KX`OfZQzy=1RSNrqZ7dQXPa&d)G zRiG!2$Zfb1w25V5Azxi{Vop`s(jcPwDIb#UW&uE7rw^K!z9_1AFNM;+!DSb-%7ioj z1``EDlz(2jNTS?Q*2l*P}aLp z*gi%=BbbzEL8H{KS*0BrtQgmW>eY`B)cNfqIG$p*b@g$Zv{NZoSNhu`RR=6x1m^9j zOFU;uHVm#DC+F?aa@kjX*EpeKQ^8ixU)Bop%}~$~jlvQ<)%kvVORe~--P-`SdlxQM zJM*XZ^Im4DLLN~+Xj#{#q>Vd5O9!G_XCyME>1#`|MTUOxMz}mIU=tf&y;+9mr#pVa z&4gHy{0u$zii84_poVdeEi;v5xhSOOdz#i>tbt1&^{iefiK#zSKDwjy3=PAEJ&9E5 zO#{q_bZvv`J{ao8HmBoosTkwM_@BiePHlPaK+@%ZZNr;LV&(@(rAyK*R~n6g^dEMU zYEGl!2|w@h?d`z^oxSD`W?do18RioF`f6`&p*U|U6SFgqAO6OhK5!Hx#;7wn}WqI$Nemz#sw%W6R8*kGvsJ`f@ z4KsAAr@OJK%WZwbgrlu%;=_y~xFDF8>?!ik)fSx?#{uG9kE|QrbIMdV=kW+XG0t#S z7KaS`e7lNGSuG|?CD$(iJoDmaN^beM`SIoMutYAvI zG6Tj_%l75_3i6kaH;Y%mGHeGX5ck$&kb4w~lO3eL-Ygh&br_ZN{Av`vhk#Yo@G(Q{ zxK%ev`7|W}yrU|zU1>iDnC54U75fEmvKDIsZ01p{3Zh{&rN~nv6A$ zHtPp`ToodiznX*$R-P|@vE;$etJy77ywg%e1i!sSf%AfYf#JDA%u(mY=~wj1IBmV@ z#m*UduRF&TH7H>0vAFKs3Zqhaen=;1gcEQ4MC|aTSveR2@IJ&UnGGN`K7Z`;+$(_! 
zEB?S-q>Q-wZmk*ON2mtUOvAwLX63#rYujd3YfYzaNx!UnFv2BO64_Fb#m}U3S%Z_c z=AzYZv_AW+HSx&mEyo6PPT3XAt72ku33{#h=?W^SLriOtt@XqeVs&#C3YXZ&n+Bjb zYxK|%3*b&42_J)^r`VyNd%<|^IE#k>rllKxpFeN7%%ETlOSMy5qTZ-J010@46_XYThM3JVLWu^R{jT&@I!QE*w4nEtnl66DHo&$>G+K zJ?QaHOe;n^Bd4ujsp`<%pjzk`*KWpz zC!ZbpYfyC@7Vj{Wu5)9n-jX*300l4@Sos*UHUe4F^B!mh<_br%A^mk87l2Q}NPvQ8h3DINFN8v&G;5P7fy=?BMQ+qwzK*(8e?}!2`g99a$ zE1&ixK{y(AH+K!*n|;ve(TC3gd0WxG#Wins0s&w_P26Ww-WA@DVUP&5dcENnCu~7N zJc&qhF$U+5L&_22I|NbqSOCn@1=vssS&J`V$8eQew9Pu8U0wf=N;)a2G!GL-omL*@5$Jd`(KV8|3s)KAzg(q{aM)E7kj0XPbx z!Ge%OmCt1^Cy@CIdeG7#jI_&h+iK_o5`ykgj$jQy3ME2g0b}2SF*)7LHa>6&{KzHZ zwTK}wtQN5bG_MEqr|Ek&?=G3VI^2muz-q1}*n-sL%}E(i_xZf3+h{sH-jO6##0G2K z5)4wYQ}4(SgY5;zBJzSkhGB|w+>gLa7)XY<_QRX3fy6ZsaR56Z&=z1gv`*{6=6%?^ z+Ys(t`6N0bv4h2zlT}+;h^%{>I%e$-Cd4dH-y$@p$hoH{rl&Sub3!;8H^IP z@6w88P>D}|bJ^X@p;edB=@xPX`!d0m3X%Dv2OtBg5J-qbeX5YlcEm(d7Fo0trJ@7) zGvtCe+Q>=93{se8J>15qlbAum*9{fOb%5louv|4FQQ#{WTh@GghWIh~;S8Wb)`9;u zvGT=GNE)FvCDZF}a^b`#JW7}@jhR^blFJBv%7`k`qZ5=U$wX08QaLf@`$Ml9HJmf? 
zo?LrD1L;?#8H?F;LPj}HT=Plz+ilC2N8lnP)^a6GtNU}a|2-O>LoGUi=1`SAVfiQy z^Ir0nwxE?@>5DhX-#H=-B-sL2)Onc1jKSe54fDZhP`fN^*uw zJ<8UE4;FWob8uA49p!Z5yHXFbs4 z<&sQ*cB8|~7ceA_$LwEf(G9SGt91#3VX#GC*1PDB>3a>f^7b@OcRa?D=iEg~UdRYM z$T_ktJXYGJ9c;@{GHI^{h}*Mvu{4DJ7_K=?oE%oRV=Z$UV+)C6^<&Mi+V5`h!#%<$ zk=3SG0|+X~#IAirl*WY0g6)xcpC+)27(|+agjz(U0D{_4Ye64%3=|jTte1KO#z@sn8v9T8pIZLnu@2G z4~ikI*bz4O0qr)DF#y!_Ex0A5sLLa9TU^xV29wpPY=%;}mpfBgac%@mF3H)O=AZdhynS~y*rrGvO zZsP%`9-rELJxhGwo~d0HyLghUxSz)5F|Do;K6_lT`Fv!~(G9$={Jj{t#E`RAvwk&Y ztN+e^%-&`4H5fgHPZ!L=srJ5P!YbJfrY^133;URuPJ|^o4Iu`kr9ylF?kKreOxTm% zvLky1tVNoU3GN|13D zL7Fi~Y9)!DByiH$cL#rML`cXdT7t~CLH0+KIl+R(7L9h(fLaD_1bC}{LDpef>VzIb z^{t1(@F=-`kOdPW;A;rz2w*4a5qjRivCWgf6{AP_@qt?kBLAG@&FUGeO@- z;xF4ZS9RQPz~D zYSrcF-F77Mz)c}HV-oxmc`4Mc2+*1&2xMeR`hYm&s+~>)FWJHpa5k6-;W25X{|a*~ zSF@0U$;egGW8vFG>8uzKM7e^m`PpZnO=CGk6JMnEL-Z4;=2=27U=;dFP z$9#5jlTHO!q7v~uu0&%}FVm&fsOmUy*B0ml#eV~1_f8EujG?0+PLF{gj7j!P#gQHn zL8xJT5cCZbhAAgeS?n@(H3MG(8IHW3Xqk`=pTvV4HJ7i_ZKjnA>fDPoD&&A|~2hZ1I#RPHZoQV-$M{QESj(~{^BIT@up5uj+__z78hw>#oNOHmPP5Id9fL1dZM0;nWD zjhI6y+oWSB1)!SS+3A&V2gRY0A-^kB9{wZX-3rKJ8et<@B^|tY6YSQbE>QJNgFO`h zgUAI`4U39qk0u8CNYt9p`SY&oJEUF4s9$*eANbtsNi{?%l$|8G{J2V(+L}xYfU^hP z!utX=X4muoTC-0Wg*u=`w1C*QnNB4kj1Q);Kx$|TC-U4)YJX5OpCZ#86+=wz8d^Xj zEdM-e!EcByg@BLjr}<8@1JTiQuyfARloKtKY_WJz(`W*|U~Q|GosNRJDbZnyLRZV4 zjOvay6W|hE^}lU{_qgsM52N+u--WHS16qk2$&(UFrI4R4FLR;E9Oa~aV!q$rJmi^J z8EBS&S$tv#)rxhGtz&YPamMeSM-x*%jT%l~&&J-v@OJW|=DRrGd3dtu{$1xg72SFh zgrsw(dJEol@+(CAJJ+)1xBvX<|KIoEhNT3~oBl=z!H^v^Nl`GzwTVo>K0v@@yE=GM zgW=WQhH5X|G94?=*S5)P5C@ld6Ml}qnIZt3n4;H7e|Kb4etAGW<3;zs+kzDnFrXLF zPJ(8a1QhwCE+w%G`5BN;Dh(ll$s`x?YE&M|7{zOD!zrcorRm81Z?{~<_sH+wqV``! 
zid5|q@yR47bF`f5-8hCu2nwgeFaNmFZaP(+iA&sQT=Iz2LX`d4bEuP}qg_Z*0OeKl z#o4T{0fs+)Yq0XaHH)K>c1ZQ1J({XAE2(58Wgwa$E^?_X1Gv4q>TeyP>@9JyxpaIA zsC!a%wvJBuy>1h}(KzHm2VOBrCmU}PWkg0?C^9K$Q+I)KmPxa77^R`}Z}sd(rc{M0 zn&02QlgdqU@?w|7QG5n9rzRVv;zS(Ja5lc5%*4lNgod<*p7oQeg^YeGjfwMSQd%!- z#|a}cOvpdtrRx!`t%zZs5Y~}lE$UJlQEx(=rAEt@bk7LG8fUX*=-U2o0pV7K?*ohO z-uB*ZJRVNplo?x(r01TPnqfTuRqcvbXXy7S#Z#@F6TJL`t|JWJ;N_+ zJ3MuEv0q&0?I6l_TZC1YoZDW0_|oiOD9FD_&Yz>>_J!R8K1X(PT=+qAM4q~P~> zBGBn=`R%v8VC>3qhQ*g9Q!)pv-i%s#k7J8x{q)me=BN64zq0JiOiAYS88g^Ei@>zH z6pQ|R!|PxC#2i%W>gpD?kKl0O^C~6|vD@_C__lQ9$)libK|_~(j?;&$keA@f)9!R0e?B+2=+b+3E!HMg59MxN+Xs8EuuB3#@hQGwXse|3euiIm zL4gzwt9NHdE!Y7M3EGMiUw4ix^X*Yev$v%VJjJ(6H80Pdvp=KxIu4Wm<}^I-gY6jd z{1y(5mIwZ9-+zhm`WlzlIMrp9UUit8T5X#f5;FYhP<+g*03nX3|-1K6efgybRB zI4LLRwpB6Gh%-RYA_%81?l5}4EhWu^S0fTm899(j;h(_dROjY#9H`jP*r=ljyK*PV zB6u-d*1ediI>_93^c&PxVK|Y{pe(%NlFZbo7vA`_Ggr!t20Sw=^^HHIDreqUp{T9> z_>HgNBaF*%C`PJz^XuJGAI6$!f%&AB-P|KZeeG2OI2h~7l`En4peuTMiovX+E=8!#Tg=8je?H5tk*0>G#~oKKhtqL^+QJm=IEgrr6a*h-1>Z2s0y?*PKf~@Qvf}nB1Z4kX)0K*_) z*Jl)=5`P58F)qwbN=iZ-e!!bg=lb4#`{K{HoM^92*T9d9FoYq8|K|Ji{doI3L0qHr zd~jK$JA%eGu)+$)sjK;QZ|<{&vRMvsa9AikB_#!u)26B>lcu)x+cqUJUw-|2Z!p%} zothh=AF9on3rb+>E91(Y|NQ9t&0zd{cjL}RP_~Ee-o4x1+bdu9;)(zSXFF~yr`acA z9fv|v_PV@W5fn)ox?!E0PvZq0hT^zbZoDicPoDN?$U-qnTyPA(&8KC+U}$f>eMt2C z71+qG;5{XWd1U!`19@)sWI3di6G&Oh)~pGq2CK9_QZ4Br&R`;PYTmqgWH?*`wI?lc z{8p@?FhZp$Xn=&;`OCI^j4K2deilFQ;^Sp7SZDo+mt$W)IE_0fb0L?+9mav8VXDs} zf#C*XsHfh4?hEwu??+DsORx3YeQqz@`jhtk-h~)(633W!3f^*aTZUuh6)})5kJA!5 z?Vzt0IrgJG|H#&f0f4u_={AUqi>oSQXIbJ120sy7zI_gjukY?dx7;sq3;eGV&LYP# z)>RH`7cnw4F8;KB@-8?#rJ|4B`rB_WBJ+pn=)EElF+oA)bLY;fB$)>y1M=fC@*YUx z)--f^>M~wm7#{K~2rKEJ`B^BMn;)-CC0%I;W>GFBq!v_myR# zCl6t`>H<=?fNuWEnX_k8fbd4}Ofan-yQa%vNDMulQsppk4{(J@Tm_L|Fh-t`948kh zCy6DuTFgEXBtj%miE0Zr8VKctxgC<38~byZEE@pgo40JseaICm-#GL%NvUJ;jBT39 z$k?~{_o=b$4DnMqJN`V5X;0y6>)d$MBVSfy`BphuTsjuc?HyGrTr84K9T#zKx&4*P zm)|?Gx1j!%$M5i+M{Z!<>9KOMQDSu6?gu<)2*fhkwkOEm!C|B1jTO8KNH+4158qwT 
z8*kgAJ2*JVVck(3nxGt`{U@dg<#1-ZWDz9dkI{&}j}KK(NdTc0LPd?)PAnuf_9-I? z#z=kdmx_}>-(*4Rl|*TXw|{u9%4Xkn5$~r}{syN3BmU8WSnQGz-}}E8aq3qmUAkc1rE?!o1TsnirFRQ!{p8hMIsrs0&BBT zDxSsonD!$kcHQ>mROA`OZD}nIz77029K-c=uqC+zv{!E2yqR<3#>_rDoCe(sEb}v< zy6$h^!pP^^&b}A@>0n ze~5M2p4HJ0J`a-P)BPer$an_O1pbk2(VxU%45XOZl}whA6U(}*eEs*qZu~uQ@sB+Y z1a|)a@T0FbKVGHl?BoY&Jicg#lNR>p?F(}l&d2tDUomER#aj%S=-|T2tvJaKK z(=XrO#(6$$8sA;9Nay&X;q>}b$6(0SOX;C*uWUy7?q@t=oyAz^`r~e{yOFYa?%v1O z3uiI3xlL|gnA(=~pE02y&mDhT@nf;#FJ|li^P{P)`@gyy%)q6WcbR?wR{lut;r_XU z12`O}i@}gKk=(yWZeYuf9dhs9y&D4fR9uq{s#pNq9p#MlmH8`Z+n{uok5Cbi3J_Dr zf{?!5R-F{f#d0)Eyfb~=QCDheHW3$eco*`Q^VN;(*YAO9_Z^5CcU}!K0mHImILN9D zIBGajp#ieM))S8pdyitSQ`mdk>EDqMX}x1SrsY%S4U*|23`SDfgzRxAARwT)o*dU2 z3w+8Sq-y6d7l|Cgn@xV9E;ahy3}Tu}_*=tT|A9NF;?{NRB$zuaTjhRUxlIBeOaHxcY}BujBRFs-}l~q?qP8~=+`i$RGloh?{C0xp6#*G zbEVX=W8^xuQ4+nIKeyl(NjOI+CYd+VVTkS3N&E$A$OAzD0rPn9LtJ|~N=yz(m1xa9 zn{G{k=6FdP8;=r%(wbtrpD3bn@)QHzy~md>Hnyh5TK~e~f~C z;YzeRQS3Mf{fKm1L?dcyyZb#b`#8b$?bv=cUOQ*WDEMr7bMu&aD|o_D;G;YGbn(rU zrvcu(ItuN;2#osKC1=iCW~O4wEsknsh942?%*QYc53Fngi5WlafMJpg0EuC|eBRbZ zJ11`nf)0Id<;sODp^e$-5ymK)D{=J|F>FWi_6j zIZzsMd>>ZA@r%=Eo*x^rbpS=Xdi$Bnc(0ATyu66a+%?Ud>dA4%-H-up5fM@I#5_@F zQsQ-ttvtbTpSf)g^5%I=HV5tL^&a;#_;lO~nTjUL7Rz--?rT5OuDQA8QQp7XQ$C(O z(SQAc!jov9HSvUmumUWvq<_4w|;#A~N0K)XKh9KpC z_vlb<{{Sj3_R-o?*M4lR42_LPH_bsi75ed`Ce@dAeP7OnXr$UaWeTM}L#4Jg%Hm&l%G!Y0e zv@U}Ik}$QTmVfTi*pE*k@rQQB+I5F+u6#h}C1Lh;>#5Yl-!^Qp zY|3ASd@Y42QRFLV@ugA&%_Sbqx%Vr`W2Q1j=SFo+O=Df%1N8JlqO}-&Gah%qHbISi z97LiTPem$e$$L41wvMKM}vqcikE3xl1$fR3^;|^(!ENsLRk3TvVcZv1;qChp>Y8ucHQk2^5@UTIU>_9=wH5c=_5D|ZRO%R_Tj^adGqI=M6+`qExX)pAiD6_FP6h0 zD}zHr01wp+ii2DLHhg2mPSW_ed`5?cht%Rt?Cc=`Brbs0JYS#nGo{j*^I1x$oJ951 z3b|fOM;mw+6mDT~a9F=+(INrU4{8@LUi21oNXEoe5T|0~?lrDV=0jAZPhfmN*{&q< zoR&L}x(aov2()pCclo~G**XT}4i91-tZZzIYm(*h`Q9Rh#hcabrGO6RDUz+R&)xx^o5tsL%)E zsNZJ^o|#)IS^{7K`4NkVh!|Ny_MiY{hz{yfPc-|hBN5sB2oq)|#*I0y*$N-RPtd5mpS(Z1YJj%Tjj$V z;y$RP;Y6;hMV`I)(5SyIF7CPb=I=zK9rWK-s{2|v`3wK$A8ooF=QGj+yk{S?p}Dy^ 
z?n7;@26Nf3zYfa|<>vRO;^9{658`%HhP`&}+WG#*u$$Yv`})c<|CHaT%-U<#Wvz3f zp}jq#ASz-7C0pRaz5yt&f|1Rp~!n|8k17`l#UkI-VD z#~Kn68T*EXT{kYh{9lU9IPoj}#frNAwXHzZTTJFNymE$S@jG1w74qny8n)9I&$XkB zkz$=3(K%G^wG=Ay`aSgsZjR3%{=aT2-FYsJv{ZJQAg4Y9aE9F$2j$y>W*?m)K4<+e z4;2e<>+ao3aPN5QFKV}@6{sbbqyf}B-VOpr?`2I)Oe&*KU*b_q)Ks(vzy<=;+T7Z@ z9H5{UC$Cy!G^9@3Yf^1bf>T37^EaB|#etjoJTScXmV1Z-Q3?ZzaFfU3}O-5A>p zMWvb0jj46OgL|Pzh;8H4NV7L6kMvuzbSauxUI3w03cGB&G}TjWjzgOy+fkj=ev9@{ zM1kL*nLxWfL%}8g^h{<6NRoV?Dc>xt*gFeZfk-SOi_CwMb)Z6bp9X1O;$R#)(TMHe ze>qGp^dX?9R**?l6T<4F)$ACeBQYZs4?wyQ+KS4ShJA9-XV8I7R}XnS|9j^Q_~bH8 z7aw8XzI_{lu1CN?b3xaj6pbea1jl}2uc}i&ojjcD#UD(aGb|qtpxM9(dlZ>lKF1#_ z(;QM_^a?x7Zu8&|1^oTWwLYPZYS7xjYVuVwAOO#hgWp6v39T_HHX`4A=qPv~#}p zbD73@{ecW^Fa!V}`3CDrD*;tF6QEXsj22_>M>WPSovh{Am-5E>3;oA|&O(6;q zDq$MwX@m+AKms-C5ER03XENs{C5gTt7KETXFm1F?b#P!nyC`JZ$8jo#R+It<4Q>Sa z!(>N~-NL&D^6dfPoOzry_$zV@CpF1JlIbRFZK7I%BqWMt$U*y>h<$k?{c8#f++Y4Hi{R{8r9)6hwC zpc!c`3cRtes>Bygb&wH0I^wqF0npbM`aX$?yPrbl}>f+X&ZO`zma6`~Iy{hxw_s4>}nP=X)ahx>Db_DCkd<D(zB)U46A+ z7Ot0*kM9J6ctQ_FCS*z?Q-E9X*xK1W+5hK~)tLGVs$H4lIGTK+zr*;(Q^K|IYgM0O zh3o(Lb}KLWP%q-%dGwblyR~6kmSFYu>sRW@&=7BAV`C#~1zlYk3J`wFTiCnBHLHY2 z@a)4BBD`@cm#tnM26d_MtJ#Ph#fVPwiDswq=A4KK#E=8mGbxV*(slusd|z^{I;uHt zWd0^2Ba@k#S&<9jHixOHDI^;f2{B}2vvKxD#IEu_i9Ym%Y-c~9lyV1oLI;V}Y03DD zg#LEm`WCeI2tva%TxLj=WrC3GS`0yo1R7IWoK!|_?kLvf5G41#I2O|&`_@{Dm@l_e zt%a(sG}YDuF`xYU4Ju*+cN)FFk7Sbz;2*7hIX(;cYQA{msxqGU$BrG7!?~ZdT*5$< z5*|gSc68!0!dqpm-falx&J~_3gw6t3b}PiEVaBu*sfq)UMSU;}m(qREX$I)@!yp#? 
zBa-xz-(c3!0-na;ee{E^3gIF6m>gTT9syrsb~YW)|M~dXFokoZpa}S87{8C+gFW~b zLkr(8eB9es-UdnQaRks3;^dtoPi%vWXnReHB0gWdL;r{8 zyj_4ccbxmQA7$`ilyC$IKNdI5e-2?WeO^@9o2;#^jce1?aSO|llHx(SA4@t2*A@kA z^?02;7Euf%!y`Nx0%Q(RxME57Zb=QM^wxowC-#aqNS1|p1y`1Zk~fJFj_ z4jv?ABMih6;WyAJwPA6Up=%&v@$xOQ63k`Gmj?oDZNm;t*pGKs zMGR$9*lHfJ@GaA+6cylDv>HG9U14-I{xt4~W$+golJ~EQZxLGS8s?cGDfV{#EFF2(HCtwT_|~p6!O5 z>QMn|CoI4eUUf{i?lb``PZzA7#J+U-^2Tq~#H^jOu;7Q);B)ms^bT6=USvIFL!_@k znbHBKGC0zR5#U%^+1c4Y&7A3l0@?>__kAm%PF;2{B!Mag78aJKuC=RItk6TGKMhe) zJ`PMT!2UXBW0N>{-aPYbdxR`owb=#jch!n!0qrV!Fos0cglD|4v1~vS%fqViO_nz+ zqEy;lZTh#0V#rlh108F|M{ja+apkup&0e_XEV~yBaNb7RBbH1!ZdWVLoiy=r%1BTL+mCr@KTTeTnfi^TF2{=^!BR zgEic-XU`s8V0a1UZO?r~?fM=Ie!wTJbm4deC{GtCo#w8x=;&=|#aOQSQZjPk!gM|Uk=#-sNBK{%oh{kMBiww3lR^I@k(4ZR(a z1s^cjk>Y~wZ^A7pgjzfzA7Y^^^Ol=`-i2&}vO&5xm0-dW1Vc*o%ej=cm0Cfck_~Pvo{e%cLUGE(yEO zgG+g$amJmpvIJ&NxMZBo*H7PMenLd@g0XW8hW`^r{dbw35`~XkGV&IYA;hcu#U{R3 z1@50jZFA?&MX-H@loo+J-3F?`a`mOhfuMa7ENVHBA0T2d-_ltT8_XWKs`zm^9#Lac zQxM{1rTa6mQGrL?FPyChxnBm&1rbtJKKB%mHZfvb5NFP_YeF&b3XfM65k0431_m%wl=~(lM7ec&aE8tD&c%cudzg%>ZPoe6t0Q1f_`0} z^H~BeR4)eI4DJ6c0e-tXl!sc#LmlBtRNa+`*R|8R2V`vA+0-2sz0Ca-O5N@ zI#E1z!LZp7$I_Ba0f%)eZrrq~vNb$UuO>n*$=oVEB`JxRa6+HQMh4qkyPFC)B^TN? 
zt`)Oaf=co&;t=sAAOI>_u}jVsZddJ}muw*@t>@b=rS$0F&%~qvHwZZ5!DU-mAAzSD zge`vn0ph$8&r38C*!AT=&Ku0CaT|M4O&XwsqIGn0dqOg>+Q7DI7|+QbIdYvx1BU@F zYLP~apwvgr2+-=FCo-r|r8{nMJ|4fp))9G>JJPHPQ4FK!8w4(VJ4;F|Sb`<;QKxEJKD_hV9tqXiXKZLZ zOsmybeF1Q1t)$5`5b%=Y5G%d@U?%zVP z`uOczS;~|wH#WYsP)o6j&@J?jKtkX|-O?gvg5XL?g_t^c>TKVaJ>5(l6!bb*5{e-u zmEyY(umi}}7rEdLt_#O47$SX^x10k4wNR&{qa)PgXv|RtV!aZ6Lmu`PI3q|OQRLXz z*cjwL-on=Md7L5S@Y8!lZl5>;KhFkQzE74O(HSr@K>Y>G!mhk z=CG9@D*_REf~X~!+V~9k9gqh8QY}wz)gZLK96~}WXzTh#3W3bEd+c=|Q;RvtEY^8; zLhHGp?blBSKxm0YnAE04ko_ZVDU5D?@er&@F!sT$hku>LZ8q^!C@W<8&p~gI{OZ+y zq)=tRpyn19#mL`bsPcr=r;Bihipq-vOa!$^`_O7pE36;b7Uo?Py9SEzeD-=TK_0_3 zMG>mI3j9TdFFl&Y$gmU~{*Vy4P%hQxT$SjbXpCOAd@bu1?o=nsnjsXo#6?DEq@P0F zb27oGLN3W%2LcdANTr#iiJ65pFAdw*oeJbzE9K?#nu|; zC3t&XrC)pd_O0CQcplzLAdWi_eRku@xSSS2)E_lP8VZOB)J>sdEfPHiA*u_js0v13 zN`VA50=&a8jUlu#SkT_;!fjIAdbKb01#P<@V`NSF>a&59!)fUnxJAWiO^A@Ys&mjU zbIz)@p4h2)u1_} zjt~CDV-@Vv=+ujby1LWP%A{msd=jq5m$~<>T)Fb|dE0NmfWpuy zB|8i!U||PhFVyyr5O^AIs2FTHe+7>mSo>2pbu)t(Ss^vFz+|@+nf@DT*Q%;iQB|jR zK4qBw9LnpxfRV^sro-Nms;qtU9Vkp+F&7+M^&9~g*ts_6(aGkgBT1VM`Je$aNK*Gh zET-?T2T0D!bc0D{oFu10xGa#|<-h!L_rZ2bt<(mhUHy^^oknDyCvTNIZrM?+VGe!7s&pP0Qr|`e<8klvIpZqaVSOK$8@jG3RCL-QLGGIlKRMl)O`|(>1CA@ zZVlBlY&L56m2!NdFMSU^#!~Cx<)%n23f2GWY8ytA4EE}7h8XGfJjby?Js{#4eRg;S zV{whPdD4LX-Xc@)IQ`^Gs+;DFR*}zjh2k+~NKpSc_InWtk7TZaB0M>!SsKDMGmMW$lC#5d=jD z7;LCprbA(?|LNzwjMv(9A-IbrF*-*@F`tgzP=-3`e0Kv`c~|UAe`Cb^{2PJ6=7o7ON`&@9ghO3xvcG1bCvO005OJ|>6k($^FG5#Gu7?Y5+X4t7 ztpVDN2qZpw0je8u%77O4O7uEHjU~W3LWYogiqUHEiEL&t zVje=`7lQ5@N%Acb-P>QH$e^u70TcV`nP{j6P(3Lxz5}psmIby(h!P&g)>0U;mc;7G z{JdtjERv=i@DfX8cT&V+U4wHu98?er5R}rW*w}y;Jze?%S{FD|bCP@$El8Z|2hKbtEgkttyfp#belW#cXhx&2KsfaP(fIt{R$cwiB50ZouhhLp|{I9?M60#pK3o2O7 za1Or@Wdng$wDwWg=wq)S9(rOxW?OO?Y z5Mb5(E^7o&5DULeGebjY7Gr$fEXExZ$SAfm^TCQ4-Q3=U;CPS;%fUGH6g+z`yvx#+ zE6)NGJBDI8AJ-!VXfVIV=>m4PF}oM)#@)WW=$tNJxe_RJE2WTzkbUr4)dPcrUdYyT zbl5%MW8QeB;L(*@$MBh5GS6(st0Xs9KYIA^Cc6bN2YAJsQqxmkU$2FM&$+QEBL!({1dnj 
zrpistn@=;>ze(%aNcW(s6JVxQI%tRjo>|GocnCMyak}B`lx_e;UTE0rJ335u1=+d0 zTdBo#buGYMl!DgvCcvz@ix%xdqTdBU2%2(y>@StIqGdO){rTs1bW1y+@!-P~2|_8T ziziM;JRfA@o4F3R+aNM)!kpI*L5rri%=Ok;c8HX^5P}8)K+J-Q3c@s>B^!_2Lnq;d z*iust2=evyj+<*i(8macm>{qYu0k!v`D{g7gNB3Ze>D4pT zXatX@1BN=t1ipV$-FENZ-%-|MSs?qZz2$noFd#+i((GSYSm^!hW&Ksumb0;$0L&9|})ACn%7>C}r+HaDY(YGSo~UxR5Pv zvHRo^w5czsVm^doE}Qi-!}(AvT91V~AO%a&7f}8KXpRT zyx}d}FWBfG;q@O-x$;^LO%A}(>6@Kii0G~T~||EjPO zIG;d!;AnscWxU%dV*$izK?p8Kui1;Iw`J!}1w6nKI01w}^Rk7D>r36DmAH1>RW34& zk5Mn^6H{v7M&jlvu(BT|9-=JCLXVC)&`%IXI06m9Hs*})=WP(1)@6O*yQUTwZMOb@ zr-9Yhhg$zm=9%MI7k*_?h_)y|6f`yO6Ql-EM<4DA(p9Gt&GrD7I`P-luGY7a}118@S3HPG`c zPQaU70Cj;Pk%ItiG#~~w$?>;XMZH^BQsIsj@C2wh(vB>UG=go>(dg!`nZ_7j^J?QQ z+A>JqIcJ`p#hWkeMG#g%??56bs`<8UlSRraMb8TK+t9 zVL*afL8GC3XS2D*=y09p%w>QCnpIFxFfcH14?V>05?=M>5`rZG2~mom|1Vv#HeLTkpaX!>^)jIbl3bxT|DVUS1w0hr@g*R_w#tmo8l@g*$v# zn*tw(P0s*=(qZ?)TMNeBMnDHMyn}fRR8s-&m=t9?_F~d;^wG%2U2POvdT5(<__2FV z#_CFK$RPwz4I6EYCtqFKUql~;gA0_gceRl<8{X2|-2}9UP+F)Y-z-?Y6!az0O4gaD_pnUv0p-0%uL>%{ z8$rYiK%(}@!`;6uU8+Sl!>6sk)`^zPnJ{$ct0jXp|N5PLnwnrC&4BTp>$X%2>`alL%rFouAIRww8#T#6Mzo%bQ6LCb^E$#6E_J7 zRl0F07m-bb7hfS7_LGrnf>mmi-h_=^?+#0VJwbv;W1A0jNE`WFhu~cHK%owzx*~Nz zEzvEbk)vbaL<`Wgcyn0HymJmngtjE~%M%Db*!{4%^XFU4F)+o*Fuelu71dqFjkxE0 z03Cu5Ijq|4zI{H;?u9C;!aW`KX{gzNR;k-U`NIKY5L7=Cx{H?s=tDc~OqvS=b^*8A z25oWHO4lZrMATZ|RNq~`{F4Q!&K*lHmkC<7SC&SK;By`(%U2imB!SoDfD^$5k5r7# z)S`ewA;+p}3+d6<-d+I&_+7w@tVWeg%?0NxbRI%SAceAu7RL#x#R(=gcqQoXLnFRm z*##jlQqh8b?Ttbjz^bb8P|3rGqul{(RM}j(;Ej?!_rW+;%*tK6cEtj5xtctC5Ip^u z^0lMB!CdAK zMwWK_y+&!!zWgjVGRaZD)Q%m!#x7`5=yN3O&R(g7V@Q{7$f2cZ)xP1k-)^$Ug341> zMnkTTUjD8&vXxf(7-&EG)F{4-TP_uO36xX8h!nsNlN`CuB<%*Vw+KgP!0k&RF%}Sk z%1NOFHPS9fl`RhYt8@l`Yp*J2-@Li16y%Mt+q8i;(D-*YLeBDG8JE)AS6Nv?KO0~H z8BEYByVnfEBWRVRE_(BX7@(B(O-;HhmaW!0hQ^i_28Bapzo+g_u+llm^a9WCtwkdO z6eEeF0C8nyiEZ1SltP!~G(IzMIA{P~}6jrnp z^5n~(ldzdqNj72H%W`*m?B=)B6XG`EaKi*X!*$QkzWXgD0hnMA>V-|)w!QTgvdn!D zg}weOVAfc09mSz1HwVvsaqr&BdSE1gsawoOP&AsbdjTA(%mvtSMb7jy;X{Y{u=St1 
zV;p;exdIg?95@oqNCD+!uB@ks>)=4=#znZh$2*e^z8Fq9s-T zz;3w<#4ve)+lSjzx#k!skx&P!R_{66Q^^0WCZk?j4XHXmW-O{& zPrNyE^=9?#A)Fm{(xocAd=#B^w0q#=$M-1E(f>jE1q&)NoVfe2W*WHwL(nWH9d5_D zZe51H4?^-`CMGD`WKk`EjwlK+Y*OtHNpVH(hO!KLOlyGU)e32Na|jjU&%WD>{au9A zB!EUWLh0#VkYQF1V49_g#D)C=O(v=i;%-rucwqnj&k#+%d4C{L1|3CZl0`7-wt#0= zmZD_WfRf3oyb(n>(X|+T<$wwyr4NX-BEVerVKf4qTech~Tm|22ujIwlHB%17vT6m` zXg6^uN1E@$<9_De5BUtClen_wtgI⋙)S|1vt;w5W0>NVHSu_Pf*(VJt%Z$Ct4xy z*{#!rdd47cZ-K%Dv_uj1T`w$XTVT0Wf??SQZRJtHLOcwKvb}1b)UX?tty~#G1FmVe z#=(uSO9EO->+Azt@D9+6T)LyfAf|<|ajCXMbIcWNx8n9%VFhsgZ^=FY0H*378k#sz z+q8^GS5dPaVY&6g^AsGZD+uClN9k9c9YMKg)nbU7N_SMJ>jJCXi_(Tp+a(P5M9rbO zjE^d68+9q}qo`d3-?|?E^7>O2ShA6zX2S89D|X6ojMnC5tpsJYpCFLCza5CvEm%ze z8zE{yG+N%GkryRRE6Sjwgm<`;zJY{8K(9z(RI!!>u$Xw450Tvinn2Rhf7SCvZts#h zFu)8ks3IZ-{Fxr38eCsJr;QRw`dg zQ7V&FTvYUpQl0y=v-A1<`1uDuyWRGKq~6!-{dzs0&&QYN^94j4s`S4q`;2L42|b-y z!>4??CC1F-7~NBjoIoLtT7;EBA>d-fII6^k+Vx&{IMv=!>q0$_+MX()8WD-d`+8z$ zV(CqkP)eJS(wh*kz|F*E3#}Ur!Kg&Jy^WQ?ppuPs7Zu0bdRUn$u;@`NUEnn^#-N0 zDj0+$#Rd`F6pxOB2tg1=pQD`x*WLJ#uu%$AAXn8aPIdVh#w5t-Dzx!CIJ40(?`;Sl zx+}Fl%CSD=)HxHHZ8$=ju*N)`wBqYR4vM(0=w)!m?2GNL3@+(j&8rW^!1?a5VQV@H zom6Q^Tp-6%n_4$4RYB^>@WUk`BSj1VwH%xCw_c$XTH?*oDP`9nMtqJFTzf{=dIA&$ zYyG`Ut^i%LCqF|RXvSzf^H7^uLgb~%CocG)Wn-jysBvAmasWjIl5b&JA+K7LQW-uq z(s;|ZotBhetr~%eCg`s1GW;cS7awuLWL_|bLV-Coph`*c;@5qXd z+xo?CT^0JQd#(WqexYSob8;9*cV$Tl{PYLQ&o8wMxOYSBHiTGV5qXLmm;MbR z*fSr4wMv;AS&Z_7JKN4U{Kb)Z<&ej9_~?oRh&4t&=*(}aC)7D7&T=KCYoFuGc?716 zdH7=Jo}**t{o_Y~-%RETNVH(G<|B@#c!&zwS@QC_?tbGqLL7Niu%e%nG$^Si-F~B*SS_UqDf4v0y-FG$!+?lv>vl0b8@wky-FN}! 
zqvm5TNPp4}C##|H@F*ssHN2U}_*P(8#fiRo(;eFg9pBH`!7%B2r!n+GSmER!6#J)} zadX-~_;ISIJaW$|j6QUgdkOw2Mg!?h_s9-i%viX;Q(fJLP+^QoZKCoG122!X0qG{= z#tHd*Yn(3M0ouOF`yD0KqAp5ZW#1s6y{SFZP@R^i{XJ|9RCxb_2oWd=@ zkeoA-5}=I589Ckaa&O%Rw}!1EO2h1Z&bfaV#7*G1yz-DFCK157tbIS$t%A*!Rw!3ovxRL$`Dn203Bg-X0aD+4;Il`PRm8Px8APZ%mZ@g0Mw6CQxaW*f`Se{J-Kj> zRe9e+x4{q;6Z`h>UrIc@&2AbwW9=L$4M9RI{@_&Pq-d{Rl~y(k8#@87fMJ7Ug48QMeQ z^IzC%)zMFeziN|mPn!_ zVcz36?XQ`2a)Wb2_CNBh8AJcR2xHLa7vCK;=zCCsi2Ha@{?ezyN{LJAMcv}i9Fz?3puF;O@&rTtI`BeGsE(AF-$VH za=W-=d9KxjYkFI+UsrUIHU3+>0{UAh?6w0SMT)$%0r5is9td!?@`cB2t3AC)5 zbQt92xrK;LF$>=ln8%$DcN0I1%2DxZF-Q8nomU<-XW2(AM^zd?iRf`F`;?MHj?G`S zaU{J0Ne>@-A-P&k`ZY#iW|e@fR-_-}m&VS=&t{Eij-=x~ZOYb=pD{INVb1 zwxQ1Q)ORmeUIgj@|9@*7+St}s%0+KU&@oc9%CG}L2@Dx?a3M0OR1IxY@#fc#HCq<| zPZE8UGF<29nf>9568Av3=c94W-yVfnOJ)8QmhyR7g`~V-w#41D7fXXP(_hv5Z9pa7 zWu%x38v24j!x;MM+mzI~$H7#0AN1Gn4&MEckk1dh6Ept0uhTdO7%|x

@IY9!A4% z5TaVuZOnMJIRZJX7?(|}jMRoYEBe-!Mr)vJ#Kj(1c`w9?z8JC2P!lDGrHKeknIK=l zl`oa$z~DLH{=nz< zx;_T4I>jeX(qEvLZady(cH_VLYSoYEx#pUHz}09KA_91X=|i89)>kLCQ89~EABs#l zWD@Yg5$34`3_<1^r&FF-xTPZVwAMropYC?ytIXxpNZ9*FseG1QhUV?Ro4EutK)1?$anjMpwHXe5RN_Antl$v4UXpyu|df=2vsNbofgb@{647bPts^M$E^d7uV(S$E4 zhQ91)q(rEB4~61x<@$YHulhM1nsM{d+YzbiH$fb)6U(^X1Ed6(k^9mXBOTSz;!GdBk!go$8R#4nudR)_WrdV2F|qFO@et zDcXcBxMd`m0!1vLu!(|I#2isoc%t%gT3#exev6aKv)y1f&sqoYM)T-=%BuWC7^`5; zzE1x;auSugA1zZdxn#d>L|0J3SyoE^B;zo|yv>O$fhLlK(Py{$Kllnem38JkNx*MGR(N9`}b zI_vK&oD8cQqN=}Fv1b z=xwm4W#EAsS*4>Q@f`=f&+s3V(I`c<6Y_%UcTvi*VXyT)yl3uznS@@?6u z$Q-X%v-hMMJp`MHtdWc;U>J%>M6dY*w49i=@QO|iY23l5BK9JN%@$M`uViIzBu*Ur*_>DN?(1$ z#y}S}zD9QsxOIQ!nHaeeCQXgpuo;zF75`2Dq2`zIh?m=&g9d|QS2;OFjVI^s_2Q`b zh*ajdrKO>&>WCUPG|EIYJgVIS%CGz$*bql#iKo~vD>HM~=9e@iFzE$lP3-KC&R*!>DsF{-6Uw+EoE)sbERtOL6oqy&e#wd0 zdH-sVrUXc@{|Z~lumU+4DlwB-o6Oy%FtxI}+q!1~(KesABLi!9VchD|ub(DtN*!$1 z)G=em2%ZT!%`OUyqNPVnxd*GN+vW>q_>|L|k7YEVN9YWy`?ToC&&DcGFLlD0Gjf#1 zA}1D_DxS5HuZg8dehIdKD3e7HZz+B33?kl=6L7xxYd2-oOB}#X%m{tf)@CVh57e~a zFf40lmf%EmvZ;_lwE17=MuK1K`}nWouF}4XXm64LOMT|dzrFO<5%9aUdVNK#ZPmFA zaO(_okx#kAZ2axRIAL_NPrMem#+I)d)@C!INY+daxyT{1O^Ag=RIb+0aL zi&?eH8upK#W~x-&hW+Z%{&13rHYmXlpN2M*rsiX{IGE3gD3(i~v8<}(1qt~4z_t;V z20yoWU5WiYVD-4()C&0o$Ur%Q;^q=niLg}^_704yf+5RMJrB+&qmt@5%Or!;#BNG? 
zA4YK{V<`~Lo<5aJtVB+I*T(|U>5#oN!C*;`K$}l}?P9ZA(Y9c~uy0Ry%mFY=Lj#`z zQj@)uOhqRBIW;TzR9_TEt8A z$A7Pa|7SEGUongP?|4f7aDVRq|J5JEI{&ZL=raC$)7#Fvri%M~r-8%A3qDGl`{mdF E23?%H-~a#s literal 0 HcmV?d00001 From caed8a552d3ae63580d2e817738cdd3e3864bfb5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:47:51 +0000 Subject: [PATCH 55/60] Remove accidentally committed .github/agents and benchmark images Agent-Logs-Url: https://github.com/ROCm/iris/sessions/e61a7d71-7247-4dfb-907e-37befa0bbf63 Co-authored-by: ryanswann-amd <109695074+ryanswann-amd@users.noreply.github.com> --- .github/agents/skills/accordo/SKILL.md | 68 --------- .github/agents/skills/kerncap/SKILL.md | 131 ------------------ .github/agents/skills/linex/SKILL.md | 98 ------------- .github/agents/skills/metrix/SKILL.md | 76 ---------- .github/agents/skills/nexus/SKILL.md | 74 ---------- .gitignore | 2 + .../mi300x_autoconfig_bar.png | Bin 85774 -> 0 bytes 7 files changed, 2 insertions(+), 447 deletions(-) delete mode 100644 .github/agents/skills/accordo/SKILL.md delete mode 100644 .github/agents/skills/kerncap/SKILL.md delete mode 100644 .github/agents/skills/linex/SKILL.md delete mode 100644 .github/agents/skills/metrix/SKILL.md delete mode 100644 .github/agents/skills/nexus/SKILL.md delete mode 100644 docs/benchmark-results/mi300x_autoconfig_bar.png diff --git a/.github/agents/skills/accordo/SKILL.md b/.github/agents/skills/accordo/SKILL.md deleted file mode 100644 index 1917a5d24..000000000 --- a/.github/agents/skills/accordo/SKILL.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -name: accordo-validation -description: Validate GPU kernel correctness by comparing reference and optimized outputs. Use when verifying that an optimized or modified kernel matches a reference implementation. ---- - -# Accordo: GPU Kernel Validation - -Capture and compare kernel outputs from reference and optimized binaries to validate correctness. 
Uses kernelDB for automatic kernel extraction; supports configurable tolerance and execution-time comparison. - -## When to Use - -- User has a reference and an optimized (or modified) GPU kernel and wants to check they produce the same results -- Regression testing after kernel or build changes -- Validating multiple optimization variants against one baseline - -## Instructions - -1. **Require two or more binaries:** one reference (e.g. `./app_ref`) and one or more to validate (e.g. `./app_opt`). All must expose the same kernel by name. -2. **Ensure binaries are built with debug symbols** (`-g`) so kernel arguments can be extracted. -3. **Choose execution path:** - - If an Accordo MCP server is available, call its `validate_kernel_correctness` tool, which performs capture-and-compare with the same semantics described below. - - Otherwise use the Python API or the `accordo validate` CLI (`accordo validate --help` for flags: `--kernel-name`, `--ref-binary`, `--opt-binary`, `--tolerance`, `--timeout`, `--working-dir`, `--kernel-args`, `--log-level`). - -### Python API - -```python -from accordo import Accordo - -# Validator for the kernel to validate (binary used to extract signature) -validator = Accordo(binary="./app_ref", kernel_name="reduce_sum") - -# Optional: set working directory if binaries expect it -validator = Accordo(binary="./app_ref", kernel_name="reduce_sum", working_directory="./run") - -# Capture snapshots -ref = validator.capture_snapshot(binary="./app_ref") -opt = validator.capture_snapshot(binary="./app_opt") - -# Compare with tolerance (default 1e-6) -result = validator.compare_snapshots(ref, opt, tolerance=1e-6) - -if result.is_valid: - print("PASS:", result.num_arrays_validated, "arrays matched") -else: - print(result.summary()) -``` - -For multiple optimizations, capture the reference once and compare each optimized snapshot against it. 
- -### Snapshot and result attributes - -- **Snapshot:** `arrays`, `execution_time_ms`, `grid_size`, `block_size` -- **ValidationResult:** `is_valid`, `num_arrays_validated`, `num_mismatches`, `mismatches`, `success_rate`; use `summary()` for a human-readable report. - -## Workflow - -1. Build reference and optimized binaries with the same kernel name and `-g`. -2. Create an `Accordo(binary=ref_binary, kernel_name="...")` validator; set `working_directory` if needed. -3. Capture reference snapshot with `capture_snapshot(binary=ref_binary)`. -4. For each variant, capture with `capture_snapshot(binary=opt_binary)` and compare with `compare_snapshots(ref, opt, tolerance=...)`. -5. If `result.is_valid` is false, use `result.summary()` and `result.mismatches` to diagnose. -6. Use relative paths for binaries and working directory so the skill is portable. - -## Notes - -- kernelDB is used automatically; no separate kernelDB setup is required when using the Python API. -- Increase `tolerance` for floating-point comparisons when appropriate (e.g. 1e-4 or 1e-5 for single precision). -- Use `timeout_seconds` in `capture_snapshot` if the run may hang. diff --git a/.github/agents/skills/kerncap/SKILL.md b/.github/agents/skills/kerncap/SKILL.md deleted file mode 100644 index f7d007143..000000000 --- a/.github/agents/skills/kerncap/SKILL.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -name: test-kerncap -description: Test local kerncap changes end-to-end by profiling an application, extracting a kernel, and validating the reproducer. Use when the user asks to test kerncap against any HIP or Triton workload, or wants to validate extraction on a real GPU application. ---- - -# Test kerncap Against an Application - -Test local kerncap changes end-to-end by extracting and validating a kernel from any application. 
- -## Parameters - -| Parameter | Required | Description | -|-----------|----------|-------------| -| `app_cmd` | **Yes** | Full command to run the application (binary + arguments), e.g. `$WORK/dev/llama.cpp/build/bin/llama-bench -m model.gguf -p 512 -n 32` | -| `conda_env` | No | Conda environment to activate before running commands (e.g. `llama_cpp`). If not provided, use the current environment. | -| `kernel_name` | No | Name of the kernel to extract (e.g. `mul_mat_q`). If not provided, profile the application first and select the top kernel by execution time. | - -## Paths - -| Item | Path | -|------|------| -| kerncap source | `kerncap/` (relative to IntelliKit repo root) | -| Output directory | `/tmp/kerncap-test/` | - -## Environment Setup - -If `conda_env` is provided, activate it before any other step: - -```bash -conda activate -``` - -If already in a different environment, switch explicitly. Do not assume the current shell environment is correct. - -If `conda_env` is not provided, proceed with the current environment as-is. - -## Workflow - -### Step 1: Reinstall kerncap - -Ensure the correct environment is active (if applicable), then uninstall and reinstall to pick up local changes: - -```bash -pip uninstall kerncap -y && pip install kerncap/ -``` - -### Step 2: Profile to identify target kernel - -**If `kernel_name` was provided**: Skip this step and proceed to Step 3. - -**If `kernel_name` was not provided**: Run profiling to discover the top bottleneck kernel: - -```bash -kerncap profile -- -``` - -Select the kernel with the highest total execution time from the profile output. Use its name as `kernel_name` for all subsequent steps. Tell the user which kernel was selected and why. - -**Important**: Use a sufficiently long substring from the profile output as `kernel_name` so that `kerncap extract` matches the intended kernel, not a different instantiation. 
For example, templated kernels like `mul_mat_q` have many instantiations differing only by template parameters; passing just `mul_mat_q` will capture the first dispatch that matches, which may not be the top-ranked one. Prefer including template parameters in the substring (e.g. `mul_mat_q<(ggml_type)39` instead of `mul_mat_q`). - -### Step 3: Extract the kernel - -```bash -kerncap extract --help -``` - -Use the help output to construct the appropriate `kerncap extract` command for the application. Key flags to determine: - -- `--cmd` — the application command (`app_cmd`) -- `--source-dir` — where the kernel source lives (ask the user if unclear) -- `--output` — `/tmp/kerncap-test/` -- `--language` — `hip` or `triton` depending on the workload -- Any additional flags (`-D` defines, `--dispatch`, etc.) - -**If extraction fails or produces errors**: Stop here and report the full error output. This indicates the local kerncap changes have a bug that needs fixing. - -**If extraction succeeds**: Inspect the output directory for expected files (metadata.json, argument dumps, source files). If the output looks reasonable, proceed to compile and run. - -### Step 4: Compile and run the reproducer - -Navigate to the output directory and build/run the reproducer: - -```bash -cd /tmp/kerncap-test/ -make run -``` - -**If `make run` fails**: Stop here and report the full compiler or runtime error output. This is the primary signal that kerncap generated an incorrect reproducer. - -**If `make run` succeeds**: Proceed to validation. - -### Step 5: Validate the reproducer - -**5a. Smoke test** — confirm baseline replay works: - -```bash -kerncap validate /tmp/kerncap-test/ -``` - -This is a smoke test only (VA-faithful captures). It confirms the replay runs without crashing but does not check numerical correctness. - -**5b. 
Recompile** — build a baseline HSACO from the unmodified kernel source: - -```bash -cd /tmp/kerncap-test/ -make recompile -``` - -This confirms the VFS-overlay recompile pipeline works. It produces `optimized.hsaco` from the unmodified `kernel_variant.cpp`. - -**If `make recompile` fails**: Stop here and report the error. This indicates an issue with the source finder or VFS overlay generation. - -**5c. Correctness validation** — compare recompiled HSACO against captured baseline: - -```bash -kerncap validate /tmp/kerncap-test/ --hsaco /tmp/kerncap-test//optimized.hsaco -``` - -This runs replay twice (captured HSACO vs recompiled HSACO) and compares outputs byte-for-byte. Since the kernel source is unmodified, they should match exactly. A failure here indicates a recompilation fidelity issue. - -### Step 6: Report results - -Summarize: -- Whether reinstall succeeded -- Whether profiling identified a kernel (if applicable, and which one) -- Whether extraction completed (and any warnings) -- Whether `make run` compiled and executed successfully -- Whether smoke test passed (Step 5a) -- Whether recompile succeeded (Step 5b) -- Whether correctness validation passed (Step 5c) -- Any errors or warnings encountered at each step diff --git a/.github/agents/skills/linex/SKILL.md b/.github/agents/skills/linex/SKILL.md deleted file mode 100644 index dca5b7d6d..000000000 --- a/.github/agents/skills/linex/SKILL.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: linex-profiling -description: Profile GPU kernels at source-line granularity with cycle-level timing and stall analysis. Use when identifying performance hotspots at the source code level or analyzing instruction-level metrics mapped to source lines. ---- - -# Linex: Source-Level GPU Performance Profiling - -Map GPU performance metrics to your source code lines. Get cycle-level timing, stall analysis, and instruction-level metrics for each line of source code. 
- -## When to Use - -- User asks to profile a GPU application at source-line granularity -- Need to identify which specific lines of code are performance bottlenecks -- Analyzing stall patterns and execution bottlenecks at the source level -- Understanding cycle-level timing for each line of code -- Instruction-level analysis mapped to source lines - -## Instructions - -1. **Ensure the target runs on AMD ROCm 7.0+** with `rocprofv3` available. -2. **Kernels must be compiled with `-g`** (debug symbols) for source mapping. -3. **Choose execution path:** - - If a Linex MCP server is available, use its MCP tools: - - `profile_application` to run and profile a target application with the options below. - - `analyze_instruction_hotspots` to perform instruction-level hotspot analysis on collected profiles. - - Otherwise use the Python API from the environment where Linex is installed. - -### Python API - -```python -from linex import Linex - -profiler = Linex( - target_cu=0, # Target compute unit - shader_engine_mask="0xFFFFFFFF", # All shader engines - activity=10, # Activity counter polling -) - -profiler.profile("./my_app", kernel_filter="my_kernel") - -# Show hotspots (sorted by total_cycles) -for line in profiler.source_lines[:5]: - print(f"{line.file}:{line.line_number}") - print(f" {line.total_cycles:,} cycles ({line.stall_percent:.1f}% stalled)") - print(f" Executed {line.execution_count} times") - -# Find memory-bound lines -memory_bound = [ - l for l in profiler.source_lines - if l.stall_percent > 50 -] - -# Instruction-level analysis -for line in profiler.source_lines[:1]: - for inst in line.instructions: - print(f"{inst.isa}: {inst.latency_cycles} cycles") -``` - -### SourceLine Properties - -- `file` - Source file path -- `line_number` - Line number -- `total_cycles` - Sum of all instruction cycles -- `stall_cycles` - Cycles spent waiting -- `idle_cycles` - Cycles slot was idle -- `execution_count` - Total executions -- `instructions` - List of ISA 
instructions -- `stall_percent` - Convenience: stall_cycles / total_cycles * 100 - -### InstructionData Properties - -- `isa` - ISA instruction text -- `latency_cycles` - Total cycles for this instruction -- `stall_cycles` - Cycles spent waiting -- `idle_cycles` - Cycles slot was idle -- `execution_count` - How many times it ran -- `instruction_address` - Virtual address in GPU memory -- `file` - Parsed from source_location -- `line` - Parsed from source_location -- `stall_percent` - Convenience: stall_cycles / latency_cycles * 100 - -## Workflow - -1. Ensure the target binary is built with `-g` (debug symbols) for source mapping. -2. Create a `Linex()` profiler; optionally set `target_cu`, `shader_engine_mask`, or `activity`. -3. Call `profiler.profile(command, kernel_filter=...)` to run profiling. -4. Access `profiler.source_lines` (sorted by total_cycles) to find hotspots. -5. Use `line.stall_percent` to identify memory-bound or dependency-bound lines. -6. Drill down into `line.instructions` for instruction-level analysis. -7. Use relative paths for the target binary so the skill is portable. - -## Notes - -- Requires ROCm 7.0+ with `rocprofv3` support. -- Source mapping requires kernels compiled with `-g` (debug symbols). -- `source_lines` are automatically sorted by `total_cycles` (descending). -- Use `kernel_filter` to profile specific kernels by name (regex pattern). -- For Triton or other frameworks, ensure debug symbols are available in the compiled output. diff --git a/.github/agents/skills/metrix/SKILL.md b/.github/agents/skills/metrix/SKILL.md deleted file mode 100644 index 969ef6eef..000000000 --- a/.github/agents/skills/metrix/SKILL.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -name: metrix-profiling -description: Profile GPU kernels when performance analysis or optimization is required. Use for AMD ROCm GPU metrics, bandwidth, cache hit rates, coalescing, or kernel timing. 
---- - -# Metrix: GPU Profiling - -Profile AMD GPU kernels and get human-readable metrics (bandwidth, cache, coalescing, FLOPS). Architecture is auto-detected. - -## When to Use - -- User asks to profile a GPU application or kernel -- Performance analysis, optimization, or bottleneck investigation -- Need HBM/L2/L1 bandwidth, hit rates, or compute metrics -- Need timing-only runs (fast, no hardware counters) - -## Instructions - -1. **Ensure the target runs on AMD ROCm** (e.g. `hipcc`-built binary or Python script that launches HIP/ROCm kernels). -2. **Choose execution path:** - - If a Metrix MCP server is available, use its profile tool with the same options below. - - Otherwise run the CLI or Python API from the environment where Metrix is installed. - -### CLI - -From the project or install prefix: - -```bash -# Profile with all metrics (auto-detected arch) -metrix ./my_app - -# Time only (fast, no counters) -metrix --time-only -n 10 ./my_app - -# Filter kernels by name -metrix --kernel matmul ./my_app - -# Specific metrics -metrix --metrics memory.l2_hit_rate,memory.coalescing_efficiency,compute.total_flops ./my_app - -# Save to JSON/CSV -metrix -o results.json ./my_app -``` - -Options: `--profile`/`-p` (run `metrix list profiles` for names: `quick`, `memory`, `memory_bandwidth`, `memory_cache`, `compute`), `--metrics`/`-m`, `--time-only`, `--kernel`/`-k` (regular expression), `--num-replays`/`-n`, `--output`/`-o`, `--top`, `--aggregate`, `--timeout`, `--no-counters`, `--log`/`-l`, `--quiet`/`-q`. Discovery: `metrix list `, `metrix info `. Note: `metrix list counters` and `metrix info counter ` are not implemented yet (CLI reports “not yet implemented”). 
- -### Python API - -```python -from metrix import Metrix - -profiler = Metrix() -results = profiler.profile("./my_app", num_replays=5) - -for kernel in results.kernels: - print(kernel.name, kernel.duration_us.avg) - for metric, stats in kernel.metrics.items(): - print(f" {metric}: {stats.avg}") -``` - -Use `metrics=[...]` for a subset; omit for all metrics. Use `cwd` when the binary expects a specific working directory. - -## Workflow - -1. Identify the executable or script to profile (e.g. `./app` or `python run_kernels.py`). -2. If only timing is needed, use `--time-only` for speed. -3. If full metrics are needed, run `metrix ./app` (or MCP equivalent); optionally restrict with `--kernel` or `--metrics`. -4. Interpret results: low L2 hit rate, low coalescing, or low HBM utilization suggest optimization targets. -5. For automation or tooling, use `-o results.json` and parse the JSON output. - -## Key Metrics (reference) - -- **Memory:** `memory.hbm_bandwidth_utilization`, `memory.l2_hit_rate`, `memory.l1_hit_rate`, `memory.coalescing_efficiency`, `memory.global_load_efficiency`, `memory.lds_bank_conflicts`, `memory.atomic_latency` -- **Compute:** `compute.total_flops`, `compute.hbm_gflops`, `compute.hbm_arithmetic_intensity`, `compute.l2_arithmetic_intensity`, `compute.l1_arithmetic_intensity` - -Use relative paths for the target binary and output files so the skill is portable across environments. diff --git a/.github/agents/skills/nexus/SKILL.md b/.github/agents/skills/nexus/SKILL.md deleted file mode 100644 index ad714bc4d..000000000 --- a/.github/agents/skills/nexus/SKILL.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: nexus-trace -description: Extract GPU kernel assembly and HIP source from HSA packet traces. Use when analyzing what code ran on the GPU, debugging kernel dispatch, or inspecting assembly and source mapping. 
---- - -# Nexus: HSA Packet Source Code Extractor - -Intercepts HSA packets from a running process and extracts, per kernel, assembly and HIP source into a structured trace (e.g. JSON). Use for kernel-level inspection and assembly/source correlation. - -## When to Use - -- User needs to see which kernels ran and their assembly or HIP source -- Debugging or analyzing GPU dispatch and code generation -- Inspecting assembly-to-source mapping for a HIP (or ROCm) application - -## Instructions - -1. **Ensure the target runs on AMD ROCm** and uses HSA (e.g. HIP application or ROCm runtime). -2. **Choose execution path:** - - If a Nexus MCP server is available, use its tools: `list_kernels` to enumerate kernels in a trace, and `extract_kernel_code` to get assembly and HIP/source mapping (signature, files, lines). See `nexus/nexus/mcp/server.py` for tool parameters and schemas. - - Otherwise use the Python API from the environment where Nexus is installed. - -### Python API (recommended when no MCP) - -```python -from nexus import Nexus - -nexus = Nexus(log_level=1) -trace = nexus.run(["python", "my_gpu_script.py"]) - -# Or run a binary: -# trace = nexus.run(["./my_hip_app"]) - -for kernel in trace: - print(kernel.name, len(kernel.assembly), "instructions") - for i, asm_line in enumerate(kernel.assembly, 1): - print(f" {i}. {asm_line}") - for line_no, hip_line in zip(kernel.lines or range(1, len(kernel.hip)+1), kernel.hip): - print(f" {line_no}: {hip_line}") - -# Access by kernel name -k = trace["vector_add(float const*, float const*, float*, int)"] -print(k.assembly, k.hip, k.signature, k.files, k.lines) - -# Save/load trace -trace.save("trace.json") -loaded = Nexus.load("trace.json") -``` - -Set `log_level` (0–4) to control verbosity. Use relative paths for the run command and output file so the skill is portable. - -### Environment-based usage (no Python API) - -When the process cannot be launched via `nexus.run()`: - -1. 
Set `HSA_TOOLS_LIB` to the Nexus shared library path (e.g. `build/lib/libnexus.so` or the installed path). -2. Set `NEXUS_OUTPUT_FILE` to the output JSON path. -3. Set `NEXUS_LOG_LEVEL` (0–4) if needed. -4. Run the application as usual; it will be traced and the output file will contain the kernel data. - -Optional: `NEXUS_EXTRA_SEARCH_PREFIX` (colon-separated) for HIP source search; `TRITON_DISABLE_LINE_INFO=0` for Triton kernel line info. - -## Workflow - -1. Identify the command that runs the GPU workload (e.g. `python script.py` or `./app`). -2. If using the Python API: create `Nexus(log_level=...)`, call `nexus.run([...])`, then iterate `trace` and optionally `trace.save(...)`. -3. If using the env method: set `HSA_TOOLS_LIB` and `NEXUS_OUTPUT_FILE`, then run the app; open the JSON and parse the `kernels` structure. -4. Use kernel `signature`, `assembly`, `hip`, `files`, and `lines` to analyze what ran and map assembly back to source. -5. Use relative paths for commands and output files. - -## Notes - -- Nexus is intended for research/analysis; ensure the target environment has the Nexus library and compatible ROCm/HSA stack. -- For Triton kernels, enable line info via `TRITON_DISABLE_LINE_INFO=0` when using the Python API. 
diff --git a/.gitignore b/.gitignore index 8bab3e791..0bc6bbc55 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,5 @@ hsakmt_counters.csv core .intellikit/ .github/agents/docs/benchmark-results/ +.github/agents/ +docs/benchmark-results/*.png diff --git a/docs/benchmark-results/mi300x_autoconfig_bar.png b/docs/benchmark-results/mi300x_autoconfig_bar.png deleted file mode 100644 index 79acf7e65bf5c286b9144f17d4f4503ea7347227..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 85774 zcmeFZc{G>p`vv--B$+Brk|7N!V@b$NBxNi^B9+W!N@huDK)o4DBos1~F*2p1%*l|s z5|Tv54C(B<_xt;v-&$v_^Vd0loVAYC`Td3AK^-x7J#XjeYG-|#=YoZ^i?yS} z9$_(2;r+XLtXy5syGVEWl;Ouz?;^*2sw?SAU6`ZRju3GFk??^KemZWnuJsC{Pa zX(7Xm>4Vb-Yk+y3_#1Xce3FZ_RMJ?7s`eQRAM9?P^@(qZ@4hfQ9ti@%)mY+4yr zud!cBOyoT)zNs_c)}X0gP@C}zt*NQ0=)Qebqg{ooiltPhZh6g3Y(E&rd9^v^@XMAK zcDxM!WSf=KQ^R9TO-;1+mgbc{EGjB0>I=MBmcF)JyAJU;XT_PeNbl`y`aVAU->2MVBbT+$?oEN_lAo_S+L`~eR-~LqNJyw_;@!J<8)Yu+u89_5b#ZaozkmOI^Or$}1$LVP**I$L zdP|Htf83LC|CZtRXX&?YZ~31knXVZt=TDW(Gfot;MD8*;7%hCVr8#oJXvggry}eul z0s_(_UNhe}-3Sd$5ict%v++<;R(9*X-<5B>p(=Fq3VC_?2;q|)`}XY<6%$i@_dxaM z>kVc+`}c3$v}u!Z*ZJvh$Livw)(RQtDLg&>`qb&uw6?ak`dKEM_|(E);X+&;E@x+F z*9>XBe|+)hXO3Dy`RDQR@gd>i?M3lFdY&ueDq36DiHV6xPY&kqv23{S)L-@Cxx2{e zGiN@1{77xsu;D{j*HDLa7`J>_*2(7&@FJoT5?69__uP@N4VoTlA4)IdRSr^L=zITO z&$g^OoV!0FxC?z;U})Mwr`|G=DEPfeP;tf|!W{IsoRg4~w(kF=s`4Y{w~ z-l=u^%bQ!N$1^#uu?x_iIdca0lw{MIzVX0CQ5J5wjOk)y?Jv$!clKHb^!Y9@vvM8q zzj}40iHXU_&!1`OE(esmJs6!oclPYEojZ5#wfn&4GSr}Y`t)f<$w)b+l`B`qh+8mf z9yuar-JBfY(BL)u^&Q_bdin>(`8H1q3WAGxM>Uz~JD=1C!(9Y*b)* zdAV_CoRo`x&E1$71{oQdNQ3~iYR3=C8s=H=zJ z8OClGbTd40V%45Kdm=3)Y+BeF<7M@BJwI2^5$R0@W@cvASk}CHrLxV$!y4abZX;2q1MzY`g2n}QX}ZUE++KO& z#0hh2>$IHirY7yFA3Y~Vmg*0G`t(U~WS^)gn~si-bZtrLmiG^j=+)jfYtM|U8U0z4 znX#}(q%&ayd7V3wj#(GtMn^}FH3-y?_cBIX@8V2 ze5{~}9J_Vt(j{c5V{@h+9v)YBYQgEPW|Z7 zqfhekUVVBQ>^9STqSRC3i*r!}mr_TX;nS_>>S8Y4yvg>es%oo*#JX(LQYB|+@dEp< zAmnL2F|pOMo>K}JFFrG#!x|z$=@Dq>U0hx!DhJ<;i3v$g-e&jV2~AsD+cWPuafDoo 
z({IP#62+0W%+;GWUuRq=_;GP=5=lqx^y#g>e}20o@Ag+*qT4886HzsRI1IRcebs}c zBr&@WqM;rKZ*07TTd8>acIEXAA`Pv+(k_EcVOwPA4GawO9Q#ZyO$UB5u&}hnIu6uC z3eLGC={-97Yb3jzZTxXtD1}0Yo{t|t-hAXHVwAHC zPc`aW__b@5j}5ZfP*(63mCem-x(XfJp9h=j>aL+@;bh?B<7-JjcG%Li$b^M=8OcG% z9v}bov%gwhQ&UAriMFz`GTEo}^lQdY*=qkf(?GfvEX%0H#mPobhw0!k6quK5n}u83JR@x<+SqD8H__Aw9Uz7pKz=l z{W%|jjFfad(|G^IaUSv(u3xp|WWCl~TU%d5h1b>7bNaL7v)k|qh1~xN8O7=M{B(dr zfr>|brfv$Ox3~A+!6LVj^$tD7SESv(l{7v~ySQMy0c#W!vyl`u6msk}BDoW8-US66XKYn~D^(u5Ny>b2eON5yD*|UL9o`^DS5?491 z<({-#!0p>>m6Vjuf2zDPTe2MKbu9HhUj+W1GS``THI96WxS;j_^(mM;Z&1V_D~{rM zsFe^u!WGz2liyf#<3PSQc^`lRm0Gu2`1kg_W) zDijWQ|I$9wivp!l$~-?ce4D&;?0%;Q`b(p|N{FR32;(;6nbyY!m94GED7GVa_P&cz z>$t3~*SOUuADe+%^ea{jS(hR)R=$3%`fWfU zy0&#<5v7*aUsPOt&FolTV?hHhX(e^KIr&htBzCjA54&Pc}Vfh&y4Q6*X26RQ#Uel{OFg#i=~fQ z(^i;kXk0^;zC($7-3ks0`c{_^A20my$lVj>B05qXSd;XvZPSvz3({6c2cJ|xfeKJ!yF z6aXm`KqDnSB%v?o)LrW3?)t6e&}@~>>J1x$kKElyLwQbpy=CvaYu7H(|ReH%kU7CH~sxX`k}&p~eu0pPwID@$soC6<622Gv7Os^1E{^YLV0F zd2%gN3rw|*q@<-!x27Kh{H(uQHIjzV=04U zDXR#1(}1sT!}d~(+V#KY=PO^lpaJY7D0lsCL+XlBVs0ozr4Mzlc=;_H`*$Xkdv{!RD;4`o9nQh-fXAy3GMd{L)FK0tNA{HD_jf~5cQ3$w`1H!b| ztX};tc)P6@(-kFNjcDOaH-~J0e~Lz7+4ALSQ{uHq%1DgS-{SXJhG`g|KK%fF3mf&S zQT|Uxei$GSBSn(4#T*texmeP^GdX{8gPAuk6;kXrVpBi)t#y}9hM^dZLY{rsaR%O3 z24+6nDF#W&Lial|4tVa(-O9y-jJ2J$gXlkL{rQijM`-i23(g^5UCGbSCvOJ0rJH?s z@vGu~hwl9}XB#ffAP_mDx%ZsAkvDqhiz=%UZ{UX#kKDX_T?O_)eXA?0st!_Ym+6^3 z9~Eoelir?@nVB*VAe=JiU}tCd3ORMJuU~|Vrht4u5VfF-7QiwiZy*CB;~IrQ?@;up zx<-X2!qGE2udl5;7Vq0AT}ax9{DShM_oQjx#L?;8jfh~^d3suv;O$=Jj%l8j;E7yT zg*bUXQ`<6b-(QLpo7=aRx6{RP>@{;Yu&H(m={2ynK0LBHb3jl(m1oP);==`IR#vGs-DTLzf|EL(U0oxs{rZ~+4&1k>jXL(c_sgp!wQv<+#gq%1 zHgA4hxBMK6!Fpkn)TbHF^1o-g96#qa%H6K9ymvx#$L*`Q%f}I$rJR$`J2~B&34Z$Y zX-!$({Bz~@{BZ!kW|NmxU|L$5D>GVV9jWMnLF4q@k4KCpPgPLE7j3pi*S>ypi!Y^k z{7-3aZo^xnGH(w&jl|=}J>+I*XQMq2Z`!yqwSH62BQ5UCQDE@=8qU`{cI>Ep_fE&6 zVW==$wPpZ;yVjy6Vl@>Q8yoxN$rJ6gA*5QHk=pq!UBzyXTaItuygAa|;lqmpm(;dLvyp4M&SmRtg+CMxmddUPGyz@ zChw6G1Z&YFr{*L8`L)U&9-DuDl-tN}g+=@2Cc*;90tGf(dX4p#&b1hC-TI)Un&QPO 
zq`mP>Iu`P$$JI~fN%YN|JU|zL=wHy5)lE|7#>RKh->=zakj0KSp!Gk0{ydwAX^Hk* zcW@`xrNv9UW_eb(B1CNO3y+K^NIG;k883A?mPa}W4(B-b$)%OCY>`$Ub06!?Y`F&Z;H+GRlf*?io%k4j|oaPMh0a0qMfTHt#`QyCW#e&a@jLy1AU{vospN*Xd3 zzQ^#Jm6f=L`T&`jcEz?-CASG+-8+DY8(?4wWFl8botFq z89fE|cc(-f>+edcBFP_XnDp2}nu>L#f1;I@U;33)B^_~z1%%yGt9M(6XSES53@cZz z1}xs-Kgjzia>xa=fjMa|e@`0>$8oM*o3w_19_?RiTdCKq!|<#EQsFD~R4H!B4ua?BsGkk* z#_nZQt8TjgeqYNJprs%cl78%QdWrz|nErrxjW5~7ruYs9jNu>Ba77ReDX!`+~m;VxtvvUKoZ(1qck)$(vu@epi61}H;SDO zZ18sMQ%Mjz6IAuF8y%A{6_Rh;&d58lL*e55lyzuz19+EZplyB=t(WRxOHF|$gzD}d)d1i|2+jsoVq_zj2^z)*;oFt~>F|n~%ksi}i$}j#@ zO4+IHKpUuZcBaJ1)sk9#kGF} z_fh%gjXDUb)VlY-vS=z-uyU!FN`ZUM+%pEeoxG1r%6Xf?$O0Rh;Z;brXT@WUMdIn1 zIT)zZZ5c*31IS6+?Ee4;OqAa9Sk1%!+3!U@FsAz4=l=c_DgDUFsKIRtF$V2IK7|63 z)gz<7Zza%F5;~P;{gD?nsj2+KGIHL(&Sy?rx%?V8Puo&stKo1~w$Jt(a}+wGwY7)OP#MU>y7A}g?^u7Q zdA!KnR_^Cl+Lf97-99bNuB|s11n)cJ;D=-Lc;LxrrqPqyGI@@isfiFUF4xmA$TGPl z*N6JrJQ2*w&B&Ac$UU4(cG=Aiy*;}IoEqmh^GQn^cgCXu|B_l-Gv?u7+F90RkZl@C z8>Mir(a@sN0%7|u&sr~3wz==&fwly{C6BNIhyLzBHg$Ta z^4yCFjayq)3FV6lYi$%Mcx)U6A`t|%&0Ig8*DTPr@ksrhsQkL;QSD#AI7Ri|y?eL6 z-OR5w){I*#{s7I*B0uNKzCLd3&ks2k*IRyoi0S|JvtMv>VPH853PPy@%?~Pm^TdnG zE6#N0Sp%6j>iD?-=)N`RIQ8@AtwCAOyV=kd-K+LQJ02M2p5r$y zxfnL$ozR}y&6Pc?%D_8%8GE7v7+Xk-WHXo1Am+`^B`iMjUL76JocO5w=t!)3ONxw( zZ{1pr+)K~n68{Wn$U5T@8roFt#DLC;t*Y15C)v@ZNVJc(fQI@52JPJwbLD(jaMj)m zqlF$hSzxAB)YJ}VXOtg;{_%5aSTA&PAk(C1jdKyF(1`*Sv}O`*_V)InXjy0|7S8?m z#k8)j*`C}u)74pEze>U9Pf%bW&w&G+l&E9xfyh3Fd5*nSY;pJQwML;n9iDXhCY=Q= z+P1&l2Bc^5?AjfgP5XD|ifcK@N!EDm(~8@lC=q_?5=~V&ceeXIWM#FNLCjF4_*}m> z(IEMNK(`O;qW?O(kb|f^@9G-JMho7(*?C|AQc)758_FNLkNPl5e?-{xgoK2=?#@o* zVpGd|blTiCf7UurHec|an3<`1m!$SC!)Ol`>%v^-F?rB40fZpB`?Fp9AaPLKvyN$Pt27}EBV(2CC2^&~mgef2D3K%LyP%{$r zqH^1m7`!LzC7b&Qd-wdps7(9g60uwF?`DIrR70A&84mX1l3NaQ$vVT!g+A^~opq@&+7!3smLj@4sX}Ebie)$s` z=9oQ}EIW?IF+f*JtLv&ZN9KR#JithHnDEKsFK;hE>begi<=MrbTY+kxUKlkd7iqZ( zX=<20O-^z`P6bLSa``d}H=!m}iXIxx^-C~O=NEtdG|B8oAr*YFd|(%vej!IlV-oah zbLnIAQbrcW6(Rf(9tN1EzqizjT5`|SG4*op_c1B#t^Vn002Y-nvebR+_YaLH1srcw 
zS5;|9PW2T6c2Z3}N1;n?%$(T zd2_%FEcY&huHRYJ`RE}zqIYrsLnB^z#2_B2+gj6a{38uwI2bUv5{fM9(apDK@#oh> ztq2W;{NLPJAZ}4(#pG*4YSUiFUQSRERZWS?Pdz5jgY(95Ol@FEyoF5uMG8g{Joa^u-#o}Zr_Y@Mbwq^r~Pk2V##SmXNvx+ zAH2#luh}fu$y88m9vr7U?_R!4hcpV@c*xMm$Q)qGu;k)X4K8T4!#=vY|Fjk7JhTv+ z%o^yMSr{wl+qMn*A${}OA+$$h-Y<`NB#B5$Udowi(&1+}Bu(SQA_z@?tj9-S+-it{ zgwV&n_%@yVYjUXZUGkx|-Po+c!ot7s11_!C87S}?p&+j5-gtY;+iFD@z8!G>wXndq zefx5dqz943#-W-5sEHbk(2F}BuNiQrnl^nDJf2~g)&8@f$Ym(Kr4S;-aN30nKfhiT`r;MBQXss*)rU?LWZKu>QW^g`mlklJPJrp| zEMH2)N7|lqX3q{_7d3mG9E!~t`jBB!HaB?J?((a7-Q32AFgA)C#4+`nx6qzkH3B7{b68~E5xxJ0YYQ-=Sale z-wQ}vRFg;9+lJ>{2KEDy7SnH&(w3l4Ch4YV{2GWhJCC$c;xS2&WkDMm1kKg8UjK)| zktt>t+KL#lGdKVFCxGNOXy{Gz;}K<3zkSjKsF?f}=4rYqA3uG%gk-BxeIhnKfA)Se z_)+bAn`0#xf6~e?j?;Z>%Up$k`t|GADUfa+T(8;((a3JKBn!CpQNop6`n(sDm5p5SNgMboeNIqCk^TwDj;;SR53S zo1RIF)9q%0)<%w1XkZL&w-~+8iF#}80nmilwytx5HA6E-lty8YInKVTAklTWnJ8JP z3A}+##(ozfBv0kbmS)w023wA*7$@zY+qP}nDe!dC zlX8AboZyR*9wa{k0inWKaS96Wj8Lvw$?HmQV_ARa3byNu4<9x{XRAWyqM@+wB0X1@ zuCkXEp6JBJ(3h)42CgrFwB&LguzzZ_YXih;ew_!)2%)-Z&$*9~VRgf|01QVaehOmc z4g;U7d1voRG_&gJ>e?OxUW<&p$ri;9AF(wEC3|%R)Vqx_H)%(y`wn@=HNz*o<>k`*! 
z*3IiSM{yw$&w56*|AWT%45(GgpG(%WL@zsL)~Ux6jsH_fmPZN{KW8MA)<%nLP}w8D zacty3_7T7>?WBXuR0TQ>{pJYP$2Yj;O-E+;7507l&OYV)Rs<~8*RDbl!~r3t1A~IL z3J4sU?>*o(qsifWe8Ygmaqd#jY1*4}U0+6n2c4Ds`l4%iRUkiv0%dCAIyA!It@Z5K#4(6B<_a&~rncy*TxCwIKAj5%UMm%5z^zof)K4fLk@Rw4lwLwDqvK z1ISn-s)1Z}yf*2vciZMfBLQ|s@)*&Tr$WK@{q<>up`jtc^2nWF2Rt5e9j_@ecb-(q z2Vu82JAPtE^#bD$+E`h+h=s+`{^(X@%1=W>!C(Od_NUtW4do+zw(|2cP|z>V>Svp7 zfvcv%pN5V)+i)^@mw!emV6JPwR=ajeQQC>yNLp$2HBvHuOLLZ`4mlE5P1K7QFVKEV zZ{_22n)z--q6ZbGS=$###Xum5>Dke^{i`98+lUN|1F7C>&VhDkZBm{y^bsMS{P9$K z*2(GK8F19YX}{y@S|Qz~wbxH{m*myoO*>>~w;K(1qWcn1YUJGKYwLvsm%n!(?M!i( zbP^-_ZJ*(b(4OQ_cP!^NI){6m#Bp&7EbU74L?B!p~Zi7>W# z+XWYw+YPNi!dh9}K{{Cm1~PiOy2RGmfkDB z?o09Hdjtf;s}l)+>c*0tei#}Vj7v-klLu@)6osSF zRB(NG=9I+F`5o>sT!WH{Fe+m%z@TL4hI!^HF#s%gt=}|OO zBf(RzhR5QRl~o8TkBwblgn%AJq1V%XZaoKg+ylFg98wV#6cn;N4T$OqZCK+iwvo+8 zpucJmHeztk92oJq)$^22JUCl_@V)C|4$H1@pFan{q*HqNa>r=x`1twZ;d!w4wsF_T zg1LPwu%%DI+=AWncV3YgHCRRhjDxAGkpjz06|CNCrgW%wwPJtQJsnH2>BZM9k!>2Ugvg2(k)h*^-=iBfm6aVF>D| z?$`=yqv$FBsh&-+-W`oUFt;SREgFQH&`C2e36Yl5gY9m+M8cv|Qc_xMEzoCiwC`~l z)XcO8>v^l!cGcFJv*Kwxe-Pek=A7rmmz7{!X{qKX8B(p_m@ayG(Y;|>&n0s~)6y6H z%KiI1c$#X)54*JKvp6o=%Nnc-If~5TGxaJ&Z85ayYIy-6KFnzIQ zEH*82R<%!UFA=xz+*^l5rbKO8_!s9#vWwEy<`yP^K{@>BEF&c-bX|mBjqGznJ3xfg z)m&V{zfGI>mRMb=mbVqDD;zSc`{Ay3G`Rj4{Vb%A)M?2J=g*6xh|?@Oe(*dJQLc=T zNp@Kv?k@fS{Gb0inmpLcUcTg08-VX+88l2#laEuppl47Zrp$yg_0x1eN1OSsMjy-W zTx4R*wPlOY(yxA2qNq^}fL)~5$)6y$k*=;26dNvXEoiv)N({j8uR*Q^hK7bxJY~N= zuL`yI`#qn3-py?tr9!x1FhqfDN`znsWv?6qcOrEf9tDo}laG%J(J0`ZRq;(5lj^H$ zcM|1?;_+P=l??T8EpqA)%JWN*^?E3g;4H6ocb|~j^uofzD(V5^%*NU%+#V94Mkho> zRb;DINO7vJu6cRTC$IlmTF@f&vS8PJMLOUR{Tg|&pb+=53?Ph1*sF~${jBCzg(Kw^ z3fF5S)#L%}uhp;!{}c8YJ7vGq=Ro5*^uxr9MWav$XXA(`SX`_JJb=U%%$ffa^akTVNnzLxVu;54TdHhB5M9Lt`&o7q8v> ze0B<(mhgj!y}}k-JqEk+1N2a|{-7sne?DFP*)L(2{m^Z|q<65E&?q46uEHELaB+(U z{8#HK6-ctqC)E_C+<(NmE{1bT{bTP7wYs~f2cE!lbsHn|0C`tRN=gp9$iwbMsO?A> zJ@n`My=F2tLe89;YaEtR?&ytUU}bIh6n-0)<#NLJWMSUh+I|VZonb?O9d&pPR`Ld- 
z{bI*8Zvd+d^O@4fY&mKPTv7;PRZq|2s%bS<)s--w(kNh^uM+c;HMwsP+w4@}<;%)p zjgCg8zdK;Wj@ggPee>qc%?X6zb)cQA=vfgJ;4E~FK{o#Sp4*u00z$)96_O5{QV6Pc z2%Iw-3qGiHHY1Hc4hX#Z({SjH?{$88wL4jtH9F*z2-H>uqD1Df1bgk^f% zkjwmBW3j`LzDrY6uEc9RY)H$7chak!1rvUF7)sFbxg(Jv&V|j&c9;EG^3r*rveF6e z^~jv!nR~kD&p&A!`-~8|0-=Tq3=a?A@4NVH>aK`{#4%W0NizReG0L<}08ORdU-s6s z;oZ!ZMo+Fc!xwsrT-1uo;K@;(Um{xo&B`@!jtpvQYMIj`ld-i9GRSb-Q|jYoPgu%! z?w65SMV$t{Cp`&Ux`FWqN(ZfU{(&0XrxW>3{Ri_8fY>8Dkfe^!XnmT86cV*V$YPfd zLwm?_UV6hi)R+)07flRcSZlVcSFd7|f;;662nb02Lx=71X!^hcSWN4?E|Xq_0KYH# zx3Lps+^P^WeJ}tR6eJ=$AgN$2Ve<*2>Gm6pQ6*&D8b@<;Ua)e><=8R8re)q~`HR@$ z`VOCo!53uL@E9z?*P&kJJEQ%)?7D~4U+LL*Xk(XAkc|XLz1fq!g9xG2D>J{}DH4qf zENp^~cLdCUiz{LHD2{#LE#c5@2q8MX_1^S3r2Q1=?(gQV-MFDIkiA%PPS$~ig{7HU zW*~GB++BL6%=Yc7#ff<4C+|rne*XM9+2^rPQ${VMy(^KCku^hlbA|Wr-6LLzh#A~@ z{zS2Zce&@Z2BTb_LyvL){3FfStrP<&Une-{BlCbV9)DK`kd16C15E^q?sQn8QOI|? zK){E{wlj8iaic%mFHGtp)qH+^9ghr}S&@ycqHeFY8!mL=TpYzn?Q->iOnxcf18@DoYNSLWO<+T0mEZiHRxsR6G)q zVYzQDtZV`JPO|83!~}(i0s{~|Dm%pJG-@m_ldZv1+cSYv` z^x%7~n^gvKEn#bbCeV_#D#2$awljYmByY3FCV$Kgz{rr3wS3ijD-(A`dWI`(pYi0H zl?7`06T647%TJ^6(Cq}k%Fp6^?pE%Txm!0mq^3*)Ho@|x`Nvh#?2K0c>#wo1(eTvO z*I!9WN}9TPbz*u(Spt$0yHW@q@k*4ANS`ktX=GC=h0($YxuF{YjBJd&uW;kXX6`~T zc4Zba%1I{nxVHJptqS~s#7H06Y}pW(mMeS&gbd6EQ((KhO15i6+-#d3E$mBAafIq; z;dj>OqNc!zoI(6Z?-J*S4K?zjy5J}dxNfek(bzz9ytmS$ofGpt(m5_LhRz|JW7(W93zlEDdJ2=5X+A}h#B zi2wOLo3OXLgIKIGjV11NNx>!+vCBZXT{#q7=|<`=Vv4NQ!o^mCvLNgjvgr%Zl#tT` zFn$1&j6agm8Bpv5Xs38Q$*Gb4qYvv?!DKaD?2!}ThpWIz6C$5@5nJF@^c5`BNqs~Jkr2WUVnaK@ zdTSTl^&8%cC#PP}f~a1F!2-fb5Gs3LgcaN~=Rv)oCs>av7pRoz&qj-Xyn;oXd>u0( zqC1}}C1KeD2WeX zTO&UNPq;<||0O*4ex#e`k}~c)#61rl1vPvdqLVk!KQ?w#vFmUEhIQ0kC|W7`M$74$ zd~sk@$cnHIRE4bOJ3mmfj@Wk27-9AaDA=Uba~*1pGTzY~Z3#9?1!j*xOH>#3`CdnO zq~%FwLwi_(r&viA6M{r*D z$>;m_*>?uQN2k&L2oO@4!G4!pFMjL2j2@aX6@4Bk?K&kFonBQ}^Ff%y-7N3fR>KE? 
z{~9efG5Us1vj5`%Zvq1g3mF&)Pt$v}-qj@}qy=^k26TAASZ@@XQ+9SHO-Hu=v)7?} zBZAE^-zKutyC?aCipnyudCRe#4tP#6pp@eEw{G29`g8^HJiSw^?Cj(~?}vavb1VFH zP@4;c)vfqNMg*R8t(spNtBt*sb`&{vcU4pBeSHgW+*m`$9?tPO}28cP1p zqDz?CGyd=A8snLHmqBfM^O0xuY9H5sXD8=F@US_MG%HYnSFc-ltf`(?)*cfXfD2W4 z$`Cr}DZsD_)Qlz&fI!7`pvs9Uh4EPmcLL1otHBY($oWXa9mt45J$>~nfq|#MR6INX zDPS*W4jV1hK^1`SaZDU2!Otry`o#Z)MsgOE*RL&UYk?D=cuWpr{7H>ZP>^xg@eKdx z&tqDFu_hYW?ntfaR+wiV(F&j~>*}VDzt> z`!m>Z?>Z(R)}7sRHSZ1tI&jN3)3uUX;*@AlP3vzzSCZM8*@SfayFU zDc{&AMvIX`!LR4wrCGiD(Enw?b^qVkAm97@p~GAGmkj-%&sYE7d_hN8RYgT%$89)f zG5x4CmN(n8-cu4&2A4H=yo4IIs#qZ6FwFUv@F{`;pOS5$!oBS8m?T=nzLmUqQs*#8 zgT26ooD&=wxenqGGdDLkrXA=poT3Il0A}0NRWW_?P&1Ym^$c9*A)!(b!n&t08ubZ5 z+0kYM4Ixr|`7qjQ%oRm+=D;g&S8LZ*z>HSH9}|LDUG1E+hnP_AF#Y%5)`#q2aN_3X z<}_%mS~85*l5tF!mQ3xB9y#(F>61j~v*KdRuCY5fIHa1D%k9vN@dpQ$FAmCVi>9Wg zsM|Mvv?;^+T-aq>A82i&3;cVB5!oQ9n4xAuZ&(foo5I<%+aTH!N(3ry+yRd@Fi2qL zWy=$Y5&;gvx2{RK41R=`1U`bo&_ulb(#Mad?hS*}K&>=1l^DCnz$*${XDQt|B?kDA z?gK!OX%{#H$PiIT2m{eUQjPNkXMXfN&R;~^@^N%D6!qp)!#x=iH1JM901iaSc>uS~ z#fdLh8}^x$`))zvga2t68g%I9s}3JN4AVhBvM<&K#qHkL7&kKLUS-*Hsr?tWsHoSBoj#z&{ks~;Tam@l z+%EIC3*O-7?jDTDKtJI&6R1beh9W$fajytB;>Qcs5xJs$*B|hW$usD}nb9S`K*ffa zTBs%4j2nsTYjesf&wdJ8&Nw1^qGG|0cN5l;jxgboBVZK9>fqz_n*=}urc@`%^{ z&m(puW`9g8(okex4UmjbX`-TU77boOvV*Zi5fB*pA^Q2@4$!J#}jQ z;|r1@VOymr6uWA86ELEB4PX**@mQv}wl*8CniME#k7PEAqELO|66ZuX`a>#$BhVkI zK0R`SQUy#x0Q{t~g2%mqO!cCviH$a+YG`ta`lnn{;*}2}}r3sEF@QMsJI5`nqNa82c0-EGyv}wfu2N{u| z7#RGu(a6=s%Wg*C7}p98Jz2%f{2Fyq^zmFR<*N)sr4Q{(eJ91ZSk-4BkMx3!k z=+_8s`U|)MrR`yUH{zL$MH6oofE#gqpt`;*us^ZeIFA8iZyi|+_9*$wDYO^L*^Ag) zboYH?z_b*8MkI_+Ppcv@T)N{+_Ff!6g#kiyc$2`GH|+ZZJdc5JRw@QVS;ob1zO*Pi zf|dlFiD-t6YG$mDmB+=*=G&*Cfq^SPxNH0ef4h%=RE(4J;Q$^p%sG3NePf-#z|%#b zJm+Kae>d`NUh)?n;ad%p3!B3XZmwu;BbBp)>%)$(!ydp0#QF|U}|HD z?{DLc8#gjgpONm2C+Gf$3$E9e^j$SPrhM%mI+9DUDvJDjLSbH%PsMi~YBx{$)61 z4v&OfzwY&@`0wh=o%^mxWNsCexrd;yVK$+7>g8ij!0fE%?R@}US_t;`un$auMs`v5aaEgjMa1k6hI27PQ;O^-dIi{V zV)sD)e2rb)QiGn1;Y5L*BAf+iMvaFJQC2^}EJ?m_9?}(RUTTDl)8}PCVdk)#pjHxt 
z7ibHjULczgDGhU52*s~YhmMUL<7X%R6uN_H!86E$sC|%tk)+a-k-B(LUf|XJigFd! z`A;;N_QeDgz`i-Qo@exP=%&LKF zZ7L8#tHQQiN7GDBkbuz zX2{AbCuy+`=Q|i#mSxBiCI>_^9OE$F%-Lbk2P2}G4+bM+=fx%{KU?G>(DmRDUVAz9 z6UviD?bNVh>VXS*{B1?@NWaS{B6V3Cfr@Rp4MnppO~eGa0G5N|V^booieGr= zrhnC$mpE&Jn17|!?(5>J$Pp3w13-&6dsz{TVPRp$lilThW)=>6eQ=_Ph9LYjxtx*p zLLi%|3hWb?W4_op978UPpRbD~$Gd{Js@1uNu2va2f;X@Ny<6VkNO#d5_?A)&y1@~D z^Ffb(FftDXn~w6F`3@kjVUSNsVRBh7)1`K$jl3In?!RbV`g%!&VkXeb7t3cPoWQ6DC7PJ(oC z=}teor81|%5UKS$LzNr$0S%U0KM9{#{?xBu7Xi9f5Th=TI&d{c?eIsRcI%y>sybFw zMtTbVurpY0g6I&~Q^J2bEEB9v9007Bmk>TUDhp%cI1B=FZR^RPprmB<4BC*}Vc}W7 z0p$Db-UBNWRZGZ#Wf~^~IHP#j_-w(fJvEN`!Zf?vJC2aCJ;XRc1%_qbMH)Rt#MDJK zaoD|2A!E7zpjjviR(3Pejo}j&Op;8e_rv9I5jic(6l{;)V|f49qq*>hcQ#7+BqJYR z5+_FNV-9$T0DD42r~r6@k5cz&c8DJjAmE4-ybh~ zHJo`d4&L}8s7Hi8DlSfE*%~kJ7ly={wyfWItyK*(QV6G%^D<(pMw0+vHiaEzQgU&% zqM{k4Dh?>_A_3%E2KF=>HQgjBuMUueaP@d-k%kRTJ2lwaBn$5Rvr- z`as;0&;$c;5Kn+eO2m+d7*K-cxhYkLFOqlHbRuW^zLD{$c|jPKjq|L?94gL0*@z}7 z7>xl0@O%Bm6O|zB|C(Tmip(nmSN^q~SuoFIK$_qqAqEp*d6+Wej%sN9pi1>Pq700weyfy8);d`|>9_=``b1RGy)WqzF=4uaVndzbi%nwy(J z5!)hPXayH7QutLBJ;>*f1E6>We zYs<@cQI-KJm5{`t*645PDbn#k^4eYx!HwhE@w*w9Yym|j5+-5>U40LcUH?F9r572E zLK41|o0IcT@6b8*>h2m2|1>DTRR?N3#u#BC-30geuI0Yi^e`D-J@DA9NF;AG7*qkA z7$RMsuFRvUnl9(hD**DO4E$?L>McDnHgoO~Sc_l7j}n@a)L5wsaG}onrX8Dw3MV%q zPpwB`gb{;|0wVj8x`;?~WhqP_iR`-nP#EqzItrtkYeA@SO!9mExpVjK;Y;>DV0nP) zs95E^vP?uWB~DlF$Qu6G6;766pSH&Ku88V944X;W@R56Z_b8A?iQMMa8cAqQ&s>I% z7R*JIuAsS(&UCJ0+d7PpH!l#aT}W0=jteB*MaNMZf2h?MRL>l)nR`!FRPc-~rRe=w zb!35t0%>#KKo(2e_F>gtFw;4M?$ty*UcnRWe1vsc1oKm_%rPwV5xgwXM zP9#4K1Jb2?6Zhx|rusN0(R%eGLsR}(ihyBreX0EY1 zq@`62v_WQ>t-n^o!~Esh(7WfP9fw#?b9QFD_~Oq85f?DxhN1mI8 zHu0NylDsJ=!LJZYCIy$&ptXMzMwuyp*i=s;|2BKReEAZ&g4M1wkN8tN+RmVgf{rBO zL-WmWq+>Odiq&h^(qZT4pE?8iq6wEnRp9#sbsxAmuArd>#w~V_C5jLn0jvk$n$Xw` zR*FaqL}}fvrLJCq^7jI3tgJ>&V?nSXL~S?LEn{Gvo-G zx3_oFQCRI8&>XDeE5Rw;hlv5>d;(bc)$Ht8TO1eg5zP^ttf6_|(OttRxDjw3tzQtR zGwZnWC-8|7W}2c1K_DX* zLi8RKVYP6CE-}ht&YhgcuEwf*pnr&5}QG_qPm7bnN7Wf+omzAQ~$wDkxNX%=U(#g#61M 
zQE!78CqnGO-Yx34=rJgZFELj%V6ujd&0j$8p;hJ?Oh!=R z;X)*t=$QYv8E_=E4&>iySoXgD3(JYc2s8i(hUrMzCE>=rc^^5&gu{zvpb*|3GOvc)_04?+j%RGdon1xzCqXnLXJ>$>h6RmiR;2Xds0KC6XiT`4 zg5QRGK|4P?CbDG(B!4&nI99N5R-k2?PJe{+wa95zM5u#L1Bd%DgYY8L18|3A>jBmR@cd%^cWnSceI31^BBi5Pi96_ix6Gos=}<&{k0T=|o`k+t2K{nSU7Z2^ zWu!fIUCs{fk06ZkEiNvehZi=$f#vTrzVz?eIb`1nook3&N&P}Xe=%e$Z;DYS+~pqs zx=+r36%|IA|K!S$ki1lbY%XL(az>MNyz=}noR_!>Y5)lhOfQ^(Y(-vB@chC59vk|A zwF8(8Zg*Q7l@n~vjRN??O(!|5s$pzLAd5ug%BBjF_70B5>heJ-tcnM*4 zVJRh%4CqJbKHE(&VgiA@zzo;)FC2!nwj_G)oNt*6C{Q8G79~rQ@9og(D#l0)Aj0hs4V+#q5mS#qW@XN=>#8g0D20TQ)*!^|zU-(Ye(q~-m zEmvSMVWB}j5SK5JuyJ(Fobh#*eneQ|MJ#wi*F97aJ*2hd|aqks#_uGw`gJrECw z*z}-K+|#%RkzpN4x+Gr#jE&{V&L0fN5j`-8K*^anI>NVgUK_`iyy)qI0W+TV}) zZBJI1bEQKf^RN483f>M=c&>we2yb@AYNnzH$(qQ%M2IZWB70f0ldVu>-#yp;(R}Ck`=95W z=l?wCJm)#jIX~yjObtGt&-?v;z3%(Euj{(+*RD~X@L--0iQgW{tp^*5D)&d-*7)i; z&BK}fTA}31FFuM)&hUM8H|wxz@D{jq)-ed=t}HhM6$^fWycQRA(TlPe5BQEQBjSF( zDJV}&QN}3L+S;ttp=Cd}Z>82v|KYa$U+_HkFg*jF zoD63mDZ@XM`JFIv?~qr%ti%F?k|Wa6g(HbF8$(7UZA?%ZAKkD@*zZ5Pd|Tc^MfO}Q z?em}^DfzRDO$ICbUb{Az;q2<_`u1}BNdsN>%zE#9H*^1*)P`)Qbu`!&6FgOwa1-F( zp&rV@_6!@D--1@VO0aC0i~I$5V9tj7%(zVN^!f2tpF%i79tc=+JvInEWFvs*y8gv(Lj;AFKHi;GzQ0%AX92eDrEocnKox~2mxwpt# ziIO9p83yqF1((|+%rNVXy>a-TM|XSW~Z&W5|_55yaLfIWduy0WR9$ z50nRU8PEl?Pxes0 z7IA=*JN(ht@Huio$aN0QWuPE%0gX)a?)tue&w;~;S~9Tb2uWNVb3zLdWK>oLb=G%b zYQ#n)Ul)kE+9>8u0M>(E9K1OHpG)`<&Yx||&g!~ed=zR2b)=&0`ZsV9&}{2hds$ms z&u73BazZ=ZJnhDfyhC0{ZnjNX)8mF;9&sb3XBL3KqrqrkAQ;YIcQ>IE8Gi<9jhAW! 
zQK1bjfQrIh7y*%-PJ<~CgwKQ6D@#OnZa2~X&tf@Y8g!ZK7S@J6WH2yqY-s&=hHVQ@ zjrNG8?Q)?oP?bH1Q`ZQST9GVB2njx34PDW>;K{*At(%11zq4k4p^l)J4>&X+ulMjV zpd(;M^}xR0(bP~G&`?-iEmWGq7mfjmOHmw~dx{EBk10g1QAD$SaRS|ipyPqLK37I_ z!7U^fFa#7%{Zxq2M_!=w7T?KmY+t@lkTm^wb1d-$*b7B7~G8j@aFdr-B)r z(QS%rk2}nvRrP0D8U>r70@>5=&1=gAw{QOmmV%?O51jTELP$qx8eefr&H%ItWbyfM zUWFneQcO29&s442~ zjR;kYklSE=XU6#?y2fyqf%_^RXrSNobhOeEMqfwqD)r}=STN;)O!*0r@UQ~aV~emv z==Q-#^|7X=1(U5n2F?KwYX(IJLiA#|Y+hp>hin;^NGSqS0U1h&hU&*Wynk2yHyiv?!O)N4-5fSU1cC!+P4=@8Ls?`dI?* zpe(Hi$PGufg?K5xxle1Qw57Kd#LoXA` zmjmW)3y>df!m1J3P-I^RM}r$~*BaDmd9q?tJUguc`>_CsEu;C0us=KBlkYHD1Z)g? z1UDdl*@3yvs5P~kUtYzX`wdcL*dShx(g=M3n%XMT*k6BY+wv>ZgFlOO96zdeLH&L4 z2dQ`E-!I)?w0z;QK(+-<--N9{zu#a#*6u?t0n!g}0i6+|SjTrVzuP-F{Jdt(9hNz4 zG^#PpwC{K`s9;R~SVSYvQB_;x#)XG|FoW&BLDa=^CI z`ra)*N=&qKm!>aZ&d~cXX)an^`0KlwB}nBV>~CRIOt{n{LG8M*apZ+cdhUXf zV^lbWB9S_q`!KD9z#-Px>9Luw=sbR~>l`a6H#aQB3+E2fPPRu346qyu( zlrrqcmci&lJ_tBgA(DEU*8M?S_4HlQd`1gBcRX_xqZJocWf_qwZ*I=Yc>VfyGpGab z3X!T==%{IhltC&CS!O{q(?kb~ae$HEK}$z@{R9&$u*Fjh4xsXkMpQt)bX$9PArJ=j zsx92>ctYp;+jW6i zJ0AHQ2717eOV}}D5E$Y@&m)8L=l?|a5Mqmj?v*CulJWE~X)+f>SG3NZd!&>CW)?Q} zJ?II&SAJ6QL$tEM4vY-O>V1n!vMG{3g#823>UW4ck3n2qcpuDyMlDj0SVx=gh8b4H zL!7-xvlLQJ{GxpX_5r&(pX5g%aGq-ELnx8 zhu-o1@QI-VX#w4_pp;ogS_!!k>sa6_b5kKzl?@w{perl*1wB{>3 z$?u3}pyHNcb0S>1sM2T*stvOz`ximG`5RHH6-^>U;gN(^Dmt;FG{vX?I3f`X14Awu z&e)ciWiKg|+vI)!{#*ckz=ln5H0O2S!u)02NQ5R9Kuz>rJcg96Q0gCZ-=cMFe0(EB z5gIrTO7@(Q(QN>B$uWdW40Gp!U{20%sMfI6{*Do$Z?z6Ei0VchafMA06wwFp``8k^ z$Ll;WfdNEDFuXcty8_3}EiGY_-^6I9p#%N!5D|w+yLT_!{3P&b5B8b{tMlhUGGge6 zE)U**;DMLByR2L7L<$HNGCJb%9FdYLI8=FTaKl3gS*X8}V^*Q0(dsDjrM`XVqFz%J z!!U#1{OOl1>mlvK>f{}lN&>x<6e|NBS!|qzy!kqU$hjx9-H&1?k3Vz3Z3n@-6txF6 z@%&d5LT-4nTxQx@0-8wLKuo`mrTYjR2s|=xz{}8j!w3+Wf>X#Aq+pOE0+uc|_O-Xk zMT+{iVj_!$Akv4t4$s9Luu80vfL2n#kf;)Tc-?ib;0;AJ_`9fbfG!7X9td9XPBDI< zRjP<6U_czf4%Y%ryV|O6)1$@xz7HQ_&@hN@cz7xwkor!uyWtgQGc@d89ovni0n;)Y zML*0krRPi35-L77T7+kNCe3^^0^F8)!;EGiE*h5VIQjiJ-Yysf3eAMN 
z#zFI3N&*i@D>WhJkwFRJh`vj&iR^lP%gKk$cwJl5QgBdU0U4T_O{>4oUvF_C7sWe6 zjyv5No_%KTep^HqD414sR-n(O;z0O0jZ6ivZ}f;cnL!@t379Fn^!ym|(%Xu@L< z1Wu_mVgSe_A_tu|pjw65u5kAr?6#(XK@z$;%cwXDk+URqLG!UcPcAvB2n`SPI?L!&@`Wd>+GGM8G#u9oD`^wfBzz zCP)W2!;oD#v_~^zJU=@ixV!lIt%51upivC%z3``iy0Ze>~6dP%61!#al zkGJqKXkaKquoRNqBiQds>>4;}$BbeCo%n)B{Z32ojydbAs;-0qlE&s5q^a{C$6v{C zh?!X@TTk<#3;~>`cnD7HFBk-+*|nIWk;BE&o1*fZfZH(j9K$ojzF2~AM9ikRp8I=D zBism17yN<3c5868MVFlM9(30kemD z>QdR6f<7pPNRibE5RWJ_kV*c9)lP_oU~#YZW=$+*XyFJA8po`RhsIz~mr}*8MidS3 z0p!9Xwac(-2b^#mD0<(bw`G^($$1vQ0f! z&fyR81*1yeAJl)er66ril3guL@W}&(8=qK&jf*aotEs7}jolOPw{>oI`2Oh_$h;G& zAV4wfTV>o1G!bnh5M}_vvo$)Jt?uWc;=pMCL%)*jZuThaJ`IE-%sSFJ_TyVt?1 zJPxA+jqd_d*Me=7swV`_>BbbumLLcmU^iXhOk>=NoHiO88Ii>q5=X?m=iqYQgJw2@ z-y6Mh$55o*_4X!vKb`o3;;86*dJws3crDKBg{e;i9wzSvX1Jpp5)#4?*A_T(mPXZ^ zrD1q&?Jv*MkqLk24VMO~Nc~a^HuS-ELA^3~LHbOzUvML~$0MmyT!b9Zw(Uyu=g$k8 zr`WpDwZLE?`#G9+^As$>61!u;;DsGFDl8QT6G0j;@EpP+La^`!9%cipu}yi4Y~XEB zTv7vTxcHP7(*ndJV@GR|R|f3a48h|kF(>jj$j)St@|w9yeIn!R1vIdVkF^; zo|!UU&P;c9Wa4**-K{IY8q)K}6ysH-AAk`#2dfi>P5{V?>*Ga>9@+}=8azDScebr^ zwL?MRZ%Eh&2S9Bt1)%ep#;V^CuPEdHL|8=$mUc1hkD zcg!VanPrRzB|ojc_M318W+uYr#)j@n>XL3WxFXM>32UHPvi9PxG477-jmZL4$ie{j zb&RVWi~|qL5>YyFF91xfWKJziOn_SR6&@z$l~HMXS|13l1s!)_E@8D=e_Yq18xNGx z*qd>KS&+MWAwZ)yBUo-5HC><{>MC-=jb+pxH~S#OK8qu6vq*%j_Q==y6~pu(t#g2M zhJ4FKW+04pHCNOG7-wPNXLroUP5!?8TU| z>tX`jo0sjrf0{>LBE-b?U?f1Fq?Ds?#gExv5o3!56XYJTyJbq{#tNX=!bB z!Fy9}rx1wrz5xsgB(k><)xN)C7t@004h#N9g96oVurjW3fJ@=TF*Qnd;es#5S`?>tof5N2H_?kF6 zRO16wo&X#0BYgHlmCEBn-n7y_|MHTtA(EqiuxHPTVD?DzXuk#ihLAu1z`=uAr_Mr{ ze17@MGU9bGIzSr;flf|?18SGqr{AD_ZOyYp!NE;@JIWUq+7bw^bd(tS#Ax2-p5~!A zEY9~hAA=qjc^E)2Td3#3n})WvrPORgRS$|s27~l^Y1NpVG*_BlMpop`4!1$sgsivJdW_%$;R#kLZ(p(RVi7R7HsQ; z`3b)rJJtgF;JUA`id(9m=+cF*A>!A2!!SDfD$C-<2hPl)=tN%*S9Bz*H`xy64D|Q2 z-W6VbYdgGdEiHA`s|9!7L9(0sM;{=J-7P$aBwPgGQ8A=U!b}&TY3I#ky}NnmhZ!J z@+;RARvT&G{h@;1ylH+c=vO%tfF722{x4c2Prp9?Vbj}}FN0sYxZq~dxlNE%-!unv z(#ppFfr`Bl_J(7+?@DE9+BdmAFy!o3PLAvYaP37&Cycn;7apBcB6;*-{tDKBj^h%K 
z8bxsdj|t~xPtglvQ-i-?fYntv%(-Cz z-1)hq?FAWasFz3n<<3VVIq${UHXUViJmJSab9EKfDD=gf0lQ_vRg~82(O3w2nHcc{ z_Z;@qKtoJ3{?p{G-JV&SM0T(r9ijH)EA$s6H({)G`S&N8{*a9dm=7ZQ_m_#33RzjX19pTOiX-1^tv z@^QQN-o+#OJcmbl!R3eResY!g@pAw6{EffkUinT-=Up?qL2ipaDK971%-GF-Ao>9K z`hV>R%utDw%ud1f=k?%T(eQzT@&+?ZPjf&C{Xm$1rtj!Jd76CyMfqWPhBnnCCR*mdPV~3@{%3^vyTwbiv<6$U8jtr0BEKSA;MhiZVqc*?qkcymZl;aS zS;n){Bbaf29p;}oxY5@ETv4zzLkIxt5ZeK-sz zc{AHCFndcsJ80wlrJb0FLo6D3XyMF~z&~21rtwLCfJ1qT>JmI2nZt46IS}e-t_|95 z2vOL^s&U8k4-HlN#-mzO;7EXl64ro-y`2Hc$@|i+d)<(r(4fY7z+rqZ+Sg6V$Oj4w zeLlQ(wU|W<^cW8ofayW8A`G z3yT5k#{K_p(vS?~>3f`b5J&M~$+QL)NY_sct?y{{xq9Qqe3XAQlVx;y-ea2X4at&aH$Re~P-zy-eFZ#8eG4}M?ACKg;0M;JtH(p7# z7{L4q19YiU3lu>jr2HtOLV&A&qjyTic_*O@Vb8#`SsOk*e70;Im%<=U$LD=!{3zOxQ0%JQNF~% z0MWt1pxI7R_zSf$gVM_EvYDO~M&A-F-hUa{1r!H~X)avKN=JfG2P~||=t;@C`=<*7 zJJAK43kw4ZLsViGbXfexGHhd|pDI+;)U-*^hiwpvFf!2u9m`(L>lw&MG=_ECth+7J z<4L_3oRf-mZWwLQCfR#r^T|A8*vo+dRuWz%#v)8e$5&9dk0x#M`$RAPDIGbS)*)_- z0zNj4n#M2-*oQtF9d}*0f}t}ULz4-9e2r834s}KO1uzz={uel-YL&Z5})_ zmCl2`P3yod9fMDohD9J~LGn>3P{Y&?rXBTXxOmMpUS$M-eZB_N4}%cpopxw{3)Uyi z4=F=?3yXN``vkBoD@V+fFbjw=&9!>-KNkUZj)Qp_FJ9QW^<&Pe7H|?kCAoz`jZrHc zO|d&*(;wP6MC}>kdcB9w!lnh!1Nwss3KlZta5Ph_0nL<3L}ko)2{i|aa&H#AKW`&t+)XbuX_;lPV3X#q3Q_=n|fx82=+`kgR4p4f5dD;WMrLovw;fMtF+MOfg% zA(Miy&_l!D8lN71efzJ!?iFlv$pDI3*$wkoMGYr9I~Xs4>@5;z%aU2}GIk5aiNQHN zJ?~aQkhA3GB{OO>ifeK|kr0bgn@UBhI}&3zbx%5_>#W(hN9c1%*v z#E|y^9RC7IXID6=5Sp_Ry7nfO!Ke{7T)SIP`|)ElPA$S+;-Osw!jZ~Nbh6|^YrRJH z2$UAW7n8Qsy7?vk0cc-&{rBPmjBkqH|M@8QXod9yobvO}SIEd!lrxzxgVeZgiXF{cx=7RUD^#Y5hX@vw<@pDc04N@5<#_zt*oiHkk3a;I1(mTLxvWkQ zlsx9$97AU~`k?w>{Dsyr7g#T?dK>Rx)L3T0wmF&w!D>nhBH2+ zbYVk%J%fSZFV6?qy9Q%vx)hz1n7)0KmS=kf3v7P!x>^_mwq|>=VMH@vj9G>_8rlYUu*{j`|>yqen0KgqWf8+}WS5G~uge8%JaB#&6D3LTS@bnWHNNCGq7Ks~u3vFJs zzwvSUu)l**=Brj)<1{7sh8Cd4_U_R8uj_zf9q%=DmSb;kOf2j7A6adu3ES)~v;_o1 zlOfq*ziH!bWKqH*nAztl#h%ea^Dxl9ftl-PMN>wdXb2PtDuTWs;JL}kg6nC<4I*nM zO~s?LWfdIJdO-&~5EKDAqNNUk?GrZeTNID5^2DUL(R_D&(UgAF)=x2<3C3}4kVAC* z0cj;!$5in_QIT&1COmCSQ-PJp 
zN6Ht>m^SDua-b1PLxXEDauy*c44QasdA2}X2ojZgve&;*eOvasYaAiLtB$0hFMpo+ zNWB%pVybBq8-qLwY;0X-=isM2|LqOO)Nk2otqVP*<+()M>=z$B`U=%Z)=CUCps`?d zD@pBYSX}^a90P~WwXX;T`S60QT?M2b>FSHFw=d? zNbHJRCpKY$@}#juO-nDyZXxbIqPv6ZKM5RVrQXm_D0$S2zYb^NSUCIsoFg=5U?V9K zL5|};0kCbyyr8Af)aT%%zeO?|V?Xe3xN^93rSl;ViZIuWeZx;_z%LDRLQE&AoLPx& zxwrNj2mJmL*!D11HvKSf&%VUHm2P4ZgDiefui?gYaas2>S};hh#BYxsEZY3<8_An| z|F-Ynzamg>_}?E`^W*pazx)+RTpu2i=%|mxqjyQh-uXH;SvY{!PrsY5h*WS~H||>J zb0f$>n8kOA2B1Deete8a4@a6ah7yH`cfJtver5~87fX5$=4|5W!CiL_cb0Kie?km{ zOh5^eZNDjv|DRk4dO`p6`z68bF5pmRG}dqo>_`mu^Vqpn*?Na&(h@c{8?L6|k&)_$ z9pH5JyT|UL)WQ%5kOLvXej%q|1|zNv*dL^)+UBq5ksxm!@aTJ-_yu5(GczNdRaFlV z$O6?SfvW61lwas@k3i^1nrvaPZoRf4I^4ZxJ5hoq}x+^@$wJRM$IKt>8ztCLWSz=fTpG+}n&4 z1}}yG*6x*pItxH#5(nsy;Wn~e|33ULD`lIbqy0s?|P2_7NLHJ-up4tbR9$;EnQtYlz`6L0h?cz zo`qo~AxPV3GjRdNpg0D%*Lp#P3AqIF>!BN~1qVg}Wsz)-%<>lb2;+JA#TnG)SYUq( z+yJj7uB#>Sez z#wS3CMr?eEsSxtBnLQZUfoyS*uNlqLQxnwp0xbv3Jp?E1-mJ$lQQ!76viIy|IrOK( zZ{kN5v#mIm6CCpP49B5gJtHnujB+oLTBf$+quQUF->zKpcG;qch>wOlg3oio%{Gs3 zTS!sSNM7+`w>>YNcWoQ7sY<_P0c3q@wG^lRt-DL~h8&X~Nef$&T&Zv17;h(?5sA zBv+{#p!QZ$H3*v=PR%aH6zwyyZoT7`@s3_RX`e%Gb+nwC!@!r8NzwXiHkWdL+S(TP z;#+}b^;w@n{ueQ|NsA!jG*Q=;(^#SAMF*1>`WD)^#kyltHPCkygfn&a3)Npb`Ym&$ zQKawX92LXZ z)7@!O8v?F3s&9C$4uSSAwd>9F!3RBnmyoe)wCRjVsl8ZZQF_>-M%O0{)iC~ZHfnfg zcyR&@6h=dEV;g zD{P@x1>YD@()Lo3fj{;)UTH;-!-YVxUNw`n&5{@A`gnEry+6v{o?NX_=&fer>!qX` zd9g0_tYxL*Sf|*G?@*PRfSPHhrH0+t2JQOFZEkbKD@s(<>N+_ET!g!JAJ_VKY_i? 
z2976-oOxWK*vP04r4?tIroLIz15SLsOj;+Yc)v859&>0Do$ofe7ZnRN`D38rA)w1> zGkIqaW?!&g114Q@G zVq7i*ME6H&u_anCn`bCO2yx**OZkI5XaRjh;ETVr4JQ*^f=N6PZQ=t+!8$X%G^F#QS7 z*o)RTEQuxbe(*){NQ#a&yBNLrnj$<>Z&40KJ}USEf>8|~IOez)soQ=DqxZY$-D3Zp+D_OsRlg1d%}nZpRq~{DD0%KnG1loJiXdy9Cco3g z3XipU;*1NAzyJQbC#^Fu4F{E2(EY>Wk1kJxN#z^Tg+1HS)eofk(|GWYPGyQvv4pJVL(vb7;USVq>=(ah@kmvudFwW zOVjhn?WqW`s@*PxD>WK9d;YJ@Wmq;M-(UaYd%)~Gh@dm!+N`u@cIkWn>Ka-r=-1@X zaHaK5<#^oFw=3zBJVgD{fj6>5OZMHPONDu`stZ>rtKYl6#IUphq3KoBrG)+mTm3Id z;B)vONu}2&hK2{)UMwgmFvq4jy=cB?8}7ex3NBJ){PW{Vi8r3IqQa7bIFIvUiH|3_WgC8?xt_9klcI+Qh&QNuX4K?kQX)!?&5=jQYVc4my? zNe|@AU#HXVr7Qz}qtl?t6p2jp4t}*nxYM7;)@o*2fGwULEHH!1eFS1Tj3TGZB@vpV(d0LOSXaGDhJyzec;Ep~;f|y~roX#8fi~me(ztRiR{G92T&H4-md9SqKsdhI>^G0L?3pRk zfUW?~Mf}g#OPasiaO1SAi)0y!Xn%_m;pCF{lPLu%v0zFDN8;wuvv6i**}y{~S%2ap z3g|o26zNd;_`r@Hio$jbTiCqAS6@l3|N9F^B#x>J=sl9JTo%MGI;!Onnvo%b%_9qO z+k0nfePY$q{k0b!^SG|xF4>4}1W(8YBp;*L(&PTh){Qp$9y&MHSVio68EL=eT>JOz zvorN2NpYem&3W<>1BGNK9ICDAjFlgMjZJMVKoIjnebK>{J$mfUS6a5-XRT^7V1~EB zf;KdYHx2^a+`N6qjaoP=Ra0S=j+EJlM(hU3g)I#V(mVI%Wi{CkoUHt#Omk+udS43c zY==?7mZC(zxTNOdA)f-VJj+;XnYB2NhbJGNRI#r&0mnJMd3O$LZYYvx!Lj2$xBfwGC33Vr{uy7njXNjo3Toy70#4h zmVw#WgU1HEo9FUBBMN*CoAOqfX58b!t(^AAo@^IL`a1HJ+rVq6^s7jB7k0h0M;e=s z#VNIyhf-CgrKA!Ic%$1e?WGt|UTtV~mJ~RG*r&4XJFIDySCGxw?f}n%yJlF*ah7%{RR`KU zWlOAP2(w7`L$7q~1rd#}Z5$lA_Q+X&QLKqR8n)YUMbS@}w@q#n5Rg?^;6?ddc=&5o zY~6@xqyI%5-6b0!5#@T$SJzuo;Yv!&$W!F2qH0Hrg{qy;c68Kx7pX7r7*Tgz!0Yk} zY2WGtCo>o%{Q5KA<_bPdRN$fKl}c*um>5>NZo2OV#owfcPRelFc8FpgJbW<;65eOd z|8?Kny8};DIY4i&Qb~y|GUh3H`R|$fKP#2+)}6btraw4F)2G@~YW3#w8+m01#Upd5PhgVNMsOGxtadH)k&r$7F|4c0M?l zdmy^n|K^K3K_D^SX^IaK)&HwwwT7K>thVAe+%UWba#%U4T^bRH7;o2g`V=me5Eb*G z1EML1JD@Y{K9uRIbmLn)Q%-nL8nIl-=R2hSIxRr|ER244e&^j>Abon4|Y`&#+Ml~u279unZ;K!7cG2d(-GNTqRFK_>grYVywAGIk_x-FX} z2_z$cKZ=6D{blUcx@oy&e`8~2lMiC4icPy0nMhnU_-Gf3KTUYOjfWc%-Y7L0PZi~@ zy@w8pWI_i?xV;@7()^KK1ic7iF~bUhDYrx4BAym?&P+AU=z=@TcjeM}ps0Ly9}s{M zEG$8^-O;udUzG~9G=>dx4|T^b0)AEM7RBJK|eV4Yt^Xa-jr?gPOc<>KGeP_f12}$?3G@&ZOPZ#KQzr+*#ZG$k076sTH~(;q 
zOSgDmb&;N*u{Y?Kb~F;wjw^>BF-q*!kn!U?`wM^?{F6!!@?pB(yC;4o59z*B>Rbta z1GQ`?hKI;t0-|Bh1TaP?0K6m&a-2un>De=G3G7)A`6MP|Y=C?zl&uDasT>b9FW4@O+K~yMEsFO6w@VW~kNQf+)=lmr9>5`6#m4ew z`zub#>b}1DGd=QhJPl_3R30>a+PDbRfGCPpO~T9~NVHUN5Vj(uxys~ui^x>qX-7T! zNO9?1Q8ta%CUkLFyW6SwL67`>dVqvCl+X-5*}87#9V|);s_CDn|AqzFm;m-_{9}N? zOjTVyFvl36ti?-~`8svRsj*cvxVRz;BLKMvsm3)&w{JLljD)|0dYboUjy!>l?$+l& z7ADOXc;5UcZ5R5$ROU^7;>vyp**#h>2T3L5t)o|V@5HwkrkPck`^{h^Jm9~JNCbHL@g>Dz8 z-7VvS{@1_AwqdyxQwW0U7X}FYc27&!d9->fPLzBZ>UElVQR@hv;Gy;mkgwK}fG*Zk zs;UnWX{Vbtb8|kG!E-o?tz80RUJ%x~iM-D%`vahP%JB`O#QZH|?E+4|D9AeLh9NJ` zghIim5F0%N>yE7q5Hl0*p{nMTM?V0*ZpFW`Vr%(nt3Pckhh8%!1U0JDl1+#{GDnTQ zMG{-_lwlB%u^?sWJHtp_Q+CyCZ$3THcFW(Lw>l}JP)qbHNF_qF19X{B^VUjxfF|6H z7m;0hc<}wo(sdY}WPsrMY?~Alvu|5?%UB#FW-rcoJq+I(xlhU=Qy?j52a--&YV<0b zh#eGz+LXJ)a4pHh!hqR_t>fM;Stlxj$gf$U`*P#JEzQY|*`sd+I4!xE7d2UWeig@B ztB>avP5r8@Zg$K=ht~DyJSJ9SIM2&@Ro_t#FU0`JhftLL+-SH7;h)d^T}GGA(|{w- z6BJ9<_MkWd<&su2`~qDXd%!i^_wowEGzJ0(I3z|@Q+M_i#rLu1j-9S}Wv@~4PruHzs+%K)xf z51$zEOhu~5Y@)cE^MXOr88l^0No4ccg5LKX{N`sM-*34x{llL%4^-p> zC3|J0a3F4%v7{N}Z%@NtI^J6T#CYjmojVy89BQ~Vr3CUS{8C_k@Oa3AgH+Nd}*yBzreU( z)1+Qj-YAL|kL7wVD#GbC5SXcSuH5qzi(`?LloX8ji4ld&yD=*NGe?OF@T4xvR>=;r z_5LWDAoB+BeEIM=k2Ts@V@zQU^CyE3*of!M_=B?f0(Qaez^9uv!)aK(beo@`6jm&} zR@H&ppa_2tgp7IG5G^C-R1Z=h&G}-_SI%L?8l!Ex?|2jC_x_sT(wn$Wi=`_1bpHo za4yXL(!(R__!N&ZV20;x`ejKtsrR76Vn=Gdxuw`_AYhyk=d5$Sgp@(3D`0|s&B48> zZv(y#qaxDBTCiLhjWi+*gxzhAh8FsY5C}Z7VD^v4eiQ$nKmE@=VEo@+2Yq5d!-XKk z?tu${?{BkkSH$Cv^ioU%MC?^)wIpyqxhM}e=U*5F&Q0dvm`!Yx zrAOwkxsS?mt0d-|?2?@@NyuR{_?R&KISd9DN=TkeVFS{T>SL5(4ua^VinB{y^bc#|fkszS71*A$3IKhlP_2HBgZ;QoxQ zOhOh#Wv7f8GTr``RVs@j6t?Yh;b6Y27E2w-ih;p$+0v0|Dp^dx!lSS#=sCi9V9$ct zhvX8%a_e48^E1w2t9D#-R7HchC<4-;ay=!y*2>kP2gSWtQO zpHHd?*QP&J50d=|-pnA5f}jj0I|6k!!1hVF`VL|oXyx&cE|rMS9gauki+n+xEV$d< zX~c%zIeh;0O`d#|YQ9MI5nWUJ>x?{6{^??#3*e9zj!*FdV{`y$C$`oack?7%Xx*)w zf+0cAUfHz%#he5ZEkd2_$wOC( z6rHN$XPgqbe>gR7YIp*ZS)L#!+YEjpRM7*lN;zmY%L;00TY`1CI8&=3kTrTfb^_C3+5xPJWSz##(bcvWcAcV^SLf^j 
z^6#~{IJ-D^HF4JIa|A-7l(1nBQePiAPyrxD53CyXx(;wmB~Al*?xd?$dXh+ z{6&S9@oZ4kAKi}h_(wfU8hk2*u z*nt@Lv`?i9eronA3x1}93W)u*ip?=A;qKV`K0b>UaP4Mk5jlW06rCy7cK0aBt%rdi ztdoaR8+q$Dpqt@^YF?y~6Lt%igctKg6E5uCwo~#I+n?+7@|HalTh5w$@FTWzBDRSh zKnE>+V#7LVV__b90L0y`J5`^(1OK2|>4ZBW>4A+?c~Aqr1)e+riLn+-Ffu(lc?w#X zG{|d$NC(~Ly_|##LApC7y6y4boCb2=&imAQDc%patb0q)u?$NWp@DI0clDN{JKf6| zUAMZIb>$}KXMh+{yAb5*oUoLbi;KMg``+18Urh<%;WK2fMxj&5Dlxm1MjtKXaAc=% zTR_5X2cg2GzfczNNP4TA=kw%4oKxv`)U^MOvM4glJ8Ix5m^4j8yY`}7Q0|XPv{`n| zpqJiPvNy|@bF*gnBg-=1JH9L00h>dKdt9AlMj8UNytg;a-V+^D>Ulh35 zTxYejp?vX3!U~{in*8IPHm;AMV&n;W423`4+!U4E^g|Lh9CX6!+5=Yg^y$+&U>GHl zx1){MK}GgxgFMJIC{#KE(RYIODuBxD4V3J-0n;~`Uqy{5G~qcv2S*M z#PH^Q$X86h4M}n{aB4AR#16(Qss>bmp0s2FJyEh4c4i42V3_bgz?e#B)ZbK!9@;#)t zu#1T}Sqhx6`{pY$qHbO$ncwcS6*d)hOGWc4m0Rh8bkCjFJ$qxd5NA)j2!4K%bh1MW zk(>MAfdw;@E)VQE(`NI`8gAwg1i%1;s??)e_IVb+Srl&NU8Cb&7dP^GjMa z3JW(ow)tm}3{gOiYZ-sI@o{0+a-?jxIJm-K4sA)f59 zlS9=rdWxOZ*@2b4-fA8Yk9O_(e(e_z%_&}Fe3iO&w%gLiI*1fwoZ`!^O8Ot9ROS z4;$}Pf$f!K%XrEZ4 zCt3a^DXX4OAigI4=nb%xAgQ<6KdO3Wa86059jb22(HmQ$V-!XKRru|F&)9IyeE#5C zbI(gtuu2ynN=_~U(gqy zQn_S$vNN|QtJPf=Y*?4}&VtG%{8?A3Ch%X!i8@>>Zd*(GajNnr#doGi-)5Z*UgL1N zKU{1xU+n1xIc|sI`rg!z3{6j1XvPgjhA-UkNpo5uGgH5`E#%h1BX!RVTm_nWeR?z6 zJaT3JHf`3~(!&ix&<`FuSk*@Evgo^e zbfH6P%@Ypq{iYgcE)E4)d+aIQF*s#%b0MRsF;!V;w63YcxT(R4um1JGS0_8f1{Li) z5)-LDV)4c`#RblntfE)(xUPyKxud_+D|%JN=qKFhY+hcE_$*%aJQy|{{3@5wp5X@I z?5cZimcLATxgLvS-;oXC6NBXv2ZkL)MzvZ>hR#|S6L}7mUMIOyNgC(QxgG3^P??I$ zNbKuQ&32YvsM)A^8sWGzZHdqyuSS|=UksG+T{CeSlQtj!;wxgT`o9)3JHr6#a1fZJ zWOvneUlHNWLG~V5BMr(6j;(WizN;{PKT3uLTV#d9IF=26RZEf``IeM*rsu=sOE;#m zoS}g|Gge!$kk3b9sD2$IVpa62iXMGS2z6S!x2&HVVS4lir{3cICjQS1Ja=SXZgHE^ zb`O}@EjF3ZSM(CS>ImKF?Dt1*fSX1XCLmqCAwB`wxU7SKpt) z_yja9JZ#Yk!~5+_^`9waVLT9P(rCc3b-CFyi*jd_gdVgOOXIn~IZ-(htKQI@*4bab zLOHa>SN}=F)CgC~l&Pglz6)FNP)8UHN&-$``I9;x8a}Z}(o;#c8(BU6ZqwNLcX!*G zT^1ES2z0ReI;0$DlJJJpRn##4Y*MMrp<>lct<;8v(oX^{m0H-MldJh-zR9KCr^tt= 
zz5ollBK**|6X}+Imf;iW-JFhRv4Z3dn5efb9kM||>H4BBTxREvI*guc`8Eck$TY_I%llBOCx@c3ANjL0INXZAHT9%N;C4hjj%vu* z+y0{TfPZ-xo0_rL<=#LS&u14ZB6dkXHMevDcI$PVG^6B0@*LXoVv=v8I1_|kSAyEh zXDy@{WjK89LANSa)`X0u?%9*351H(1XJ1!zK9cPi+Y`*AuQt1}TF}ta1;&FdLG}un z*c8q4UoMt#9#@dR=WnHI{`%*n+N{jmRZsIc^_G>~iqGb+PoC~c%8!uGe`vS`@__z8 z^@Nx5mawp-z=l)W&*KNe#iq8CYv&=h*_rV{q3rGUHT!HYOQ}8ER?!r#++QA4V%u2q zr{D|yUsxH7WDm!fT!I0SN%4XPNa{%qDp$Tyxx{4k{vGkD#(tSM7Dql!KQdA(Ryq+K z31pQDQbt!RbF($~0%Qv%Z5O;umO{Y87 z)6{AEeyx`=L3Jr7)2&W^dl7Ha>gBT)B_TX6Eoq1Sj08KU_vrC#I?ky#m~3#$(1)Zw zs&R0mA7)INZ*UeFYs-B*(kQK(8d#ZTP@elB$qYt`un)JnRi>k&sVK7tph(*9*wneR zL`E_{mz#^hscfmzprzqpXXzsU*kiMg{}glW7i|fiNDtZQG@d7B-qkeiy+wM*y&`{= zGY!o>NuKUvG6pl#uKG(gtO>vruUWt}T|9yi>U(ak76PIXwsZ z1t=a5n0kuS9y6owoBR44tQ9u~N@a&@qxEZ+5f$9L6md8$7}ClbxcJeVz~3_GYFZX)l%r;zq`T3#8NtlJkgK7l;6BR{#p%Joo} zA(#?Z&Ai*&GqManz3yUZxy|kRqGw`A+FRwx?;~JnL`kY4HcM91Q5X-^KDpgH^4C3E z(3xRC)~vS_rC%Rvy9-OOhnI5Tz89CQR8wV8OpA$q4&E{La*j<`dUxVb(2V-%vcA;p zfPt)mR_8ejgNn1Kz5H$THr!m!!76?d@wnbBFD8Ysz;mstqO@1+0`;p;`#2{c;w%zS zUmv~d^a~pUljv1@jSl2hF5y))%CsClIMHLD2^Xfxg~W7}BXZZ~USlj$8|ZL7RNq*e_i|JN*5Q0LR^!{1VyWc?59#t@%(1$^=T za8pOUXT+cQHNsIZBw2mV7=n0>XKDVlKL8`ghkXDm-W zW;0NR8^ZLMKDx113i4p|&=lAoFQ{R~6F=bvC*S=<4BeRsPHbINLd#fD3fkXDZp8D! 
zSMbe$2)zIEKkZdVaGQKjN@qrfy}VBjdj)u7Sks>V(4rog;scw4l#s3)HXg{aTekxg z4~v?v>-ETp3z_(#;04dk{z@|&SO$}<7`9210_ZUI#)d8>oY)46|oZy z_>%p>DxU|x%>KfwnOVJ%A^tN$5c3NE=T8X8|3AMQDn@g1w+3F>$lSEG2I0@p0|nk6 z^@MMyndsDDIeD&Xj8hxR0P)G}PY$d7AGKWk!OkJPm|ko!1!7msi}`8^OSOFyP7~fC6f=mo9a-B6PotsC!7rg`p3ng)S;B^b0G;KXP<%_^@bdUZ?osX>iGf3BcfH zIWUzX*u!ef^44@3%wT$LB7fw_M$tbYQxMToqoqe_P_i8u(eh|ui80DGp(#NCg;jDF+~W_Z3;zG z)Gd$r@`Q>ZzGMqyK~v6-9n!c$I(&g?J%>!4>f5!fQgm#!0cXui2m6h4wjjF&9V)B+Y@YAdO%MVq_7=5)Ugna`8J5x47$P1LZ8{ZLLGc)l6_=`NLnurSFDO+!|Z z@xU0Z=%}yY)Mem8^x(l!QTfsWnWtORoT+JynxCoF0E%mosnbNAs=JW=XkSzirrl5V z;EK6Lu3Vzlqh~m&6T3(w5Zel@ojol4y_gfOnEiqADh7?Hw&Y z{0CY&XRzmy&UI<6d=o=wBp%$e-zQKtM!Py$;&5%h-XVX9*$;8%jK~i;lJPePyV?Kw z|KLw2UX6=sB0cP)K6}+zn-vuXNB`6xZi|39%%RD ze6}ah4atJbY>2};!JHL)Zx{;6Q;X#*D+v9KyxD2!H6kNa7`h0ATdw^=o=^UwWwW~x zAL^Mi4rgGt6_In2Jd(_4iC(S}%WSk399Gt_(WU3pgXc-!57KZ2Vg&zvfqf!4-{1i? zAHi|_j+Wm+>Og_Pb2Xl|@2+zaXf!hLmH+V!oH=V3=Yp5$w{w5m>UKyEzEgF^tiO)I z$d~Z{$4@fZH@TiQw-oJ5WDh_kkR2gC`=-usR5RyYBeUl)IC+twic^)fWG2%O^HM6H zj0D4AJGzM?cle8fd2R#LyDZU7q{HYcF_Vc^q3(9$Entl z>lnK7ero#gZ#&eJa8xk}qB^x~!6tCZ0f$;LzmA2Sb6E44FgtFD*YR}@d&vSv zzR$PZ`k`1#iVoB>`bVIMFx6=P<@Xv{a}oVI(Z-oiZgKhv#5^my>9Mjv*wk9|#A#;w zzuz;@&}Yx~!WRa!5o((fvqQ5Vqi?FB?jQBv`B=P$KF1bnmc2{DJ}RFA?7@p)$)JZ; zxX?g8W@`e&`Hd!S65UEG)UG6v!!GwgTl0r1&e*Yfh?8S2I;Cu=#Scx<)T%&NPm4ws);{`WufE$VDGKN zvP{?R;YS_YQOCd-1sySvQV^BK7L`!x5R{Tekdk!N(NQrTO4=f&q@)B!1*AktDG@0F z2|>Dk>jZT6{NDF_zy1F6{qY@}WA9@WV%rZnpy_{S=jQ9{NAJQKJ zP+VN!P10pVMiouI{{6F9zxUW_ zzz7VqIl_H22_Lm;k+Cm?&71s3*AL-SL*-dWahe!9swi;MU<~xwW#x5K?_8oKVFO`s zFj_K-3^a%`B*ZoO3SHds;taF(7=qgceXP%%*^|Gc`AgU{=V&~0q7fa|E}ZY~jA?4z6tAD4#f>QQ#lC%<^y!dk(iAj{ zna zUtVmT%7E9UUtSUDf&$nTw?llh-}w_`C={Ean#NgRlCYGz=dO545H*6Dhl)WcB~eNC2r7)U6EqM)O>z*uC6O_HRap%HEHyvj5fjF-|MCU0sX0ZgADR)BH>TG|O+fmuji}ljX;mChT0eP# zTw*Me5(7}A(J$452j*Ta*Vye+DuF#TYQNYNHL^+1$T5jO`&R}O6*%?9*50Rc;c z?Zu8HhNl7lD{7NkUBE+-?D^JHSKH-KQnlm}4eipniJDb~PHcNDMGoiy@|PUt`uaRT*aj+L zZK!o?4R0Pn-A(fK9z4Pc=`>~&Ku|jC|%&}&)0lQ0~YA4AI 
zYzls(%AQsUHoVitlBum@DR0U@hYf+6=2}Q+s9U86C{Y@k^KMlAj-BAE6*ex^!zTFzPo;?S9F4gSDyppSbSail{Qlv?Oe*+)1i$+kQ z(nZ9e+}I>Z?iV7cr{-sk0mw!E}* z3^g#Z)bMmX;+H*L*Ax$RLVT929q3k8E4pmtnwz_a&V_P;p5q66c5F3`4#}&V9Txsj zB%*5P8>}xVZAS#BHQk;NYR$E%PyPvfB8&&hj1_YZE$1$}Up>{0V>FYcvRMufi%9{O z&>kAvr$&q1EuJKBqs_vc-#&9Fam)0kAWc-#0_$+(QNi&;Yuy0?O>_--u72z^QbEI# zgh%u$OZ#zQLQRs1B^*=M{zKU{bZR)iVBl-Lp>dS2a^j15oF%qgntcesJ~WP|ukkL& zZ>YpTcQ8M-`%{np@gg3Vqiwwb#~t_UD!AztrMO~l;l>W5&;JEep&_fH6w{ysIpz9F ztwZjWNjtrE&A0Gq{B7{tuNd;3(&60r)YU_C5t2;Gdu8qJ?Fc!!{gqRjxb@OXCm!_k z5$rk8fGM~2<>E|@l=An@G`I^qRtaa0dje%j8qaZmZm{ z79b}isTnWN6P$cgi9keV8p5Y(O5AKkc&Kd8P6GtlD_6=zvsVOWkb?^fq?2KlNCSRU zbx~)zaQ8=Nddl)`pC~<2Crz!H!C$AD3s&*5(-<9fD@G7=%2z=KIz$#U%Es}fF`z2S zYv)epT8Y-}>qt233%#Boa4k_1i}Gs_f+X%)2KfB?*wDQE>Ctq9W=pU{}1`GzvsmQ-xi zm;5a~o4h+EDHo20bl}6>m39gEh^wUvW$&*c25?#JjQf1pK*NKb(g6!SE>kv2d+7 z#&EY&TqWfynWMa>nh3pQLY5c??j~kl(dbXeEFs4xz)6pUs`Re{MjAHvI1tf|uWjiF zCo}-#2vJY@QJ6}g2g`*ZthL%{QVjOl=o$7xH`J0A=Fm)**yckLKT)!PL-*^SjM^B` ziA!7f4a1r{kf}od+2Rn=NxdGy4gcB&82|Z>RrCVA6jeD?i5NZ2WSNi>A2lr80=1)t zM&RV@OT0t$jGhv~pn8x-M@mp{Y`ZGDJ8Ysd#qYK14vLTHQD`@wvW zj*|q+7FlRMb@M)#7^3w-$eUs!lC~_I%A*IKeDGLl3~UDKx7s)HwPauf$+)D++{s_P z@f=}~bL&<_yW`Dili$nu9WBObbyRJ7fGu+#@=UEymZS46xeM`po>UGa&6b{hLgRX* z%}qjGL!$^h`QIPsSs;B=TOssI{`tyXD7~VFk`Vj>+gcKkf5mpHao1dkHgLcglO_28RFwa@ zxwb-S_%E9a%jpQ!chvoZV&?P`vNVA4fx$PhPB{o`>a|llJL60gdN&a0%|PUGfZLxc zRe;47%y3qB&YWt_qd&Trm0#?@4JM;ADH7BHKOhy8q&e)fn&??ldxl%kkTGLXK25&= zb6WxPP^J8a8`OAtsq|qujJr-G58A>wS4j-yg&BQVY99W%q^YGKuomT!LPQlondxJ_ zQFJ$NiT1t!dpQ&GMhFc2Q=WXlupPT34KONax_Ri56UC)PVoLwGCNt`|;oYFtn+6Ye zNhVTLl1ln*@v+^AEK&cm&Q&FF=0`c*MEOoKPM}YTFd>65$V7$1nnO(t8Ih#re(x;N zB%h|jADfp$LR7g*4*=2#G#r1rp^+kjc2P#DDx|9Syv|TZfy9<+_Z(K+*{i zlVJ~Me_PDbOqd5D_zo7BI-dOdGwCa7%vOOoFb2Saf}tUlYbNQVedIqvbF(yWOnunJX1S}<*4{I=dLP3!`E0>pTiWzZ^*(H0ReyW)uO>JT^VIvXZ^iSYB{a{~O$`~1L?82x(*LOwJX%eZQc3xT!3d^w;vgIoM2YPiFAcE5+( zIXd)6@o7qQf6dPE(%Xb7Q#6|9A8d2F>TV?L{5V_Z0r&Pvc(3P6{q6buA6cD5FuXg+ 
zrdJsU-S~X|76Xg-r*m3@H)FD70jn>3=P<;vxjuWdb@lcqwE<&p)lN?4kQ_dV+BDpu zI%oN=>ged_4}XIN2@}}2kd^QFg=QRKBl*(Qp$2i0y|kA zgc%147VMq4EXS3%#FC>QHy`0#J{_pjQ4EdQvNc10%0K&A#ipdb%CIJKM*)o}nSG4bNXM*v3{$E^)*PsFZ ziMe}(Yues4?RI{ow&)RR>v)f<#@NE{qNmxLI3tyY2dnK&>qPKDqUFwBT_`X-(qVn2 zd5gcj`RSZu$kyr|z8*NOfc3WtD&z;9UvAA<_VW}Ve&a*pxhoat+alqtvuFLsl#%X+ zKu>0=V$|^NM~5~e=fY|+z6pha9=wQNgRu%wg`7%e4Ak~Vr;!bL-@bU&jIZcw{A>37 z^t54eB*O4YN9bh9!t5{|eu-KX`OfZQzy=1RSNrqZ7dQXPa&d)G zRiG!2$Zfb1w25V5Azxi{Vop`s(jcPwDIb#UW&uE7rw^K!z9_1AFNM;+!DSb-%7ioj z1``EDlz(2jNTS?Q*2l*P}aLp z*gi%=BbbzEL8H{KS*0BrtQgmW>eY`B)cNfqIG$p*b@g$Zv{NZoSNhu`RR=6x1m^9j zOFU;uHVm#DC+F?aa@kjX*EpeKQ^8ixU)Bop%}~$~jlvQ<)%kvVORe~--P-`SdlxQM zJM*XZ^Im4DLLN~+Xj#{#q>Vd5O9!G_XCyME>1#`|MTUOxMz}mIU=tf&y;+9mr#pVa z&4gHy{0u$zii84_poVdeEi;v5xhSOOdz#i>tbt1&^{iefiK#zSKDwjy3=PAEJ&9E5 zO#{q_bZvv`J{ao8HmBoosTkwM_@BiePHlPaK+@%ZZNr;LV&(@(rAyK*R~n6g^dEMU zYEGl!2|w@h?d`z^oxSD`W?do18RioF`f6`&p*U|U6SFgqAO6OhK5!Hx#;7wn}WqI$Nemz#sw%W6R8*kGvsJ`f@ z4KsAAr@OJK%WZwbgrlu%;=_y~xFDF8>?!ik)fSx?#{uG9kE|QrbIMdV=kW+XG0t#S z7KaS`e7lNGSuG|?CD$(iJoDmaN^beM`SIoMutYAvI zG6Tj_%l75_3i6kaH;Y%mGHeGX5ck$&kb4w~lO3eL-Ygh&br_ZN{Av`vhk#Yo@G(Q{ zxK%ev`7|W}yrU|zU1>iDnC54U75fEmvKDIsZ01p{3Zh{&rN~nv6A$ zHtPp`ToodiznX*$R-P|@vE;$etJy77ywg%e1i!sSf%AfYf#JDA%u(mY=~wj1IBmV@ z#m*UduRF&TH7H>0vAFKs3Zqhaen=;1gcEQ4MC|aTSveR2@IJ&UnGGN`K7Z`;+$(_! 
zEB?S-q>Q-wZmk*ON2mtUOvAwLX63#rYujd3YfYzaNx!UnFv2BO64_Fb#m}U3S%Z_c z=AzYZv_AW+HSx&mEyo6PPT3XAt72ku33{#h=?W^SLriOtt@XqeVs&#C3YXZ&n+Bjb zYxK|%3*b&42_J)^r`VyNd%<|^IE#k>rllKxpFeN7%%ETlOSMy5qTZ-J010@46_XYThM3JVLWu^R{jT&@I!QE*w4nEtnl66DHo&$>G+K zJ?QaHOe;n^Bd4ujsp`<%pjzk`*KWpz zC!ZbpYfyC@7Vj{Wu5)9n-jX*300l4@Sos*UHUe4F^B!mh<_br%A^mk87l2Q}NPvQ8h3DINFN8v&G;5P7fy=?BMQ+qwzK*(8e?}!2`g99a$ zE1&ixK{y(AH+K!*n|;ve(TC3gd0WxG#Wins0s&w_P26Ww-WA@DVUP&5dcENnCu~7N zJc&qhF$U+5L&_22I|NbqSOCn@1=vssS&J`V$8eQew9Pu8U0wf=N;)a2G!GL-omL*@5$Jd`(KV8|3s)KAzg(q{aM)E7kj0XPbx z!Ge%OmCt1^Cy@CIdeG7#jI_&h+iK_o5`ykgj$jQy3ME2g0b}2SF*)7LHa>6&{KzHZ zwTK}wtQN5bG_MEqr|Ek&?=G3VI^2muz-q1}*n-sL%}E(i_xZf3+h{sH-jO6##0G2K z5)4wYQ}4(SgY5;zBJzSkhGB|w+>gLa7)XY<_QRX3fy6ZsaR56Z&=z1gv`*{6=6%?^ z+Ys(t`6N0bv4h2zlT}+;h^%{>I%e$-Cd4dH-y$@p$hoH{rl&Sub3!;8H^IP z@6w88P>D}|bJ^X@p;edB=@xPX`!d0m3X%Dv2OtBg5J-qbeX5YlcEm(d7Fo0trJ@7) zGvtCe+Q>=93{se8J>15qlbAum*9{fOb%5louv|4FQQ#{WTh@GghWIh~;S8Wb)`9;u zvGT=GNE)FvCDZF}a^b`#JW7}@jhR^blFJBv%7`k`qZ5=U$wX08QaLf@`$Ml9HJmf? 
zo?LrD1L;?#8H?F;LPj}HT=Plz+ilC2N8lnP)^a6GtNU}a|2-O>LoGUi=1`SAVfiQy z^Ir0nwxE?@>5DhX-#H=-B-sL2)Onc1jKSe54fDZhP`fN^*uw zJ<8UE4;FWob8uA49p!Z5yHXFbs4 z<&sQ*cB8|~7ceA_$LwEf(G9SGt91#3VX#GC*1PDB>3a>f^7b@OcRa?D=iEg~UdRYM z$T_ktJXYGJ9c;@{GHI^{h}*Mvu{4DJ7_K=?oE%oRV=Z$UV+)C6^<&Mi+V5`h!#%<$ zk=3SG0|+X~#IAirl*WY0g6)xcpC+)27(|+agjz(U0D{_4Ye64%3=|jTte1KO#z@sn8v9T8pIZLnu@2G z4~ikI*bz4O0qr)DF#y!_Ex0A5sLLa9TU^xV29wpPY=%;}mpfBgac%@mF3H)O=AZdhynS~y*rrGvO zZsP%`9-rELJxhGwo~d0HyLghUxSz)5F|Do;K6_lT`Fv!~(G9$={Jj{t#E`RAvwk&Y ztN+e^%-&`4H5fgHPZ!L=srJ5P!YbJfrY^133;URuPJ|^o4Iu`kr9ylF?kKreOxTm% zvLky1tVNoU3GN|13D zL7Fi~Y9)!DByiH$cL#rML`cXdT7t~CLH0+KIl+R(7L9h(fLaD_1bC}{LDpef>VzIb z^{t1(@F=-`kOdPW;A;rz2w*4a5qjRivCWgf6{AP_@qt?kBLAG@&FUGeO@- z;xF4ZS9RQPz~D zYSrcF-F77Mz)c}HV-oxmc`4Mc2+*1&2xMeR`hYm&s+~>)FWJHpa5k6-;W25X{|a*~ zSF@0U$;egGW8vFG>8uzKM7e^m`PpZnO=CGk6JMnEL-Z4;=2=27U=;dFP z$9#5jlTHO!q7v~uu0&%}FVm&fsOmUy*B0ml#eV~1_f8EujG?0+PLF{gj7j!P#gQHn zL8xJT5cCZbhAAgeS?n@(H3MG(8IHW3Xqk`=pTvV4HJ7i_ZKjnA>fDPoD&&A|~2hZ1I#RPHZoQV-$M{QESj(~{^BIT@up5uj+__z78hw>#oNOHmPP5Id9fL1dZM0;nWD zjhI6y+oWSB1)!SS+3A&V2gRY0A-^kB9{wZX-3rKJ8et<@B^|tY6YSQbE>QJNgFO`h zgUAI`4U39qk0u8CNYt9p`SY&oJEUF4s9$*eANbtsNi{?%l$|8G{J2V(+L}xYfU^hP z!utX=X4muoTC-0Wg*u=`w1C*QnNB4kj1Q);Kx$|TC-U4)YJX5OpCZ#86+=wz8d^Xj zEdM-e!EcByg@BLjr}<8@1JTiQuyfARloKtKY_WJz(`W*|U~Q|GosNRJDbZnyLRZV4 zjOvay6W|hE^}lU{_qgsM52N+u--WHS16qk2$&(UFrI4R4FLR;E9Oa~aV!q$rJmi^J z8EBS&S$tv#)rxhGtz&YPamMeSM-x*%jT%l~&&J-v@OJW|=DRrGd3dtu{$1xg72SFh zgrsw(dJEol@+(CAJJ+)1xBvX<|KIoEhNT3~oBl=z!H^v^Nl`GzwTVo>K0v@@yE=GM zgW=WQhH5X|G94?=*S5)P5C@ld6Ml}qnIZt3n4;H7e|Kb4etAGW<3;zs+kzDnFrXLF zPJ(8a1QhwCE+w%G`5BN;Dh(ll$s`x?YE&M|7{zOD!zrcorRm81Z?{~<_sH+wqV``! 
zid5|q@yR47bF`f5-8hCu2nwgeFaNmFZaP(+iA&sQT=Iz2LX`d4bEuP}qg_Z*0OeKl z#o4T{0fs+)Yq0XaHH)K>c1ZQ1J({XAE2(58Wgwa$E^?_X1Gv4q>TeyP>@9JyxpaIA zsC!a%wvJBuy>1h}(KzHm2VOBrCmU}PWkg0?C^9K$Q+I)KmPxa77^R`}Z}sd(rc{M0 zn&02QlgdqU@?w|7QG5n9rzRVv;zS(Ja5lc5%*4lNgod<*p7oQeg^YeGjfwMSQd%!- z#|a}cOvpdtrRx!`t%zZs5Y~}lE$UJlQEx(=rAEt@bk7LG8fUX*=-U2o0pV7K?*ohO z-uB*ZJRVNplo?x(r01TPnqfTuRqcvbXXy7S#Z#@F6TJL`t|JWJ;N_+ zJ3MuEv0q&0?I6l_TZC1YoZDW0_|oiOD9FD_&Yz>>_J!R8K1X(PT=+qAM4q~P~> zBGBn=`R%v8VC>3qhQ*g9Q!)pv-i%s#k7J8x{q)me=BN64zq0JiOiAYS88g^Ei@>zH z6pQ|R!|PxC#2i%W>gpD?kKl0O^C~6|vD@_C__lQ9$)libK|_~(j?;&$keA@f)9!R0e?B+2=+b+3E!HMg59MxN+Xs8EuuB3#@hQGwXse|3euiIm zL4gzwt9NHdE!Y7M3EGMiUw4ix^X*Yev$v%VJjJ(6H80Pdvp=KxIu4Wm<}^I-gY6jd z{1y(5mIwZ9-+zhm`WlzlIMrp9UUit8T5X#f5;FYhP<+g*03nX3|-1K6efgybRB zI4LLRwpB6Gh%-RYA_%81?l5}4EhWu^S0fTm899(j;h(_dROjY#9H`jP*r=ljyK*PV zB6u-d*1ediI>_93^c&PxVK|Y{pe(%NlFZbo7vA`_Ggr!t20Sw=^^HHIDreqUp{T9> z_>HgNBaF*%C`PJz^XuJGAI6$!f%&AB-P|KZeeG2OI2h~7l`En4peuTMiovX+E=8!#Tg=8je?H5tk*0>G#~oKKhtqL^+QJm=IEgrr6a*h-1>Z2s0y?*PKf~@Qvf}nB1Z4kX)0K*_) z*Jl)=5`P58F)qwbN=iZ-e!!bg=lb4#`{K{HoM^92*T9d9FoYq8|K|Ji{doI3L0qHr zd~jK$JA%eGu)+$)sjK;QZ|<{&vRMvsa9AikB_#!u)26B>lcu)x+cqUJUw-|2Z!p%} zothh=AF9on3rb+>E91(Y|NQ9t&0zd{cjL}RP_~Ee-o4x1+bdu9;)(zSXFF~yr`acA z9fv|v_PV@W5fn)ox?!E0PvZq0hT^zbZoDicPoDN?$U-qnTyPA(&8KC+U}$f>eMt2C z71+qG;5{XWd1U!`19@)sWI3di6G&Oh)~pGq2CK9_QZ4Br&R`;PYTmqgWH?*`wI?lc z{8p@?FhZp$Xn=&;`OCI^j4K2deilFQ;^Sp7SZDo+mt$W)IE_0fb0L?+9mav8VXDs} zf#C*XsHfh4?hEwu??+DsORx3YeQqz@`jhtk-h~)(633W!3f^*aTZUuh6)})5kJA!5 z?Vzt0IrgJG|H#&f0f4u_={AUqi>oSQXIbJ120sy7zI_gjukY?dx7;sq3;eGV&LYP# z)>RH`7cnw4F8;KB@-8?#rJ|4B`rB_WBJ+pn=)EElF+oA)bLY;fB$)>y1M=fC@*YUx z)--f^>M~wm7#{K~2rKEJ`B^BMn;)-CC0%I;W>GFBq!v_myR# zCl6t`>H<=?fNuWEnX_k8fbd4}Ofan-yQa%vNDMulQsppk4{(J@Tm_L|Fh-t`948kh zCy6DuTFgEXBtj%miE0Zr8VKctxgC<38~byZEE@pgo40JseaICm-#GL%NvUJ;jBT39 z$k?~{_o=b$4DnMqJN`V5X;0y6>)d$MBVSfy`BphuTsjuc?HyGrTr84K9T#zKx&4*P zm)|?Gx1j!%$M5i+M{Z!<>9KOMQDSu6?gu<)2*fhkwkOEm!C|B1jTO8KNH+4158qwT 
z8*kgAJ2*JVVck(3nxGt`{U@dg<#1-ZWDz9dkI{&}j}KK(NdTc0LPd?)PAnuf_9-I? z#z=kdmx_}>-(*4Rl|*TXw|{u9%4Xkn5$~r}{syN3BmU8WSnQGz-}}E8aq3qmUAkc1rE?!o1TsnirFRQ!{p8hMIsrs0&BBT zDxSsonD!$kcHQ>mROA`OZD}nIz77029K-c=uqC+zv{!E2yqR<3#>_rDoCe(sEb}v< zy6$h^!pP^^&b}A@>0n ze~5M2p4HJ0J`a-P)BPer$an_O1pbk2(VxU%45XOZl}whA6U(}*eEs*qZu~uQ@sB+Y z1a|)a@T0FbKVGHl?BoY&Jicg#lNR>p?F(}l&d2tDUomER#aj%S=-|T2tvJaKK z(=XrO#(6$$8sA;9Nay&X;q>}b$6(0SOX;C*uWUy7?q@t=oyAz^`r~e{yOFYa?%v1O z3uiI3xlL|gnA(=~pE02y&mDhT@nf;#FJ|li^P{P)`@gyy%)q6WcbR?wR{lut;r_XU z12`O}i@}gKk=(yWZeYuf9dhs9y&D4fR9uq{s#pNq9p#MlmH8`Z+n{uok5Cbi3J_Dr zf{?!5R-F{f#d0)Eyfb~=QCDheHW3$eco*`Q^VN;(*YAO9_Z^5CcU}!K0mHImILN9D zIBGajp#ieM))S8pdyitSQ`mdk>EDqMX}x1SrsY%S4U*|23`SDfgzRxAARwT)o*dU2 z3w+8Sq-y6d7l|Cgn@xV9E;ahy3}Tu}_*=tT|A9NF;?{NRB$zuaTjhRUxlIBeOaHxcY}BujBRFs-}l~q?qP8~=+`i$RGloh?{C0xp6#*G zbEVX=W8^xuQ4+nIKeyl(NjOI+CYd+VVTkS3N&E$A$OAzD0rPn9LtJ|~N=yz(m1xa9 zn{G{k=6FdP8;=r%(wbtrpD3bn@)QHzy~md>Hnyh5TK~e~f~C z;YzeRQS3Mf{fKm1L?dcyyZb#b`#8b$?bv=cUOQ*WDEMr7bMu&aD|o_D;G;YGbn(rU zrvcu(ItuN;2#osKC1=iCW~O4wEsknsh942?%*QYc53Fngi5WlafMJpg0EuC|eBRbZ zJ11`nf)0Id<;sODp^e$-5ymK)D{=J|F>FWi_6j zIZzsMd>>ZA@r%=Eo*x^rbpS=Xdi$Bnc(0ATyu66a+%?Ud>dA4%-H-up5fM@I#5_@F zQsQ-ttvtbTpSf)g^5%I=HV5tL^&a;#_;lO~nTjUL7Rz--?rT5OuDQA8QQp7XQ$C(O z(SQAc!jov9HSvUmumUWvq<_4w|;#A~N0K)XKh9KpC z_vlb<{{Sj3_R-o?*M4lR42_LPH_bsi75ed`Ce@dAeP7OnXr$UaWeTM}L#4Jg%Hm&l%G!Y0e zv@U}Ik}$QTmVfTi*pE*k@rQQB+I5F+u6#h}C1Lh;>#5Yl-!^Qp zY|3ASd@Y42QRFLV@ugA&%_Sbqx%Vr`W2Q1j=SFo+O=Df%1N8JlqO}-&Gah%qHbISi z97LiTPem$e$$L41wvMKM}vqcikE3xl1$fR3^;|^(!ENsLRk3TvVcZv1;qChp>Y8ucHQk2^5@UTIU>_9=wH5c=_5D|ZRO%R_Tj^adGqI=M6+`qExX)pAiD6_FP6h0 zD}zHr01wp+ii2DLHhg2mPSW_ed`5?cht%Rt?Cc=`Brbs0JYS#nGo{j*^I1x$oJ951 z3b|fOM;mw+6mDT~a9F=+(INrU4{8@LUi21oNXEoe5T|0~?lrDV=0jAZPhfmN*{&q< zoR&L}x(aov2()pCclo~G**XT}4i91-tZZzIYm(*h`Q9Rh#hcabrGO6RDUz+R&)xx^o5tsL%)E zsNZJ^o|#)IS^{7K`4NkVh!|Ny_MiY{hz{yfPc-|hBN5sB2oq)|#*I0y*$N-RPtd5mpS(Z1YJj%Tjj$V z;y$RP;Y6;hMV`I)(5SyIF7CPb=I=zK9rWK-s{2|v`3wK$A8ooF=QGj+yk{S?p}Dy^ 
z?n7;@26Nf3zYfa|<>vRO;^9{658`%HhP`&}+WG#*u$$Yv`})c<|CHaT%-U<#Wvz3f zp}jq#ASz-7C0pRaz5yt&f|1Rp~!n|8k17`l#UkI-VD z#~Kn68T*EXT{kYh{9lU9IPoj}#frNAwXHzZTTJFNymE$S@jG1w74qny8n)9I&$XkB zkz$=3(K%G^wG=Ay`aSgsZjR3%{=aT2-FYsJv{ZJQAg4Y9aE9F$2j$y>W*?m)K4<+e z4;2e<>+ao3aPN5QFKV}@6{sbbqyf}B-VOpr?`2I)Oe&*KU*b_q)Ks(vzy<=;+T7Z@ z9H5{UC$Cy!G^9@3Yf^1bf>T37^EaB|#etjoJTScXmV1Z-Q3?ZzaFfU3}O-5A>p zMWvb0jj46OgL|Pzh;8H4NV7L6kMvuzbSauxUI3w03cGB&G}TjWjzgOy+fkj=ev9@{ zM1kL*nLxWfL%}8g^h{<6NRoV?Dc>xt*gFeZfk-SOi_CwMb)Z6bp9X1O;$R#)(TMHe ze>qGp^dX?9R**?l6T<4F)$ACeBQYZs4?wyQ+KS4ShJA9-XV8I7R}XnS|9j^Q_~bH8 z7aw8XzI_{lu1CN?b3xaj6pbea1jl}2uc}i&ojjcD#UD(aGb|qtpxM9(dlZ>lKF1#_ z(;QM_^a?x7Zu8&|1^oTWwLYPZYS7xjYVuVwAOO#hgWp6v39T_HHX`4A=qPv~#}p zbD73@{ecW^Fa!V}`3CDrD*;tF6QEXsj22_>M>WPSovh{Am-5E>3;oA|&O(6;q zDq$MwX@m+AKms-C5ER03XENs{C5gTt7KETXFm1F?b#P!nyC`JZ$8jo#R+It<4Q>Sa z!(>N~-NL&D^6dfPoOzry_$zV@CpF1JlIbRFZK7I%BqWMt$U*y>h<$k?{c8#f++Y4Hi{R{8r9)6hwC zpc!c`3cRtes>Bygb&wH0I^wqF0npbM`aX$?yPrbl}>f+X&ZO`zma6`~Iy{hxw_s4>}nP=X)ahx>Db_DCkd<D(zB)U46A+ z7Ot0*kM9J6ctQ_FCS*z?Q-E9X*xK1W+5hK~)tLGVs$H4lIGTK+zr*;(Q^K|IYgM0O zh3o(Lb}KLWP%q-%dGwblyR~6kmSFYu>sRW@&=7BAV`C#~1zlYk3J`wFTiCnBHLHY2 z@a)4BBD`@cm#tnM26d_MtJ#Ph#fVPwiDswq=A4KK#E=8mGbxV*(slusd|z^{I;uHt zWd0^2Ba@k#S&<9jHixOHDI^;f2{B}2vvKxD#IEu_i9Ym%Y-c~9lyV1oLI;V}Y03DD zg#LEm`WCeI2tva%TxLj=WrC3GS`0yo1R7IWoK!|_?kLvf5G41#I2O|&`_@{Dm@l_e zt%a(sG}YDuF`xYU4Ju*+cN)FFk7Sbz;2*7hIX(;cYQA{msxqGU$BrG7!?~ZdT*5$< z5*|gSc68!0!dqpm-falx&J~_3gw6t3b}PiEVaBu*sfq)UMSU;}m(qREX$I)@!yp#? 
zBa-xz-(c3!0-na;ee{E^3gIF6m>gTT9syrsb~YW)|M~dXFokoZpa}S87{8C+gFW~b zLkr(8eB9es-UdnQaRks3;^dtoPi%vWXnReHB0gWdL;r{8 zyj_4ccbxmQA7$`ilyC$IKNdI5e-2?WeO^@9o2;#^jce1?aSO|llHx(SA4@t2*A@kA z^?02;7Euf%!y`Nx0%Q(RxME57Zb=QM^wxowC-#aqNS1|p1y`1Zk~fJFj_ z4jv?ABMih6;WyAJwPA6Up=%&v@$xOQ63k`Gmj?oDZNm;t*pGKs zMGR$9*lHfJ@GaA+6cylDv>HG9U14-I{xt4~W$+golJ~EQZxLGS8s?cGDfV{#EFF2(HCtwT_|~p6!O5 z>QMn|CoI4eUUf{i?lb``PZzA7#J+U-^2Tq~#H^jOu;7Q);B)ms^bT6=USvIFL!_@k znbHBKGC0zR5#U%^+1c4Y&7A3l0@?>__kAm%PF;2{B!Mag78aJKuC=RItk6TGKMhe) zJ`PMT!2UXBW0N>{-aPYbdxR`owb=#jch!n!0qrV!Fos0cglD|4v1~vS%fqViO_nz+ zqEy;lZTh#0V#rlh108F|M{ja+apkup&0e_XEV~yBaNb7RBbH1!ZdWVLoiy=r%1BTL+mCr@KTTeTnfi^TF2{=^!BR zgEic-XU`s8V0a1UZO?r~?fM=Ie!wTJbm4deC{GtCo#w8x=;&=|#aOQSQZjPk!gM|Uk=#-sNBK{%oh{kMBiww3lR^I@k(4ZR(a z1s^cjk>Y~wZ^A7pgjzfzA7Y^^^Ol=`-i2&}vO&5xm0-dW1Vc*o%ej=cm0Cfck_~Pvo{e%cLUGE(yEO zgG+g$amJmpvIJ&NxMZBo*H7PMenLd@g0XW8hW`^r{dbw35`~XkGV&IYA;hcu#U{R3 z1@50jZFA?&MX-H@loo+J-3F?`a`mOhfuMa7ENVHBA0T2d-_ltT8_XWKs`zm^9#Lac zQxM{1rTa6mQGrL?FPyChxnBm&1rbtJKKB%mHZfvb5NFP_YeF&b3XfM65k0431_m%wl=~(lM7ec&aE8tD&c%cudzg%>ZPoe6t0Q1f_`0} z^H~BeR4)eI4DJ6c0e-tXl!sc#LmlBtRNa+`*R|8R2V`vA+0-2sz0Ca-O5N@ zI#E1z!LZp7$I_Ba0f%)eZrrq~vNb$UuO>n*$=oVEB`JxRa6+HQMh4qkyPFC)B^TN? 
zt`)Oaf=co&;t=sAAOI>_u}jVsZddJ}muw*@t>@b=rS$0F&%~qvHwZZ5!DU-mAAzSD zge`vn0ph$8&r38C*!AT=&Ku0CaT|M4O&XwsqIGn0dqOg>+Q7DI7|+QbIdYvx1BU@F zYLP~apwvgr2+-=FCo-r|r8{nMJ|4fp))9G>JJPHPQ4FK!8w4(VJ4;F|Sb`<;QKxEJKD_hV9tqXiXKZLZ zOsmybeF1Q1t)$5`5b%=Y5G%d@U?%zVP z`uOczS;~|wH#WYsP)o6j&@J?jKtkX|-O?gvg5XL?g_t^c>TKVaJ>5(l6!bb*5{e-u zmEyY(umi}}7rEdLt_#O47$SX^x10k4wNR&{qa)PgXv|RtV!aZ6Lmu`PI3q|OQRLXz z*cjwL-on=Md7L5S@Y8!lZl5>;KhFkQzE74O(HSr@K>Y>G!mhk z=CG9@D*_REf~X~!+V~9k9gqh8QY}wz)gZLK96~}WXzTh#3W3bEd+c=|Q;RvtEY^8; zLhHGp?blBSKxm0YnAE04ko_ZVDU5D?@er&@F!sT$hku>LZ8q^!C@W<8&p~gI{OZ+y zq)=tRpyn19#mL`bsPcr=r;Bihipq-vOa!$^`_O7pE36;b7Uo?Py9SEzeD-=TK_0_3 zMG>mI3j9TdFFl&Y$gmU~{*Vy4P%hQxT$SjbXpCOAd@bu1?o=nsnjsXo#6?DEq@P0F zb27oGLN3W%2LcdANTr#iiJ65pFAdw*oeJbzE9K?#nu|; zC3t&XrC)pd_O0CQcplzLAdWi_eRku@xSSS2)E_lP8VZOB)J>sdEfPHiA*u_js0v13 zN`VA50=&a8jUlu#SkT_;!fjIAdbKb01#P<@V`NSF>a&59!)fUnxJAWiO^A@Ys&mjU zbIz)@p4h2)u1_} zjt~CDV-@Vv=+ujby1LWP%A{msd=jq5m$~<>T)Fb|dE0NmfWpuy zB|8i!U||PhFVyyr5O^AIs2FTHe+7>mSo>2pbu)t(Ss^vFz+|@+nf@DT*Q%;iQB|jR zK4qBw9LnpxfRV^sro-Nms;qtU9Vkp+F&7+M^&9~g*ts_6(aGkgBT1VM`Je$aNK*Gh zET-?T2T0D!bc0D{oFu10xGa#|<-h!L_rZ2bt<(mhUHy^^oknDyCvTNIZrM?+VGe!7s&pP0Qr|`e<8klvIpZqaVSOK$8@jG3RCL-QLGGIlKRMl)O`|(>1CA@ zZVlBlY&L56m2!NdFMSU^#!~Cx<)%n23f2GWY8ytA4EE}7h8XGfJjby?Js{#4eRg;S zV{whPdD4LX-Xc@)IQ`^Gs+;DFR*}zjh2k+~NKpSc_InWtk7TZaB0M>!SsKDMGmMW$lC#5d=jD z7;LCprbA(?|LNzwjMv(9A-IbrF*-*@F`tgzP=-3`e0Kv`c~|UAe`Cb^{2PJ6=7o7ON`&@9ghO3xvcG1bCvO005OJ|>6k($^FG5#Gu7?Y5+X4t7 ztpVDN2qZpw0je8u%77O4O7uEHjU~W3LWYogiqUHEiEL&t zVje=`7lQ5@N%Acb-P>QH$e^u70TcV`nP{j6P(3Lxz5}psmIby(h!P&g)>0U;mc;7G z{JdtjERv=i@DfX8cT&V+U4wHu98?er5R}rW*w}y;Jze?%S{FD|bCP@$El8Z|2hKbtEgkttyfp#belW#cXhx&2KsfaP(fIt{R$cwiB50ZouhhLp|{I9?M60#pK3o2O7 za1Or@Wdng$wDwWg=wq)S9(rOxW?OO?Y z5Mb5(E^7o&5DULeGebjY7Gr$fEXExZ$SAfm^TCQ4-Q3=U;CPS;%fUGH6g+z`yvx#+ zE6)NGJBDI8AJ-!VXfVIV=>m4PF}oM)#@)WW=$tNJxe_RJE2WTzkbUr4)dPcrUdYyT zbl5%MW8QeB;L(*@$MBh5GS6(st0Xs9KYIA^Cc6bN2YAJsQqxmkU$2FM&$+QEBL!({1dnj 
zrpistn@=;>ze(%aNcW(s6JVxQI%tRjo>|GocnCMyak}B`lx_e;UTE0rJ335u1=+d0 zTdBo#buGYMl!DgvCcvz@ix%xdqTdBU2%2(y>@StIqGdO){rTs1bW1y+@!-P~2|_8T ziziM;JRfA@o4F3R+aNM)!kpI*L5rri%=Ok;c8HX^5P}8)K+J-Q3c@s>B^!_2Lnq;d z*iust2=evyj+<*i(8macm>{qYu0k!v`D{g7gNB3Ze>D4pT zXatX@1BN=t1ipV$-FENZ-%-|MSs?qZz2$noFd#+i((GSYSm^!hW&Ksumb0;$0L&9|})ACn%7>C}r+HaDY(YGSo~UxR5Pv zvHRo^w5czsVm^doE}Qi-!}(AvT91V~AO%a&7f}8KXpRT zyx}d}FWBfG;q@O-x$;^LO%A}(>6@Kii0G~T~||EjPO zIG;d!;AnscWxU%dV*$izK?p8Kui1;Iw`J!}1w6nKI01w}^Rk7D>r36DmAH1>RW34& zk5Mn^6H{v7M&jlvu(BT|9-=JCLXVC)&`%IXI06m9Hs*})=WP(1)@6O*yQUTwZMOb@ zr-9Yhhg$zm=9%MI7k*_?h_)y|6f`yO6Ql-EM<4DA(p9Gt&GrD7I`P-luGY7a}118@S3HPG`c zPQaU70Cj;Pk%ItiG#~~w$?>;XMZH^BQsIsj@C2wh(vB>UG=go>(dg!`nZ_7j^J?QQ z+A>JqIcJ`p#hWkeMG#g%??56bs`<8UlSRraMb8TK+t9 zVL*afL8GC3XS2D*=y09p%w>QCnpIFxFfcH14?V>05?=M>5`rZG2~mom|1Vv#HeLTkpaX!>^)jIbl3bxT|DVUS1w0hr@g*R_w#tmo8l@g*$v# zn*tw(P0s*=(qZ?)TMNeBMnDHMyn}fRR8s-&m=t9?_F~d;^wG%2U2POvdT5(<__2FV z#_CFK$RPwz4I6EYCtqFKUql~;gA0_gceRl<8{X2|-2}9UP+F)Y-z-?Y6!az0O4gaD_pnUv0p-0%uL>%{ z8$rYiK%(}@!`;6uU8+Sl!>6sk)`^zPnJ{$ct0jXp|N5PLnwnrC&4BTp>$X%2>`alL%rFouAIRww8#T#6Mzo%bQ6LCb^E$#6E_J7 zRl0F07m-bb7hfS7_LGrnf>mmi-h_=^?+#0VJwbv;W1A0jNE`WFhu~cHK%owzx*~Nz zEzvEbk)vbaL<`Wgcyn0HymJmngtjE~%M%Db*!{4%^XFU4F)+o*Fuelu71dqFjkxE0 z03Cu5Ijq|4zI{H;?u9C;!aW`KX{gzNR;k-U`NIKY5L7=Cx{H?s=tDc~OqvS=b^*8A z25oWHO4lZrMATZ|RNq~`{F4Q!&K*lHmkC<7SC&SK;By`(%U2imB!SoDfD^$5k5r7# z)S`ewA;+p}3+d6<-d+I&_+7w@tVWeg%?0NxbRI%SAceAu7RL#x#R(=gcqQoXLnFRm z*##jlQqh8b?Ttbjz^bb8P|3rGqul{(RM}j(;Ej?!_rW+;%*tK6cEtj5xtctC5Ip^u z^0lMB!CdAK zMwWK_y+&!!zWgjVGRaZD)Q%m!#x7`5=yN3O&R(g7V@Q{7$f2cZ)xP1k-)^$Ug341> zMnkTTUjD8&vXxf(7-&EG)F{4-TP_uO36xX8h!nsNlN`CuB<%*Vw+KgP!0k&RF%}Sk z%1NOFHPS9fl`RhYt8@l`Yp*J2-@Li16y%Mt+q8i;(D-*YLeBDG8JE)AS6Nv?KO0~H z8BEYByVnfEBWRVRE_(BX7@(B(O-;HhmaW!0hQ^i_28Bapzo+g_u+llm^a9WCtwkdO z6eEeF0C8nyiEZ1SltP!~G(IzMIA{P~}6jrnp z^5n~(ldzdqNj72H%W`*m?B=)B6XG`EaKi*X!*$QkzWXgD0hnMA>V-|)w!QTgvdn!D zg}weOVAfc09mSz1HwVvsaqr&BdSE1gsawoOP&AsbdjTA(%mvtSMb7jy;X{Y{u=St1 
zV;p;exdIg?95@oqNCD+!uB@ks>)=4=#znZh$2*e^z8Fq9s-T zz;3w<#4ve)+lSjzx#k!skx&P!R_{66Q^^0WCZk?j4XHXmW-O{& zPrNyE^=9?#A)Fm{(xocAd=#B^w0q#=$M-1E(f>jE1q&)NoVfe2W*WHwL(nWH9d5_D zZe51H4?^-`CMGD`WKk`EjwlK+Y*OtHNpVH(hO!KLOlyGU)e32Na|jjU&%WD>{au9A zB!EUWLh0#VkYQF1V49_g#D)C=O(v=i;%-rucwqnj&k#+%d4C{L1|3CZl0`7-wt#0= zmZD_WfRf3oyb(n>(X|+T<$wwyr4NX-BEVerVKf4qTech~Tm|22ujIwlHB%17vT6m` zXg6^uN1E@$<9_De5BUtClen_wtgI⋙)S|1vt;w5W0>NVHSu_Pf*(VJt%Z$Ct4xy z*{#!rdd47cZ-K%Dv_uj1T`w$XTVT0Wf??SQZRJtHLOcwKvb}1b)UX?tty~#G1FmVe z#=(uSO9EO->+Azt@D9+6T)LyfAf|<|ajCXMbIcWNx8n9%VFhsgZ^=FY0H*378k#sz z+q8^GS5dPaVY&6g^AsGZD+uClN9k9c9YMKg)nbU7N_SMJ>jJCXi_(Tp+a(P5M9rbO zjE^d68+9q}qo`d3-?|?E^7>O2ShA6zX2S89D|X6ojMnC5tpsJYpCFLCza5CvEm%ze z8zE{yG+N%GkryRRE6Sjwgm<`;zJY{8K(9z(RI!!>u$Xw450Tvinn2Rhf7SCvZts#h zFu)8ks3IZ-{Fxr38eCsJr;QRw`dg zQ7V&FTvYUpQl0y=v-A1<`1uDuyWRGKq~6!-{dzs0&&QYN^94j4s`S4q`;2L42|b-y z!>4??CC1F-7~NBjoIoLtT7;EBA>d-fII6^k+Vx&{IMv=!>q0$_+MX()8WD-d`+8z$ zV(CqkP)eJS(wh*kz|F*E3#}Ur!Kg&Jy^WQ?ppuPs7Zu0bdRUn$u;@`NUEnn^#-N0 zDj0+$#Rd`F6pxOB2tg1=pQD`x*WLJ#uu%$AAXn8aPIdVh#w5t-Dzx!CIJ40(?`;Sl zx+}Fl%CSD=)HxHHZ8$=ju*N)`wBqYR4vM(0=w)!m?2GNL3@+(j&8rW^!1?a5VQV@H zom6Q^Tp-6%n_4$4RYB^>@WUk`BSj1VwH%xCw_c$XTH?*oDP`9nMtqJFTzf{=dIA&$ zYyG`Ut^i%LCqF|RXvSzf^H7^uLgb~%CocG)Wn-jysBvAmasWjIl5b&JA+K7LQW-uq z(s;|ZotBhetr~%eCg`s1GW;cS7awuLWL_|bLV-Coph`*c;@5qXd z+xo?CT^0JQd#(WqexYSob8;9*cV$Tl{PYLQ&o8wMxOYSBHiTGV5qXLmm;MbR z*fSr4wMv;AS&Z_7JKN4U{Kb)Z<&ej9_~?oRh&4t&=*(}aC)7D7&T=KCYoFuGc?716 zdH7=Jo}**t{o_Y~-%RETNVH(G<|B@#c!&zwS@QC_?tbGqLL7Niu%e%nG$^Si-F~B*SS_UqDf4v0y-FG$!+?lv>vl0b8@wky-FN}! 
zqvm5TNPp4}C##|H@F*ssHN2U}_*P(8#fiRo(;eFg9pBH`!7%B2r!n+GSmER!6#J)} zadX-~_;ISIJaW$|j6QUgdkOw2Mg!?h_s9-i%viX;Q(fJLP+^QoZKCoG122!X0qG{= z#tHd*Yn(3M0ouOF`yD0KqAp5ZW#1s6y{SFZP@R^i{XJ|9RCxb_2oWd=@ zkeoA-5}=I589Ckaa&O%Rw}!1EO2h1Z&bfaV#7*G1yz-DFCK157tbIS$t%A*!Rw!3ovxRL$`Dn203Bg-X0aD+4;Il`PRm8Px8APZ%mZ@g0Mw6CQxaW*f`Se{J-Kj> zRe9e+x4{q;6Z`h>UrIc@&2AbwW9=L$4M9RI{@_&Pq-d{Rl~y(k8#@87fMJ7Ug48QMeQ z^IzC%)zMFeziN|mPn!_ zVcz36?XQ`2a)Wb2_CNBh8AJcR2xHLa7vCK;=zCCsi2Ha@{?ezyN{LJAMcv}i9Fz?3puF;O@&rTtI`BeGsE(AF-$VH za=W-=d9KxjYkFI+UsrUIHU3+>0{UAh?6w0SMT)$%0r5is9td!?@`cB2t3AC)5 zbQt92xrK;LF$>=ln8%$DcN0I1%2DxZF-Q8nomU<-XW2(AM^zd?iRf`F`;?MHj?G`S zaU{J0Ne>@-A-P&k`ZY#iW|e@fR-_-}m&VS=&t{Eij-=x~ZOYb=pD{INVb1 zwxQ1Q)ORmeUIgj@|9@*7+St}s%0+KU&@oc9%CG}L2@Dx?a3M0OR1IxY@#fc#HCq<| zPZE8UGF<29nf>9568Av3=c94W-yVfnOJ)8QmhyR7g`~V-w#41D7fXXP(_hv5Z9pa7 zWu%x38v24j!x;MM+mzI~$H7#0AN1Gn4&MEckk1dh6Ept0uhTdO7%|x

@IY9!A4% z5TaVuZOnMJIRZJX7?(|}jMRoYEBe-!Mr)vJ#Kj(1c`w9?z8JC2P!lDGrHKeknIK=l zl`oa$z~DLH{=nz< zx;_T4I>jeX(qEvLZady(cH_VLYSoYEx#pUHz}09KA_91X=|i89)>kLCQ89~EABs#l zWD@Yg5$34`3_<1^r&FF-xTPZVwAMropYC?ytIXxpNZ9*FseG1QhUV?Ro4EutK)1?$anjMpwHXe5RN_Antl$v4UXpyu|df=2vsNbofgb@{647bPts^M$E^d7uV(S$E4 zhQ91)q(rEB4~61x<@$YHulhM1nsM{d+YzbiH$fb)6U(^X1Ed6(k^9mXBOTSz;!GdBk!go$8R#4nudR)_WrdV2F|qFO@et zDcXcBxMd`m0!1vLu!(|I#2isoc%t%gT3#exev6aKv)y1f&sqoYM)T-=%BuWC7^`5; zzE1x;auSugA1zZdxn#d>L|0J3SyoE^B;zo|yv>O$fhLlK(Py{$Kllnem38JkNx*MGR(N9`}b zI_vK&oD8cQqN=}Fv1b z=xwm4W#EAsS*4>Q@f`=f&+s3V(I`c<6Y_%UcTvi*VXyT)yl3uznS@@?6u z$Q-X%v-hMMJp`MHtdWc;U>J%>M6dY*w49i=@QO|iY23l5BK9JN%@$M`uViIzBu*Ur*_>DN?(1$ z#y}S}zD9QsxOIQ!nHaeeCQXgpuo;zF75`2Dq2`zIh?m=&g9d|QS2;OFjVI^s_2Q`b zh*ajdrKO>&>WCUPG|EIYJgVIS%CGz$*bql#iKo~vD>HM~=9e@iFzE$lP3-KC&R*!>DsF{-6Uw+EoE)sbERtOL6oqy&e#wd0 zdH-sVrUXc@{|Z~lumU+4DlwB-o6Oy%FtxI}+q!1~(KesABLi!9VchD|ub(DtN*!$1 z)G=em2%ZT!%`OUyqNPVnxd*GN+vW>q_>|L|k7YEVN9YWy`?ToC&&DcGFLlD0Gjf#1 zA}1D_DxS5HuZg8dehIdKD3e7HZz+B33?kl=6L7xxYd2-oOB}#X%m{tf)@CVh57e~a zFf40lmf%EmvZ;_lwE17=MuK1K`}nWouF}4XXm64LOMT|dzrFO<5%9aUdVNK#ZPmFA zaO(_okx#kAZ2axRIAL_NPrMem#+I)d)@C!INY+daxyT{1O^Ag=RIb+0aL zi&?eH8upK#W~x-&hW+Z%{&13rHYmXlpN2M*rsiX{IGE3gD3(i~v8<}(1qt~4z_t;V z20yoWU5WiYVD-4()C&0o$Ur%Q;^q=niLg}^_704yf+5RMJrB+&qmt@5%Or!;#BNG? 
zA4YK{V<`~Lo<5aJtVB+I*T(|U>5#oN!C*;`K$}l}?P9ZA(Y9c~uy0Ry%mFY=Lj#`z zQj@)uOhqRBIW;TzR9_TEt8A z$A7Pa|7SEGUongP?|4f7aDVRq|J5JEI{&ZL=raC$)7#Fvri%M~r-8%A3qDGl`{mdF E23?%H-~a#s From 9c99965a04bd4b7b4b1580c44bb754f48bb5b2ae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 19:52:03 +0000 Subject: [PATCH 56/60] Fix: add tl.debug_barrier() before atomic.xchg, fix tests k_per_flag, use randn in benchmark Agent-Logs-Url: https://github.com/ROCm/iris/sessions/6b5a926e-b45d-43ac-abac-7b8805d2d367 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 10 ++++++++-- iris/ops/all_gather_matmul_hbm_buffer.py | 1 + tests/ops/test_all_gather_matmul.py | 16 +++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 3ede49de8..ee6af5954 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -35,9 +35,12 @@ def rccl_all_gather_matmul(state, ctx): M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] world_size = dist.get_world_size() + rank = dist.get_rank() K_local = K // world_size - A_sharded = torch.ones((M, K_local), device="cuda", dtype=dtype) + torch.manual_seed(42 + rank) + A_sharded = torch.randn((M, K_local), device="cuda", dtype=dtype) + torch.manual_seed(123) B = torch.randn((K, N), device="cuda", dtype=dtype) A_gathered = torch.empty((M, K), device="cuda", dtype=dtype) C = torch.empty((M, N), device="cuda", dtype=dtype) @@ -70,8 +73,11 @@ def all_gather_matmul_hbm_buffer(state, ctx): config = result.to_fused_config() hbm = result.hbm_buffer_params + rank = ctx.get_rank() + torch.manual_seed(42 + rank) A_sharded = ctx.zeros((M, K_local), dtype=dtype) - A_sharded.fill_(1.0) + A_sharded.copy_(torch.randn((M, K_local), dtype=dtype, device="cuda")) + 
torch.manual_seed(123) B = torch.randn((K, N), device="cuda", dtype=dtype) C = ctx.zeros((M, N), dtype=dtype) diff --git a/iris/ops/all_gather_matmul_hbm_buffer.py b/iris/ops/all_gather_matmul_hbm_buffer.py index b8356539d..37fe99ea2 100644 --- a/iris/ops/all_gather_matmul_hbm_buffer.py +++ b/iris/ops/all_gather_matmul_hbm_buffer.py @@ -313,6 +313,7 @@ def _hbm_buffer_all_gather_matmul_kernel( tl.store(staged_ptrs, a_tile, cache_modifier=".cg") flag_idx = m_tile * NUM_FLAG_GROUPS_K + k_flag_group + tl.debug_barrier() # ensure all per-block stores are visible before setting the flag tl.atomic_xchg(flags_ptr + flag_idx, 1, sem="release", scope="gpu") if TRACE: diff --git a/tests/ops/test_all_gather_matmul.py b/tests/ops/test_all_gather_matmul.py index 0b3702a6c..afe503728 100644 --- a/tests/ops/test_all_gather_matmul.py +++ b/tests/ops/test_all_gather_matmul.py @@ -145,8 +145,14 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a config = FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) + # k_per_flag must divide num_k_blocks = K // block_size_k; use 1 for small shapes + num_k_blocks = K // config.block_size_k + k_per_flag = 1 + while k_per_flag * 2 <= 8 and num_k_blocks % (k_per_flag * 2) == 0: + k_per_flag *= 2 + workspace = all_gather_matmul_hbm_buffer_preamble( - ctx, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout + ctx, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout, k_per_flag=k_per_flag ) all_gather_matmul_hbm_buffer( @@ -156,6 +162,7 @@ def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a B_shmem, config=config, workspace=workspace, + k_per_flag=k_per_flag, staged_a_layout=staged_a_layout, trace=False, ) @@ -213,6 +220,12 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N config = FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) + # k_per_flag must divide num_k_blocks = K // block_size_k; 
use 1 for small shapes + num_k_blocks = K // config.block_size_k + k_per_flag = 1 + while k_per_flag * 2 <= 8 and num_k_blocks % (k_per_flag * 2) == 0: + k_per_flag *= 2 + all_gather_matmul_hbm_buffer( ctx, output, @@ -220,6 +233,7 @@ def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N B_shmem, bias=bias_shmem, config=config, + k_per_flag=k_per_flag, trace=False, ) From 2dedbcee0bb7559ed79d80395d3e8d1dedef8604 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:03:39 +0000 Subject: [PATCH 57/60] Add state.skip() when iris disabled by auto-config, fix benchmark randn inputs Agent-Logs-Url: https://github.com/ROCm/iris/sessions/6b5a926e-b45d-43ac-abac-7b8805d2d367 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index ee6af5954..7797e5fc2 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -70,6 +70,9 @@ def all_gather_matmul_hbm_buffer(state, ctx): K_local = K // world_size result = select_ag_mm_config(M, N, K, world_size=world_size) + if not result.enabled: + state.skip(f"iris disabled for ws={world_size}: {result.source}") + return config = result.to_fused_config() hbm = result.hbm_buffer_params From e42c7a3b5fa0d5e48cb7e1c6df739839c2165760 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:06:12 +0000 Subject: [PATCH 58/60] Use per-tensor Generator for seeding in benchmark, use ctx.randn for iris tensors Agent-Logs-Url: https://github.com/ROCm/iris/sessions/6b5a926e-b45d-43ac-abac-7b8805d2d367 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 17 ++++++++--------- 1 
file changed, 8 insertions(+), 9 deletions(-) diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 7797e5fc2..076e0c082 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -38,10 +38,11 @@ def rccl_all_gather_matmul(state, ctx): rank = dist.get_rank() K_local = K // world_size - torch.manual_seed(42 + rank) - A_sharded = torch.randn((M, K_local), device="cuda", dtype=dtype) - torch.manual_seed(123) - B = torch.randn((K, N), device="cuda", dtype=dtype) + # Per-rank seed for A (each rank holds different shards); shared seed for B + A_sharded = torch.randn( + (M, K_local), device="cuda", dtype=dtype, generator=torch.Generator("cuda").manual_seed(42 + rank) + ) + B = torch.randn((K, N), device="cuda", dtype=dtype, generator=torch.Generator("cuda").manual_seed(123)) A_gathered = torch.empty((M, K), device="cuda", dtype=dtype) C = torch.empty((M, N), device="cuda", dtype=dtype) @@ -77,11 +78,9 @@ def all_gather_matmul_hbm_buffer(state, ctx): hbm = result.hbm_buffer_params rank = ctx.get_rank() - torch.manual_seed(42 + rank) - A_sharded = ctx.zeros((M, K_local), dtype=dtype) - A_sharded.copy_(torch.randn((M, K_local), dtype=dtype, device="cuda")) - torch.manual_seed(123) - B = torch.randn((K, N), device="cuda", dtype=dtype) + # Per-rank seed for A (each rank holds different shards); shared seed for B + A_sharded = ctx.randn((M, K_local), dtype=dtype, generator=torch.Generator("cuda").manual_seed(42 + rank)) + B = torch.randn((K, N), device="cuda", dtype=dtype, generator=torch.Generator("cuda").manual_seed(123)) C = ctx.zeros((M, N), dtype=dtype) workspace = all_gather_matmul_hbm_buffer_preamble( From 7f163a02156477b721461807fd18dc49ed607b2e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:21:59 +0000 Subject: [PATCH 59/60] Add bar chart: iris vs RCCL vs expected for tuned shapes at ws=8 (MI300X) 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/306af9bb-b705-4c03-9d49-a31600e6df43 Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .../benchmark-results/bar_chart_mi300x_ws8.png | Bin 0 -> 108480 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/benchmark-results/bar_chart_mi300x_ws8.png diff --git a/docs/benchmark-results/bar_chart_mi300x_ws8.png b/docs/benchmark-results/bar_chart_mi300x_ws8.png new file mode 100644 index 0000000000000000000000000000000000000000..eb3e89e4d90262f75dd9b1ec67f84e011f42a103 GIT binary patch literal 108480 zcmeFZhd-8o|37>nG-)WKl88`dWRIkXY^m&wtc+12T(|I1}@qWKwujhIlw>8z3wrphCND#!9Q_70k1VPFi?T=x~HzNRVIr zFq^fjtCNeQfPnq~yn^4+*-GG@fm;pUWrLIQc^86U+)n$h(EpC7F}rao7yp#S|@-NfW{ zko|vt>ZxctZvXGUPWw|_`JaDdXO}T8kNw|&RHpy`fAv3iwG*^jj zb9!onw`vssnI~D6)seQ9^K{g`|GhJm4~KwcAE|zIuya9Lle%W_I_z@%|Dz8lp83V)7kVCFx_&AmQ3IXUc$ zg>^^%D|P#ZzKUD)pQFVDHgDeSH1+%2rKY62TPdTD+AEZJ&fpEXGqPf1V}C}u{i)mU z`lpU!VQS!3YAVlU-y-?>hCSc%vfs`K7V4{egA) zF||HM%{R)*%5+1^%KRdSzen};SsXUV+bn9^=pP%q7hjDn{7d)XV5;uu>`42c?+LUU z8Tk(znOh1o2JQCy@(dZf-6vDH(588O$W^#CC6w#PzoMjg2Z( zIo975>?Ibaf0lTqonD%sNT_<hChcN3>EiVEpC>? 
zD-TM`$cQ#yu(9Ep9qmlJy!t&sN!4=hZ)?IULucpLFSL__3AXf%jLNBj+T6TD92GaW z>>xhOb@|0*cnS-9Oj_i&S2j26IaUXShBEGzbglE0jyQByz1Vj}QfG$O!DYb?E6P&c z-rjC?)$qxqM?p!dQMTzEyu2#n;^OOx9xU`@TSteKtDRS8|DMkq8tN>1OR#O+$(tCJ zH#pS##!7PUrr_e@;+(u(&F2b|Mh!#|%@&rRM~|3tXS_#;U-K_7FDH89|6Uu0mbwl% ztBts)Yd*iO%OxnNdG6e~7knurZE8bUTjmnH%ZFR^`=(zTE{N`z%KB>@CHwZPv6rh> zp{osr{PN;(R=pQaOp8bKmfc5BCnhJ`8hyl_JMqe_()DF+XQ$B~>8qONMmQPUGcz+K zoBFYxCM8~8js3c%g`1tdYR@c6X}sgjyLYz`rVYtz`qk7lG(Eq+MGFcF`agVlCNk<^ z_!mulSO}{O3wiP>DJjQ?nhpg926k6HJoHHXGCOYcHfHAA?jvpcxw)0?WrT!;teexB zpUQaAaBy&ZeImL^-)V_^PP;e_D zX!c=!N6{7fv7aRst1C-d>F0z#R8~H>Yst*_T`6pn{QdQD05&28;k!Jcq^PL)g72;A zJ4(Y6&ohffrthvlZoQ=PRNCWuQ`*^z@^bksng04ackWd5RNkQ=a_w3^&J4GZ*NIiY zLhCzL54L3Ob07P;g&3cmy;)W!_x}C+@xCuR4(sQvkBp4W_gOmnMAYskNPT3!o070#8qr`B&}hEZBtnjoxdVvf~5m6>^+j=Pc;BR6?=ZnQJW`Pi{zEY&-C zb(IC|7xbllmzl1N{UqxHzEWePR);ZqZ@%XYZ?al6jgF2EQJz({8jKQXZTRuEWp&+a zOI7lBXlQ82nl$^jHQ-a|J`Q$vN^+gC`GhP!Z#`#V&^y?Wa<0fdqpk4c-QBlvYI{f8 z-wKL~R))&025~6e>&8JrFDgeRe`Qt6Iz8ICn~RHU{pQUd;`~-0uWUJ?s92z+uC$Ea zE_<+&Rbgdmj*$qAj*eD7b?Uo#?dSU()Wr7}S{$iLR(hWK!#&FjQ|K2PhBZc{qrh z^1iWAm)MGpQkyMy{v;aQK_Un@;bPj;kag?UdCxwL#|^z7sa59db3Y;?QTz9Y+dJxK zjj~Ozsn5uI&uc!4j^4Cm$BxVBI7^iDr}#+G_8BOxIHT}Rh|UszbX ztxM_Np6{t8)0W;ftfR)p#w^1@!NJ=d92_Y9(dBx+MT_xT%Fel+XDmp)wPfPA!kOET z+sIP=#cMv#rs4OU+>x@=Q7#9hr41RlHRXA)jI`y}wsBSd94jbcEIRtQWY)ss-F0e8 zf3g_XF9mmnir8n#tu5>DhRgZ+=335%#S;M{P+pPN3aLSC;L@nWPSAQ zA15XrNKLtb(<|o{ZQJznhOlK7;oo@3P)kK+qh6-r27>$i>jN~KnVxAU8x}Zg(=FJy z=k3S6NcuY2Urj-{ySqP>WD`-Gnav5VSc@OV6rn zYu8okb3ZJMP*qh;H!9jC{P=Wlp;aGt$B#Vw-Nc^^tLI!36BG4aaU51s#HG%{GmGsl zEsQ5mp5(P;IeICGOYI4v+-}7lVBD*)etN8H-#$6t4_HUCqQ$etNpoDBtNA zi|(2B?ubd7FJHdQ55)OJB$r&BpcHceXUMT>p!xm#xBf8k%#DyWF)da+hcZf<13!9I(ivl2s z4y|PHWvJstMBGTKOs&aIV6~9c0!lQJJlG^mrrF4-x%aZ}gzRUC5 zR3i_+yVB)zaEq~>qhly8{h3We@;Ts2#lGxCB?gqcsDJ}tZ_NBZ#L(=6TYRS`U%HVtT+ z-!qM*Y`X1fX=#B!_qq(!oN#r``MAJ9%S`Q$x<{@K_nDUr>FMdfmWhon&iiD&%TYOh z_4jk`Rb80uKb~vbBzU<;aqx{=bb?YS7JnCsvrnt3X}!?gznMiux$S9NvDch>;b1$^ 
z-{$HxotL^igJaW6b|^sr@>F4wtZEZ)1ULq(S0}3l>R*la_EIr2GR6+w|6-v%u!)_0 z(eWQAE9IH|dEaaH@3GUTL#kXo1ZRPZTbLd-r+Cec{sICa*!pGl5`ReL=w;}$45w-( z&}`mZFWa1DRFo{DHnG+=#I2neG~Fxcvp1N*E33?BX&0{dBlcEOcSuwe zmPy%3AlI6yQm;9>hlkF7d?aMA^?bOE{;7ES^ky@2^KDzV`aeH@EB;sjjfz;UbCJJ45y7oR ziT5GVj>7TdXkmfb*@7`RAU=!J$5s|+nq;>mDf_Aoqq5)ID|It0Y%37uPzT+D?-qLc zPk6JO$cJ5*gaCftI&>z^xl~BRUoDB0a}zNwCvbQVaB^M%8Pgx=dTqIB`;Hy^!@rDI z(92n*@u47FcNU7a<=Njs>H3B4hEAwxVQ~O(QqZ!B0moi@*y@&{ygX4AB@k-jyFfiR z)}>@<7^Cv|_$|73ot=Ak98~`R#E@h@h|`?!+`kcLGVa1w@ArWlNVrM2Pc{U`;oF`mPnV+1tj*eFG)yuJQJt`U#HbQ`DNjAZ822`|kU$s{ThhJxB z%ZIYcygtLYZJSRgDH6mxs^F~t8X5|opTGLIEmze>|J=Dp3=Enr=9(t5K8t|>J*p-2 zI@;Q%j*eM*1MTF_E6d#yWaHq#PhH)xcC{&&j?k{13GpTHY3AeI?}&Oz&TmgdD^SW! zFI>3hHr#A^=;7wDyt{oR{dSEnp68~AhK7n{Tg?nLUB*%u`>nB(-vyM4gvnTum$6sC zu}C0@DxWlm@0>0oZiMzoL2O|WDNj5VP7u3~UJBDT{LE)ueD0;yulF}pO4zThQD?|` z&p#7A6VhXvgFV@F&Y|wvG4g%U4&B+RuyGIgcD(1z5KHxw=;#D+GvZlp)R^CQ z9F9-;RPEM3|VPu`XbJg$FjRwCA*SM}Z41xv-r@SGMiibrZ-| zS*D;iMuH>hT>Zg??#E?ot3}hsi`RrV?c_bM!ne=A5XC{oQZLJx@0;J;sF8?u9YLO* z+uJ#_9XcgRSbG2d{d})Eez05$VAmKKuU)ZHSDHQf^vir@jynDXkMZmVCqSjDz)=wN zo_C6Eu5E8Ouzi#FEN82N%D(3G^G85aR2!VJy&sun$X}x*>YqB>G``qInBrdR4@7Cq z&Cb>cmMEX}oc+s_^!Hhf3bic1(^QxsGdqyLWTxtbe4P6|iBZprBxh`i@0PHM~W) zw=uF6y_R4zG&Dp#Nu2i8qp8vz0L2I!8brCc*$?*2Ppk)a%b+sx5e%!IwagpP#JsAb?fs*1rVcCIZ?g}%^A zxJj@I+Xm0Cu@SA@O`JR8+==5XZ6U0_fAa?Y0`^S)M zPF1((+C|mp2)Ny|E;K%=q;v<$gK9s&N$FuV@?WM`p=&kOWX-phnSZ*sxjoUoPPcL; zd9cuX`j3{)c&&P@Wa{a&dn_8?y*sv!mNqTppscKMet!OX;(crD`Q9rG`vco3%6Eo7 zXFhWAMXbhZ0foHu)p2?I$2hMxrVeiH=c113Tp8_1ow({d{pY?2Us1rDIPuF0FYQpw zT3@QJr+cEJIGt-w55aZu_vd{sa#u$801a}L2xu6v)t*}6)P1SD0sOAzN1BSz-D{5@ zKW;AUOWjqUB3q}lcj(;t^E@&tqobplF14RRPzsb54GglDp$(`kXfp~JrytEw0r(Me z{i9iuQ(afL6>Q*EKmZk%r>?-oiY$*f+9$QOw|8`OWc#jo=1#f)sXMmlg&Jw=!o9Sx z@CyXBeyu5~0IEqX zvwatD-n@D4wZ*B(xIbYjarft*tQdOx_*}5JkNq{=<+s+9T8buq*3uYECFfG>cAOW5 zJG=Otmggre>9x=pWo2bsZNt6GI3XW=y0_Q1`m^{A2p4-~JQ=!t=3*nH z%Cr(rs-$-5zje5a??PGEQEjwZnvh@~ITc#{CC(x{JG&fIy)n|JK5>KOmC?<_j}k92 
zfM(HVF&y-yiCql#U8P1WYZkb{^`gdm-KhMlcY;Wo|6DBfSyJ({W6A0R!&X*Smj7Ao zam2fgPga%{z&z;g-AbG&r9(l*z;J2e$6n6T+z)`3y~1MeIbM$HGns zXqy2Ft(#sx{HQB&e&+YL-6Sajbb%IDjst&R&Tn<|fR3S6rO%wnz~JEH40%(?+XS&` z*WvPORJ6hm*JQ+su1>I%>TGK9fW&2^NS$KlNMoo{-TnR8Y|mn&zhsr+lJC>lH%~uE zDSnVLs+|IW)>`w}BwletWBU0W{);)n_)P898GfU}7l}6)vXFSI6F5 z{FX?en5LKU^1;ckKYpCs$g(!mV#52$%g}Jo>hhEZ@ejqw)ZAR22co#I!s>hiC5f6y zOib(s%x9_2z0|7Rn}eQ9C>Om#)9SKHDj@3HUh=S+V8ahOdN|3+ct}1522NX%*40fu z%{)l+46TXxY-Y&$Hy8Iqw?|W!f)f1TLJAdbq|XJ?U@1K{$*KX3@8(v8p%a7s{YepB zIwmNTrk0k}(Eg#DK7({eLf*@R^_)N2+Ir?EdTpn69HJ{+nCMl?s1DOO8>3|6mKX&M z8at33I4&r(|RUfsaw@$wBv(ZVA} ze#7-Z7=br$#-oEfIXf#a0YcC^ph+0=SfB5vtE=C~X8dHCRNvU`0?3aiR1zBSyYisM9 z8b=Nr73%V)opKL#lvwaBaqbVUf5a}|CaT8zA~H&6I;m!x+nzmpNG6_cQpSp_^A8UX zS7Bc0PQ|&^b}|~Rm0OFDrKjD@6qb>vc;dvVUatr~13kM|=l&{?6t97_1p75v&uQhC zss{A4A)m{3zeo^gDlD8j!ozclg^^QAgBEPS3{*4!a_`M5k;>Z45J?#e#sBlfbGePQ z?L-T^x;V{4irf_KjVs-2F)C5~ceinzBnZe(p$9dda;u$Pz80s|4dMI>q3abmoN#Y? z*Wq*i&{HUsRr}fZO^-q8oF7by%c)7*WaQOWO(n|Q$@baWQ;(bOfUUK45Kx?jIP^vj zk|m{|e?tS&?p8tq2Y|w_-@kAAt}K!iQblH_v!f%i44Pb0qd3acce~&lFJ5qq*wn9s z+(-CBSQFI=P~@e_ITCGbIozBfnI!e4W*A%Gv2A!J1J%j%TEI<^n0G?Bx&HVub9{Wf z6V$!aPoX3qKl$f9 zE6i-6->;bLIpSw?e#0c^fdketk)6E;nc(T#W24wbJZj-{o^#{lnkp^qb4fM#QLj#E zJg@xtksT}$DzmunviRccNSe@}zoVn~U@8!rYKPvqkLVLNRDMxBI$Q=LI>qyi z(|cU)#maL>y3ovtI?oCN#zLCmrjatZ1#mJpLdK{++WZ@5pngj^S_=y1iA6&?dU_>@ zz!5Z@wEid;6+eD3$$t!}v{DojHFY0mXOGX?M@5Q%YR;~%5v(b!@PvMr`O!m)D@plN zcf-uMep)7*!RulQ{l`P3NA$DQXA=RzK{s-UsIPzjBJKEhEV+0{hwH*N*;2N#tC+ zCP1$M%JS>YbYM+Tp6O}8`^2(-xt~o1-V3i8K4``qJmLm8sd{W_+U;{vV&XHvV*mx% zMLSpLd+BZ3fIPbY{@uKP-QwI>+=WFJ`jwHv67cagrc*~Dfxdq{1zqmLPd=4Va-N9z}vY?o= zn;UhQdx#}hmY;iOHF$Qz)y>wM5<5%0&f1ks4gx=@Tsv2}{JSml3TS0>>Pjo;K2!Jr zE%VcZ4UFfLGy9fiphJ#NO;Lr-x~kgR@}kGNdMa?geaZoW_o3{YzLJjqHGKnv1o+mi z#y2xEGDP%l_NbkAW1!tzJCvIRw^Z9mLMShAD|eeBuh%=VWDK6tqG6(^cv_pP*qg}{_>s6CT?up`5;+BLgE6v zaH4*)Og;Ljnz`4J_(ut|pq=m;T$0PsQqe~0&N)ESKkuOGsgr#A9lkw9V$x<(sb@Gm zIWtoUm*xI7&8q;#lRLQ4|-lM>MxUGD@5bpxN@^Cq(?*s@Jl<%R#_nr>JUc 
zp2|q+49YQlnY&+7Qev#r>0NDTWR!H7Qd?U)F{aAo&e<&EQ+`&zPhM0vj3TYw%lg*V z+%+*0R0P&H69TaswfvHr%z|1@Z3fCD%F739gOnIh?j4{w#Jq_<^FnSWu+{lOhrURc z4**Ln0f;j21>Lz5uHElCSWgX`&-47f<#{RJ<^1O9W1xgY*sPCyyJ=6Gh;8F~d3{F@ zkKh*qJZx-)7GEB0cC_YJF>!~) zsvZ1~O&t|>Y5Qk+F5a0L*h%Ykl-rbAsl#k;)+F6t`_wxfitMupb%PTNL%^`N9 z>P2{Xdh)7?iHJnhd%;`O;8Z#sHXGmw51a6J=qx-{(#^1CpX?rDD>F0C%D67W_U+dp z3s4fJOQTXr$66R#tYBW_eIC%UN}o$Qjp~wUy7GWi)v3Q~XYO$9eW*zlU%zf961(#x zWjv?Dd^_E`yE?SbCe3qAUqwp2at1k)}){dEMB^wsRfXnR~4%BeUsG{Be8(=&e_ zEydkBgpJzK`qbu)o0 zfVw0Yk#8Gtf_O-Bn~WLwn5N*%ym8Ugn|6Ib2!u%Nlt*GsKH3I1CW80O#v@iWA8^_xpfdx$?LTl}9o)!wU}+Pb zSGsj8)AZ6l)H=8ca$FXE6-k8C2_sY#E05^hCo6 z5KkrtNyqvz1{Bu=0}Uh0Eg+09656M{A)$aRZ==1X(z>+>o;ytH)k59UG_qB zXXEVbEZo>*Wxgw65fKrC_ExsG!RSMj{t=^!dm;LhI7wc=?k(Do_~o8;q{WW#0C|f5 zhym_RlH0?Km6g{+cck9G4o*&5PAt5MY*aI{)k5jjeiAXwb@~yx^)JArgzPN=r_~=I z(iDRA`1JILSLUBgEi3}>?iL$L`Zfe~{|u#9+;f_nF#S{a93YGmVz_8&4z(^>eITz< z5RkHDRioGLKm`8<92LoDUNbI_KV?rYDt`A z=iu1J#1w=UMVf!wt?3<~7hVIJ7R%@ao&yJt6?dJMAeB(tP)84W zQ3M@eK**b#vXNf{hGzMvPqfegbXg33xjgt{@dIodiHeEQPAT0~ZE9GyM=}iyu0!a9 zUWcWj{gG?ONJF<_@L(N-RXa%~)cu<%GgSVrUn2xe zj7Q|N&o|}R=nQg=A%Ihd18G=%6;yW{8Crmpdj|;&`Slb~sRy|{^<=gf$9Fp-2^Cm2L72+ersn9wwsHeXcUd@PJT8y@l!@Rs zVnOq+#7oBEKP#R**@K?+11dfMMJU8uRWT-!0*Jg|$cZy<2xgRAI>gRizvCF7>ApPZ zh!pa~R_K~vrkBy@wiAj_XByR`T7W$rAKE~&j!d@mD#*_dLZU^K*Fe<+KO4s4b15W)Yj=)KGoCVLD{Y?v?(A2~QOG{2b z6p=_(b@g2`UbBt)#x*rHJwPG#Yi}>L#%te-i;ENKtgY3s^i)v^w*Ha*oM8L+RdO;;w(f znuk?Sell2`sri#%gg_2A0Hpeo4Hl1g%4)7HLk&pcVPsd9iQHXFmtKkZZ-{ZEHf91a zSQXM$j%^vbcLP26`^z(jGTJ%NPubbn_HmEfv8X82>6^nE5WDHT@^=7DaRElXhgX4CMz8xqlY*b0U-0PBn@LHWh}VTc z_#pS=)%o$~DnDnYRU){rLDoQnqka1HDGYD&btIjIv)@w_E5$r3>;lUWU-A`{KBRuA zP!a`?961hbCWy>J2hu0@8WsP%B3(O#B0z&J@<&O(4QVS10?e-xRqja4!4d$13LIjf z4E?EXyIEMwV5N~+!SCp7KU!Pgp=kryRifUL<;h0y7?J(z)fZO-@Z4Ll!YP$^$7l$- z-HE@?p%f3KPyFAWj7kHVbpX2Oa}a&(teZ${qD%#lh9R^)l&eyawHAe^w+is~PA)Fh zznZ;eNc4z;W6YA#{qyhWBMKh9K6H1KJ(yV<_uh?5*5fRe7KrrHFobTcW 
zFkdDy2Sar+)q_xQ^07<3y}dO{y*;4!zN@O*h-<4P14R(HZN44{V84kyKzQs7HmhAho@4>XcUK!>%k2|rizNxJV|ca*vJ5-fwa!y_Ht=F4IwcUTDnP& z{b0kV>7gc7^VY19JvQxm86vCx(4N()-d&8u^3m-P_eTx*r`q7RaXbG0{Y!r2{QP`3 z{F@j0xi5_)Tadhx_E}^=*NrF@BfVQFGB5NpSnyHlb14VkU+Va%g66;tJ4~@w*lFzC z+}zbOpQqkEdHneKrPkMrwm-!jao|X?7)K{}cAA5IF$7*zVTY2l9`?sE5Gkl-nR%xX6#^X;X}*lTg|f&%klhMU!24dZuS_j=>Z0YCjNL*HvK-n@SsW33mhSjh`kB3C1u%1LtF%F#5Kr31lZhhbUL*5%h0HywbHr`H68;nMS&KwZ_C+pE?_MgMTQL# zH_~tbRB$Pj!0l&;jsoyRA{+mlT2fVB%oDmLg~saB1k8aGlaU*O4d*`EaT^%J_qiS! zf^9`t`2N<>Bxw#%{1*29)a`vF>Wk4*7xPCTj$7*R;lpHBa*vezW;8LXfGv?f zWWaI!IR2{w#`Kd0HBRt$CgivB3kpEB=pG%lX9*a=o&7*YwEpL3v3$?}`JbNU{^T|O z_vb#V;J;h+-&ZHnZYHZ_fA)tDie$V>?TN^>w6sG|CQNN?Zlm`mpxNM#RS$CcHY~5K zz~exY1Zw*7i6}mk(Gcb%rGK2u&cFwR-y)-#?{wkOlP4>7G?IrJ&i7!n1NWf6vOY$RtOsnwH36DBUnbWQ{_kN|Zn6VnTqXaL+$EXDh#rk!wP zQSFWo)Wlv!iZ^z+j;x}vC*Gk5qq$Q)TbAHJfB{jCi{~`ZEb&dr=woif&f!A_d51xU za+_OHH1;9*fXX(Q14(k)|1?(xEOl4jxNzCIx%7}4TI`A0e>XTH0OrySz;ko50^o4K z9|mX+M7n^qt$|BV0MLHN8y+}#&@azjFk7{&5Uq!lh{T$Anfh`UkIKT$!1c*Z(PEEFRzeICEBTr85VAFqb_du$AD5QZgv6;GQ|5B#<#{GGYH9JY2|aNEgRK&s+vA{2j6p z6a=)!tP_#5p}FD)bY#kou&)_V(~C0aZ13A%*??>&3^q_0W!dh_P{{-*m;ysbA;-* z^#|coHxiWoOd{4{ZNdm>Fd|t%=?}+)frp1DE{Si_9ai1So5nTKsi~>(yjsNPO8>kr zFe!~e#&Hut_CbkGhQ-au;v`Pl0EtO-##{>DhaSCXr!K~+io1|ln@48b_!6#;j5KFN zS4{)Sk@;*2d5mosBKMOT;Q_=10t&E=8*%~`e*HA&8MIQ=_fp9JXm7t06tsRN{Ta<< zZykOYkX~i%P^KN)9l$g0op)(%b^jP9aZb7xGz; z-`u>7P=rr-2sSHp1;~0^Npjmh#;gVc0Z#OtqSWSE85XMxRJ#!!Ghzb5e&Zv82 zWDAC09-+TOuLPRg1MB#R;VrD;*QYYOMeUjc!^3x=Cv+nog?erZX3ks_hiZnP2YCWW zpq!Dx$9v{wAbZR?1fF;KzsMOFL=nEhD@3c%30}ksNJ_Gh5l?`wU)|jV%*ejlIPy}eoyw|dZ$*sV zffDi@uc$xz4Mr-YuzRS%4?X(ZwMuJ=a)h32_AMHSQL3c!nMXO(src{vxIX zNb!nTlTv30-Fu>SphvU zX_Lh1xQ$cwGQ%7ksNU07b>g+pl6z`Q9$P{2DlA)@!@YfdfkNgV34#RkXl8bY)6yXE zQ4(;R?qF2GB8}gzmllQT9o8RaFlw%@lnKVq673Utd^B*6$abEpbMT=Jxt{noWI7PT zH|0PK)EgP(VeAWHj$x3?KVD!)=1{`aendBMsE?DOz@8pTSc(unNoF;k1DoDaa&b1xV+l=LJk#x!~EptSn;_7o{^a55a!1Zn<-nX^sk0ks8 zr3z!+QiHmstfqE`N2kQojY8gawB!8X3f6A}(%d$_YFSt$@60A0a$3u#00xP!0T=S~ 
z0Dq{Oh!J}_21G@jz1glP6nlI5{uHXIO~zS+0syvCZdnFz)uDNx=zR@%zzggl!ao3s z^L?aw;Xf8BDvP)jxzb#J< zffL>ns&%{bCw4D_P;SPUuyIP2MA zZ|IfvKV&_ILhzY)K-kkRU8%Ic{QE2O-FB^pYDhEdpLYVn!!!IEPS62HERSS(~pKf3XmLW#4`@*Iu(D^Zd4b z`vj6Cd5j8ks*pO)Qk~IwB67;k2*Z6_2}MA!h@YWBL19CG+H%vHCHvxz2M=VEdMpgT zo_t~20+VAeP4-DJsJ z5zk63*n31WAB3phT(!=bv!kj$we{|@$f~K>+{q~s$t8y{-lKE(dP9@>8Y7`IQoOia z3gGs*f@Bl@EC@k8D4ma!^E0+-L%=n#prC-Ek$AS9E#%(4NZ)kK0O)(ladMtu;5R%+ z*epok*zu4%3!SC1=*k$m#OF%A3&`Lfgjijhv#`IAQ2o+;$mi#|f{Wjt@T_o`x(!Dn zfwU7%eg~AY?I5|p8>QY0A23lko&B&WE85JI!>4`6_U(z2i*)&nnAgIfV-PAu5Jtkb zv9j`$yc;tpY!8tROlh;k+?C2k?=z{|ClFB8A-z%Xpdg&HAP6g=!wdLVnTA2^_hrN50%KsJ&IoI1O2S~149wl7^{7!RM*B(~yUpOswQ`S!MS7$Pvn z5HAj3dvL8T4D8#seLFiDl5=fP6cWev+D%{^21A{m82@z~U6{kDmK@%MbabXInW_Bp zl`D+6R5KjN9*A?~XCs_qjDCiSg)E*KvZR#$e;QN6p%cZgr>AE`=h=aX_`{(EWQE** z9t}oPCJ+Lot_BKkCG_t~9HyixOZ2>WxC&VQ82)TDHWbngxjJDozF z@6V6Z=2l11?&|F9#Kg%CNbL-(tE=Q4C0BbHBMR?9btO;k3@yW~-?kk)I1sBo%r^jz zZJSlgOx>}vvXW#jf&xc=7Rh&daDRHhs^U>#$%!#?0E>7@G56PZ*M|G{;YC%tfq+N0YRBBKTc1+%?+_b0~Y- ztx(;$)$ZV+05L?!-a%$SKuuLyIZQT1G`89qi^E8eqcvxelj!p|?f-yqL(&}Pk1~Zd zE>ex+VBY^3jY&Bb4P5m=cwmJ2-aK(5BbQVrNi zQZ1c^hQ=$q7XArLQ3C7nV4rZpEcgITKoJSPro1^-Z?dSIhSmJ!s8hE**~ca@1ql|j zpVZ)2SC<^glaae*0YF(_44aP>LCJeg?i>tCoMow|_*W*XmKPWQRi97)sXj<&Y1ft^ zLU2eee2vM?#U-4QX3*<3-c@t)`#7d42Umb@PK{YvW=k7+UbC8hF!$`Vz|o_60Ly30 zYxy9~2_XueQF3$USZQ$x%|PRo^1c2l{_5(iK}X;^qR^@|plBvO(*yk!ndL@aCMhRo zTIS?Vzt5!8IfHk=;(A!NCuHe>+`kCsAohN*DHzAu6VA(f%`pBcPq-;~jUcx?s@R`2}r-rls_ON&13fHxG0Gw;dB zwaNlEDv#;LMCyu$@Uh+h8Ip`bSkWx?D{L0s1ct7j4(Uy`s4hM@IB zfhAPm^+3v2nHp(*qckj>BlGwffA=b~f`d0-6tekXqbe+qoqU#*lvE{pBFeug&v-GI zYDb*Z6?K~@%U1`%QnPb&&v@P4C*_`w;cVd6=~a|d&`o%JptK@+YT*JCGOFzhZ1jJu z+~r461#H$}T)(*FxIb`y{8xZ0)d$QCSuXwQUD~y6o8=alf7A@W9yC_EL`OIcHS#3c zOsn49i&LH+7`rLE?PXOtgTBESX>+5J|qAVQFM!B>G$Ese}s`>dhvC zgg5mfK7R!kJYo!Levz4wf`kTx5>HX0{QY}ML?mKgWQ^-26oDN?VQ%i~~K7 zGHCYoZWWj)#Ie+o@3rqr~fN<4xF( zFC(Ims(fWe4Rw1N)dH;7~(9EMZuy**0!M(I(C$`ZCQ6L|1AwM=w1i9r&n@qK#-x8^rG={o}s8eg^-zqAOFAB*Z91jx3r(-`kH+M_f 
zi}!G~KqbWzLo5bNAbh@qL|6x(i}ZWRA@!M|oun8IiKPefjshUD6Iko%+$Fm@;FG%z zDv^hIR`zb+9xtNC%9N@3oDydI*4?|)W_L0&-U6p12#7=xL-Tl+0!1P<0eC8!mjLJC zu>lV?j4y3d2cV4lR}ts29Q90z&3UnXq|FUQ=Q*0S)8)(8z|QNR-A8<0jIiFDvxPvZ zqlT@N+W?6PBPnC_{oGB8?tD zK?Rjttla|Gsw9v~{udp}9DV}p?s)I4tCLe$-i_obHmSarP4ZslIpi_;7BKyK={y6F zflrMkLykRQ^hcjF@K%#%UqT^Hbbc&zv3?VhlgJbP*VOOE0E&NZuINp=y}^(}e!(f! zAE>o9)ia`TXj0k>8SB1-9vO~+qCgquZq(q)q3nR32l96UUK?hS2RBD?&S7{?dFdBo zh$n&0YNlT5U`82ZHdOL$ci#=v##!0=+KYK_-m(SLo7D0Wk61xDIxlH1)1{o_VrN%i zA@pU9)O|-nZ{H>baA8rwqhV8BZ~8_s)=Z8DU@6Tw-A!{k4qeOg`%bBOQ|i zwV}92y$~eHpb;#fx+GOb=*z#A4dkM{7bb0Dl9_UXLqpF#afka$lmilw_R0+B*nLJ8 z7T$0BcJAbyxme9TfB$iJkR(c2z_n`>K-QZb9UTz|dxkwq9yN!K{O3A*&v&ZJ>yI@; z1Tw?O2F#2K)HxAWC3t0ZZFEL&&zPCTHy)w?yt+#15BUlhj{#8Mx*>TBo^nHa05JT> zhk&%wDO%nvW-OHq{-lSzXl}DkrlG{(7*eWt`*TM8JczSV!v2fqiqUYVk!72;A@dKX z{Duqpa0j;T-J6jYlION=^$Nd*`(C%_`TW7}Qm3(+O|wk2uh-7I!moo4UtU>xVtC6w z`MKU>QO>QMPmxD}5PTi#8tF|_P|_t*NzyeD|EBp4OCPk1} zN|~7gl~3oLp_5}MR~Y?D5UPJ}bzW|6C1&v`2vG1U!TwJzCv~z$rt9Z6)^ZD`-!sd4 z4JJM4X#Z}fQLp}Xam1}{VIIvR^HSBDfHWDEwtgA*RIXRT`pv$FN|2))7}7I47GDz*lg=8 zN)oSHlpj6uO!V2t#3VZkQhZFZk9#OdD3{!5dWGi!-MV#adJS;#(27Ycd1H!#GY2gg zFduNHaK9)#-%GTCc()L2U_x27le299lWiUXE88)KkK7Z6pUV6&2&JO2$ep}yoj)EW zfpb6&097A;dQuokljDfqpBd^ba5)&sZxoE9tg!wXxKS@UlM*=OeQcZ@`}SR!@YZ`Q zAm&}mdoG&|r~leM-+2neQ}N`VPjCr@Tz;P>XGoC&!Tyh|@_lGRd!*2vH}d&ME-6%f>R|zGkRb^7qn`!z(O~3hh_KjY zXAut@tM@1+Eot80?5ZBt?o&c#A0qq7MOT`v@OY|z&-a|!8hh(4xG-enuLZzsH z!3sX)Do6@WQ>WZ#4@+eUGRk-kDglN z{5%)BRl#m33JGcz)l^NWEX@naZl3rv z^7;%p_NcxDN&PxGA*8%JMO`9J`tDM+QXM zq5XlbKHoCpIN(CFlP)xO%l)-(l~bqQp&KIYxt@xODruh`KrCrU zjSn^)B(=h-Bk9$lxzpfr`&aynuZ%sdnywYKep$?DgiK#EkteMo6E=H%$|X$BHb$#f z4bc?BCUl^BN;d_M7}($Mv0~UqwSvd5C~S=^S;UZWNDJc>*CDh>ANj8f=Oxp4b3(YXONz^x= z5gNOyj;uVte~!7T!7~dS38KEH3=*K$(CV_F9`j~R5t zA9!K}8A?MTAPpau|#ivR4Kn-1Y&}urAE$ed! 
zhS3~7J3v0cN$ujovs@JUsef}8hAE1-w`*s0;hK-Hr+}vQ@K{2h+|b3Em<{q%p}E*b zHd|h7+@HvLyUqFSP;~KG9at69$0bY1sp%)FsWy=g+LX~0?wg{@3DZ$fE(1i4)>_3Ju*B^}>ppC2^ zT@A+L&u~z4Y7&?%)*cMJl7vc56V_bhjH75DfJQS*Wv!f(5a1vn?^$y~xn@$3G?vZa3N^&Urh^OKJ^Wb?HvqMjNm@#DF zdVC=z&R4r&;StI;Ty2d0_|E?Me%#&*InNN}0L%Y7|Bav_=0sk3OkKq4{$Iqsd05Wt z+xC57Wz1r&Wgaqx3YjHC=Akkqk)g~<5vdGCibWYRrG%2HBB@ZJQlVu`MT1sFnbL%4 zLZs( F`mxwq$j-gn#HKi;-`u&A!=^&#J>|KkBKibD?+nNX zsKJzQZggpD2Zl#W)YT7d%KGDP|$ zBY1$C?hiI=;s7Nl(ap`xT6EZb7pll?GaH%IC}tDl*8u}PjtXnW$Q+_S z=@l{-@r02f69^P(BV;f4qX}3QoB&^fp38*>Zz-Kw!^VxrZrmvHCy~f2cNl6=l=l=% z33W5f5a0eiqiFHdh?U0D9AbVV5_AdP?FL(RMuJYkkQNxr<64C6oCsdRgISSXXlHJr zr5T+y^=SE+pttWskuY70jy?n2D094Z>(vw84S)Ic+S+;|$w5B~elf9S>dDvJq4-au zol`h70Dh8n6UC}L6}&#h%@hIP*5-BTQ&OHEj7+CI@y|KTqKrlgVvUKzbxxjGjxf<9 zhM2^or7VzMnIO>SjMuDR)D?jfM1*2;^v9EZDrQWZe`Qr$ES*pw zN(B$%s>G)n15PO@Rn)rTmtcX02YSa`*wV^0c5_?)aVvJ4a5LX0JQ+M3n};2Aqf5ao zhgv^ZhzBBC2soY>JaHK|rv2{ecxCGGVY93>q#>4R1cfu{_cV>^wROdyWL(iI?r*3@@GcL@!%x>&M5dzd9`X@-%J8Y~ONX;w`yerRrQC=}J2$CV? 
z(}jl`GXs>b$HbtzZj4g)IiLe%(p@&5th`&h!HpGSFy`NLGoQ-ePFt22*@&>U=*84yhng$elG95r$)#dwMfqr(-5fdju$c@Mgx>t)k(non~E35LDPB{&2=>_p`NZFGLE zicl?*Uu%dvsg7rMhrc_opDm0M(<`&yWP~>xFmp9PQai7e?4m7S-)SBHlz}Hw7tWF2 zrC8djohURdSDDX-8f+gPgXl&BwB*9N`4)!gTI!6(jdA|cwzTneMsKvYH=DB%>$F-r zz7k~Km?b`UeBsCINvoqJ0^;$y%~A{eqw?qb4!gBFI_urF*v3bbYjA&>T;Z1jt#Ony{>x4qj$$n{r#&| z#vrS^(N|v0%%~WIYFS3OqjZRBrx(Y&v@1sK*ot1)`^hk-2FJ4$gP))U_=_Btmb8fg zM)vHYXvmJI2{>)oqksPyV!)zk4)FRCjA3lUE~J4SK|w)^&W!DyirBmRZa&d9IARra zuSZty=?P6D@?O^tWkkdhOw`lOEi3{DRnutHbw+gc8~n$-V)sK%0rx)5!0JZtD6$^4 zJ{v$D+dlEP>Lf#z@4i)i_U%5Z^&r#z5yzWF`82`);WzMy+4Db9ilIKg^km(M{!3l5 z>?^)FqLLfnI#NndW5c&i%|P4o#?O9@$V#g2nvAsJ=Rx3^#b^%4vxgr)gTE}(27fY{ zE_sZEEt3>OH-EmEv#E`_AL&{GlZ^NfMQ(wV0QBGuGMcUN7+IYNNP&h9IcS^}_cDbu zHn0p~dHb)I<`NcvB6$Ig*ZC`FPnuApN_ZFi5vinjKEqc&y0mnXs3mE(<{ZvCASN1O z9C;DtoH&F0oUP#PQEQ4d!RODPiQ|)t5<~9g zqP_ss&@AZYlpuWnjE)}8x$z$}VSf0Et;iN%(1KZRN-vBTyENc3!H;;h{rdOc>h0a0 zkL&{1X|Cb##+w!IVmbwxMxv6EK?A~~D}$WkV+%641-Lg z(W7gaPGTN-20k0%D>HtDkMuOWr*?i`ga*u6LppjkXH$qbnMOf!EuavA;U(50hSSrY zu7XDv+cR-L{ypCS&VJO1le#*Vxa#(+teo7}*j0R-i{9qmgM#pOUSYe*Z|rBl%PcWC6|! 
z=UJjQ<1ome2dlB$V}XI`{L5cQ_8K0tFm9jl3>;H1Krzs!iowEqCrUFB+ERUb_zs#< zLbMUnG8xX`ml;|>MyQsfwT*&*}xcw-eD9c^|9i+@@ z=<)UbOw+*@DdXF<_`I^)_R(-JvLehN)H98;yIdee}jv!qC( zH3~Cr=4el^l3dth*?c&i8x39#?9s`t!nRo(nW$=3tJ#hfVMIJCSDUD|w9wm8IEZIA zs*XQ2W?yJ1n_&F-8&spQv9U67sHo1*>~Z>OcPuTahX_{vIGc=)`}|JX^{;#Xad8u* zX9XW}y)%HieH^fB?4qEZ1^VpL4!lT@>QN9rlo_jjy~0DX`J?vT@F%;AOo@oCB0Z)| zc2MWa5hkynp}MAHY6!RFk(#J_y05eqBv|0#E;z>BT-$2r&dFW>nDSsEZ~5n!ht-tc zlmGdT@#B9ENbt^UzY6Qrj!o*;9XnATI=3pe+FXYYeqn$ zE;2n`J9t<719`n+_z41LHVz-G|Ji;!V+ZWMx!osemOeFPso^L}N?3P%0A z$!j0+|9C~k=^EvuQv%8dV8u>2gSyxQqPAT7>Kbe7i+a!6#(=J$&Gk&!{-gU_%qg6E ztcFO#Gmu#h1!r!dS+#`0w{P8ObC$p`&MVd@6w^Km;VVL9#$fow6GZR2{_Pxb9C!tq zm2;a;X9no7O!vco5r-#HDM;T9E2rVvmvIO>7Q$dy`1`_pgaSp_~Ev+DshQ7uS1dR?!oY`qpSD=csf1OQm_|74mRd0v zt;0GEs%|y(nyymokbCt~p=&Nb`D`LtRpHFAX0MnN)S82=vRWtvnfpaaN%X}sVaGt( zK33*jn#_xavB^`Z|J&f)1-R^U7_?%tdfypcR|1m@RvPTXJo)fps-BUt-~#<_GKkyCn)n z^>j>rPgE6{_-|MX5n~g>mY>%_Xde-C8#{7Wmi?4d&G!Owz?1eq31SYnY-pp`F{~eof75+O`2W_ak4Gw#yGIk zOBm&W@tasUKqc_qBG6y%v|}(H`@^Ku2IvtU{e+eFQMt4Nd|@%1{!p9O4cRn@IlQN9 z?s^RP8SlzZWKh(Vzs>wZaS=eo$7+fuu}+5C;Kw z{f7an8UQlJ#xFP>B-dBeff{Cc+Arl%faM`utB-_21}m63=>@7FV64&Oi9OX*ug&@PcAvEImZ?l6GXyk$$J&Q{)dgEr*kx>A43u6J^BibxnizNHJ? 
z&RWX|txP>p+4qA8^Sq4U4*l(~f;!OX7OZ==ORf0dxO{rewh=2|)cM>qTzF0Oqw zk#ShPtu_j2Q7{hazu~zRl}m9|~KFl0o522OXc7*c88&yZo=`w%a#q z>VB+OU-@47Wr+#^EK`p3tHW{qb4rhTinlp2?PW5U^3;G~D)4=c#P(P_3j#Xe5Xk#jS?b7Y3=siPEi2|FI5h=->q35yLeK`d*&Xr%t^M z3QQ}h1bJ+5wqoxOW(+C9V{EGnt3}W%0}*!TC%xw-ckbKgf{Z84R;Q<^02BsP?!kFm zsw<2oVgU?cw^Vitn;#t4lzG$!JbSUEW70<9jLNMoln*}ZzcDzZnjXd#9w50GJLv{q zjcZ;POOqMVZ)uLba0WDZt@kAy!4m3$7 z=Dr{)ufYET5=B8CLHscJ7_s|T#Zi@2Mmi^euUXcCgF|&7J(?&C=mmrG*ql9-vg_Pn z6eLQwjEC5pOBm#>P*aPLVOs(66CU(T{_cQb)4*4F!#sD=(%Pw(|6{yaqH&EjIv)Oq z4j<6&7rDp@V?%Q3IQ4$d;+&iiiVEwgSC+8pYO@YQ`=urv{^a%^<+b6-S<`ej23_}& z@o7)zUJ4-wEY~dG_6R-DZ2M&9bm0drZDTv}be-SW4Uju9t^|`7#Tx{&Mm)bI^(Vcn zYIm>fvAM^ulb#J(zhOfI1x)?~vn@0L5!rE!2noyFY}7Yrr02)83Yb{x+-J`8@sagr zKA`sc@o=XBU(``hg{9m$&GRyDw1qn=PID0d4m02HQCAg|UR)-5agK^_p;)g)u{s^LB?m8R$XvkV5z=6ClehJbHmjeIzsJpOyq=!kz6u+!W)8<@e5X|_3%YJ@X6C>4*ygC~ zJ>#CPiK}*Da%~AyyK@j!iL{ckLSh~ukWq9;d<+x{Bt`eE9d{Na{O207@3uUfbGDga zx=Vqq1fZ692%0C+4|+)>(6oF^|Jzd>ib^kj2=p>PJz2|BLQp@Z^h+Y=tHE;A~G>7&iWr1H4q|#qkWh3 zGAl);&Vef{mzTZ%<8glNFiP!Sa!jQ@6{8#uq;t&YVw1Z00JTR6cS6A^Jy*Y*6 zxhY-^{PXhcB-kl#@Gl3B9XsZ~-t0BVi-2W#CJh*T0-7EpyL@T* zo^ez&cL?4h3op+d@2B){Gg+Qk%=8u2$tAl&!-ie@ExO^{J7X5mH#b%Ndh_N@8TH@Z zJSCjxFZMJt8%&0<3kie4kn<09`X`lXX9kEZ91IMavxAQ14Z%@SJcuQ~Vs0>SWDP~D z%y5#2mQqrseC0E^Fxn|p2JtoUb)N>#@QSAi=5k+%->37&q5Jr?u;?u|;}!j(kI zJ=d#OHw0t{HTlx?AeSL*K3vuEHXI+tg+}Dx08VRpxQd!h55~+uaiO@8kWd_U_25ps z_B8wUJA9v6i9!^Iu4f!+b+LIqM&M8L|6giNjCJ}4-CjfHE>=U`ej@uw3Is& z+EAk6?<}J*$QQyZWQFj=kVRn2$-JtX!AS+JIake1Woo2@q?-Oqwi_M?(O~9zTWDzC zc_5O4mobgoPfP1A>uCL@)Qnz9?nkrCM!qv#rbUlG0aO0QC3EvK3Na=A6VF6U6b8nP z8K6#RW2EmdPO<^@9kqQMa=0LV(tWUtf~Ky0T~g8vw+D5lX20c)kej=HeAP9G+v11{ z;C{!B9m8fBBg_;ai(sJuT^7G66%0va$d(ueDl;>nSQz!y5N9+9Vs`*yd56SKMg1Xl z`lwT^;@s6G#3%j&xnn}_jQ&h(Ab0M>RWSxd2j4TZD~k#wE02cYafz`Ha6tBD9l?SK z46|0-kk-ky&uP*`XNP!4pj>WJko&;KFp4Q27lfujtLY4YOQBckudlfo&9@9NBi#D_ zHs)#Yg$o}rSUrU}sA1Elw;}qh5rs%+02Fs^{kvli`m4unO~o40;;EwR1N#A|KAZfJ 
zH;e;TN3pk)%8xds{2kWpo~3$T$nNUbe>?qbGh8ih!V1I72S^2vKQNSLa`n)KIhVSg^*-|#I3hrd+_1(xR~Qx}Gxj}roBuN3Ewc2Fqd!{Q zow4@FT*VUv$zs`5^1Aqj$jyfQgiVxs_H_EO-*!o|65SF24x=m^!uUB~S)4ZOZuAs-E?B@Aajssw>9KLV6blOlF{H zFYXMCGON}0-`0E3}ZhM8g|YFT|b zDlzOO;y@|m1PNcXNO5$EG*61rl%8|~E(jkO8dKFT9$V9=)V6c4yEm!nIV@rgT&>l6 zkJObQNL_~v8Nw5o_g?^zooY*E8l5em=nS7leS3!I}SpWF;`YJCn_TW+E( z`1{hC2jRr4Z@7G)2rG2>c@aogkPD>K!sFncprGegh~Im^O_i$z)1h*bo{!PoL-D}@ zq0CgBkebyfcwS4&^Ly+#`4sfKOW`|U0cKCra^((}pBJST#I3T6_gOvW&S9Ul`O;~_ z^{frZXPmYTa;v&ji7^oqa`}uwDPOCpf|=Q_FX%hWSx1DOGmeHb93_{`HNw@d{vu+<)5T4JL;PZ3qipRrd6#RT($Y9F<%|$ ztj>cJa=_W!0%m~y%?eQN3&Ye(2w}BWSy|x{ zw?jrR;LGkZbQSwh&+JBKW?KcmEEg_K!TSLF;&bl9dN9@xH8P&J18MzzRfJq>Lyny! z=1>qg8#Zi^5jv~)pUDdB{lMT!ra4w@QOZxn4W6vu<}yO_IABk@hYvz1U--uH?VZj{ zax)&AhvYOO|v+(52L@Su@GLq`kJQ_YfL&6KdFoG7`>gd}oFr zg_9>z)bY#nE5ZAcKm_nq2Z~jk&X&d?D1e0NBZC3_o?FZ>iM`Xtjo0)tA)J@6-LP93 zOEHbQye`{T%4G%)XUunkr|vP$XD8f_C?H|+q>AAk?0`-V^x15(jX(2AQ@^O@mW*vk zK&~qXi5X@OjYgA)?=udbCs!Z|EQ5$e9D(>vgH>KJhbfLV;=zRn2gTh>9%Z|$XNHJ( zCeQ$7Sc8b^32G|Q5XtQ!Kbrttv`{zvg<$I3ysknrsR(3ffn>elL&cKNvx+sige{I7 zDP|$1OvEDSF|z@p4fNkk)dG^W4~4)oi|T^1rFt^-AYWBI#4w)zR&+V^^#WS%)lS|T zK4vHfY$r{q2-=10!PdiQP}-qeAY2 z++0=Oh}@ezyZ!d&(%*_Q1eD}PYU&^Oz=~;(j9dz|K?O56B#z#P8+&fD7sV=2T`L+< zAQ6wNqu0M1ivbq0{62~FJW7IDjTF>~di^FsY)k^1kA2Se6UY&RR#6|H>#apZ<4wt# zW+G(=8ZdK>y?2RAwO03hm=!pmfY`VKIm!?%Cc+x5gc1m7TIj+~yc@2=9fgcTj9Fx8 zKQIKFy(R1x7~L`8e{vfW?VpS}iqZw*Gjvcg!i|qowcx<3wlH=CZI{e@3jHIPiwrHp zM9*-k2Ls8)$jSj;^~?jkzen4R6|VO~*l3UN+p;h9%O4cU-nS_`lh zrOhbR7IXsrTQ_V>&Dtr%XBH)yh`IY?^i34O)B^?pu>2+V#1dTuvGe~zS=@-$Vgyqi z`qLGdGPUAijt72JDDaz+2*8ijDi*lp5tzc$!I50n9<9|)GM^~_1APU*^J!L zv|lMw8T1$s^Zgi$+Cb`Mi)>1IkynZSje~8t`z-Si;y;bWXYEC0RNDH*x#%%~CYM0_ z>t(M3TBc}T5s(dyuB{l&-j6u^6`w8fTvXqYPNsQLSh$PD5b$LV&ZKEcD5(ame4~cl?LE)_bE8-@8(!=ll5AuLK5Y~9+ne_@-b?{KG$2sFrU%h?S zU&6`ydL=Q8KS3x6cadH{hKFRC5waIlnaS0u>WMRWd;&s|=*f+Gf%6gAG*xiAe&d`r zP^2{V6AvjQdxEr8_kz^M5DI^5Aj2MulwU0qJ zXF0Q#TuTD!^CZ%gj#viHh~$Iz_3wPvD%a9G;My7>HL*8D2D+FuUO52IK!##Eo;xP0+S1KVmE6eSG`1QEC!(1Wwp!y0xv4A 
zbe^B^Gf~Z()}V9K*@4sO;IM$-#j?Jhe8W`7x(L8pu2S5am)7)s(c8Bd(z@w{r9FFo zFyAv{NS`iU#yCvR*>bzQ?=iJXYc9(at}1wt^(qHg?vz{^IH4{QtVx=0Fl3o1l!Q;k zUO0H&t|;3XIZ0B@4DGyP{PSu@N-~Jfv}w}>F9ECwum6-@ z#s$>u6;D`kVDYe{w7DliMcc~l%fIvdEjQJV;~rpzhoKp3%Q)VPE+t=jp$JS+_VQ|r z8y43hY`+1{SuyXESLZcO6Q@sBi^Anx^i5P8=MtyZFbB1_>@c@@7FktO(TZatlmL?F zkCW7&hg_>kyZ=DPBh;K#y@*8j_?v^H`hTR3jz}FG6S@l!3^elNi+H(UXt{>9Z%4V+ z3$emuDuaeyFmwz>i ziD)!A!L*rzraXOR%tE~%j{I9m?dP8@D7shga%t!FRTfdJmojuEmsMlADCAz;R+UH51;_@-~59AmIqbqm9J+ObN#wPLBopK{lT1E z3^zpr=0LM&m{SH0dK13{ zs*%}I>iAjplmJPmuCWeRv1SykhW0fSR&p^Pz>J?*Irlg>H;lRUt^)Y!nsIqme*`Hb zC{t5K#lA)E?xiEA9P33rc-HU@>m=?HF0K{=J zJV1@!85*B%%vkSGX(R~C^zJ7eG2#}bP^(t0h(?Yse_C$Qr4`Tw3fJ4Dz)L{l;ND%K zShyqp7P@_bMx<)s8`6tyD*Wlnn5wu%JtxutzE{-qG-y)QAn#{^;SUrF^rYWE#5WVh z2|B`=vlP#*Kq8U@*Fwg4#@+W0ED%C&@iLroX~_i9uhIw%#iNj-UTS7_Ph|z@LsLOe z0e(Kf3f!V0qau73%?wDR&&! zf*J0u2M^jb)SzaOA@Done=%~`S~Pl0n;U;dLF zCBTW;2y#x!qoz1|4>bQ3S^PGxD{Cp5X?CAbd%*dx?sjtei~1ncXbVL%!dKz11ZNj> zLh95b{3JEvNkb>lAZd7IiMY=O(T0@IG!Ol`gn^AZI&d>ptu&id^?$%1x>8EEBRg^C zi+xt=j06_xpl$ODwPHLF6K={osn=x9C@{yuK+US0NprLIEN#N_}^xRbnVn)jA^Vy3cjj_NV7_cR;1J)`pvN3S5$mWWc$%1EM`|4 zuBm!@nwmhwWt5*ysHzYf+U$L}MRQBikzS(?Vn_b->w7NA+yI3D7H)k(9ba=mq|~J% zNm#T=^Ui;9&3;@%+>aMLh07-dsMUVf;Xe4j2<@Q9Ufa!_+~m!_{7l!bhL5Dw)VE%v6WB%VIsQ*_f=YRXrMINb5p)2I;bl+}V`8l@X@dxeN zwY&B3;grG{C=v`Q3a*df!_(Sp#C(!e-)YkZmDaPqe0Tab@@>5bFSkW+n;kM?b)?zu z0E?qrE>BJLH=J+sVAZ8T`eCcm!xK~V!X_kipP%;7q*vm|jPUW+_3G6b$~5z*$bUdk;&^^wjQ-Tz9}U8h4t&q+>8w4R(t$!ysD=#cyhqHfr78+ zjr((pH234RI=6<9QqH4@4wsuvL$83Y{myTh+j@x#Uu$zPn+Sihz~D4e&M zob(7z=I>75Y*&BJTX)EY9@3#C=2JOGSq%(i%a$#mjZfBPn1b02vp}cT7y*97!r8MA z>r|J|bSz8M%8K3)iZbpDe0Csltld!Slka0b<&0P}ED86)W+Og0t7><0z0NqqWGGdQ zPsxd}MFnT}>!hIbF{1DYi&r1?`+HeIg~=Uk%{-1AQPb6%HtlfY@?IadupKq6Xcg5y ze0R5_LV{k33a4p#(=I+CL7x&zNu_n(B#Z8o`g$&I<4_cM)`-C=n*WDp*`RCb02#Nj zrv-9hT4i7FR(nV`iwwiP;_$VwgcLi^EgoEA*b64+*~V9Um%Db?btv@rDu21Bubw*q z*ahnKd)&RkTTh|4jSz<@CW$~6*JYMag+{%*k_~RGW4~!r@tyW-j8MZ8QiUS4An{g_ 
z;VRHOX>dzH?klH{w_6|M9>$Q^s-oXVP9I3)JFSyr`9WW&@Aio&M&-V>ZoGH<@Y;p& zx&(s46V!8Om}uG6>?Sb__Z1Wr)Gg_&i$K)_hf@9d{EP3>7RI-#aZl1nnFEN3PgAuK z|9{D@dU**zo#10breai@lar%KtjrF8$)USCXVRnQf#e)rcM(dZhc3)D%O-w%!U;>` z>|lZit6UfUy09>CP3)J6HD7aUOs<^3o^s*lk2`1yO|uTUy91>4oG@WRAX>{Ro~IFW zxcQZ|xUx&KkW_LXd0_6AuTxc2B2%x-4w|(-+O%&>xw{TXsQb{{1BBUa$H4INWZz)( z6OOY<;{AvFjqUQ{S6P7@X7~CjbJwD}j?Mz8=f(`I0m3zz6<@uXhnq%=(xgNDm;38= z$=3(MTsF;{cMOf~g6AI_%iUZPvO?L8nvMFI%+>k)yk2B<^h%8GBd;9CSjF9@`<59t zn&n5l=Pr>OhX*K+`}EXyddRY9RZ2_K=_4zQLMq{+)ipHqNU7;sxtk9amvF86x(G0K z?s%OCYKLkk6(skX$*ZSp?{=Jr1tOi+N$QUUyu034YQK2GJ>=XYe6 zx6tBlf!eRzeFqPoMNgIm!5?In-Fi9(UES};y?-Zc6OqSiq)_PY6&*XH*=Y!Y#hODW5X zf#` zGFEB%=;T)P!AP&VlaKX@ic!xasY;9hMN2CyXLDMxn?89p$3Y*7^=_s<_-M=6ZVVUI*2PUgw1<8X$-*FT^(4?R& z+5Q_=m;qGE3A|Z8W>miKrf>j3lWH)E3v|2ZU>W4IPoBJUqFb9&oN5!tny;UMpr@d_ zG~nZyMa<*>_Npd$XjT*dx6JL`D*x=t?_YiH&-D^V0?pi-3|6W317f`gtrS)9WPSvt zxz|ulZn5)G4HPOPN3K{u;aGXp&(`0jv?^Nd`20ataJL&O6IQOZ2ueTCF2Dc6+N3nn zI$ccz4(cO`tr>C#q>#=R0)v8bH)Sm5@T}Umw)DyaTu8bTm zbEd|7ztq?a?GW|ljrZU!-!@L5t>cqQcr}V~SQk9!(pUg4wGWrP`pgfY4ld^8%miOg zBf#%)aVhd%6Z^QS*3*-0k6~YP9EyY6ftNGNbNa5zriW%eMEfF6=aZ>g&ph&#|K=k_ zSfq+f)esn77TYT4`dmPZ6xtTs3amK7KxWHvOH3g4UwyXToBb5PW+@EtUY$a>ethK# z@a{nL^Ht@k(`Ek+e}8Qp{A?PHUh?wHNhe>ClQAQ!T&F&p%6Dxe`EL z>HE>w5Ewg@;|<8(rdhU02fgq0Tl`uX0TW3siE=U%V#k`rOnHbdM2)>dOAc^SXk~}t(h_J^g!$?VBZo%jibV(-~Rv`Q-F*|KllgPa)eejZo0|+Yyt=06eO?_dAE^&d;SLMz#xPc!UtPN; z>u`Iuk6yjY^8R*c#`0+0H z4=j^3e8U)Rx7o=99O&+(%xvF2oV3vKw!>wORo4`tixh$>;>OUBdmt z%L)&CRGRxnsbLShN0yb^UQT!#@(rjHk zc=TxD(@MMp+hD!yLTFhFpyg#N*81FbxOy|&Lh}_LC=twHfYfHtPQ$Wm5JPec*IjzZ zezD(5^}QS!X%$nZwiXr^Nmp3Bw(f5C?%f;F$y+ODWBh?-KysN(cCEc~_UzgA9EZWG zZP|&onP=(vT~uG)Y|zP0JDE_L1G%cD+LoR1plqXkrSkNOtl@b%qQelsJ8wzsKLMs|w}H(QV#h!{tt zEG~W~$qBI2!s z#9W!#kkuiVN@pI(xLu=h->28WA34EU9fht$dMQXq{x-I~G6(G-LgesH>Uv$iuigpw zY*&-7kh4LQu1f99S*FXGt7;<~ZAkR0V$P_FWBH7ZqqdkJUD$ACe(-F^FL_?2rKKrs zzzYO@X8axXrccixLxlp=<=J1rM|1_hNVAD1vrGCLo~`o&TQXf9=~PvCcK5Dbj}dIy zQDec!>??N@y 
z2ZMBax(%z#cmq^kK`{O+$(;|9k`AkO)V3Kf76S+_2jk7)Rd&6)GMhGQL(77{YdmLu ze`#2>7#WEKXonD+F0^F69D%_!m?W+{neTCRW%Ev_tfs^crmOQ^`H{3^U#bS*MXjY8x9gSa902e>FEd*}NG!!rw^#lrmN$b5qh)Pe zQc_bZQuO<3Cyb|9GH?3T$Hvgz11VE^x!qtKiZjX{Vo?hd^04f0?UgpKwX)+bbp6q3 zD5jv329F>KC3j07bf9eDTX&|VjO+}je> z6Kyw5yDa^-<~vDPki-`-CXHC}cr0&p7L}mEx{MK z1*`RxCvU(Aqorr3X7KW^^Cj|8BuBNFi+0x+ep-l8Nm7I+Dmhm+hUd|vb6@8MchKIy zaGG*{r+lK`b-#}_LFCh6I6E+{cz%+K{dhDf2lxT`ozR4ZFfMZ4_U?BZ?LSslKA~GP zYcp|IhE3d~GOp82N=nhqUqjY%1(v1$o6qMw>+4uKmqXz<`fi9(pu1mBlfSkG{-0GyX}&DyK0uO@NgyWc8}Y=XziYB+pA~Jx0eqrxMO8mh?V`b z4i9$qk83|kJ3BQswQrkHqt7Su{C)JPlggf+B?P-SZdJ15wdw|2%L^w$FUEas{VFfd zUrI)tds3)$TC{F`mW5NtjsjAc_v#V02C|@g%Mlhnw)M_&a9h9CLi;P{hXZf5D@oD! zZCSs#{O6pcH{Vnz6M0);l}w4Wf}i`MlFhiSd#Q=ZwccgdHny&9I-N^HPx>$WUd*DT zjr{af006l#RXTzS2FX?u1|`i@pbuJT=jLOhR%6k=%KxE+FaL{hi$(Fg+x7w>}RpX#{F=225v{AEWo&KZ4c)rL2Ai; zbdwA_*hd=d@^{yaGiotmq*ub2MT<_V*uVZQ{leUt3=lr%DafV0hyqd4=??p}Yuldt zNl8!G%-su5$K}%rw_~zLLVV2UmwT$%u9{y)lp2~O$CGUf;2^8uSu2oVNtJtSlvAu+ zl_jM&L!C(x(~}Y}-nemN>|LuKKhY*dYZ+pbK#4R8M)hzjO_OsCR5p&TE#R7|Y<_I0 zN*J3xe*AV&=Go`|TI=8t6}I{-qZk#G3I!zWRGwYw)DJLx<=fr?o!jjyhEQ>+%rtLe zvOktOMJmM>zx}rM{;nFIxykTWDRj?`!>98nRnu=Hnsj3i%Q{V-tY&j-7L~6rD=MvirYF_eGB$q+bInrK=R`v~adb&Q-74T{ZfsCoK)Zo8UTyo5%^Z_y5Z?te_ZlyGh!pv#U4om_n>5%Vu*$i*O0t$5(f+*B>?0M^*d1 ze*M}c+cqhzpUv{lUgMv=^BMM;p7CBpANmFY-yVdkH&| zx_9d~4k?#Sq46;rl{gsw*I93@hlV4ITB5ygahujpw=f*5oA3KcyZd6AqPO4d5=cR% z)x*uNW>b^gDaDn7H%^#Y8zLFJbjn?~d+!h6YpBnvHnAimPQ@QRQBsnjQz{leEqkJVq)|{jRG>w+t;m#-FF0 zP!V0Or92KsY<=J}pMhs|LYG~kNb|LS(-zO;*|kZri!l8~@DLleKw{Jrk3nw4kT;%Z zJ=7b9;Qo?Jf-)3dHUM(OfKT2m)-W7nH zQLN&+^jXG%G-#z|WvS4v5!_&(xoQZ>NMFJkCao^YZM)KhLZ=NcZq2Q0#XK!z5dQKT z^_s0yc8R96TuaMU~tPPuQaF z7O)vj1Jt^+@5JuK(tAfPE(M~aHn!5JoJc>EefaIVzOfG)&MR?GZy7?U#s zh}+?8S4?6x79@;B!?=sk2!^0#<)n`zHYy;q#|E13dmovvA_!Z_^|F095r!lF6z60I zD&IkyD&Lwod27D;7Po`W;~4iO0&ev&wM`ssnc}w#Y}JmlZpMitTL%Ji%7hyL*_s?@BSq1x-$tHcRXLg6!Q$40nPp*5n0IZ)}wScuu{Iq6W zQeyZmxA{I)^0j*c7gvg5PgbUncOuDf2v?a-gW5KPKQyiEuRIRzkOAqO8g`A|(+Lx9 
zb(H<>oNxf9g1s6*_jzyi)s^u2N?B`g;pEtATEWTowYDK2zTtLl)GV5I+MbtBGHi#p zeg?%rI4W``_kwuksE>Er4_@+D!)L%4N-9B+u#!OJnr067N~)OB@^T|Oopls2K@bv2 zSxM3fVMY*0>>HNhmL|;+kU(lsXzY0yFAG$*n%SY?HiGE@7!cBg1$_XWf+669q{3pT zy>;6B{)z}LUD?}d)BNR0e_kufw~D3}CY!ve6jzc?5fR9%}iBVcf_`?0^bdpA|Uf$}3ft@oVB49XnX}e6ZV>8O@NK%kP%MP zX}*F1=w|Yb7*e=1XdObSKs&jmwaMloqoj0zhbjxcdm^lPF(_h_k?Ys4fOTMx1`v)4 z2n$(J>w*v_34O{*E+VDL+`cyFW0xZu=D+QR1o7_B zNH*}v3-^+e)*milM)lOwr=_87hI1arrbqa44iVm&X4U3>e?@6(tm7uH%iY~&k$5&* zxsHrc3w=fz^XBNeTeoQtYq`(`n?Fu~>AL^c`oVD`>@=a)$^n22smccp@6aNQQc|u9 zu41-0jSsP>aun4!+AFCCOMrG2@y0c@|x z<=Lm-4!1A$qmE;!WU%@C_3N+TIUFbm__38Cg0x#dUaK7ch}xp3Zb15ozS9s_38^id zz+vxYn@AT@ee7yAMsEs?pbLp#b`9stO8&BIoOFA7DOnYsD!xbGz^{o4MlBMp)e5Wqp}yJPn`bss3QEg4_eo$y=O zXhXA^`)f&6#cov^OxD>D2hGcuFB_q%;)lF_SFqK?mzkR*mo(DLVZU7O-@ z`!C96TD2+gB;;FiI#$DpzfLtJg2GthBL3{&R;v} zzWV3Y^oHaY&3N1G@M6>swM7A$W$@~!e`l(|USzM1o(hkZLPJsx%g}Pgb&*%l3>Rfz z4UOg2Z2OHqn}&AU^tQkB17IZ=V9$(yKU-IR`!rPE>6A}E!CizKcUdgx0f4DS+EDzw ztKNnshcCVRD}93+RSs-;EvmIwdh9}>GHiV?f&iJjb}ygjW3a$<#=xh2mr?sl`Gv?7 z;AQh;tpH@bKS^plmHYG26C?YK9BE1(mF+}D(qzO^oxTrikEC^fUWN?_F+0S2syvcu z*SI!q%Knj+N7zz9LkBSq9n0vi)!TluFT9#;idF1o4WIcrG6;p)1@>0AoZJ<(ciL8 zC%Mn?Jh}qJFOS4xfqr4^{*SN<1vI7(FL(9CqO`C1*GZim{e(Xf$_hg2;L14IwAY!O zJ?rjKr&~>pIlxf;Mrv)Yf;jjI3|DGO zt^@7U(eDizE#FhtqK-nXC9N7XETW&o zM4Zg!5%V~kp`=+2G|d?+U$NIK2*mpCh>y-tGPSIVXd{Pl%q`Ot)=UsLk;|YuRDUF- zmAmS~g}EcHe*N-A*PX{V|KvxTqWZ`C;(`J<4(e&Q^-dkKp;wRL(UO z?eW{ZtP>_M?~pNza3nGKd6UKr<#~gY-wz1X4REWkhLq58FidhAkD&zl&d-pbiq4rIr8@RR;d;OsnNF zd57mCb=h5he;`4m^!;w17rrEX=*}VvONp>zg3#-+O@D2FbNbt((1?{(Xzx?DDJeXT zKrTr_#3Fita(rIIjn!9sksgRRyZ7x&@|>J|*=KS|etMbjKnBOuaSoW zz!%5Lz({5Dhy>3YvVNwQf zZJ=T>XU>s?!3hL}U|=AjO(nz<{c8~bF4P1*Sh zg;(o=Md=7-Q)S(;mkedVxrmZMNIGh_atrjj(c#M&XH}7!U3QMu0mUG81 zHzGetBL)d7Y7{{Xuri)AqGzq@Sn;5v9$4$%_|VC~dv2T)5EN|Lob;8b1N6i7%`v`-5-vhaF6akFOXouuq>p84l`SF3sr;!?4aG`tPMqIygPp6Mw?ZmMv35 zjBT8IeamWA!E4)(p2lQ6IH>eQC6zS84^Wo)TLIiEO^ezf*B#$+TWgVxS`^3Q8PD6$ zVFdEM(cz+vf;rLCnd1*-W0%`S0(ng~9UW0l>{R5#kG)@sK0$Q8)P^Ktwp2bCWD7 
zOb6?KXHsog+>#Z@ZMWX#Io@*n=fA4SY z+ZPX*xFGB~S$B86Mzy`4+DSJz<;-`LChrh=*WzD8yn}D#%^^bBB#!4^%==|?zGwqY zhotnO2vbt&M)_o{jNG@kkupJ+Q)FkMKV?eq4{i8|$R%nIzo$gLuKKbR2buhKf68L3 zHR^qA-knP;G@LK*jtbBB$cqZMKQ+6LqQzb+D73-L!ZOhk4`!0dE2E`g&Q1%P98$#{ zk2cXu-#h-rJ=-CUTPTBVV`c}iF^VEe4VU7P;m(H8pU)uXsRg}~onnS4pD_r{pc#o= z5XuAh^z;C)CxmL-kIM8W8PlfYldwY8+VR!G`X#jhf z`9gk*k7FS_;m6m>Hb18%_|O0S&j#`Gp#Qgzl~-73v3|~vA2h37)>!KI2;4cTq#=f= zCHDP%xTAFhq_A#V_Ths81(XppS{VA+(jV=<$BYKPw`oW_#Fv}n88(itcXZqFY%S-6 zBRs^_F%`2>kDdqOT<%(r$a{OqL7)q1LL%lR{@P%|UweT@3{rjqAt4Q)?j)^rPfUFK z^JDQuI{eoUW?)#KUp4)%b$ZAMc?Jgdq?JTKWcxp~%F3%DQl+YctPM z5VY&J$XUgqpo@FS&!2K05M=cGd)^M*!d2md)O2)mj+%U8{h8UC#Snn>1q1@lt#Hlk z2h8g1m;#e(N~*VXblGzM55=nH-ivjf@6Ja)OKK=W;4czzK$*-gKlY8s-0r^CT%#Vc zVB-rVB*6*&lm}a78Hsonh|Ul4VnILqKU%i*|5>7@vIL=27m^)BcwLSh^c;jDb_c%6m(1WY}~_j%}3YquM?+>K1wqHSA?+T~I^Ge!3rb8PdO z$LaU#5g}&=Ybx`=KaQPuN1%>?pL=xm@($%R6Eh(|VQUxNc~SdFjZc=X8INAB_9%L_ z<*f+!BJ3vMi4j22^6meu#OO-{{QVxn68IZ0u%93Vu-wMOQ4M5W=n7+>zZ?gJHB)_J zEmohu%*@P!El6)Q8}u2a@0_7viHb;;tK?~9Si+=FIZic>G1P6XpWa<^Nv&6}Air_- z6wfcaA3hww##xA7`#@Y=9EwBJqV&3>e}kGAQ4Au(%h~TNaXu#pWiIApoOJ8o{e`Bk z%VLiePDJtd6=g^+&=nfbo7XP%tirihJFU}o6r*(qc7NfEH1dx83{kq@k0`tR0h#gV zm&xiWz5W+nX98Dq-oF21n;FY6h%nX^m4xgejlFEGvXpG0Y>BKzV=P0~Q%OjnBvC`6 zL>VU1!YFC%LXs3pl2rfqeNJZPd4A9T`+7a|%y2rL^ZkB4_vgN^`?{_h7jE4O52rM^Ew-c2r;6Wz1zf@7 zjCc;rhV}ANfNP&Ntwe6^8yM(t#j=*dwS4Kym3EZf8X6i>#=NgzPob-YHbN8wlKCxt zwvmJ=UGQ#&({8=`EBUp^&-8ouwq^%uzLBA`&~V`}Mf)CMH5KDF6)FsWsu7XDa2T4# z<-UA*jI@^U1&9=V^{1xjjODoJOJz@AdrF8b62XX87iT=7oq+a9bOE{}mDcj#t$NWky-Vx92fci18~<5r%`v*@ zx64nb38@hN#@*Swh8zR*=tk*Fw9JAUyJ*yL`E_z>{Hmwp1m`;j`^i#9A3}1>8o zpj3bH<$ljbqGXirA^X4np4tkF1iqM{0n#EsfWpb4)1-;@^r~9H_R$XtIe?h0pdw_F zZk=}3=U2G(D%=*257&Lid3iLnHbvEeHx{3sTpW0BT>_6Wf6wUlx1sBs>-SNZnvh|F@rAMAb-H%NdikFXy35Ke zb{Fx3$dVYcY-<^-MH=_O0t*r<@F}_hArZbqXj3gk$0h=SZQl;AE4!86q6D-V4dA}; z5XM0S5dkYThbj#SZlv$GaV>POvCqChq_-kP;wfIr7%-TSY9OaJ5rMjTFNkz!b&}`h zCQ31|-xL3NdYPXJNm~!0av3%s*#8J@hvN&PF73@QWwHGgpmgou=;&qdV^u3Vs9wWAQuNu|n;@rj 
z>VMv?sjPR%5a?uROrnzOM8rtpeOjNZe6wzYb8;YK<6}$r(E{tp6FRJL(^ef;G0lhG`rPiVddge zOl?1nHgu01Vsh-40c{&2-n`)wLq)!;b-+J|RyV(m72sPDbi{6ib_cIS!h3mSS>Hy( zTY8TKZ0a8VPW2`26zbz_l79U%q;YUxvdb$$VkOog{mF~pMS?;7xh?wg7?=_O#g=dF+_t^KVgwMS$a=u4{Cs`2SsAK?fnTf}J#I=L zp$-OUK#4$hmuQ|$I7Q2~1(@qU~Et>ty`R&Di%iIq$V=^8b( zNR&A~s;QIam|yWE+f#Qg@PoK-~*R3P9K6@*7u0@WW!t z0FsUtO`9HOP5kFc2Zcj^ZI(-Si|RLT7EUqP+ia&ShghSLo}a^&E8kAaD0+0J-9W9@ zn=e$B?0V?w(O~eXFE7*Yuedd&eDSR%gDYMecKwSn zwwQE{$*c2XOHJ@sx;BAF!!ZP(<;Ai+{oj*UA}F7Pz|UOIm|yuc#qC{^rjD6O%!1Ub zPaLP1nT>4U?f9Z;wl3L*cJWD_%R;XQ1P+Svd}`YI-n|AzcgNqI8soe4<#uJ+o7(|} z56lEz3w}rv_!u`3=`(RYTH`y>L;u1b<0|E1g(f)s4-j=E2Z zLLuo$deC{-;3L%NW*L+TEhxC8jeykGg5bS4v2w*Dx76-)+1#dad!Z#ZG5KVwhJu8f z$r+y3xZ!0dgVN-tBhv$pzDXNbem5daACASYZ7r6SR;)~FuzJ#o5c9(Odil48`u*nQ zJ?FIU(+dSQ;ftm<*GlZUkZED!0?u8et*Nwbb zc4%sP5LAmZ(YGq!y1sfH7Bne+P|i9_*J2xV0C$L?v$L_W7;`@ zrOB$GwH^3m?WfNv@l*STR#z-J)bPa1vja0S%)5``vjq>>ccE=O;#r7ScStp;UQfV) zJApD15C>hy0=SC8BA843WZyTN(?v(GufBfvxKfsznR4NlCVBU}hV8gl$lvNN88>5MUGds?UBf1J1ZS7%`Foaa$BQs!=J{?N z4aS-Dbj@K456fA{Z1hv~0;bA0>8y0SV9 zIGaCzE^sUPV6Y>T2kIYy?Va$Wr2U2aMW?Gh&$1qeof`WQM(Jx{`LvZ4<;Z(uf zD$D2TEW97Gz4yEqFZ1;hzxm!~N%9X_YoFGNTfH{#e%Os}QODUg#;8DztCBR`Ntu`YoBR>b67C{T%%Vg^?wp#nuIjT7ERNfnjvk$Wutm`V zhCkAzOGm@R`p1rKZQ@tei~Pb>r4n$1BrjoSrO=Bn)u`Sd_|w z!an&rO+sNHLERG0KOMG}d#;N#dw1=NcJ=vxC>K$rHr?oUIY}6J^kOYxUf^X8a7xi?3JuD< za-Y1C$7x;}tGkv=2Tfy)MTZDX1Fb{7JvIW+ym(nu6p0307a}X9#GZrNDiq_Az+Fve zN`RpgItlbz)Q4o_H)tQd`ORW8DL0Pa5yD-SP0H~=;Y@c`WcNay&>N`&P-5Fkp=~jl z$}NkWH1G*B)}LrOS)yg%YI)dTS?}k;bl5I(MD@YH5VCRJgC5+FCPO~#7G>tlD;uo@ zTa)XuAC9mVD!OI*fiPO-^Dz)#gK$-}&Sl7NVG7;i#bZ@=!- zOnMEIg-*-pPjcMuW*NfChb-&~8j_Nhc87KG0v2M%VcE&1hhVUfx}by<2_RXN9Li6P zS^`A^&*hatClg&JsiMSSSTAQ(=;d=(gm3(v2uG~8uyj}nO#`u}7oLqQJ zk=jIY4n1X-i~HVXopT!Rd$sjI`cB12zx>@F-}(%T>^rELeQhZdwlNwmk=9sR<2p_@ zd`!qhV?V2Jti6Vut^WEvISvz$!2LD9Wv!%ZH7}A3T^vGo?%Ye(Iks6P1UI3(+kZwU zF(2wfN=ga_1*@F5Y;!yENAn^O18Qs`t+5Req0L;Xf_qH^&eKE?eTMR2^+Zkv&QfKF zFbgP6l_CFXo715$YWp150SoX;K#vd(KV6C;B-D4 
zcWwN4RNu5o+(){$fQ=e=pEiC3J@@Iw17eF~1HRTq1n)Xu)o=g>zjQOqN#L9lS(va} zgzxX2Wc6aC0xOdoSGf2C-3q>K5>JqcRT}!za;FimibRIs4Sr8@!XHjgBQnz5u zfB5iWY@1~m+9fq-#pzt8u*VaU@Y{QkhC=uJ#fJuM?w&k0Yw@Mg2q$S#^%+D>lv4LV z`lP1`0CaE}-JDUM+t9uI`8xBG(rr&^_3ayTIVf5Bp7a{VPD4|6xcC=faB{en&iIP^ zT+Pw_vx>D5*CiUNE!CgqfN$uSuU-{OI-a&qd&Q12Xl-YI#S<64xm)-K;ispb z5__kA@B}Zn)XYcYJ=@sWjyS&26o%~E)qagNgPQgKQ*dfQ+fl7~3l?=tbwNmN%Y=2ugOsT5R; zXfI1x!XHU=(tyhT=jfq3vkL0ne{$>AM0#Qd@7`OuIvM4(M|KJ@B-?;@^g%YF209fM zHzjaxvqO$@cy8?VYuEHK6bS9!rOPpk*-X*Shr?jFNZqA|$48eMgD#s!$YA`jco;j4 zoldXZ+&rblRWPvzEf_?-JhB&^2#2S=)C4_g8iZSg*6yIGIgc`v)%p49FHNL`9s(D| z@PSA%VP)t8cVxuHcNZ5MN%4CR>%N@=M^M{1XWZtRGrM@^1|q&azIk5uTk=Q|t2fB(5+?W$ETrhcn~f`{a^UVCY(+kIbl z_UzeVS*}BePAPI7(7<-|jVopF56B*T6RG;OaKF!BB`MwOW!N4m0;K0o7t|j8AwrR_juVf;z z*b(Fwt=FC3W?q#>P$`C1rn7Uu?7pQ548WO1Q{%#b;oo}IKK-B&7VTz0MBtJ+t)LTJ zoN02kr!Qv`frKVHD{~6F>odpaSaxT9GhmCwYkj&f9 zu4UQ_Ti6?Z7ZdiCMCo;JIri{1lo~g)CjFI^R&pWo(w!2Su;r)dk=jd5f$8>f>g^m? zjn{d$t&UwBzJ2S~JxQ}HIbiJXW5sOXs5e6%AOIAbxi?6@7yVct*B)Ek z7LTCaOdxSwdV_W9=6Y(f+ql(RT6O3U#R>1<|2P3%*i2MbS!8PSDoFmj66t~H0)BA5 zEtXl5rwDw6;DOrSetFOrlZ4&9OD<7BCAHQ9u}`17%y7o+^A7-l%-=NbX}+11lifE5 z3NPJAsk2nclUKLw0H3*|y|m6a8EZ6OI;P(i&8V%p`#ANHM!oiMuxHX~l()j7u5X9# z)sY_-Bq=>QFn}IInHL64j>~mDA zP`P}+A^Rmjz$g}NP;?xuf z;L^5$0XQG|nQWaF@ZF&rj#Uce;GX~s+wW?Q1yZ5(?_$OXm7Wad2CzT*rV(kRt`T=o z$gvbw5=uV|Q=$IA#?7qf#tub8E4FfgDv;6EK3bg@NTMK_p%^h_^qk@HS)Geud+7P3O0BTK?1KP1|av3S@PsiQjYp27+&^G zGb8QACf-y@agIuo4onV0W!HlOTHM{v||O)W8Ry%$qZ3jFS6Su<*cu zrAHRW7xpvDU~hpRAGVMA3$wBl!s?@iLR#-b+?BnEd4oQIf&aS;Kt0Kz*Vk@Fib>`% z{mq*;iL5woMi9usDW>LT#A$mjWu$v1#fMu=^?9VyMMs`L1kEvKSTT?Kn6LtBR9fP} zBybdfu<6#w73%I9W%IO@<%<*?oqVIH$=5H^V0DK&uNhm7Pa7_AA@+qlqTZ*{)Brk( z&l|l=@)WN)qQD1KoqDu6(7;2$`LUr|m!NoYlq3#Vn zqQq-iNb2?h!_;e%9xo5yRab^kMkqxhM{Io(da5T4D!Pi>`t}35UkSp{ph*)us!&^G zO^f$|F_f>qP_V^h=C)R)d%l(~PnEJg%?IZ!ZnZhJI&ZzhUrRaOe~09D0$z0ur6)~_ z9P2&y5bl_M5e}|DD0SDGRTeQ9F6K-<{lfRtlHoVs?5a-KGjo(653rWiucd&ZFNZ(R zhQS?N4>g+@o6TJ_U8G(r?3j6r=c-B<`Gd>3UbV*xu?PNuXpoE-WIZM@kkc3qGEnHQ 
zTo}-}blT~O=N?^kg4PB5Y09Zd(X=)cy3Hb6jBPBp#c;L=xxpKVP*gc|T{}D;QA-gy zSzA*PylM`V6R7_f-B_Xs63?-g6S~ifon$cZi<*!S`JMW8L^^PK966<7gT4aaxW;g^#;G*wa}#^Ekt5k{?|UyF6R_ErKuV%;G~s%qz+qod~H@ zkc0-$2;-fywEpqw1R3jSV~Jm9Tk;QTNo5eS1Bw~%S&4P(6S3olxK5`;{85d z@35);(T_vQju!?06nB53;nWa7nAgb_Zwv09f1I-HiO<-*y6c{gawx%j@Iy+sO(rZ^ zxl-EGIqy#ZVjB{ndOd%q2tJXMQ~Wvc!1|H7FAD?bt&M3vMB5DS&EkiL+G~dW;?pI( zs7{mQp$2c1vvi#ylF-kI-`u2wOfX6jg8_Bt-!#C99t<>Ae;QK=nHIv)2p$pm__v>w zHaYtj6hB-y8n61C|KdDGL^+(|Y_chN)$GX}XW27R1fw7AW>Mcqe!C-;Q6Ad)4hep@#_<- z))n?$lu+0&=F6Mj_0G-Dd9(6esK0OEp2IkY1!x+u?^|qtQCp$WgtUZjBE|}&+7Q%y z^^R#jgc)q$oiyv(;ab6etol^0_iFQxcZ$(zG4-f$?Puplv2%B5>#fV|5@>&ROghGz zk3ntQ>Zw+e!hC}>ep^r7axG6E`4SaBAuRip^-J7BI0o<4`BvMk;?Np%{->X9vZsSb zaB>q9i%W-xXYFhKj=(f9VCTY9zizWjd!5B$nuP@Pvat9ILUN78usDJf-rTs%drj4k?a6Y)K7bnRPZCvB!$0;OR?oHdwrPpex8ll`(el#r z@REXvYdhV$JS=H$c9e$(zMoFt+oQ zC73Ws+6jDCu)ew?NKC1@bN=fnuG<(dKQ5eq=dO7fr2=0nb;cstIq02bKPGx^`U{{% z(3`zzVMi#P-s$_i9yj>3>2x~%{YYTZjpWg zr=Hx{L=rz8Gn#9qBP$a4cSunrCOz_hQz-Io&^H!YsVE7tUbF%I{MxAYozi}8+vTX_ z@fU~^17305rLa%U>Iy%mN5ZEgf`~F~DF6284t;4=`@c~Ea{gMGoeH0H-DJODg|@C1 zshWtWYaqZEIWtqn4psQBO=!RPbI7#WBfs|?4-Y5wg${W{w&ViG1}lM9vtdJ(@|8}| z4iq~~`0(0hP!Uw>E6O0zzbHch{HKePb7yW;N?%!-g%9lhO@*R5(Rfo38tvB&E!4d? zt`;Ly83t_d2ufmgFLKt;+0G;G{Z0VrRrn~0Svkxaj)EvPp7hIadC_<=YB3bOF!>x^ zgd>z*^C#_-o9mTYm%XFgp3uC8#)b5u)n`VEMrN}jcCH{()TydA9eZ2=MM?-LJjh_b zRWV5%6BDL%s;NR07Bh~mU&lS^*lNHoU?|N?U1|k? zXVtr1n@d%@l@kX?AS1$`UAc+=ojRuV%C!{NOJTv%Mw)=Dl~5ipx9G$Z6feJ;L=EQ& z5;fDNt(3E@dpmjW@_)40e`YJr8?BCUFzGv!^WpfK$gby4fT@HmSuu>ni}q{Rd|MDQ2kej!>s zjw;U#E7S{U;yz&Twtzp!%CKK&csS~-LAvj>auNV5p+93K%h!{pmDD>B^kdixYTr8% z^8lmVZ2b;aVb9L?Uw_^(Z$P8rAE{6$*c`1i^F5da@O;8_qbIS$ztcd%Gy zEm3?rO^Q{xjw~*9&FuQ9O*fZU3yjx%(Djxyfy>^DGP3v?K3F)Xjt?*z`D6dZD&HD> z-NzvNoMEY@==n1x>2Fc~B78#6O!NF7G&ALo*F8~1Q;LYmw={mGAQI)J_F6J$_ssGA z1KP4v)K@t~$^@j5^ENPzWrwhsmK&BY#?BcOPCygZFCoJZfA91L!cCRS%Nu&)bjlIo zerCkgjR#IxeDVD#_xy~J4i}bNLZ=Wu{HXcrmxIy@odtq7pR>H8VgdF{Lf=IaG=G$? 
z!mHt!^yTwEd>(FY^8O@Sv=6jSRac*%PTr#Dee832`Hh~9D7@idPPd{gT`so3td09| z%fed2!z!LJX50958xN0*@P@9|zDp^-H-4mx^&>|yLZlWOaf6zKX&cbrDB4B$CA1oO z*c1jxszTR&&hKhwg&Pdf@P$FI%n-BYy=j&PKP&+tL*OS4{i3VK_w>zfVDRGiuGuAv zhPCMB7V@7Z7@-=h>p1A3=*b!0B@!B04zeU=IpA+k>t~!$XSBr)ko$F;`qp_=EPHbr zsHVV8qMW1Ar6wn9$>squ8;xi@3kX{5q5z%(+BIvi6{usjxmiF7{~a#Il{DNlk2+Ozk zaDg~o>!qgLy!&zet=E4$>2e=cln=o&6+xi+ zxN#zCgTe5}mGIm~+1qrkb96&8)M(#661inJUESoGwwqlmWkZFG}V`HD{D>R&Mzuz89v^9y^j6*OAyGWv{h51pVl5XF_y03SVzxN zKamYG8`aDTr%|aGgx^W(*=6*1sA)PhSvllM7n`P?N9+Gb>B}c)#SR{3ET>QY(J~`t;!~i6i>At&3xr| z%2J^GvozXJ1ga^H(%*4nYjJl@;D$+7rCGQjW~wdzBOvm=!{!jEr?Yva;p63{5UYPU zA?o0;bY&)!OqXA*wl+q(j;)T$dn7oyW<5sOgdbrq3x-5zRP)V`bfo~JkHZ3~VZ8|m z2!8tSBqXr-J&B3e->g&9fLSSpVF-}8!JZ#(j~3nz4j>03 zZh--bnT`A*k44Ptg+{HuEP4_&M6w|Ceq3Z;0nBC~8xCb(VZA(683M2Q6~yA3RicXE zPv`sxbG!aZv;9EsqBn*9tw%qb<0}&=*#5Ikr2P~`yR%lnP8<0G{xE(VqT8hVOV)N+ zwg0E?9qv>8tId(h@thJe%tNJ8%55pz^z6oXZRzF{b24cy3R#hXH|9#Z(GI0$C&WRp;eWCkD5~U9Oby*f3rVbpzT#1YM;#&c z3X&n{uSi_D?60XZncp51O3MrP0?{}A4TPoCT|7jc1_;8Nn`G2d5j?}=;iEn=!|SwrTkVT=*o|kd>Ib4}@vMFs(Sc5Sa?kBrg+b9hz}9p< zix#(Zm=?Oh( zc@=l-o_hGk`VsoKtBw&lnSs^OB~Fbjy%1?^8PPIk(#1!BaaY~qSz<^WYaF|M{88q|7823HDSw!kKDfIC#drx zdsDcY$!3zWJ9ay`iHHXN2g*qfg|&>XvGG*!bl>*fw|xB}ey3esRhX_(v4Hu#yV2w6 zV2$>5`8&D3O8&gOllwJlXLhClK98V|HQa9;?~jDQHS<3$I>4!C$DlkByRSoQUh7eI z?W0NTJLu!dg+mgmmfSmXOswS8CUUB-pM?qyQcEB?C=f;IswI#$G~2I-4{{!?0fYw( z)kqv$zAmo6<#ehqAtDrJ>#C)ONWwGU?DjG`n%4E9=C$t1^w zVvK~$%!;kn7n4UC0FPfBhiLkA?w(M)G*4kOP`N& zfAg0mhJ<%e5zKe9L6s7{Cbl5(=Jzdny{-pW{2z7Cq?gT$2G&3Hl@As@cL!O;0)C|C ziq$WvUq2*Pq2~n*YBa3@`AV;uIOO z9f%N#i`27}m*LpsN%4;w>T-)4hx|@m&#s2osDU<9jOrw@t2vZYv^g`xWL)6K_@WDA zsd-~k&vD!?U~H;Vn}mwWUmW~|D%zT3(;|}li1JpLu6V=IF6yOPw-+CEZlCWuFevkHvhuKy zWoD(-Vlu4lECV6n(P*1tU~op1im>c=sr9MB$!g#bK_>(1DMx?bG5_h{V+Q-zeE#4p zr!WLTMs&;8H0o>_EX8?5BekhLfc zw_Wbph;ay`Af=1$4E|5bm2kR`pkEwH6j~e+eD9pM&g$wG&NbF|;;V(1l8pKg`in4b z32=9D7JirW-D$-b_49Pv>nvOJUS7`2 zW0CIN^p1G0{g0sVa_)+n2hWMvcwAtP_!X;Wvtg1c5(?0$C5}8Rr7BJk4&U 
z+&wv@>+&^4_JH0D+MfO|#h%eaAUK)m#A=BW2hj8#R>ca7mQJlV-)%e5cTr{+G5J*m zSjg@>g2E0oa(q(TbC@K)c3oKVOu*f%jjur99MTmX@s%P*+K{o6!T#QrUi+_K20w~< zcXvUHUM|P}Pw9vZLl5`@X4hOKC0^Y&ld00$jrvEgpy=5`m9~*ozkZUKSXEi(h<$`* zmkR;|zt~s&z!eF_D{7=Wv|UBo+=hLbN|&qAiBhtT_qyuhnA_NnK_ZDDzGv`O3I*!lo~w) zu8ruE=vdkk+-{Y1=ukF%O7yzQHG(LRP1D4-p&d5wRS$eRUSE|@yU@USNEw&4_Kl+uPs>o{< zeI{#3lF1MVO8XI9BC7az3Vz3R<$$)LN`QKeqIc?@m$%p?%PdwB zdl@^Z6q`Dc@XCxhez)iEl}(T_ixd&-Jt!GgB$tv{%E&@&wYTt*sCBX{X3Bpoj>~Nc zB`ad(+sE=XD0Ahdf(bDGMrTEWA|(n}6Y+Far=9566%GH8h9`Sq>ZLU?0`kyQ;gf6> zR?P-UIR5riP#X{E)~z{K$i0Q+sSJT5AiheLv;@qZMm~o?IG+uTeYCuttZAFD;6tvrJl-K}bq*8N-v%O8%S)$4h_j zC22qQTv8i|-QU4N@uPq2Q8L~WU->mpfByDhA;61@pU1!=_OuiQhR>axoLW3r@hAMd zssi5s@Jx8Op3t?Pqfcr@DBd_>4)VBQIVv}lDpLx zg024|!Kj5zLpX>LsF+v7-l3b<+SvH^&Y6B+J`X3J^!)8m zcC&Q72j7L;wwx#@@G>!1W^$bZCwkSpl0-bLquGd`Y@A_TiA^AWCx6Y_-qtCJQeEE+ zN&JGyMR3TX#|B+wf+*D3v6L<8wnC4YhK1)w8rXEJ9u1gr>P53hd^(l({5h*7Z6~#y-VHZ{>ZFZ+4rsiGr!0H zDbS65fV*6_aRE%Zw}CphjT5?3Kb_%C|=`0)j-8YnW(G9KMWYlNAZz)@h0dH45b?&as+ zR?!y~gqB219RFe$u4T>+hGB(9qfio&gX4vke{s*UmuaoV$y%1`-6+SmR^PT)heWS| zEW@FU0gywx_pQORujP0r!H3r|6P>wO7m^&9ckklcK^%TS#KnT56t*PwkMQNJZ>ZpU~yu7>`?p(@&AlzKJ zL8am#j2oCZ9IUGdQ7_`@OQYxSrA+0JaBPop*a;P0@E*hqW!hTuFiC zt?+nDMwa2I1ch1u#%~QMx<@K8sW^a_+~*G%OLhNq6`hgv=pp}@CN@1X!t6NW4{c}k zc_*fwk4!xWNMWWV)nd_woCvJDN{`fiunj4A_zywrYoBk+(iq?ee4xpGat>nVw2@d&DTS1X@MmrpY@3FYm zs6AD+oPMH=(lNuCj?D-pzdYZ1vSihmVo71J_SlpLoe4-Z*ug>gdGYqJF7n$_p>(mbLV zmo;;}U@yA>W*QDHLG>!tjJUIl)q38^kJ2QNxW|)_ViD@l-fP1x2Fy!&`z(1k zw4J08ZI$UcI`NO79$L*>>1Am|{?94XMP0ZOcy5&I87R6ubwB+u#dS_Z^!3)24~}fY zNd;5s8w+L)RNrU3*x}oe(MGmN4ewS7qcbQkGq79tMJ!r%C&QLBz-(o4h+;S|z_Kkt=clqXgf9AF`28-pIKeDl{~soQAeSo3XK z0I!KnB2;*RQ314`x}}M?3=KzAFVWJ!6%+^?2@xxS>Geeyb0%Qnw=;~9mEf?C&ZAHE zla@lLii>wPJ^Fgvt4AW@{}&R$SLpFA(i)}j+hKkK3-PvekPn~9%1~wK0Q2xFyAmR$ z|H*jeZIpORZ&$LDBd}1gBonelLLnqO@tVQ0kmG^UN2qC{?K1iDZa8%z#%?C!kRl}; zQ?-umyY&O%DfY{56{+ignbJ6YQ@>10nxE+QKhT8KSEG(M7m;6m&e_sI`pu}4vi{Ju zR6^!soa4IRP2fMSjvQ)1mL*QP7T2S}`+CjV2!v~Syiy$xxB2{+@sO^yxyS80bXr?f 
z?BfWfPi%L3*8zWA6J>Ht1|WZ4#&mINr|*v&xV#^*#lZ)nS`!B}c)NyMSE=6|8d(Hv zI&vgSj5bQTU$Sb&TS{Pssm`>jBU(F7WAlLV{oV2&Bq&Pz`{H%~W<4dUSUQ0A#9b4O zGY>nM;V9trqret}A}@WMDvWcL**)rnBZ0d7LN6not9wpGH9nMq6M(or;*AEB zGV){fIlfre;`6S1(MMa0}L*oyeGaEB(m`~%w4Xj$! z9bsyEqNCQB7Qc1;ZkyJ&S;J3jF0@Np+oSe2jihh3Z8EeX)v%fR-KDObI^Ec;9bZy$ zHNK+r!e+z9{N-C#u)6qR>cN3|9=FUM4lH>zu(!fu;cYvE!J7`bI?_&*5!#aFieWVL z-jhPEBvZ1chbl~XzaEa$*%*@FW0NeY?z~&=CDj$YfZpDm?R!u@(gIPVjc;rEgq_T&x3YE&Fap;XgG*IE`=r+S%L z%keNL6NO@ev!*k-s4Q%0yo%5Y+3(Dq6>*ve3>*_K(#Hsu&_GsvL8DU*4=(v99E%9P zetY-6EU@s>%2i6$OKOD;S_U)irLTZyexD!`)UmiI%r$FIq`Q=p0?;{;8 zuSX{SM1HzEC}`q-HTBeM*5Twi6DH5lP;5J-IAA-xjg8f~AQRY3rS1dNdu8PQtn9C4 zo5G^mzpT9!|NeK4fPsLeBPU+S3>_K2v*531Nz%umxV&}cuKJAz%Ivaa%gw3tt9H&d z9~eCXr4)~@#_vtBTSb`kok1Q&J8%cb9%Pq#2|a6hc{KPK?ef#DYxkFB|0s?D2qiq= z>jnRiwCJzkec98kpS?{?ofWM|oXt8gc*@KjKjt}C_eLIiE3qonXTja%rSF2jT)m<( zH8H`3$;>G$U;Bm@z3#Wjcg4FIJFlM+&*y@>4tGiwc}JoquM$k$VgspN1c`>2XyAG1 z2^^*&ET>LY{qRzQv|eG{_C<3l<<(0)k{T?EE9_V9uXj|x{}Tf<6eG9&vld4Fej{dq z--;K%>X*HJYeum%KhXHXNyni&mG8GG&n-$VZM9SQbGgPV(yZFsU%D_1Yx8U2)<)M} ztl5r(x&~yl4UFEo*DWgTNs{7S{UGg!1sG@}7qvt*Q!%KY(lh5yr;dZ|N@S zMY&>L*D<nZWy^Qqnd3|+D!MnVm1=p9|(py})#?SdL zi`MVo8`B;eR*+@=+~3h;N!u+Z5Qt|_k3^6~EoJ$~K zs1Tu&I>GLj0p&fe)K&!lA?zoDRcBI2NT84)Hf_;WB!~GnlEIwnpt5@+&E$g?>hs(6)e-Pk9JkaBu4|6g@BqJ`8+pOy z_uB4oP}}ueY~xp}&WCJliKFza-5yo{d1RzAM8D2A_S;-b#0X{$3^+cNfhQCuk3+TR zy}x%PoD5tBOu%oGfk~LuT8qeC81xii#~Jf`!a8i{bUG9-*Qoj9riV+m&zd;lu!eCv+%6^?eJHt+N( z4_ydF_phVZM^67<;rMRKmCA*T47&>lJQ4`*6(@>i;<`$vQeQt_)7P>$`n~T0b7k9# zkE<7rt#k~CpD^Ls-IIIP-#epkotTh)E;%A<@AljIiB%~po;2?^Nw>4fbiXyF${jJo zyk>=u(SG=d1X$!A(lMk9F6uU??UoEFrN#|`j1(?t1XHWKhh^p$xWavHsE(6s5NXPC zr)OMltSt{hw~y1b7|NSd)AC}v7hUh5W45O{!lyak%ZLt4O7gq=d)s&DVRq(aPrGyT zj9a%HTW{MbEk9TP5O9tHZl(=sh3J3Gs?@7aqdGvB<_D=P`vlx-OcY59g#vpBl<%DI66JCiM zBkH*7X>8pXpP7G$z;T#;u1_;gz#$=M!gCw)?s6TW@d!$TY~~gEGz-Se3m1zu`$zU+ z&S+C{@L)zFVveLDSITcliq_R2t4yGJB)uM;YY ztU+|~FFg!`CUufnEE%0?ZAPPWx82>#rVdpPm$@Apcr@tq$1F!Ou|zyBhCW(#bbD%O 
zYS7x7*8K}#SyuiP?z1lOZGyq|;}uVTIr9yJ_#&cShgDU^R0hoZa2*@>>xK@#(AZzC zEwVJV@y(9g#d72(op|Wc`6hVUg=O`WEz-KsDOOC36yKMvETM9r~(O*c>~LV zQe}7@sjXO=$vWhgzXmLq@tM!VhthA7@l;URq>2FbNcjA^o1m@2N0KQ5Q%;SXWlx_< zce6g2MZ`s~O}jI7aK)bGBOs7&-M} zMyS~O@2D!pS#hwOb+PeRLPV$NT?FD!`_g!O%jldu_~TKKZ*p4 zT8_dP8h_;eIa`|t=i=_xkDN6AQIodGE2af({yyu?K|QCNVTBo?nl>FwZ@=(4Je0M1 zhk78_El$#2AeFW9>mNhDD(Ujz(arYdOjl^D*LuSDQ$KH!mL`VIlRy-*JJr&iCCNXk zYnjS^XimJg%wGUo&f<@64vI-bmZM|FaL!~aag~$51mZZxFKeqUenAskEK6_6Ge%; zF3wzKT4j*3KiR*$Q!tkf}Ex6UN6Z&MxL zprj)=vr>dnE2As$`HX@qg(&HT&*NkmBJ>M$ysgR5PSnLxibGlGY~qB#_H*2_vDsWa z%#|TfhoTl08LbRFxSc28!Sll&5e4Hk5DpkEoEcfUS6MZ<7u5r=2@VihA+bQ$>Se5M;FV z3=Gm19EdldG|Zm8anmMekbyS+7VeV`aBEB%y$vCW(1RGdq^*1)nQ9?pZXmN=-8B6h zFRKm0z;DF8mjM-se=`AJTC{5Qs$jS4&#$_*Y#%gEZyDj}5_Tx`SgTeStP?OC@)CT# z8)JVEFJ!|QN*&>m-p4#%+7`gG;^KY2OL^IKd43!}8}Td?(~} z(fo8#;q?W@_6djhbAr^T8pe;C6fx0iyXl@6KGki6<_>mTeKAUNYzD=pYUU%2#rV2= zNY$PWiyeJBe)NT)bcdGh+aD_EB~(?|UASeA6#1;o8Y9dW_}NXj#H;QD5<)@h?iE!a zecmQhz(c*`< zetosUgNK<7zNuSxj$SqQ!=2No8+Mw!(hg{~$44tK`@p5}rweL@YNsioXVVa(HRdT+ z0j3G6#d+ZB5U~4OrPvZcVLCl*`*+w^-)MDIszRnlh!DNkwd_uZHv%Jgi@Pk~mIVVA z-uXg{aF8WQdm@qrF;+Z5uG*QT&=Mht_>|7V~4VF^?L4 zrJfXC9!6J&7#NI`SzN%!gk!f;2Y4Gk|LT|D(oitH-LzG!tqLlv!e9LIZlrEP=&f9bX%EX|W>7XHc9g&^->)qFMBCYpvI>*Lb~e8)Gz;oa;uK z6`cIe?8+r@AR$G|rhnv7k{);00YlJ(6XqQ{7Q`O89GZ<(Tt*HtQo$E5vF_l15Rqh# zXX*azA%j@^8Dx7Z$$%WbaM)pQ0V4L6tt;oq-p@r%i|^??&~fIpQP+Q;Hco6-Bvu&Q z>|o+sII)wx?RKtIav=Tyj@F;fwG|!SMNhsHH+4$j!Uv))#iOd@Xy9)vZn)bi$zs}$ z_UY%GA;W}jWyZDIBiMZK@!}h8i^Lv=biw`Uz&i9%Uc7AdrYmuUT+g-6>Xl85*?RwFx6^Bx9q)Q2@Rwm=$Eg7q+)r z9mOX}ste7t$@&rn9I?41N^qA}E2;wK?VQnUsk2-w84pTj0wQTb1NN ze&oegek5Kx!~ishMX)S5sXSz|YLK!J0*Td%_(%SK_oR2vGZ`NnPkCQ(e?e5Kdn<+4 zvN;#*r!ITv+c?TGN+K(O=-<8kU$n$9USs^cgQKFV*O=MoN{FdED&*rl4*i8Q-aPXkay3W`%>rEM!b1v_9pv4;wavQ zg#~yJ{&i-m!HIT@{llj_ObE-&&gmE=&HCSen-X%(vXl97yNUEpJr1=T{-H&~U92S* zLF=bu{!i8O1t=28Iu& zHZ)P&OB0gvXm60?o_#q)Y1QS`@TE_qGBJdLcme+Gz)@8?@^9LehAwFa5k{L`!A2^7 z_5os2n+ajoG6sgURpz=ntj_hjX!(tz{Au!Xm*QRVeyys{!VX-H<;8E}p{sjNbyhx1 
zBKK#rGCe+7GYn^fJ3{ULQeHyP6hq3k01-mObDx!YVm)JqEr!r>#E6iuNX63Sx+g1|wxgl#uL_0PilGsYUrnA^v3Y1D zIpCQAIK*(78hR*Qr9!ZqB1THjp1FzNx5lgy3a>YlySL10d_L7C%1~8Ds$Q?y)`w?- zA$zWOjZzq}dOq*b0a(4q1hq|v+#y`&8zI{wu1M%s$C&-VlT6(_O^PowjMq*O( zx8CCCiOq|UYu&bqlN7tS$F-oDIzk#z`j@d0c7-7)JUCX*Gh6Aq}2|pJuq}amj%>*;K6>G`5)Yce5JbqL~VPPOL|Ei7akj%Pi6zi z&sY@8iONii>;nvQrhjk1n2Aw~i8^FIO)!4ixB@)&=G?1 zS}a*Tjc%-p+7i~Xx9Uk9^Oa0TTk!h0yQyGkjK|U^@Zcq7zwJ-;-SZt3_Zw3HIWcz- z+g|u&jv3l+d~2DzUmU>fnShK^w8i(B~XbGR<`w;|oXHIk@}LSMSD967klBfpf6O zuzFrr>2vqA8NKN^>N+tp{966UT|eVtv?X3!jy^p3|3ptNqo602L%tYNok+J)x>2x6 zjU6|BrwEKph51$)prj69WSdS$qUp(LQ2a#wzYQ%36Jdu|w(w3Co;8 z?i?wSRz;~v=Db3wC4QiSGOni zb$|CV-W`U_=dQeT@=cE3H7?$&D%ZS28vI^mzHOT@J&D3!Q&~f^E1qzEnCdrS?-|HnQd3Uc1W7n=-Gnxg-e(Y00Yl(*(do?GmuD13c*)w?z&--Ld9PQ(K#$Avh zc1G0jVJ6a9-Y|3f?fRoPb1aEIis=oo(C7XW7;~)(s{Wjv*oPrg0|O_tp6$`%8%0OWpZj^YUu0|j?UICP5H9Dco$^qVeVVheLh^H_ zZ6)U~7mLliux3D0_v{f$pH#>!cOjTy$(~7Ng6XHZBPPJ1`-8GhMD%DVMj`GHgQB1I zRees*Wuq&VoF=}#MhOR2JofaOFj7^_j?CKNephsr;i4+v;?=$D7#PDmS$oyXz;7-9{-r-u_cf%>xlffF4g#$&iE;ez?Ry3XmT(mb#| zk7=Jw90x*Lp*Os4&C8!|_%z2n091vhih+|;imxR{IcxeTKRz*zEKiKNsT{|$hJDvK z2TZthW_9AT?n}fxlVLP7upX7mt1b6ThGxphIyx;Arz~H7Ry&_PaMP-c4gf!Y?^OHY z%c4o8I}D@tMLMi5X*ggD2|O6KaQQE#RFoEkvBBpHbAXxY@Gk^eh~T&|`StHnCl^c1 zt_wu01@^gZ_E#$Hg=L8?9^V#3{SC#1LU$4<26mf{ zdPYVvi69dli#lTT%?y3uUwO*cp$Qh%|L4>{q!r|P3#(5K+tx#BGe}`T;Dk!DCQ<`T zVRZyoIX8N3Fmp5mGa0D--8F!K1^5tb)33G{afPr-tp5T z_r32?)Am&Tt%mbB#?q)S(^~S%|Ml;&+RI`eEIc=MX&Z%Ut=`vOZR|W8o`#$+BSzHy z+sf%F!FNY5|7U&3?Dy`^&nQf-nv@c`1%BcGIpQ3Y8!K3UtMw-E%g;1rBnx{Ix3f>i zcs-u0eX`}F+p%YMerS6f%sTbSlP4g+K>8X)W!3yU9dR)u@sUZE|D0w^qE9SYzB*}n z;R#g*_lvjOI|V1*htIl}cwO`M<$)&?zJ6;8&3_GOuc!KV zoqfGV(bE?I*vxE?fK9}F-=gLDL)W)ei2|1ATcHE^#KskPanLE9&Z%dPdh6;h64Y|D9^0^LzKnx<3hcg-PFN7}G)=(sssOQ`lfK zH0;7f^flny0z3%VVWbK$aLp_uG`*r z-T92We*w%GtAuWG>0DguL-(8WnARb3qP!;rG|6v;={|7^sTQ=rW8ETShfRtoTa^-h z>)w%(-WMIe1E%kZ&Tn_;^#^P{kEY6S8qCudo%8M^OM3S_?bQr_&F;5y3dM5dbzSH> z(^u!VSOogXl9lTKAeF1++Hp!mgK;@M5&Fd!&7 
z?T-Q$pU`JL@vVO9(W)jixbAt_IVJi)??);?JZg5?>#VW##)U6kvOHnKfUb&;>%R&x z?L%M}j#2W#E~>tJ@lgOgcG3GUgMgBOB&7_sH?%u^u(`rDM}Erfe{U?Z$uZ&#pe-Vr z=?`G79SXgYk!1SrynDdK2hriWRPyV!+oIo!<3!#!{Uz5Tl0w>tL`DAK6j;Gd9ylCou42s%@^73+05c+H!W1#WMct-JKyguye8< zmvfN4d8k-2G!Qf?!2|}!HbM|tIxa$cE`4{k$-IFue}<^E$aL@U>xdD4wN$4vgP4;} zM4$G`1>ahH5#&A8D2F@w0m1hINc*ym0QH38Dn9!{TP;~C4JttE+}KL`CzGIkHPD(v zp_s$~AemAw_y8>{pfrWo#69+=GYfFD=Fvq;Ixq71EEii!vk#1SKLGxfBA zYW}LQn;fb>L5|_$dbM0YSe+PN5!DGMnS8c>iyt1BMi3uAi>0pOU~t-t1u6m3^QY@= zd(9egF85?z)yXjO+Z*-l8~htp9^ZQQ#S{jvO_yM$x!i*dU-dU5&imR zw-dT^|LpkqOYNJMubF;F_g;Q0b|kMv-@rT~ZD-okQq+huhtul7Zau)i9Nzu>cbV#E z6*J$3hDudw)Os@|ZPG;(ndu=dC{X43)Gz}5L=F3z+-g{dx@@eV|0BcUx%0PDp3aM> zu_i;Hg|_T+?@VYDE&MrVa9)Xtz@a!7F@AqpTvtU5a``i3k$HLH1k|+hWq^^XZzv}k zlFM12XjYV1`SE~uZmh-D_%8&$#Ng)%aPdXlOKB+p6vT~ikMd$oaW<>hk#i1uqf&i( z!7@z!SKf0qKO^JPQsXlV19o;88VMuxw5v<6;pC>iC#svV&w7~2$$sv^qg)nneDHG< z#tKcT@&vL_jl@}W{?XoD>Vb2cepaEk%l&V!Ki_VWdGU4dG+L|!>#uye-u(a3_9kF8 zuI>BqGS9M26>S-cQkoFWHcFFBBBLMocmWK1e8sU(#`gGLmk z`F~z3d%t`Ce((P{zVADZZykG}VXf!6pZmUs^E$8ds`8nEW=bYvi$?3rGbc|fGtpgf zE&dbvM0%QttabA}-DnCAc5wcC*H3fm?VkUR*_6A1;PpD*1Jmb@?GJR$ zFD8M_-gZy)mb3v7iILfLu@G{#b7Od74+})mez==>>T*;-*?hHsMGuy1#mxTW)s@yi*pv$C% zCksr2<3Ope%J=d>28cBK`=#PA(PSJT1+@|C?nYc>;&A^MxO!ARL&M{BkkQuSifPOQ zBC%i)*QhP+6sJj6aM)DF6cPR>xBpPf6l~6v`EQ|h$b*v}XimbRiY1LMc(4j8NdZ=e zJk@Ss^xBAEdL1J`wQ2*ROe!QaqX6PJ1glgVIRrquohlXgrPt$jw6^}-yRKb3CnC_L z{Vu%9z(B}@q=ooKPqdCa|GO?{cIbxul(m<@V(J^zbAuy>I^ET+XXgS2S%Bys!5fXF zq#8UjY;wH|)h0+Lbm!hUe_-mI-Ty+PHE*j^{A-Av2UrZ1Dw*rgC3qcs3F^?y@V%IJNZ#_<%w*(#dSH?6R{WZ!;M2L8CJ8UI{ z9qK`7p>6B9NH;!NJQh3={>!L$0M{qf2MwhV$G)WZnZRFTrLlTveZd#JNCPQZ@9B+^ z!45vZ1_{TRHQT|#%);Cy!2bBh^vp!I55#H&UHMQcm*67M*7yYCKW_ke>TgcxLZx`~ zuc%3tSlhPW{BM4`&8>RQ?-EKl?4(0+Dy0SEFD(XzV?{y#a&=}&hV1Y98eNo{z-SV9 zXi;@dPiI$Y^dJ8sJ}=QO{`D>lEyiDw8gBQ0{nyGm5{Rn!#w& zz89stK1Z_heXZ#$sHkHeXA^tD*@eIl8WbpzOc9L${8zQ_e>~12cJHH@_2S!3v0~3Q z9f?Cuj|LggWoB)MC;95gKI^~s!t|chw)=0=7p^}Valp(B#AmIW_izWlUgL@0_+O^@ z3LZrI{|G=HX+lvS9@$QGDS`6%(D_ 
zI3M;d657w`xoKH3lhN}?SvvB{_28_{@U8&lexKhn?zz;%)CzcH5>FJ2(iCZG#T1S( zETssnPLZx?%WXCSP{yYU``9LlKPuFX)KT>;D-fux~CJ)dP30k9c7D zV=n*io-}_0(?5CsMgu5$UcN)=^|#8|u}`19fREt%M9=x_jH`-P#)9hKUS6RhGMo5~ zM-@GNCARdI8H>S&K+F7I#DKbps@Y{mvKD`MOUk`@5PG06K>J_Tf&&fn3{^1{S1FzY zq^!q=yb-s_E3%=Ji!blAzEkXY*r-BNi(sp<_H1}+8?q|AwAPM;wW)@VirHFIs*^QS ze&2lNKaB9dy90cCiyX7lzA4`TdZq`!$@Fh{UvVCtDXrrqZ!k9U#b?pt#c^OA=KcKW zSBi1q1Ga^SzjwyvsY;Coj$^xzTZuQvF6xq+N$@^|_2BO5g1v$6576VN6M!MVjeP@# zaoUyksRW^QVyNb)MVSlKnPmMT5oeSeH0me8w zWD#VjH&fd))dI*3Cg}FED_{5##l@sL5O{pp+WoH6x@Z1NqmFT~{11gdlmE{Sz!3z`YpoLRZ{(srV?4J=}k~z{kj#G^I zUK^+daS6uDMZO0Fi!7=fx^6h_`^rbf>+!-O6>JDnA74d$jrujQb=F}96`C2a#@2iM zk_0&KG!Fhy$>20&mniku?N0o*&1wdpVsIuDUBr8U0a z&M(aO{wp&7_em2Q_Y+|2LhU33WAUINYV1AG%zpM4L)$8~mzsq%x*7mwTq5te8QnJw z-vfTAh?2kBGJ^)`AsjG8uNFGU4Dapx6U9JJaG(~-{PUTP@3sN9H8@?u)wNT#0w3oD z?kZ*-2c6RP<0i-RON~T_v?HfH32J+NvPvc-#pTNXtSLmYR~-KlVEl!G(4PqmEZX(2 zBQx(Wg~g`h>=P}Spr{u-OV@T}kfw^$tLy}s%<+QO1`? zDj;p0h@zm33#O)`81V}(w+ga4HBIoo;YMWDFRYpm?#G9@Vhj=43-kelPLcrcf}G(F zYQf{t#s7MKB!ENU+PJ*{Lk_ARMxb`>26RuQ@Q3m2s9}1Vfum`!m9*z40oeYNw|Drh z_}}ZwveIpB4?2IiHgr>guY=gP<-hKJ{2bsf5ohp(R2&Zh{wCI)kx1At!spXq2|Dl8 zMhLV(2PJ$6d|17{sWNls%pNCwhF0fh3F9mG+~armX~6aWhiL&%z-GkJlCr%MgzZGe zdnZuhA;;{?VjD8M)fKTWJ_@%kfzXD=8uSua{33b$J3K1dpd>VxPvGE)J3JCbKNHLL zzr`3;E30O5>^_igMJkt7#rtVd z_QC%{p15(x7koSxGGlHiLb2=1dH*swc(>i-ZhW54)y3a)w~BviX!JYO$ny{3uYZB+ zY~)f^40VWW(y{mMi4iv+=$U1{Aw)aZhl}J54gGN~Vxt;ONvO}+M9%bIrUJ}C&?nHT zg_w9dwY~!xMik(CU`+*A;^vFd!9O}E5{ucvfjG!>+z4>RRH{=?4ZeX`>uKv@cBcKS z%Pn{&+a%t!c(CD6n`G?p#y@~61I7rdKico4za{7)5B$^kYaxRc`>#L81yhXq?9;__zBd>Sp4j&I>59Jqz`}!Os$( zor-1ewXhpze#<=k2TN$-`q4T6u9MN_L{IMR?bQa{`VXJN@I241lKUhmp6Jl4yx(z* zCh9h|?l^xxi)uGNli9cTE3N<(WTkvU^?X~!8Mdv!ef@g<-guDo zclM!=#u#G0gK4dck#?(;{P(f<3Lfdyb@g$<1*`un_hkH7KnaDw&T)DP@k*E7U@&4A zv$-{G8exmOq?642I6KffPT(rLa<^E70kr4ep;6EWi7N%4F7GZA2KP$~xg8GA*pq*5 z3|d|LMeGyucUif2yzB&QbC$cw^~>Lm{l70evEL!+AFIPA!(sWhi0E%v#JETs`@gRO zekNL(0!qAEKO9RxWIJ+R+IeISb#D$2e^$j4dbm{c9GPOcT}q1oX#!^6GuU|Z5B_Ip 
z9{DQ}vfcH+&-^}xgvH;j@5;I4T_*tskn-3jlpGBEETE z<1f{Tv}Cvlx0^D?hG`vHf$L1}LhO+wa8b3!(Ze?d4vbqV*k|;iIQ%LhLWgSM{P`a{ z?tvHb!j}7GgPppdWhi#RE#RW$9k$$8ME>(X&bRqyr=gUB;M^m}Nh=t^NhsRo`|{3;Ico*u zG4XjW?)&|jk&sRPiFt#k@PB6<#D zK+B4u-UFI62N9PQCdf+JuV?{5ry2d0k#0(mrO?5yP6K6%q*)1~G4cALOSehHp2!PM zq;>x@+QsN#wg1*C0mb^t{kC5*$%DkK&GbntYFxl!w$KUIdY3y zE5JP^F^vBN)v;?o@Dm?SJQu<_R@ssIYFjqf3~P~mcr1%Wn=!ymYt^TGwadV*4v2CEQDIi)Ob(fa#&cd9JdfV&z}1m2Q{_Y37pME%w=9p#-;b!OSE2#R#w+yRTmc z0Lx7P?9EfxKz?Ml4yK(O6Hv9e!Kaz?lOPE&53bpEyVAbs-2mSBvnFY zeb#T4zu2?`Ffq&Q?Afzl-DbDS>tA<_-YG1odNfD>b?x@d6eb9i9=o37Ygt@}7O05= zE{<->I5)rRq>mAb(T0jx~`lRqvWkus< zorYcCoRTNn5L7ZSuKYGCYg6X`<=CSS(I@%(GDv!m7_OUHvK`C~UW1M_f`{maK-Ag< z$Ewi+2vN$^%)}IaDl|cB;V|IlyV4vC-$M%~Fg7pkTB+r3;E$mC zkG~#+R$a)0-~JP4PW@-h`Tyb1=Ob|IASNDyB1-;Tep4g|C|kC5h@t&ri*wWfjg|u+ z5FotQeC~vHzlKsaL5a)jxzJ3RM(^_$r3&$%l!-uA_fg>sl zoYKA$@dHooIoR(b@GCIaBe8SZ=YKGm$hgQ$>OvR+$ESiKr4HuZ)0Kdc@fj7{Dzt0f z#EvJ_aEfyoTI@SOQ=H-~%NU?>L_xjieI#*Blxx4Ca}kU-G}G7y)o+gZ~Mrin-iq}~Ao5RBbry1ex&M32yZlt`Zfu{f7Mc@}IL1vKQl zG9EItR3%{tkO;Sn9)XL?*2+XPBPf;BZ0Ni46FF~aqo#tVK?NJECL~osQ^k&LP>dp> zV0RF0X@bI>WwJkST$tAQ5zQ#J~lL?Y@?tZgoizKF3)GDQPf%P03ZORe()Rm#7EX|sz*lxp9+eA#S*N$ zLs_WGV;hu1w*b}l)g(X$y%9V~Dkj9k zT(M0D)kFe)i-nRa5vSQlaQ?&&qJ}w|6$pOfHvBG?7ZFTA3Q!{1Y_ITQ+6YVvTZH>t ztqZxJ&(1EN6V)6CkE8=}b-GqoR@aR&y=D91!-q+Kiga6W@+cLAb`@r(B!VGyu}dBu zJ;AsW(2LTc^<(VOmA+Ox&Y$Km$$ZJx1_Z^ti9L+jyWKPv8VTV|REiFRLNI0iHVsfc zg;XAVIK31!<4ChSl+JGXjE&L-BhSRA#iQfZK zJn5Y7U2blnwnO*=wq~+0YbHH8BcnP+(>097v1|3b01{rGoUwpZzcm$_(UWrtJu2_M zpcsWMk|XOyfds+-B$X2pYck$8mvT+Di>4aj-ZoYneM;}b8V7@~Us2bItJs72>ioC2 z3645OO}>Q}wM1}y|pk77)9fCKTJIBs#M z{U|ZRcfB(Ou2aBW%LicR+d2EAOR`d zI276}AidrKGQ&nHu;T~z9?gFCb6_AG?Yx(va#ddE`Y9j|)iWh#c4taSDTXKIcAw$X z#yfa6u%K8Fjt0}EDiA-CjwJ9S9HWVY%n9s17=`kFp|S)l#|He|#>_fQ59+56O?_`waK&meOMN9}}dWUl}YpFdv(48$cA zn!j{R_GK>5VR^dbxOQ9qKta@oFC3FvuHq`f=>{A>zk2j@XlSWQ3zQbVOOIkX{P`T< z4rqZ%KG*?q>QzLX?qzo9fYQdO5Zuumhmv)fm*IMss;m|hdo>J)$@%*>jk>N*@Iu$$^uW2x3MNU;ra8MHY*t=h 
zuB;lDSt+Xc0elPQ&D)3VYJ@zcsjV%`d}fwGGz(KrfmZ2MFPHaBhw#xHUmFDT}Z7gfTBg-U<|4i8x+1=VXp};V@8=LJS6!I9TkuOnnr!3Ot}Y2}u&*I2p9(Tv`Y- zR4jSa<*_l-4Xs6~iK(D^CkLifv~8g3^cn-7B)^?mF5PuC$`a_G6bHf(*@Ld>H%$OaskThXS(j><*aFqZp5Qh7{NW%5 z;p)+rveN%u-0gFq4l=X1L1ae5nCDLDMoA0r*1c%&rtx(B* zU=hN;TtN}p;pf-=n%P6W9@x&2DcEXUDr5(gm~`KkwI84PVNK8(C2RbIG&A|a@jfe~ zTt4uk#zy0buaW4`7q*$ZT zD6)Jqtxk15ad)bps2@Go2!crM`YWT@dlE=$R%04@M{Z%0+z0Pxi}YNb@_TQf^L6#w zwZ#d;2QUxBxl4knzJ2>iz2kLUX5a{1v6;VS6NqxRg3OVdM#Rcy4n+h7?S~PtM4poJ zdwAHDD^}=9HHRlVLfyh<6MUI1TjzIY53qbjaaSZ&riPi{OL3m7<&WilI~`>H`0-

Im^n%w8^Wi2&lmcoMf@I=)D5ZwvF!~GiT znvDJ3&dJM@Mm$;b0fua3mg4z+NPpRE9}e@$ef4E1{|Yqe6r?Wb#wpIL?nB%4Cbh?I zh+UPKw*gONn9gf(tk|ils(w(Rkln}0fGCf7s5tauR6Ng0rV|VF5zlQj3;tyz%k(7*Q(mhUPek}E(ROmu`9DLo4q3oinzfRvBBK{0kS z0Ud}++CdHPe;1F<#k>j6-PFc|OUfj1S3DF$^qQvwt|3(cw`=`dzjxQy9_j8$CEY)? z64Rt1SX=)13PBY}IBQW0`+vQAR#U!GFtBb|2v<|#wXuTVOjU#zw zaAasgSBbzf)jfIV9JmFG*fwvz6ggz4-`&4 zo~>5F*?WUvklx#O__*3w*#!oc4XV4$Hj;`T25&q>pYflMTf;u?OS zE&br(3nZ8gxdRJ>SqBmPP>e*A1Bgm$bEzr4>`gY#4UGQ-6~tSlHw#StLaymoDb)2i ziI+tUk}Qw+otVnC4e^gQYSlF0fVvO{0;l+HLas7X9T8T_I+rzpskPnjTQugjKEt8a zO|_tTE4P6|FOhS1l)o|JgE)z0R|_a;qx<(m7+>G2--o$#LtRn_>F2G|MiF^8`do43bQhl*fb(5%M$2L?NxcsowDtcuL`B*CON-9FA;r zIO4-HPU}5zz;+0##MJ&kxgwS=`%NOuPDXS0b~o?EekI5D8qBNqmUP*h4UVyFdI+i> zm$ZlGV^kBGmk=te!fi!5ug(!Y#1c_JcoAO;QAPMF(gx^5KNn)oBPhNT+62%BcK+5t zZ>tdU4rOXmpQqcn*gB(~XkVjC3 z^tG_KqSTxSoR{6GND~DLFMCGiMAw1qH;BHMO3WFcRQnDl0s<~v1ug*wU^Y-NDk7i} zR>H4Bl3R!u!j9D6x^>I%{O=4^Kd|tQz-~}FABo{5keJm+yg(TSf)O;HX7VV|+Kb-w z%Dc=}#}2H)*VT_??L`eHJ?drso;`PT+Hzy6RV<(Z5nRC;hiJPJA422vFl0)H7CWek zzKB#M4Jz<}fwPT7NLTH-1LkXyv@4V{{gMhs-Ctgl>Ax{pNG-@X6u4XdIb34POEQo+ zs0_B+h`HoiUMLK^NpUsYWi>1l&6K1}!`u7jv_FqO_1VQZAA+;fhKtuj<>Z0Omn4?_ zkUDi5y@yoPM1^YPv{c(B-E;H?nAE~GYd5y$A|mBeLGy5tvxjptkSSsd0c*SkZFw*m zjU6MfK|;){+oO)dwBlAY+lz)!kg;e3(H*lR|nzZSnj}lbR9M z%YP9s3yI3uy%u_IM}DM6Ro2MH>3rjZHo6mPx=<R zbQ=+<0@aDv7#Y#7+c2{H5<;Zp-fZ5xb-%Z#j(~@abD^Ifc=ak&tW=r#L)|jK-dt^5 zSm7QS(1OPkHg>K#Xu?6Pa~krMDyMg}zZD_cZdg^qj(`6AISUOdzL`MI(f{Yh?-ARn z(BOAqHV|5p7{L~54c{iei9DQTT73=D!sRLt(A;0f`(27!GLTeKFL%vqBl`z|!$uxQ zZ4QDFNRtPM)B;(t)>B3kG2)Bd4AJeIyZd6{O|iAHc>2nUiGy-o4%KM-lw$ z0)HiB04Gfd6hf%}UHs14u%X+nu0G~@>e3HK{mUUGn}+^1BqHkMQ4k;W_>_-9o&!Yc zlo5k;s}Me##21;;`L@g13kHGC6}gQ$yz;WOLzCD&1p<;JZ9=MsG%FA4J0jp%Ve9K5 zkAf*ptCqdjT|OJK3Psfeb+hhczT76Y9-8<8AuM(Ir5xNA*pGfcJFAKHUF{AI--BJJW?3l_BQ6|P!`t^}mNrObPyFumUaJ|JNS_jMQN zyb%Q?R~sEaYXRz`ZX1I)e-6MHeTN*-!5mX>b`V2|8@L1HnP5@eZ7%rvRro*-E!Su< zqj4e>$OKCa`2iwPe;}L2dg(i>sF!+32#{EyDI3H;VFZX<2T8+#aeg$XD?|9XGwT!0 zt|U5jb&ynB zsWt`(h%B=59x2bhEF1Zj)Ufim4+ 
z4Y!D_A>lP7M&H56xCQ_D0PdA)y|l8R;r{Sf5Zn<|0U`pA5X@<2x?hT(RvmwfII3ed@zOJOV0}A?7Mk z8wDxJ6Z(p(1cex|SzR|44^8lW-2!wrh>r$s5X6G~{|4gk!*mnqI`H$HO(qv{9jVbH z$Ut`uX_BGmG!B^9VQ`(>-`f_7EY0VD8h=#Yd3GL_^XkT4^*iiJ4YUXB5={3?$VZbi@!~ z(_RsO0XfTET+3ltC)&1rBkOkz5nPBuG8GdMe+JP9u*JF%FCZj^3P4+UQm1wIt`5XJ zCfbD%T-pUIgaT5H#+qKpzmX(f@OReA2AakW2ix`|6^QfkJaJAmLvKef7rUPW0-%Sn z!};p}oVbNh$S_pv(AfF{t;N_*{e~kWPy~Jh5GMu?w+$wzKbT}faQv%C7?@C2%=N28 zsN{syQTE!V<;%AqAo+2(aTK@NhVRq(ANpm~b_$k@R>XkHOsWMNFb@$reI1x#tA(~+ zWJodS2UF=Wx#bAAj<)qc2=d&35jYIA$m(6%ZR*E>V2(s>Zyln3YWJe{SArHDeCn_w zm;pG+3;o8V`Ul$R4^l@#YpxEmjb)x+A8ien;bo}a0UpzigTZd0!6~J&NxKjkkXj!r zELu_OG!2w%wx>~A;Immrmgzy+i=q&m-*SZe@i=^Kz#K^OGsW|T$8}^-@d#ZBqhtP( zfF#9AEY6#49CCawauQ2f>8;HMAK|=cST4mcG`}3v3QqydqrHY1fL2Hm*pNmaEEru3 z&16BTlRXp{y*X{rEc<4t_X%ypuK;*CQs@P11uya%p|2yl=nGL=aq6fCp|Q3 zZ;nMp>I}B-Y{pCv^Px)>Je^^?VN~)5T9!gnd=L6~|0Go`X7@wIl*+StX`bbddo_Zc{my-9^XIXXvGnDzooIsuRKysR3U5S{gN%p*RN=kLa`L znyi4&Xx7cgn0DH_J)mUxJA#{$M(rus$H&(m0c>6R3?MOrKC|ke@5eK{hjg)PNQG5lTHEX>&gE6xb(1IhB@MNpxTms!0 ziqsmqWd%v)zzl`NKY&QZO#pv}SrZ^s@{|U(5TvbL|Jy31i-!RCZ-&RMM5(+-Cv3wq zt?wl6H2#Wa{9pOcCuOuAKYjL$TC0G7BlOisazcSC5nD?TZF^{`Zm&s=M^PZk%nMtM z`tot}IzWi26Ps=U-bUt_5xF1?ZvnEsj&$YF`bP@b7+2jqE5ESw;~)}AS~!BJ(Hb2D zU>6GEqMtWo&CxZ4ad5lBM2u4s*L(C?ll?`_gT|Yn7OVroQqmzq8N)bZ2y>1&9`u&U zP*ng-$Xy_TqS~Yb{NOw6Or*=)e5?UkMN-hfQ6RZf(A~D7>n%UJqa*>tJ&ci1;vWUX z8-YKl8iwe33usvw#a+Ef&*(adI*Eh{J@+DmLBs>x3o#iG7kKCP()|9eyyyA=rZ)qdH{-xS(rhUSTD%h|e_<5bXl(jTQ#hi%vOu6y=UZ z34Z>pCZ}MJs1l^?0MkHz4C^M6IA=-dd$AHI_4O!a)h9yx`m>%3X&Bmi{ zfB2ZM$CH0!+lJo*+aWo@-%;tbQExrF92bKh@!Hiss z4TsX>Yh1;}j{60$G^A8ZTt6tS(W417ai>T*yPLcWH5TFyc46(Y70Ho9B}46auL(!t>k2@#<|Sq(SRO7F69q7~vZ6o1 zm6khvfe>GW%{Bb&5-8+ANW;MaLDA(2lcruqKOdJ0y%AQh`bZQsWgY@wZm(Jo6TF@< zW^}r20&J-)fheU+JZ`*(fMQY$vee$rrSs<>fa&d<^&mf2AJdy!4f(=h*DUrP!}0>} zMRfpcZGj?R9LAymL5FlM@Kts@WwpagMC@Yt!Sx8ZfR~~yu8x-7j8?Qyt|uIvS$Z(c z*a8TUCH6ehjREISV;QRFD%Q5t@ zcjUgmA?tzea5*0Z$*b$0yK}nB6>^~smTT3E$&)8v1{8G}g1uKjI_MouB7kPWqD2S! 
z!eJuWr3j3NdV+vtRW;Xy<(p#f=gC$B##2Z8n{5An=s-%nNkDoZWRAv98>$-s18QpP zMfT&?)eCQb8HGcj@i?PY4CqPALC$J{Sh*$=avoQ3zVT3ip2p$OwP19ZIF6$@Y>-r; zt2llqBv#WE%WmC)H7R+oxWUVzr@4t)iqzG?ON-L)AaiKAmLmO; zYbFF77qc4jS&-2*uW2QxFU!*js9Jjz*5xWx2(La;e!NwJs-Oem4S(4Js-sCe7%5js+CE8f>@- zjG8FPfZkpOkF;aaktpB`;h;H`DoX%_|MspsKM=0lLvG8>n>R1R_O4=8hj9XhN2#jqAC&GYAo_`z;X(M)sgT4^*RfTq+hwgbn(nApS99=1d7@DPl#rX3Me&ah#66 zx*-Q-(G^g9Vm)zHQc?=-LT{i+|58RBcI;ha1a8w_60At`qc6=B!cY@4;ceXkuA;P$d-9>sd z6kfaAu-ayURK4@CjVKFsUoo@H+Lwfy?#zOIV5PDIfIaO{$?iyx?iU$(PT5A0kJtpY zP%oMpx9Wuvw*G1iVd*gHo}f=DvffGsOui}ka(y|Ef_L3^jF%-dMn?hb3b=U2tXb=s zrG!)nDnLTAwd5?U#K9U%5H?_oxX{$xi;UF0c)@}Na$fnZksf6oMo0~){7`6-8jh)X zt~O&36;Ovbf}+tzt*_uK@^6P~>bbx`Nfhj|YcLKgi9;2-%qW?_zqO=y0m}w1kEoj& zD+}c~WeL(~t(GT!>i|9nM4c_JNsqalLN5V3G}U@Mf^8jVU6T@r48Q`>VfN}ehYGyw zD#6;hwi+OuSn!2o-zuoN(Z}M+K+{JY#JcQ8{T?$!{bW;N39rCLG+nke#pu}}b7=p! zr`w<_O^J!8VHaxO?*tgS>PQsY8$o^MhMJNkrb35zWn*lTtsUpI=*sKH!#(Z@=0wHTjbyxqpwE}|3nk4G zAQmYxr7)zQPJ;ALBa>OK^;saDCL>t;9U%@4q2l zGpbKI&f{JMnm~hMhY?k*2IEX8*`)qvT-@SUAuUIXF0P={3Y#EPeKKdLq#F^AyA9xX z7RtStA0gaKjl@Cb-MPIQ$zHhp{U|aELffsHlcU>#L=8Dz>>lzz<>}rs<6EV+=3$sC z(zQVC)!}X{)EJO^sJuq%*qL5$oPohxnvklv0T08cf>`GJyLd#H%$rE?HE@Ez1@mGS zJ>Z9HFvp3o>Md>3zvL4s$IyshUy?yn-vL7+IE9V|Xoz8)|Ec;r?0Rn30P>Zu-}_bq zJ+RC15TnT_x9L;;8`n-V=8|nl;Rt~O!fxyjk>oBKI*H))ASovjTxKtK!9)IC=cqSD zK>*c=nL}cI@t)W@%#^Vqtga4xB{3#9R(AoG;Z=E!{N&ORb%w3TyJwF%F+r;U@(PIO z5GeM%i>%MPWPfaj{kt_VCC@k4Ih4M$mOORljM|`OWHrJjpd;oo7Y31}^FhxK5rO%@ z@7bi0PMnn=ND%Mf97sjV6jWc@oAVaF)gW;X$~?Ld7wiwlWIRH`;E3S9bdtm;Y%wV} zQ3a)tMBssNxV}#pBc*y+M(A6aKzpM!Wu2Iq7MTfD`%!Uf7j*Gyp+uZYY|kAV&cI|z zs0{#o4n`bF^DUio*#z{Kn#tpua-gy}y?P?An(k4y$44b_G)7oK?-ryi^ zzfYx(CvcInQF)bJ?_Wj`h zkv$L!A(2Di<5ZH+1)ocJ2#+FQnVcblS$B+oEnC`8$!Xiz*q*YG+6FBRm0A4_7l+<< zXJKRRAmtF)=0RE{YO!NIrY)(!9Rg zdlIh@oB)`HT0FH$_)BeY&2N|+{6Tmq_xo!#5>Ww-twp8u%<0os-F4Z(c^`CT)H*_y zU2_D#M<$XUA#~qznFGXGMiOur(Zvx!OWE&F44Y^k?K%Jmc+Cn2=e_+%I6ea^C7FA~ zbA%1-jM%)3s+uM~BTmUf6B(`BJO(ZR8+6Zi+}zN3Tdj(dnbJ4o0*}-UR1g)BHIxH; 
z{Xvdr-94Ms8p`7CsE13Cm@;~vx@$tAZUN2w60R8F5Uyoz&i=5yx(Jd}et+ncRM$;( zU_!sr($a(rs)y^&K*RYE05;l0;G4t|0ECp94u*;6H^r{^c|*DPC(3{5SkcE22?W!D zG*rmGSxN`KefyTjbc!SEPe2X5JQvzbK0lSy%YO3VtFZF`!Y7X^7tK{S99mZT01igY z9wDg%z-y28EFTm=Ut;C+0~H9Q-6$Vjyo>f9^B(7?TmWa#q9BnpsI@m&Ko~x3q;t$s6_VboR*T z&7ZWz`H<+?H^f4ehpoq0UQL6e+QnI%U%e53q~uK_Agwj^-Ag$q4FvkB_0Q${F``x{ zv&6|mdEqk@s^3vS*Ji$v3a1OqM4SIY z2b$pLjk%*V`qZRhx8)kZbWDtek1)H2$w?}&L4e>_L6P)9`<( zjQjIEpglxl0B{^@1qD^8*Mj^j!vAf+IQy4WK@xinjkl3Ty_@DE;|EFl0&{kZTih0X zu_nNsl;40+(j4}e73oo5j(x5x;8meXLi{SwdDnBt6wX1Korgiz9LZjF{mY>yL}Qof z4Ke0`hz{^9!GL$L36ikaWi;(33H*-y;M#E7w1MmfHdl!Z7L;;6!b++IKw3&4$$Uvyk7&DlUDpBc zQE%$zz*FCYe=|q}hic)dDxi0pl-i&d>Ha=zf^T0>l%PJvT?i8h|CRqDS5)Fv)U|#L zdCME1q9L)~h9_JRr^)GJ5NvkePL zcD)iv31O2aKj8mwP`3OTRNA*KXK!gFCSrmU}eL5ZPu z%V^wtu;FsCy;}eQwgCME{U(xVTA)F4t5`sQD$i&NygGz&UcNbpe|F<*LS zXsLC7dn+Dq(ZJDj;;WCX8lb~M)F)`@Ux^*w%DGbxj052Krlkmf1MKw~Ek=|qqEaYE z{3Qg?VgT;@{FPi1z#a@T2b`6sl)?+`*^?(va;dZ+8fW$W>@Igi>W_q?X>(6LitiLh zA9<>BbX6Tjy#(;3B15shFo0WJBG|Fj7IC*EJoZMWI!*=gkoKbuqZG&667p)@14D#> zSVrL>xs7_efB*c!DH7YTR#}3iwIt^2Wlw9zj0xHvm_%2a%Q$yhCZ(nn>5jPYVmLpt+scdlh)Nt6q?+uURI#dUYXsDGE_! 
zC2R#SLCUa5JuXZgQOZt4sp^J?hM_ja`%ij!C$}Pl`!*aPivZsO2fqbvf)+SHfRj8j zXtzY-VC`ko1oO()?|aBt^T>Wf&@!hf{?}( zxfZw>Yy+GX;pXn3Z-^Bs*jkPF#Jc`L6&ot!ARb5Or*0SUt>5#3+fma8D2B`L*#T7g zR+X%imwyjjuu4C1&w?dOtnj>2y|zgq^umm%y6uXJ;W!B9HK7&L`%waO^Hu;tSo8i2 z77>r;KsKD=CQ1{TrAV${B9hq&VIc$9GDP&>>&~Dk;o4_6R<9y~xZ;QO1@P-otG8G> ztx<@t?#}~vG1aAJwc4yf-7T5Osmgm*8*3E69q5z&?g@VxIf%HRfX$o@0IgY?MO5UD zZF_M{TdybLfUK62ldB&3;GF+-E2vS2F^+@MGz>CfrF3mw&~4d);vE=u>kNZiz3MpJ zfDy2EL$}dk)r;?)HS23f>Sl~0!roBZ)vZ_O;4TKbD8f!|l@^4v{qOd?(k=$cl}aKi z{i!~vf|y}OkTqX8TI4M~=3pD%uXY2aOt#iRb##(|@gdGsz4>v$c54)Xg*NK?;5ieQ z5Gbby|E#irjqyzZm{QNH@LpMh=DWNL)qXs3Ag*Wtm^(p5)eV}MdLx_su>fgGjmeX2 zmS`bYDvBEUXQy6VlNMIkS)WslGjtib|C`VX|un$h3zqDFxNHEt^W zbMjf5J^>hzTh$Tq|MyaF?@J$py1bVJ@= z?Ag|_W5@wP>;p2Yb`AYV%Iq<@aN8?NB>Ou)qhNM{N>u1TA&>rAp@0SA2we=2@^8|a zL&r)+hFp#S#zkg-9fSjY)F>fhrh`J@hC{EG%VBCcWxG;C?ySW;Ja;KjKx-nTet#;i zbDtG$9&MbPR_5|0rcdNjJe`|#eU!Lk@6l8n zuqoXZip~<&MV<=54$$*DdWunG?8$XwTX+M;R z$%!{R^V$!Vih^EEUJ`V9C1M7_&q1)0$Qd-%-lYGbmp^u~(a=fw`(o`VTjW+XZwxR5 z2WSG7nNbf1+X_ZNl}Ts=#H8g=mZC}r_^jxqNCLBmmK+w`kae4^ZQ}Z+$nY~Vhyjw=wB95JM7&;dau(5Es*Tx6k zoI&Pum&ty1D#i1Dq}=14C$Dnnfy~WdRo}*FEGVu;PBEcE zMA!5)4%d$HMR_zSnfUZr;e(WlrPmb?fYP)ip{o7IV57FCT=T|{L)N5m2KWc2JlO_L zZX^7%4!jhVLqJ(aDN8R8fko56GFhmtM~)EtnrDY6mzg^JQ|j=`W<}KE{P}v3mRkrW zBOa+f%ee`)0u-$L?YV6@q8+armzwMkux&`Sybv^MaCd!9`3Xt}GFn05dJvT2<7Wl9 zye6`G#LcH$z;09j*(U%8J#0KMlEwc!>!W>lb%$P}VY4nvjwvdtVEH8~s^i5@7+hrj z;xP0tQs&(q`hq4f*PU)={^E!=V}k5?Z8#W(uS{TGASH=7rz}4XhP%O`w6hKX5E?o* z9@!K5+`8$k!3-Xoc{PW4XJDopu-rUl7n4|e$0+di4&7WhO?!UI(egJSHP1TZ?N z`cFfz-W`L+AM+-TnTO3%7{h0(aB`!oW?y+HC;o zjk==f?4o}GLx|Bu<&ewK{mhnYlX(sD@9)airltKuB?Wmrvu`d-q(eeV^yyFuat>clBk3fDwHOu!5as}94#8N4)%vfp;C#mnz;w> zT%OI*q;-Wl9(vd#NoXHcQQ{3mHr%fTs71?I-V8?q3Aoe{jk(#OTMVRM;)@p@OBoM%1%8C2E{0)heQ7u?JS6;U0<&2 zf2}6!w(h)@zc{Qb6$Pe_T9WsO>E#9j|yY8+N%tRwk{ zK}+w@3CpqzLE-xgV~BUQC+_i!iHUhM)&&yj$F8n*eSLi)Z(1&cN5hzBYHE67cINBXyTkFA zB;K5V4*AAlc*`04vhvU8OMb;+wE5=AC_Iv-&^d56ShR688bxG3yl`qO4#~NXe6+(h 
zL@QI00L#+c{A^fQ7^ARSZpF%!r=}zBw=I)S=M;^JPM1ayaH1n{)bL=ZJKB|MRJq?G zqhUf{cEWO0{6gLZhlZ-cA2Ti@-Qku^Hr3BSKswb&C^RHQ1x+cj1>!$Cd>y;%3@{RD z-IZzD8|!|URg0?TAc<@qpquvjv?S+hCyrN0x8!CRMKD~?0u2N59JhPv$i!w2@2Q)8 zMsw|5(r=%tICp2(r0(q(uHTUtICmDXpd#ZeZV3pBGCK6$*4FwNCWeBO8s3=-+SP0L zRBnu$u(z8vefmW}154#J@Br--_QD=RO-+e{2;nOPC@t;u>8_J1w@Uxm*=cn6H{KE6 zaBS#Xjz`cf{fQ4W1QZHFW_iNChP2|3>#oQ+OmG>Iba8Q^Om6m4Np%?3Rj9R2+|t?E zX^xs-$h!&i0+dz(PM^PYsSwG90x`o%9#Wc%hQ51d(MX=yoOIo?zP(FrxXF)kvuU%Jk2 z9wx(}+V>jFqjS(b#w75whKY+kJUracbf+FjT@E*P-+ub&(I04xTT5=+)x$%GHx5wK zI_Whl2RlAGm8WzZP*pvjl86PIS}i*A3B|XmHpRuo!S*7Nk&zPM*WqZ-3sL1SD=V`H z3pS=V+H`r@`}ZcWCVW`V>?GN&&l$>Y=X?>2&t%v+ItGKA_X4^I!BQ^H&Y=JVbgxWe zP&@RA)-UxrYlvRpCX_4QGO?S(L4Htv=|(F)F<#PAQYzra z$O3_yzi1J^Vd5S}5jY?>@7^u_d>@RR;D^rOEj>zS)z;?hzMbCGWZwLx=Dg3PHb?0; z%mJ_Ib-##ZKU+lAo%^Y&3vlaTWL(0E3(dNL0KUpRDnCDeJ~y`@<{`X>#%uHXCqSmK z+(_AqJCeYgqHi8OdUWCztda2O{69Zmc&Kew@~W$HJ3lVRd~NhqxYx6Ib?UqWqEpeJu5HY6>;qvKWaeWN$3hr zzn+OXp4^*lqcK`o%gESRF{Zo-4B{0io+~K?%e$6hJd85H>1WtMu#f!ItB4B!<9#${ z@?3m;{Gkm?Fw^~3B@|iz1TH(1QG>aZA-!6<4~@4sp~%bxLFQa$X69x*69mu)F}`E9 zf-p4r1paWDHfCP=A}J`BZ5&wD~TxN zAxFq8XiMi|)A3j^PcjUQ&|XQw4Vd&_FY`&WvljgIz2APTVZbo|=Oy{jSLQ$8{Qvz|zvlqP z^Hq&~j02V6A>d<(Jm`dK>{j6RP8?Gh7lVT*hp3(cMMz=zEvKl-N!8fn_+hjVjeY%8 z@x=_ynOe7XF3Y$ab>g6(`t0(2{PM&3SN?=jpBMlx#5_-GYPexlVQaP`i4z0Y4Rydd zZfa+Ny*HR7Kjdu1@tzN8x4HQ1P`8ao(srmv_r-%Cbk?II%d*ip?!pf_>%ZQIYU zuCJ|4J5>HSI$#|(GxzsAVDza&9R-bOkWXpbjE8V%c)n76#eNLFE!H=6{V?V1bjCi& z2;DMBU5$$Ec|bC?j@Go~K9b{)B1lC_O6v2Q#HHxZJddrgN=RrH-rLsEar>S7(DHiN&j$mEsx@O1-CerUY9pM(X|U_+3Ve7GN^IS=>o-8)fiS*EvUU2kcvZm7oXYTDx+&?( zjviM*AtAD<3eNVJ?QJXWy`~D0TqUTTnhn)j$Sk~xO4B` zJlLuy$b_!%zI_icXv`l!Kwz1R2nuD(Nl9SJ&RNBeB_1^UwWpOvP{aBO)&rhjUf&WD zgNGG+T$VJv>SkV^9CD9SAiOl96ZLFBKw{Ykd3%Tz(ezw|E78CNtK=e{7X!of>+H0~ z6z>%X!`u5nT-}1CCg|!_Ke#%s`uJP7ro)(E>uvS)96t8DeF$)d2@J?GWZ^Boc+09a zH8n-UYrIXhh$ z)o(mJ{+uSR6ATOtR@b(7XrR0LdR7(}AfT;i2ybj}pQ)*-3HWgVxVd+Fa!FqXBghvc zZLqbo`*Tl%B7RB?bue>FOTC)+3%v#c&}#AU{rl1DL*mG4Wc2ggH`0!T>3LjIGD|~4 z<7)e~Jw^#g}nWH2>$v>w~yS 
zPDTpc>Wn#a7BN0HHZDg1(zZ8t_Y3590T~<|9D02fhc-}T@9gYc?9koaZI5ZhsWzxi z2*QgYlr)fw{;vq}MgdX#}0-_p`Eqb{G?+618@aJw>W zbfmBGh+%w%3Z!i&`%>)D)6+9M!!+Ao5wxhuzE7V&*K5}sGnvW3!5P%kpp9PJ1Wyk6 z&q#Z9%UV$xUYx!H2f)2j>xLRFM?s(h8Rh=*ABpB-2D-EJQi{$AOO7K|@t#e7R?v~D4Krho!*}PlXjcU#3>SSJfdwVgk z5|EnCR8&+1#`+Mnp5uU}V{pC!SQg^Cti8Ko-Jjx6Xu%veY{SPl2&V<8I0e18V_N7MK$Gir zJiWyMPpYdqk-kh~+>DRkE9;HNePf5}#%aTJ@;+-?BORD(P!8U05bmplxIno18x?J( z8P%3`fV7HHAQ!SJu={ixq2~-n4dkzM&CodQ9=fQWEN7U*G+A7b z^89HV(*-(3Or5yxK+nMwHiM;Kkj!6^9R;stiZ4=CF4<##v-`1wjTuFFoQX&Gia$Y& zjK-ng81o@b^3^a3a1i4nDp6<8UVdrIDd>~vj0BpY3@SC|(yBT;XUKkgx?tiK;wz`s zpWyCZxe~#Ep2SDq2oG)UZg&{4g~(MH=E!qbu1vwRX%;yNlIC2Z43sw4O4FMrW*#J{yQD1?PjO;g_yZ{93bQeaMi%Sc!Hwuv2T|neMa3-24Udv zU?OWuyC<@^uSf|dGUxze>#i;Mgt6Pvh@UKK$3O-yzQ#SM7@QdE^RlC168VG+OQ+V+ zMlIq)E?yd?z#ArNS_nt>wa$UHf{$$4{0HE_U;yCr>6>r5lE40_kOlDI7zRatQR~ zavL1=EpYV=uBc7dG|qsYe5p5X>_!tznJS}tagtH?aqOjKsMpqlHARrzM6J<-Cr0~i=8POM8C$lTjDFp(*iF~o z#pO9tU_VU1;gNBkot>TiHpfMj8uJ-joSh{hvv(5sD7Vd*mKKYYV_pw)fV#cTKskmR zr1WRl61KREx*Qh{?1fv#Ig&u4=fD#(n*ZFZ9qIaU#--Ybh=|6nt~t@eC6na5`nq5h zpn&dl=|XOi76%(~)$BrEd?+j=@8^-VzqHzaBFa@|7POKGfVJnVf2;<9> zvc1o`3R`x@SA<{7M7S6mknUiqVP&;;`(*eW$*Z-njJK++Pz_&%)IvYiVh)2V@5qg{ z)Ch8G01wO25#{uDZ|tJez?2Y=@5ce$lU{XbgX(E{tFaT_qTNSB0G4GouY6s5K0fFyT-ok{ez;)XJ2wMWV|Op!3vfmV zMGtF`V$m5jASEiw0r{%bEE_GYd2mt~o%uU<*z&b&X985emz6iFv3SJ8m)wZmlkt3S zWHqq~y)!a$Pe~zCpU9vKKzO?q!Ep4RggJfLsP2Z080CA`G-cqfg!p%UkCk9V0}Nu^ z65flxxP z{dvcc%G&etSG`cH6Iif>PiXjt786PT$XjTH^%0w+G z?L#66GKpFV1fv0A&nvW6|NGng0oL$wx%Yj~IeVXd_UqXyCQIw<)Q(&}v0Bo?O689~ z+gQHFR8^{$N$YPFKj~{s(<}5uT~ILM@wc+GQy~|gnMbvkn0~t>;*Y-jl7J0usY9@# z?}f^+vhLGPOO6ddfl692T+#Hby>JLE{%xAKchg#v0R%olVPT^r6LWuZkd+kQ(;iF} zbE|mcN_!6vg-??0uMpu!3}yLe!F3mzn`xi(5PMpemKConm#pvn)Fm;?TDqhZdVFCZvc7}oEt;?EmP{DIk+F` zrYJXklCn4YL2A9h%#$oLr;5QSXSVj}yg*aE*BeRXIOU28V_0G~`0P?oYO$G>uYNN| zQl6;J%gc*1ylc_N*_7r0D^W`eS!_|y5>^-|{OZsR91g#vT2_7NXXH?seu0qSP&yfFxo-5hEwqpw>t<75fg7DH?c4u}I5 z$&DV{2Rte&Dwh6!77@sAjb|+&Djy*B&U$PwjwHLm22u_ER*_NEn+GsIo9yKDI4K)+ 
z;S0he_r8+YjL;l|pE@Y{4{cKZ9oMYxw6sVhbK;NAC156t4y7VT`v7MUdc8!F=;KzSv4Z)g8sdeC!N`8JQBH$jX69MqsJxn zZJb_)mit-_4c%M~ z0l_y=Y?SnKa9d2))C~d}UOO_=!Y4_)>fySKA9zq>1k$IKx#1-H>LE-8XxTLU)T^ycu=2; zlraIdpG{py1fYVOA`E#>r>pqzhsRcfQalmSsv0;G=7c$G20g0PP)`A!-&>Phcx)G; zxk_%HsmdZ4ujW63$uy#<;4JkNmgO315dg!f2U|EHOUFeD5I0s14~-1S8klmKIubT# zZDly5jM!Z$PK~I;5V@KX8_2R82)!HO?0BJDNU*GZt;l&AGiYnx9@Ge{8rtf6U}w+> ztgo!vCMSc?T$8lghIm<%bn-WeTvm23Q6XX#8hTL|F?h-YCYCJi4mxLxQ(Gr<{()ABTy@}kU!xr1F6CH;?bMac0>xmg?8P;fnDQ28QO z3ilf6B$O!)6!%g1^w!e0E?7J9?OsNo4nhuquWu*X$|~{*Yt~GEt-pC(jyCn~-t&&T z#oi3A8U2X8D~NT)SBMNBg$hTN)1^;9u=rVh{d_LRO7rjubY7FR-M!-!8m^__vehup zerw9ai4t<(iv+=PS~4AWpHKea*fC>4|FnM6b6405!ui=?sNkc~Xf|8y6@PTgx`-wt zQ9axDIUZq?uo1v`Q05uO7?RTq{}S)fu5C~!i Date: Wed, 22 Apr 2026 20:52:46 +0000 Subject: [PATCH 60/60] Fix rccl benchmark: use dist.all_gather+cat(dim=1) for correct K-concat; add bar chart ws8 Agent-Logs-Url: https://github.com/ROCm/iris/sessions/84eadb5a-429c-47b4-9636-dc3a70726a1a Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 16 ++++++++-------- .../bar_chart_ws8_corrected_rccl.png | Bin 0 -> 74259 bytes 2 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 docs/benchmark-results/bar_chart_ws8_corrected_rccl.png diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 076e0c082..daf98492e 100644 --- a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -31,7 +31,7 @@ @bench.axis("K", [8192]) @bench.axis("dtype", [torch.float16]) def rccl_all_gather_matmul(state, ctx): - """PyTorch/RCCL baseline: all_gather_into_tensor + torch.mm""" + """PyTorch/RCCL baseline: all_gather (along K) + torch.mm""" M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] world_size = dist.get_world_size() @@ -43,18 +43,18 @@ def rccl_all_gather_matmul(state, ctx): (M, K_local), device="cuda", dtype=dtype, 
generator=torch.Generator("cuda").manual_seed(42 + rank) ) B = torch.randn((K, N), device="cuda", dtype=dtype, generator=torch.Generator("cuda").manual_seed(123)) - A_gathered = torch.empty((M, K), device="cuda", dtype=dtype) + A_gathered_list = [torch.empty((M, K_local), device="cuda", dtype=dtype) for _ in range(world_size)] C = torch.empty((M, N), device="cuda", dtype=dtype) state.set_flops(2 * M * N * K) state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) - state.exec( - lambda: ( - dist.all_gather_into_tensor(A_gathered, A_sharded), - torch.mm(A_gathered, B, out=C), - ), - ) + def _run(): + dist.all_gather(A_gathered_list, A_sharded) + A_gathered = torch.cat(A_gathered_list, dim=1) + torch.mm(A_gathered, B, out=C) + + state.exec(_run) @bench.register diff --git a/docs/benchmark-results/bar_chart_ws8_corrected_rccl.png b/docs/benchmark-results/bar_chart_ws8_corrected_rccl.png new file mode 100644 index 0000000000000000000000000000000000000000..358cd2321f258ed2f928294c11d49244f7816c38 GIT binary patch literal 74259 zcmbrmcRZH=`#ydlM1@Lenhkp;vL#trp=4wvGD4ZzDuqf>MkJ}sGDA{Ui!!szNa8lK z%FO;9*X#5CykDQ+-(Qb+k5_MZ_kCa2^E#jBd7Q^_oX;>l9kmUNI~WOq*r1`VqE8UC zwFE(PaV;Hw^X>D(b^J%h{pcxo0~cF&FDq9YLfgvy!g&|>^Y&*rJ#Adw>|LA>2#bjd z9}?uWb9cYsCLP5rl@yVZ%!a zqupMoSdBXAe*4-Vo4Asmp3Zxtl#TVn_bt96Z@)gR%+A}Nr(%7l^1JWNeUgT`?O%Oq zJ}~iest@`F{;n$#d~-~#rK^18=Z!e&a|5-bqqU<4&Luu&QC`ck@xMRIRm1%rG@<|g zl&x8RfAfD|-gueHjr-pp;{W#=m1RZ3zi(Vi&j`$KFK-T&_!sVjn{|NYR71YK>| zzwabOEp`5VuVr>}e>@~ObSIZ2QAM{jRryEfNCYX1i>0ZvYW@ftP$M@2;?-6pB| z2tOqgQ%#p?=zk0kGYAL>3=It_J=U}Q5W11yrY*}f#H7&SZC96>^VGX*>!y-RRf@UN z%I@sfNeEJYtdwtC=bCP#vg)+GSarEwMOBsCqRcP%#S3LSJ3Di!kfG=AaJQ7#%p3|8 zpFXLtT>SI*PoGvRGXulhrTMAG5^pzFKK(VRsi}JW(VreED*kGH9@6+!t#9;OK8tdH z)7E4maq&$xu_7U{v1|zm36{>zOf)n!{WY=72OZl!e0hB2;^)Mc&W2hq&f>NEuzRdGrFu)WjQ_&4Mp6BsYhUFCtp}3fhac>^ zD}Ethv%wDk|cBH;lCBG>oL4HPzSOq#C}1mbkCr z@55d+Fy*_tB5zxJ|1#G9eN)q>g@uLOmoHVvIts-c+XNAi{T;8}`c^#g`SuEW-(T3Q 
z3;5eh|Ejq7v0dX6Q8BU0j~{a}vI*bv9P6-ZNR&5e7YVMowvO$0v##RX_wQ+NiC@~= zL#|z;eP)<(g<(~GI@gqpmspH4*-uK2P=7`1qwC|NGnfxGU2nAmaYwD|b= zXAhq*FE1Yye&5}FI%PDcd?nuY!AqB(Y<-32gGo_U_U0MKpI#?8JBnOHo;-OX5tCO` z#72BzR$Suz`SYiO+qz-XSkt++anYN5stO(38Hpzdcj>P$a^Bc+rSbxLRgU(&2kcFPLF3%4>I#JEX%2>F# z)6LE8PrF5+eK*~(N&IA@(2T`MTh_t0QtzqMuC_;kE5;)-1GRBH#9DcI`HM5ZPGnJB zkEv!KH;#^o;C$^hkv+Nnl(BKfQrk|+^QTf{;wJBFjFfoJjv5zQx7F5aQT{eIHg3GP zU$^abHufZ=sj2DXoE#B$#ejqi&rQp*c_Cy5M7|=3>l=2`5npqyj>OBe2K9euJ^jY> zKtvrMdClPo6&gv$E{_p{lBHcC7PMt|b*7z%|n%=ia82V{r}- zT4ZGfN~6gY0ceW%CE`Zdp*9$!`%gU{YgoPCw{)KKSHnueCvdOSR)Ys#0o zkKQs=zMN@BH=a*SY!m8L@U(Q&RSF^YAnlIvSM(u9|0=o0;v}A?17vkBy6q zi%R=}?i4=8CEuJ<4_H&^CO>(eBqCe+KGy{{8zmU1+dDaa9GW?Dwx$fkQGf zXTH6#xwNt*edf#=>}L88m6boIhnu&FpItjWJ)Mhv(2!?ZcIZe5)5ojy%()l)Dg{NJ zA{Z|H{z1hqcPZ!!?YaTW5rrHSkLL*niUCWZ5fLm)lQkl^P4e!8sA!tZuDhFSO>{(l zn;MH9#mW+Qa~@PyRys^wy?XV}?3gK`w7Rm4N4IL+sWyEuPS#79u*4Se?z-C`fV}(~T^zXlatUrACz$qxW zUQA5v(!zAc(&7-7eA1Mj?_*Wf`>w8C$Yvit-`h_v6e=Vq7MINLz3;98AY8t^TlVM5 z@}m7q9*Vr_uk!Mhg=5waCn+`7)lqyz(DnniKL+X^IM|GL74yW#9lFSk+x&Ox{AxeLZ&Yfa?BjJfBdk-V`A^^?Y%GM!cMN|+0UB<%}Wj)sqvi}*buPvN9~k} zN$O(E5kEgUcMp%omrkam9fj-gECEd_(sUnPn4h#ky658KV>ocGJ}S^H&DO!;1~wv( zfNFST~m|3ikjNG{rmS*DeXFRfwRtseAlD+JKGPa?%BIH z#aMxlPb1U3)Z~0mnY_W+J)d(qrbWUAGsxw8ittxwZb?bWZkfR4nb=b=&W3eNPx~BA zRqLSh^71O4>r2&sAQ7`XJ$$e0<~gjz_Fv)BHr1?TjzB$WW8Z1|^87|3I3y&bi*8nQ z)iY((Dtqir{>47k;#zF>3`s?sJ6XuW^4%We-%CwV7PW0UzU7~zd>@|q)wb--3i}KEG!YW%*URLE}Znae{5!oQnUYOU!U764+}{}#d7xv zj*~g&Lb2Yze;mGD#3eQE(mZ07n3#R~4VDmSX>2S3_4?-AM2}>Z z;)zq|%N;ehc1r22l#Q;<|v5=HB&axGO{Y+w}k`DhPVW#T3sb|@OZOyE?f`|SQy@K_{@+2JD0q0 zV71@fUOl~t;T`Swd{}Y;8j>u^11`9`2kU33JXXH;r95!eeM+nH<3}sRgczY#&qmunRSj2GR;C^u9nC%0z<%n~spR5Yz}bBjmua?l z2Ma#edrZyBNQ*66B~kNG&U+JpyNryC!~A!z(XW?n>*7BbYYGVo^^T6-0#vs~5)BOt z`)v52X~(pIPjf0#6pOMYF8_j?+dVgzW2NcMJEx}r5w}UZ?lC)k+Um?1?m)%oKxL6U zYCo%^1>zjs%+pWhaR>FAW!h8fbmOW{cFvQyPhC?}x$5?AH@T3x*5^izkB`tFI&`Sw z%NLftno$+^#jIJjZcWYeQ>F87m58vaZZOj?5Act6OE(m;5%Jl-KTTrlCH5&u;^uMsXwq;h?Yx(zeI1 
zmdj{V<|ip9>UU<`2}3l#>+as&*475#dN`W@M6UaAlY7tF+UgnoyHQcWc%9rTlahmo zsuRx)-@dxocVV!8H?bsmv^h4iEWhVz4L|mG4t={(+ z@xHKbeRhHfLiSeH)zx(hD0Za)$Kp%$3%%Kd%_545QN$dk`uh5;&?{hzY~HVvUc0!d zzA?w5JZANDp~H!An`cHj!mG04;^OTclPO(2)Jk>n*yI5>B~xx}5qMW!t?76g$+acN zf`f1!JJ8bF`Vj@k(#|dv_5E3Y_akYyeMlp8=#|cY&E+z6Ve2mQ--$arxrkVt6Rv6J z<(yB{Tv){_+MCUV@07xO*Wy|%0_Kk<2K@ZMAkDFehbqyAwnmU3OW5Mvgs$68uko%h zyyq=&-q^wu4eXL1qobJ~WtT5g`AiQ{5vX!}3Vwf+^31RZWKjP8ePQ%#9#8korj7rk z4qbQmg4}`vR^mgk+h8z@vYh_{HY?-fw6y)-OT3@veMjd~(&EHfpg zAcPJYHZ8ypiV5>n)L8W7>C}2Rd7%D6A9N(|(9(_{jF&mr=Tg3$s{g&JN=?C;YAxV! zbk{vPI=V9`?G7Uv-}*-TKi_jTKYQT7f#_Y`$T45ySLabKo}B;o!a;hIf%72nO6zQF ze!g40ir4?#V)8!~0G||L@pODT*$I^Oxrgqly*vOl` z5079w>?QGZpiy!jcXGoa%3z|;vXpZd%gK``C;d)iYunhQ+_gR^Fc{p>{l2TS)07}z z+A)|WSFSOrfaW2*wDBgJh%gb_oTgjjR1?NNfGdx6i;aKq;Ca88kbZJ#r8N(ayxYN< zc>k59V!!OD0b{`6gd+_PW%nZ$UdzhLGOKKBJDF&Kn2nw}|GmU`Jd1wM9VyP4+fns# zYIoRAI9gOoUE3;b#>-)fysKW3 zuj85KBG=T%3yqNrqMcqA6c}zdNCFa74qe?H()40xW~Mcpk}*;B!Y6@&>jU!6iC^ek z7?CZgl#t3k%~$2S_QXU*?Loi2hRAd3lnl4QrrJk@qN~lQI~Lc_QCW$WScD>bl0TYT zM#dyD(X`6h_VjCazV1c(jF4iKgGx6?&lIyjo$emnqLAY0WvPP)wdb--v`b1#))2kF zem%Pw;OTq+Sn}ddJR$PwlYi#frAo!*oASLT%z-|0W_;6zy~zIKZou*a1M#k-!$dA{ z#muHVGBWZB>iT3}#Qpoc4fkC0X;?TE{JwMqpx38Ta-aBq+A$8O*XrE4{ZX1Jm;U~K z%%Ys!`Cg7^tBAQ^k=UU_ryQk+g&=kC|k zLHA|7vMYVUcO2YANHAT9Yksjw6fH)%Up|cG|eT%1) zueQ@98;{S5>Ew<<9o|nbhlE8@%1KO2eC0~Hq1hevb|3ht#@8N>7gtyOxl~*uY*B;A zT}^yB^!v=EORp|?dd7_8m=){q{rCyt^ATO|iKf_+OMjHvFAQPhniOd`#7eAf9$b6> zn4&UO=={{+^V)j>QOQinHkFyi`TdlhVz>P@HJ`(S=ARqo^fyqjn|IUzU}z|?w`HFe z&{YJyBkt0)xee5AL}xWwf+fB?bmZ0g>-7*I)Y6r}hVJ z4VOv>)fC-^9cu{^mHk9npJZMEU-W0Fkw?tsyIj`bQh{G~khh&(`uzjvZtvo4nSHM~ zGq2q--qJ(EHx`tipRd{J%5CY4?p97t?gLUcQYQMsn@G!_-sYmS%aBk1>`)<>GF`Uc zemBQm>#Ex%3MDV4^7d{4a8E};VxV?!VrG|P!Mj+MTyqZr+@@r^vr9=xu!0770+8VP zg@=plu$=d#%E-$UFjt(&qv^+wIBw??GHZNt^u!gTVvmHmp~fQw3#gjSK-iJxfoOYw zqSxO<{w8R^oj?m5GgkvZ*FOwCVe(JOnU9QxK22gk)Lca9<0BzAKyNT`-OW)WyMe12 zJZiTMpFesyV&msml5UW8d)L>8ScMX2uHZ}alPM%(`WVcxS>mO1X@$P$?fj%9&Ur3h 
z)bWog$6~P#V2W;l_PD8hjWI)R&!;(xph6mj|4- zDRUo9{jOH*F=~4fO9H0vnO{!6P0d#6T#ntlg(RK3cwJ0QmP`m1)J(9Ef-xgvxHru<4q|yE5(}%QE^YO^6DuKjf|{AgdR`R*$t*FQ^KGuISQN8 zp*>rO2t|gwi9hQ&=P#R^>|WC}H9c%lY^_A8j;1WOCMWOd?d#(KKlNm?Lhask_Hk_Q zcOO3Ay5uSdTo^B2$>W{(eYsupOw2!>4fXg)zfzSq}(9dfmFPpyKI@4{zUc z)zJ$J7-i$hhA?q!a)5e{a{SevBSM6tM@-j3a*ifA&CJXs+5}GJCJ}elZZGhjPR zYQ_l0@i?h(?{AcJEuEfw&#Xv(kLs~sr=u1^%A)eke%U0%1r@Wbj(EpS+LAq6eV;(- z(Fh~y+7xq<4Lcg{V6}QkR&fJ-fA}K+trJTVBdu(#~JSJGfT4+9?ixF}eMl-sH zo*957##YwA0mv*~@$Pk|-TbRWWWQ~N96!HirbW3q6EAPHrt}Yf71tyc#&_yfXHKe9 z?MuH zD$m4ID3p}K03TtqSDYehzj|X$KHuF-OZ)-%nW5DX-nRrAgE}ZTKi^`^I;ZF-rb|J~QlCsg3WYW6XZc@E6OFWgF_d{_in8UFzD z+GXWr)!FZ^`X*nWOwW%F`6rNt!;EuJSyt0J$$XSco}hmA@eY!b3HbU4t$1~_^_erl z2rN-a$(X96>dQ(dujh2+2h>j9IX#>Z8?ERx_@<&lN!m_$H$CX@$oJ>$yB_A{M1y;|+LR;yY`^(9Xsb!r}ZB~jzOA^sJQq$utOJCmb?pJ>zf~Ubac$BS*65UL?{LZ6qX}hQbw=aMMTaz zb-g-ny$IC)Xy65!j0Xc2mo8m;J@(unjR|`qEGUThyRbk_1hasvqY3IyXf)<(?ZjV$ zS%BU?(XCmNggTrK_AVyofdTk@Do*M0b3W2W2N~9`tpK5e{7cFhjzaq>z2Kr=wasWH z9NH)&^DFi%C=Nos{`fq|p=fXYL@gH=7j&h3@5k6B>{!q{mwSX) z5IoZza~|gR>U~jS+wV&SG^c74K`)&;$r76AY;VuNDJ^p0GiBEp_0x~yPYJpmL!1C`&$}y5pMAJL`(hB_0jq@F#IJYt^*UMRXrK=cFg2tDsZbH;otz3|@*rkiW86aQx#PiA zeCZE29u1l9y%Qp$rmU^4FZRBpB_i&yX<$ns-k`ah8y0~Ewj-W8igjj+G%wVkcc7u1IBN&|t4nRTTb5C!cb=4UZ-N}MmGQP8!OUqDS1mxu9 z55_n)r*7~>tZYqjOQDAM_sIIA zD#_ikbL;MfmNAe#F$*<_o)bHN*Pz*APtsg?eBT)4NXs@+1}3JH0+5)S;<;Ago8wPac|nq%v9`_wc^K{e z#%n_L)TwP++S-i(Ia)$ePoe7eTgy@;w+NeULkGGbNb@HktfSD89?c>M(B~WW&Em4i zAgEZqB6$~3kYr4-i`2k`lZ?H0QhOI}rEvWQ%S?@$7$GVG+ov-9_|pr&S~4W)>uNIr zWzu{D5@b9auE!jNm=x`UHsjLbtPmofUs1)O>(#~UbSDpriz~+ppXLmXpkM6*tB=GG zj7&>1cvSn=ty_9vD$W-k zWej#fBgYh>k<3YU0WDXQiZ3P{TeQK}G?~+Ytafqf??obh?igrl%H=&qSti#&V=F;G z1^1xima9a7LQ@Ibv{$13fRvOP2o@rp-QZiX`My1SKGAHpo%M;`h;0XGN*Ot&T6WHu zt2-@quHux5T6NH?X%_@vOZMrVp!3evCq#0nCMX87gFYuy$(#0?UwL>4=7{-y#{xU( zlH@!`x7}b9xs5GmjRhmc0O(7+%ju8@2{QW7Eo|(O(re+-oB=A$w^akx`0eivLKdnBp9)ixTZDQ0_cQ`^46WQOr ze}AAQ>|J}ikhV-9$L+@bd%K$j7AoxM}m|#^**NAfum} 
z6}y6cPpq6ok^9>l!a)$oq(hBQ=}GzG@83)4@peJ7sd`~k)6e-TRc9BN!FOm$L~&Pw zt4zm8B^+U=YH7(?TU(p!*e-lOK0eiWKgCm6n1y?tj6W2h7_be zXqX=nc-Z<}If`$jR`i##je)nQedornWoKu%HAwG2{ptdhu&^*#B06F7l2EYJ?>~K7 z>*?v4TU2B;d@wS4rx%~)!>FhzTamV?(pQDcU$wqjAKD9vZN7)kH@D1tN_E96lKZH5 zt24j$8~P-jt2$6RMe#||)| zN*z6qr`3DjLuG=jVB=5Y8kUzQ23mw!sg!L5bq<|!C|bg&*vkAIj)D4z4<0Y*PkaC~#SOr$&|};0ln9t(w_(WMhi08p z{NYRX-v>t@#vN~xmP7>9xn)jQ8~~fp=ZeSlV)>*7VhLIX3kypSa(Z<$h(gNu`S@(d zf41S?ti_0^If0IBS%h9}gES%PGi}#`5bK^vo6}Qk7K=tdg?`d+xn^`mnCFFCLP}R< z!R#^A%okk>!){SNnGQTNENE)iZ`!npPzrFwg&Q#40KNX{vb}u(icTwgKmgz15Wvw> z@SuNx_tM0OSlqU6PJIl}10boIp`Rk^)FA;%nuEAIVUG{uvR5-?)I=K2UG z^fwY8*422VSR@4%{%v#fW+)N7C!Vq1=1|O@`F=qDphHW9tkgg9knb(*)h8}JWh}}S zb#)s+@6GwO0bQ1D0ouK;F_4x#;7f zgir4PZO5yhycS~aRp_Zd-b|Fy5QQO8*tu1Z>Gjg}xRw@1a-^G9koybl8cCWl7+f^d zZkavh(RX8_T+(ck4T#3T-DDHhcbi=vA!AOc>xgj85IXbwXJl^Ny7i&{q3nrY$K0u`2k%sEJ|GE_P?Q< znw4BSF8nAu=*hI@hwtANx)U=yZii>yrT%2xnq#4enk{$fHvv3G7tuAP&bUT{YNpE; zju|K^WC_B)eFgl2#_O5BD$?wDp*Lb&FsG)qb&JxZzz}wDgU?g9$mt`U4Gp>!?Jayk z-?L1m!gDMDJa%8ceEC$~8Cn^y@%3tIYHR4}tx-_zI$sKPb$1^un|yFO&VP&CUbqVq zAW|T8zXd}_;-sPB;a=>ak5EiN72iTcQ7j$Z)W|0IdktcxPVXU1Cr{A{tz~3QzQ`MG!oag=EDQ$$c=8dgFH(q;;PzO~%kXF0B z>#({y@WFbCvfcLp7Yi&9vx$%WpNwhe1I-t+$wYKD%M>-q*bMr_b&s4@?TqQ}6D(3^ z_HFjvoNZp}gI#b0U`*Je>=x({QfSZ3&5iI~P8+`_ye#D*v-L-fw3>+N%k_9vxp{ew z9br!)7P;zvZE>-q#FV@)kG{1_dK1CI${GwDnr1u;?3!*-t^n#G{O;akC>K)QQFn$j zPCSP|CUu~oRGxuQT=~lbk;=;6J`9_=rt}4DCBllipKo_5D5{f$1pO^ks}t-A4o}ce zVC8xtUmnJ?lfnt4`6Q&tQ?NSeJW>!+WYt=>y*n*g(^c$7W{>ULGbZ;x%t!rcO||Ey zt-(!gz=th?RDl#!^Tji5G@bk5%}{k&_|JH&Y$WS6e@&vJCn6GF{=nT!w`C`A7mA39 zZCqWN(q%7Fl^qWGgdPmmqHw5AHjCFg-o7P;{-X%)eY%O6FA}@l0fpDl(J2E81gm)GL{)JtF5lC{?c5iuKqREnEl$OGmee|bAnp)CJF?S%F&gJ%f;Jv?z9GI zMT2!Eb(~MYKP20tJTYZVyLdgppFA}9Ef*X;=;std022Z1kFla2%J~p#A!l%@B-`{}_UL$Qn-R#N47ZC7Shvl4d9;LNzHjIy5aH1L>iz)Dl zvN?O})@Ve%oAGk}pu{lMBsOgSGPiuK12l~Ix4{W=^=@a%)tRFKn~%RZtAU*f0(2`_ zFRdbHANiXHcawO?#MHDVW1VZ?sZaGQTE*ep=M!zNMQ!3KmC8hY?{}O-A9)cblMf$1 
zK1P5}9^v5=4VdiLfS%C{+L36|*?#Yvb$d;XK%L=X`(^tbMo^-BiJ&tdZ=@72HyxsR zB(T@myt_wJe>M^C3u|-L&s{?K17mtUCuic^x?dd9t}5)CtpIdLC9Q0A#aF_iWv%ks z>5+DkW4pujQ^Pa_ay)fPX)lg-9Q59G3*Snk9$*<6SO|r6U)~xwKIyEHE zZ=0T(p%k3RaQhB@gHSIM5j}9gVDa9S8MNuqX}jev>Gdt7Sk7k{q*wRk*$u&iGOR4(|XZ;=1@k=Ic3!CRZ4XEFB#mJT{N|ws)q!E>`4Gn~}J|Z00d- zAAiNH#IukEsUns*_mqB@m(GG6X42FCjfT!0iNAwjqDhlVaeM#ZuhWSp$dMGYD8!Rd zoYbY?mJq8zJf4ARY`eK_b&CK%!4b5YRK&4ZVKh1tJT;%3fK23lritI>N(s|FShaIQ zGsYT!etrZ+C@9TVdgqQVQ<3E0_8mK(09Ji{<0TT*Z|USy`l@VA(@uS~-rKXA^Uvub zb5j1!pS)M#^G9Rq*~zSD;-`I%%4La3yDm}{y5IIk8_O1HT?)#V)UyHUNuSp$oBE>F zNe56wDa(^{J%Q(*%sflgd3Xa#&%3W*w}A$zN|1)f7S2Ss*G?oIMW3lXhP(TVe@yj1 zKpmdM&x7N&<`d#>LJidy2jFF!JFhe18zY``$yURKkB*)`ZKs*mb{5^Z+eh`Es&2s6 z?E}g>bn)jJ*c)JhcqXQorJ;Je8zm)8_Q#8tirUh5QyN4|>35(`KDSqW!7VcGvX_s( zv*l&o9d`LEAtCe#@oQwFY-kXvW8D|WRhnY&?413_%Iegq`*NYqXeLk8M}lI#TwE+o zO-=nGDn14EXfmlFARxe`Cgq^WQy~8@FeH)tDX%Kfm~kTFcgB6t*tPAPFyr#BB%ML~O5(z_8_U=2gBYYVI~mB|LD#8qqJ-hHBi~N2Ol>HeS49 zamcux%gjj#c99fcX8dAXK!`!OF{Dppraz!nHW z>rpLVSGb&{IZ+iqEh_d#I>AjJR)u(GZvBDUrMn$jpHb;3QiMMJ!{#$CE_Z0xZM6h{ z0ZXY2|A}W&rrICSr%BAR&dtn3)5~c+)KT#{GzC>8{PG_Fij45f5zQn`%`7bK+W*Nt z=Yh2UZSA^u@2H(R3a~SM%37Mcfbzj@=m|VBtty_@o5C%gD)aM>x52Y+nSB7Q%un}) z0eK>db}sN^W4iSMQNXl62YBh6+Dq%h9!avfPYS$+6X zjB&p0=hZgWHPKfne9gIMB~B%NcS#3@ZwYaWxI1?_wSZS~={JpenIFT>ojbk#c7;8C z$}`aLh?#TV_ue;pP2?AIb8}aJ?ZM}Ow$D?3*WSP_{RHFyGd{zohV=M6iU`_ml?piLSPM)G1Y)X=%zY6hkvV`ozPOk!N!GV(Tk0|m14m&`Y0=ZS} zxifv0;qTB6UGnvP-U7k6?#m!=^q-lT6LIXo;0X^O(h{Wm?_{bAzqf6U(q+!xl(btjJOD#*&_bm~xm#0Os&MP>FfxzlldrSV4w5=Sq*%j5(RvHJY^a{(mQl)TKMq9T&b_*&{)1o9|L zZW$^64D_UTp#KS$&HJ=I8SX6bFADzKBfY0|;(T?3JjJWFr^kX-;G|Y6ga~*8__2n<#G#ORQuz6xREASr zsj7xPdm65lTwOFNO3sUbOr%?y;N&;T&Yawi=XU1IlM)e-lzX$yin;IJy-WV$_sq-| zsC_XbX+^90kKXL*xP2!I{i85R?~N?rnI6S@$gM(CCU1tcx}HJv$M2Eic=F`j<9&dk zMFc1Qu?iU6HG~+1&2SsHsY4S05(FoFTie9W1e4B8Igx$)RLx#pIE<{U3cq=JLJv}% z%G)cnv_voTofox8S){9NMHFMxNyCFAkCs_d}7z0{m`rk?V3XkRG>Uwy`&(&l{U}Sq&j%_ zK1OgI2}Uyh`}faNIeBPVf+hv(-+R!-|HKn=m;nk9DRM!EFlld2*1%?E6*4ijk>8G4 
z5Np!Y*dqWXMSE@JQmO34pTQSgT{pp~^6j-M@SV2s6tH?mZD)b~J-LY=Z?8Idh?tk~ zk~-bRx$)B*ckUDjW`SAssn8ZyV2FojdkviU0$3hKQTZ$ zIAJ*YbU8eV>TOgfR-E5ZeHZFbpUf;C2L1J9#s7>m(EBAPeGT2nWB4m|sY`q^cCyM@ zRmi;Ll+AV!7mX&8Hk?W_^#*k3h_zBuQsBelG9>#wE97hz!CqV=#~zZZkU?M;7BFzy z%I!rxKx(XmEq?Qp83iXmGLYfEBlgeUHML|EkPX7b4gHL$K``d039%5d~f>7#_bIE*G2m2L{Q%)g5f4WCO`g-iTDXa`^k*hom5j`Os!m8a$bC2 z$0nTo#$Vb$p{lZy5o%iDL|Jrn^q0oQ<8doLKdvh_2Xv&3*p=SMdTRR5pU8&GD71!g zv6Bg+o};Iz_Rx=JWo8D08!0J61$wsTok8`l8U3|fSFTF!iA~pFs8Lu<39Ed_Inrza7`zPb-ns=y5{j zFZ6u`$m6(_t-;)qyd3p?BDh6=T;!TE7lu3xuQW7W^dG3*Uicb{;iR;F^PgCKSa_ER zXT+HOgw_VwmQr5mlD*_&5A5;Pi>ERS9Wo!a9jr!PckaZdm1vvzmY+u44Kzp&;EkL# zKlxaXObyXfXHkfoOU6_7mIlpZRr+VP?bu^sHhtB=n=`%L!+dB-CE{v$9R$%q56w>?^=Jq>f^^Ss4}5&_1*x=xB&%| z0p2PrjFUkVmO#d9%1{$-z@-!hBzXfk#O`!eEQ=_cawxV- zC|dn>zP`TXRbmS71N12gI&Fj3WMTOBA_dy99f`YdNV$AJ``0I@4j7EG$l}EOYL7vxR%Cmj z-@I$#JOcxRYwkD5pM8Tf(zm;78P~681))r z9OYH~oPKph!%VuajTL=jSRu<*OgZzLqEwE!UopKc;{UoCOoopb@7O!BW!-{!#?|PC$2XvvL(h~$K{>)_mXCjES7qqlU z+%d(kRpJ4ESYa+c0tFqJC#Jn_-r*m+3FM4+IcEk+0a_93+0k#8Ci@vkk!DZ1v^M}5 zS{!CPmm_6<^Iu>UmvbI&KL@uZ5d;W{Fp_fFd1QJ^W6cO11cT#k7=?M;)58Ig;xL9O zA^5{ex3AwLpSeG7!wy4#+HABCCfh?uQ zb8`LNg})#x1c!%fvlmT|Y{I6(_IUzT$PS$6euZo>lk=hi%Ac=rep?kWQm;T@;Fc~1Ik6NfyHJc z{qmmwGeYh5f9JCQA7-zWL~?fiwjfgr9`<9ffZpA#|NWXLE=^N|4VaL)LT>O31>gID z`;H$!j?ub?5nHVBe}4S)!FgeVE1I%kVZ%H{st1rh4qf`qS)V9>6W|ET_yl^m9Y&qV zNh{=8+e_ZaMn~Z_XqyKt3b}dn*vk&V8NqVjIp>!#N5~-gpP%^R*RO4#uNZ9??Ro7X zurN2lOz@t5En!mR93C3FmXyXJV}C>x!!E%0_1iZahhp^FZHKzwz2ok5+V}6Rh;-sJ z+S}W!!1CSM$fKHs_Iq*~MuexhkXHfAGOD+>T}G#E>rhON2=~Tp`rq5z*x0Vj70lw@ z9hyM$`~T7A@hUl%QFJ@%?p`UcYt#IM!Ha*FgmQbd(yF}pu%durIsub@_)$!Qvlm@Q z?EUAbY^#NhW=sJP;Cdz3j;)T4BU-J?V?{l`VUYw|PHrYtxla&8oF$MD{`aR$w8yN% z4}>)l1ZEC7*!I~t=>%U1?Ee&rNil|nnUPrQF`oZ(1kKSdj2&P)5+p9(X@P zE+GEtQ_NN3J2>E8yN>C-Gx%UQm#@P`}A@dB@tCLt8hf1-PVjwGHD z>%(+)`8z|Fe;K?WhXbKklh%TT*|D2IN|diJJlx&&N-kZ5TZswTrvBvr9&j*Zx|N+> zn7#(?e{z5&MSuRAMaq8eCobgBHtD{A@Jo(8!d#3xS^T;_N5+yJ=>n=zF9eF;laud2 
zp}H<9FQLICg2KWyZVQvy7(YX%+{*zk^YgF%lVuPtWwe-ef)Ga#B|bBZa1PbZ1QqLK z3(K>i!+C-XOtQT3682ZnecAAaXxny``m$k?5);s3sFkkn-~O`-J&asy;NzhpP;$My zc5rhqw5dOg>-+iB)n9ZtN8B_OYpHR5c!8gs0cdw(NxhgRtc=|OfX>t&?x>7w2 zmbe%Hz=7h@{rVa?-?Ns=#bv@hR`-9tWZEKH{8D7|!*93~clQ;x%KEXOd84?yHh`I5 zi(EK}3QX%GNZL+0BW30m7Sa;I!NEVk9bZ~rDAbID#sm=YYtp~y>HW@{t0lOiNg+Yd;A0J0$<%rQ&< z1#SKGSZAzm)xXD0tkAG>7cJ&)CMrk?*wx(qe?I$(iv}JgyMo_Z0(yN^_46s*;J4_w z5iQTY+>$H*%?yaz|DI3r#O$daui*;miGZ95s*h}vjoC|tIXRV_z6~#XU%J#kT50p{ ze-5O`#biT6**JKw^s#k3JUoDo*04H~qJRDhk}k;fLx$OA+sPp;Bjv26hr8os25}6H7^I zhdK#|E|a0=Ko6o3&Tx& z$r^%W1@MGnpyK7&-! z*@U!Cx+5oFYphEDdvpoqMT`7%e?Px3IEe$ZxVw==)L=t9P$&noBD}OvUQUkO&mcsf zB0(9u{FdUKQ2_Ix1}_y^bC&-0-8mGn^h$FcqQYhb_jSMuAiprARLp68no@)j4OU(& zuyUcdZbi`yU=za)MUU{RAbgNh;lZV)+6 zTJft_t`G#qvV$Q_gHV!)S)Crc($1|xt0i^wy(F*cRq#A zh%W64UIvGL5p@Ft4LWeT(Grl1egN@LPJ?FU-nWnLp^S$zB+q6uRirJj-L%9doDG8J zavebySkOL&ZOx61htU$hc*C#*exY@!EJGcI&nxGlOOUt*>B_b~0S-V-ocXdApIeE1 z(C>-?TNQjV7)T~KcBg-}uERLZj#!K=1!2XWf;qrW2#Ji`$bT|36x|FrCQ`DNP?t!e z1}*_w&~R5Vnv2O@c;YNk5j6W`F#UcNv6KY$hEzd;jmd#v*!7etm+rAYBZs^DT#-C6 zf(b_zr*6%V7|dF}TK7e%&_`Uxw}DH+xi?||9F_5!AcGon%U2+ldlwym7^t|XyByc`#2oZ9BGIO{|vF{ENx@Hb)v zj5NT&1VnE7uZ51Wl|C?&V6?dkP~^ziEu<2J;vbH-jv&a&gWR%~ zD;M1`N$3dKt-$N|1ZN6;#W6Bqr@o|W??h|4`Olv}IA&ra%Fg||(%EvSM$cMBpXs%v z2?}Hp={wn{9iN0g`IYrOfUC%WHy@@IMzU$a3uz?0dq=mydL?T8M85oT#wVX^np&70r5SDyJifD6j z2{Z&kN!p<14V=g^zbFOpk|^owiqyFaLN`GDdk^M<)B=bgbh>C&#q68*Knb@(u;Y*d zS^}_v8W6V`A}rW^z&3vb=OzpeIx6f?AJ{ckbw(WWPV{LJK3<%iL7 z8TX+ejBSxUAYQ(5TgTO+pPaMDU2anhkc*dex&g-*wfgre%z}Z3QGkUZ6pxu8&{H)Q zxmaMPRJ!h^VUGE3fS0n z9Ed7daj$BY;=)Kk#A{I|2U|d5hR4GEd@r^)xDrwFuAOd(ve1ATD;iK+pVilOTQ2qX5ilULX|)CnHZ_I!lz$ZSp=6 zh>8$A<U@5;Dm~&nLi2_z=;kM%*;c?U?2-W zd6*HZ4ycJf^bhnA2~+Zv1>{ItxbHGei*)v0a6-|*nFAFS6$DU^k+Byx?};lEC0E^5 z)chn^za5I-fIp-nHtdu%>|}yzp)f@L+vDN;ifB$TH5889C&x5gQ2@c`Tqf0W3^-9X zL?TkiT7VWT`YMG;%E`Hd+j#r+>!CVNbr(K$<}*W6mdyWq8rl=H7@rDtPU-uvFn=9g z@dCbD@+c{Y#^i@jy7R5wPb+w!5Bw={x7h_%>v){LVT|%A=!+dRq)Q+q 
zKmP!^BFJsg;@F|6-vn7&NMrH7eV}(Q@5cBGlJk355(vE%%`Fg|wjH^?{w?OR)S>8^=8ehT;9e&s?0=B$Zi=iDg=9i0&YdqekjJdxprFE-RcM2|B-p_cYRYVb z9@p~h#5!0zl<;s}1^3>3X>Hz7kNZlSmtW7o5VatItR5o0S| zZTx>14NCdBSEEQ*GTmB4A?cQd3!FSS1u&K7+_^!mLgUR&RV9XL2u_ELv-(Zbx%-|c zhj0}CDcwKz?@59SS0v7V{pauv6dj!BbN76THB}JiSO|3{eFyHDggPygO~@5Q1!Rf* zmQ<0$7<|dS^yiHGNZSUSB}B4GBJ>YT3+yh#P;myqhT$KN>#@T)0}$GvS$V*2j0{J( z=>_UES?0z2j1JNbU%v9-!A@||cOT23$xz0jPdMr(91ZOAy1?1dxJn-YKxb!X7)&u) z6kv#yqBHuRX?S`lh;|2@L7_UL}T7Sn>=6uBXI;iHWwPW_X0#~(jtSgP7trD#kWu#K3&#^^hNT_ zgy{LNqT!3C+`$znIBu2#hTpz@;|g3^A`QxD23`H;GvK%805kI!sd*y@$D_wGO)E_; zzCiQMa9228kFmyJ0G=NenM8PWW9bNgj7FL~gb}_M*=D;*fF?$3sEN_eB9ZqV$rDkm zpLBpQnH7_&F@k2~Tv)nskGFYT79$?nBD zPo@5=WT9#95`H(SRID_aluFK&fAiFak9$3KB5>t_=CPDa(#-ciKSS%RVLeXs$6AuM zrkL0ioXvvi0m$t?;54`f8UZ&&hm4NK>oCR{VL6QSrPu~z@(1wmDr}C{NPZ+qLynn< zh=`Cg59n||f<1lFHPDa&Rw~`BSe(?4>9WY_1`_K*ttJ~DCMG6wDhOxF-6m(jzz31I z2VNw(ZTlkRz7BwhC(l4!f`te+2xY`?W6Ci$lIZyO@t+7rUrMRCs7aDSc(HKBW{;u| zTyF#c7v#79kG(em%Q=7h{x3UYA7kv>Oj#o$M6%2nDwHTiDMM1(DqFM}gTdG?ElSE# zXcJLnsqES*l_h(N>`F+f=XGApjJcoXevaq={Ey>*9RJ&K{OJYJT#*8U7=q?!0?WuFYVxQ9MC$ zDl_L!suPE2;`NvA7*NlWC)Y-M47Y({rNX+1K2i|F3X>=a$MJw~fQsFRrLCt$j2eFP z$<#;_7VVek_3D0e>Qp>r^(g0l0RajLC3S^lTtbN$ee#@~ozL@B*+yFh;KK~Dc=|{i zu0*Nyb3c(Hqzxkp>rlpvNx=To5Y)}?7)j1KN)01{ z1-|AI3duE|?F7wZc*jrDJx1JWieTSy*pi0eQ7Olf&Jx0B9yQS{qps-IuivB~J&H_S zDv;Ti{~XOLcS37_PDd3Za8^qgq$h^$9M<&h6NXT-XKa9u@^Z0JN~ed20HSc9OD*O( z)Big;sxRcnMe5<>)nk#GPLCi3OBXHH<$$xDc8sQPdZ^on{^(R*(!EmssS>-^5V{jK zVyBn|yp+)#miNd^(qF*`%fKJ%)(+s3E417^c>Gu>?%1V}+^qnEYq+fqt$HkGyco@h z7l^pbhcPy^7$nU*@9r#;*;SK*bT^>u$mFZnxFcKw?R;4!Sns#cYtRnY)Oli{UHY$E;F?e#qK%EEM}WFZr*(BnT5^VZG|ag?L3m2 z*Cf;cok?qS!`1l9e#DXc^(jJt~s7M}Pb>{(ll%sT&|3ioc8w|3bodxlD~#h3?v)*i{vx?i7`l9ADj zg>a4->V&w=#o2CaQG4LrG$DYllNFF8J-_{GY5qZWBkXn(N2WY8Hi_tym(Z_&gOMj{ z^o!m#rd^0}&&8iUF3qP2y(Fr%VddMu1`fM-qrfjc)#&G+YwX=7jQ)}Y`G0l);U2iz z|EmLozp!bPivPjn5@>2kPYm3PX;EcG#a?28LV;sI zDkW&%_IV@Nk}~H@aSfa*4Bv%2emU+ztsxCB(Ml;>RJ4pS4N2=bL+Vf)iXB;o^-_j{ 
z`0ZBCaK24eGCnY&9i8+Yac<5Cid{2kN5U?^FxR5Z1<9lmUSP}~|VJ4o`K zj9L)b1*|JZo_76b?G`G*-9Is7DZ88|uZ@%)NlXW6Gu>z4?YIRIS!Ep+G~J2?Rr+Rx zJBCO+WXA9uXM7W#aYRH8u<`itdKPM|%&QTsyHI=N9oFGu#wFi~nlWv53l|6lg%|9D zFmA!|OP`{etOF!JhRGNO`&N2N$GZ`M3`0lV?h|%;k;wOx8(i;(ENcIcYFqd%#VFEmO3<|6V1djKbqv#3`8wOC^&8Ogb9C6zeVLHW6Ufk zrDPiJW6JT%m(64@Y#o)LSP#BZT90To`A}JzY8#s9?V2M*IuCL$QIN7rCg?fjMsA?8 zpErN~5{*kxJqQQ1#IJ@SB@<=eZG%)I$^*9L$<(W{6(+krRaGsf%oZa7=1lJ4C*EdD zW-TgY>}>reZDouqmd6$#qiM;pyKl1xt{t5{4tY`PA;SA_6!$7`(qQ@%0d=F1n~X!o zw?wlALG1zZl0~kcvTFB9Z{Ej#2D#DJ&DW0E`|h$#nPjOCi0%=ndyn zBhqmp-1OeP-3wF%cuGvZbP9r%nQFM2_P^?R^>W zY4H7*c(hNqMaPbR!UW13T51s)4Hi(=K_QLUxN0O*Ve5ST`n4C|C#jlGId}1*5Umw| z&e(H{53Hr&wsWT+061WxIR|_z=v<-@9E+^E2am@s2j&_O-mj2c8Ld!fYNUBf#-S&r z`45Czccp%ou}T&*J)j|^lSux8_2cKVa@pPE8+zAxD9-Z{mixHIW;2qZ8_POyOk6C0S zAtQxd!*3`;oDgKoUftUaZHma;5;XuTn?g8K(vpmQ6hBc)O!bGy&9TunQAko}zl)T5 z5u3;22T8S{Aq)!J$^ZT=_!Jum~8i;v?O5++Y z4rkxDZ1MBB7PIEFm>P(nMwmYIyz#$C0ePQ$;jM1smuS+oX?9=Fk8kbet^fj1TMsvFae7Iao1 z#kN6FIL}=Vu*>Mk{!z$q;p;pp(VT@E!2@Obl(Dj-!=^r>#`b@@+LQdoQh9b8VvRQ?P_;e;G_^7QHbz<2Z0%c27PJh2{qU<$TO8uJNyWAgeAT${AH8$`bo-Vi(} zOb9Epu!MIBf7C|Ee4oKq)c$PBwu}fx*i;+27_2@97j+f4;r(|KyCd9CblCYi> z)UrlL7e$7MS)avuMMgLd6M29Li&JT26&GxgT5}6!)oDM~1bexwav=XdN4(FT| zB1dD!{0%n`kR>JCIxMh1U)Sh3WeCU zFnMw?!WQafD=upk_1B=(vuDxbde(U2RKp`!t4o(I6M}TxAtf6PyflG&PUasV zXWB5lhYc4I_~?fo7>mX(n9kZoJ6tQ$Ik&rb@#3r7m)c$i+$+=$hGNZrmD;;-P@-E! 
z%x#gVp|x#S?pBeu6riCNhgA%4$ogg`r?z{e&p`xlhGR(0=9U*Tdy0NT$B7$EPAs&N zR#8kszp#D_NW(UPI%GfUKG^10!rJsZvlc99`oOb!WE@ZZ)6Y%Pbv+ifp33-vcZ~Z*V){Yv!-1IA_kBDHD&5| z;JlkBo%ZK`q(kjVR%m09Q$U9{P*~Irqp4Po=jz})U`%w>V0T<2&My~t)#*Qf?%V@T z?Z$5W?i|2~`RNh5jB~5wISCP%e4I6XQuc!oyf?i^WDPWEe|*Xvz3uZ>di@wW>iWCI zR|vfD+od%SD_yCfPz#?gkP6SFh`>S>d6LsZDP_sQCBeoqhZ<|5N)*C^( zuKSdIW@u4W_;|TaQfAo~ye&n6!=?0QLQZ|X^Q(6Le#+zS@D$kY z+yiV;g9<$nu^uD!i4l+Ch?}^Va|~++QmW*xCT$hnemD3O_)#z9eLKh7uS|9$U`v~y zeQsu0(5tL9%?%AxJ6d$Mzx)p^H|-*_A^PkEs&5(G04UfGQ3qVtKv4Brczbg`K&w7p z3VMVdlYV&n`J0$2D8PR#G zgTAq`@u_F2e*#x)jeog!q{EFv9j)R9^O_Z2)c?gFPl2RoVWT~0s2}}aTLHW{cjZb0 zJogY04Up|ki$>CK;du8?k1o=N5%em|ws4@Y-ncex{07~>Fa1JrL)g;E`Q_<@W8H;rz&9t-Jrqo_`EUkS zg$;hecZ+=hCh353%h-?a$%Z$cQkEB(?_$>33&jzhR!6Ia)5Y+lU}EX4!_kI-@h?UP z@Rioo9=f_6DYb|p#*KaHFtb}>9a`?r{~X1b6N8U!lS$?Sk5_q1UoL&R;FR7Mn*YGM zP$*<91Y^T*%LLwo7GT{<#U8C?im&DSgxa|D#lbTDV?$6!exhd=s)v{50n@n0*O=@u zdBI^Zf+A#wUDby-4LP}nh~{#b zkl!Ro-~BJbC-1K>4i)s5S&Z3gU*{k^s`sk9#lJ%~)>goSDQBlnNDa~7fpWhFR)scb zMn}^dh4Eq&qLd_f{KJevbN~8I=PHK`5m|v8b8UttGWFQ0-So| zF9xsc)8v2wK>?y;5+#$cBhq6iQW0Ld0kGsggMN%(z;kt|RYFpDOjZY~UuFRe^xpF0 z5E|Fo3QC1jZ9iz9!rk3n)DEVB%3usZkv^qDBC$9G4?`^0=cnb^3MXz;s`JL*Yh1nl zZUJ>41m-2WO2wkO59*ozGX?d%6B!v?Ig1%Jj8C!O63IOHl~hMLZO>#h1^k=G&2J}_ zuvk4I{)C`p>9=IE4nejn`GGE4JW8F=%!eJiPFptsH?CpV%N zWO;Yvm)G+&_v4Rn-p$>r(AwFR+HUUdGRI$Bj>7HX6wQensE$5_@vG>j)98Q!f1e8# zoPI7YShMM+69-_O8#Zma7Qr7w;RaklG6$dAZmYYZKkwkZ#T^&=HS^E7PO0|f3>?Iu zCT;sG*n+>{&CzpQWn#h+ATW%g*79H<@%q?SNkHuWxCfq;gsr;>3Ziq)UV7CTe14vF zB()?P82?nkills4G3EqaVecJruUsag5&v^Nem!35I(j2(;f6^*TJ9eZu#ZCm%wru| zv(|s3Ji}lb+m2L85!}s!6OCd#@DiWbWt^5pZU>$uyf8~CgSd7 z;1r4ZKwMRtM;BqSnTa8G9$>&_1|}IZa|W7rVCteFf^c|aPp<8)BbGF=MDkdq)})=9 zYFS$5BX0CEi)<^E5W(gV9Fim~Tzp5ECk~o+Y>Gjqx+5FwCl^m9AZ%Ro{G{W;&P|!;N~APQ$*Vf=hl@WZWIrzA-wZ z%=M9`O;|%azyHoy>Hb*aC=SzKLCcip@N4U}Wb_}~_ZKi*&5m=WnVbK%mi@4=#tUko zo1R{09#gvTn*zC4aI^;-*e`Elg^^9Oztnp9VbChw6Y!6t{RjPeu_10L`Xt^2pX>bf zsb&9PI?*itm%YY&5LEpS3#SM#BHZ7VJ3j{-QO1I}AxyNa-jha+dw$2J_m>fzm>T4?No`$D}_ 
zc!8)hU&nba{F*y$u9M$8dd9Ydxedxr)$=ywBE(Y(=VMng`f@S)BkOT8L#w18J+1P? zn;*ru4zy*gOdW(fZkAw|zCYKaQszrA>t^4m)wMBL3ztFyEInnRIw!^krd8nuX->#* z&p}*QAmxP8=Uj>o6yal8lcrBcu*_wSBwQu^FYqfe7!71qo0NYE8`&BJR2#4qPbBM9 zHFq;RD?}d(GsAe*=0E=wi=N60iqi`B(XoU#@xTAQKKEHVGbaY)hux`rD6b-E!6+4s ziFjgo!n4K${;?i(>2PM2r<{>ZN*sEEL4*lNJID2{T6BvYsdxan($g$*XUZ8VM9IC3 zta)I!D34#<1Z6bZ+PVuehA>~IMTxlGDLr}jp($^1EU=G%L?k8H%S`E6=zp1%P&$l_ z-2g(}%hnNZ@7uRGE?kAVl!hYW-4Q;u$%$3Cp(Nl4m5KY#)?wGk2LISjQ`Ff-%o$j5 z;MuL)x9Q2Nhj}#qTJMippU4HMKVvEJZxo&7DJpK%KHvl01cOCs6zMcQH%|lG+ z_WLLT>y6(E72D`SVN`m1KC*ZA6%Kb|Zk3!N$8AkzHbe5ACBP#|l2St=m}wtQloko- zBI#nH5SV-NE|)LpR;E~ykL^z0E0*K;KL45A@4EMed~ij^r%FgEvGl@kBA{HL@~<^7 zbC#I&g<%9?V!8K%VT8HdLKKp)=d;Y*RQO!=QeJLxTn{Xpe_V`Uo!oBIps8HU_XqNUW7OUWYB|`GqBA^4EUy z+EC^<2iz&;fEGDn4zX6qN|>bna2e39SK}{oB2PU9BB|1)|G|vLeSa4!;>-DkG^_5T z3`JaM&*lz^O~{yxhLQ88;c&N#7^DWqJ!?W*bzNldrN@oYfpZqHn%kOdoh)?3;hqXa5YhWgz zeB^8)N#ruV!EA70wJR$Yu(*zYl|;zCy>xC(c*#y=!8&Wl)Z%6o#yWFD&g9z{_8$pC zV;4$$22R^Si|%!#N!-+a+%l?=ItuWI-@{=PfkHTY6-*rnS=m0d9pks))STllUi>*$ zOBnni3p>J^4m{g8>%tzo73YTED^B`~tLe&_UNZa$t0S?dMXOeI=vasI!Al5_=w5m$aD0=0zhCH6c5`iKZhQQJU4YnhI)BP216Ko)) z#L(Dsju82dS%O}%vD(rlz^y(cSkRcN4sRo1I^0_xEn}Kj!)MCmX0hg=&&w2lDZ}Wl zm8oi`PibRsENg`>S%Gel1&rPA2uXX17=TvL6si{IUI*%mb3)dUCpPOO?ocX45fv(% zdh-xRNOB^LO7jiY>%_iX$5=?ls}>By>L3Pw%J?;4*uAqZtP^&MLUAp<`g5L?jV~cO zwit7$>36*sTtK8+{4!sMbDe7Ogz73{-EMRBKZ`#CUowdHF;U#P`4e{REN~M0m~kn? 
zsR5n%E$+Vo*+@lr)-G2{9|D1_=ZrWFX_f(r&B8$-I=Dm%mLrv43uxk>n4w3(D^r3W zLQJ;P>nCrMnM(3t$cD0MNqO?BnBLU@OpEP!wM+#`f0BK$xrqOuF9O6PaF#iZ4$?;h zh>g9MGph8$dSIUv-1MX-(c>Y5lZ9pF3H<^}Gky0Aa7-Va`pC#_dRvc+k%bLE1{bLa z9SA0mh#sh7Q-;4hmO|OBV&nz*Q399b>U{Axqu9_qcEBQz=R&GFPZ*?5YAQqKqSCO6 z4OsrtQdE2%?=LWcS~uQtsc2?hWX#>V6i>6?^fuu#`GfxyEFJh4w6kOsiS!oBf5xL$ zo_@X8*SgxeS#2SQ_i3dw3|7%X@iWaDbc!CIeipC6H;P|DYNf~dBF^4|cz0Mz#d3+< zHtI64cUfsXrK+1JdQFgZxuV4WWqTAO&#D zRNrHuQ`ZiwI06QrY@sXmB~ct0@WnX5H)Cu&FbP~n8r zyUevVP>=zYf(eL#BZ6%G0A&Cjw!_)X45TlReC<%+BX-eP#Jt3pm|r8VHU6sblV z6H(=;G$zOPG}^jhLrXcDLh&rOELz`wKSz54I8VUjO;Wzry zULJ7&1o@|-!XY@eyMKJfoFUCpzY3^d$Zt$GX~wR-K8@kI-FOOV?fCptSTZ@9y|GsE zGjXGLnTR_~IZGa~Qk_^W3I+!A{x(SpiH6d}Lktt+5yM61S^AO7DWVQ(*@$~e&hC=) zO+rt6WbMECSnep(jrpBv7L#~Fax9sgpcyjEf|Dod86faQ`M#O~0vB3#&0L2a|6-=c z;f*PDB|1oZ2x9hDas`ub8!29MWSxXpTJAnzXflmEM$)1f`)OV7?kws}g#y+#Wr>wO zskuKINbHgh1S-xe`IeCr>!3@nX8RbZ`}VW?h7K{Da?L>s4yj9ATrRMW3u~mC71~Xq zz^0f)PLmCl$@K8%5(9}XsQm};6rDUdRA-v(@rMNkAzJt8Vn*jLt$`?LPxHl-8)2=$ z)P0Te=o3S>n7n6HEz#S^22*DE?by*uCd@v0Ql&Wv<;NIq`XQwgPF%wiXYFrsZVc|) zt^)rbs z=QNopTbaEm6$b_7Ww@$=`5s~KYXG`hmc=gr)L}iFOFIc|6={nrsISN1PLrN^itmD? 
zX>-{@O%&oicJd(b)Dl|U%zq~3XBFI#kZ=rI`jf1+pYZQ+?u9qGvI*!QwgePePr*>Y z?lWEt^QaoCuu1Z&HT}-E$zn1iAu!D%YJmC%I#}fJUUHDAX9}53jGyP)9(yk{`{4s0 zvwReLoDyxP!iH)ec)$JKi36Ewumjx0FVw}CyB_mCpr&r$B8@=454URXGWO?Wlw)~K z?dJQ)Qx|5)c<*lp{jGN$j~fdFg2d`wXi|z;i#s%B5W_Aw5s)%hFZF)k4aaLj28YZv zB4iwkt@4Io^1{hwgNPsuJonm!AD82O=^8qOSK&bYg0yc!O*T;Gk7>9)%(95yLPhoa z{;v)Hu~|;;OgF&d11WwS6-)@bgnaRum3x8v!1W<^iJtEbG8)=1J`aZ--sIPwpY%uT zO&pyeTsJ%q*}{v%>rhd*4$2FS>Jc@lg+t2fcA?91!lsOyeVX@lAh%ywk0sQe+4o;B z8UO4(TKP#2=DEB-<#)Sl?8vh((&WH2MM?41Nm!x{9irow>1U%Lgbg;zzR)Rb^X<-6 z2%HL zZ*6_s^{E=Y@P>XvY&s8}$_X}W@Peo~e4MGoR4(H&WkI?@(mDyWn1p|N&+LWN9x%H-QM3yc3+LJv z-mI<6zBzA)mb?DkqC%g@g{dc!IH8*1CO6{h1&ZkMn(h?II%{G z5|7NL#6^ZQGDxN&-ytc%=VfW=`kxp|P~!V3@GD34Mv%*|fMOYCir68HmdLcM!m-4Q ztcitEi%Wb3y<|>(FocAdG2@;)P-JZEtdrh=vROf7^u4s2c-k2 zq`1G1*FSp{Y!I-wlym5kKn*S1ww1<|*b3M9zFAItd$n8R3C%3Jn+gXWc>L=K!fGiAo^ZQJ$?js_s{;*$VQuUS`3`WiXZAZxa0*>dk->)RP= z+g2)yf4OJht9cb=-i@Mqs3}|u1W3udBbiM}t*aq=xWh2tk=8qo$5EFz1B{vUdat8j zxd@Hap$u>e=W(y2#nu)-q|4;KTHD<64SSi=XHsy&knsoI5-qk*XU+#qaASpx*SoEV z34pW^o$Kt?1qrRFgMge$$^PpI;Py>aAMNC~9Y%W33I2BxNx%rul85eWbw|nRu>W`Gt zh!n_LoED@Moxegh?3xbI&enk@sobk zVmFGNgSVEv=5D&0$a?>VDunS2?cy9AJg1)}xr{z;QJ$Vr-*y5|iCE=4!>~X>8MN@e}|{ zYwI;AHJO>gr%BCBQSzIpJj|Ku$KVi0g*2}UhY9I}4R}TnXA-S=`}|fbsHZWs`HClS z$&1^qfga;94@nk4fsB~{SYQ>CT`5}jQ$dL@8*7qyRCH|o+flURF!l-#wm$Iez(Jk? 
zwtN{mVjN-CSB!h$im?a{(yBj{{(e4KC*j_o0Sx647n7Kd!2!ae3a!C^guLXi^VODd z45tvLZaWrXtO0v*<6`>jKoPKp%enyb1iP29XI@qXU0fI`bXJ?nsX!qqs&1oqO17xZE#3sI{yTDT2{@ZD`u*U5+tL<)k@v>Q~w> z#Ud@j9hYzp(iPvQ9P$7;v7V(q3ajOX1vkK=zag}4QaBH^??ol@$d%&XZ=mP ze{>7zl=kd}}&-4p^C7juXVa^q@$C5no4f;*gGSqmm4 zKGM&xjih`cj$mHrS7j`eI^#PxasE$Lm&imQnKbftpA`59ijJF= z?}aH+e-H*9K{i8kdLDRs&<>^ph*(>`6{ff=k_d71GU-JQCtKq3ptw7z4V9u=wTJYyu`p`4z)MfhlV zi9auEt%j;_YBBrU59G8QFIyV<@O z%x@KOOyD}zpx9|~L`pBA;A4nJHn88I21YptC~H43`ye}QTp|N}RVUI9JX@82phb!L z>AfAhHPvJG!(E0%ec^O73=9mEQ!KrEn|?EFnA_e@i3<_p8I!IlGrwW5Wq_3&quxfg z9E}aL7|`r`w`0mI5mey;oUrTjS3lk@GXxbI277&Kn>M1)5^K`#&1& z|9H|k3+yGgiArmnd~TBURw@oTk*>GcbdE5VS)K~+n4byD*;ZRx)Mi0ZSr5@oFvAoz zij$OBjy`!4um91@;xCZ#M?!xr*&SGgM;&r{15*b@@A}Dq3GNY$w{`GtW?k%96x`@9lk@cx8+Fn){LyoWoK*MspX+x2xJTqK|3RMw&aYhewmaz2 zs@c7B=g#$gyD?j5GBs4yEAPC({C}MJwpfV0Ov36a@Xi%DIm-B)R;^ptQ!p{*Dr0hH z9`3jM%VVu>CAc5bUxxY6(=YQV62jBhrciWcp*qL>QaW^qP^@#2; zN@W&Q{hzn$|CGo4bHl$sSsw2HgTY|`fCE1DKku>l)MeHCPDQ3mqnlT@dum*F?C8+} z^)Bu?wC9Ik8aX`Q_iN;aqlVvZ-gxY$s{{P!w=#OZc21+c;cgY-~Q+oBsW9RC32sv{`7wz(D_g(XqWf{XYDM_m?yD)6+ ziq6B|TY;+mq~%Gh2zVQ`z=m0iaLT<^uXdZ7S*X(`6YaP@%l)VN+1N8~V<>*w46Z)r;kpiHvD2nbodXq{X{dSD z8||RL$n#_Nx9LOXT-{bx7F?62SNVL$orS0SZf@`X##RSD!llq>T2aW3Ngsc?zpb`H z;hl0LY>LZ0*WyrEj$3nfOf0?F%1Gl)EFWq8?zUsJOebh#xp=f*MZpeBCL(jS3re#L zHA4x3IhPlmDmhSbNz-F;R221O=*{|yQD=?Kz8gC^Z>)C~Z^bX_#JyqUm-HUNZc0+B zTj7U|8#gX_esh34GCWQ(MFEax<)&t42Y8;GpEO;s>h*==j(H2io^dXcopMh8NhiY* z4HSykFQ!;n_)`cCb&QHNBTw$`8+Y>ups{x0z|d=1a?QmsRIzZP6d!keWSTvM6R5(Ecpx@z0J z9TNv9uj{aI=Zr;XV=q60mEMu5S2;iISv|#}OtVu-ZWv_s6Nbix@>r~gSNlwxsjuWA zpT1nZ_C<8UM~4l)>narH)xG=l*`Wrko)HpFuC91}p?O)f=ckD2m9w{w@C7X%L=5S~Jv5A9k+N^PVdwpCxJ+~Feb;{-= ztNOQ@xnfonz0}sBarfT5d9+D$?wtiE(oyk(I6CwUZzul2Z;eZz?B7}xb}-0bP5nN1 z%dXyE*~iqTI;l?c0@mO}Fyufw!qiXAeYj{%z>q~VXM0j5Db*OZVQ;^Y=XIza<2-o*h126qDL7 z)#*y76F1lYso2wxQcjN}PIKLUXLqL{VnZu0=t7Na58Vw@y$VpMT5wXr8Z+Pa(T7jZu9G z*Qcyf7t)AMdPiui+RKuSJ%p*P;(pDi`)-^$==wD5pcX*6s!-E284QyJ9*(5T 
z*4ovbN>J(6vY#R;#m4^L8(Id=Fy=)Y2iL65IQ0UZF1NF=^)Zbp^qrAwkbdLvN>8;^ zM!CggPa0mx`cb`D!+!Av9jrJ`y;v=ozIf}|HRs=<@oLPF3t2iRT{|x=!11uZnm<)` z(27SJ;4|L$3GQzsy_stIvBxZH?a`||@an^OCBXC`!jft5q9f<(w;S;5&&c9ahL4>R z@wB{E5y3s}@adNUgn<#2&yN{Zzg|BwBr@`OhxDVbpZH=imqmRR9Gz;W!6J5^I(hQ; zA@^4NNT~ai;^b%PtlztL@6t`yRr|sL==O3ou}=JBdc8K5+Xz>4PQQqo(NDY1s0A}V zY}uMvT^=nMkqNR~^kkChFoR2%-{_>?WWrB|!ook+AQ&^!W9=kv<5rcz&d z_TBA1q@CcHyqT+PaXKu0*LkdO@XB^OX^k1#pwklL&a0juozKIJXVZmu%Cg=Oarjj4 zX;w>3QB1sRs$bO{c7P9YZ#L}7o~k!nJ<|qU>9~XVsr@-?6?#uWgZ1u6V9xn%3Ay_A zl4Nzi0&XDcV-@c|#cKIoA2pp157uN;G`5d^MhJv=^u<^=mrCuH&6=jAckWjYCT|`d zyzuenAsNdP%68|wm8B#58X$u!Cl0x*dQ5aeCl;Ob8P$N4LtMb$3zF$Oq9pL!ae@BaR-zEV5a{KqRDT_$XP%pOoi z9=+UNUkPJ47n4ZXp+n|#@@uLxYYOn>W$s>n&!2f^bIQ_(WYTh8N~8;=&nW(ROsenw zxxD#=%gnV%T$YCZJ`hK^!-5$L<86c7N|Q-<^b@cfE}ED%P?Tq9B|P3A6Qd;_S+=sn z{s|5iToe7XkHc%u_8%IZtWz8~rS(ovAGtWltrY(E4q2@5y~}egUD7*xR=v@ApYI|O z-gn=8zFV>9dsC1F<&DLnv*lS_-=!_G2}rCl;R|q@*k9jw>4&^euWG6nDdiTd^~O#) zn6hzS&!?yUShNNmw!6OM;wwJ#VWU31Cv@^f+PCb2-D3a|>J^F0-8wG2IqUwDPF*}d zT<8*Blvo_J${C<$ci;W}L|7^gSNyIcpZxlT zS%+62ZZ16i!ui$N_Xqav`(f3;p&vC}>2jr{`8LwW<|*FI^jH3sI(~R*T749rR%1G>J8B$XA0IhV$bRyf@?p~T3mK#UFG^?xv%p{{w z#9aD*M#28Adehf!Boui0tWE0Z)!G1AdJ4cJHg|d-=uIgh-)t_vxirt}SVhq;&%&L% z7M|+m7=3mby7jirgKy8XPp!u#nEgOJv00l1w2I_cQ*|>B273fRdTYidQY+bCOIpx^ z6tTeJS=*~>0*nwEb_1x9LUf3+%cv2iJ?c~|w4jlT!fY6b+lrwA{sL->cQ4Nf^_v+0 zk)5;D0$lXXp13Wm1;`(Je{Yq{6^JiQ=~MPTv1SCaPAZ+A#O*wd3(dR=^=+CZ8SJMZ z(RqI4Ld@M|cS0e28Nf6F6dE(1u-lI51h^Qd=BZO#E{)`KbtxRPX>`&BbKBo|vZ>)= zex)M1=3{i=;`5D4uJ6BMlG9wny88Q0UOtzhxq~UC^wxKD9{-MtT3A2Nid5@TsWNH>9`*TMpT( z_i>(;AD0xV_I|oo(0GhLZQRLlY7f7YMGa4=+-YCl2|eEB`%;q z=agO*&#Y?#qzWcG&CEGT^{Zyxv8FVbi9x{>M@MoU>L;|b+gayG)~^1u#z4)^$g@@4 z*kE$zriQ*$xw2x;Jw;^O zcgP*j(VtC&`ObT{K!e$do;88k^eU)bcB%PmJDMpqw|G_<6&4oqc8nYj+C1n`Mm{Y! 
z;gFGtN5q4v1R->F!I ztKHTMthWrQ5kPh>>`#?z&6c z(N`@jGSYHR6(oGb$JMR3lBvxfS}x5o_1AM>nG@WLQ-QRY6Y8NX;N2R+o4ee$KUheK zeRcS&Gdqxby6eZ?Tls80FZ&jlPB0)7ZwKdDEw^GR9>1Es*=R)h)S#75!_=(nz5u*g zfUkYk1R~S3Yi)SCo_T`7n_rK~K8!y5ZaUo0;i~dzPpgm`IQ>b=jw>Fl&bi%O_jQsW z>v;Bv3l*;Bl%|R!KRkmyVM>g3dByqt<`ffIKs|@qeR}s+l}8TlNy=8d+w&kn!&0aR zGj|UztB^-zpzO$wv&WvZIj_^=q+0Rxy_8Yq7bA9wkww0u7{+O4!G^1>HVu~R!q~Wh znYsh@o#B!ir`4+Vl=4^V)Ig7qC7YVw!@-jcXT?yZkm;*f;yct(+`%(POe!;HcO7zf zNj6K*{$1hXste6Wrcss!qci0#+mQ6FvszOc`&u@x=DxGLGLlM=Zvi33owE$Bb39l&6mA+g=U$ce?QHH4B%sRS zEyZzBX{u&s-xK*Jj}245|M;-E=T3>o<2pM1>`}JsehAyZm2%dlJZD;uio`a-wURdO z8@y%pnGcUP$rhh`aQCv5)eSoDq%!F%1&0(u^G`lp7kkRuCb- zUtA=Mlb!>Yk0l3-DXgY@bg1L&F9famMd57LcV5|Z`Q4?{P`S)=^eCz^#P`AM%~;|E zSxA>^)@S;=!VOha-meLQG1AyIKSn=i$|^X5=uLeUNx!;R+>0BE&m)sm77@#|4-KO8 zj7Li)PCjR&-;W%+XOilU#xerBmwt?VNY;BE{2WqMGVIYa8rVX@&te)h)zmRmN^PiL z`stOYXMFBAIPgps)hQb=`*H#7L^9{MXwg<0-lkFOv1F^KFio)|FH{)Se7G_)i!2s; zxHTzi32ce~fZ4xL5T6xdlIqD_0^iunO*`uj6_PdDqda2%nKzd=?OsGQiGO{*F}Xlf zuyy$_XPc%mIxt|>tFtFUTsl+SX~${4%+)9uOBj%13$8!(X19O`~7{`FXj2aeazS6EltUcHR5zA-PU9 zGwYYVT_d&3@H(b9wPea7-#hI|p(ZO9+gME-)$94Nn8SS(=J6*-=6c@GTJ<=l=m;Eu zi^BJ}MJvpj-OpOIlGJYF+G?j~@})M`ft2RM0Vop@R0rap#seL9IF&RdzePR#6;T3_`Qzt=6&KYa4hMu%W@(1Ct4SK2d@V>>~i_{J0?t4Atv z;lZ8x4@RtcdaAC0&75ospQr1Rs`_+vE!eK}_|JhtAi-A;fUW*?+wuGN?{Y4Ua8-fY zq>Xz=TeUgzgCk^m&Vb&mlcknTLtnyP zC{+dAp|PnQcWS}_;}pKy1x`u#-Obq&{*)m-H--=33iO4`p0mEwva8^d-H^ivMz0AS z*|G@ul8IL)LeKi-OS2gZOmmjO9?oTH8YzWBqn*{dK%*3kU?GylMAX!nUgaHA(ekp02n3CJkeJ5L`{4E^e9xD$nl0fZIc zcgYx8{buW*?X`Q8u10kx!$iW(v>Uc$p2V)u>8sd;t`FC?`QE<9`3?<43Cn_DA#?{fH#IZ1>)g>iueI?vr?*5t z`^Kv#cwpMJWU=TA?cP(7vaQK@G>KyPZz+jf7LYI7OC*S+_7HHL*I zxxsO?8?kZ;?4D=NWoOlIzTWJk(ce?@iqOb_?16NuLG!?KRq`n>lI-sRIOYb z+Gep$yXc*H8l0(HHi3)6b`((XG$OiRPK@fujlWfIoK{1;KFIIn+^+tZmpC`abXvNk zH#O*vrKM+Ixob*ha#1S$E&3o;cO1K$}S-IEO zpN~~8fbwB5Ea*gKK0&b8#vddk#yTO6Og6K2FE{DiSoPuRlVg&qyy`T&$~wvdU{*b9 zlXz9wc4!|v?(%T^Dc_Bi+u%b^O#K|yM(Gdgo%1uN>bbonW1S%#$Z8bsSPa^}{qT$!Ku4Yg8VV2dAo7J`I 
z)OWKTsk-nRKFZy*K9}B{#V)KfM81}hUd0>$oQZ{n#p9hZF)@~0Oz2EvJAZtq^Bmb} zA`Mo+B>L{_ncy7xS$y?sJ&}KGWu!X*=g$&+0{#S0JJ#ex{Pl*~BUB%tF^yM;)zTlK zr3g*2mA8szGJfU#JTw>!sS(F<{9dT~(Hn;RXyx0I9a5r(27JCw2UEUdV8+lNGe^9) zlvHUlv61ShV-L)++sSzV?<%r`tG$0}rlTg5^|E}YE3)yq{h|p8o;B5>)4$w^1)rlP zauAm9Kmoi03#V<_4fEZL(X{;WvNefG0{%S|MU^)=7+FVzkeLcF0*U9Wy!L&Y`WX;- z%800Qq-YNKyx|ok1*|*2QeELeL%ONO#Kgo?q zY2CEFMnJM3$_YnD^~=dpecX{WK8~~Bj@~ZjJltgTMuV?kNE;1vjnA59!+MfHo|;%m^=QkimrU4!nB$+5 z*{EcVr8XX*^kPW}2&TTmFN+K&sS6B|JJf?6=?ZEr6+Pw_6_;=U#e3IxPJtjjd*jk) z5;APi^xKUyhdqm|f}l@_GY&>w8UQApP7#^~Mx(#5mk7sZC^BvUPa*SxgJ;v=-N#N2 z!PF+Iw=?yU=(VBwUv9&P+t?98){ZAAyPT`nrU2qQlbX&i@zaY%B%|{`%+8zc7$ubb zE&lo##fem*7QUaCh;sUYB)7c?YK7N+YM#zj2dGIh?rszmNFI`}Ma$UTBUzPB&G-7K z^{+AF`&)1Oydt7C$_+=V{>{erFZT3*-@%~Ys7@KcXNZ)9VFy&7=r!FmE6GhzE)ifL zhl--#?wLA8^>0be(^$_~xl5SL^OCYy0D)BYk}@cHd>uaTB=dz|mY!?nixX0b*|Doq z!N}fJWFsLZJeJ+E^^*$F9|1D#^WyeW_O37-nMhDB1_P%z=;F2w=}WD{{3D}9E0Zd` z1V>u+{hCToSD=@{+0TJlV~4UqQR669kWNX+4lM>MiL979xm=FHU3w=yIqzjoPLQD*<7CYAsZaht;$cgilJbXpXLxO9g4H1)KQSPI1lgH-kY z!NZp?TnIG-W7CXrI;;8-b}>L|ieMs`y=oi@Kx5a}kEbQUg=fLRDz^5HDQp&wtM9=99h2>X0W=8Wm96 z&wRd9l8H!%LJ#~avn@}owCW7bosY#~Q>SAk2X+;qzhx})ti1E}&$(flyEXqFU7BeEqK# z5Cbl};udo0!bX~e+p2ca9yOj<7pIFYsc+Jd+O{K_YG!d_%9AD?hxQje zv*oyslmGtV_C5~#B!G$dBXpS%WKwKO6{nUAI^IpOtB#B!*$y?%^6%f|YkDL@W(C0B z)uh<^S@}|G=x|7_v*~?A$^glNu#_u%`KgK&FJ(`An|ma8(TsJ!I$4s#7qYXU_w5i$o zbK?}7qLu2uK0T%!F{?UQ9*`6$5zz@NPc46yaK=})u6TbXt%^87TL%^H*yZ+)Aru^9 z0CFZ}OfA0;*(N)5Cbl4xexE^Ms%&*HTWod~Z9p)<} zH*dYCg^kDcv26<1wwZaAWx5>)z#Qg_Z~9X@ppj``6;NaK{TiY!A}a~YooDGXF(oy% z&G3b5nhw57)|>YALQSL+P|<;hu1`xGcU-&=G-Le>9+JTNPHs_+w`)#pj#;0tze8XV z6BvepGcBI)gxl(&dG@VI+|6G%eZBdsSP~gR%gc7+om+)4VzNZ%(H}o*F&dl!k|+Ph zy>8{JAKy=s08cMVkt7RP#$7@T>zAet%$x%-Pr#ZoJ*IO*hIxwp#}NwiwhaIHWPkRd>(0p~ zpR;!uD@$`DimS#ObJf?HZ})y18f^G0u-_g-#RARGtHRpeKjRmgV@+Z+(RAa--x&iv zDV4q|&yt&F_1@H*LUmpO;AS8_2 zJbM>fKBFY&0F%>*JHEDG3axl*=wJ%A@S3~)^>ZG*IQ)ELyQ{AH(9+cJc6#0tHkm5S z*J0S_zcmGjpXTu2e;^`2_6cYplR`TPYg8X0U^nhFpEH4?A%o)CMDz1zHH->bT>(w! 
z!mcagt%f=D*|_iNr9IZwmY*fyq>Uk?!@948<8^^EiE;9DV=tHpDqH0-BO!6$^W-`6!MzM2U{ya>HTyhLR|HkD1{B~D+b^ew}z@~@v2 zJ04~WvvOBh>m6zsQ0B%ui6xFG@ULJigs%uLAKd^#lO}%`|771L1RG@mh&Npr(x!uN z?)iJakg2f&++_v4nnQ}!S9V%Su}0=Nt8qj1hXbt-{n`nOdsyotk%##3v*#1%Bn!*jS!5@61zJ0_vsi`ZL)KXE=Vbo$iXorshFv;;;dQ zvKWre-=%oaCugNI#tVlNOu0;tJ+Wqtgld>Eh)4#oqI^3+h z&JnlIWry!EbUECA9^2ooT_8rcoRgX-YKm{JeXLLkiCk;yE!MLD-Lbb*Q&S^8CRSx$ z+H7^Rc~WUNmH)#$gB}{QrIRZB(t}2v4zEeAvCO}@_LD-R3sNF^t}v2jTGK00Znw_I z5uv7Tu5_;S++sBpQS~R)r5_1g0MZowl#UkTs88g>@kB{^*x^q4S|BHf$6}5>LoH@1 zpKI7@=aBg~7l-MwlTA}TN<=p639@v>;3VlMJdXV{q^A0Pq6<{EY3xK2yeGkAAS0Z^ z2d4f<4MzJNBDOw(OpIZ-?=(z0VZ1-|?2(=uLuVe3jGP}9EpO~lAE8x+bQNQ#zLs;r zRkM{G`^kE@Ke*c$Ewmfvu}QSU1r%&ba-%>^5x1wf^c*9P{&JYIM+#Jxq@~?2 z;lm@9&6;0zMr$k*9~zXdkFc_Q)U2qNZ64g28&;=`Q$>G}N9l7{-Op0${}WHkASNQ7 zhP@HPC0~E|%_-QfA_%don#jhMPM2DX*eR)w8G^b}mc<^$iZ)IpzCa96I^f?d8u>&) z(|Hzo%=i9f#dxK6>(tUiLk%Ob6zeOAs5wsRNhfZvsq^eqZG%~P+i?NCuIaEMbI+-M ztG{h~YspM2zfrwTjy(U(g-=d}52{ao`@#2K2S4rgG@`J#6;bT> zO|}GjOq2s%VIK!l#Vr&`h~cp+AwWoAU94R~6zk`zpsUTLsf&X9zd}Zlq$MNYST^MA z5|d2ruYKb*ycEaXS(qvkg1lgf?URO;gnVoEh2uYTPj8k7MpEB7L2MwEvk|-}?{|>H zTTg4{Ce4JQ0Kpy?yBnE2NCo?Xbt$EI6zow7t_g$Rk= z$rM3H1dk874T_@Alo}{Q*xuUjH0OJw`8h^?PT>yLXGbVQr^1+es|mWn`+7(6hk8IX zZ&5prTK`n6;3R?`$m-X@eA?aU0UxRlN$oFeUaCurK! 
zFQv@Nojy^%U2!8}(Wh2Lq0AvW+5S$O_XVHvexG&nw{(+DP`UxYwBLD>N%fVq0fgo{ zs2=s6cka*Xl+g; z-Eafbo<&gPak{3Ep3iw|OQ&{IU199Q8Lf6&qnZm6nuJ%=;ROv?nSwa6KNswry(Jj} zF8t#pokDM#$koY@ive2G?x?F4mGLI=xfwqDTfzty!D9}4H{4-3vcNG|f*$&ksaO>U z_wD<2dwg}VXLS!!n%he}%@7Aa5@lT>fBi&jMc7Uylg0%uSAF~_tB4auJB|2W%`>mZ z~08(rP1F4sYxpQ3?)>GK5ok|D56r&N^ z%orQbjIAu%l(3`zXt?Bs)TH8jN8J%6Nc`0Z@wr2#gCSka_+pHqk5W|JUMC2ZM=m~H zVB z7saYsD%yuAZaJ&0s&U4;7cP~;M69e_xyk{tMepMi@rvG>wZ4-Uao1tC=lj$53lHtz zVu0``4H*ZpIIC`eTf2 z5L-%8?N7yA5@T{!+?CkCFymr` zfI}k2Y#s4#JjAyP7|$$r1h23HXzGlicj~Uklr-sR3%+Fb#EYldA0HefIez~}a<LKQ+Hd9xMe<-xb7u(mP%?!- zTw6{oYtsO-#vxER)1!`TPYq2mG}yGHp(A40%)->UnL$=Li9-`B@4kwm&!i9xmBH)s zqgmHZ{K?RmJJg6PkpVJ@O*>-%t0vk~bz!f?_qs_&S&L#l_BLJD~CrbzlhaD)VL zVMilw!_v5LlJ`Mu-X(ZulygWFf>aWgC5VMx>OrV}^PD&WwY0Cq(-;)K`7Hl8i?H<^ z@Tq>A;wnk$gjYqeiN4lFV%82^cH7=x*VS4405rQa^&jf=>tU3Sa3?9E#VZZ-kh4rE zZg7R6Xm`-&(l92jk%7=C^N(Ko-t;lxP3+=DvP7X*{X`WNJx4N^IHx3gM!jrD9^&3c zZC;3_ArR6u9$K)J`HlxM8pJc*KyuNwj=-oU zern5=C86Ro!K37DUGp(dn0nv5I3D2l5s@ECACJ-)!T`DnN)q|uEq!Vwq*N9qfBw%L&XGUM>HtR^U>1y+h@^iG z#$nEe!z|BnVsWtDtetyRC`O46kTX*@&xSF%El!?Znwmzq#hnSGzRn%wDsHq~t#c$x zg1R%+T;aHpA16zjl<*j$j(v&JqVAI?RoRgysbrB?MAF#yQ!|xW=6@$jD~ucek1X#= zDfZN?C9F|Fw}GreeI?y&cKbIEHlX&Cpxy7??cK|6_Y%hgP5Dw-s7ZbnZ;eb0PywAe zJPO$?v=>F>gLh5yq}oG1TvbZ&wS}{qvE{VJ>IMyRm)7$>fA~kir#5#T=jQV2M`gYn zc^D;q_4bLyvP^zsDX>HYpE@u0v~1*N_-b=%z=f6PJDT?*cy*#BWfn83EOX3e&J3cL zMN5=MzUAztnU#bBA;=<#?;#{=i`kpCv#5r`<3x85S$GTPONYEJAnOWl&6(QLZ}B+Q zn;6WSFj-L&>X$_wgS{qvdZjc?s<~Da`n6g0g7ck3{E)3wQ9QE7UmPl|fpjrBrt~^E zb;ggMYqq@iUqyPw9uEP)3hyznzxXH<$5)<@qaIm&G%tt`LZMkMbeJ7?q_4yVTTBt2 zHgnYr(*>KU4umYqIlPGQBVx8HsNyIM?vb|I=e*ohr?41f3HCLQ#MeoApDBeeofqmu z?z_221u4coSH2)3>N+{2eQ{4s&6*Smd z&E9jaP2#wGDWfq$hy_*@@bDte1)sFpB`s(~WuH9xD4R>gja+O(uYIn|HArVsJQ(en zLm<#Znig43Y_IlMFqWYfX4Rc{NL7oy4@vkgM-ct2CAJHA&p! 
zbJ{4SUCN`T^8U0~`ZtRpAB`lug1gj2X)V3nVbRphg80p>mgY!rOwN`WqC8836I{^L zPO&jx$NDID@c@Fp<<9(yzb7Xt%rgRKHH-XC!T~?3DZ-~PIJiGXO3xEkx}D&UIEQ>8 zd_=?_J6TO%GCG`_j$NSBFS2twI$!N3mnvyNiq&|O)`-Ik*jr-rf_NH?;AbtZS*m6e z{?&z-D>*1OVUfHFgK;*^A_x7iL;lNYi62ejQ+7*_tPbUtbgSZ!DFi-U72~VJ?5%7P zG06#k$JmVRNFarIiPcM`X|FI|^_m<;TO55oz|c41>-L%Z4$YfkkmK435+xZ+ zNQBC8Ql^LqnMy*Tfig8Hb2%e~$WR$d6jFwgAyh&_ks(7##xf7zcDU~6d++Cc-t~U( zTHin4THmwQecjhpPUrbMf4}3{k8R(!ZQpl*RCJqC#BjIN&xN2vqw+PBnAA~DTfxNZaFY;@Bu|~MjDfS+>!-{-{UeIvxRFOj$kzTQ zjYWt#Oo2Tv8lJR(iHxY}ga{Nr_l__Fqx;*o}xXNGeLD?K{sKk#DZs zkaWi57m};3h}vdJ1yE^m?2<=E{2JtdEj$+sn(r7MWEDuV5j4EoPGoEG7e5l#!l*HU zRtfok=D{PSx##GITyx}b$oum&rsk^>PLku16iE_2JO-TyI<_`M!m|nff%0Ud9a@l6Jx&nNIE~RDSY3LgYI9fG3z+?e3zI z0ZNcV=K52}E?mAaY`u(UGEcRDl}3j5aOefG|G-6BO)A}Cyv^+0NZSo%e`3ujDwTZtEkf_ki`N2|3OP>S-Q;lqOe=K5QjNK(Uh? zW*C1@{A)96|1xC*T)*HHdV2W_+#e%QoqC9(HQG__Tb@p`MPL%hh^QEmZJp^q&;9e| zP#Lc)GP`a#S3Uqi#=_Xp3SI>^w+tC4G^?W(x|ysam-Eo{uZ-k+)FKmG6cm360^&&3 zvfwhB2ye{?Jdh=&+XQ;_5b@Hk3>Gpj{Ea-w{?cM&qjNB|AAw7u&BZNA` zzSE>ncgq%~004KwCQ!Llw)rN38;CA6_)dCL_xJh(s2wAYA;N$IejP`$TTNaE$+a6S z)T1@$Q6kd`D7_!t7o7kg-=4V>#IuBZWcGIg+|X^}9a5U8(3x+r0p60R0kdP?b0iU* z*E4_d&0kuEkwmqmH#|G!3P_WN9lN6d$fo|O4*#00r_+K(xq6$8h7%qTS+|6k!1E|X z2ihXR5n^23gY0Uz|1ig^L_Rc%^?PGk|(CmLy`(Lnk`)EimM zsErV-FmXxMA!|u};05RTONUZ@38G6QPJMtIC%;ij>q1F&@#c?nb@=jTzx~gW2YwW< zS}+27LWl;=+n96(Kw2}#`~D&U`%}}T=>ZyIL~RU_v)Yn6A1LB?r$k(u09Ao*7}2sB znMG)hNTZX~+`s$l|GEDifa-JOEEt31L~Ty^2NON?{JTw3A{!lC62A11TTH6r@MLTT z+g=m9N`%}$rM>Z9sS%gl!D`(S{HL!1sUf1 z|08>b%0s@9s1jn74xJDKq;m(3ld){;YShn@zAAyw#t^mOIWt{WKmTgoxKCBv|0y(& z|MV{#>HizX<-brc|NR%_q5O~P{Qtfm|9wCHf4>~)7x*Hp0e(>eatQW4(pCxGk~R?R zL^Og^3Jv7w;Lw)afRa2KnCv>VK|?K|Ah>)9&vvqcsPxki#CAhwZ~%xQ1w*7-9E{0U zVwDEr68B!_t0*F}aYr5z0SOIjbi`Q|4N=38R_sbk&(b6uD@nxg0j43l=HuZ}RT8LS zD(eFMMT#7TNku0PgDN(O{F`W5CQS-CVEfA~ab1cFnw#!XA`TE6bW6$mo%5CSM48tG z32SvrTmHn({(qdMt5ne7qzApVNFy2AaCGbRCL)*g?t-1VmQecjrY47#u!+hGQJX8PU*Y$L_ z{~}nM#BQabh~%*K6lo*{HB}-KDdOlQZ}j&s;7%MEI69%({$1qmJz@cg%pq!4wf=m|p|l9^ 
zcdrEGAqQ90liw92!g=rWm2F>N6*$|re`C?r?7y*uD(&p;1 z7InL}xQH5>v!=!`U))HER?e^=a{4=Ll>5Jw$q)7XBa_b(gawu9mLRe96ZM`r@6uf7 zZ>6p`ZlwqMs?q2u?9L&b(%=M3t|jSZcJ-)kdn8wAjd z*psC5-oJHPs-J}5upQ==|BwV|_WyvYQUa>}Cuns1{}~!(qj4B%b3~fG5k3HFR$A|_ z|7cr;^bRy=i_5oF^VfSeC0#fm;c=kx8j|aS0Us`AlSm?o7@TDz0O{St`h&$u5;T&C zJzC?9V(U(bR`aD6=m=mGeG+rrnxNJW{|nMqkv3uwAW0V|EhKCtiJPY|^Y_kwsQtkL z58?-{s+!N>;5glniv)fiBW;x4ljH+v=@460(TD%N954Kj)%jJf^1MJy-dr;UHjsbn zl#&JiLCik?yK)YG;z)-!8xx1^-h>JOqVN&UoVb0o(P$9%gyhbXg#hYA3?g1fA~u$h z%1hGnuA+hI=#c&7sR49HB6et$c7389NS1-+7#14OF7+!+_1^n=(18xW>VjMM=|ByY z{)g*Z9I3r-NEThCRP?$sQO}GU!~u~A`+)3dbO85bFiO2gnF#49*#%MhJt<;DK|2R2 za@5S}Mym<%x`(qSsH3#s=!XzEbIA*hz}(^QBk|y~lV0{b_#}BBLas;h#3B}|e&W=O z5ijg{Vz?@i41~`8jom^*nSKZr zeU-bo4;v*)+CpJQ?!o(&?%nP=6ms6Tu(2gr1nCe3m z0t26cirhBS`*?oDWP-}8BMr$^s|%y+(Y%S_sEjy|VH#nxNmUZ)jV9=^BdF1ySB5)Z z!1pgflH332!HId6Riry5aCzpJ=a9#smgko#ZTt$bP97@?P>+QcqW_ixB!1(Brxm;4 z+XIQSiAHR2Ld_E$q;Q{5D#UPi3+xc|Wm^VwWmH#6N(;^(owI zq=RQA(RbcWEi^vtG0{_iQ&W`KyosEQY=w9ri8s8ykr0zaiX_w=99G-O!o&*<9SiS# zkW@mNOKvtsn|O!*MP2b7m=lw5V9%F1+EmP%lGsC9Gmy>$SP7){i4BZbI@kw<2^6fT zk|>HfsJ&ANhTN2M5^_+tHHTf>I|wI5N`mX~$cUmz*gKI|YB&Z3k26RCpy84d} zLjLDQH&YQhCxJ)38LE1mrMR6If?J?F<|haTbl?>dbWT!<68!xZCoovb=TYdST%>ZZ z2J5i{rYnp47bs(c19QL4+?1w}fg3%V!JbwRO(Ij0GC^IfVqSY%c?cE08FrBT@P*(U zrnL@K&o$l zlj=taQgNDq^QV+`bBylyq6n;HtlDSxH*gDFoqycBe#nztjD0hTc1->%mY}+-#R}BV zqf|$70jS*Z7W=!K;m{_wfg8xDw}PWHMH|@1jUWvI5#Tuy1B9XM856@-DU4-@Fjn9p zIQw;E^wla-xDuK)4yTi`V3zF5&Bz|-a&*q!>{z|+*zW+ z(>Y0)t$`gQ0eqX`*fTi9U5*8JU^URnQ%{X&EJ2NHlA&fFmVH0LF95vaPZL-(8A>A^>vXDp~+JCdP5LM_h>0UC;9F zQrEF|SzTHHlt#lT%Re|!k7K0Qbpe_)EMDALi7@2|%b>~+e*=n*{PE!$Z_s`7_Y+j> zVbTlK@HfdELX)23^3%ScPQ+Aj3&Eg*TTt~8npy}O3nwiAO6fWOQAt>4@I}G#N^}{u zv!PNz6doge2XyCD#m?2^Y*mo-#`DHt4iip*bNe9_3`iVQq(1dUFYG3DQRF@C$I`Gd zQ0l`8HC~Ktrr+0R6UK{P<>P6V0&W}C&w?uDBprM4T+y%bAW_{*;d4u!td{#8gzp17 zRn&Fq4tMOyK*3UMY;(sJUoNI@wv&+{I}Kamkqm_1&js30f_N`8%+fXyQwg>{a8Dpy zWro=|R&T$L$sK2d=ge<+{h9;0-C$e}PS{*<3mS1Hc941ndkYxx>&MpO`MuKr&3@E& 
zq$_SLr`2o93UCi9`2a5|4$Xwpyh@^Q+{&;O&PrXfb~W_#ab_C+-m$0~Pa>me_5#`L zR-g@2F8KWDL}c^4|^kC&rKA(g^MHrDvwiW9(jP|h*L!|VgQ)ZXC|#3g&)|= zV8F#*zu+LB$%61Uy}KDw1oG9zYt<1YQ2%!7_bEqM+cT6906;* zjax}T6)Z)RN&UiQKGN0dXMu`XTn^GX(@y^@NY7FrPQrKxEnlz{VKd@+ zp)Zj~5&$ePT*NY1DSxgP`@d{MNrUGMH+O;qSXO)>DZLc%oZl+&LnPqztLI~+#&b;E zyd6N#l6eyLo)gj{A1+wuCMExf4pR%8B(jjbW=_(zDOeD z1WwnUrKJOZkOf+(F_pEjn}mzdBx5*rxmlkFCJ2LqT<`gPK$Yv8k~%o}M>A=KAAqpL zh}RHr$h8AmSx=m|Iq3p7e-;s&hX2V+llS$g+fTuF5-IXX(oFo7WtG*q8wxlWFs@JK zErbd(BM;Zv`A*nkI3hT*`kH9}dhdcXn=l>}257!_m5L{b1_v*D$4*8%%_h%FX|i{b2FpF4AlW*kFs$`WTFQE!M3FZuBzpW3Of zt+VrxinJ^t?~)*_k4(HXko&}E8)D>0awI(xhD%?{kVDvbH(x;#X)8etRm7e=R3Jz~>j$N|=~D9|(Tj6gkY9R8 z;*C6r04%Xu3m_aql@r$yGW$pZ__Y*dUsa}Kn-V1`I|q$u$qk4Cd@}kr!gbzaN{7^# zkOfT4I`g>}3YEIZ!6op^ivv5OT;TY5?2`Zs1McVG&<>_cA_b(OJIfs6RTDW6l6@3R%7o;Ukh01Nv+F7(Zx2|JZ@;6G6hjbm266ko@7=29gVg! zh}3bUyGP)<5n`=xEQCE*XUf2*5-)hYrdplfwT}Onmu5^RJb+w`Sm7l zH_Y#F-xFJ+g$HC{?G38nq=Q81w<3r3sy0%-nz2NeE@3XsJnj9v#ogGjev)}`V6`0c zpyWpWdy$NH{H#MBDnIn$SbW*%j!C!fvY%)B?<9wtq$K5Td%rdz#`$j5Va<#M0ijmQ zghf(V-u`&y?(IGJ)pXg#xHp%i-%okn?NO*MIQjeR=$GhQ1K$Q{#g@?Z~Ixt2Vz|Z9T_R#(3`NQGS5&s{mh#Kvr)*eBe@dVP;~o z!W2UdP0c-edhCBLC}IX|P=Dg{=df!1d4+|KV0|1@u`xjOP+s-w)vGnMwXa1-^Cl!D zEa`P|aiPLO(m5tg&q*s#&!0aBu3#DmGbiQLESG*HZ>9Z?xw`i}*40-aT3TUzA`1)4 zE8rtP9HV5&w@CP6)V8Z(5Tu_*&ZCq@{ix?7Mw9#+8X6P@`L?=mX7}>DC7BV)4Zj#D zCnu*>A$&c|jX_eQ#;VT0GMaV5v)xCIadLAnpgzPY-I$*CXsT)K&U*GNOiV{dryQPV z25M+%h>Vtk?LY`4HW?^rMS&Jx6Uq8g-6v4{C8-^^I~W6;$gE$Ck|AS6FwPE8<0??k zp)Fh&+}&TdwzhWWXlrYGrwjxsR?Gb==DFDW*t|f*)C;P6XJ_ZD%F4x~qob$KpJ&#+ za7jSj?TeY$)ji6}6avE~-0Y3JcJWe6R(30&5P0$M;oJE1eAKYr!^UgG@Og4Qn2K#D zy~cHUUDCUE`)!ULtNh&6*@>CT3$o6x31%V_q+xo?bZU{unBHkX3f&X~og|AOsj^fj z80qmR%;MdS$)rnr0n7i~`0)l9^NDR07u7BFF1Xb0-Vwj9aM&!DQm^18u=y1nuJm(c zpuL@eI(_b386crZ%U!9m#yVFFHbu8*CJe76qoXkL6nNDaDsaF5z)93%!m!VPRG5Tp zt=>4bpbSk>QDNa%P(s^!djnue*6L@U(-b9x2g~=2D*did=(Y}$;sC}UM9G|1rO_jk zMU-9L{iOyXs{nbIQ^qgPJ(5#8a=4g9&T^Lu2&#KEc2B(Ly!u*;*5~BpAkSLuQ)^V> z>;BHS=Bvxbcm6D(+=^kqgiVrzjKqe$KW$QFt_X} 
zeSSn%984%5i@d#?M&9sP_YH){#x6BAHKk>7`N(Y^ANh72-9_$Uy<@;$hey`^zHbGl z9(z+oH{8!fEbq{cK${s?V-u5(@uBZo%bSANsT+ovxviK*c?vD|`gP{shKw7ZJ{cq> zCmVLJ;IR%;R3)?B!A$Gw>iQrc3T)WG@b_0kIarEJubB%KqxWYY$zGRa(bC|HI6K&; zGL7wGIB9+m3xSPl%s$Dzc=2Mvzb?*Vva;M43ivR5{H9>ldd#;+gTA-@Ri7r%Cwoy} zUmti#hUS8cxwYl58v`$MbO&V?l_Kj{(rs4YF|iGpW*OxM<0|&&^ytXQ8_d}oL>%Q6 zyonk=AFQ|^XXF7Vc$r$o-dSTzeF!W^fg=%Yj0+YxW15EG=FN%dcb63yLArT#_%@(j zIi|JE2`-8}rr;v{-e67CW z;j1ua^QfieB?L>2&(#lqv`z-Fu=!)8A&+1NvZfHAGQ%O^{Uf8JYNt<2W<7l>h1r2u zE*3zy_gj9y%Ej6FQf%zH4|f%}imzE5CeA32A15yNJ`lV9(V;+O8vGIx%P|NjEAbO~ zH?YiwwdHRcl~_r*)iJ};yJsNM-qq!c=tAa8;qjGV9n@r4C}Fw~B^;YLjY#N&QDh(} zIWPoGKwNwoD5Aal_ANvRx-EKGpz!gb+miPA&z~D_>(WH<=G*PTQeZBzYh4w}i6y;f zJ*ecs^q}=hd>B|7<^eCjn2pYUA+$qT3U=mtQIQgaXl8V$@w?THr?UjDKvq*LI5@80 z5a7p$6&4m^+V)yNpONOV4I4M&UDo=jC1S4Cv%^!rFy{+nEy)m8O#UW=La`f!w`_Th z48m6Zdz3Xukip^MVgv-eTqiTkLF1Q}=0rL3iU$E(ln$5}h9C&_V~8gC2r}#nO{}b8 zgXq{h^ZdCSQujqjk5xe#lktQ5)YUhNh%AQRii`;(?N5`fZGxl@xkCVABew_h2dygT zLRJ6=?}yNUl8jB`kvkp#`e<6f?4t|~Y`L~yzXIW8dQ(-!hEG-lRBQYGeE~Bwv$OR0 ztx*V0i5SQ=jw&F%A}`O9(o!;({@%TNJo0CRsW-+C52Zrn5>BE<*-ByR&LFRNBHmy4#xO!LC^_)h&5KIu+iyS zD><6qc6Fz9UrL>QcH_(Q5*-~KgGlVG5X;HLH-IA7ihMgSFE2Tamj~hy>+I>V2HukM zo>d?>3_}2U3hqfnvqWe`@}@1|D{^5D+g*Txy?K4*jcYr-H)GSFyKp>6Jj}aW^vz@B zfcBm3WsTcW#%4jHBLJ|Piwmg(WWeZj%)B?sU+sB!Y7or^_GC1WQO&2Z@K1Upu^LC- z^bw=l`o3hdm4~d`i`AQn1LR8d?@E_*=P+b>C9KT>boxq%=I87vO({j{b`5)gx+yMx zDBK#U<4){0Y~}?BpS)VPMPFn7t+Lzo4Lf$Bu16y(1X{d@m_!A^5*? 
z#PT$c7@TD9BI4q1AmET6fW=vtYLwxmt8}7P5P9!1h=?j49{I*jABz6o$4R+X{tOF# z`#Q$5<~X$kw0h62!_6weL4^H2B)I%w=kMRaFqsR;$Z&zREX5ji#!)S5QMeY<*Qlh- z(_l)-#9}gi5x?1nUI}~PZ5hJjAv9Kaiw-s+u8n#5^5tmHs;Z#Gi0RI&wjMePwv^2E zCQ}`;nZLtA_T<^K5*!V2mCZvziaMNs6hwbj@m$^44rGN0CP#4bSH*k(IW}AqGNFNVUH0wRy!tl>8@{Ej)T?bS%ckf!& z_~lCn25!nZcPvB*B|{dyz2}a%sN$7eMN6)?p&Noadx8)rN!n z`hy2O+xvyA=Xm8TKk!(G6U#Xyjq-HIs_Gj2MNq*HGMN%zA}ZSv#V93t)fko?gj&(> zxG!(<-r5fkk=IGO4IYJ>J9gq+lvEkd#e`++cL$-@)8}+YJYRo2ykd;P##4BM|GbIm zoCqJUfe|qsj%TNfAW-mKmyFfcjCbF};8vyp9Bb(4$euKv-FzG1W$q3ETEfC_(;bW{GVHm&cFZBdI?2>`2YQ5 z{1+7?dHmo1Tt=NPj>+u#fKvxyK9o+kI)l~1NANFVo^D$GrlM2ucL9C)U^Y9j8HOqQ zVW&npj6kS*)!MpVMn>l2&og#7%rW%#NIT&1Y#{6Ey!#B8v2XfL< z;l16HVzB3)?oq^0wOrKb6{F~JIYvg}aTT|;tOGZ|^w^@v>DhZ6iBY(P$!s*%G-OYrmOvD5-fEE$eaObS1!yc=P~}I<~RIbvinq3 z?;&WC$($HW+VTDSy@p@GITKjA>=;CO{P=P5+2!TTU^4lDTK13rgp}xa)V3c#SP%#C0xQf<-SicF7!&Ha z)~sE-7O{W<3Sa;U?I7@(0b-cG0ujH&O(_QCeXnF!)~Li)uJ%23C2tjaCtVH< zWaC`C2n3N$ZWCZRdTUThN=g_~`vxO-#l^*c)F!hbYKeRW*|(iX%4#8o?kvT#u*){V z?B#p#8)Imyimx+C<$kmlDPaU_e7$`;5caEA%A#g3p6Qn#e+&u9FH>s6hSxvNAtfW@ zMKRT~bm|SN3ISys?V?FcM4VN{H^6V}y*%fLs(eN?agb`-EKm&04>pS#Bgu=AyP$!X z%8?^$$rHcgDC^YBf@g*5SOYw(?bndJ1tTk|{dtCS)hb(;m-vLxJ@@#jB>j{iCyXUk zU{toxzO!fV-Yp6WF_?tFLWvnaQO^=YZkpGP7JZN7UQrBMgZy~OYpdyGM5h)1B-#kkFEhbB6u zi#+WofFE53*BX!PeFf#>>Y9j|(5MDmwS{wWt(`PZ4e`Voa8hd+_#Yk8A}gY_q(m3N zG6k(@)uzUca2!TZem(8D4_RM@goOOPr?80h@gaG6dF&LZ69GW26Sr>OR2B&jpmafX zJjB6gfce9W%DAfu&CN%eJPARK4OHs-4Na(V<=j{w6{DF?p4>cQW|o2n;5~CyvTQa} zj_J;^RCLPG9?!||V)hFCAschXkO?8PU3Cvya0|8>>a z3M|x?a7}w;T)x+@-@?LVhzInpMJ%!TijCwCxy_bQ8IS)S7^{Qm99B_YuAXxPx0PQ@ zORKlFXwG8S>Y&UXzH+rXvb>==`?&S@etxv|Z>kIvDFC6_V$9B38 z+t8v>&ai?S{_`iPt?g)lzyCe_3t2hml>6MQXVZ+BdA@PbOQc(z92|R9R8%_K+LT|v zejP5q+cjp_`H*KOkKFhQ=VmW9{T4s+Tp6RV7y4lPY1Guzba*}ms$Ip+t@ULZnuU#{ z;{oa+Py;7B`-U=E9M*}>&mTVY!)j!ucFN1^0|W|-hyM1Skmq(@^MCG4EH@VJ?@-24UF;WiRE5KWeiD5w6VY^3m`yd<>UaW%g}lOP)*u1 zjXtAx`INper!>yOTdX4x$!<`(ASqGtrz4662_ZFXsi1M#sl{w;iQqFbJ&@oh8HiLg 
zHAMmPZa%TO`D(6xz2?esw#AEug@lp=)VF5f8|N1kR5Ub1Acs6DZ8_WadclyI2aNGn z?&juv*(d6*Q6G_hF8y)@Lm=}bO|xwLjlJ+F0)m5uC~2FwD=G>B3KlI|#7>bdqv$dI zF@6?1?99}tBkd&`Rj;JJAlb9kSzz6_=(W_xXBc^Q1xovNWbx}UaQtg}(krYsJ6K4b zfslFOKCY0U;|l_d#Eu!xb5*X*MTMYEgEO=d9d; z4MAqHQ+Ufbm1roU929vaRFU`bPF z5BJr^(ozM}dpWBxCAswZI20d^QK2OiD3WKV+xO5sH8mHn;^fq*1RT*^?7PRMpT}B) z>$G7cx?65=rR`HO2noJ#C?s?HFPdZckL|d>vKvq)3Gb*;iDCvLL_u7~4+L(ev)mHq zVe4YTqoTIl-IaHp6J+srYzR2z62Jyrc`-FM5Irqs;A@`o0O{7vw6wJG_!i^h=GN9Y z3P?M#Bnu~vKH40WOciC}@O)ceA5MM5nSSa?HOM)3s<%1sX3wDRR#pmr0Hbcc-PUT(1dHrbQDJCmYSP1s33f_s_>}@Bth1wI4-oJU zcNJ;OFJ@E*D8Bs(TzCZa(T{!OlFBPruHbFZJDo05yhsBr27>y3L4Hqf6OIdlnTW2I zcn^77Ak9AS>I48*sUEWnHy5Ix0L8@Zjc~Y1-Rwx7i)!sAaq$pJ7Z_c|-O_Ty&Ye3$ zL8P_7g~Phmw)Fn~s6C5)FXJ}sL-bSQ=mtNqiTd>1*_ze@Ua*9n(e{0$)D=n>ZUI3@ z&_!>{%C3E~&+aVNYcx_#JX-nk!e0^G+g?PezHk1w%Yq?Hx*8XWbWf#oD^d z0O%u$p!^DvE};Lohrlc~n#G1AvzvjpnaHV5Kg20ai+A^X97l zG>qxYI<<%Qps}$AAS9~7H$MF;d2C6w<%dRBE4=RQ`}e924(T zwD~nCwR^h4y2$Z6ySi3?VrR!N?08*aN!RZEG*f|MRJR>He*8MLlj!DZ+@71!(Sg{Y z3BU>T3wa}_hdXK1D=eucU}S00IJBr&h=@xnIXE~%!^3?aJ`{eQmlxF+5f45T;1e}a zmy^WB#Fom(F2ny2X>Wddkd)L0IpYodifRRR?Cc z)~;JugWG`|USnHpJK`4uh2&1U>A|r5=X8JkrX|jQAZ)edFWbsyC*+uJrwTLec7%3G z(sQ56*j)#EULq?d868wk8_A-SLuotGbU^*0tt5Tr_|(+id%A6q-RKTvw9i8Wk>@X8 zZrr+6OTWmqN1HISZh*ApR^6IPuN;b;oB<*B3g8^L>oxAplHSxcvNJO?dH@li)FWeL zZ%d6@QR01zcqwr+e@Zzt`Y_nzF;J9h~B8=shn`fG-4@amp> z%3x`d`jL2ncD}BeOgH-_vVA)T3?s_O>7jC*bWtiEcz_&=&eIRHuXtz2}BOEY`(c=_7^cNr>D1|{LczoQgsq;9%{2=@-Z8)fS-~m=?!7jRlTqY5h z8!6|Ukn|g7lHjByK}28XkdTl~)mJ`N1N~HdX6r%;yVS3>)QwZkRj7}#x_l4W1rpEK zh!OrsNv!c{#<@=GP>CZr@tEA;H*XeTTf7G6STc+_oB+$4u&}VQhlc`A(xqR1{=EQ! 
zc_B#sahMH)<9z1E&V%b+zPU2@uEt91W7%?nVx;w@JVLnq zSGYJrZZ%#1)P?*eb){^E*~@FdBMbQW_z2~Mxks0Y5Ci2pQt_e;N?n#GdKw6GB1=_1 zckY~TU?%oOl$^}%3u`imaGTLWU>Ri0t5`fn6BltcPjGd{#>VlG5szh8eE3j?WkQAR zzABd1r)UfL2<5N=kS>pc?@bBCig|#iVs5s5{@#Du?j#u-l8F=2MiHM5IBRr<+C0^@Pz^1K1qA1jw zdw#%NUnIP8${|V@GM()dp>i3Ot0E-0tM(tL=l~q1ejxkHYTYdQLIyv} z+#h-282azv0~RT4+_IYJE*%wQ# z`$|iso^Jy}b!8^~aW+=9iJTCm?4m7`@($q2)l!hn>) zxuzQRS|sPuxTmWN>}t~!XofP%Ms>?f=?%wL2TEb@3Q^CX)nvYF3==g}Eb+m+thS8 zXnTZ&h0ICK+c4ME`7kUH2B3hdqmIb-%tQ=6jCXd?&z?!IkvvNM9cYb{Ki|J!J~o8< zh%{5ME)4AR3hK@_+r_xiIxhXl1BG@OYT4V1;PDb9LXsUnKAr5^1XZ?*7yA2d3>3if zxi|COr!Y~#b0I3rwU&duw7i^yS_6r~cvEx3=+8R!oC64DtkjNE-;Riaaanoed`*IC zN48cI&MMC!@yI)L`X1^b;#1b&pr}BQN{Y<0(<8W-ApWu(W{?uF-V{%;?7RE})}e(y zM1xG6;^gX+bLsRm-n7sz&xZ)u{tT&o35La1!N{ZWT|HqbAf3ftxO4?Pzj@u)cM%FE zG>2mWL`hg3xiao88cqok|4Fs3rdnN)K>f?!*1g^^4*$B|1LkY{v z-&w||pn-<~Z`_8La^2V-1e`%6T3UHP=$)9TD2tB``}#{yB<-8>Sbf=A0MTjUYO~n4 z0X^}30%FCgsV3;C_Xlp_y3#=IpI8*nY${edpZ!BBGl0cr`4oEATF;pxL*`ykb-{#n z1P9?I>Sj?+HV~omZE4W!8;a5XOV2@h=wHUvLh}H{zh_*p%fdE)C&hLTCQ-R$^ zQvTEl0|QQm@@V(Z0JP>Abs=|b;ZS+btILq8b=rlmF?7Gk~M=> zRyrX1@@UQzxhPy}vOW#o1_of~V5e{SG=N~P@5DG@JGDXdcf` zE$PQ;Rl3cUqq|sM$M)777BgM2uxjP!7~v_5XAbMz+jR5h3e=3dxqUsZUQ;2cfiXXGmi9M#mA4x3%ku26Nn}P2ISD)L(#p6q#R?J<X-3k=y|< z+yKy1&9<*!S-scJ?qQ?acnd@PlP8iDURMLr2%_O`7F$PCzbIDQF>iHQ-E z?Zr3r`rB)mm6^GL`UIg_Q%frmDXvMug&oLK1B&7xHTpYV9a-vgZTWifrW=FG1pp{m z{9OG`b@ziE6p>H_T7R$e5B~Y{XHwly!LO0glH52I^)xjD#?JvzwgiQ^aRJ!+0U_=K za@ha@dw6v8KF$nYhmC@z3_lYZ03l6C3S04p&!{ZwV* zmUiT?zZaYb8-Zap0q>I0-cVChQvd|z(AYshhs*;_a?PMID&7#Xnwb3U zlhKmL7SAshk*rn;!6biTf zzBa!&tCbap^%I>hw{^>wd&txR{cCZ5Dr7*lb>e_VjG_k?Z&nmy)B9Pf0h@?Wk8)y- zAI*Yd@?5Lq`Q7892h2Xn|rWIZet^wh(nJYdu| zHR;&fr;hKEKJy~9rb+OvL#Kpfef<4?U>V|wPw}3crN1CIh`k0v{@aEI zqv0v%vuBf0Uf@T)VJ@=$J_zz!Fwss{PYp-DR5c~_?v!j>`78$$RBvzc7i{j6ygyN1N{X0-A4n>%1`}y;P?l)-G z3l}Z2#J&eqy#m?oGB6g_n6nC}g@uLl#d`(W**hTFi8tvW17)DFVCjmZ;6X@wM%_f7 zx7FxzJgT~xzQ!oHuLQxUjMuTRTxq2n5f#N6e@5>H(iW6Qozog`T{Ki2L^M4|=Yd8xaxly0;C!1vykFxaEf{E(fW=HFq9*Cl++Pe#(*@ 
z_Ct?apB`V11I$i0A|_@DMPwuxCnQg|DWkk@h4N2#SfJDpJ;5&9jJm@GFGXxb_>eY? zsD;+8g5wZ+`Nqe(5@V*q7xQfoo0`6Xpn|Vg3~?3mu&r)fbTk|FBsVwWnQc(PbDUx! zm%JYYbQ}X21GccOt&QU4<)w%SgpVf4G2{nnyN}AX+Wj7|0gh2VU^TRIax*hk?CgZ0 zEbJott1X7Dvg|!Fy|<(1A*LWPHilezDBajnck>Y?=rWLYtk7``KAlIyLzFTAt}_gi z=_c8WA*=gpChviphJE#Fe-IN4505l+R^qOl9_u$l!pi%}wb`bxv@~<*XGs~p1~Foh zew@M*;^Iuyks>cMh4V1_UXG47^=)lJIK%843LIr|(d=J|r;{~cy5Su`mcy}}PXT@b zhQaY>!@qZ3z=L;W#wdC7fdlU-PQQ#Dzc-^{qmYnlR($%C_)(x1qW5t@bdQuivC^mZ zu0RW*I8A?vY2!Hdz%J%WM zcm0mXWFl=WW=yw0#SjDzyN`?RgS%qWg-W~Dv^)+~8vIDVM%Ghm_ zqGBun`?{{~WvkTG-v%E8%)Aei6WF$fjJR>>f@$GYyi1hlvP!->JeLC@oXG9u8EW(9 z_8-OrV2B4B#v4ZFxY5}gY%!BElA5UL2b(AmuBi%x+NVrwb;c#!7VIoK)9)w36z?Nv zjio+fv)N1?0$st#?=JK)3@iwvGO|e-VQ+vvRtkGTtM-0U(p4ZihT+5(_hqhSv$D=h zq}MIzQh3DPjqlJQgEWLb3F^MGKNpBoO+87`ZO}(lOst}|wzjR&!^4ARTjH}t#imwp z(1(4R0U5E-$HUXJl>L+~cQF^sDT}C)ExRHd4MQFXfbrjutban%(bU;FQ*NcMQ%Dn5 zWu~{wKOb#=WG!!;wuAIozG3$_$W>Ykd9=4$Eo&dpJ9kH?-Mp^Rz_i$}4;2EmUVL_m zd(}n(0Tv1zmqu0#bk=vfnQa^9+_1J;Z+btC_WJ=N>#gZ-k!^M#`5BX3Ke3J7DZ3vw zG(0Sz?DcF4--xAc=es70a#q)}yBxTO;M{Gv2PK(qdUu8R*`d;6p%3O3i={;GIxo%; zkMuIKAcKI%g>S1je*uY@NYl4m_7W|E59n`hUQ&)!pyqptlkKFfz25Q zGf;cZv&(Dx`QfSBUw=Pk*=pfZDhaaAf9o;Wc1gvIMq{ndumCw5uxcxujjpalVqO>~ zzU>Z(IV>lFsfuPN(+#U(XxLZ*p8;a9^^F~)CKf#dEd{5Gw7An6Bnt$6GHF&{lNCHQ24QqKKeNcGbUxk z0_H_i6F&{%zhS*ucurg}xhms;z>01ui#(TPl?hx-#kX(fF}c%lG;f4bla-x~`fKqh zg`BLQ0~Px9-=%C};8^%p=PMTB6|Qf2&=aq+5xZtt3r>939=gf*KomhEQBuxu7 zdY;X~Buo-ooMWZ#KPxxicyB2}u&GKF>}>j8UWLZHkAGO3$ZY@^MxXcq4W?ADgLtc2 zB2F`Ghdl^O$0|>9vKde-e)+zAz^4&M7A|(Z1D7Z*N4CtMcpCAc=iq?`W5L3~R;eSnbX3ECokjGcqcJC}v{40&Z&Yk|kk+zkMfMz(Ouvx%_^lCSkt) zG`Y*S^3M5duIB=$AB&(uMw>wrcw+-i?TTJExVe?B)OMNcX_Av!(+Q5atOey|MVn<- zMrlX3zBr4#$3$?d%?dcm03=sa$sf4He&RXPWdPd~(Hv#9=ZZmC%-MD2b=;d&w?>GW zsww}pis<4f#Tg((eJZ$kKC=&A2O{9$X?5&rStS)Nx%ooAq(jqn@#X7xbE-e9D#(ka z(*@l3wqe0qUG{FuvZc>M$iBY)n#r|CIo(xMEuDt+5O>?Mm2 z+KPrYIqo-G!LC@i+Ayu2lZ#8Oy}ca?jjGR?OXXRuhr!!IDBy)8t$rl_O+^K5)QZNx z&=k=MLqiD|bT&9Up-O|n zZm;sslh?Z9QrWmQ%R0GAb1FF5^9CX4Y82uTM41h&Y? 
z+$PisWNU~hHQ`PEelQaf1x8d-I%Bmhn4ipQcas+99wbUwXA)K-KE zmgrbyVX8#XowBvI-gq17R3-lw6$AOKJcoik_f~xO8EkrSHm%d`YOZhkqen#gM-aWb&?47Ygoz^{0Wt4ZpMI8ff;{pxJMQ1iS1Pdgm*D^5jYT z)0P<8a>#< z3Q%OKW^D~o6QRlF?AdV9ERmq@fln#7`ew}CSK(8xZ!Lav6Fi#V^7BArTc@~&Ky1DJ z{P{NM6Y`J%qDxyZr6i4l6sagKzKq~YmIs6%)&Bneg<;x;g0g4V)_lKS5$6uU9f3eTlN<{XWxaAIUo2ETWi;-I)wI z2SVTmSy|aNuw2$ecLHJP?%%(p{;^p!0U#ef%u{u6{iO`<`3|n+4|sfCte&^{Qb&#) z5#F@v8fcFtIaP1p@-s6t!=I5=R|>b|t=wE$hIo*7i>vplvbqW+fNjx-v4H4@NHQiS zCu`GW6jYg9O|Xe3_=`PKdI8 z8wIOw7)Kn`BUn#WGgH7TFZQ^p~OcLA$v~MzRtExCSK4Vqh z^3Z|EIDYy?UvycS0eR-@1Z9vQ0sm|S+X!o@G|FQVm!E=bTp13P&hG9_-_~Ehem#0e z1MCpnRKdT4S~_fOtcyKJXmK^>&#Sn&=-Lmy@5pTAxW_NRXx*15dF&leNqIRj1loPe zpLp88%-TB|EV~tqn2m*=yAVC@o;?q*5ml~S_D&Ymzo>aW&z3D)2LJx`{M=$0q$sqO zclGvG$lmPS+}F`=ZZQm?-uTJf>G^Hp0><!EbF`43PfAF!PL9(K*!>S}e6$I@%wKkZ*Rb^YM(U&|!R z_*|WxU*q~iBP0F7!orTXtSg)4Li6UOps+W0?D8N5QpGNIGq)v$gWTfALGAy5EtC|UpNf^^DABfBLCZO{GZws|G)Ys c)b#rA64~2ZQvOPz=?6tq)!z4b&(X{O7ol)_ivR!s literal 0 HcmV?d00001

c23l3G-k{6i1wt4CJbVzQAZu)f5pmyckI|Se8E7JZFC2|xlS)QxeU$8|hXvv@?Xu~T0nNrNd` zH&xgmaZX;DCLaKH2p$8IeSue21apT}baiz%s0U@&fvo$?H_WR-H2x>6D;Id~`3h!p z%L8B?6NkNXfoY4@7&scRPeH5a-8Wlx!$O31knto?JAX`XT7!8}aq?J12|e5uZt(<$ zBY|soUqV^p5Q(m1a0Ee6PZQY+2~MqrD*$Nw8Ux=lL>^O6o7BvXA8&)vX5>Y*?t1uD zqsu|xq0X4ehTZ2*+5~VUV8f{+6v#MyI_z^Ns^STu@U|trM1Xy@E&&0GDBexbsWsl% zspt!~EffzR1B{;L(W6K8d@0;;>6gd*NL|qdP7u(XFBIZRL>AHpqw9rrhkGs*WjZVJ zpccSn`<=M#myofQ=dn1X5dgC9dR?aD&eU)`D>hUN2fA4~TK#R%$khRLRSC9nAfCPi zI(2Q-WV=EdojL?U|Ji;8sg zGp=VEf)|HEMU0*}pr*XKbUdN;Oaj*3_B<^ZD@ZCkUC-&~_{yp8Tq`Yjc2``iky757 ziVdTnW#>aRCnaR^7UH$1K&N&3>f7s`hi%**?#&q97F-yd%bAWb%1Hcr_N+Ztlf=SZ zeC#RU`tgZO+s)mQ&|J`C@V9)6f+;N=of6)9cJgq{W3owif;RSs-t%;qF z{mF(7#z|fAE^3RF8C=}T!$6e|pf|hWnr_oznPv_coSbI1Vt{PBW1CbR?k0 zCczU3w+&1#YECHs@*8Q9Zcr&S*v(*Mx$BiaIIM6Pawv)Vav0q=6CtyJL8cwvvwS3& z1Cvb`i882BmmzAMXnig}5NjRU{-?0}yRAVh6*48|Q!ss7<{eul(WW_jYS^Wv7~pr} z`^a#~p4a;-CL$h+cD~GoUbfykPvGo8q5tZBOs7#{PET~DUtCsyO#L7dv+LxrnL!`Df;W;$k*Lsc~twQzwZtepO7cHlczbYH_FcW z8U-Ph@Ba-OE<$mxR>Fda>-;|&_h);B@4Hs?R5q)>t)i35<)_Zh3|+Hky|bhcvP@+?6zj&dt3zjC1erFXPRV-J4;Klgr*uS>}!w947GV_4m|Pkj#V1X z$lmIDerBYF6SyyTDAe`Sr-F)+h=&8!!JL2siS#wAq%B@N;(QX48LK@R-6MJ2S4={p z3=Dr_+>^}Pw~x*@Hd$7*=_G~9j$KJlB6B{TX&jzv5$<+Uw;rb zw!N9fiE--;l(%iN*WKyn%s$nzMA~8p0OM<;4HCh?h6P?l>r~@x4dZZo}o|G(-Xv_6KkfS~l3xp08&eJN4DX22ZzkF&Wd!h@y{`8^W9IG^^u}+|mVa zNnuTqbH(UX5mW@OC4R0QPZ!yiSLx{V@4;y&*ZbcQi46(A*`y(jc8klA8dgl6u#845 zv%an<%S=4coE>ABz4IFEt_VozBNUw&lO=vTitl#@Xv*{p!S*}%*Lg|q?M2{Z_BU|m z@T8pZlxA_B3C-MujG;ZbLaiaDzb0?Sp7ifPZ`v@A>1CPwNRsQIf6D;~nFgbcrpdpu zF7m2O3m8cHr@NQD^NN*2rYx+zUt}4 z!wye&e!L+5Scy4lm=lFFdUW>@p^>*U&nNqQSu;u;;bj+p-Vp3)*gW!ma+O9d!9k3{qO5abpzTEQM(%+-YEUn-#Js$uN&=kMkd2q_1)RRtg&YYvZe-ZgUS8SGIcsP z8Pfow>B?(9jbpxlqV>$_!Z0s5p+PBU9S7 zgaF`enIQ~HRD$w8Jr9Wx0nBq#0`gkk)z;?GErOh!{`NcluEO9hx4i|#+1>B%doDbH zow#1b=2hO53Xz_MN<2I%r#)GVNJuoGt~d6q2RxxSG&dWWHg(3nhr3p7Ikt<^+D-|* zvE=T!v*;uUfmn{7#|W$vNIcVp+?ott1JKVsj+Ov@<0<5|+hA65701t_<(u}S8ns1_ 
zjtPwFk++^4z&=XRj+@8sYq@lCUx0IkDsjBk;zmKA!h`&DTMI}oVfkzhPj3}cHCr>3B#85YwH z0T~gX=sqQ7oj0AvnTRhc;7sN8hq_mUnYvUS>>7BI`WE+Ru)d#_SC_p5V}*0KmG0kj z|0V_&-i|l$pHx$F^H1dB|O3A7a!sfTy+JdR6#HYW<8f#o^5E2`F{-Cm1=q$ws3D&ujqd^~y>-gF!?rVR9# zXo@Tao8#C(&ws^8JidpWdxor=|}d6pbI$0B#QE z7YM26f#K^@+qo98qN9<;`p|?Tz_PzPjT)IIO8gGHb2573)pIT_aU7F{KIs|n0xo%s zlgFw$*VH}|_hjT`r?+cD04Ht;6#-D|{4Pj->1-5rL%|uX?`BpJUg*_^DG91+;Re8XB7udYu-{p5= zxp~2ca_kA;q4Ik_EuRA2Yt-ITy`Cv3AhoGx>9Tge2M=CCM_hC=S$67UQGq5(&Fqmk z7o0};^IpW9YZYy15OlM{%{MmAuzvG+s`ai(`2!W-#eS0OW3f?*@m1yBxn>dI69jOz zw0Hd#6IYs?J9mx>W-3~+k?w9cJ%r*lubntn5*w9a?DEw}pz%6IJTHA~--44h3wfOq;zP&pX&+!906j_sgt~6lF z+SNpSDs=8kd;9mZqfPy+#HkCGJ4>BD{#4vO7wYSzQrYN%;U}pg8#fkW)~;CE;01-M zwe!Aiqx41H$X0c^!mw76&t}zglv9MNWb&?HpWJ)>z-!OuBc}g}yby@SP$HSm`<`HX z45=?OE*D~*V~ebmI_~sh$;;f+iAP)FTJz-+rMAdOij@AQ?G3dE=>7FhtoC|et1NM* zWZR>H^84z+jQ)6cxvoar77kCrKJC1wQ+beWcP&^XKp8c3cX{o@NR^i~Bo{(hQ;}$1 z6M)>g^8?mXdET^mtGb-H5k_5BTG!k0D`GZZKjk8nZP1>*3qh@StQfnk0cs~Yf=W83 zJ~-wT!whO*d?l}&1t3o@=7~A9YY5vQM1|UF{;&4FJSxgN+ZNlWc4DL5Q8c1R)N)|7 z1wjEB5`$VQf=tRJMrDv@I9z86>_d0&i(7??Zxs-@q9T2Q=Z%-{ppp46($=T4p{n$o4Z|Lx@Y5y zGM`yDX4=u>%Yj{865lMYo=6&`NdbOxq-OzH7+7usKO3ho>jt3<%YGZX`1)GI@#OP=vq8}YoAo)!+g28PBN#MLKukK#> ze8b8hhi5@itWgvOVZfM~^!eGhuG?UXJNWYg?bgKfic+jKZu65+I*)&oc|VceCE@^kmH zEhp369Kh~s#!y=pyAr&uAg4-CZ|GZ$#>WSbdfc5cI*yzu?fLU7GG;UiCm*H9atN5_ z0-BXRIoh^S?WvDV{-IhD{@q#Tcq}-uAmq6(6ozQK^q=W)1e`4)(Qm3N-iSdUiU7C{6`Q&k9SGUHq3nQ{5+_iaEh+i?m*3I$vx8(3BhX(xv;SaSUFuOC@nI;$3t5;k{X*C~8WYT>GkOiJY9~>)WI3(B(Cu zmY=*R^Z9qdI>5i{oXpGmZ^`JI8e2`8B>rvb`DSlf6IA26m8%tgRyE198O!H?VE(OZ zzE*;~Y7||SYoxJ5CQwE^on((TsZpq6OwEQ2LQux3ELF65h{aLL8DDFA>W?9d7c zWI=(um3su^(ooGt55r8Y`s-{wRpxb9HsvRA`4)ID(@{knZV_=~K|2Vr_vx52ha`5i zb6yJ!?Ou}b!Wcf@kTpdSL}t00*w{S`_7SQqT3lKUrt@TSHe zs0V_ox@^qtu8kr(4n>B{0(G>@?E}9|52!&wB=m;5NJ0U&r=?KSp z)R8~dn^0lwS_;|8<<<$Ax8P@(8@zPK3FZhD^J+H0R{c6P$JYC-68aS|{`Y0JATYQ+ zJw4|!dNQRinAR)ZNcV8Dc{Gty3Z(Md=Zjnxn|mBbeK&wCu2=uKAP$FCc6VACH`ly{ z4l)As0p!1gxtd|}fv{wJ@=FM++d`q=Q)jwFmEmF9-fy3fW(p`-r3Ly01oYaYApw!b 
zE7^ZcKR=XCG7ZmOSHcENDl2D<#=EfCfQR`v&n5ru<~F}l2&8^k>X5qucGkP8JBAg} zPDu!icIkc?E-{C{CNMe+G&vaJ8})e5fI^>ckJp=@#0f%8U_kkU+P`*~MSjy^7IKdb zGgd<@;+F+uuEpyAD*{kBp@W%j9ynJZtuV?CBV}cGBD%uiYlj{{G%R4c!Xphxw4PUf zrBC1H?!3Ap*K9(-v%)Csc};M{znvqLX?<>{J_eO^wnBnM+oanCIU=@0N8)*U2n~PT7^Pyu7 zvd5`QsmNY|My-E1&Vxx6J`Zkx^$sC|g^n_hGr~pAY*~rwI*! z(b9eST=26|1z#+|{|^GC&wD3&h5jtc9VpX78Av->37Wb=I=k*)j(+$JB&>5{$s!>G!?02JOe$6`QS&%S z=RN5GRZbbdzd#?GbR-8bFJ&N|UN_Prqm@u_Gv>pcj8#-w7Ad*#Q`AOHy>5W)sV(>D z$*;F71kr(}Zew9Kx>Q0=*YOxy9x+v!_aCCcuS32@Jy=plLeSQ*h0Gu5WP)TV=l7rW zwjE#&%mHu!m=jo*sXU2jp+|?<+@V{7GbV4KuW4bGp+=M^rL)Iu5&n z#RhqRaGExN0V1FSy?ePxsqp_KSOY-tKB-j_lJk}6Vwqmv{dDzBv3e^9c8ap+%E41llA}1cra0>DR|tYcE(H`Qbxr_1^Of z@FvrZ&gsk1u>$Mu-Ie`|9~i#8xQCXKt93I}#4fL|y#Ey;fz5O@D4G|M5gTiLuH(aJ z>IY$=(BvS25u}vUue-8YSbsRS4Xwo9|D8~d>GAi1Z2X;$3HFdP2o40O#8rG)xVlJ#PBggnGf)>=sgD`SqO3N)^BFq24Y<()(l91Y>-`R1O2s7;DM1497adz7 zw?H+7K@!GoV^q|pmC5b`GBp5}%`E_pY%Z#cKBZ^hEg0x=@K%Wlb1}vkjoh*%+@F)? zh+?#4kkgGu2#mmD!0YbvXB38E!5R$Bz1ParWL|zO;S!m zgW5`1d=jiC{({*W?^Edaw42wd#f=LSjWLo!QE&{RV094$Nc?!EkuYL~Lz z2GvU23wnyx>K<=iLH{bNy`}%_fPNq?qqT>VK2UMupWz^_KQtB{5C;i)o>uWUBL*Kc zaaI=Xk@}d4qxkYVwFNJ^2bvXnns}ljX{s|wc8Pz!b)FIna}65fC~UdxBQXwz`rVF0>4&Gx8IH$FskbMGJ6N86WBe}Ux ziLB#UPcn ze6fiyylivIno?vbnT#s==_;>RXT*=Ze0*z(f{p$j^@-QzZ!bX?EZK5tiToPS5Eq-B z*bCriZyoCCbM5XeV+Tsh$}l44s%SD0rCR4{vj_`Vdto;kWU6D_HOYCotx%c9+ZI45 zr-x}CRBdPjM6(z}w{;NAnaz~d;fJn!L1wvojyXI%c7QuY#SGON@N8*`R)W989xgOD z*6I0Lb_nx{`75#djbYo~VF6R6QRI$+1lht4qmmLOYQEDm*#-e6thV-D>_aGm9WA5h z1Vl>0<_ZM=WpKYDyNV=u5MWke_M+rJ;|xoh_MABADu*zdXqqjD$G8(q7e=A;Eiq4t z#NzWXx#k_;GhrD97Y#g!z+eDDkCGy>=$>F@JP6se9&2m|3_@J$_}C5#9FVf8tfPFJ z_><(~4%^>BezKD94z9sNCpuI64D{D z_i4IEOPF;OSv7qhNU*N&oN|SlhOB3l@DTi?~uPEAcsN}zGKQp=slnE_*ds}CCBi3~F=ySwK|M9St80Sy`KC^Xh|TdfeUsr4QX zn~62V1EdU%tjU0fv$?q4p4}TZ%}Rg)OtxT9{0|b-zSXHqkr)%3W#j&uFIKl0p)DoIvC?q;h)mA6W0`NOhE+}0EgVn8?X(HqcM<<3o_3VqMAR*y-PBzoW-S#q zbPM6-Nbm!)gZMzH>5M#nYnbV2M&I;O+dCMinI+C6s-CIY2A88j6ORRyW0UO=C9rGQ 
z#8sPmb7_h?d#=tH+o>VU`8VS#FMR4ZaaC|QW$d?ni@o1iE&PaY@mP7(YM#rC5!)fh zmNcmk#0vy5m+zIc{v?n`gBw;I78nIZWIxhslMf4uD_4Rg>=cn6CRbvRb~tPSX_>m((lz$%|bl}z$7#N&@J}icSGT9ty%I?vu3dEoo z78}aWf{yPN?72@1n=+x7joE{;ISA8{mY&1u5n5boYvRDAuof_h{k*nhDrt)V>}hHt z;>f5(4G{t|qQR0WnCe<0K)nQulYH{#CgF?VRlD5TW>&m7^tr@*#;W5Ros?vgslji8 zmj(WkHmygWd&@4+pHX(|y>P~c%GrbO>#e1cB_|HUhhng+8rD=x>KQ)2P z(%l2C@Ek0IK`#;H+K328P!M^ip!PF@1MeT=CTxDA5jU0EuILe+i%v!hG|&Lm+D=T@ z1vzob3nEWJ_36(-8C8zv?lqid`Y)%@Ob7vHphUGjS06$!jB!Q$a_GNG8g!hCl>HGe zKJG{2qnbhY`hOoKanBlJEB;q(GXqBA9{hmvX8bwYiWCz7c8!mBs8>?40B4eDa4yM3 zC>>*gSZo9iwc)EF-x|P!y)jz!kG?D&+*)B^LnoA4pKg@#-c%?87GA}2yBBj9nzgR( z^X#a0N{+??&3>GQYh@OGnln^+9_x#M49w@PZCAn^q6f6DQl{zck#|IDTxR5yzEA$h zyTJ+YX$&?ft#~@TDCFW8q7zzX9|<#EPfgCgy~mp zdet={0g9(O78^SWsBMG15ow493>DxaU`6i@ZaTbqD*(00#AM9Ox`oYDq@-yOD)9#YQEyM5`{klm& z8WdAOftI}!n>GQ|UW>Y3x&1|C85PijZg>TmNuma9#}At{-&wlg9R<9M%(=Az)Q6{C zk=5j4LiSQn6@5Y}BH9?&K%KY1z(2xk+uo2*3r!XW7XbrE!-(Y%ytxvakrKeA_P19M z#k-JPN~T{f+mO(%F2Ndjg3BOkz1w(yu^t>Cdz*4PIOb!# zB{ui)MWr>UpWHC-Atj;g185MxH*BsBKuPABHXQmH=+@?A2gwoVjKPOWoPYgx=5&oo zR}m}op7xI8@4rJpgU|SH7wP=ZXV9a>>vvD^)<-h4h9r1#AWmhL)xLoFM2*WQ-D}>f zUjUB(6_vnR$o<|)w{n5YC7$fd`}%WU#e&H9E4Fqkw&RgU_r4!3FZj#uqsNci1*LB{ zF*y*P-uu?hF6c*l#&3(cl7sX0j7_CA`2JhGZJ(RJe*>X^6GdYL26&&ldtg#zRG(=V zf-6V*Ji4G5vvGN3Qi(*(pX}y9TBNO*eO?l2pxRDih~sr|ttSkcN^k~cDfgPRyfq}^ zF4{92Fu9$vazm}!jxhQFx&`yW?{R%0?^SrUpIq89u!FQf20i&BN3D9nf8o>+G^{ z$BZJ!N1BT*;4na-2PfkIuG{JTc2xQ0ocj1T7!$&8Jo&;8Ibn10MJa@xKltg$)76Xb zdjG}e5cQr4H2TKiw0S?T1t7&_Vo0E23R+;~jnYKw5*i8J#_vC^;V3!>jNJpW38EO8 zh^YgKW7Qo(az%gy|~jk=|I?S;hbd}=a2~i{qH!;JSL`f zNiYwEK@4V(Ae-T}h@Y~FpHs^Y#dky-GR+E#fasa?E}H+RCX-&z`~P@e8!yPoLXYOR zzLNclsu4f~20e5SH)aWnRmV!u_?`z^Lzi*p4!2ZiCzqnPUxCBsT;^NkH(}Di=6kll zhbBxpSp~P`=>o>tD|*xN{2`u$1-$X7)k#Dt(otQ5KLT-uh^DMyz77aP8C8e-Opj}l zMg^*>#7y$7bKx?AJhh@#m$q;lK=>fG)gaB8J^Y*X1jjK_F0i@uKj2_W%?NmW`QvW) z-6$mu4_NLVtH7eEM3?7wxf3<`S$Ll1#TQAy!?krDietHbOf`3Kvg;0~3LfIBjV9@9 zZW;bud$bMQw2Eu~z+Z1=RGCCo{e^+9ywXrzJqS}(5uT?ieakSiUJ2>K5VR$kr}*9+ 
zl{kvDaLWU)~&sW%9`WPqVG-_I3R?}IAhmg8bl>v5!0JP zSe7{z*rg;1psX0*QM(PK$*zJmY}&=8nLr~|*uOf+oqPb4(F)rE=zgK%tXNdRovYMv zx~0rghyHvJ`v>}q@Es`H)m%)GUo6MGaaIX%8pSSgknZhoKRZi!)U>xW?WP(`$#SD^ zu*@)~3}>b}K*Y)B;-HR(YK9gpT?cXh4cZw&1DSVL9~2<)1TtEhP?8bqG4klkrm%LpA;^(j{7$Fi8^(E0|)91-faYW%082EYF-acEc!i{F+~XyAwf7z(x}VI$Fd!h$7;mkJ6j0CrCPe%$1sH+7X>fiV$Y z%A|>f!Z1_W>sOrMD)L@>rtgfZ-u|;#1pzgyR?W4y@kD3U(U1c_p>+NjEO36CuBGzt zfCaXnI3QBdMC#!W7MYg;IO}JQ{PJiO;c>K* z3Fm=Hdj51RtI)Ny<;^xSuL06_JfwqBWPaocRo4lf+Q4r~DfP9UWHVLKsAN=g&}`?< zk#G?ao2~HbR&wfy0tPN2!}IrUKe+hVZ%~+=7@ivD#>L>Mw{>pB*YvPPE$y<(d0_p* z|H?MpODEFspGa{5)ZPgvoxxFatRe8zvzp1Eoy&4<@pL6(8dVhRG~m24%bfu5l1C_N z$yBX(@czLXa><92?Fy2t;=FxAlM(uhmWr#mpmw0Lmc);w#;tBabExz%t@(3kNpv7w z)E;U9L?|_;_uhOL`j4J1~6)HT8BB7TfXh%^X_E<(?>#=nU(>jmNt~_($DNeqyRy2D_-78WTFmy~~etqYgUppR$=`;r;&Cz0LA*BGv|FsEb zm7yE+)^G@rm(nT!-C<{FbMoV}r?1p8cE*3P<^=Lr@e5cmvY4>|{_7eTWS`grG$PTw zKnTr2|W@P&y~F$nOQo${P8<3 z-wFu~={^yT1@ivC@-1z!z;2s9@^5opyE&uFze=`nhuh7s$MtJJ=mJOKy5#OUbd=mfA%|Q_V=e_i~DXTmlrk;YW z_1FUHyf^Ij@ujNg**#4qyPqp{O}it0u2|QSBI|0lsj9;ZV$I$6hhw#m75+- zXUE&`MHqdL$mwIZvWK}1DN7|}W!T5rD7I3!FVMiG< za5C{Me|AD-qnFM;^qsoD`R&zz`kTj%e>3{jW$5J8)&_Vs;Cl@Jop~_F_U>8QpT2PM z*oOS{(oKnFW;Mc#nQ5Iuk$ceFP>S1r=^cOU1$Y!k3f=ow-?J8SU)Gm9v-`U5`CWRZ zWX00m?>l!=3dtdG@!ogFCxap0ORheCXMB>GyTz($r~&gne8irb9j1RsD?KUW_I@9m zwQiF_!=}?UntL3+yH3-T3_)2BCc zmtc-jpFADr@fcr~PnyVD7!=4DNDMeH9SE80Z7Cv2ADefDx#xEeB;L+o6_QI$(Q+*z zLgo(kwI^HAyV2`+D8u_2@((|F2S2!<{q55^Daot`m$}R@+8SKf*>S0P?U83UzNcv= z*TEw~bM1%gh$=o)-tMl;Ee+h-=Z@{$J?l@UEMLDFBHNz}Rh-(m`OwC7H=@G+=9dc1 z0{g;0f4a4%{d0xXo5dj4{AgVzRP?p#mr5RE zH@$oPxBbATrV-gLy$<&5H8Q$(V_mSjD6biqLx?*rdG%K2JL1P$H`3DN%o{vMa;KWz z%T)smlh^r_{oQVs@qea@koR3o9iVc#JQ-A8jN0k8OZIZGNT$Mv(d4P+IRp%E0Fv}P-_W0 zZfLnXY5GxGvLOt6j7*Pg2i!0>r21($D&h6-Z{5Qk)b%1IUI%>nwa_1Bm?+4kOG)*& z3-hXg*g9EOhMRTWCU<5^8mWoiT`)BXmYJCx$gbU>*k9Xx{CTkVN_vR2`tjdIHt3?} zPxLrY`9QTkcqrjy$>W9;p9UJ_qU5D{BAjJ|=9uA?4SyBVQ_*ZCyxXDW=!C-a!kbOD zDJY21C{8vCM=G^7Msj{z-80FWX6mg 
zp+ck%RX{=(s=40LEbp-_YCVJgP$qg+6-)z7=Dz3Ol|vnJH@+U}$ut2I9^^>zVAC>P zAX9GJbv52@K4^}j!K)D#HE9XlUA!MUF`@ki$Zxa<-16aFy?t8j1eecrq#0Dj@Gla$ z&22@EW$d&7i; z>_&+QLk)JKP~iV(jRQb?u-v^n^=t&*Gvy9Rr_i2>=jYihxu`l^d-K$ii;4ZkYQ$|JuzR6lB~{lusr?t_7Q2j7w+Q;hu9VtnXoG#zv zELP4}dvlNmQW4RVkouuvoKLJ0$n5m`QnB>+a6P~oOK|1l+X%0AniC&yvAF_1_qHhg z1q~3PO)PV~zk=+GoIs%IAkFqr?c$(kCkIN_CKRwIK^-CT6$A9a)CH|$-pd#~&5bk5 zj$tX6F<=Rt-FNL>IEsF_@xXx#6pLu-PCd$eD}x5;U@jDq)>yDOY8 zT^wUJR(*W?_kr<_M=hfBUItD&FGhpBM}tIPKy=UM+M>zCIns>1sXi*KS^Yq68GDzx@8R^< zznCs52r^PYW7#5vm-*n%u8wG}>$!%7X{DvM`E}9u9p-bG`-q!3q{mzjuRW=uI&I-O z3+ShO-b%wc;GCGtepvNiBPJYaZ}OC4P(ak72p@d7b$%PhSCpeSXLfiT!Dbg1Sf;JB zHqwkYT<(762}m6n7!urO9z^%cqHZv~lN07GR%QD{7hkTaT6=ok1n1bBylsk+hR9k-x{`QaXiT$4$4AD9VHq}N z1|VBdFh#q`soKb@4Q^Ovz~g^Fp~bjK1Dw=d$ds3~f8t{B^v9=m4o}$|Q(%m}#N0a( zT7yFc`xJBqZNOJ}{j)mBD!1;Nb7GE!{Ge#0s7At3LOJU|kudBE#ffz%p!GAakMG+G zs&N-;rrwK~oEWQ79;W^SER=!(-dTa7g%1u~Sd zas^F%;H)2jU-+D*KO-$3Yw(v{QlC6Yye+HrazzsN+xUrN9v?&gJE*0NipBcNkFNCE zMje@Akd&{oC<6?8VbEZXu|f)pYw>n%mp^)IsyD1l$l;JhKo9*(%$j->S-ceFgRlk~ zp+`P#CU$(m&IgVH5zWil;*}L|%tN_RiAL=y!|S+}i`oPIU0&~0Jneg_r4rm3J;Qdv z_)3L_swz7@-7IIEXzX%DetTjArrJnB5VHS8hnZNgoJz(ojeqa(Z}fbtRqi;+-*Z{o zBu!dwqg&P=v~Re6$D_6mux?pMzmvqx)M znim9(8t5%7xpCuujaUEq_V7~U7iYB9hn7Um>&$!^+Yo)rvJ1)1#XEM!-&O|H;ybcmR4Z01zqOA-de7hhp&fU*7uIK@2URU#rkjA*UhIaRYvNgJ5N;~4(l!7 z;?LhY;&q_-Ew1UhWk~oPUc?7yR$Jyi0d(?ZgS8v8x&c9%lAa=duVSRNN>)rX1TnI5WytsbH@NufU%gWl^VX zl=24xDud*S&DgmtxxP6$Vl`mp$SWl{$uzd!$Da@7jz$_^#LPv^zrz$s<>G`)2TF$i z%BjhKcWXs5f z(O%xEo_CVHu}^`i$8jE8LK?qgsmS z=GPp!2CqQ^*%N^BU6(tbx_9W0YC#`kmmhKZ5c+%Rf57W{n56i zx6t0B@W>3jh5m{^Y=%@E&V$S#49aSWF;)?#Vg(^@B@xC^^31Muo0=F&_A&#ZJFIAn z*?tL`ILj_aunG4?xWe#dsJG^7Fpm_o%*48g+`r2n21b#}{`yOOHBVBtAXnD44;sfs$kGP#q7A<<` zpAU|dK$9tgKqy1k`#kpMMM;{?as!g+Lf{npI8)5;K|8boxXK~)x0pBU$!th^t2%&h zpXiK^ypu1e1i3qOZP5=cr1_aN+occ(-+o-x>z8`uvQidiwKZ8u$83BFtG5q*zt1RIsUv#Scm)Viv-n zDt5y#YB&){H};oU{e{nudTUxYQU#6IVm$srNa1brv}F(E-o7elv6FY043wVJU`JjL 
zF`DRB+`wWf#7(!k5Qb@P_;M%ZVv|t9PxZ&b{KeKO$i&$f>~M*!;z!kA5!){!8f&c(FPp25@tU|3 zeVEQ-JVuTX_zaLb%x;>skznf6^|^G8UXJ<&av@%CKQNR?Yanuu(9>0J(di`K>q53K zw(=o(Bq5GH*igI>s%L5RmexE-*~YI5&O;E!Cx8SI|I=18acRS7OK<4=McBR=td|y> zn3#C{0jTc22r7%&zBE0z!l?&hy+Yjn;8%5DH5uD@F4Fv|S?~Ofb(wXRAvZwMErgkI z6|18O=397cB8(j#0|^~VkcNO77lRyn>+RBzp`*Vj%&}JHP0)SuHMVIgopyFG3jnRq>ASvoa^dLc+_4E@pYPZeZfseD57p zgiE8Js!&8rfH`zfixO-$TeX?6QxM&(W!=>}?b$`>wj|!fE zwq>BH&ubIeD-gEU)|(@zFa~Hsft`ls2_AB=Ln<-0p*X@gMA+DIF8HAn%XWn2R#al5 z?dtF)iJ^h|5q|yT(uH+RQ8cs3rD2WR18f!8nVtAW=kQoiSAi^q35ibbhm)`$IlP+9 z*{5}@xt)eCKHfHLe%YnGmXv+>7s7AhTVa58MPvcH?OI}iYYt20ZL$3RjvJqVk zX}S$LA}GFDyevh|Sl3grXfekU+hGY>oTSnqWQrUuNKd^@wHbb*g9stfh;wzInb=%_ zLC|S7H)6bXtx<;c-l1V+5^c-`N%9qURvXGfwI{0_kp}5WRWCaaJcLV4s@LI z3Y{G!%#*Cdyw7WFE`*cSO5hhj6(M{o7m_$xw&TMu=LqWJP9dOlAmggQ0G2Z2FkVVD z|GVIci9YWLJcUg(I9Rd8V^E~R&BDe7Ku0pa=RDD+j9;MN9Y5LjGcge%Cu*l@gXE=2 zBW~$fA;pCSq;FE8@`)D$bt3!}oI9&nCD>u)@ELmIF`Lu{1LDASiSV9TZy3a?>6`eUuc(^(T@%Y_2c3LL$j+X zrz|r+sUB(1TDBUzoki5DNs>-@HTG0;@7EJIz?ftYr;N6S;gk?iQit73-)H$PMBuZ>_;-L7MeA7oZ0OnI&-U4MyizKDH zow%`d1}9~AVb9-DaJ5VNOZE)w3)Ls-P;drToHnzHm(6jqxIa(-k%neM6$Ao=b5qb6 z{Be%>5u_;8YoTGHR7I7k4kI%>ur=LOm6)(Z!Ibb_30DH`gqqdKx@d0EBzgk;6TFiW zRfBUZ*Mt800FErs71}?nzbDlNA@Tz#LufR;$3&NTB{|aAW!9}>e8u5@L#kr7O`={t zIC!1jEj9@}vLH9SM5;E7KBy!P4rUUl5iP6Q%(Y3N=)F_VsivbeHa-OPPdm+Z|`>McjCNinqdtaokoD`^aO`81LGO?>8!bnst_a29!EOX<#~bD z8kHy1L;*gziL7aOo}@9O?4uF{5`&v_#ez+o1l=TCBV_}n*<|xHTRcln!31S6L>g9b zvUEe5l!l-ajna^iQ5v*Ml?B@-$>4x_r~XN~e5~$k~`s)lyw+1^ES|7)A7E2Ju##drwe( zMjt~HX6fybXnPG!O1YQMFpQ`^<>$Y2okiN2ZjG`kd|cb?$m_;k&U;*_G)rY^ctO^G0(mCg1qy$wY=^77B-rONWIhaw5BaOVCxgX z^c(u)dywiSIFa>L2^wZsN$-WCW60&H8Ws$xNf3v_A#A%@R15v@4=fUqphv)rA$H~8 z>#R0a7$c1;xS(#Ro#f${D>_C5TsD^?03({ml4-*=>_{6>fGJ=B@xVz|PxM!^sirCh zQ$tMhng@ZfB#fA&2k-=$)dR5BXxRLqDK*E)D;dvdTi!ez)jg{G_};*M@@!E9dE9YR zI+@!j^pcwqi>y3g7pYDU2ZeWn^8-NQgnzUm9WICa1>tA``KXYzIXS41zEHuk)W@3| z*b9j!@Xe4A{UCN^j&XeTaJZxj^(1+Qkna&64Qx&pn`E!E#jqGhED(kKlxu)f-!5I; 
zOdtWJtng~k?KmkIrkbIexF_V)#iS=tyi(jk6?>}EV4)`LtO}}BCNWiD_2{_UkHL={ z6~+iO$eF@v6$mSSUeTPJ261($SS&E2p6*u;Dyo9eo@e_jY#M;u`bhx8G~RaZwNA=#AV*C#yZFyokFLO=mk)C^Lc!>@DWRnmE;19xj=uFBKWELzjAxMVSTYC~LxU z?mV=TBFI5qhKBL&3wG$f?y7O{d(eY{1k&7vL8gxEys<4ffHb)`pyw;Z{kOT`iAb+G zHr#U_jjqLz?8j}$gj(?sZYhdF=z3+KxX*n&*o_+Wa@1AWCG1q6y-|NxQXMs8QG1e& zj*TDBw!FFndGC7gCyifYh3rS&@0S`e(QSnU9!FG?b0esGzEttCeQ`)CKCk<7!r0MM zu~pR>BVROgTzY)LZ(WF^78~s#@2I@;Nv{2QMLr7 z6JG_VP9gO6nfIKe`%rd-ReRBn)s)G$Pq;tt%8fu%6nPI98jsPQUC3{}!&|9>bfZEFf7c$g*HuF|3kO}z9yIT%-*Nrxha`lC#cJd-Ige>4az3?sXXJc@D$a0L{uhSjV*R0rN(k z>PC)VNep^yB+-^r9+=RBcsx>7yJ>WM=@sk@SE6*2&=!GDa5%&^jXji=N1kruNaK>1 z!=>>6{XZ6oRGuV-EzS9AS9LHgK@WSo(EbRawhm@V)7G*Tj`)R1%d$0XrgKIh7FvV8D5SvugtESVW^%x zV}C=*JRQ*^bM{XhnPX@y-!fyasI+tA+hjG)^J;H{tY}_eOuiFbeA72$z`}VXC@FA0 z*t!9Js?U*IOHu{SX^A%(a2u_29?GJWPjI)+1lbTnaz#NZnt_IHjRN1C+|^k<{Gm_Iw)939F|>x%;w zrx4nL{V)ZMq{~1KcwJBdB-9qbbA=GVDh4LKDULd6eTfkU17Y#C?F0`$C!t zbgE+$SW(Zf=t&v81%4!YzI9ak~iFJB$V32oGnIC|9CWHD;us>e2Z z_Cmq+^^TM)u9o51cQISp7+X0pG3~}WUy&@M4?W66PtcGUS{(z62A_XN;lcllJaRPh z10!{3;QyN+_!y#4PAt+6QM7xEoR$(bI+OJl7TTyzf#Dv8vk0aVQG1JpF&hkcj2o)) zePy|ZF$gOiINA3=TtXA3ULy?9NlkJo%3>&Jr(#`Aptn%HJDrD#jZa81Ln@`XRMNBZ zCi{-!L6v!4Krv?6M=UDA=Sv}6Aj|-Lsf|NbGlceklJ-;ZT9tACZa~b+_k}s(jjyy3 zIUid`{>l1#;vq0VsyeB}q0enO`&qF$v!Xt`SKkG*c{G)m`zW6UcRK_va-m18u(ovk&jD1cy8Af3J zfCc6$SSu`a*Jg#vM%B7!5oqK2@Xs_peBFB18!1NM&Y2>2jSrzI$tKHX0|y8RcVSJ@ z=s7xv>rnL&)Q)O;UmrCnNbf^VDo8;oD<^F`s_t&b$`NEjtEe7OY(B}j3{o0QIz9_z zrhdUAfJ(Up?_sywJqG)mnluc<1ZO~gb-uajd{4M?P|=wVf(Y?XsJTeKO`V=7yPCUE zCzI5f)-L%%5sB?ghXSZFthkYe1w(Nh)q!}sva!)LvX_Rl=pnigJZ7si!ix9FniN3d zt-XU(X_3_q;NvYws}JXmK?&0Vz8Vw#$ttXK-#1V~<KV-m2 zIfA+#Dra+;CX7~R+aV8Ob8(+mA^HoeCCSok^&!y#^YJChC#<d4KiryLvL=KXZ~;d@F6F7hY9qvrA-+7iWm1h8FUi78WP5$CHvhdpJm75kBIZGhuw)IcSbq=$eSmL$y#@?X^uSS)++e{yYeL z*%gC*bmHMPR0~yELIejY+Vhl}p6bR*lY!*lj3v>GD!(o7X8~cfgdL;n0ytwmc~bI_ zTd4Tb@+{&ESx`MV*!N2r9qup@krh-3GLz(fWN^Asu)l76I4ZQuYUvG)3MRH6jB1#kzb(IjVNp1~ffna74{E_!nWx89HrDW0N*0P^JLftHkPjDyxS+XR=q;D7k+EZm$u=i2E0u?h#G6 
z4&R?KXPFLXjC`S4#FaL@Ejvn@V-~qSg}ne~wvS8WIN}tlfezcI7A{iAgyGs6EWcgc zWw`(Ri*;Bs{;WQbqa8bjG)aC%;1!tz1EC-r=wRe79_TKhdE>>UBpQro`&TavM%6K- z{Rj?A>Kb#t=Nq`0b?Pbb^v1J*bJ$`lY?lj(P=TbJn>?LQta59LFgT0`y!Ffire+CI^QkhqpJJg%jccxAM%F zX7QOczJ^~E|J6@HSjndt$}jkwSSHb!Z8)TMj-zoJu$WI<%OJk04kOi{+QHnJkn+!XdF;#?vQv(@>Qq^yB;!c@$ zx?eQt#DD1y`2|2ptY|$&SBi*>L!=`dW<}~aX0~u?dR5s(&iD=7oK0*w6co%l=%j#0 zjeaZvY`B@=p8FWS%xtjdyvj!_%QVi(qks5L!d5|+<~-$JBB^IR)H_mWGHf{doafq= zcB9`Q^~c#uTM2BUo-|E1_%VyG#l>MucD!sLDAbatDxzA=AbFR;K_1mg^^s0VxKRvK zCF94zEy$Aw^ZGp3Q*RT#Vf9ZR%?JyF?M|X{~mJvsad)8a( z1-#oTpeG$b$<|JCI%=YIcu_t{-4^Wn=pS3|?kCd%Kg$1=g2Z`7=``8eiBkY7^vqe6Ocs6ET)X+dg z;TI)T-%P6SJ5Mb$YIRo+Q$op3!zIQraI?4IR67m_>&BmJez>tNP(=-A21_cx$Bs)l zF^#aKo)lp4W7!<{O+#I2Xh5%JhBD3;XG{VBW|I*dnN)4_y!i{xXAB_#Gt4g-6b_l7 z?i3Fu+WIsyH&!Ua14NIVRX?oQng$rFow_k_2{e82(-;amIhd-_JchMlCv?M#;Z{64 ziUwT5)Hnhw<1oq%1^+~9%M_*NglSWrNSqSmuJ0CWUZyM_Oolh5Tk;Hsc@CaZB-J0f z(_{V`;kN@mQHTHlm8CoqsWV28Q67Q2vnEyzG#z@JSICzf33jqVJ_>P4n~A_9`g%+J zy3X~MuWQt`49D(?>G+2=A^T<+^(s$hJnCeu@Xac2;$d~m9V1{y>xq;|vTfy6& z0<9d%T9NPq1WuA*kQp*sgqw*Xhh3FJw$b9AulG{@O9O6TO;kzfPtwJpgW0qU!1w2Y za_xh2&-U`K`H@pYPfw!RX%Nf^MeRy67KZiY{l(&TL%TtU3ddP$t? 
zFKyJ}n$9^(D`o2oB1myi5{cFC3}x9M zfFE`ED?@le886VA3UUe{zi*sWrPMPcV2Gq#TpxBy`6^h(Qkv&3Y;h{NXV`Ci6Rio&}9*^afC6$cGTDGK8dWH%NhwFP>hSZ{rk5FN^pQF%Uz?7;Mupk-)g5vm`K zxm?{?sKUzj7BE0ufyN`a4xwrF6<|pZ{vg)5R%K&2a+eer?$XY+n>L1H*B`Yg^j%~+ zOURf{f^m58GDiQD>?ZVVq4b$7?0`|yIf1`H;?qeqHHybVoCHOxI1dih(_(~LmKFy} z)nOZ@V%Rt_dKX;If+Ii~0;pQ`5xKn7haaG#GehH(li;GIqYUi;g7^UYzNz&RS+f%5 z$_+nyg73A)+XH7_Im63je)ICj=!DU|5BBKmj6UXn-Fi2c8CUuL>kp>H`}#)$8WO#( R<06IFntQ%Z`s(1f{~JSDA$kA+ literal 0 HcmV?d00001 From e6b7114cbade54af3929d6052219bfefe479598e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:42:12 +0000 Subject: [PATCH 40/60] Merge benchmarks and tests, remove dead code - Merge bench_all_gather_matmul_hbm_buffer.py into bench_all_gather_matmul.py with @bench.axis("algorithm", ["baseline", "hbm_buffer"]) - Merge test_all_gather_matmul_hbm_buffer.py into test_all_gather_matmul.py with shared _make_reference helper; remove variant parametrize - Remove dead "push" workspace allocation from all_gather_matmul_preamble - Remove all_gather_matmul_variant field from FusedConfig (only one variant has ever existed; no dispatch needed) Agent-Logs-Url: https://github.com/ROCm/iris/sessions/ca7c4ca1-4b6d-4426-bd76-dd24e89f436a Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- benchmark/ops/bench_all_gather_matmul.py | 26 +- .../ops/bench_all_gather_matmul_hbm_buffer.py | 46 ---- iris/ops/all_gather_matmul.py | 82 +++--- iris/ops/config.py | 11 - tests/ops/test_all_gather_matmul.py | 246 +++++++++++++----- .../ops/test_all_gather_matmul_hbm_buffer.py | 202 -------------- 6 files changed, 234 insertions(+), 379 deletions(-) delete mode 100644 benchmark/ops/bench_all_gather_matmul_hbm_buffer.py delete mode 100644 tests/ops/test_all_gather_matmul_hbm_buffer.py diff --git a/benchmark/ops/bench_all_gather_matmul.py b/benchmark/ops/bench_all_gather_matmul.py index 9a50d3180..d232d4ded 100644 --- 
a/benchmark/ops/bench_all_gather_matmul.py +++ b/benchmark/ops/bench_all_gather_matmul.py @@ -2,15 +2,20 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. -"""Benchmark for fused all-gather + GEMM (iris.ops).""" +"""Benchmark for fused all-gather + GEMM (iris.ops): baseline vs HBM-buffered.""" import torch import iris.bench as bench from iris.ops import FusedConfig, all_gather_matmul_preamble +from iris.ops.all_gather_matmul_hbm_buffer import ( + all_gather_matmul_hbm_buffer as _hbm_buffer, + all_gather_matmul_hbm_buffer_preamble, +) @bench.register @bench.axis("num_ranks", [2, 4, 8]) +@bench.axis("algorithm", ["baseline", "hbm_buffer"]) @bench.axis("M", [1024, 4096, 16384]) @bench.axis("N", [3584]) @bench.axis("K", [8192]) @@ -18,23 +23,32 @@ def all_gather_matmul(state, ctx): M, N, K = state["M"], state["N"], state["K"] dtype = state["dtype"] + algorithm = state["algorithm"] world_size = ctx.get_num_ranks() K_local = K // world_size A_sharded = ctx.zeros((M, K_local), dtype=dtype) A_sharded.fill_(1.0) B = torch.randn((K, N), device="cuda", dtype=dtype) - C = torch.zeros((M, N), device="cuda", dtype=dtype) config = FusedConfig() - workspace = all_gather_matmul_preamble(ctx, A_sharded, B, config) state.set_flops(2 * M * N * K) state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) - state.exec( - lambda: ctx.ops.all_gather_matmul(C, A_sharded, B, config=config, workspace=workspace), - ) + if algorithm == "baseline": + C = torch.zeros((M, N), device="cuda", dtype=dtype) + workspace = all_gather_matmul_preamble(ctx, A_sharded, B, config) + state.exec( + lambda: ctx.ops.all_gather_matmul(C, A_sharded, B, config=config, workspace=workspace), + ) + else: # hbm_buffer + C = ctx.zeros((M, N), dtype=dtype) + workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) + state.exec( + lambda: _hbm_buffer(ctx, C, A_sharded, B, config=config, workspace=workspace), + preamble_fn=lambda: 
C.zero_(), + ) if __name__ == "__main__": diff --git a/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py b/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py deleted file mode 100644 index f4c6a57ee..000000000 --- a/benchmark/ops/bench_all_gather_matmul_hbm_buffer.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved. - -"""Benchmark for HBM-buffered all-gather + GEMM (iris.ops).""" - -import torch -import iris.bench as bench -from iris.ops import FusedConfig -from iris.ops.all_gather_matmul_hbm_buffer import ( - all_gather_matmul_hbm_buffer as _all_gather_matmul_hbm_buffer, - all_gather_matmul_hbm_buffer_preamble, -) - - -@bench.register -@bench.axis("num_ranks", [2, 4, 8]) -@bench.axis("M", [1024, 4096, 16384]) -@bench.axis("N", [3584]) -@bench.axis("K", [8192]) -@bench.axis("dtype", [torch.float16]) -def bench_all_gather_matmul_hbm_buffer(state, ctx): - M, N, K = state["M"], state["N"], state["K"] - dtype = state["dtype"] - world_size = ctx.get_num_ranks() - K_local = K // world_size - - A_sharded = ctx.zeros((M, K_local), dtype=dtype) - A_sharded.fill_(1.0) - B = torch.randn((K, N), device="cuda", dtype=dtype) - C = ctx.zeros((M, N), dtype=dtype) - - config = FusedConfig() - workspace = all_gather_matmul_hbm_buffer_preamble(ctx, A_sharded, B, config) - - state.set_flops(2 * M * N * K) - state.set_bytes((world_size - 1) * M * K_local * A_sharded.element_size()) - - state.exec( - lambda: _all_gather_matmul_hbm_buffer(ctx, C, A_sharded, B, config=config, workspace=workspace), - preamble_fn=lambda: C.zero_(), - ) - - -if __name__ == "__main__": - bench.main() diff --git a/iris/ops/all_gather_matmul.py b/iris/ops/all_gather_matmul.py index e92dc255b..041ff6a0c 100644 --- a/iris/ops/all_gather_matmul.py +++ b/iris/ops/all_gather_matmul.py @@ -180,18 +180,9 @@ def all_gather_matmul_preamble( shape=(M, N, K), dtype=A_sharded.dtype, world_size=world_size, - 
variant=config.all_gather_matmul_variant, prepared=True, ) - # Allocate push variant workspace - if config.all_gather_matmul_variant == "push": - num_m_tiles = (M + config.block_size_m - 1) // config.block_size_m - num_k_tiles = (K_local + config.block_size_k - 1) // config.block_size_k - ws.a_inbox = shmem.zeros((world_size, M, K_local), dtype=A_sharded.dtype) - ws.signal_flags = shmem.zeros((world_size, world_size, num_m_tiles, num_k_tiles), dtype=torch.int32) - shmem.barrier() - return ws @@ -245,46 +236,39 @@ def all_gather_matmul( even_k = K_local % config.block_size_k == 0 num_k_blocks_local = (K_local + config.block_size_k - 1) // config.block_size_k - variant = config.all_gather_matmul_variant - - if variant == "pull": - num_tiles_m = (M + config.block_size_m - 1) // config.block_size_m - num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n - num_tiles = num_tiles_m * num_tiles_n - # grid = (num_tiles,) - grid = (num_sms,) - _fused_all_gather_matmul_kernel[grid]( - A_sharded, - B, - output_tensor, - bias_ptr, - M, - N, - K, - K_local, - stride_am, - stride_ak, - stride_bk, - stride_bn, - stride_cm, - stride_cn, - stride_bias, - shmem.get_device_context(), - rank, - world_size, - config.block_size_m, - config.block_size_n, - config.block_size_k, - config.group_size_m, - num_sms, - config.num_xcds, - num_k_blocks_local, - use_bias, - even_k, - config.allow_tf32, - ) - else: - raise ValueError(f"Unsupported all_gather_matmul_variant '{variant}'. 
Only 'pull' is currently supported.") + num_tiles_m = (M + config.block_size_m - 1) // config.block_size_m + num_tiles_n = (N + config.block_size_n - 1) // config.block_size_n + grid = (num_sms,) + _fused_all_gather_matmul_kernel[grid]( + A_sharded, + B, + output_tensor, + bias_ptr, + M, + N, + K, + K_local, + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + stride_bias, + shmem.get_device_context(), + rank, + world_size, + config.block_size_m, + config.block_size_n, + config.block_size_k, + config.group_size_m, + num_sms, + config.num_xcds, + num_k_blocks_local, + use_bias, + even_k, + config.allow_tf32, + ) if not async_op: shmem.barrier() diff --git a/iris/ops/config.py b/iris/ops/config.py index 6a4a0575a..530df7816 100644 --- a/iris/ops/config.py +++ b/iris/ops/config.py @@ -34,9 +34,6 @@ class FusedConfig: all_reduce_variant: All-reduce algorithm variant. Options: "atomic", "ring", "one_shot", "two_shot", "spinlock". Default: "two_shot". all_reduce_num_rings: Number of concurrent rings (for ring variant). Default: 1. - all_gather_matmul_variant: All-gather + matmul algorithm variant. Options: - "pull" (on-demand pull from remote ranks). - Default: "pull". 
Example: >>> # Use defaults @@ -64,7 +61,6 @@ class FusedConfig: # CCL-specific parameters all_reduce_variant: str = "two_shot" # atomic, ring, one_shot, two_shot, spinlock all_reduce_num_rings: int = 1 - all_gather_matmul_variant: str = "pull" # pull def validate(self, world_size: Optional[int] = None): """ @@ -106,10 +102,3 @@ def validate(self, world_size: Optional[int] = None): if self.all_reduce_num_rings <= 0: raise ValueError(f"all_reduce_num_rings must be positive, got {self.all_reduce_num_rings}") - - # Validate all_gather_matmul_variant - valid_ag_variants = ["pull"] - if self.all_gather_matmul_variant not in valid_ag_variants: - raise ValueError( - f"all_gather_matmul_variant must be one of {valid_ag_variants}, got {self.all_gather_matmul_variant}" - ) diff --git a/tests/ops/test_all_gather_matmul.py b/tests/ops/test_all_gather_matmul.py index 9ceaa7581..0b3702a6c 100644 --- a/tests/ops/test_all_gather_matmul.py +++ b/tests/ops/test_all_gather_matmul.py @@ -1,11 +1,12 @@ # SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. """ -Tests for fused all_gather + matmul operation. +Tests for fused all_gather + matmul operations. Each rank has A_sharded (M x K_local), B is replicated. The operation gathers A from all ranks and computes C = A_gathered @ B. +Covers both the baseline pull kernel and the HBM-buffered kernel. 
""" import pytest @@ -13,6 +14,91 @@ import torch.distributed as dist import iris +from iris.ops.all_gather_matmul_hbm_buffer import ( + all_gather_matmul_hbm_buffer, + all_gather_matmul_hbm_buffer_preamble, +) +from iris.ops.config import FusedConfig + + +def _make_reference(rank, world_size, M, K_local, N, dtype): + """Build a torch reference output for all_gather + matmul.""" + device = f"cuda:{rank}" + K = K_local * world_size + + torch.manual_seed(42 + rank) + A_sharded = torch.randn(M, K_local, dtype=dtype, device=device) + + torch.manual_seed(123) + B = torch.randn(K, N, dtype=dtype, device=device) + + A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=device) for _ in range(world_size)] + dist.all_gather(A_gathered_list, A_sharded) + A_gathered_ref = torch.cat(A_gathered_list, dim=1) + ref_output = torch.matmul(A_gathered_ref, B) + torch.cuda.synchronize() + return A_sharded, B, ref_output + + +@pytest.mark.parametrize( + "dtype, atol, rtol", + [ + (torch.float16, 1e-2, 1e-2), + ], +) +@pytest.mark.parametrize( + "M,K_local,N", + [ + (128, 32, 64), + (256, 64, 128), + ], +) +def test_all_gather_matmul_baseline(dtype, atol, rtol, M, K_local, N): + """Test baseline all_gather_matmul against torch all_gather + matmul.""" + if not dist.is_initialized(): + pytest.skip("torch.distributed not initialized") + + heap_size = 2**33 + ctx = iris.iris(heap_size) + rank = ctx.get_rank() + world_size = ctx.get_num_ranks() + + K = K_local * world_size + + min_block_size = 32 + if M < min_block_size or K_local < min_block_size or N < min_block_size: + pytest.skip(f"Problem too small for min block size {min_block_size}") + + A_sharded, B, ref_output = _make_reference(rank, world_size, M, K_local, N, dtype) + device = f"cuda:{rank}" + + A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) + A_sharded_shmem.copy_(A_sharded) + B_shmem = ctx.zeros((K, N), dtype=dtype) + B_shmem.copy_(B) + output = ctx.zeros((M, N), dtype=dtype) + + ctx.barrier() + + config = ( + 
FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) + if M <= 256 or K_local <= 64 or N <= 128 + else FusedConfig() + ) + + assert M >= config.block_size_m + assert K_local >= config.block_size_k + assert N >= config.block_size_n + + ctx.ops.all_gather_matmul(output, A_sharded_shmem, B_shmem, config=config) + + torch.cuda.synchronize() + ctx.barrier() + + max_diff = (output - ref_output).abs().max().item() + assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( + f"Rank {rank}: Max diff {max_diff}, expected < {atol}" + ) @pytest.mark.parametrize( @@ -29,91 +115,124 @@ ], ) @pytest.mark.parametrize( - "variant", + "staged_a_layout", [ - "pull", + "k_contiguous", + "m_contiguous", ], ) -def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N, variant): - """Test all_gather_matmul against torch all_gather + matmul.""" +def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a_layout): + """Test all_gather_matmul_hbm_buffer against torch all_gather + matmul.""" if not dist.is_initialized(): pytest.skip("torch.distributed not initialized") heap_size = 2**33 - shmem = iris.iris(heap_size) - rank = shmem.get_rank() - world_size = shmem.get_num_ranks() - - K = K_local * world_size # Full K dimension - - # Skip if problem size is too small for world_size or block sizes - # With default or custom configs, we need at least one tile - min_block_size = 32 # Smallest block size we use - if M < min_block_size: - pytest.skip(f"M={M} too small (need >= {min_block_size})") - if K_local < min_block_size: - pytest.skip(f"K_local={K_local} too small (need >= {min_block_size})") - if N < min_block_size: - pytest.skip(f"N={N} too small (need >= {min_block_size})") - - # Seed for reproducibility - different seed per rank for A_sharded - torch.manual_seed(42 + rank) - A_sharded = torch.randn(M, K_local, dtype=dtype, device=f"cuda:{rank}") + ctx = iris.iris(heap_size) + rank = ctx.get_rank() + world_size = ctx.get_num_ranks() - # B must be 
identical on all ranks - torch.manual_seed(123) - B = torch.randn(K, N, dtype=dtype, device=f"cuda:{rank}") + K = K_local * world_size - # Reference: torch all_gather + matmul - A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_gathered_list, A_sharded) - A_gathered_ref = torch.cat(A_gathered_list, dim=1) # (M, K) - ref_output = torch.matmul(A_gathered_ref, B) - torch.cuda.synchronize() + A_sharded, B, ref_output = _make_reference(rank, world_size, M, K_local, N, dtype) - # Create shmem tensors directly - A_sharded_shmem = shmem.zeros((M, K_local), dtype=dtype) + A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) A_sharded_shmem.copy_(A_sharded) - B_shmem = shmem.zeros((K, N), dtype=dtype) + B_shmem = ctx.zeros((K, N), dtype=dtype) B_shmem.copy_(B) - output = shmem.zeros((M, N), dtype=dtype) + output = ctx.zeros((M, N), dtype=dtype) - shmem.barrier() + ctx.barrier() - # Run fused all_gather + matmul using shmem.ops API - from iris.ops.config import FusedConfig + config = FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) - # Use appropriate block sizes based on problem size - # For small problems, use smaller blocks - if M <= 256 or K_local <= 64 or N <= 128: - config = FusedConfig( - block_size_m=64, - block_size_n=64, - block_size_k=32, - all_gather_matmul_variant=variant, - ) - else: - config = FusedConfig(all_gather_matmul_variant=variant) - - # Validate config against problem size - assert M >= config.block_size_m, f"M ({M}) must be >= block_size_m ({config.block_size_m})" - assert K_local >= config.block_size_k, f"K_local ({K_local}) must be >= block_size_k ({config.block_size_k})" - assert N >= config.block_size_n, f"N ({N}) must be >= block_size_n ({config.block_size_n})" + workspace = all_gather_matmul_hbm_buffer_preamble( + ctx, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout + ) - shmem.ops.all_gather_matmul(output, A_sharded_shmem, 
B_shmem, config=config) + all_gather_matmul_hbm_buffer( + ctx, + output, + A_sharded_shmem, + B_shmem, + config=config, + workspace=workspace, + staged_a_layout=staged_a_layout, + trace=False, + ) torch.cuda.synchronize() - shmem.barrier() + ctx.barrier() max_diff = (output - ref_output).abs().max().item() + assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( + f"Rank {rank}: Max diff {max_diff}, expected < {atol} " + f"(staged_a_layout={staged_a_layout}, M={M}, K_local={K_local}, N={N})" + ) + + +@pytest.mark.parametrize( + "dtype, atol, rtol", + [ + (torch.float16, 1e-2, 1e-2), + ], +) +@pytest.mark.parametrize( + "M,K_local,N", + [ + (128, 32, 64), + ], +) +def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N): + """Test all_gather_matmul_hbm_buffer with a bias vector.""" + if not dist.is_initialized(): + pytest.skip("torch.distributed not initialized") + + heap_size = 2**33 + ctx = iris.iris(heap_size) + rank = ctx.get_rank() + world_size = ctx.get_num_ranks() + + K = K_local * world_size + A_sharded, B, ref_output_no_bias = _make_reference(rank, world_size, M, K_local, N, dtype) + device = f"cuda:{rank}" + + torch.manual_seed(77) + bias = torch.randn(M, dtype=dtype, device=device) + ref_output = ref_output_no_bias + bias[:, None] + + A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) + A_sharded_shmem.copy_(A_sharded) + B_shmem = ctx.zeros((K, N), dtype=dtype) + B_shmem.copy_(B) + bias_shmem = ctx.zeros((M,), dtype=dtype) + bias_shmem.copy_(bias) + output = ctx.zeros((M, N), dtype=dtype) + + ctx.barrier() + + config = FusedConfig(block_size_m=64, block_size_n=64, block_size_k=32) + + all_gather_matmul_hbm_buffer( + ctx, + output, + A_sharded_shmem, + B_shmem, + bias=bias_shmem, + config=config, + trace=False, + ) + + torch.cuda.synchronize() + ctx.barrier() + + max_diff = (output - ref_output).abs().max().item() assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( - f"Rank {rank}: Max diff 
{max_diff}, expected < {atol}" + f"Rank {rank}: Max diff {max_diff}, expected < {atol} (with bias)" ) if __name__ == "__main__": - # For quick debugging import sys if not dist.is_initialized(): @@ -122,7 +241,4 @@ def test_all_gather_matmul(dtype, atol, rtol, M, K_local, N, variant): rank = dist.get_rank() torch.cuda.set_device(rank) - - print(f"[Rank {rank}] Testing all_gather_matmul...") - test_all_gather_matmul(torch.float16, 128, 32, 64) - print(f"[Rank {rank}] ✓ Test passed!") + print(f"[Rank {rank}] Tests in this file require pytest + torchrun. See tests/run_tests_distributed.py") diff --git a/tests/ops/test_all_gather_matmul_hbm_buffer.py b/tests/ops/test_all_gather_matmul_hbm_buffer.py deleted file mode 100644 index 6a2ca6f9d..000000000 --- a/tests/ops/test_all_gather_matmul_hbm_buffer.py +++ /dev/null @@ -1,202 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. - -""" -Tests for fused all_gather + matmul using the HBM staging buffer implementation. - -Each rank has A_sharded (M x K_local), B is replicated. -The operation gathers A from all ranks into a local HBM buffer and computes C = A_gathered @ B. 
-""" - -import pytest -import torch -import torch.distributed as dist - -import iris -from iris.ops.all_gather_matmul_hbm_buffer import ( - all_gather_matmul_hbm_buffer, - all_gather_matmul_hbm_buffer_preamble, -) -from iris.ops.config import FusedConfig - - -@pytest.mark.parametrize( - "dtype, atol, rtol", - [ - (torch.float16, 1e-2, 1e-2), - ], -) -@pytest.mark.parametrize( - "M,K_local,N", - [ - (128, 32, 64), - (256, 64, 128), - ], -) -@pytest.mark.parametrize( - "staged_a_layout", - [ - "k_contiguous", - "m_contiguous", - ], -) -def test_all_gather_matmul_hbm_buffer(dtype, atol, rtol, M, K_local, N, staged_a_layout): - """Test all_gather_matmul_hbm_buffer against torch all_gather + matmul.""" - if not dist.is_initialized(): - pytest.skip("torch.distributed not initialized") - - heap_size = 2**33 - ctx = iris.iris(heap_size) - rank = ctx.get_rank() - world_size = ctx.get_num_ranks() - - K = K_local * world_size # Full K dimension - - # Seed for reproducibility - different seed per rank for A_sharded - torch.manual_seed(42 + rank) - A_sharded = torch.randn(M, K_local, dtype=dtype, device=f"cuda:{rank}") - - # B must be identical on all ranks - torch.manual_seed(123) - B = torch.randn(K, N, dtype=dtype, device=f"cuda:{rank}") - - # Reference: torch all_gather + matmul - A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_gathered_list, A_sharded) - A_gathered_ref = torch.cat(A_gathered_list, dim=1) # (M, K) - ref_output = torch.matmul(A_gathered_ref, B) - torch.cuda.synchronize() - - # Create ctx tensors - A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) - A_sharded_shmem.copy_(A_sharded) - B_shmem = ctx.zeros((K, N), dtype=dtype) - B_shmem.copy_(B) - output = ctx.zeros((M, N), dtype=dtype) - - ctx.barrier() - - # Use small block sizes for small test problems - config = FusedConfig( - block_size_m=64, - block_size_n=64, - block_size_k=32, - ) - - workspace = 
all_gather_matmul_hbm_buffer_preamble( - ctx, A_sharded_shmem, B_shmem, config=config, staged_a_layout=staged_a_layout - ) - - all_gather_matmul_hbm_buffer( - ctx, - output, - A_sharded_shmem, - B_shmem, - config=config, - workspace=workspace, - staged_a_layout=staged_a_layout, - trace=False, - ) - - torch.cuda.synchronize() - ctx.barrier() - - max_diff = (output - ref_output).abs().max().item() - - assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( - f"Rank {rank}: Max diff {max_diff}, expected < {atol} " - f"(staged_a_layout={staged_a_layout}, M={M}, K_local={K_local}, N={N})" - ) - - -@pytest.mark.parametrize( - "dtype, atol, rtol", - [ - (torch.float16, 1e-2, 1e-2), - ], -) -@pytest.mark.parametrize( - "M,K_local,N", - [ - (128, 32, 64), - ], -) -def test_all_gather_matmul_hbm_buffer_with_bias(dtype, atol, rtol, M, K_local, N): - """Test all_gather_matmul_hbm_buffer with a bias vector.""" - if not dist.is_initialized(): - pytest.skip("torch.distributed not initialized") - - heap_size = 2**33 - ctx = iris.iris(heap_size) - rank = ctx.get_rank() - world_size = ctx.get_num_ranks() - - K = K_local * world_size - - torch.manual_seed(42 + rank) - A_sharded = torch.randn(M, K_local, dtype=dtype, device=f"cuda:{rank}") - - torch.manual_seed(123) - B = torch.randn(K, N, dtype=dtype, device=f"cuda:{rank}") - - torch.manual_seed(77) - bias = torch.randn(M, dtype=dtype, device=f"cuda:{rank}") - - # Reference: torch all_gather + matmul + bias - A_gathered_list = [torch.zeros(M, K_local, dtype=dtype, device=f"cuda:{rank}") for _ in range(world_size)] - dist.all_gather(A_gathered_list, A_sharded) - A_gathered_ref = torch.cat(A_gathered_list, dim=1) - ref_output = torch.matmul(A_gathered_ref, B) + bias[:, None] - torch.cuda.synchronize() - - # Create ctx tensors - A_sharded_shmem = ctx.zeros((M, K_local), dtype=dtype) - A_sharded_shmem.copy_(A_sharded) - B_shmem = ctx.zeros((K, N), dtype=dtype) - B_shmem.copy_(B) - bias_shmem = ctx.zeros((M,), dtype=dtype) - 
bias_shmem.copy_(bias) - output = ctx.zeros((M, N), dtype=dtype) - - ctx.barrier() - - config = FusedConfig( - block_size_m=64, - block_size_n=64, - block_size_k=32, - ) - - all_gather_matmul_hbm_buffer( - ctx, - output, - A_sharded_shmem, - B_shmem, - bias=bias_shmem, - config=config, - trace=False, - ) - - torch.cuda.synchronize() - ctx.barrier() - - max_diff = (output - ref_output).abs().max().item() - - assert torch.allclose(output, ref_output, atol=atol, rtol=rtol), ( - f"Rank {rank}: Max diff {max_diff}, expected < {atol} (with bias)" - ) - - -if __name__ == "__main__": - # For quick debugging - import sys - - if not dist.is_initialized(): - print("Run with: torchrun --nproc_per_node=2 tests/ops/test_all_gather_matmul_hbm_buffer.py") - sys.exit(1) - - rank = dist.get_rank() - torch.cuda.set_device(rank) - - print(f"[Rank {rank}] Testing all_gather_matmul_hbm_buffer...") - test_all_gather_matmul_hbm_buffer(torch.float16, 1e-2, 1e-2, 128, 32, 64, "k_contiguous") - print(f"[Rank {rank}] ✓ Test passed!") From 5fac461a5688e95be4908ee392e4028f5d285ba3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:43:40 +0000 Subject: [PATCH 41/60] Update benchmark comparison plots with MxNxK x-axis labels Re-run after merge; results consistent with previous run. 
Agent-Logs-Url: https://github.com/ROCm/iris/sessions/ca7c4ca1-4b6d-4426-bd76-dd24e89f436a Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- docs/benchmark-results/latency_comparison.png | Bin 73832 -> 79912 bytes docs/benchmark-results/tflops_comparison.png | Bin 75345 -> 85572 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/docs/benchmark-results/latency_comparison.png b/docs/benchmark-results/latency_comparison.png index cef2396c2a1ee58f641489cbe47b644faecdab46..f335cb194a704d8d2a9abbb97b0ce630fb68179e 100644 GIT binary patch literal 79912 zcmeFZcTm<>+b(KijEOZG5CstxP*4z+CS4_<4agu+Xd0JNsIgZ}0tnb7uc}W}lhkOy1;0;Q5uc?)z%@x+gDtY3DZjZJRc2+DW)iqb*q_OFZ(u_H9b~-(70{j^j-nZH(}lZm#K+IkKJXFSd?V%eJFlp z&xzT0rK&c1rEC)!EQ$V|?QO{^yl(d!YUQ{sr>i(3tIF7YZcMWI1=G~rq zN?2GcNh6CI92`70HpY>%c!1kjZ7Dh?X85n&qIE^{JtueX-d*Zy@Xni-b)S&@W7%+F zb~?IXEit-Voz3&To+-N+1iO0BV z|LSzD*rKR!g<%8!BVts*vNM~r%*s0ZH=*U~P3r3h85#S>$CF$q@-FG=>1mXBtX>&k zHhyq`TXJ=2wy`BiGw|WV!x`;i2Kbw$;bv9cW}A}L*veooIy0FVbq}2q3D(iUL;PE5 zs@l>F0;$xa6!VtE_MJ9Ocy9Nbr^oyIWBU5~gmWrAM0i4EPH1K)ChHVEG4RZ2Pb`?o zuQU&R_3C^=@$K8U1$0!qy1SK=Go2Y@&$p%t{D7+P-~<-$8-r_kKSfubE?wf09oqy|^Y&`-InGzg~$s^>}sb2Z2MSmV60ULixOk$2)UsX9cz8rw6JPbF8~FZ+wx|F1&g5z<~pI z^4Fhdn>8LQ&EiVYZ^HAjXiqnC8P95MioL>+mzPJoFi;qE$Sy_ zCX0)UFU=Zb=|$agJr?`v#64YIUGak7qzfB;pwZ5^A2ybwBRgv1@-9}!jji=;X8Z2l zs)mNqMtRx=jsj~#zSKQTOer_MyjKs}{Ns-t%ru_{vC{J$945L1th)1+Crj3gi&y6~9Y&hibvwCq3e^k;+pdX>;`BYb5$A|8tjJ~SaDU9<@w3hKeueB*|#rtae9z$DUC{{@>q57h=)c+ zMUh*h*xK1WvM)*<@!={yJ%iO7l#`RgQt|FxOr1e#b>&jt@q5o}L+yv^j&N$_)}^nc zc&?tTT{F&TA8|iI`SsUdmpf?|569c!cL#Uc5?DJTCTyMQiew9w$f7wZ%9->#i4b3Yf7u8O_5P^cu-I*UdhN%eUwI;{vG3*u$NgDt?_P)Q(hhq=y??s6sXN}!Cbm6 zw>NLK|M=>M)j_fKdZEd}DE^+CQS$mhEUK!i?rV#K=8drrVp45;3Y|yVGvWjt$4rNo zasBk(KX-R`^V;+jUSx6*Ud}4*>Cq7t6+IgwBjB;G;|bQ2`crZ1JE0|OO9|m_Qx%$` z1JRU6s6}cqv9a_Wc#^b?j7hqltIB15Osb(t{s z81#kI*jt^9%>UNkzSs5jR|8KdE8tyoY|#yAXl$JQ?87zE^iq+PM*Ew9>s;ce-Px7z 
z-o2Z8Qmx~56RUl8xbb+a|8%;vlvD#=ZDL1lgjkAEWe}}Yp+Lp`63g~P&Cmx+{QXb!~alYy1h1*>hAb>5{ul9=UY}o4q^31ndIB!%tDB zrl#`s_VfjDYT7auIn+Ow*y8;u%1`tIZ}y*4Gc!uK&c2BW>#PY};YdqTXsk?#!pGIY zXttU!l7)qZwj83p876*`z6YI4$3;X$j7&^c7THiz@%s4g*M)>uS=^NMZ2z+D*I(1} z_n3Wrbz4eCCX<6n!1fRF9Wx<<4(5@)*Y+_mSnU6`v%Out%$Kn-&u-9uZcP0FlUT95 zpOesj5tqy!x9KYh`f^TA^9((kK8=jT9};m<3mU9>nt&xmO9>-8@VZdszwPnxZcfeY zOd4D5VmI|(x9JCZJTC(RXp+=3Vt-=(>HbA=an6+XFv+C**oLU1(?um=-hra4GmS?% zKa}5J|NPFUqr|i1gi4~RT13R3QK5nE_65(L9j<)w)OogGFx+*2^Y1x<_=TK0g>C6w zNt*luLPCRiS9BzvTC^lym655sQkG^?dkBlaw9fJK+h5VOMq=j!%?7GM1o49R{{B1d z@4rXYimhw?@WT&5v&I?fDJ5$zkvaw*3!k<=6?N~Z%f|Z^j=NlTJx$co=-u$7?ssBTuz{Hn{g2@*0UD%f+4KY|KQVlc1jY*PqDm>nqdz_rDk$ zl@u>8EyZhOncwj^sQGDPA{jN_Y^txKBlkv>=*rj9&<@eFXB8%kmy?a!J3BR*+`it{ z*Vhkt{P;zJYRU*6;1Y^FEj|5{FQVzbYYezsCXa>4YsWW}_vG~3cj$3=w{#FkeO;tD z?cTl5N178z`S@mZ$Asn9VakD~B6Jcb_#YK2%?4;*;7!eVnrKQz}Wn$hO|I%)-=82eRF{=7 zV~;9MKP_PC_FP}}`f}&)-N6IP*jG)@CAK@?yjfmaDsI)0SuZEH8ZgMmz;CUTqEn<4 zCAl+@T~X57+B$zUxnQP2nrj$MRlu4XH9=g0`4KRHhK(j^fj!hwp5ON@=vtxX51R5ZgE$7^{3LiSUy~pFRtJ*)g`KD81jWC z)i9qvU6;B(7InLOD9eR8WH4+|wT)Sf((p)5^d;a&SsmG)YBlBMbzOOOyry-BftO^k zbZwoSf*(D4gpKlo&#J?;A)1nZ{nJI?vRq(+ShFuFLOHGX1E!~g{QV=>R|duM zvDi?EbfPE63!9QOTYu~9^<1}V9>lsqnaZ5GVP}^iEQ&%FHn4=5=f z9v-BzFU?P;)QT>1l)b!eb^Gblr;_*gAK~KS5_g?zwUqr3z|K6p6!3P>o;^WxGc)#R z%dPRcS6`~8gonF~u#0QXElf}M)DCl>U+Nv@U!Fa7_H4t5i|N@AbXwz~oNKTD)aqrR zj=Spw?pt4-^yJ05a2RfoaO!pl#^YhTbmB zbf-)|zrBa08u%fY%O+1pY^F|JBlmiwQD!B>xGMGN(W6ayb|#Ap3+3YO=f(~_tzFp+{X>E*$| zXIVbm)}EwOq(R4}lgS}A*Jj{6Cr6>wqiS$gIyySKe0{x@v(n7WY-uDRWy*aeu{B_9 z%V31t)V=<^+-3{8#i{=06kW0L_KZsoqb*sPU_3?mR+U~?8x>`w-E^zTbh_aTLnFaMVpCp3D;Yn zQ7dXO6zU%E<;_QIUtkypN!r!1jM_}S(x0otMKnYCZL%|6Z{DO?Qkn>fIMOaAl-l_Q6=7`Svfpb~7bU^;c?k0NN&8I^_HZ%7VS` zvT0{_(x(CMKYf4Mu~E=yxXHf`tD>==mHseY+t}E6aL*pSEUV6JtDUPJYl|kOOJmD< zw16ypIp)nL$`-KR3=Z*Iheamgd%VUyaoG&vaV&NyeAV~v-OCF%u=Yd~=g0E}Ias$e z-TmvYXYP7=4dgzkFg)uyUCpmD_#_~}aH2iKB)kkTj*m+tv)sHB7^JgqGH)=PdwF@8 zp@W{!QtjE`;bns}_CxX+fBp5BvLyiaK!0U$XvyLLL&l>=ioV}!AD<~TU(XYfRbL0O 
zs#1)VNwc&YYfEzz5mVRoM5*G<+Jy{iSTOQBZIhqk+cmDjVD`LUze*PjqXS7=uqk8Ue-^Zn6 zsRkh5>?Sf%TwFW={L7GCgrbpmHA<4dtEH^0>_js2CEv#kJf;r^9hKvi&P=vvl$m#G z<{2A#_JBhe6F8Dqlruer)<-{g#$)AXf+*Sfk+oz2sq>rGJ#!NYUY;8-c$u3n*3+(A z8Di_gWa8Ot=-CU@*_tXUBGNtLUWi4^a461meJ#k-X|kugVZPI*BrDU^**U~@X2?Iq zuHwpEtK3{`aJwbn?nA1(fc_RhDLI2n2zueQcIQ}J7j zS5BB(ecW%Km^k=pbkw5Qar_DSApqQJG~&GQOR~K}>19Ey+=BdDcO5(pUez&@<;D#h zW6;)vYl@ef8~pZ-1A2447pGvYNjLY|AefA=(`l5D+l5}-MD};{FaS{=UzjpPy7s9vaW- zt(h;DIrd0cSh(-Y7lZvrE>j^zMAk=L(hRB$JQ*hyc%q`dzP>I6@H<}VCAGh{xtX`j zfbcc&f-DyG*)!|wLsFohUUS9n%OjyS#b+|nkLdNUP$*Y{wRSQ$^!H(Fw^lql&Z-^p zXI)Q^Xzd!m!Q5CoC{%`EN@ga{&V%RVvHhBX0dQgZycg?1i14gpwC2Y@etc%z|Dgge z;q+bwNl7m(n3!IVNSbJx#Dcr3ltdzz79=i7Qo zla`Jy4wSC+^p9p=-ai10@nNuD23^uvd+H$d8P}rOQTpah-@t&%KYJKJy04&pFQGVb zTA|9(^O(vhRf(T;i zJ1FWpXIc7BS)O~c(Rx>9S1p=DJf6b=$x0|yDz(@;p|gFDIt=93k_Fg z)nlY+Avg55i!J}-CMUWWO^E8v9jHfE4ikB1fCadf!^2Cih9I)kXk1RmSzb5DT;wVS zEIb$MQ3XnyCa5!5PrVJI=++eF7xEVxW^0$LT89CO)sI|0CKrOPTtf^|Lo zQ&Lilnq~_oi#1DDrhJUtL(UrUm^DO)b_`;b@2qUUBv4xcyy-AAbd^!eLrchUtgb4A zyE#=)92ERS?f5a7$Ik>EM%04jo(dW*tc(E2`^)r)_fA$Vu8vIt<`l25xo+qV$~Bp| zXVm+XweJ1%LeT(&CzRt2XG73ZjL+Q-OFp?J&3)OibjqLk(o8wC-w=v$GydCttnJL| z%5<&EY_n=}?hS*4)~hPjPk1v{ghWMkpmcBr>?id4qoc%krL<2Em1$|xb3LK2&ye-_ z@sr1nNurqwf*lafw_5X5C=TLV;N|-!HDUE85$?vOqrD|15Fa3=J#(HOP^bNDq?%)` zourm_F;4cW0)&jcd-u{XFeDIK1GvshBm4K=yPB~LE9g6fqh?vQH>8$S%~h2o`YAgS zZ1(!Etq%PkXhAmLe-$5jsTeoYyS~<%4m=PEQl}nt5@`9Ke|}+QWgSN0Uiwa)Hr6(nHvc-abB!fa*!!1=~JAGSGFO4Lqe=On6|UJX_deTg?+*X~;2x z&QsB!&J?3}D*L~Fb$fG`QRO~X78W*MUS+ydT6Nfv0k}1&pS=yWmVBdFAi*qn1XoOI z!?T`@h-8S2>gpI*J&yeQ@1Mw4MXAon)GcyRM$M(?H4iwYoge-9@qS!L?QfNSVrw^l z_j_XXK`<%f?%lgO9*ahm`5!-COVTar8M$;57?)Ec^D=>+$=U_6D5Gq6{uByD@W$uA z^+tw@T<6In%;GR{uND#zFavof)#j8|?lXL^X>GU&L<(y2^2F+VZ!6wDM_n3MNcbgYvp!}%sez&1km&{IDYIHeXR1%9Xm3G#nhK6 z4d6HE3(;+DYFG@T(9v4JUuK7*{CaGEFr8ypy6>(MJHmTv6*}ijUR(^O;Z0$+SX6OZ0UAb2Qz?HDRPU#fB z$jv>6<)Ci8uOdMGH#YXx+N`k%*N_O4l;Wj$J?~M7TpFSV>ubw4yw+W9>8Fk!eKX=f zeF`01B}FF*g63$sKeNi@r^MD!NIVY?3N)k^>lC?o-@o64x=aHN5`y6{1ad{(B!_$Y 
zJ_8dmPYSu{ot0=W)!YMP={-%?qb%etEfc{O4AcQZTt44B4tY5G5|evV-Mn$Ed5w!N zp#fN`I~t$u-hcX8-S$at9~o}chLxbX!LhN#A2$CCNv0|!3azWum4DgJIRQ$YJ!H(M zVxB$m|D5CDQ36vheEDNzW1O63QSr~a4{1GXt1`}jsPx>a*PXvr^wgG;}j%~jb*hWpdR^8;o^`+Jl zvNKmlb=S|nebgSl7`)1z`0T9B#qxmGYCq4_*50g5s7V@aN)0?gX%}m8u`Ynb&;ozTdYh+1r=QyVkW8qwkT0j zz!hH#&+qx|w`VFz8pES0B`Uy7P!L4Oo&yghTo-TLbU5_!W3?Aie5ce>&l8TwG8}pL zkOF)G_#yjJjTk>&q4u~=iV%qcs$_ajPAkEE`?IXq*H%@SS&jR_jnD@m&@@x+SGTq# zGFNqZ$A!B{MMg%ZXJjZo6?A9<6f0c<1vek6i?p@3SHhwOl29#ON*v^*fO-XIWMuSs zEL(x4s9d;kVQP9>5yh3)dCE|{_l+B8wsp4#I>sY_K{t2zopTn%Y}0QM>xB> zj)s_oS4<0gYk*{CTa?!5XO@e%Q)$iWCc{8umgjS_#3h|Fl$QE3!|6H=ZY6GF30$3jp zOx!$sd^BqkeP0P;xlwy{^%XP{l~6t_4(1D7a8-@hO6zjWwKp=1GAfI$_|5bpaCuudG5eRdFdz+DYU_07Me{eDd5^cZ)+ z@*(WC+@>tMYTw3apDPNp}R!Sd~5uti_7vqk~_PB^5bSJ&V ztX7>y$O}^rW7rfuL&8KRD7}1}U(l}syS{!(dagF17A!!Snq!q4HUgT0iur}| zP3Dd!kKLJU@8@~y8hiJU%jH9d4#8UpV0n7(#zlf?J2K7eO3!t5bv2(uQW_Wdw1kQ@;#qPxch;WWFce zYID(bAjE_afo`<9pwXMy(@^znS9cKn=Ku#QVGbQ)m=p@YBAbPC0nvaX)|H#|`)cLN9udX4H-C0@oQ0n?0)kNElx8x(cPA23gNjwV zk8+0ZMWu&K>$pu@ZPkY_snaK2^<)PwYlo!MNgDgE1+b4guj6Gbo|+5THlL;Tc+mgR z*r@b~dq8kvqo6KqSHMS=r~%o>r(d6T7=a!zwJKg@C_G7;BHm#jEXSB}=xR&g*_k)L z=@SW&2NdbU4p$c! 
zb66l=&)dLo7=>@j0}Zw2-uTi4M%3*(SroLVbK8mKfP@gw_DfSA{1O%K>%t^GE*n+{ zkZGfC!xzeVN1y*Q=)z;_IYo5S#8`_{G9lk0#5{FWleNysNOMupl*GgDI06 z3XR%N05=!8*yQA7(Vq!cCG^{S!9k9m#)b$v;Dp5gFtE**3MGeaz9`xD1-Iabu zrhWVM?G`*0VV`+JWF$0rCc*om+Qfq#sxbS*PL~Ex`QXrr=0>Q9{8;--#p6)^w~oswGPXWH4k{W**Q72^7pz$p*U7%wzbDGb8)o$9F8vZJYA!)={NIiChLhP6o3lY)X8!^6Yaz%Hw^t<%Eq!H$S+3oc^;ag*rq zcreB{3jxFv;W$dAd)PddiDf9}Pp2E*7fbc98Dxnw>kj7BG-eAZH~Z;*)$?GWCJS^$ z`qS5LO_Z#Alx8^rE<$5%yYfyk3}s~lJaq!E?}*woz&aZ6);Wuo7HS-63@VB%##}s= zCzi7$PV|gQn=KWlOF*zmGU7Q;$ww>AA`3jvRst4AMMcFO+pw|PM;RH@J0s#4uG|P; zbuJ9C=&ip0R^z1ttN0D0KmPc8ZE2JX&fc$j9(<7RqbG&H}bt6_6QZ3KB(F_ z1HSu-M-_&Pn(2o3B5NV?Pp_^ZJ#h&)!kuTc51%M&QDo$ZgNX|u(p<# z?MCmHl}O(xTI+gJPUJtzCFbfoj>&~vvVDbmBYop;Kor!9&eTG1+?cpH29IlqmS`t; znuyiH5UD%OXabNmH`yCuvZFmtta|bvE&c?(QZKJP-J%O0Hzy1;XM89x&;8Vt7;|B6 z(Ddy;d(7KXBaId%Q{H`c@t9(EO*?OIpQ&d)MSD_(#_o?Tj~a(=heY+fd;gwZa+w41 zm}XP&#XOJP+}s!7Qv@8vj1$^9zv8u%fd~3J@$i5_hWAmMNB+?aB z6KVfSYz2eI7n>U!xm$;gt{i*xgx|)R&4WZ|Mp{#B4#ZdjaP!m3-KZSr4UPEm5G*WG z6BPcYcD{&^;9M3kWHdXw?5vf0{Q`8f44MO=wHCm85=?#T88Nj9=w5Uc9zF_lwMByw zo;_dJDrZ)A3 zgLHn_s|rxMgW0xc%zh#ScWRI)I>5I#s@<{$Q0qqHaSDX+fN6M2;gokw(&R= z(e<#8cJgm7{Pvy!kWC@HOios|g{x$R`lYF#I)j^`m)ss`dnjw6bhj8!VNOwDA&*Uu zFi|Xs(eLJ|4Y$l?ZY<WeDCU zkj(C;*I!GLUl7k6R4A+ej@OxRA(1&>!95$zOHm~uX5CC+d~lzxkXwX>@&XCLP_alY z@Gx`8yxVr|YQdZ096$Dm0Y&KUwSu$}rk`MyPRGEdl`D_zL&48^T3gY4+|t3ZDnp zsJ_jDo0!svcVF^nOP+Uxp*GWDkMP(cg_W$(oP>~%(3YWrN5evCHzj%+ibw~6f?6pBFN6es1_olKgHIjsZicVl z^W#$|StQhW97dGHe;5U~C0kWMP!JTiTA^v~^;V|7Tt7OUn-|2rcBGk40v9IxO+Uy) zKnSoi&Bz%oRz>2kb{{_c-y4;qA_d^Z3CQRK^+WDqqwvoLwpT@_Z4*s*8GeW8c6z*eJo)Fv3*~NuqwFG=87GUGV!@u*& zPE9>s>d;VABMZLfi}>KvgMK-#^VSLera34)?2Y4cq6^Q6*11?5iWChDDhvjPwA_ugh5)?Hr;UYkwvR1Yebw9ahr#Ep=;?YA1C$ro_nyt2ts zhDm`(*@Q+deJu5$Ovu@Hr|BQrua><7=7*?CLNQ(r>Yjg0&&)JIa!Z;EQ6F1G_UeGg zFI5x-onmsi8zBvmZT?iqDOrBI)jkfjYrM8Fv0Fd~lDth2e%d~s%lZ;@iDW?|IeB@T z6~+XdCb}3V44{j88KI*he=`JQ;GN(_1sJnQI&#=; zbRx9UC*<{6{@^5`zN7V*n6k`?GGDxnkDIA-OkxLCeck;7+}mgruU?jw%`P;yJ25u~ z{Sl6b{EzC02Ry!-(_3ypi6XF$*W3!f 
z62{Cmqd7BZ?Sq*nfe0^Kd-!9e#D*ykz=A32CY zD*eNnwg@LDXY2eh_}Gr+YzWl(c^s~hnf66Dow{;wsPxXJ%k@tT55GhVr>U+C)-IO% z=_>P7#LlEYImRZw3&4(CJh6*w8-Q8%32IHD+35u6JVQ{Ac{004oXTOZsm15Be{01y&u1QCj;O9o?dEV@o^Dk9rJw07!DjT%WV-^9?NhJ zXCe#w*1vW^U`a^{LR~TFlP|s}-4vjQV@JgTu_@z)vJL;ni7rFp=tvb^g*5CBT@;TY znxGCiYN_9fqp1LTzrWC zOEPHEh10sK}Bvdpaw(epQO~fzLm`#AH zPgwBr2sZ`D7bNQm63%1Z#71TnFeH)X%{zhp7k`?Vp^dRu6iIRwMjGsoVF>J3F(3eB zfwVM0P~?V}Pj%K21mT9-I83ZhsG!fhX-^W@7gF}r{Cq2+)4%?H%oNp0!q(sojCLX& znavET>TopJiyH(gaFVvGT-GZLS)RFAw;4SgFT3in%Xk0$CcU&!g6>)7sL}IXOAGudmK) z6uD%<$fy17x5JRjDtTkzVgn!r5>%Bi+6eUu$o6BhZix;!9~G+w%hqQ~Ng28;TB8{R zVe|UucM#<}3FL-U83hv)B>70LzEte*0FBNRmqQHv&jsjlVKHHL-Y~?D$RZ ztTAu`!X9qqT=n=2y_kpFuacM*y7ea2aa=?E!VoGq@$fY}20?+9OFY~we?2{wncKT(Ea2WIydXc<%b82jN=u5Z{$SMl$Z_uMC9RbgcZ+M?UM?_WhqW2ED8| z!v=8qbcc>`V2vaaNz@xr1S&lqe)IXcQpBmx@2L>fi1qRDi5VJKA;H+wwnONZHqQ9S zC5UF6>*2f>``(4eyuxp=W}hQM25IGi*Z&}&BrkaS&Bl~j+e4zD3fQvruA((_AQ%hb zLnHQa3fB9dfBuQ2wf)zR7oe@bNv(c%;V&Mv(2Eb29Tn;Z@mrqd7Gh6zL8To?TIBvaZ;m!s+vEHnsqw_?>H5-zA zrqiSjWc?vr5g9|m=a9@YHpG^#TZ8S%PqML~#gKPMJR4F#P?4Yq@QZhs)mbz;r&?tYt&yif~b=Sisn>Tgs zAcGW6JAh$9>InN#MT7u(MP^dpkN_j*C?FQ}s_mJRRV*B%5r9-!yClv=WH*k!jExyk zLsUR>Y0-WA_p3c~f7nUBcK%xdJU&UkLrFjHq%ZI8)|Qo(#Xv|rde=)d0~*taJ|_$| z1y*R)eb~2YlT^q927Y3g1@!}cyg=`V_xTu62eu8}gv_sDNT#=`n0qoeLARtB#EgXk zBH2JL4H^%NcUijFm&6n z<;D^UArFkC^3%!=oeWs@|MyYu7rvSL98+U7G|^zP!%(!A(CHo4R-6fp!F3=9&;+?> zd#2}<5ZaCxYTYZwDBwQ;gNGz&rJSf%pE^NsH;v00rrjWR>p$m72rrrpNak9;o;D5svO*5HL*}gZb?Q)CX%rg0OiUP9)A|BvMTdc z=)Ht$i+p{x887G|78=RSg9Z(%yLEh@_{FTvD2Nv6XG&S-%2*HypuO}M@4~pFjbIP% zw*urBpNCvRYG&EHcLd|aW4Vy=3|uDQ6+u`HV2rd(OrDKp8?8ap|H+ehm@e^PkSODh z{)c?Jc9GYyO;t+D+i2S~K#N8aKZ#X0H8ounIna0)h(&?c{;z61t5+fD!CxB6`XtNTRE?HpcjUZn9 z?pQpK?yk)nb5c3i$ix*U(TIIP?z`OYlJ!<{V|U_UiId$Z`&2LnTmK~{fW2m+Q7%Ed zVbPALNIC7_`@<%wlO+E9ah>32Y7;C7Xb$^2JPh>pp>mv}C;%EJwTOLt`#bq?1cDtf zHbmkhxSU3)1x+wrAci&V)s%%6!N_aY@COlmf!Ihme+YOY2+SXzZVZ@0@9FpV@9)@o zNLY>B^ZE0)kRLLq>}NHDm8#?efyjQx%gcPCD~~Ku)Sp-k 
z35rLc2vVrwt`h=_n80X(>ShMuh@z_zUbYcud85x4%6t9ec_LopXMu~k(hdHt6!pW+ z3BenoC6E}5Zz_T7_e0f!KPZRk&L(_tn8_&z?(#_lWH!gzZ{5`Ojig$^w@yOB1)t0% z5wssCN5P9~(~5A#OUOI!xTmf1?c28{ya>pDd{}a$n45w%8Z-+>*fqJ#4f7}c2y8dw zZP~fQ)l$ugcAvxkTgS?RNDb zOV*lyqvn{RH7KCm*keu+$m^uAFfxX!0M&S$#Bs(eC*c@GVBk(gL7pAVtwtH1!P`Co zWv+EqC-?f1R)_YB&q6C>c~lWFdS>G;PCtd;M2rd&j0Yd1sW?;q{r+wiBJ)HZbQ9t)B>79V zBfuUXm5tAe2VF%ZUKJB_{6+s3c=m2I?nIK^#60rGmO>dZ!{&olT^Qp|(!I!{*Q0#S zBD`w>dyEi466MBtS9824Ticy~Y4SsyQYRynd7cv%C^=R)V+^xGMOyTF(p~{;yYg899opa9~L6 zYs;1`X0SLh;mJPpg$LvxpB{)(2-h?QU1Wa?1ZfS7P?MY>A~etO;KrYFEyPUfj z)%K$~okYzfuZv#5)-c@N0<4`Vz@SMT#qI?pNV<G~w6qtu%t9d|CMlm*Ic_qxLgd@n-0;->+bt13luwRwA|?rS2Gwidhcgw}axsiJ z7UGtWEYvaZn0N&APC{M4kZ3?kij6;G3Jwf#LC|NT;NEn4LK{5X(fDnS};#hRYz0k%dCy`~KfEaOy;t8^IXR1!{o5zn&S&r$*#K zzyIoP$mbLKNm#KqRAQ2qQ^8H9Z@vpqU`!xybC;jv@3WNKrrX-~MJXBX-do#JbB=I4$vz;OiU0F=;2mD&Nr$nJPnfI7YF(abrVkLnZHDn1@hK zxAYBvc?AFyH-M2qD{3?fB?-n}1&+cI5zq0Khmhk9L{Jz`a0)5UbzjP_Kr-RH_XY7J zlgkG&S@#T`R0mq2Jm@abr@HYP$Zp3j1t?Pn6iI9QZ#Pnkm>v{P(tqKiB>^}P1A*ij zV2q|<=s=39<;l4l&%yf15e{VbgJf|4fiUZ;GIXPlmHWob+*}K;pA6@SQ^{E-4-ScF zkhngv0`X`@frLm?BxCypRgV>+t)ZJcGZ{=>Ox-|A$V^Aqdp^IOYcs7JypiN%`)A z?nB6$SSlEq(rkzfpvS(!AIqJ2-PO!F1Q=eku;$G%_`CKZ8EycgrS95*i#xEYaVmjQ zgI*s=C6Xa8yu!ZzepV!qw*2(d3k=E;iAH%Hb?JlQ+5V+@-~l=*t$#n@0q3A&w)e)% zxs{aH+~vDRbow2f(UQv!a7;g3hj<1+VM>@FCw-o%8p|C?D;?gCt!pzKo%RXc*I10Wj*YnEO6n zlhNI}WB)UFq~&k_djgT|NgF20Zdaj0RGZ|tpU&_%o3RyxEk(qB8lF?vVl8) z>h|Rg>P&w6*5?b~2aukyI(yOO`Z9MnGTfsez zGxDNKHzhv)S7(T-gXemIO2$UORY@H-N79-Md%-?vY)FBX<~0sWV+Lb@Lj+KX%y&hR zFaTHsNiL+l-;Gb`{KI;yrenbXS-a=;m~;N(JUP*o+_cY4HbPXJARG9x_TaxrFjXQo zfhOEc<}f!-8}LLt50ywOndEZcT)|LojiTwH3UrE$n1Q9NC6)!yh z89NG#x#IQfo8H)I@@i^oXC$Gw+-f+?$ESkqB(ZPc*7aj3y!=2a1z#a)56tA_G${f) zL2!cY)g+GE*;hY){Mc<4Nuo#g?oY@&?+Pg&mw!SI=Ryc3=&BEiEHxY<3z6>&Iy4eH z!O*M$r1h0I|J>&Nd2KwosrYwLI9p4mvxfhW(<2DSgvNp*bK%S-?2%gy04gW(sHr%V zh8E&M@by2=PY^~iG(-+qU=(ptLQz7efTP38s1fON|9$`qs=>6EJED+y%clsr1hyie z01NbP!+SW0fJpUZ+DuWY$9IEpUeAD;3SN0^*u`V8uTSaCo*(i~PV`QUc^js%RzdWj 
z-poM3J*-Q{UV1}%S%1(Bp3QnP%d7Ro663$*2pFW?B1PbC6V(VqtK=Y(x;lAE>!qbI zh}2-(J6E}o(jwsl1cv|~Q}WRWEs2!Asq+FZjLk0KXhyr1xg9~J&r*$6J*vvX**LvN za|${q7}HGak1`6{U&hqVwr$(eHJC742(3ySxltkt5I+0ykJYBa`tN40wRt=8Tl!a!aQkpD~!h>^2)t zaf2K5j+T%7bl10zY^wyIx05(d2F{`~CeTR8WB>jaIN)cfKUfzVCyQeh6ZFJlBgaXR z0WNUuV>pkCXoNT#jYxasoT9zL%;a}&{CPGMu=Hmw$;v_G2av7`4lrp-34FjYHd zEqDx8bGap-=C*Tx{rP88wv}cA(@s2wq&>`%LrBFCd`IM3lW=EA7_8(V7?nh|NKmDK zI(70}at}aB2|%?v%FN7ZMJ!fZ+Xkc!q)%@&zVjZW$rzpmFS!zwjnap+BiMz73*Ua; zKvI27dD`Dj%rd(fPoX=3xf26m@0*A$f0s}wc?RsgZYcz52ZjB!Vt{c_|{1qsUpa_NfJ?h&=RVNdYh zv*>@>=&*@T-c$S0MaDcv>%MIEg({SS7uab;`61cIo!doO+1c1GQ!(A5-Si3N=UD&= z3;I0d6SRAuUCh0-sN6D_6uLd;8JMTbNPW~@7Ix?;WIzCla~94VAu&osGZ#3SqSz2t zk%0bg=^8EQupe1o?kaEEM?E68p%*l;Ve`&u#L2_UkKk%=0}+ zsYe1qW4c}Xj^n&Od~0&PjCd!B#}mc@DJ5s|Z5rfHGDIOIxUoC=s-Tm|%p4HmTUBot zXnTKo;egrj(ps}bpX2&>2Knm2<7#7x2!W_OPY(r(AL4#So0EBe$Lyk+QDmP}pJVxh z@xssBiC@h^V$J+x7JQ+A7`7Az2%p|Or*N489*&N?(lc^$9+c>_30pP2@ON`_<5298 zZa@l(96Lv53juIRBS9`=V`8{kNu3M`k?A}%g(JXwc)2=Qn`E^mE*-)d07!Ra8kIng zCPFWIjcutKzf}j6m}rzL=}xd0qm6@iVo_*Bpx59WB2~QYnJ&9pcHo0DcVfN6qzJ&a zLjs5-J>lc?G5PJsHQ#D@Rr6ZMan3ROo9d1`aea<`juKY?2fVDC+=)%J5&RmQhJjy? z;^TeBmfXGZE}XkEuNqK3UOpbJpz$@&VN`|8<7}Mz06P`CMH*m|ilhWOIjc6)upIz| z3YRz0eeTfd)PdoVSOwHOx=sQ7#!WG=5esqn^kz$G^;NX=CY<+x?wbg=_afOTBw0xE zjwGshqyp!BDO|q%56*sThI&J0wOkOLAfurW_`9wrg6AWMqXauw2~>+5T2%q4g?gxn z)Q8Qmbvf8DQM#znfU2V?H!Vo0HDg#Rp%@ybG9uulJOZeVz;__?`0$mF1ItFg+R($! zQ;y^?=bSXhYG}Y4BLN3;t`&*NBBhauqK|F^eMVWOa|e^C7FKN|2?FDc4ziCh$D@jn zSte35pT!8fc=g`D)4!7vV=pyddUrQGA`e7?3xQbrzkG=T*dVqG(yn0PSD_gKh#wnX z@^L_hi;Q=Wu``^lrH;4Xf#c}l;T*QHu_2)af~G!k6M`gFgOBG_6epkr^pqSdND4Ge zYPBWO|DbJ?lRS5HasjRrb)tA_zc?r zoSd{#p9kg}At#X{nvsN2-1zf@*}&)IbrG-?J<7bgajp|S7NJch>4~r46y{hDBaLx> zumz59y*tX@h;s&shDbQEpUB+)ybWd-Dq(%eT6HfuPk}UW91d8whWOa%Q#UFDUR2R~&1 zdJ_kR=%^SuBbpv@1AtKDhy;iebQA#qIRC2yRqa^J0GA>*JTY_^H>>-gVNg*SBprLZ!@J&RLfXoXKNaLejf^tF-nJFa$>O}Gf0wYI|NG}sF6A%!;y^_i! 
zMGpHQLs>)cK5=55!R;@&DhSk)8;=nnGtpaw)vWyCk$9X4i1!xcsOHFg=+ti|JhP3>R>-!XE8xQ(Q3UP&pISTHDuL`cii4=&RrmS-67ow`<3tT>GP=X98?dsIWA4-nfa3ugA>~(fL5>oC{n1zc7$?p(J>|`NF3RCj=!jA+cTmF*An5S(mWwgO> zZ#^gSA<6IoF9Ginq%jM$h=-v=P8-1LXM-9{LUY7}McHF9!w4rxL)uswjtPWB)<}}- zXb$E8hty~=Gjo()IAMV-?3j9>&P1%Q9B+M$r0_O3(eUta6{`@+b@b2}PW&x^7ommz zZwZaq^zL4lZ&z|ODfOss3YZo*VyVR~vx5y0F%5+$DI4dR++#7r80I_rllYly38Yk~ zKi@be@_hIcKC5^D77~I3E(sWGnJUM|``qCwwN>IU}z3!LCab3j-w?#9iBW zO6vuL<#?p0Trfu3kC8$k%D%^)n6oSOz!=-chQ(wkDp@m<_|FJo#1FKf6)53eG3P{~ zLYgI}S?RaT zR_*mqSMx>t|Dc$6WWK~~V=M^h-Ul1iavMV)qjxDhp{#4Ke>6*)3Wj(`T{y}I!Eh?oMn_|>&Kl03d78Ad2uz0oT7r1xdfG? zx0gH4k&Au4$CWEjd)N2K&-r8=1(J!6qom7xk;g&F#6?9$Ko#tna}~;|an>pxhfJk7lUx44C8+L{7)Iqa0N1U=xID5k1@+ z;q!Ki0@^mA+?Y-yw+ZLPtIHbGbgj%djdQtzGpEumvAe5_Kyh$5QxJa=padzkFYP*I zmnj7}Uzv8cwin%~A*eO4zsVDo7{^-|L9u?P;NNLvXjpD|7OKPraEn@pziTxQbSr+} zvai-VVTZr>Wjr~&#Iu_m;)6a#!W*Q!efJG;)B!Q_5VU_zP!psXSep@`8z&*em5L%9 z8izC5UPAqj*DlBw77>|6?=Z!UkTa^udxAp-3^E|K?M;IP^xre zf=EXY>7df2E4_2)+8flA^L^+0?|;V~M?lblZ%&~O0-2vW)dFJ50>ige*_Z`o!-}oV1rN~X5kI0%* z>urWnV_N`%qE;zb+GkApO`8{r0}F~K{a|?tyD@1+80fX1#1220_IP~w3N+~VL7P!mA8JN z>rOp!K53A(_?MgajK50FxL98zxpmt%0dfbK8V-WkJPI+O0N+n`(NU{YnaF9;#R=(% z5bpA|z?>2iGtMzaZ6zTZVc11wNjuuH9$~UYvFqF}H&tMWZd-2yU zh<{?f+p{V}`U`{lm#z7w@d+t*4eo_IXKoGVmv%{hkycRLycuf+zej&aU9xSI*i-Yf zGMU@Qw1z8&jZ&`EHJs@EvECNyLRX;MWJB(O{u?Yu(WGbr>kkB0kxU5(X8=%;i#U}z zxD)9S#{SRqP<_2nalX};kJZq!?&hhRrPbQD8<>7mlS|V)WX2kJ`E$k^V%s*JXT#Ag zC$akD^n~;>i~!1C*+iAOf*f3RttLe4p7eMhTFS$-kx7{fCgM&x$ZKdoCmrihUj*~T zGB4yhEOO~d+TV>}$? 
zYO92ulp$bg7)wCQ?wlXOF0`xpKisZh=^msKWDpD<(-P4gtEC@65NbQUwR=exOvzCn zT&;8xGb>Vq6R$W7-RP*0u+bbvN^%RK>*~V*I$!Ue4>wg%I03gF1DMbXm8Ae$0*NeG z?@e!eA!(*l&Q3h&r`gw+cAU2Oxu5a>|JU|a=to>NK#C+eBry(h z88|sn`HW61B^K;2BN1v~CbAo%8eHT$c5dWdkAYvq#x1<-P71da8IOAJzgfNp<% z^&%x4VHV*?MZ#@>r*{}}(rejb8WRxR4d?AZ!{vCDw*CU)*v>e05BdFD_=LTIRG zqoU_-Pn~14Q|T8ruA(bDkiWR5PUh;dt*mhL7kWrPNGmdP_&HY%;Pj$@ZBi&7%6JX3 zBNo&MF>hpb0e$1AF*eXBlRp)zC&*A76At7cZ$R3~FD%@1yV(UCed~)Ovc+kDT&ahM zdN!b|4}lcndtIG>0;Ka{Od(GPbk2&HqEd`nSgC+V&LZ+=nuh37S)fO+6Q8b}&W%ZM z=C8hFcS=}R*g(uxbwg_54{U}{!4S#q&C=kf~qPe#)k zg-?Le&B}QL&-LuE!9Z?8Fl_xvoq@bk3$PL(Xs?QiBHWCjrv{1Z&$}d3{&vq`c=52L z#+$?3l&j+x-tgHeQCU2HHFLn}J5T3{cIOn0SG1bFNKpelPId;Uk2KH`S?P-0LUy1? zjB0yiWS@4SAM8uih1C0W>hp6jWJFGc+`-_aE?9fAjk1}Q*eY#Vy=ILFrU(_I2_dkE z#Ha-fZBa9WW;>xSq`M|HO_(~=So?t!Gq{Y{-rm*Nti-gA(qzz|sQ|G1C4||m-crfA zah1BDKch}C$JpX)oH^I2PJEEGorLb9*`uu*)u$Zw6tn#`l0G+be|w4q=J=|mib9yy zwV5u7sDq&>&p=2HJw;_?XAUqKSy4%X3M+eh+=}KJ zq+43JQ(DlpkzfI>Jyg9o1n)XTI>R)FNYb=`yCp5g{IkTkaoNsC_CYE<@49(o4LyEM zoN=LmcKOmF+azSyE$IzlXoV8?X=M;Y#KI0!e}JOeXmA zWfv)5+w{%i#Z?u-$Y?Dg3EJj5c!%mQG>c>?OjrX+#Fd7lBJKcypnVDWbr9v(bVc_d znsXo>9-3=ofN@cV!@^buapZqSW?|#QJu#>eRt={@<=E4hr3ap5{@E3{iiaWR>O}x; z(xoYW(VtP@&@o1m9=zZw6#?QRsIeD>6@d;niW?gZ0iyHVs#E-uEHYNpAnn1AmwN0`65l~U{VCNjyrv5l=EePTzX=efs}En6?jQykD(f%#>QpnIy4 zXWl>9_ifsZ zXaZ)aNkC?!YnL#hb9ULV*IE=4tZ)zwt+g9AZp@F4MYP}m;#@5Y(i&zsus?tClfLP8 zr~JOz;_C*~OHm9eid=@f0H$4BBO47$DF!LIwgzLVsSv^(W{~#O7oUH=u=N4>Vpcdt zVyo+dHauVJWXyH}-)35S9>#tRT#Wuk0{aY)ei=^Iu{6C%rUigbfA`2}^QA&;5kR#E z1gv^@?;nR39wxK)6pDk99n!XD2?y#c_TdoVy{Rtyi=#y&=RS;4%nBzhQB5j-B^vj>ufB$mEfivp^l6hBC-OIICtxQ z`ck#(Cp~eTZ&jHeN_i61K}w7Es?g77bC!lMIL96tAVe&18SXsW+H(x_elA%&3*KZaNa8jr9lpN z0WGH5l`OSrsz5OQ46AAsfX2gbU*!TB%4{{JF!eC@%hbZz<)!?`NfBp^D$br)N4_Yp z&`{d;cW%9(T4$%DDZ1r#saqJO3xYX60GFj!+jaZiI2<`b`#m7elO-22Ri1GtIPYVE zKEu*6gj|_6fmpfu(2u~mZjiT&;&!+sf;cTlQ#z9CDEy9^=jh6c01LoF zOFSbAivnh5Vy6!lsjD_?$^u>|=JFa|pA?v|tZVxRQhnqh{AOb=4Q5~Kq`7Q2!>1!u zJsmPGr+jr*ZWYxqQ3-aYHW%s@ZJifM^OlMW*&9&mbWLCWCs-Vf&*^QPY{hv) 
zft{ywjLSmKwliY~Q4`n%tu?V4Q9|7Nd>6ie>&XT%sO)317n+D`s6#v(E#1;WzV$qF zlk*%dt1`^#y=vpL`s9w?NcXjR2X@eJKX^S{KP#?z|GWQD$Ip>GSSw%kGB+{tJ;+d) zHuZ=#^H@YP$63H$#&;LtN?tzfW{N7)Dcj&78L@+fAj3J+ll@`ZWP$+~Rlg(R3ezUp z+9AQgqx%Sa`)3-jVkC+-ChO$MdF)5{W(I*2Sr}ug569`Eg14#R=j`KN4guc;Sq|x1 z$Yq?-!aOY`iU>z`9+%g45H}IPU$uAN`|RV8ezDl$@ZR zhTdo$FM-wv#h}EfeE@GFOapf4Q;?(%Hso`_;T1`La&{n9hGUdI9g0yvy}{IgYAso4 zE$M%3kM}Z=us-<^mf>(*l4!1om_J5y;d1I0(*k^1X#HZz*ar@rr{TT&%`I+vqp#;| zdkzhrnO}W->6ah8=C-_3_&ESp91uXnLEb|8ZcKDYrHK|nzUANLggrtiy$6#Aibq5YMp$>)G-Vqg z>0pH;{>7k!m{JG;k{j&o`d-N`n>%Z+usxeO&6i=n{rKaL`Lr>#Q1 z!%4^r4OoLnpmjl6M3JZotwadqw0YIdC9B^Zir;Lu61Apg@4o{0fLA5 z+&L8uS9LHBjB0!*sY%YaxM1_I2oPL5PA#NSQYp#q@_~Xqb-yuHP@LD;fR3@zX~fceon)&fq{+nWpd#BKzrLdOj7!bB=U zSORaDU`m`3ZDRkmmc6u@keH5|I)Ec<0Dypq zCI~(w3>xuJ@PruqxM&jock;T#Cl^E;BHb3py^6qNKjCT=Gy9DH0$x(8M*Myu5}wiK29{KLdILR1FXB?#I3mZaIj93@7C zOhK*=Bhyo2QD%pLtXr#>WN`x}svvCJaRP$l57{?DYF4cQb^sky>MZIEVWMW&yjX{o zd?VhBY&qhC9WPTu)b-v3tUy7>B!wQM18FTf#V#|3UO)Xc+RIF@?b1(la^5U|=@3_t zYwXZV;sg90wYW{C7r0Co;h;;6F0(I#{!rMQ*0WF!+Du~1AQySEh1Cun6s@>ehwot7 z)UJaw{&@@tQd#PlXWY6?Vt(&mt+cUw--JY6=tz zN-R+4K3$%O5*RC?@jt+azq7U4v*pQWuyaP6&9IpY`#GM~1Vh()daIs2`p3`;rV89% z2Ua4`f{=AS%At*@4B=$}GYcK4I)v)5z2G+}`YMn&qv>R=?C6V=WVrG?-bfV41#-y3 zy1I-O6?X3@`&9YAj__0*;fSRKK?PzLAo7PMh64httH&(pUEbT1fWsn4Y6DjxWjQ%H z(M=<52-FXU!Hdv7fNWf_3|a>*vib)uNK`C|(LvXP7l<8IWzP!E7k?b#`D5u`a21y5 z2O{Thgc1t?P6VEUKNNs90h81&_6>3*P3)Hlar;nLYW6EFLEc{*pS$IzsE!Pg+da_` z)-_017fOA}{tEiz;c36|&*TyvKz6}04bTjb@r_J00r)d;EdqHqSbj*63p1*=m=9(I z`v75E8=U5cme1^}KL7L2R~Q6{z-g3xdoc@R$FX(JZd$uezT*D;-50X|@@1%P;SZ(% zvw_V1V2e?d&}cnm0^}+Jes8B67)EO1K^!4#2;|pS9m&0(arn`XpP)Lp7lBdm;V^t9 z{cerroRfFZ3$^l(mi~s|Ofp^41Ysaj!URbmL6)yyTckaBKqW8vC0L_DaRJ7|ULqj? 
z@eabgnQXd^4xM+K_$4BtunuVO(LnqAD_AO698b=^0**m{ckH5BSjpE|WsNa*g7 zdGFT%_IvqvIL+=C`u?L_{kzy?#|OXp-?Y^)|79%S*QjHte~f|pK=3etMp5*an`C0h z8!eJ;ph&`sX#2%04F?5i1sM%1oWddSEy17P z(7Rrw?D*iOdhe)0|0`1+4Z$Ei)gC>iwU z!}=Tp0^68{fIil*|DGPGLe#9{pAN(2y+aD2mRSySN+I-G#0+6Y-Xe4fBZW9gfOPO~ zGZfm){@~%?pA{)CAwX3H-|E^P;{0hB?p*``5MXL5Ag$F9=_TzN??QY<#xz=3XrW`i z-e(N1?RSY&>M;>I6Se)&l$+Yw8NKM)_`xR7YA|~mP#x-R&;zkTF$jzAqAnohHv63% z94f?0x8FiqVML~^v-;9I*bOC@hpo}A-u^r49R z%i@XK8^wc)VcwR;h=PRw6z_UEysztrgbx2@>;y>IF_H{51co6<`XQa!SVasy*^Q8r zyN9-+;10 zbnX1N!s~;be@b$Vm$FA)N|8<+w zx1D#A*D`xnD_#7q&eCVMag~bTzN2mpuO5rfdm9%O_0!oEn^xSr9OP{kH#YHWPwi)6 z`s=OY3fsj#{#813#3gh@hUcD1+?YXW5GJKmdm+x;V%bu*kvN8Wth$j=)8OdTtMh*3AaMyx?40hnIIDCsxXyN$&NL!%J z!7&aew!<%10#;UI!`z6xWv8{SzMtz{Tzo}~fizl6o!aqL?xFkjn`Xy`ek$lNeJn@n zB33uajY}8uNKRfB(oecqzppk{*0tw~v{!J?6sp>Ll?zYpkdw9Ezg&2*F;3sL=l<^A zAhA3*nUnx3*6O~yzrX0wsdM)Ax--EAiZk8TDi--2sYU>TSaqK7_=^*DvQnHg@;%9Ze5OxPeFS5R z5+0aL;b2YT*-&(x$EI80c8;<^Oa$Pzi{8^dKbt4AYP~v1WdRhu_byrYLh6A18K^yks?Qb5oU1YZia&Z;k*6Mwe zhEf4%widKPAu8k`UZ?7>BqStwwjMbslGehyivWAl0sMk5rn2&M`sLkB*M6 zKgyrDm1r5iWquLxAJcW|Yd!$RG+pH}!*VFh>hs8U~l-))7IYF-52s zD$XCqC#&_bfzN?oi0%4%g&K-Dv z%Cl8=nOB-G%7_dN4Fzoaeg~X9GmGU?!8-BCI9b!(Li4s&B6ldH(?z@TnwrKEkxmDN ze8d$dY-wTfBy&Z3E1#g8UqL~EecoZ`mFF%5jJ(P`*4HRoSsB~EmRIpf?{$1nQd(b0 zX3j(*a`{A)eAbpoT-eU;XCZ>kwLqlm|Wo{$}}rs~)<+p&-adW!pyb zfxd|TYkl-9={W{H!n%(kBhB1${LKsXJkILjonWPLsuW{=ZI!-*GI0gi<>^?KF%qdl zFfL;DEl=EA*77kuJUp=7z_k96U*@O5Azg!iX`tNd$xXm|s$Y+yjwLT%A;Zk$K}OtM z3RXJ&OV#W8jBAo_U&gJDJZ!XbRntV4QG!Xn_Of;hH$=B{lZk5lOYI@QHbVi&Bj9f_$I2C&KNPmcGnxivsT3Bo}WlzGy~NFAPjx55+(K{%u6bmHW&*?}p{b5x(w@Y0o>s7HH`Df&tSw!7G4F2xlKxXWSa zuj2LcYA?mNSq~3=#8n%8`t<1{x1*kxI$9A4^LvG=uSzUuxrmmiBE``#Xjng;Un*(& zbB5b`dL$LiOVXPkL9kyA$brT&+WEf61`5Aqg2M}d4-qDM|Ir(u+<>kz^)dtyj;({XO^vEs)4fZH6boinn23%O=kg zoT(c&KSs#e+vYx=&H2^RTd=LQHI#WFXu#eEBvvo8WFn&go#DRQzUI6ie?FA7NCs1{>BZ2`?! 
zf2=|od!IM2Z?CGXY>kwkI*f|pe&Z;D4xZEUmwEX7 zL1%PKpi^r%3a#wnTVwV8{B|54||~e@sSoPYcLa2qvcq zTy|@aq9p#hSO#9d>ya50t@GDs0pPOjuP0(2IExAyfFlKILvW%5Cup-QW^&ifxop$g z5C13vgH-!CYMenaJOT<=hE7HTC_@nc$c9ek3CdtCZ|VoO0nSkL1`uzI{Mp&esqDE zB4l=j0IDA`p=N3fhwJYB`IL|#Ptc*Y6lX=tfiAQ#x`t?2S5UhB{`6FA?Vl8_99mm2G znN%7w$@NT%42b^>CK-Dd1xg4q?F%+WS|2HyFivfBwGxX!P_U4FS01t4KFGTynElXI znM0;rW0TrY4zbXp5}xhHkFZ+MRAZSPMmOolxtpYB5Azl#YqfLz(GE~EtQNE`D6=kH zmGz-Vim9Z8)eE!s>Gp3BVbWXnghn zXOSN~@TUmGT``XC>>5>nUF((2vnt9wJO`nLRn zca_Y#0X(%<;MS_Bu9oTOU%?%_Dg(GuA?V59fJhB+&@ac(BCKt;df+>PD&b$Ivk*!050$p-?2q zfY25T(RWYJCgHD_=2b;?m|hitcb;MSL@rtVVifD}RfhBn?|CV1W;Aei%?+GtO9(}Z znHxfUvA-EVy!@==;83XCc>72muXE97yH{Ufq`f@7N^&dgrw1ANpKJ)eH}y9}jwJQU z2z_e|78gvX0ZhK|xHu7zUrN|DBB9PUgWOUfO|T%Yr9cV!5WP&*|1u?bJFk?oz23}$ z5`Cu{Qy%EgEUm137_c48$Ka}fjPd*`sW^5&qWCwQCQu1-=-FF)?xJ0?<1;K+k+>%~)+!cFI#1 z+;$b@1}w&IUPn#JIU$x=2YcTFbT);cnlZ&{f#iCuRn#j0-y~6`gln;oNQrtQ4c8j? zk&LEdN?ehC!KXqD+h-h+n|^!A964gKicF6$;7b`Kv}1)v(=58l7+t&RS_gX@WYGV1 zxCF_pde&@x6~YegATT}!$EycDpVfcQpyX>GIvJotS!8$b3Hr*%s&rE)|wJ}$0=>Js>KOU?|c z&LruYmNIH!&?ANAV3!ITL#kvsuQ1`n zEH)y>{!xLuO6XK6-7DIsk9QQ3xwdW{vjKr3IhYNp+5x_z*_ zMl5u)WdqXhP(pzs(iwBdN9L!756w(?&4^0XAK8g974&#|vR$649YSX?Je`m^;?weI zDXCYFE}R{wI6{)kIze|icpLFxi*Fbgm5W23nRI(*x>uiI`3iAOJP}-!tPD1B%+C-5{0BO0S3DVM{UXE0m=o z_6xPaA%zTX7Z*xpzr8>iKk@Q%6E&rh z#Te!;aA;3N>ydwF0{f!af7lp0MlBx?of+_IyQ*iKk33xr?#!Cn+HGeA>*@x;rog&?6m27NZ??WapHZtY0ALEc}v3g3wgfKKQ$Wy@DA@{9gG`Fr{yt^ zc{UwXC;As9xhB9Z9zrH;am1J7J zL!@%hRn*s8;V7PuJv{><#-?52Z66PE?n_xFPG*ua1`zcvc!xkG3;&YzJr*JZntb|h zJ;_ng(FKHEU~sjA1Svs(Z}3+;Pq)OTHrBq#1U6ufl*I+|7Kdm1c2g!+g3EHK{{D<9 zvfARA9h|QUG`kUe;WmrRv%5qwi2GG4@z0`{{Pk*@YpHiD3yuy%2(Jb)n@0KdZWbjU~_-3xs-f zD}Cc`?8bPFp$m&o+Mg2@VQYwUV?JUOixC-YU+{4|{@6H{ujk-GpG+7FrwR$$rfYGT%OOp9N>-sRqyR`BWZiDZTjBFw1Sdm zT8k)ICG{{g2F1?V10k#JpL|s(L8`MLTK*LQIy7PRuVUn*JPE}NF-vW5QbH5Hx%=SdY}>q^z%hhWc{bD26s!K%olgC0X+;fp?>R`d>( zBJX#=#Y*sIpz4E)%$EvipC+#v#hXvf$=VN`bFOhCHL6;Umj45tTIId(5w(Kdy?Z1w+ 
zL@@5b*C?}uoM&UB#b>yW?Ws9aJ=@{TD~(=i!av7XLr>dOwASTq9d+?!Vw2SV`SJwjXSNn`Yaz6nKBXbW;2y}sk96^jT>JL7zX=$-5u2w5L8Mz^Y@^r z`_@REF!vlfdMPzXUn{qjtihoh+vlyK;k5H8n2rASPct4hU|@pa*7rSJi&Bf>2YJ0g_aJkhsful^yU! z%*PYCJK~PI#|0>#Qu3r(5Ee343uZEHp2`Z}nD@BgkHh-SFJIzP#9$(0O2{E4RzN9A zP)TJymDxos0j&mvM?GF;7YLUt?E#PkO@@~^icw}yDY#ep9){-(y z3U|=Hd_|dM1wkf046ufcEos2UQHs`>Et6}?Np5NilK1jN7ef%t1MI$Y#r=;^;}1`1 z%1f|P{;GGpDsv9e`?)UbN=sJGCE=mcM*vJ6%9$Hlv5|ZM{1eG$+S5lD-^a_nOe5{pS-~wwUOcql zP4|BuW=$~s8UiG&v}sWZvQ6IzXn69W0~(hvGty84ZGj^3_FKdT6Et!XItRytR0D@u zpitV^`b??4E4KKs#~IPufzGriD1=iR-$HB7U;=>1%9ns1-j5OFC^Lc{Q?pymeSMQP zxb4LSi@3_JN{EY_G@cvK$cPs36Fz4COG@l2vQMd@VIVptt^h61 z!7W)M`-~=Pt0cGCVYDdsC0^Xtc_-Mp(S_m|Bw)VC6t{uZEkqF2K)gT+umyv}D$bw% z@W5o4#mG;2z|U3E#cH5U-~4}65Nc6Dc(Cqw1;c^91znz#&3xvlztVt&ZNY*^A%RMp z^(w*Fb}p7S$uOH8JN7l5dg**e9)2W^?e+$Mz}Zxe5&x6@=9H*)MQjypb^i*%KIwu) z%Z@tV($mvv(yv<09+;UUfw(M-H7Fqz@9&>1GXi1J@S+}dLpIyI9nSgG6-r{wNChv`HcR9L zFrs2&3X#D0QN4x->fnT|HMTb2K?5|_d!STenVe5cp8WJtnhI2TBL&zC(=W;`O|kQu zbj#(PidRu$&c5KBr9L|8meG@<$K<~5g`J+50Z>J1hsYo~277ut+-2^qbc_BUhf4L4 z?KMp_j`V|C^|zc&JLwFjm6+V2BtCG!fNE))nQE$leNZ&zoU^amWR!HG8Qlrp$yJ7F zXr6OGJBvBc!Xi}d0|*zsoq(R^UOnpcP1D1vV%Tt|9whD<%G6LGc`e{cJfZRRCsqtj zxW+42u2gh%gM~>S9Hd%v$;_fvr3}kP;!#};hwPouy{q67Ppo&g|3FZLh{V180f$DvA zW;M&9M0x`pUfK!&x}63QQX$aHtKcNt@Wq0rpU&NKE^o;Q+&YjH&@dMPJSrlrCf;)> zhAPMQi`luholD`0F#iS1_1Zki;1STy8nhlVwv~*8WML81E;%$9Io}zZU zr=I)nYq%^NP?ovF@7@6u*Y>Roof1+u#bunZMQ;o%rc_%`V;JlC?CZs^C~l;&_JIrA z2K*}p?J_;10OKpYi_3$ar`DZ{aDgP;lumWTG|w)CnAN3eSm#%#R&R=Os42y%?H zv9=&FU^MRiKA}ZWc@5mZJjx0bSZd^HMbZrQ9^FJN2f-p zBJ}N&|B{DSg3@Q0Q(?s$bzh1xJtyj#+Pr4^WT16|=fNlh7#x3jsue0Et}+qdB+PLn$R^eIzURR<^27UQ zr5X6wFoPPAIo!j0$rg+@ss~^@qzuO-AL6dDAP70*d3!I9!6^{4M=BGr{h^TIngae^ z6{53m-xZqwLhO&tTdw~KXjb;%pT9!?GLY8I0RMsE4w%JD{ZSjC zv<&;MXbCJ`IJV!$<~f|+R|*Igk~TtIvduU-Sr@826kr9sXSN6~U065R9)=}gW+-P` z#}Idn%4Gtghr!q%!?kG^&Q4LAhbW}78wUW%1OqXof+-Q(1DFF6A^2Mi`SzA|n7bAM zguu|!!Cv(-VG=-c`C97Yy73C*eGLwQD$abL4ebDEYP%n`g9u7|`gX*5nh4AMEq~)m 
z+O=rWq5ye2!)-l@M$tWQYPwghTqWk!IICk=7t{4rk!XZ<6RR>op3@N4oLe|j6xp>c zW~r$Pa_X{v)Fx?krf7|Y8OSDpQ88)Ox59g1oU0SZt-MT0@{u;dL20y=5eZIYjb`j~~?91gUTS!17wk-4@ zgoy4ao9Y>BlgbIeVso3)7g^V1JQ_IFq`kmfp~05I)TPn@*}i>^slScAEh;*8&^ydI zXxLuL6-h&*TneWhe+I$071fBUV(4D}&pBQ*FJg{`4KNzd@H*`m5D-vIp2eHl=WUAS z*;VIfdiH}NW`#q?ng+^!0DxE#{LCca{EV6ejF_bLxRmLa zOT73i)j8*^Ah?;6z(6&QN5;`1F2a2-+w}(>;$0IN_vtJMV@>hIh*YGp*yG5-+=9%tknij?;HziO{?JM{Y&OVHD%0_`kmEwE%r}uc84I|q3oLl5$WmU^V%Izu7EH3 zQ=Dlh#f9#7C4@F1vHEjdlSb^LaDn8st|ge>M#T(3sXrt8*Bzhh>cT2ZMRw>J$y!b} zgS&#w>||>u(i3TyJ?y=9htn6^BCMRO`1c@AYcvmVc=J8zv-P-peDg=#PL-C2sSF;KFt>#nAbom=TM zn;E>}Kn`@gY;>DAI?y#N~h-!&t7nZeNtmuVQj?ll~G0|_@amisJI1zIog&O%qRNvXpNxWEeeW?AOYmR2pAjDO* zHzRf5)ehOiW{iQ|Dv0aBkSEmH*Vw#D@xr;pR{ggjt@uD6M0+)_M<-;whF?E5t^@Cz zWKY2fqPj`r+(^(fy1iR_RE$A#1GvNb7n$xFBv*c8(Y=ff_>Q{4h739 zr5Oz(N`lPQO{GEe(PW$&5LHMC^C@o~s8zP}n{_m9O47+&2|sINkVO zhxWktDeip)``+^v#z~y2{`+kg3CZjcw;Z%xW_VpD$g1aM>5kvUL!1*=1O+@wD%6FO z4SdJhHzP?1>kYHfEoZjCu~9};=p#~w${9*hNRM!TfBiuslmMgb?^Lq$i%sik`d##9 zip@pukyn$?;-VEzWvX@Wtvf8l!0icT-=6fAnU=_9mP<0+mY>VLogw+@n(YsY<|5mm z;&?|Nq<;2rx-o}lU)dPr0`YLwcDs$IN`K=_o0Gs?*n$)u^WZBK2QFZ%-7QU%ZBoR@ z5gm${AYx2PSG=m_h@c|;n}o*zWXFb6b3(9qp__zFuCohHkgZf4mVmxq4Y6=Ux;67b z4$MuE#^E$$XwM@#uds(&A+9Wf46}*)gqlbZd#=B5DWt4oBB4f`y$J z$+28f&nwX2WN7*~nAuL?xv0s3TnvDf{m0E-GTJ&i{*3=rf|b(Ez7zJf=(M-5|_ioV!`LX;-BBv#dO4$#M^C9oyo5(h?|1-h-%B5}?`wWtf z*`qF$ydVm22{OWxzxoUb0O$c#3;@1kU{KJkVuvA&4w349svOW!wEe<`3nT>pQzho|n8&;Y=zo#?jEr;U7T5d4rq zX+_UzL+Gw@ud91GaPk<;7D=0je;-j-EG~VScc|&($s8<(hKSr`gvf8pg zs7GTeyXOiTy6GwB=LAdm;eWYVK_@GDgEq3gEwjr59XaS;<*Cv`_a9WQh=pRmlZ@(S zU$sGi?yy1J)!)5cP4@$31eg1!56;qixyqax2ZsndrSU;mmBq_Bxep!vz}EloeXQCN zo>vwkIg|Z_grH`p3TiwR$-GaX|6*tZ#4n&p2x{ zSCVeTa(XZDw|kYO9Tg7r!nP9YnP6)B#QkJ_M$X0B-N{-EufoVpLVE?Jj`VA98PtvU*c#}P#mQEham)Y(FvUixt!_7wY!ax6mtE^2N6w!31OGi?vXB5cjL@NvHqDhwl zQrl?yr~M3+o!dL1jy7%>sT0kCWQ>{M{>u+{;W+-R8{7C*y>==4X4QGwnT#e0 zS-BV8iGH%FYyE6Ew=GQf50sCd^kxWSrWu{?`7}F^#)}U{!iEm~T zb|WYd17pG(v2B%m)18hJyX76WkulM;TjV$6u1 
zN^YnV2OpHadD&4NQMf#0#@}lq^{!s_c4m2Bi%*5=C0WJ5OGEE8(8Xo|s+p`pkqjYt zJXoy^6*E`C=9htx3YbDw+ud-~ibO2JnbxgRt){09)(?-ouOnXwnASxHa$B9%+6#8`0(oZrx zz#~S}{r<#5CT0U4wy-nwdU2v&+*0$ou+jB`&Q5oLWff2ur5?YK9(i4nb&PM*mu}Gc z)~yxr+BtWRsx5z?*Q7SyC)Aw~+_?14=%Qe0F|eRTB%=5k`@CGIH!l$waBh+`Bu6@7qV3<^AU(b1YdU68f#G`aa zti-1c+>g?icP-goHW-7QeHr;7_&R%J$vT=Xl>iP-CQrK$RrbaDm^|b$4>yLkZA`sZ zSmt-LwY(=aazB6ONcjGusTKP7ehE%A^putv>6tjuCBA&2OaF=-3s)O($(3#UDv|MS$`Qqo6&Z9}(pHeiEf+fq|;H{fyru$~D13QgC^~$$(2GmAA zh8C?JggggqHr_?8BlTW0Q>4^F+k7GfCvBh1)%bTRd*2*hfh+NT-uH-GwEpIpj-iVw zt3{@1Oogp)gn`;sqCk*>s*+H=WmVJYE~W!#nNvthuH}{MA^mPyHb%7!aqX-Ybi38O z80ARB6Wcg%^=>PBck&Z|5{6$~vS#~MWJa5<72!3R)K~aMm}^~QqtyFrI`se z@=r#qNbakle{#~(rje-cqlUoq&p_ zp{mnOw%FHbkSq}5)F_>?X!W(JybDS*6)F>D64T?O%`Vf|1v z9bUmsc@E?daK|Lp({6C|0||OBskYW>kF@xV720_wHdf1kp{zs`T<2X~-Ry(k$c@F? zdAwMusvHuy0j;547jn-Hl-s1Xw#xKA`9!Mzvd6AVA}&xTd0!3NX=)n1Os-~E&&@Zw zodyTHx#n@bdD&Pu-;CWDRldsKtaASz+@PHAy18~1mT5bkAWicJqIMFUR>@4GxLC)e zlE(hGc_FzPZ5VnlDe(OY z*;$f}8=sf^Fg7k8dW`s39546wXGVVdGC?FR)6ijk2ASbvbh}@0Y_iLl#Flf6{Qmje z37xU2>WLG*5!{5%+w3t-n`k02fDlLEauqpj6yqsS&GD9owqr*NA*GxWtR_KLtC3Yen^!ljdMb9iBvuh&=o->Kk7 ziZF$}V(#4CYwGA&Ui~1qJ$vl+t$?AS_zg@7Q9tob-FfyF-yNoW7+q@b!+uV~{5B72 z{6#Y?YjQ+n6YWb39g01K(B`-+0n!d;XLD~F9K>e}4x+<{5dXvw@b;#DF4t-qL$ zrxG@VhB;LlWD12kBkl6-Hz0lSLXL6wKoWmn+yOronZDqLj~5qBH$RS;IUH$&onkJo z^z-j~aI0giI%jfS14vZ$#~3IFzi^Uy;e9(V`PT0CNC0u)7Y9xbUD=&5cxpvLm)wN= z>y`2K9?oXL?)EFS<5l1^BK^`iM=8fgrS>|>v%-`uJ~)y8YJS@cyOvJ3Np<;=q}gIk zU-p2?w(bYbrxyqa1g?oQNa@~L72rHjoYn)$`}Ym$$L>Vso7<(WRz`nzyv0v)**gE!&8`Ya}D*L8G;+O>`*-j7|-1Y2= z-TpQ)G~#85GmB9v(|fIVYGMPkceU91(#ftIR9Q#*vTCe;Ln=CerKnCtRw8%DS5bGK z+&d>q_k*5OU|fa6lgpaUozF5wX*wSGgqDJX88s85&50v#^Wvpalx%D~hkjONFrUZn zzh7h3Ami!YsGGTUL9mCD;n<~_Cz#TTfw1rLR(5l)jNb0ilk75HYbE76S>dxG(zq&) z@4;iG)TU`lqR-~md;KzhRgrX8tACDjvN+3r;av>5w{xOHDqoq2P{rB!~~*ew@T`GQgsB#r3Qkq)b>nV23VtH}MMAN)w%WB-7-C6BcEWzj6o9-tlHW2IpTxqJ?yd!C# zq)u=>Zd=f%(Rv3l{3m7A`{Z!N7W>+y;x!AnB&qQV-!Tw#WkYy+LPGt~TEEke9|s?8 
zk9E?SAJ$Dshqsx|KHUK1&f)jp?rfKPqdMbDK`pe@OAdKe6c5K9KS{sR&kvPunF_l! zsB74qGx{A>TDey7tv`p(Z09yFEjfo)!`!a;cBLiqnfU`(w~y$r$?1&lzE>zB-5)xW z#`DE2tB@o_HSacqH7N5<%-e4F<1dfk0@;BrR#iF0f4h7H7o8i>ZGG;I_n17-<&u-K7>q}nsnxpl7=f%pW zn)yVThFlisKsuaWB+uYpA&gAH$?B4!n@I_uxuD12^w4#38*W^1O<8x!1GnJ7O3tj2 zevOW9C8&eemC6NGIyy4%UkgM+wdU8;re!)gfrtnGGGC+726!fpTEx`x+~`-SC|DeO z|Hfs#^^>)WV;*WB77}M?WZCnEl?4hyZclYoChANdYYO$HD!^hhd(8Bhy-91Q|7@Sv zU#Yll+LClQNMfX0$FOEx4P%7uZxWqQznQCgNA4lxl8Mw$qI$!QRR=FzbZT;RP;l8W z9ctek(01TnbZ4|?O$2eSwX*W%@8xO#1Ah}8Hu$8_@jmgymd#h#+HV>qc-8)s?vjP^ z%2nLDDem7{j>vq;{0?3%N^XOl0W~9~>f^IKydt|-uQu>d`L#nTQ)4Hi;(GV@iub78 zcw-wUV{QQ@V%javA-QaRBDgbYpH?NWx^7SCN8d^J-t|&3jhHbNk9_?2biLzHRZrZUaiGHI^hTt!jsiVfK-+R&rW*fy@!qf+ zY?#KDHBF+Xb@s!=0f49e9^#xBBC^sT{aDSxOhE0Xd+@-^?B5|LreZGrErYO%#Vo)x z-_G`3nLXD(V$ARUtpMEwaqvMT!CBXDErJG00S67`p->g}Df+g-1VBlrnHK*~6k{NDwkAE;~^nCVlBd zsC<|f=_knJMR`pQ>SqV2j3tcmEVvXq$Yg=6V@R6?QiR%-`Ljv$O$`#0b3sPHQd z@>iMY_P3N7l!!qHEDDtpg?p8%V{-JO*h}$zrq9bRx6kzZcPCl;(Ip^0Pp&Ys-e;7P z>KOU->RW-ppO?(XqE&m5biSW#Y8^dSvCd|(yhp21a5kjj_gqp?PoJ&Jic-Y*qQOQ?dNOnV|z%SWP z%tEVH>t&)Q-@gUE+HeQ!odN($<7}{eNP&hxC*=5|SU?MTD0!h;`< zKV=Ka?uB}`U5C2^U~(LZ>RD-F;h&tQCLg)G`U)VnQYU2EwsS{S2 zLnuRg(0JNFN(jLfB1NI`Y3oIsT+CIbDnb*eU|K83Rd;9ZE7+F~Ya*;3!+cUASrB$x zb#{rhyEPS-w}-0UzrWe*+NPH~xkD4Pr-yDQw99*r71oS!8$CIPMjpUb#XwL7Lffwg zJxy(RzFKs?x9~IDIW`T?AaDq#SLYw}7aV(CpLY1}gy%%7&dHD0F|Ug5zpMqhd?0Y) z*JEAUaYewwu^~ezdTwxg>Hk#wZl8eis{sZ!<-E=&V^;B|kq}$-#xL*|{xDN`c&5wY z^$pb4J|_OPwL{lurrzcx+m7{z@88SPbi!6X7kQi9^bJBfRYDqrG?AMGORw9zNP%rf z$a(Y%f8rGC9HMI1KOFx zwZq(>KGb$lB0VVNM`8aN^4O@GF22O*=ywEpNbz}YUDTK9!{9m67Qh2n1Sd}-JSVfH zpv|WH&0wO$=6QRI&*Q~pL3H$|Yvkn|z}++Qi{NR3|7s2z0%_Kk>E*MH z?GRK?@H#a}EC_bgzBn@!KQIP_{u4-8hyNvcw`0^8xcZs6eKq%u$5%V;U!48==47*f zPNr-#a)dV@8G=RLD?eOK43<{Nw$|S|Dm2{s^Mf31zcV#G4Q=HZABMUT_$g8O5&aTq zSch!qH+ekt?pem`Z8Pga;_Y;>52iPbd3-sI)gsBsW0f1^t}t{{59OYTdup6B)t@`j ztHj#=R&<6H8K0Fh_zhO+QH259Z0S^wx92wA)$?C0Au-l1oZ7N>_nNfaE{s>{nN9F< z8L?e7Q_DAfnNf4!ZKQkiF^i3B=c#=?JH&hTVuHjpx{F~-){3XJzdYl=`7_+spyshv 
zwohy#uLmBwaamK-q&jy+BxtqF>4)=_9b6alurL(u+pleQX)~IEgUl(nzGB0*yBN$i z;ELC?hil!tfBd2xJf1qxE%V-y93nwkCQs(C2zNch8#tBx(Hl}YcrO{MLdWg^C@e!b z=Li|y`!c4hlSXX?O61k3;)$==UDSdAWPfR>pPW&Q)+2 zU{*x>Oa6o z0;Q9kI!qQ2pHH_>bjL11xO;fUbtJ^O{QEp!sqJs1QaklBtaS(GV}e1?Lj~?VEyD+I zAK%UcwV|3PVvzEBGH0?bhr$Gt<+&j=7)aQ@a(BG@ZeJ5x1rfHC`*toVR)MWfB^GL; zg2j>7v%+2Tcq_8JX4sTvOMurD`r=pgW=XkS_}3MNi!*de$MSgpuj<}Btmpju|9@eI z83r?Bo3Uj~#-1gzR(8q|p;RhMLK~q%>zH9?Y$Z#QWXX~wN(-rsVY0N4_C=W@T9Hbn z`rXcpd4JxY@8|Qret-O~>-W2UUDq(L)a&(pKAw-qIrnov=iHBucT`{Z8|x~>?9{hh z@3TDRyXj%ov$ax}edwgh=}7pue0S{sNj0zCow)@z$9miQ-&mDY3?y>hjQi7#X zPwt1{joTK5Ha+dUwW7289Pi$B56mq~XaN~3`%3Z3NzTdeHx0CCon3f346m@*z_3I; z(>&mKyS~wPtkX1W7JCOadbG-}ejM@Py(Sz#KDhg$@PxkE ztFkVC(NJpmc+I<48)qxgzLw};UCtH@mN>q?c+5pJ{|lV#TU2qo4fMZyrz|TU2!Eo! zqgGws7yOrWbP}}Qo?{xeKfm@zmvY?hJ6{hZ=3HA3Awh=VexK8(7tWubLwU!wRf?;h zVPl=~&|AQLEl0#y4Ag7k8+8;ukw>&(ifF+UBkf?Iq>k z79B!-WvWf+N0uaV@hBb4p*lqweGYc|uPoW4{2vtFQfT7$^i1x0cac85v$8V2aC^t+ z$7Yo?8`o;LW^(yUNFw z69iwZ*SXhW1b+`t(v3;8_SD>1aBKo=qqqtlm3LI+F1)hzN5al)tFF#7#_E+f(kn0UdshGap3bV1qnH8cSlj3Fdi{lvPI{roW8a)> zYz}<;`IOy@OpSts0kHX4DCJRceyL63))tNL(q+{aojmEK65w{m^oCr+TJ?{lQ)IZ% zYfFZHC;%`?as9A9@-uKz)CElGuw+>G$Bg-e12<4|;N@{2pNrz^7hKxI)?+oghuPHc zN~?65;=K!{PoLTs>FVKC0cYNFIm$2PhI{yRw*!B5-Bg;Q@}q-80y33O?RPkB%AYWu zQ0ze;0Vwj-?{}WZ)wLfuGxtpNHPL2JnKMWpz$Et%pS=3aRJqKMDI(|c^jC|gr)aNA zRjYVp?U`6#WOD{fsE9Pm4`?~|E6o+}Yf#X!dUWbWjMWm!#R^H>=-JDs`J8|4N$d9v zwyb{7xyq#D=0T5PcT2Bd-<-3x9X6#N zGS^)lk-B1&mCc~y!=J}T#th+O2Q2)5oQ}OeSHg3J@&5Zf83Q^}+OF2W&}Uh9TCn@l z#}ZC8MfH4B(;<^K&xxW>t_i6irAy9=D06fhz;9M$c0!M)P4dK{u7u5Fk-By?o ztcMTBWMn<`x`S|%{3l__W6KokweL7?#rP^J@FNL5(kbVc+uJA>I7WW*ljGk(L5r#%c6RgHfegX3TRYad)a9S;{Hgo6l{3tuU!Lu3 zy6R4F=<}0wZIF>KP)`-UFuy!3M*{-ud4S>pB7u!~+ z6vw8%C}?Ob;@rA9zWc?|!eo8ff{*yLc6_myGA~9ND(PTvTu|VcNzoTw8!OHxw3~m# z%M-ab@8Xn9GrxGPH*>rL-;IlLXP;koeQ){V;jp8V>f*`jh1H#3IH49@5v$l9b~moP zv=4at=i@7LeQfuBMoBFkV8x8VT3ySp02=;wts9_o{GE@x*k#39OV>`Cn}5DCJpYf? 
zDPMgrMFKkBvuTT5LN{Xe`}!cDv1(PUVV>)&!mkHN4s+9z#Rg}A?(1yw{@zsJ^7c_; zLqp~PRiO0h+n#QVO93)h73~8a&OfM((iO(X<-Gxu7WLad-MTI3>Wzuk28-&|b!sOS z?Qvighw9m2&6CD%PM0WbRLvCT#_L!<OzQ+ZU8eggNT z&?~#@?c?ftq#t7c?4;u@YkWBF)h)}A*Z1?C)-+s6umrONm8K0II6vTc&H=qAl8PPl z{Un#jHvcn+^Hm0aXY7d2@O z479ix{9b8w3;#N}`T3srn@Z-#+_KM8NxAcv*_-jX+KoR}SHct}$Ggkd#l5~fQOhjva(^eYjJRG;oKu?W&-$3@Un*R_BmMlzjeRCrHNAVStMg;p z`VKuV7!<@A2G`4*Y?$t65Ot})U7XQOWS8o74=%X1|9fLbuzIH)+pk*xEIDM_p&>fn zapg};{m%ZlU)90v#?k>tH4dpZHQXC`lxnQ7gI(|0r!BwxWqsRE))sWujxPB;)KJf@ zRjbU}Ke?B}3oCDyI3y=-F1$H4;$=;wi_5zd*O0A~?-{>+;$c(sOZuMoXZp#7^ebEa z_H(s0`}#V#&)?1P(>__~$@<5Bj-_losjYH7K>xi?Gi$c&6aR4DoK3OBviWm$9S z;E_8nEPk{i@AYla-L@SUhwG_yvrZcn><2FQu-Vlr;AY4E>dpEi2Y)v;e)zIMI+f;~ zbW?*go!^$73%H)}+3&lq&k9gdYIehXUw`7Wv!yxex#Np|b9Ej)Ss$Oye>KMDr)GAT z8DVwEH;FrdBbZvz+_WUdz2!kpWJNob^Zd80RvMX?6@2sR^Spkei$;9fWnb?{H|G1N z4r_6}_jOa8#qX*?NsU!Q>B{_Zt>iNlg;s+$HC9}&YOIRbRy?N(8+*@=i;FYLCoAF6dvB``h)wyU9%aV65>BJ$aKX6#jq+^Fun6ov!aVte z1YTb(ze%o-ubTs2bv<(N&o0a5g+JLiK@{h%9 zzj*h_rO`aTvR6La<;O?=Jn-53MazdQ4(d0-DF0|Ly%}46IqaVD@x3~^tEklPAHmOg zO_zn(HTihgJI8k|x^_SPPCFz-YrhK(MrPl?^b=BCE*>+9r) zJs!-66b$Ea%+p3`P7I&#?D=Xd!_gUUxQv{|E+!p1K;&OQy=xoF3No09{*?7b`7`AA zZw~OPkiYwte6O4hdh<}s$+j+JA%f9uI@5E^+7mK!mIeZu2uZWepgXs3pGR8sKmYoQ zwk|*X@Ixu7gZ-*|`%E%mh-2k-(on|qhG!y>7|I~q3BgA>a140;B#g>3o}7z?a$L!r zfL@k2OWr(tAY*wC4PG>hQD?g0%~X-@S7pRcmGL-%I*JXxcd4@Du~qN?aPh8oM_L6i zall8F;n$6)$`<)B;Lbc;1IwjQT>2tgX*D)#efY4zl#c3!n<%gI4n*|EbhB&e8%^ih zW&i1?JFJ~T?B0QrQ)M!KPiNQe-7&>im>|FL(KFcULyXU5cGHn$14}LS z$G@+dJ*K3>%=_3yyX1~{0|V8Mrk(C&sNLbV%B1r{mRz`NAKmbBX2*NFX`UBnb-Xv; zYL3CFofZ-02GeP0IKy0r)eh>TXIffL8r<%A%#oso=z(W_cC#36H7Db~-B{Jr*_AuLeYJJ?_S${M6HX?0FR<&z{~B&VOh0 zb5HM@Fq4k=4h{8b^jRG}Ia{M(=Ap#WAM(8P%dGyG{ApP9fN^JAFI-laIHl?CL@t4g z@BQ&gvO{)yRa{!;nB=%{z+6j*E_QQ*#*RI);G}%o&|TI6gFn1~#Ca9ppBwf&@M>sG zy4N1d7`DwChS{kO{_eY{r^?Vm8m`m6Q>VD%E2x?t+}iO^hv5#h2DhXrqV>}{E54oD z@sFBe>f!#kUAI4&Zu+U$S7~2<_~tH~UoTFX_pfhEwG{KWZ0G>pWA3+-<6Ilnu4_4L zUEEdgq(n;|<-0x{xd|#CHdty23uknCR!5ywMtyC3R1@?7T{yR-9AzjQJ^I5Qzgz!t 
z^Jc|7*B1He06lzZ{V@^ya0-ySwY?qtdat z;q5x7e0r$6F$4^~`}4Hx8KwWPr+Z4nyVVYDE}fxw+z5e*y>9Pc&!%N~{@$KrQ)%4e zv$Y408M9avc1nMcdsL|X^xyCC(y>*Rhtu~)A4XsBG-8tL(1>WDd3Oovnuv9eXNkpO z@a^IBqz_l$_P<|!^yqPU9uiId+n2|&zXk?VQ}o}yJmM4n$Isd9`*-)D{PFDGZKP1< z-@n2pwEeH*`w#zMu?ffU?>__Tdj4;A_`^SWCE*0_e!}g|Q+~tZ%l~FKF0FAh%2)I| zmlL*ncjWqlOGNNv*FNkQ?ANAWyZV|;^9V+d)5-8-$DU&zie^yyyw+aRZ~ygIDFe{b z5b;_PyKrcc=RRX^IdR?9`f z8+j@YezW9AnbGX14ZL2s}lD0d5rY;TZTvs($Fwiv0}x<4TT=>o0`TlO->W- z52Gw&#oev!($w8H?Fw~q5QU{5uhV1b%(&@}w>kBC~nt?C;NPuUE!(cfa>s zOTxS~?^-zW&-0l;U681ucoC+Jf}Wt6s2VKn@^@1}AxWTfcPvDRl9l6$QT zH^roLO~y~Zq)s6nA)5Z?=H{`8d-NP@bx=7ajf`Fglg|BuZJ*pQdi3bgkY&-AjU#Pd zM%ba2SNiBQ+S4z-bEwE!^3_*g?eg@@BnNtmF@tM!BW)=ZyMO9%n<=$8OD{9d;kQ#G z+ju0-{O6V2)E#cBrIDKOv728k2r&wu_0vy3U0`rD0ym%M)6L1y^@FN=`)V3<7mP^^ z&b-@1$Gi+fXebX)NkcL9V#8;Ou-|;wy?A)Cr@G}H=1mrl-^b`WSyx0K7J6@fp7^aR zql{>PkJCLgk>)&($l3sHZEYD1%{(#h7*qQA87(+2>YHJ^G*i$4GS^Bn3SW@aLyJBLyLLpBAogioOx@e+wo zEK*Y-aK{??Q|~}7BKoICiHw!)ufsVyzkU058A#{UN0Im0$NkzbmjCe*8!NlF-}iT& z?!Nry)$e7-ysuW%+eHqgX*yb3TJnGbJNz^v;_`eyjd{OzR>?q4wFdc<3#hiptXKMq zPa?vzh<;!{)obc)7uDC->o7n3^t18%_&56yt)3pi!&p2vat2i}r+@qHvzfDISv|S2 z)w|d+L-pm|{ogkrt(6|A(?wtqEtYAdP(!HlH)Py+Pt9#tRR<$LXr}I@@1A^Mpyy#V z(+jU&y&5yN?UdvQ_ZGK#zN2S?d1PEOr6#BU{IzpT*`mgNR5SRJX+MLg>kFQ}AyTq3fl%Q9~#mD~2cqjuiDb7!!cT5`qD@7_I(ZMVVopF_N-Uv8@@&;RwR zf5Fpjw_27&tG&0sdLyA||K?_UlrNjF)AM*i#qHfx@2i-$(5BuAkLb|4>ejUF&@dx? 
zn#mmMQhY`P(C2O@fYNTw>y0p{&MHM^IKw7oSS>ps^nOX3 z4Gw*@T$VCDu(B#>tOX5o+erF5FgADQj2VM|_+b{{^w8$G0`HzZJ0qLQ#B#Y>@4R!J zt}@HqJ1_e$_rpCq&EDJd3`1M<=>hO%|n{!&!{Sp5t!@iNVILa7y<0!k3ZbJ>`OlpA zW8(w+-ZIRZLESxUAD}f5!4MT0_>0|kY`LuPa+SM7;CKl>c`+_SoGRM3X&nFjo(u4@ zV$~6beTb&OL3-wGWfXSZuwybtKCx?$9_NaSE!e8}ie3*wO_UMZ2F#t7iIlf*-yTyj zhmqP$5m|x+7SOsh$7PG++geI1f{?P5(ZWpEW%ljBSh|IE=giqqHl1X)vr__HlWU(fqSLodv%u8z(lw4Mg&u%ne|TE|=F`%)ugwz;-|+NQU(Ta@ztOr5`W zsiCB;M=*GTL+Pj(y&YiT`|CficDel8YhSAg4-v~;!N}?eT7J`!Z?#6R#e-Edp-iuW z6I$uWmf7U)R(Y)?(Z2U2#$CoF7QH^=n~j~YCC-eLom$P4e$qSP3V?W8tbhMo42fHh7~ENhswesUu8G~XXCvyE zaUaKeB>esN-+8Np>RgzTJ8amnL_lTnrQQ4XMIx8(S2VsBvmxdsMlE6=vR>J(+q9_w z5{*+24-5=Ue1BeXX*(*kCQJdTKnW)T9jsiQK?tY#c~Q|SWC2I2SH*4j&8Dd;$-Z|l z>R^r*A;y1?%K&rcFbgvO8D{Y6r=NUsgiphYoXM_23(o+kX`LGQdv0BQ_10GX26Cze zOL+Xx<2~!PhhFjcfqKWUo;cCZI_GN8%-OSZY4S@-v|+f*^YQyQeJR$BY5TH}&RDar zC<~Lm+OlMlOOSsY0izLeufs-^5c`Ds1! zZ{ytE-AzEGOIq?2qZ3-1>$ErZPQXK~Vz*anHWeSxJ6bdz?4%W1(WOh5q{NHJP8nQX zyRebpO)7U8&hFGu59fYO_V=GD1MW077SCZlF4AZQePU}(m0Y-Sgw?e_M%B3RyAePk zV+3rH_KZ00ajc()Mn=xnb*rd*ia@^l+IJ%c556~T+O&&QE3T`3y=8KD6_xdriC)2N zZ*dLsH|{Lcz4sL@We`h6JOkck1o{xeOEze;p5O>&vjhM{InENqGvqq8 zezw}&9(|vh(c~;ZiS11uA))9J-sCvtnwk>)dTQ1lkqDe&-obb!8J7+IE7LkVVaevr zSIyj;9Ac3!xnLH1J!Ud4{eqjD!e>N8r}vKtpaPFK!3)9q#_A@78#3|a&Avf+p+oWi-p~WEnQ&w zLxR1k4i4d@3@t{e=TA*NeSQ6-)ot!hbay*5_xR+kR~;*i89i-ITh-zzQ#@2f;CbSv z=X^5Za!?W}JHot#ouTxUC$ZD^Q>>Xoq_e)6oC~nq^ZsT)^_XpEI zD%J&IOI3gOibJ8Hp#r)YAP|J0iY!H7^;n|4sm!G{d}ge;nLZ`5&4z3L7#2gG$jxVX zxVwYi^x1Hn5MD8EUv%{NPusOKf276MNw11;xrBOj@RXlgdk1=W$e`qP?4Aq85f_Fm zK7ZoKk#7WhS~0O4IMiRqu4GtE!NP?L3EvOmsY@d!V?&b`;|lyIZ`FsTS$JjTovvSh zeTsFVL*0K7Pc*i+tROyqSNGu;(8dt}lM}T*Ktg898;PGRplZrt5SA|n`^k>Yt zz1lXQ;m$+K4!eE%<%#D>enUr&T##BnVt?z_t(8_75qq%qL&CS9y}ibjQct>oqj{sX zS!p~f%tI6G#Xg5VORayHi3;;ro2&)cL_u!MN>|n4G3zg1exi4ReVqg>{PRd`i7SD` zZ-OWa+#2Tz-sF*|F&ep>+OqLoF7@^RoLMn={DMJ9&g!ZI#?y&d&jPjQ@Qe=v7UJXM zS(89;ofYWxIy$bK0kxo=A2DGIIa)1mj-c#*0M+tP{Fn?MPp!{cVcEW8$8?A-38DNa 
zY?|c_BGig{a&XYXY2JAwt`MUHqmGEEOW#y>+f`V1+w8EJa(UoV2gF&cBXlK}i0*S^)3)oA>!XmOCIjvG)sgy$cL zc4KIbM6OuarI=|k&4-5?s&(kpDUykF-z61gE*hY!nuVf|wcwhxX1eS0Pn|kd&R~(s zFxD^=Tg@1gBub~X7}#)(V~(jaKsdsLebLRo%@pIHx((>VbV zKASxP!^H`)#p8#e^dD2!Sw+i6PqXGAd(vpej9-|;nZo;DzC2QX)qlMG-=`{m-@7+L z|BSAvbTmiCy33k(=+GgYm?6*g&M&?qKZ6}#3Re}vQGQ-kwI0xW7)^9h+^Yba`ov99 zsI5b@5bHl--=;^6rAU-H@;0G{;eoX}`sRbl&?=YMoO zu4}@+RcydWKuhy#6V9_G@2;^XxwraU6LmF%=}0+@t*)*PhdaFX0F1PXSgQLs->jYI zrx79VP&A$q+*-l(OS_8LtMgVx;VubO+KE-=e?EjQ^2)LwkMd{3>qQqZ;NF3scIYlK ztG&H_rHxACpG7$g4LK{PiwRVfquKxVMYCkQyo4jfjm3TSycrr{#I??s-H({Z*llmu z*1UW?8@Kxq<=nGalQX`bADQH2x8%ZNo!75l6ODKp_G(E~f++;{mIAgGpsc<)aL!l6 zKl8n>+U9ELl$dvZjih8SzQy?R@}B^{5vw0`&qrzM+Z)>yy%QE&c8 zFd_-?{FVu1Wh`qZ+M{_*s_PI{H;#9UHQV-bK(|2)ZiY-EbdqqFI@loKViW4f8Hj?_ z&;@+Wb4C0SFRw2_F}jce_9~B>OKNpX%4Rb}9vQTg=mjihS+1jnm6xrFyEv3kv=AHo z{{8#6w3be18md0yz2_erf0BWPrUbFR8$SK1tl*5z=AkEUtl$f1+YS^=Ej@dpXC*@bKoWK&~IIizqj|wzMcu~yA81*Z2WOiSz;opgf_?} zo!4x5aaxuW*}5YV!_~4ln5sBH@tRckR;>zKzR%rX;jyQ`^3Q(!sj%o2)+f8u-9aDz zeAB1ClhMzu0-}cn&8*;;qqy~@78ZfZJ7)|AxV7Tg8QtMKkxsk9zh&X|Fx}3`8Q#b^ z$Hn?btbKHgvt`4UsO9>x_A)?gcjOvQi(9k<@?(efzEq@Eo* zc8tJ#-tEL5B@uEBqUkTwcpQ-2%8s2o$+|o%XZuC5dCnE>YBi^$ZV(Z4$37F&01{SW zKFQb)R64w~kPi7{;);FZdNL;_&Zi?M3es+D_v&LM!FF{d;sAt7Pbo^MtNti`Ek z)m9a_KdMOZR9XLN`>vM>Mvr9gNX{Zq3L!;Q3zucKIi8@-1SQA{W{iYUA(cVL)7oAd zHY3BUpKb)u;~&TJ{EtEcB*nddGdFa^6~(f+-8J1naLaL^;V9M^VQ`1_>vt10wOsrC ztDk*MKU-s2^0O%zzJmQH(V4NIhV6}gehpQDfvV0#*TYASd?)}Mn;g-9=B_M)x{AKl zDYENn|8aEJ3;7Ou;|B3yOayVqNhaYKFo}rnWlvk=_BEVqZRXCI!$tlSje?b&3+wE# zGcb>-m}0-5hJL2#A%^43T}O*a5`;no8scPb9{R9P`eR1CTt^mmxpe#X3=>$kSkhny z=jQj3F_L-DNXR&rUxqaZHjRlCzZD5flZ%03O3qD~D1=N_nHACpIZ+`W0Lx_b5N z*Junm51aRTUQ-jhQ&k;B}P zU(+udJ@(IEW7)k+msLX8%e{c*c=kh$!U8eQ##qoBIbk5$F@4Mw?js`HL=ZLO>bga8 z{+M^gu}DtCwPTtv-X%6T%EMk>%RzK9cP&Yvk>!&<7ujwm0C*edq*^?dpQoqiJm!Tb zU0dWCaPXi79T=Cf>>La9U7Az!pL?3C`TA<2Bip@i-^z@TG-o&7QJi{|n(DxTChU7g zStXN=uzi*SNucIn>2D`-q z^-dVY&rV|6?#w7zP^h^anp0K~1CI&s;Ogs6dz2hg<~sPtH1Pq-J&9+`zkhgmSzqml 
z&;t!d9K76sRv*m>5Ysfelf|UMH(nKl8JGHsB>`^c?_2P%jt zE*BOSj!8i1%%j_2{oA;w*u$Rex?<~&KPVLAe$$yTbuqDk6*OA}bgjwSwdXaN$k@YG zb(>Yk3ESSDhy2R{t-?oTk&}vz)Cu_xEzVJFff12>#PCt09zEGsHk_>_5v)@@XH7OH zTUr)i?7Gz30*4si6PKIa#!IW{uf3aETsnbmVW^GB+EyT&8IDiBmMa_y^m+wjEXRg- zX!?ol%Thczoor^+6YcP6hty1Snp&4J0F95gL7K9M?d;OmjN4fbcf~S{warNjUVp%N z4Pmu4qEy;K1Yz5rspVeVbPnPp3DZ;6ZT#uer-ws-a91#tM!b(LlRF}j=ljBRlH#4Z z(xrU%&z}kl<<7YK;TwdXUh3~YANPwyGsHVXIJQLP7M9dScwgQ4Ot*YcD3pzTExO2SOoh~jeZwp$Q4Y|}M zJkJ3-l6*Qfb2kK-7H<}ld#106cSfjrD3U64HOz@GiLMP-G5LdZw((?c>?4MmDM=`JN~he<3*YpHswV+6S{*~TRESIKIdLxa zn?XiP&M%nC=Pbc@93t2dREa~|==rp)EEvEw7%GCz@Ca3p&cs?&qjWTm-f&V7z?|@Q5l<9qEjH)KwopCDae&BUAy6TtJN6#Gfbca0 z?fGEZ4W;%kBqg$7A+y?Ca9E?;D?6;9BDk{!U@FDIk&B92E z-$~JGOw3nv(3sDHhv1tk-ylf|cd6l2xlBELx0aS945I$Nv)jbR24ClX&;>Ih5fFtY zMG+NZj)CJ5uU>5+s8z?12QLXe?8=82ZVpidMyyHc>x`s&2<01M!#T_b1ZVOy!;Qjj zTjw$kVkrX`8hiR`&yCHDvdLaa*5?Xzv;dBBh9i#Vnqyv`^|L{R-mDS0nX2ofW=mT% z*dg@jw;0WCBh2#HHBCOY$yg-;R+?4tGQ-D=NfSWYLS%L$vf#$naQgm~u$+L(;M9!I zdtop${<@ z{FDih4OD0;#HbiajA7m2Pl9TmldruB7h+5l=j%%ZbAMOY!7G`M!K+C{?>s}#zGE!K zY6zdo#GB&I0isEnnVA5)lCg6qV0I-=hrCy=JL^p%A(7bzr{gc~Q2Z+?`eC1ZNR57_ zOq9y@X!z43r7Vqof05b(0T;a{Z@HXwWYnKR?sCYZNV9}`jpykK;2~(vA|zcwh<%wr zcbn~jhC#*V?oH_u2yuM|RJh0vgZsHDsDPi3jsbX-4afWKD56=10Dl|_uQv?L~sgy%3KDd21R z;)}}Ms1&=H3Jt%B4i^mDjdJh<&E`PAtG~S8>NkiS7FA}FTT1>x@IhR2gLN;s&w7D>yq`l4n?5QzMyk5KLjB#f0N>F24y`xXlQ@i|!sTrJG|S?<`wx>a z<2%P=Y}mMRA|)Mw6)_@a8Lp~&Tln(%CRQ6JlaI%zJR}jlQV^qh_%FYFB1SH$pr8P( zW_R!2Jx3IFm%`4GGJ6p0_Kr(}nz6a*(WWJcP@HCM;nLTFh6^1f@#b@lIGu z(;IgA6Jzirw$RRzcm&z$6u{h)HIT;&BYi#BjRVsC%P-kx5`uy;a?wX#Z)wS0TqR4A z%aB7GNwds5HvSPakk!R`Vdk!f?_3nWOz3)&&ktS4O!lxY0lZblzH^f8{J?)wl(+;c zJn~$@6{{K7=OgjX%9SgH5|%g)(@h9dQ5jE=C`lbS$Aq$I7`lwpvpYW^-|y=mx(;6+ zNj@tZ8s3&UBJMW8*IGD23BQCH$R9WMour*3f$a|}^;+Fb|BnSMZ7#Wg@h-8geuoSU zzAe4*G8-ZEna>;8uN>6gYIk2N6DWxEOel@X@ zR8a`givdcoPKT3d?q5kx5Flk`Y5Du_zaKtwWUZwoXeS)3oDH-rXT($_a|z&qex^P& zi*F?++7#aWiESzNEI34QX<4aeHe ztPANVyFQGhNWgtXH8D9vQjtcu>|DKi$~5?{GYPM}l#UADz0jGOlCZ+KDn@t4!fq2r 
zv-_Jq9&=xR6(-4*qVY$ufaC^35R@4_apJ@zM}|&4#4h9)EYjD{3n)qQI|m7OFC|6K zySQjmvvzRNcuAWvv9D!t|Nhf3nvW^RlR|-KNq(961(}(KlAL3IgqBH8YQ^^akP|}QgTJ)!%pF6uRl-?!T?Aulc3<7_J#KY zGxiZY1aZf7hwZ?Eo#ULpU;{cWObUdW^l~CgY+w{&!B?FIe3FI73ri>>^9qPjc6?xYdkrmpnO3{UJgsxNv zSkJ`SFM<3aeR3`0UO|Dm6cO}wc{P_GJq12^P17R5HLf#G??;W~$}C?jw3W4IQX z1!C)moCj6^P!&#ucOYkE9Yt+J7M$?N+?x0!7it{4EyC{l%O4Odnns#x5tJuGVg|mv z)ubYZ<`@8nBA{zucj*%0+UV%(n;XBB1ObWC;-aGOC4(xyfx_XxkH@pEBm`smimR$d z=nzyKJMPx7F8%z%=@^z){lbN5kX>=t@#Y~Vi@D5I3FG_9pYT{t#gqC?zf|+t<6e)> z^gVP8FDP{a;vR8K@Hgp+H)e+tujUHO!iQYQT-zYfaQX7(M`x?HJOXnm*>Rv?7D>f* zj3o#oo0kP3sxa~YE37eDO0aCzd)kQS2q zk;z*&9JQ5+ctYhv5$1pyDjl<=^ab{L-s6Qg7B^ln47HO`fTQT=Tc^^=J?)&H3gEbI zo5g#o-c||bq*NrB`bUtOBq3oQ--P8V3PZs!ncM0rZCN$RCrSazJc?dS1Ga_E_Z@wQ zlvcG*pIh1!p@Kfv(h%zqoi8v^S+XsT|e9sL|USPmy<^UR7e3KoU$%C8Ew zx(jb5nH=JVyw$?_N+JoHBnVLs5_hgaN#`UWP`8LS3N>Zc@?$dUgCUpFON*}cq%T+9 zT=T=6;f~}nBzJ-G_HNKexHrzN3tjAc%vQv%jWM;ef?1_&RbRXt{{_}1MQJio~r(}W?xUZjx1c6cw4HxM&-u2Iv=fowX0yqil;qF z<%F2`Bn=iul3dxyzzWWA!$?<|BiA+Pt4%ac+M z>lUzej=&|$lK3whIBC|BFc+f4%>S}>^LuD46OO<|;E(|}T7VS3;LN&&JAug~^Poj2 zCe0*0P^cz{2aaVD7`xBpEe|1$?Ldt&z~wDFo-8IW5e3PgC(DJei%uNcwqH7cFfX8G zi>I7R_*v_DN5@=|piS_(JY!-DiQRm+*7d+ouiVkTTNuksYPD$?MR4xf?R4hMxr|vS zOOtV7$xw++rvBX4)WDeq%ze%*_am$lZ+b5=kz8y!}7I%}Nhf~aw=!29>1}nci z`7q6CA)vxnr<%S|d?5MebpA*MLkQnYpQ_|mlz%F@l~$Qwx6umq0CB*6KcSR!m1G}L zPv9Z}o z$NGJF^H6KZ_E+hW_Eg1~xYZCLqia>mPnj>1$YkBM6v5}j4E#nb zzM4v8zmy299Hey>Ct*Ymjylfw&w5xY)B;#D$m?82Vy<8>MNl#=^$s>*2T8p+#Yj*_ ziO4X{B`&yRNgD|vFpi6kt`f=`PiA4+!>n8N1KKfcodx-GbcI47DyV$(AmlsDAAyZB zv!EW1DK=uqTqNqq;1mhAB&JNEC}`881@8>Q+qY8I!Y}(=$@G3<5i?h%i{esO$sdVt z_DGx-7xQ*ht7WEIx5U@^O8G3iaEN)FOHXaL8>JZ`!B7-)y(J?rwenanr0`=tR)yUM z!a*At02RgE>FDd~?h<;JXx|#v`+`Z-CDkA{Z4R^~faH6f(ds(hQqGS#dpvJkzN*ed zpfjn;K>U2^h}?>@G9=EoOcvUap#IW`xOdZpQx)VFX&9tGQ;-hK%lX!ywqJYux39Lb zQ62m!J7^h1Y9{FXy*dmlB#?(3ujY)EXAQB&baQ6?Vkck{Us7|ih-#Lg0r?t zzi@~urGv+g4b!>KMwE(m@Y{o;*Dv6$ckS7ehSQV&F=;T6{ApZMt%+0_a9yXkKDC!F zvG%9ZF`YBxC3~umU`n9_^9m=ukm9ZmqD9%|-v@yp1?->}J^W;;7T7@c0#{oJ` 
zf@-wQi(Ys}_mDmY|5-^%N#FIXzExiI6%d^2y{0+>*$-6^i{V{Z`?;SNYgn)`T9ihsyf*E#netnk>G@V zr$vphfeM1L#Oxe0jij3TRkoih8$+UdE1@5OIwhXL0EEJgN_aSHf7M;5DUq8(W*@OM zB;1dH4a*^gw_e5lz5UUzUf}tJ^AWmp*u;rBQf~n5Zg6tir&0(y$$6kdt$FF3`n0ba zm=5ngT<`c~4GMWepHZ3=2}6rRX-rHk2gGX-$(6 zHI|fTMyqc|!qJWpJ`%haGd7SwO88KxwkuYyJmu@FjLN$i_lih zTkR~x%t`?iPdtZ+rW|m6MZ#9Ghdd+FRduD1Ss}3Wb_J3mHJ}jGx$viU&0!&pbyZPP z*vj@O=j>=uoilXtc`qtL|DuSlDx!i1EN&JlL(lA0o|=}DNtP=k722qKbTJ)nzQ0R_ zj0BdF+g_-xu%L^K>W42NcwSD8JH1npo!?Qr@G*o6Fw9oUODH#D4g0cZc6)j{W%~Jd zUM{gVTJT#cw7CQ!amcs(X5Ufy-NsVFb0u6%nntQua2`2=r)rXE6<^Fk+_`gStHb0) zxv7{^Axkr@2b;OouNGQ_pNuoOm1skf$Zo$Hu0RsDW?P?i3r%>?=5BPFb3DgVGN=3Z z?-$VK)WqQ(&h$TJb$11M@Og4In5BfoWcPbC*)%qCt%>E+vIKTuwEgfL6p>P!opEK= zZ1w8qze^>bfKhSQ?Ea+O<<}7qxwzt@jDwWRN9~;a_Th9h2P-KJi_)1MiY(`RD7oRt<=1CvQwzzf5tTtwbn8!bnS{iZ! zg7L|KQJQuU!cGGN80zfAcu_f$QS)ynB;@vS%3FaQ2`8DF%a({uUSlcsHNGGc!}Cpy zh?LB(5Gi0E>j0@8ky=4{WHFW3A8=cmjhuG%ZP|*#QyByPG)zd9JJw|WrZ~w9 zQAuM-X~;$&DV!oPGS10T;Ss!sXi`~Hj$@IOlcb29@}*3kcZ;N~SUqNe2^BU3A}d|IZjYr**W8&$jkI`DRP8lpsoQ(1%Pz_JPur#%7SO zBhzKG=v4E5?xqF%=YDsPfDpIc=pGmj{cN!k*_YS4bb;t#dnQ3O3M_7*q1hnLQiD{OJ-kX znPhIOcj{8r6m2%L@Nr@%6|M5-$&^(~!UNO+*PlJv+^zl$TU0VqzNUkALjOnz-r=*) zE;ILMOP_HOMGmBsmXdYN%hV36jM?yXnxu1tbjKMI5p`(aUdraAa7&Zf%8{!QI)!Ra zkz2-MUVkyPw_5G5UwH(Ns4-JPdIAjjK)^kitCqa7Uv|CoCaUNlhAl{knl&Eh`OEK8 z^H>QTe)w9b^($73+An3*-p`%2B(*5TJxPhfzWeSE3G_KpLsFu~ z`X|cBilXC0+a4Z?PC@6s48+oU$wnfUDUO&dZNcUbMN4g1> z47xk?tjERa|Gb(N*JeZP>#w?Y@BV{8u?rwd7~-o=Rc1Qg9B)vWHPBp2pPlj_z*@K`^a*f}EdFK*mEez5>Ls6ls4lpPckb%UoBFa=TIsn3x zyWOXveMz$40(v!UvQ7}2DDklBI>=b*l_Yn55_q$aoo&do#G{-fo}sVN$U9HFs_FfE z0o5Ee6EOV}F#T1sM4HIWnw6+vw*x!49!aYfeFTnx(0QuTatgT=I|d30!g{In^r3Gp zZE8^(w1Qkq=tE9eI`pV62-W&5;*%+k)BbtN|JyTje;?&l@s(iNpmG02Xl30m3d#D z-+5@(p45Xwl$|H~ajsZI_+PhX*BCEX`u^5YQ!1quwDm5bLbr)iB0ZPj3_lL9l zPfM`7oL$YQp9i>q{nb||p*#oicpxn+-%w;BPn9a^10cH}bSlgJ;Gsy|$AouSG&@=E z$jGUf@W;fQ7e7|KNOvg#dfvCwFonV-69ML(GNT8N#?Xw8eu}Voj@!^7Lv%&5Srt0V zh&<7*eftipQiiZhT4rc8%lDlY6w!a!upeP302c>UwNY7H0u%FpKA( 
zNuUxz4n?O&=?)T6RI5!FlyZ_!eL?k*ll5Fa>g}B#=4;73dIypn6cTish>P6VVlSyY zO?S3mZ0qH?jR?F*(s50bub+N7C3zNW4R@{hu-3Bj%2)rj*1daQB)Kt^wTAoIpmGNv zVW*T8pwL=kK60cGUU#9JJMO?IdvA_=97XO=*xn1|z$p(J%YZ9Var227(y`YO+iI)` z!YfbNzIy$7A-#T=o+Y&PnjfUTR_ay(o+fynLz~@xxQs!TqXb@1#1v?d;-yH#Y;K-=7PC$b$#nd`JUiz+u@MviNsq0Z{vm?~%>flI!sdq;Z)!a!KHjDEHD#BU;kB8#DHw>25>iN%VZS<;A6mMp=`on9Sk?SIfBAKqnBefhw zYXaY%6E~liwZQb2VSTodvrIoZy`5z5HD|H!g-;Meg_l(SzVfHCEC7UfbxCegw-80I zMR&lpE=Ec^HjukWUJC9B?7S|H^>V?(Q> z)VC=t+}&%sZiju~f&D1xjtAiYofGr)GT@oSnWOo)t1KG`C8RD8OGO~F$vDpC8dW6c z#hVLv>Z}FcktYQnI2_o)d~=(ddug~(QeN--h(ZN_d|a#INs0;nN*$29b|e}CNj{VX zojWegj@^3o3WiZSPvx6;AP48=X-}eEnR8XV@LE;38?OD-oo$5)}M!oCe!7uDwa`t^q;-n(zQ#M z(}ckJ_6dEqA$64Rab#EXXNqG|EI;18KZaT&xv#QD`X%c%4v`i;@-^L4UZu@XP6nupCh5RnRlff{es8@~qVOT^mEU zD@^Go@pDwiKX?54Ottv}fgk2`T1CgX)}$L+VV2NU2qHVMHAG@i$$LnpJs$puTht7-g*ZGf*_xT^tt`~x#buq3J`;TdBBhs(<@(c4 ztHy0;Z8s7NwSY1mv8rU;wrTDtl!pfF5q$?N*ihOx33?vEuNSqQr6c-DnN{_g%k07srY zUzzQA=4GuyH}fi5mqMYgRDDw3<8zL=5r}nm0y*%vM_HTtPB&{j2L4YC8ZG2w{*SNs zn*UFJh9W3xSKoi7^{GV2fme0+I8IhAdsG0Q}_ieYL~*lE8Jo#5J$aJFU9Vur@&~VJC0a-IMf(mN9urES&QLSVwzI4O&0%C! z`JaA>o6!PPI7_)4&D-%y67`T(8Zu$Rgro)#s7TdZ08yU};3(%AU*8-AL7j`({j)95 z9dAjb4WVIlL>Uh&B$;v8Sh3FLBEZ7D&SUf&? 
zrg^JTwmDMqk2#O0DFLyP3tMpV)&PoC_{#3{ z-&a$id?f1yp#qXJsHZ$hp}JE&nclPb<`vNzTUDlH!4(0O?dX!TrugE_XARtV~Z!W`gZ}R4?9SbGNg^DFi$X*f>5V?^TDu_ zPuv=9r43oa3c~LJI%MqANf^GR{`x%C_3qbNJ|f_?Y-Iwgrvq}g5}=hy7I|Oq38#?X znc+=Hn@0Pcbc9R3gqk{_`CGcZ+@BT;dz?1Y{AwV;OO+1r@2q1f7sSy3(ID{ly)#N3YgMG8ZUw z4m7fsFu*782}MVM;Z5c`qmWi2oS7qiK)n;{A4p&6ARQ44qR(OLy=u9LDf8h^`1^yR z;m2ip@Q`XRkpByC0rkg^^H%$urgK~RA*d=Or%^*j+5|o@zI+q1FN;%m82#*{PfLZg za0$L5{VBmciS*_QEdZSX5sL!_DpF_wB>lOg5_1>Q;)1=LKf@j?>G z5`=sxjBifM-VQHy$hWB?^7~nP9&*mpsMM}I)RjtswJ(i~bQ%u?RL$$P~&)pM}-d6R)Jh8In2^;VU_y5^}-b z5KfeZrSJ>Z(n2ZR0+dygPp+gG{>&I z{h7dOxpZCK`>O-A9U==+98$a9%)=7hE_T9Ohyan&Bp><^%o0T0U`jE^wTsnV;bz$5 z5wzfhiV>qh>LJCDBVznzb-euXB64l`lU545#Y&Sh)`)ZtUlT-ka6>HmL<)uBADGj(UE%@gs2@?b4AGiQ z4s$R;HzA0gKuiiTKU~C=`SMu)bxMzqx3o0MS8967m3{nrKlb=PBlQ0HmQrNzpBEqg m{{IZP{I|>a|JQ{zx(B^H^l=2ChBhWFiiUQ#EUfJ;C`S7oOl)i^ z)>bF@1dj8aI=26&o!u?l)BOCF|NS@ktZhvB|JJdu!KeIuOIFL4g=NQY6-lP<=dj7vosV_|_uP?%W#D%wV^VJ?^v!hGk85VX7_ffJ^nv z>G7PB{@|^zr>ukEz z58M9x|KUIG8hNkb{$IbrQnG!W+W+>A$GjWf{BPf2)nUDY=kPyYN2&h@FJ%6WJ+nDO z^H_6>=jJ^MJ#^np&136+Hm&c@abu9-gC0??hk9TYzJ!Y zZP_99VzhYS;lqb^@!8^AEWO`Yrk$V|jE#+PtHwRqEn<}%WZ&7QFpSd6OG-G4=i8~4%&$A&nk+`I@h&;2p`qdI>Y8+Cell-y zjMfw_v$Z->T&E>fuj+lgpM(sPTRrhbs(!)nEtlfsc4cgit<|czHYPNRrMHUZMu*q0 zUkly-a?yUYT^&n7{mPYR9UUFLeSNW)0=N_07H1cF{goPg`lg5Ip<<4yk#4hEo%Sty z+P}=l21TDSw4ubDa^VsVLiLkS$B8$ z5O(jOM~@#j4Aw_C#mN00z@;i}Wo5Nt!v>W3>0F1IQU z4AKtqFRv_)FN}MvCao?FNRq2uT3+t^^5vj`fkEf(GPdF7l(6mm`r%`9=cMO?sAFOzBBnU-zda`ocH`^{+v zQDga%t4oD7%BQCD~`r^;H7^Py*%~iKNR+i_w z7e+E`8w*?=n^JT%IOPMgO7-I}v&}GDl!{hYT=S;3?vv4+=at9G{1N#H6s`Si zq*_lZIyt>NP_dw08N2Fy84ofwgmk z5;e_isr|eZQXTX>CWa&0y1Q^!&G_i3dzZV#u(h#K!HZtAb}dsZVAZDBiY zY}R)4)G5u?mBqA~jzLC_b!u7Q9_-cFsvv&(JZC*-eYA`xJ?hl$^I<0~ZBAK@b>y}^ z{ybGHsbOUFy7Q}@Nh#R+wj2fv%_D)Z4EEGCz++yS9mpZ>Do6%&uTGx@0aGcKaK zX(pzR$>ibZ=cj#MU7At~^cCH=Z(r`acdeVFPTMCQRt$YHw3ILq6CE^aABRPSmJv>A zeQQ{(FrZ;!ksRr<;!GDx%b$FVPmff%o~!OUJ0eifUFb%89UW~y)*)IQE_$M+MWeAg zSl~kKnW-mejf%zM;^L#T_U_nZzAFpNv^82ehZ0iib8~asd~~pIOP}tG8WL;}i^BWp 
z`~2BZWZAY>JU&|HX{cu|jm8-D|MtykX0X2abN0!mzN#SMYk%EUxc*VedpoZRdV}I^ z$=TNHp(56i#pW7T-}=L>r_pSJJ++Q)pP!#^Q(PR1k1`u-h|SGg`Sp*XMeYjvn_CVS?@#Dw% z60z-9pjo}&eoXUO{MI&=jk8$Tc3(dH+?1$ZB_zS0$EheQd%R%lh7C5FM{kPh-Msmx z*em6MV!PJ|^aZtCn?Awt6=qt|<(xM|SXXsj1+Lbjp6K~Qi{nKio8@I?-!cowuI5ix z^5CAr`SkPOc<Wb#=p2b#q^* z6)seci5=tV6ku$xk8=~{{@6|FQpJ8&U+ytA`Y*sSqrgs82w zMyjszojdu!7s=>B{Vs+^(tiH__|rFcbfOMbhl#X&3^_4VE4eyY>a!>B*5`Yv21Ti( zj2woWj7e8G71W6YY9DZ7hYX3S_N9gf{@ zF3U45*-jHimJBLy6;lFlKw|0Z1^U}vCvF^-kkH3^7;Z_6vUkEu&s%4WduYbGNp`-3 z3y}EsX)W!Z5yL)~v}6S)cFE8oiOP^S0bf-$#Wb}JtX6M6K{|JV-03=isF>CloaL5 zherfBjO9;VUblXIRdMl!$jC_2tk_)!4`2tY%E`&a&1>ga@p_lMMu7|U>~1qJFNq#|rJpkI^gB|*H>DSBl z)|>#mRn^6wZU%6i{^HG_h~@;up$hOLsB7(_BI(^okb9PS3!nJX*J1+8E(IP-!0PUv z)>=l@7;a6ECg5juGzBl&yer>%OHh&X)QtkXGcyJ?1&iT;hlfXQUS6Vcg)I{=(ucm}XI`{7)sBX)31wDgdo`VO^i&%Bk z-T(P_wIX*nXJ_Y%+FB(5=$De14h=NA?#@uC8_)m0c&%NcwCV456$ zl)PzsH$eQ;9xM5s8K}d+hei>pDzTR?$;nmp{$3jz$g7ixl^JI!ARypC@9PbXdhsF# zyG3`jJzMqGt+ySWoqaTa9T8DaRI5lmTdce^FNB~*D2zzvxA@h{?QiB6n2WYLV_n1CQlc4SnYl7YiB|D!{VK zOp}HpJreDG2cK^F?T-}nEw}l;pt$DS1VfOwR29fe*D!E1VH+D60aLuu=W;;CrRAPZ z1&;+vxgKe=WbvbsJ3BeOIsae-*(&|jVFO=XwJM8T^FDpL#%oZh8!hdlvN$`co@Y0t z(_Z-K_amX&tzm*DkENfor{J$Jj}uOTS~lO+-gR%?M!{nnD$Tb_F1-$E za~;>Yx4bZ3*koIb8a86{YGs)aWUYqkac9Q%J#5)VoN}Pr+0G$s4%g7%CHVmWA zqoSz+Ui9_$ayU9VDu$n`&-37wp%hE` zI|-hE-qH2rx+A<33WM$ ziV}35b+i7x;T?T_{nB88>k7qaXo^#5n&}7Wqc4nwC=DMTdV9+|?xDM)QqqraD%K>d z#A&L(+6VV_I=sv%edosQ$KEoU@8AEi=h2bK@>^tgmZAfV_Ol6_y`bcoMmm?iQA_Zn zXf7|zWM*%*kn}$H=TE!%`1l;sfyH(L*$4tCTtjyheX@!+oT20H5~bQ6;ZqSxy3FiDR~`*z+s|2GA*H{ zU2Ru{HFe+;I*X5Kl-tS9orf=n-^^JF-|Mw2MAD;>vWu+wP9ZVJF(E~IhKW4$onx2t z5&1o^b5A?L`R%vN%njiRtt*G&F1;a$g)3Vcs$| zHI0UPq1ClrX0KaOtA1^n&mPm=tADNCYP`FMmtiX=T$Aix)aD)^Gk3{HTqG?e;*uS=2s)(y$9#?8N4c8#V~~`T6awv39*91KLF7qB;CY z=4G8UeR1YjU?bzaeU{_TEsAKW%K-D9nVFfCb}rR81n7- zbErNFhMmXwJm~siHq^|uTCo;S^hp`J>qo2Aj>p&t6=SxAz&(`RGILk3b3`DO7~g;J zU>E8~emVezLW{8Tq%qwFrOfaJk4BBqNOp@pE%sX`H_w3i@IYjn<24l(zlo)(T3}(& 
zfSn-z+1{K*V?!2gV`WdqOtAEPfIPJ0XQrl3oV08=KC?Vcmj;sZ!TO3uRU(_^$<#n? zcV+53PAWs=)%kVIZj{!n%L9Y$5#1^(Do-NB9CxE`oGhu&G*yCZGdHky>zQk6DcVhM zbaON6p^98De|8|wJURu!QCL4X&ug+2@d%D-zWqxm6+L!KT5U_3fe65Z$?{5Do_$hh zdQ_LNaQt33uNClF&!nUz3IUuxA3l6I^XTv9J=Xb#+K#{}PlAJY79Zc#(_7(Bu3E82 z#IpUx=Si!+;H-k8k-ArZb_!8je1L)_s~9Kkht+6Id6x!vQ>|lxXT*zu#;i=Uh-mt3 zE%x$;|NQ3}@B-x<*a)-?5n8IvBpoCv3vh&f{K%0bsKc5%lI7gcv`|!uidbD;Eo6lS zuv@_B{)`)z>9(Cak3#RF+o7uxE;0FKufEo#)b}OBsw*u;ZCghDL$QOalNM z+P2}G{37^|rjEm{4<#j1U@li7H}v*Sd*sYR697|xM|Omqh|c=|Y}%v-%1?2}ozPe0 zmS>U#A1b9ph84|bF7zHeogVcnj7-76-nTQ1?W-QY`9 ziax743I`Mga9D?w<(QTLMF-=VMc(Yb^sF*}yQyGVF}>mWz_7Au{GJqEr58t)-;e2Y zouG^}p)1?9=sB94DzMzhs+VV{jMuEtxeMEiuXB5+1vH>w?Md0rC%UIYmcTC&T!W z9#8JBbW4F#Noh~CBbFfsu(GnoWSCS{c6RFFDo=K*uKV%Fo4w`FLrSS&Z>02|c3*l& zweC*E8cqO|AUmOWsO;%J>&hsy4xy}6m6eq>ZiZCRG>sl8`cVqqGzlG1F3;HtrBH}| zYRWGe-EqNTJhFk*xiOPnbf3C+y;OtzAT*bO; z#J6=B4vfGJN)e|`Ts#1eXjdKPMj1hUeGL5pOPzy?JeE0eo`$W<{Qm)r(Yin!gTHt~ zg(!aRtILi+tcrM~s&;l+*o>^(w#5R2Xvcd)W5OqI;3&;BtsVX@dAQEDnc7c$k5iNzE{!I71B>}XXHkh420Han`>YzUG_h$D9FoW z?=;Nx+UNd^19ZoLNTw-tkTeQ}6;g3f6SHo8K2QI1!LZn~&EC`0#r(pX`MT7qV^oF42pF)Y=OA84g24cff-_ig$;lK>z#C z^+eD~gghCyo?_eoQNZ3`69f{A)B3Tuw<@k|=nq{84|+Q;=ca%yv_rikat^3T)AX&C z;ijax*Nyq7wx~jUO9Gm2p4-8%FJ#X&+v%C`teL@JXfNFwtUEyevjC(-{r2tK&CIa` z)p#M(n!Vt2O=IW2rDTlyaYd&N3i?y!MbQ8^v!7`_&3EjW0$Nv%%8u)Q{PBm&NP1cF zI4W-9#vOdY8)|;qve&6 zs00T`1Q=^rlBpf7n zmp?*3m6DR;;^wX#TfZG#x(SyzIx8;?eRJmT_4|T7A;0i5CaNRETI_t%``+*rsW~^< z8zfGT$!CUY#4MoBd_>djX!WIQJjmN|%-Sff*=+iGT@h+El}x zyJ2GD>P$t(65=Bp$mYzQ1nPS6?AbN9)$&76Z$eQZ@<`{>`mO9)d^r#bT%ea{Y`bp7 z{m^Q$fmnt3eX&Hp^F9Cpzz$`DnZ6H&6!w@Ds#-;peK#c9IJiG6w8EuHc|5B@D4n}c zS_BJs%l5rt*vQm`i(cpbsu)mjUx;28H%IXv1B!ZP|F5QXB+}_Ts;KJf%5u}Y+t<}o zwAK461L%-jD09CY%WjHVqTZRep-T>f3{F=Zi6R0@YHF%We~9Tce7qa)o&a`c->g4R z&=NXr4E}jzRPuPkmMzT}wu&1s$he|Qrc$|uTxTtaQ|4#2lG#b^`vc8KdvyhkL7}J1 zbzHm?!WP6a8wZExjr{r77SzY~c`O-PF8}?jl9H0!;%H8@M=+RrsC#wD@6b?Vz*D&a zi<*+PR24&o1bnt`-P-c<+@HO}!-z0Y8>Q22v8AQS)RxZ{gNRJ%-Yj 
z*5UNkg=exSH2vD&Ys`O*wwpTOj%D2oZVcrK0!1+UM`;-uCNR=vM`xIeL=>+K;^)T& zgD1*En-f;_pWRW9ot{n6g$+F2W*Vin(Jwqa+z#Y^5ORew{GSwh8&$%ex-AipqJH{p z96G`-_n$;>FHBQ-_48X(?Zvnj7i(KuT7+th(lz&eIR(QQTE*_uc5n24eKIO+N!oww z+gCARDe6!5h+KvZ**t;$R!d(Wg`%1vv3JA9jdY?e?~a{?s>9Q+$ z^l7(+teIuN5G!8KjLscc<8fXU+ql*0Ahx8;PYonNv(xg8-_IuQG9CC>nZDf{^x!P? z+??&V8{a5jux!t&nAv&aw;em;foHf5A3ki*;+i!1C3s~xDJ_{-1~zS{O4R|1WZFmV zB&}SV*lc0Wp&JLZhI1_#84a(cyjfvr#qMNx`t-vCPtQwvS|3h8t_F50KP|!kc)~(w zEEj9jH>eF(CUdSH`r-`C912*6pOtAHztxS}r z(<5WF@n*1vk+^KlV`gm`u^{cq%ZqbNRNlvr9@RtMziIYJA+PQIO*463UqX)zt9XfK zObB>v>&6CBIJ&36Vt0!L$E< zcPd3Uw?S=_8#Ec()!E5Pz%nP=vffhOWiKw zwlKYShx-8a?%lh`wkgF@e+r2RioTkQ;t&l_HiqHXg-+d=?>wc;f65$ccIPTLG3MvL zeJcm%O6^k|x}kji22}IusMV!1yQMSW1H;f*^QKz6cCgO69z!(c{bLeMxvTRFpabM$ z12lvN^m;rnK0I#)ycSi|%g(hC$t!usbJ$MZ`fFO^%$bfcjcL^J>RY?QM69pEZXqy~ zS1(TuMX?cxD5WSe$F1C7he4i%dHPvyM&PD~mGdpsfFS&SM)pFwCF3MrXqUbE1k_FJ z;G?w9rh|X1XFpz$)5*tJ-%9NaXqD*Li;@VK8UrEi3gCDHq|cP{a#{3}w&h|Wk*6ge zsXc{@V<)nyer*mNHIvLxLs#}EyH1pjosreCIB0T*y#tjN1t6TFDLwGYQu(#zD-ctO zhe1}`KR_5z^vhk8fM0PeoA3GatWGCz%`o43^jWj__kVVoDA}Pn)oZ-EJRrF$kgY&Z zzb-=p7C^bf?b%6a67gV9ttAv1zv{;G^g<*9%-O7WN=J&v*xT4-@bu;fFmGRedO-)O zU`~RtCxDiAOO*bcTSK+iBIRl3?Vi$#vND-huU@%)`|Krb^x;1#+S!$ANB#AvdV(gg zI*rpYqbfC2om65)!9|Q~elFOM?`+$YW?;}zz1|UOaiqO2T0myg`Kwp|wNJ5Sg~hK3 zZ@K_WO&I{nw$HYfC)Z7(8(|GI*dWv%xyAiW)Z=x5(PN7$bGDGs6bC-Qy2|KS@F_78 z*cOtH@JO@kdo(0Q_Sw)q7PJkDIuFD2;10*V?-00Ns<~R@&Bu#p#8-U$MR<5JFtXXi zmy+hdY+&eVbaUe&SH6=cucD3!Y;$Y`N|s!n_AlQ45{76jU==ZANCQ1{=1lMScpA{5 zCJ93%d^^a=$yxmVBRD0)UXe#`ac2}p4xfF=`FH1 zN1)hZmDZ$Y3&<=A^JkegRlblh&fbM-BJ^kuJkryjmF_8WV34pf@+M_NPsCfBmsG zW!)gmAXQ`I7(U&1FHvY(T_PlG`=20&W1hF-Hd%3`9g>Q$@A`drj>|_GxQ7Z{mk!)O zvD9(P5720Dei|Fb>wut>3eVN#D^T*@zkg4l7r^IDlx<>e#DsPJTr^R-$3b$bJOQOz z$73mA7oNKWyujEQyj?0ud~c{h4z zouARC>Sq6zCaI_dp2t|P!zd*hFgmZM>07dOL2XmmchdNGI67 zpcz-1<-GBYaeL?V`TaU685u@4`#ey-X#(fkioz0+p&LPVzyA7b&R(6X_|YnDU2zS@~Le1>dwvu-|j7-@YzmTy`Ut_*aO zvW~T(`aRK~;#>`)>WgOq*=XDA8mMLg#03-_Ic@gs>*t{w9nIFrT3_uW8OwX=+tXMB 
z&eSP0?GjSDV&^0*H~KpBTJoro(uqerY1!!uQJjl|P_a@!sxN-8ijY9iDpGI z64(j7Jw1mI$!mhZLE+TOkh=6E}unYJHea+*KA7@*0I8T)LqnpKcb=x;szi#n&wqi4LLhOUpZFR zg+!0)F`)lgti>BH*8+B|V=pW$bOFePA8W}L7#w`l~akv<+8si1bdMDj&K%DEpLMTtP|?C4`QusMt-xqIJ04 z1<+0cisW5lKWG9WY0w--S%3NET0}oWbXr#8Df`hgTE(Ynpx}WtDMf~fR=c4n@8yr& z(5b}n3>{K--e^4kQ0;B^&*&fn)9i;V|CW)~b42y4YY#O@B=1|hCS!Eiu`|#9G+0>R zms^eEX?O&ldK*nzh)>DA)6a_fF=!7A|L}w7{MZ(ry9gUS1 zxVS8IRttO8z{=Uv<8OF$K*rA~3i+1q$U9wk>FAiC9!g67*cHW8sE1K4&H>+6`VSR_I9#5sNKluvbOlgM5rUJ z*%GIqaBtov$kFLcwdxE_aof$xYFv2&S2*(60)nKaOH{pSs1Bf|@SShB$RdYwKnKOO z|Mb@nK6^zQ+Qlsrr?t&GsFq>cR13-iAMCmy_oaz41#g5U7Vm}M5rJC8YWhzxU&B~0 zKVG<)dj9 z>-;8@y(1g>9;vtiWb8UQ?=d@~Re+p}^KbI*8J#lxMN1h7S6Giy4`p5#D4JEe+PZS# zl%Qb4v^$#m{7%KO$|2!HNsAS;XPky&g9Jxc4vKiVSuWlxojq__np6cM!E~p=0c6#I zXb`98+kbzwxGPIfp}$nP8(R5Xmus57$Fe<1MiebgdS4DdWjK72%Thyux;TlVUo{;X zNFUp5EAPJ7iwLNMf4Z%L@rUL4+Su3#+E?`*Ek)@$M{-yMjROFxR{dNA$@SIPqU>4tVFzpk)Hqa$3N~BQri=?GUH~(5%+1^CKsC*&>W*e zC^z_8#pE=#>@t#hf$U$q9>Z}YhM?NrPsnR7)Rc2EMrG5~6GtisU4DbHsS*D9jZ`i` zd;<35>DyoK+qV)*g|Z)7D8E-L&yJEOrHJ6JLkY!KTTUn6DSfE**6rK%BwPclA+)&& zDwiE?(!4V-JE8SDpv11yh#e|41%-fFnn-~mWuPL>H7}e92ztK;Sa#i5?4mxio*t}^ zTNZYi9@O=j@K{}pnmJQ-N!bE2RQ8^E9p|>~6(wb5Iy80L57b{|1f%>R;+)8SW=3>$ zq3pdJA+-rvo$T@r06(LFhGNHZynCm<=uXefG(p)I)y`T(V0ARF?6N&}jKwG&Q675k z^~I1&vr&fn9&x+c@|`oAQ}xW<3=-A2y!7~2rs+y%^f;e{bzc$B(9670ongNDq&PK& zC9c2}OZIH@_iX&FjaV2aE|O3{Q7`w1SV60mXdpjv_IZf-blrtGk1X(vOAvw8fv`}u zg-|_6TBvj0GRGFBy4m9=WIwd=u&by4NU`MEffkn6R9*fXWBV^8Cx8YRy3)qBXAfbs z6!gJF-CUc<56`PpmD3bPqGFv%wD)TkOp13)(ryxMt zFE7}Wy`ha4=o{z(-JbsEy`?ih*|=1{?Np*UjypJw5%Dn2>xc3DDc>6(gAGGZB9#Pf zuL-7|8i`~6*&04vAMG`?j8aR|9a{TN?)H2IcJKyB!J$)Lwm-FGQ}OX-6`IUbcIU~L zhxqs|^A}9}K{zzp&AxlL*|OM?MQcbnE&!3j9xHdC-8L++Fph5;?WDCdHp#YIxm%=B z@PWY7NeWw+pkHN!MHhU1ztOpQ_ce#!mfOM9fHwels^_Xv*Q{F}^kQK)=GmJAaw~$B z3AuGamCxa+<4W%IgwDaR7aM{_)S`Dq8tUO2LjBMvi%9B6WB-dF5)gy%N?Y?FYarZs zHR3Bc`Vc73C0W@zu9XgbfSrW;~YZ25>)%y_Sz$4zEK&G1?udZ>~jcd8fmb z56sqRSib2~Q)RBVsLafr4*8ytrlwUH6A+a=-TQb;O2 
zRKhK9Mog5GQwou^Bp625Tr|gfdt<;gT7?j(T_p90{6(yq3_Zt&2e~{&q;{%(mFJBrxFWCl|1s*;uLn#EcZfE$}Y(V#3Z+L+$U%BybhDlX~eBK!%M8sz=N(f5Ah8t3C)}F*o*919gy+u8CB$_lOmyb~mO zOnDd3ki%3EEV``DU6xgdQH}J^O-u;j-hBT0>ue8z3^Ed>_$<25A@wx<0tNaW75QlA(t!aNwIRw0O-XJ9d>BrE$+ z?+#X%LsUpnQ4vXIpFx&4_qTZb29)X(XZR%+M^4ZPX%axJYQ$6Qw6vhkDmaYb@-e~d zv@PT&(?ueONW|FKI>zoIOFsvcF10LeT*$^sU5)MH*wy#OS-ho z7Q3WAQJu$&IX5$tt?$UO3oLAlBueidv2f6m<(&MYy_R@H{`RZDq71#YoVy%5?Sm-0 zMB6+_s>R0^Lk4Tstl0#0m1cs=%#5qe08)3LpAr?_g-ByaBqJmujXL1g-m{-);L?o& zoy)$($(Ixqgdo#1Z;5x`neyqJ#&3`i71 zLS|2^l9JD0z?)EL@o%baby(hI>3zsIRpW;rejqu2Wd!q!OG-+t^05kux=FX`a_ZSk z8^S}|1O(|7b?!z>^>pgIR_^NYC;3IxuE!9Rf&)dcg=my6_eD#(5O|qmUKN5U;h#~9 zLftW{(>y^fbG$nKd@tKZszs41KonU5(dgy~FZezAVeL=Gl;x0=s#)l@jO!dcb1KxT z%VIU)fJezJir9x)dx)_+2`8LSLWU`{)F|E9X*C_W8~PFg-a^&mJw4ao(P(12tB{^Y zsN>N21r=2hEFmux)6djC*iRRcC4qi6eX9B1Pea-ShN;()FLN53&2wY?OTx% z8Z~E!DVN!#QOC3GuA#Wl>t8?p9q0V}@4x%JeS6%iMVDbn)Su@-_@R;_lhIF7eXx?D zahm3}%;X!nUWPip8-@DvT<8gl*5mXg>fG;sPmmNjz2xq1tbo9l&?-cZk(!~#Q+qr2 z{j_e~E|k!k2CB)7O9b7ve)dFGrvX%K8z&{}MXmlg^w`eN1LK1dN`C2p^@!JEDx-Od zYybYgtRk}p7(_M>6|v+CTaf&ZOql~DgpnHI;^QN}k~?+;ZAzOuq7-TOO|&sdqt-43 zvm<9`>(+dCOuu!nuEgW^=x0)4XnAIAvyIWxw~4sG4KZOwr-~d#LlJBhb{Ns5 zzso8r=R-*ButM;)oR5tL6-DS&*q6$l%%=|@KEsNkXts~-cw(3LdxdXXrYR%4-DsyL zh{d(JsK%=5d4dlK%j5C4az9C?1u*ZVka7g<`gVp)I>p6BT3Cb1HvJ$mp;+)J(3nvP!nUxc~~9fdPG~E%L1}4ajYS6 zw4^dP;Bdk{(?Fy~WV1(KfX0aShWy(-o@nDpEK+91yCu_#PjZ!;Cz1GXSYGe##uC6P z6__%{c~u(`S;0_^=hQ$DFfi0rBy8jzS_bgH=184~p0+pp<3peh_JbXy60ejFVjV$X zOlS7jcuiL(5*+Ily3T*(~sOjOX-j4K;Am#zpW7 ze4q^TdEIt&Y#}zq-n|w(yhfJ|iQA6yqFYp3#dOobL<^B&`EtUJ8{Pv>n98 z0`Q|lvE}$wczF0Uc0-Gyw>iK*_Lil$M?yoSH3F!zB7u#o=UN@=+hXN+K(2Ui=y-=( z25_VjNIR}T681My<(uc$xmiK1V>S6#tOHmk_Q!BrW<1I?4`dJ|qqX1^)&s95!c<_j zdw3OiK%3@}Qi|+EsIYlub+saVU;;Y9@XcFFo?AK|#pmaf_6|m^i_ppB#Dp?R8lKNF zn$^C&3BWptTg1q)J6}Wzci4L0#ve(e!j%PcVy_c#@V~Ow7}T1anj+!yY0M1`0~2MR z9585v*JegAYLAbITDF-w$%;dZAh~tW0RV8%1yYVqf(tymxkZFt0e>%f)BWt6b=9osXDFIj5 zhA$BEBUxhNRU-O}rx1u>h3VYa!|_W?OIgT3;l*8oq+T_22Y9oKqrf1LRQ<-2tsga2W53I1;7g 
z&__`Eo+35Snr!BK>Ad;l{4*|X!05!l#pZZM?hvtkh@Qh>jLmQmUqOxM*O};DkCu;Q z60t}Lp(OSU&1?Qa2oie$1dYjBCkp2K!%pA%_O|$*2nq>_!jYIK3}E7}Vu>XHVGvmW z*t_SB5A9`I3YC~e%x=I_lK4m-cN}du8MV>M)B5olb)%q+MVZG`waAuzHT~=HLU8-U zXeINqs!HOqfcYvUseyFyMvk~}$QU8G>NrS3JdhW$SZx5GI-`(3(FN(3j|{Ix(~Bp_ ziaaS?K2?A5&Ye4I2s$GdZA;Q!af)F#hEp4qTilAhw&w7ZKaath8Ag0QE9_)8PZ8lZ zj{cyHHt)ihJ|W@&o5>tH>EnkFzrXOlK2E%FOk^;T-=OxhVx^v_4nu>^eT7v@egUW( zd7MgE{kS@1g!CXlM`PFM>QF7x8JFeaQeQxuzo}stsDrl`5Sx0`R$fyxxNu>Zhr|I1 z7Q6T7pD7R0P^Z*EeudDoLC`elaf+Ed4qWJWmm|+45V;Krn5!TGP&KbFOWXtpB3ppC zlw^docqbH>G{}gma5!I)Yk(450lOR9g|L4_riEddE@Ln*cf~EpV895rGU8L*omiG} zpRuRu&@MulOj}ekkhV0aQe;Pv0arAPl*L&60v9qdLvjdkYvB^TXmrPlQ0O=6&(QS-eZ!*Ygzo!AT@wg^(Y(wK8JRl#})3U^=VA){*&)>om@8{TY)Xb zAqR_(jvw1_>L8xjYoG6S@A)z4@JJ5DVQi$-goEgxgRe6h&4dIj`})X0yw-CM(#LHA>=jDU{DB177W4^FrYdn9!M90 z6pgR~&)}6G&Y~o2*u1$3;El+*tahjV`7E4`aDV&%`{aYm>)8JN(|`ZP%KCrxbcFx8 znE%ViH2nYULPT#tjl)C_V&wO>kTW;V9+2uH&tN}KIUCE%PgjsBFW+8#JjoWL( zsrjzn&G^<|qXS4&lC(LQ@*{Z);4PA&`CaMEE4Wyu<{`aXSnejj-Pz`rx0&T=GepuP zfGf>z4QzcRo*N(+5UecM069Ab)mIQIUEG)+BcyX>s297>aN2D2nA{E3teA&el z5o=N>c~FQyy<=mk(9-0Hi=0{D3%}eP_EI~I(BRAvBBuWI!r`bB3&45e8=$aZF{T7U4XPeftFGD|cK@8mlF@3igbv98O z4xOqa`a3e1qbaTtx3vsB`9c!DJE&kL<+}=6-8@pXG5R3Go;d>H7&8zW)(6Bja=p1_FOStU$m6 z3GgC$B&yLYH^#yuOHOM!OGRY-JUa4_FXn>^IEK(%P~d8^XYzo1q2&9;;6B@YBppVA zNsA-E0;&>;5G1I?l4SBG@7#ZfKr!Ee?BgZk!6DI1o-R~evkwn{Me%RvEfd3R2~t%F zI5vbhfA&sjYUEjBny(VBDMkPiue`daoptjSo*onKPEI}_Zt3N5c?E^<<6StV11eqb zP+s{SeE;t2J}70)d`$e38zNl@nRy*Ke7L{er2pyKl*1l<#Aci9xQ7 zlpjv|FcGmDX-y|#C35TsX4**5gixFt&REp}pVMT~5@Uh#xKE$fMza4oh_Wx6*mrZC zyjrlu%)JAcW0--9o`eZCjBdvY|4h3xpFy}Cq*!@`(Gv9w@(9L-i$limfV`BEV{JrV z&{@1juKxT8f?7=k`jXTSHeLDD%yZ(0u}H}PvT>`uc}b5~GKW%1QY4f+Fc3h2c!q9l z-Ie>O;|G@g^^h#~Km8acqU1-0LIaK}A8ZNSgUB*K*(JZRJSHD);;Hy-FrpZ*u_&O+TA)h`2-Rkl_Lr}&%t;Dcv|fQBPt<3!rN|0GCnInd(S`^L340NwMdU2N z^P9k<%@?Kp$WbpO@tlX5yp;6Oj$DY_5e>vRM`W-9;}mRl4Urk z1ws1-L_CRz%zTT=A`YR^F9rpHtN0X*zEp(d&42{S-O50?DnJq7 zzM{aU%t+U<`d1gcgh1amjnl2y<*sGMc}CdbVnpxN0MDd{26d7E0)AxkEsg6f 
z4fP=pWFZZPnwV1axqsRXJ7MIIjV^-Tka%>;+!UF4fM!n$D*=6&K~a^JE$8Kr!ft%_ zYu?wjEWM9$2!;5!Pk**&nm$E!CNWPf5Z>~7K`4&N=%|FrEublo6BaQ0D~Gv;I(hyA zW07S@S}Bh{{@ZxTh{dBvj}YmC!0Q)-FT0kIF8&iLVFQYbNx$jfq33WUpsEfX7W5;- zE0CPuz^S{02|~gSkQgRA8caR=bRP?ONxxv-O!6w}U%dS664J6GV;x;245>TpeKVhh^FDy__5(6A_1= zo_URxPEtc;TfpOwOi6fFPQ)&o@83X* zlK8K2Q}56-F(E`E?xbX3@LArE#B|R1kb~07l~B(d(hzc%_k&oLTns7kIS=#DKb<@9 z-Me?klw1+Q($(Qd71+NGXQ$n@K=W(7{!}~;T0$ZQsYvG}GYtbwH|SGP{$fBY%}_;~ zKPM=*N=r+(S7Rm+ld!=9>E!B&a6|eKoN-gGS8_?%a(*!gd-O{=;fl@{k~q<(;#=QB zMQ!6aaPuEdCu2Y6Qmww@nwNvJqZ#AqbtDH+Oo33%DTkE7jnvdI(t?N!ULFVaQGBLR zRlD#`E@YT2O!PqK5jJRcBWF9{NC1D10z{viKSQ?iM60+qVbur<3DVa(Mo|vYU99&=Hd$n24`@ z>j45TO72P3FGwTydM zeT3@uaZUuW4ao6bB<6*}+5q^VhDlc+HnEf4Mg-tnY5co7KjEMX)E{j$TM}bJxGxrP zCK0+W#z)C{4;*qtn}b+j>$VIm&O!)cn+?2tW;gJ23zAk1Za~;vEiDEB0ni%H`3c?l zxRd!6!iG6;4%wklkjR`K8qXziRs?b&sb$ZfH{LJB^Z-fM5}R1A0dJXvbA>J2)yP>B z1PM&`%8^sJtn(*4-7!ao&fjaEjLZLP^x;2E?KDw!0Fu9lT_MYm2{5D`5}-2^WJ+>i zmdac?qyrp-A+(O|zqmC%1YfK_YU929FJJ#(K6KEAYmmk22!vf^1FnGkY&gw|D}PxB zSQ~?GiE|8c?``4iCijXFJk{{2p9oCDu1^f_-;#AUz#GA-7s0`rb%x|GNS(*QO!&@* z(@)l%!wLl@P`q8UhXZvMyf4WnRIF}|I^U(`b>%pZYr`q{&@3;%lJLMeuPfKU3Q+)) zxoVvMy!g-0s$aHU|9|9J2Fjl!S9CY|#HI_LhOBlw@3LWD*}FvL`)zz$J>+=SP>Ni` z6{lw+FOOG^cl_~>@yLGuqg zevtYZWs~LX-*V*GMT{$khwGBRURnEJ*E9mQON1zx!UR89TU(=7ZL`z78}Z@=E18Z1 zdm+652$mexcy9|@=gZdpj%Ss>zoIK|kw?L&9{X2M2Wwi#_S>f`?gRgIp)k|^V&EU5 zqh;KU{-I%j)JAEHQYE3dpP%}r_s^{obr6q{&UGi;kX#H(3M)H%DyUWh^vt*+aOD)J znK86{%pRKKXa#4slVqCn04_s0*cNZ&?w9?-jz{6d<4Bf`r*`jmn~xA98Uf1{*?)3K zvVK^Q_pgS$80Y(^cDkcP_nOdM4n$OPND?Q}1nLUrx{`1f6ZgS`ug~`Ij1h*6dVTc3fpd^3 z8?bd@za`^jg$kTEXSD$PA++o1UQrdo%P^$Ki4me>DVgA#TfFv(yQmRSLqXwViZ!Nq z@A{B6$;pYI)B+T=v3cF|$9pR<7lO_~%2Gf9G9wgiI`+h#W-J7#)^^*i}05H7K?tVaumoN8| z*7fK5t z`M&uicSR^Cyb*}9j|=d}vVP>WGh$7oPZ7dnW%ujdUdM!>@v@ zx_p`p_qNdJj=$jrsx0IdV|i`V8+(>l0MDWMTbQuw zg;&4i4`=*qoZ>cT>es$4efR#%zRjTS?cO)}@rR*TP_Ncjkc;|wGl;+6nIzQKaLk?{Q&9>*ENriW@1d)6yeRm6T}jgH~qG_R1itzcV` z{NA%zg^G`}I?pEeXL)GyJ$PgOHRc8DjBdJ+hRyoPpJL*_?w!D6D?fSU$PQ6au^qk| 
z60A1DH|x&ATWmIHlW)LpBImL`@bcn;{RS10WY&J#%KkkdU=ofLB@ynCG~kA(gUB*D zgDMn9e{^OctfGdTLUI)$4&<03Y*5qPOz=iLSs`p%=38)wDx>tHpbt1G;0hwxFs!1X z<1j-^BbCvhicLrvBsu}w6YM(3TJj`RsrBu{r-vX$DAU}KU+sn(j#dA@dpEb-<0r>- zQwmX0j34-WXNHGIY^deg_c*tl?RzJ?#ncTC-_Xg!|6Jsmi0tVM+JDD+-(}45Jn#E_-uL&<@ALaUAM;4{y}!$KUFUfo$8jE) zKafRWqSPTpCJ-Q4jBdxG(-w01tFRk6>F12vsCgxf|W?@&!!{PJ3 zlRAfJO>$TX2PD^IcmOy&=I5&n6cYPDO5UZWEEI!!l5VxtNU)R9=1*PdmjHemq3kC2 z3k;KT_!GdU%wC(4?|%`EptGw)PARXh4NTcs-C-bVM7#_`1R| z067lKvf5x@q(Zs|U~S+bjR(5w$^Q;A3MvB^Watpq!#nLy@0w$`efb-R0Z23fhqp+2 z?c$crt$^9oa1NS$WcEx*2@<++L>amp)(21s$AN6)h zf6T6tBg~u8hG>02EEIXJiTMGL7#){FR_h_htvC+JBFVcjI(isxwVDW1R0~suPrT#L zKi_~$Guk|vl%@Nu;R0P|v(gh7( zI1PLOwFA%4`F78vs(^=6^tT#KvKc0nF;8*mmoq$n=@YSXyWVJ%l+1DN8y+ue-Tb0JF8o+Tq)ZS%u)= zhX&j>j02lDP?t2FCe~M}?uO(|ZZ+7S!&h+G_=W5uC;YoW8PJNk*S?cFTv8Z%hH3){ zBiEwxz>BK@(zBXLNIQn{J%oyO;3w2llY+)X^4Zz8ttzlfTDB= z85+JLMEE0%0UZq*4RqkX=1x=q_4Xb_fLj3Mmmpda!=M8#1n;z|P5&8G&=i9JPbbhB zvBXj|AwmGg6Qma+)-6AO-GMi_0iKzgEoN}*)Xj~v8)vr(CQefYb$*^4S17{*ER5x+ zl5j~RsgTm_#|pPuK?d%gkfs5~rQwRDH~(!s-gwVH_w7^O;d;!gHh~|FaEQ?HSpz*i z%DCFT5_u6r$fsbu0Mwlfe6hH}2sjStP&lITIm|e+Gi}8mQK2O0jfQ~0Bx8D#j|r8D za5xJlGY!a$yj@*gt!n^b8RD5!J0AWT0!ayZC?NZ&_-WqC577KHwfBeWmjp8r6}+G; z2*py=ibR(z*kPbVQW#_(jq?2TCLbuXShFO)^r&-lt$Vea^GF(1OUAo@i;d4IJe$L3 z_Pu$${>E<}3A8RxT!{ti>#x5iszePvflRTvABZYK4LJlM4`EDiKSWGZ1xB3q_MS3} zOjZfj62D9ejS3o27HhE>a|j4R#}yU(AChN7lwq&Bg#Fe{>__=Zr%#KU)+Xlq83caJ z$vfj8xvfEFQHv?!bh=2AJvn7LB zKo7RMt8BkAuTj0BKTD@eNqE~KzBJR7_@SP4wgQPlN;K`U?uPD@VvljZ3TsWaykB_bB7>Mx0S1cilunJmIwRqo(UH|1T7CDyHdn=ie^`Re<~fzf6S5qc>Z{#j9Uk|BkMMbM&a(aGsf_SGZ*HrQ=z5%fv9RVNYzL)1pxu&Yh<+xy8vS(b^!G4rkP zOBhlSBM1!0_IHmPe+2RZl9Mhjf)kB6mCd-*GAxkXf{Kub$%5*BAC>s=H*h-uBIH?E z;0@2u+`SX6CInN5t$;rw5 zhR*ltkX>O}yFiIKUipQpm6eru==E#YycoE_7R~*ieun-sGoUung@E)wC4>H06P>HMJIt^jje3uM5P~hhw6Tm{`WtFrl63qQtVc{ZCxX5LadLQI}p|`9oyh3jDG4E7Z;w37cYWA)j=b94cnRh{mPXvJr4r(9)YZy z6%6QW_7c&(L`#ry1cY^h5)y~e#Z>|9ie3kZFhz)Cr)BE?;dAi*P0p`{{S>MXC9JR{ zP$fv7_-W)SeW)WyyGvr%=+a@lcf=X;VWGr=LXp<%zj8V{u6zHdz&BIjbry(aO 
z7@_nK(Qww#NkM8W_&^LfiVgz)K=OivSVevZ8lZKWy1PXnU&C7Q1&14%7Pg^zb|c^k zPClTyqs!$rR;Ap9;Q%G>BA)BnokKQ(Oh|a{O2P-I^TDX1UHbO5?HYs+_{=J)P43J;ajg}8(VYNV2 z4?lagm&Nj!->1M{z3H2K*(U_xF$kJvWm~hzDl$|7J068*J<7r+b-pL3Aya-Yu0&1SNW ze|vemy?RuDy<)85#jssTk3D&%Hk_QEzH3S2ob>A)M7h=_DJklf#p%rs84*D0>P5Cd zvwsf>j3MeIVkzNHZMcM35UMPZ&L+s88pG}dC|Tqw4I(L&mRk$_l|7LO^f>}CAzqR6 zK$HzZKIELAzaOPhVa3yu)f*SjM%hGV~1sK@o4cZ+jpD@JA`2gAw!#ZS~B-sKb-?VQ%a3Clb zeXM$YRv3m<>Vt44|MBRrYY;@JGQ_SWZygq1*^Q)79jSFp{<6(_SFP)i5=Q4^`uKlm z*Eo+XLm}=VyazBehIn{RSR1tY*so}%5ZngJc)Q60jkc6Ql~wF@2(T~om8*dyn+(i-5x8S2 zQtf?^50s$uoF)iU85c_Kfut1%#i)+Zk`mXCMPHsp&XMH&PMR@dTA^n$tbbrlZcy}& zTotfhb!CuxnQn||ETpo9>UFqy+2eAPMa#7GN1YRpv&+u6KpyrEESiD#a7EiJ$St&5 zjEzD5!#gMc=9LR5U?Dht;S+`3@gwQhyu6Diw3&P={ofi%Mk zu!h#1ZfM_^;DsCBgjAmP(;x%cM|NN;Bgl{t8-x(bLjLQ(706@~t6II#)9;x;eNy-; zAVJ~p+i^!5^O_oufl41!I1%h0P>f`%50x(&6WaV-0D4%%fb}h|DSKi1uCA5P*ATT= zw_>1PIQ@bOhxp+M4&wybN3SRpv=!L`hPxSh?dsJsNY#i6jzD7`tWK04^g{lx4JC*l zHt-7DZguON`q_X-Iue;jAPBV7^lYBb$G46NSnFna#t?hrz;P;NsUkYD0%;1myp66kGAn9e!Sg z`G=6hVB&OyveCyJ8x1XvMoHmVP3f>#A=GP~&Dek}J(0@7K!N30*6|mf#2@Nj_2(|- z;NY?eH-^^zePsZ0eoO4l# z`0Ke&!r^`X}cy$5hI|DHOh4!rh-mFA;EWY`1I1i7A890v{vq{}2W znl?rvz#}RyD-v~o%{3Wd?{3Rv?yP%DG|T1hEnNM%Pv|NZMyrg*Il^3dvjwPzguq?# zYrjdjg!oI0hxI5$6U?}9Ob*R`iX4g8`%Khr7H87!-opHN6;$H8u+$N%Kz4Vf z$s``dJS_uaO{Ece0DB8-N1{q=2YD^-wh90e48zLEt^jeNEy(pdWuRsxP^BgVaxJ1Z zAW~jErHht*#k%Er~EAWX?&m0pRTveUY6-n&Z%ciT+72Z3Ml9)HN6 z*5>b&-k@QlhM|fi;?mq@W}Vk1*}FANaRVZ@9j@oUa4&zOzV^jH z@e{k+DLk@W`FT=@ju(Khc744*Dt*@(wykhiEi`Zzi_)iFzB`h)l(UUe;7==$(=c>j z%n@zyaPqpfqX8HGP$6ZTx^aLR`bqBbZ}$z9P2e~p0rM@E9*&0Pir_?>2axV_pm$)* zXD&ud9Rg~JBMGLb#jm3CCgz6({{F!N0@Nl9BY<^G3`K8>JKC_lCBFkwd6~c1PKrXb zz@+%60g5NAt^OUj!R^=ia4y*{E)nBSo2Ibgf7#gh&o@B=Bc$tb4wJfs#K+y1!<&Y3 zNFZV_s5u6MMAKDn&z^P+&JsY>qE@X+j_5zK>Fq}ujYfjOZ-sF7{*Z^=2q&v;WpB9a zq)*kyWy`L>{lH0kH4_2^ftLm@(M&1z`ntNABP)QU$(}*Vd^dbU(;P$|>L69XMhb*l zDcqJ&V{vxp09QC|9!f#k*aUc4o=LARDng>hp@~u4bJjDLodsB+h_;9Jkda$k>#y@; 
zpb~ZnIfyXQcc`zgPw3@OxckeCEN1X7*ezrmuEWj>_MiK0F_;Y$Aq!6b6W;!QT24ef&!jD~JQkQw-BO5$TAGT?%y(N=D<2|MXt z1RCc+!+9NUVIy9`+BIe)5DF*Tb{|D`MIv=2s+%DTYB-Lmd`i=IlP>xDYO|f{&&5sU zwHdrI!A<`}_Jqhc2>FF|TdOe7ML)o(v^NcE5PkiYALby_3p_;py0WK*EA(4T)_3Rx zfsDTPS^yBIwoy|1R-L?V2viR3hPgfGFoE8j{s^728a8}+^7wJx&NAHi;$HM3Iyh{v z{}}Yl5||bG4nf2(;N$aum5kCtJWqs6ZZ-k~A-y-1AD52jY`FTvoUh|gCLHQX?k33c zlpC}RI}YY8#coHj)KE$wQN#F{{t{TVTf?91AVhl_Xz<4dBPqzp$Z35D8>>K?5Dm&_ zfl?z~3!3OvP}1e{DTO!LcF7W|Ejv%ybL=@~gn=*JC3(bqftE$0?nezx3UM;YdxM?O zj-SJBX)$4U&h10Z6bJAt41t?Gx3QHF)4RgPz2rX&4AJ5rnsGyIlJd`9t!u@+-b3ei$>%>` zLp&2WAd)L1ZkGbdLD}e#pAVoL?gK0al#?6M9&s-145r9v~(vdt+pme+~<4jHqL-5;xZrv-upYCk=jL+{(9rxVVUAA+r z9MPaIE<6O>AZsHfCaf?-OzR)0q34xSN0gzQ{gt379GnNGJdeu}Qs_<`0;zNYjBfo; zRd@Fn6y@E!Z;`J4^Fu&@Lva57PydSzTwQ8w!1@G2MMnAz>V-vIg|vX^O3vO}gNRQd zSjXVCE(g@S!g;KB8SZM!vXz*^++&cVKLS*kLYyCY5MmN=#H?6xS*ZL73WR9^=we2YV182Lp!OCG!RF@Io$OD&~W~iBTt@_XiS5MaQ`Ej!N&Qs z`L`)S(DEFGMbm4G%~GT~#_kvl#BH$Z3LIbxe~G}rAz~f5x{!=`Ynbiuv9A9R@d!zi-+_7bKIw4D+%xejhLEsP8UP>S7$F51&K!cF zqy`pN5-(=hfru7hVy5Y2{YJn8=jd_c>cha~9ES=PDTNCy>~Q{ug{mw(eJQWQYPX6B z0kkK>6&K%#mStcfpr3(_DZ&zEaG}iP>2mnEgGpc^>4IalGb)CpA;?8BPY>Ms5T-f< z4twkBo6bTyj}{TlwSke?Q|ZwwtYYL14c^9boj;w9R}w33#j@@6ciE#svpApe=V%n< zo`RQn9!~}hBvvE#?xN-$7D==-+StQglFSlP_x#OBsIB<-#PZwwK1a_nSp@IBMQE&e zg*2VIJ_&fFt$3UvSerN+#eIID^$!S19&qw9cm#q!A#E6k$}f)9Z*ziVT}bZh^uN+pZ{3^E~bZ#7fc^RiB=ty9Rf8JzYy{CefEM zb}Iqk&6{^4V+yUmbhzw%8u3|L$@!-rl)+=ro_&CmG%^3FgA`&tg87m283AE!D-we= zh1MHsY3a4FbQ$Xn7^m$j2EP^OoOa?rF2CN*S*S#tUcqAcB4CCXBvl+*56chj833Hv z5Iq|t3y1G6JuxJlAqd|s<9j|oEsuZ_$}?G;2|5RR0dAN*{%1ZX zzfj@Gf4Y=$K4e;OPO5DA-%9qLNsNFpcV|!=e+2}6#226t4x=s>s5NqivnQV`=S;jy zc2mZSTwyyf4}_f~gD%SV!oArKz9+9WH|q`TdYRej12BDSAMbk!f>ENCYH* zZ}BnIc8<=fV`8-5FtF(U&iqPhCiwl}U61j)JnY{I`7~K1jAmLK+P;bj-8NZBAn+pk zWB+4+^Y@cWG3T5D+jeKUv?I-`0OAnSy=&)AD*o`!G+kk6C(nW=t3=xkJ~2?I-3<96 z61{xe_?;lfL6NEn>_08>Yn(?m0S?3_qt8E`T6TwEkM_qH&7?h-jtjtq7-gX{Zj?Ie zA@TTO&Yu_%jek$lAf>&*&`!5lRX;fYAOBzo%GVEI_0E3vuFN|E35wYk2V6s7RS;>@ zxQEiWyn_qFP5#s6jPuKN-=5AnPuQ9YJ5tH7 
z*mcQ(Xl1NBWW7W%F)|JBFX!8=ll?KQuY0AUahxVl>9)d7X7ok5qM3X@7ZJr!F)8E& zH~|)>ZuX~Chb}|*M&pDW*hx-Q2TJe!(dH{u%H<+df!m!A$^t^Z+;{cgM}qgOyEN>aPZcDBo*z^@GQm+uH+ zJpSq@7SFslKi&Oh$|~`i)boLweodACV7dJ)_x;b!{2uQA+-@Y+d9ZEk!RvPo3_M*{ z7=8cK4+mE&{xo&o!7I$>Un9@4WlGfLr{=xeImKGrxV*=4;6?EZc39qtgJq)~6Kjp= z;V(Erm@ZoO-YVHo(fR-X#{c$_h#p4UPA;5p-2wg+9A9l5_fYxZ!5a_@Ha?XZqdh=V zs*TYP6d>a8YL+2n1JuS;swgQ0h_UMDa?GPA%O~E5=wZ}F`bDs)sk5my6sr8uHMBm!(sK%q{B|%XwkTn)X_}?;aVOALZ>W^E&fWelyNC_y z_n80J=(Ojtk2bYqIl#shSYma@Q|$UOI{p59d|M7&3o~o5(7YKtZWkcN-?Z;aUjvvv zcU@d23uMqHnCw_U=a%rxmh0Pl_EMUK{G}02BKdUB)bb6g`GBUTCfF};vi{0}P=!Y; zE;ESul&XW2{T^|6|AfgRZtd#kq8y;wo1ld~f`&rsO6&o4)8sOBPE~7-h+~^P3c~z7 zopMX9%jU!Z%d4`>8taQ|d8;!%w5Jn@K)!-ly(h55CO|61NE*mhgpn)aJebeORnyS+ z%FD|eG6rtwFE5Sz=ZNDR8V~cQG0ia&YtQ24|$zlM5A1PXgTC_^oCJ<}Vk`M(cCQ|lyXHXX@ zux>#GLbw;}6b!|giMj7oS*oh4!Kf)TfnXt~`l42FYL@^h{BdI&c1=*V`ru0Sz=?^shu^cY@x|IQ)vh(j)dDzgD3H4JWTvX*Qwb>f|Of_L2Y0<9T*3&(G*V${&ioBP5edFaXyaf`=Y-yYAr8)*(U8hYoe?l2WKx z00o<~ALfda82~1rKH9D4W30H;|HuQ-=$U~-jYnfZFo4-Uwk+_1yoJ-kx%cDe=T~5F zdQ@7cC&W|Y(mjFpnby7oekZr}GbAksA~1+_@ow#~1IgyI?_SnfEb;`y8e=oMYD~^X0bCJ(_2YaN1Gr}Qo>5?)ds*K=z43GbFi~DFx|jH; zwIyqEWeQ=$#It+L~z}Z5Sj2;YF5SXH;OQQjs zWE5a4BVnw@dGB65vNgbU%D%691ctTho@pUHA|-)S=$K)$(CM`xV?RdT`MR@juJt)r znj_=P>WeWqwWz~WZ@>OZx-Hnl0(vZcG;xBK*H@GtY>d`nOQSfJ4^qm3e&Y_7{my7D zEP<&W+)J;iS(6(T8EFBsp%F@+{b)9S%b6VAP6HXfTa4A`VA1~OyX|N7JwerUe7USs z)IV5yZ;Bump4hoct8(Gx5P$-;87hwgta>H*bIYoPxp<3_Di*@DUB<|RFU!+RtvYGi z>CuP`wn*1U+Zzt_!)-t!P~`YTMn-OjL4qdqF{nMgzMjAO6q-z5_|k6m9kTeOB2Y7F zZ8dnQdRY5*$KZ`s4cg@MP$&5&bocD{6BTqGDh=p?7jsB-^m??n=F*G>d=QEm%Xb7a zht`o+IhmS{TN+2s?8eJ|uv8o1T>N~xCd|d$1sX?gtO;<~1Tmcv?9gr`CMNPby!&Ob zgq1#s7l_mqPUDvUoA9Mm13eW>?{~eEY}vlE!IB;U9c}UHi?B9R~;;r?XVS{ zMfQ{#CRUw3Ssy}Bb?V;hzvm`l>vdV_FWQZ*_`cVz>3H1FM+m zS+OWp5L?CUf_fLjAZtb5r()1%eul0~y9Yw=N~W^5_HE#%qNgcUl$IERaN_IHZ{iTh2RS?tifnL@{R&Li&~y6qeC;r8Z(bq_AEXRcI;kGy*%RxRy3He!V)1)u^i zy6mU$*yyCBq(pRB7#!9p3$?l~xgq^{S@~$EIk6vzPrh zdCK!e*tcd0-(QXeRAzjzn1bu{cV%dy0`(1gv!1_C0bLS+s5B0>aYQ 
z_x@Boiv~)4FPnlM4T^zu$q+XB;o_m8tQieO$CM z5&K#qA|i4#E1SbCtTp#_#;WjIr*OW6TwPG4VddCNdCp3WOFrfkU%PB=5(d>b-)>lr zjX~5M|I5f&O zM=zWF`DbT4Ql~`z-D()RQg-IWo#Ye1XalB_kpKHDJdWWMiMWJb(W3|scPKYR&OnMT zG0CjR$hBhiRw@Ym={tr&oLXSJ1U=*j;%JDVMVvDext||DOVGUai_|AbEDy1bRk6bL z#+W-Ad%U5Dh?uH}XqDcPnBN2cB^f|hiWud}YHD8C`Fu6y2SDLB0afn6>LJK zo~kuH9D!7QeCXct^3bc$6_Q*w{m|r-m3>ql%To^7TkoS~V;kV= zu^&Ng$Bd<(RKW~(=*#$H!337vpNkjhjyKa3;&U_)mYiO$-<=acxAJN795jwiUyZcA z@`|;{Aoqyh zeR*LZxe;f*zh>TuSkXJZbG&7IB!K_y%kzV;7qM?5DQ>Q97_&+6{xof?gm=$U8f_RK zcD6*gMM9LA)rp!#p27@PS1fI7a7_oy?!UUU9UUT14lkVixMThcgk))NuwV#7sd#j( z#d~=>K8$yF3Yt;USDL|*<%AHGTbv`l5Lb8(pF^!Svum8jha19>%ndFZf8)$x&H?i6 zFqp4X1ZNx7Xx+((!jr1`{0miGU0uyIGwsn~@ORA@7aiS;j7sJ3kWPLFA{!FEwH1kz zpTbicZ7?O0$_XqOgV;R5g8`AIGuACihGzc;@S1^WFZk!*h;!^TrqmI=GydHu%m!Z! zWcg4micTtuT&5zx3zhCIev6zHe|QL9i;V9&k34lQz&N>y2A$AIkr1CfuDCj{Y4gIJZNVgiqdwIisSfnf}X7yoQl>4R#(l-ODNKCvPl! z>$<}(Ynl7x(8%cqER$qJ9=O=lMN1Hx{8U1 zLj8|S-8jML6FOP77J~o|+@hE{He(;cWI~h#99|!yT8Mib(@;w0DaV}c9C?4;xd0UQ z9doB%e{l?J+SBrc0`fxcp=5 zVlme0UHjz4ZaU21(trOkbv$Ac zQ7wis4@Qg|Hie{DPUAZv6xUW|M4$@xf7lFsVx;YWa~}Gkyzssdg;%xQvQ$K{Pp=Np zLc37_6oTT=VVr&umo?HPGV1{5SjJU)9$4A*(DU~s z^W*C)Kv3*ajCoSwFOEGidm%K_M=r{Dq!&AIokC%Ovn#-WEFERRRC+U%lOdnd1?Z!R zCZ!8?+~J_)fUy8k?h2IHwh5ugJgdxT*~N~h_pRPmm|o}yTG15hZ?=zB2gRi(G}w*} ze*{eWp-F9K$qVPA<=hg*d)JTcoOQwFb`a(`cGcbB{PEE;diW<4KzYPJ`aGRwFkw9D z4|;tKvj{fI*J205O_5tmM(mq-7ds-B3sVqy2JDjqXnTT4| zyuar1<;&}JvK+2_q54;#MM;V$J`6yMg04?qhlYVK>jCSMu>N%=0bb?mkUt?ULx-_& zNgly?#WQ%~hf404s~yTrOJD@3P8-oDXovFZ`Lk7=*TXYA+<;aO7w!7LMIN-MXBvxT zpkb&)++OSE)ZWEGMY~9goeG_USzR`?qY_;yX~2>5$bZk0sTVa1HB=4+t4R zZ1L!^7$14KCVm(gRQ%?T5&Xb`cuAvxO)whi4Zs|56RNmtzmdHcvj?oRxrG~&R(asMC~*|az0`Q< zsiK&xZCjsGeNvWG6Y~|ftzcRp#+sGMAPZ*XB8J!;`Gs?J*@`KJQzDV&N6 zv9Tp~^V3DQNm~QknF>{y8h)4`M>rL$5sIclNUhpy5qo{XO@|;~z}ozAP7HTNtFpsh zioY%!d*_V$=+hXVjG&%~ z7J_E1aK&?--d&(%LhGRorczI#xzpxfl z(&Z|Q!LWCvHa3a4gA8OujzV`6z?cSu4r~^DlPWN5Nmi`E9xCrZPkU`qAmF9k}YeY5v&c`!o?mWD{ZEQOJlFz-q|~;YOq7O&`Uex 
z0@y5^GJ`)}K~ll=3MLDKiG(EywK{bZg1;z4Cgs6Ebn4nuc?;nu{naJDP1rAoR+_2{ zRM4SVe-Ua7t+XFtqJ=4#%{Y!#s(A90JEy_eu{$}t%z5B>mj;>h0@3hbU|)rO^eOme z7=UX&?NWeKj9iG=*}XGAkx@&I817o6Wb!{lw$m*qe$nP@Or%_(Kt9lr(AjA~GMTx0 zPq8WG1E+#e=`45%cRxl*bLmXkWfN#5fQ}<2;sr!;Ta`yF6&$HnW-$;z2lV1%!ozbA z2a}+fh4*I9{K~(g9+%puxJd0bH8owE!S7eGAVskL&Fl8r_hP%u14;q#pt6k)$`9I$ zq3N$Xm zU-E##eY9fF(##vIx?f099S-4TDOwq^jyShlFVeO;hywb-n_0-(Y{LHqBywR4NCo0F z5};}hOw0&)Fs#ZdK(C`m=d#ZI<*4_CBKdf#HkX%`oqA#1tCId;eTYq^;N=H3^d%Az ze@~z8dCnJCC2#^k{{7dN*DZN~&kUT>+3%VM7*G_OYvoXuhL%iSSa9%NYeW79L0MYE zi$;FJBI%(vfW$vY5GD7h+T2lby_-TQjF~v}6W(Eqx0V^bz&&rsfr~_iC_JbL#faP5 z>Wn=?SPYbq4T9F(k)FXu^3j6gxDcx<)jZ$uZ`(kr?k*n;Et8P?t+HxB-VjAL!$e3` zQM}2MKtq42FM=m9=N^lBopk`GL{&wnm9GPN~%=LCZ!V4aqm zoJKyPFXP0OmcI^-Scxev^;#Gjb7G?RTfZGuR(VjnOf*$KJSHk1R11LDT)m{#9Y--X z(%u?tCU-|K8*P>Im-;>qIq&~&jUKf|a*R4B7qxW~I^;+C;+(O*_WG0J4=s2)W3QHt z1qI3Dt)=dKZWB24{JN7T$jYq{6B`k}>d>@KhuJn~pEUN+y5@^U8&)tD(ZPzetd!g{ zI586-kq2YVZ7XGBHfUXhk8J@UomJ^3F_0lSnCM(^6$_(I4M}F+FSRSsgH< znC>#*R~+27z5a=kT05BJ5_%!zXa0%Jx$cC3C8|g{iH!MCzoq@@p0(#A1&rF;H^HR_ zkoW4Qri{*4rVa)DT@3Z)@maCxA*e%V0or3M(3c;?DZ_sg%ep~yEV7*^hgUOcyV{}3 zWz!LPpM8&Ns~oFNPabvitzHls>KEEJ{tq8&FtE-pyf!+G(6XAJ)YCGvAfIQ8du zf5vtay3HWeK)~qbxUV2Y-3fs)7+|V63^y%_KMD;E6)>jO)QdY`smN?yP=}T}1Hu$2 zKSG?egCb_4MuVgtfsM8PaB}$hbqLw1s?P^(Bq?6u&FP~hj1XFy5BR+1D4kIw33z@J zvhOQqel&pwIu3)=%WmI=D)o7`7rxRma$V|Hjj4N;UwjjuL}36T7bjz)Pyixrd%$Ri zz9qVeonF7Ej0Z=C$Ic+7@r>)BK(-J}o$Xz7d`jeXq|`D9*Ln1TL#YIC%ys^_i_(1n zLOMBAZY{%8D;*oQ0A)gT(_s_4TAy_fzYbA0IN{8@~E}}X9 z(VYAP;1x0ND3M=Y4|9bsV=Dkoca^RYKFV=JL2&P8FUUa*P#)KU*2z~V5s{Qi~Mf2)2chOkIw1g{?(p4Mc`;V__s~x6t#ZmAR#J_abdiTyp9mUG630za|O5 zM(&{VmfyPUpq8UH0E1G*nG;^z;sQSr|70HkfYcv9!3aD{sF=o_Eru~vFR2@nPEuJ4 z+G-m*KF=n2e+uSRdMHb&B^<#+9{GxxWGE(_FMzYLCY`a6KVvl5?>>UxuJH!a$0og} zkabDZ9pIU&{TZV|CTT{==7FWQ&vwkv1!V9CqzKPa4QmaNh$>HM2Pe{F{2AV9M~O{# z&NtxNZ(>sb>)9UM%tq|nB=Bdv%G}`%9})8!Sv_9ZSkeC882CPYLU$KTBzSNikqF=@iLIFH7S zd+Chz_z?Kni^owhK%4J0jP<8kVrR95mdwf5=whl~#-nCvXc+sTJ@UwzCkuR~>}|$| 
zD%q=<^pxn`Qi{0IhZ`*MYXBo>D>f2@K-3oBMC#`7RVVPTz*I#l`z+3VO4^29Iv!2=1M zk=Kj<0(9}8`6rt5jzqUjo{z`oK0Y`Wc?-dS#64f$vTE|1TugzW2D%tR#lan0B06!> zNuheHJ!J0s79f-LDwSn=c(0~6fn9Th(ewi;BR6reUU@&IZ67*V#h8^ zOklac%r%s_<38vBZVDJ53`inE-sT9xYA>p!ls}IQ$~^cX0T&bz8Gn2Rv=MRre?DU* z(l#yLtF#APA`;34#-H68hM*c&8wwVxYHFr71!y;D278w1LHGqDKsZR(ir` z?~Oq}(Zbj8agQ{ZP6sd%>;JGEWFDH2U1;ra&&pk(7YMR!Ll*|CkwO&g3VmtACqfVb zo!^W@I`zlLXF4O20mj+Mg90%vOGKzMa#5n8z6!=rYFr~T$TWj~hRI_LN9ye3Qqlu?Q3MTWgkK zYHy6|61coB_Ws=@D(W=%2ML;fi zqco$IdbE`|cyuBZ9e^2S5*n#GBj@u}JuI)N2u+mN?O&I?pQ+8<<&z{Qz&kUdL)rKD zg7zyaXrWhjSsEKYB@!B~7NWV-qYoWyPzRi;tep7Vo=^ga<$(SGMPE%-oxXIhF9EHT zA7l+Y>?UGP9t9HU?W$+G+~Kub6?a67Jd$>O0w{n3`*Lnh+)|H{w!z*OeF~|#SK*9~ zO>#PTcp1S^5Maz<16FbLqFN$6wfwZwEr)c8q%T31R(*F69c3B0{nu?mvAsZNKeR+0 z6-Guzo4Ql%1|4#Jln)=SdDhf1k0X7mnW!&z%eb7MevE`#$Mgki4m7b5#Bp~lx**cD zDQjx_GSY$JTVj}+bH+#kC1^Bl<} z@}E@WhkrNX>tn&-X0I7+U2y|aoGquh@lAFkA}NxGNC31EV~$w7S&RwL+k+7j62jpu zaa9Ag506p`h!x{QSG^=H92g)f+S;QLmD*O%cSE1kY7+;}K~xx_TD~Xk@mQ?hsU#kcrjTpC$YXzf17|}u)Jmi=b#08VpEJM6Tj+nX~R?vZd$^t0@iAX zm}RW((s`xQX&;XER3?|8hkm!@Vmb0V=3LwiS&5bVFy!es{KtR)zA=V`?y!{P$ z_-27Kx$|>rkb$JqZ9LF)s0cYwE(v87x4(Z(fC{P104tP0)0Ta-;;%WvI*<1qW*i(>Z?T~~OAi8_AhcnpnEKhH5J@|zZKZf4-8=0t)sHs~=wEWT zbD;Wu`CmmkopZ*|sjI0GyULg1oEZb)g;|0noAOGk)_dEd;`C5mkTMxq;w>zM9I6{b zBYxLgZticBg*edTD_wgkYGevr0=*N2SD~(Y48pP|Ioy00o*!B7rJFLc#$_F`g zr`xgHJ{?0`8L~cD*KFNuiXrW`5tR;FsI0F12Iu!pKx6COgt#pcRZYmm`k4Jl_lHgj z(8mJu+4Lv+!`(!w4n zNl1RFxE>rEUySte4!Q!v@TcngDOA-E)LG(=5bl?h$y1z5&1Q0%#2<4x0ymtYtW& zTkiL+d6OuUdptAX2$BEa`^K|yc(Sh{4gsFowx^w;T0lHd z?i$8oPfH-cq3-mf+3DC)ruTNYLs+;q#<}F4Peo09#vwQul5UM5jvaT!+Q+$EaSBS# zW3`j7!6XRf%qpZ?)!?g3aedc@>_5@KHmEnVw_5zj9fa8n562(d!7|yj-HU-dfQ-^; zE&+N9g)AvRq6H#~tlWO24}=A3W}bbYx(F#iKVhu~y1J2C-9CQG!;QSVOtrOv*cM=k zR#sQ9c-Ex!B}9$2eJLMqb62(aizI5`>ewYiq1I+5f*`o9j3$bB$Bqe#^O?&ZvW+6B zhN_+roHdqdXJ}K^qHH!J@@{xURQ`xvFDN5%Y9jV`lX|Ko< zlE|9N<06%B4^`^XLFaK1bL3I#nSJUNs%jzCS;L{)0n&rNLPO&=I_%ueKkGY3(E)zv z5xwEzqmI^DqUWsUMeUh*Z^vdKAD`f86P=;&BN+h*TNxy6E_+%B&%Eiujuw2$G1{5i 
z;>MpkH(rBOG!U@(@e4D=l&nqMCgx1`LfVl53%WN}q4h`ZZ@|i26*!I%hlXh+%p`w3 zwn*vB%k$zVKEAWs)_gM>`QK*e5BJtPHhJ;3xA(N>+|XJOC;?{4BKwA%*|XGQ=SxE} zT_Vs;F9dA&Qbn;kyt^r*Tak3;+lM=1*uHyqe!2^6@U@+a)bE@=zN$kcMf&x4`X|9S zQq!pbF03jwe*R!*dP`{6qq(JTmyI7h+dhAC zExT3&(33#)q3e{45Q%Hs3VB@R?0bGN_ES+LQ}jmxzqwmX`s@pLDbsg63b<6 zv7=pVbNO&rlyiN8^SG!fXpK1Sz9Pv8ZMJkds^J*JQj2bvDR#$4K3@B6_P1if+Tb}= zAwm2a0ssieEymlz!jO3JpLxD}u&3Bb{i;^=N%3E~sf|GyMhN!ih`v@M^8$%Q%8rJA zr($>P$brmYNt$g-LGFj~?s4Qn7-Vmp`l1dz3{8eybg8q$T4-vWv>ybY8|>LWdPS%4 zVRe$n&K1tzZv5VL_F`7(f(3n*o}S*#$vWc__pU9;+i)8A*{|BNgVsc?tS_By?nEeTu2z8TAchc3+RDFW7tK*x7g(71%L<^>v#-c|@*tf}$rPSJ3KiLj@qL||9x z$8RQMo$S1a;?_p1Z&~cCh&yy6QLoVAw;#WzF>ZRtJWf5PBr-8^Y_w)Pb(awjlDAcD^1sb8P2%}{3md4v%;;LKSRWc_aGV(p7RnzqN!kW z+Q%V0$?9;^`kp4u{`RQ8?t!rkc5NX0+>^_7wopwEmB?4K<*t9@`08SEd&#Qq!aKKz zXUxuXMaDind&?KU>uHs~jH|OZcal7{*9a<7n4k}|N2wr5d@)uU&@N*X8Rfy4J9nuc zl%oK}^on=>9gkf)Wv0e_SZS1}H!l@%+|B_9Nlx0r$Rb&=rc=UV*PxgNq zci_&gou-`)FL-C#jo3HT8WcPM-s31~JNDQv@{Yamb*C0dz7(IWhO8;jVXrRquS|$KAs<=!pQ3PpejxR+f% z2~Pz;`Ewf9rpV6w^tw8N5aoNDvK*6IZ`&?;;3>5yqrbDas=#-rDv$ktA?9%o7b#U> zNq22ugH4&s2kYQ1RfsGIAjeC5)FOgQR+- z>hrc07wgY<HFy8+Ceu zr|rC&Nb-s@@Hebe9BXp0HPS+_O!qtXIv}F8VG=Ngx;y7&Ul8YI;f7^?T^{`chDQ*xZMR!1_N_4*77aNPx(oGQ`Y$ z4{k~A)D9BvJ?ig+4&LBQhqTcabLro@;M|>E6kj{`r5zeHOv*nJ>%?S5#Xg9X-tILb z^pkb&Dxbznd}cY+G(?K+-7!ddtE9-8l8N}`SLjBd2dm1F3)%KBzxFzTcEr>JFv4Vk zM6kU#cD)8F&199g7|S+G$pV-~Wx9Wd4t|sR8Suq{iKV4~c}C|PRjoIFmV%CflU~Jy z-|FCPZ4fc>S6ctuBsB26+GvxAU3-{Jp*_Y|-`(AVr~BGPYi%1k8t%!teJj1azDUM7 ztN^iD_WhR2JmvplAoZvOti~YvQ}0vO50H8s{!9gA$~)}Xb=KP2dH9wv)b9|}&(@x^ z8%RcmyrUGHa5<_$>W;uZRxhr8yYa+@G0buX6YdaYJmph(o;{dlc03%d9rNem@DCDbfG$J=wh5upW5t4Q!VQha4^Y`)u^H>H-jNyr<>#^a5kG-@7R}QnKulL>6_g@?ppBvw{9mN zcAeAk>XzZRZ?3l#i`6I3Mykk@{dCLarQw%N|K)I#@rue8^j%|Nx>G21ExvB5AMf~ zp5P-UlGc<*Z>QrvEprURn%qkjYXeQ$d3htlH;{%`nE&!9T}$$5yKO_Ed9nDKEpIDA zs-=e;<1LFKy6wAt^*5b)7Kj9D)39y#9TZIFrH*){&-Q+yN(DF)t|YyrlhjAc2sr0} z3NwB<93Tk(d8^U*SuHiS^68GyD?7qV))&0eMxwUy#}di5E`StpK@B8Pb^X&c8MUHNlr_L={S>niy7HF 
zio$yR(AmB0V20~0r*n!Z!^#G6uUd#Y^N{wZo|;%8rG z-?DkSR8UZ;{*D$Vr@@CMPY$R7{GpaYYWKQ0Ymr&mIQmZ*Oze@JHL++P6H`|{>{rr* zZ6D2C3(ue{mMT4m`{%63-WcoT=ds7OEJY@@7|nx;s})l1C}@v`;%@$>cGid+*E^0* zA9>4eHoh+HSK4)dW#@38d{FAvIH`BCT@9C=bzXRjXN~j*gtbWzm$79)KbAtm)%k#m zXU^$aBphL~NYsR#ZlcTsCg=mwe86jOwi;Kfm~nm-El4|b#w?_5f0;(rUfw{>I%dy{ zj%rN+R+6XO=0XQKRsXgZa@FAA==IE=TFc_XNZS{JCE6G;qF{JjXg?=FXa`20g2nbY zbs-h|W>;R9nUt6U?WN9O>I=AKQ)4KOMhO)a6%Vv(OOgL=U8jEHswTz1OF%Hf zS@WTT-Sd|4(RYtV`s*xv``=s-YuYiR=`C_q8}v0?fCB`j&nfcWCd@(nuEP(=g^yuW zeva1~UC6>-yV4}gLTBGi^bmyL4+&p40ilMZ#oV>>8 zHA*84aDv|WcTyIQ-@R(H85<0cc>06fvO=k+%h=UpSumdvx%Qp*<4AJQ}@g@=9)9n$1=!-I}No-Oap)9IH)25cE zd_whd+XjG?eP|?HDCF6Lh0{%qeb(pFQh99XL-g6VF%Iu%$QT5+V-9IQ!HucjAcS(P%E-4RIyO|DZfXE$U{%v zxQE_s$(|h3%+0t4TmhyX@1vvGmvR`i=^GT&ceOL3wH}CCqDal$Y^ld;^yM?FisUK( zTCC1}>R7FaaDQ8w;k`4?5XL1NG)T_~u*lovD1x4{JV@M|(1g}hk26c=J#QE_I~pJ= zD1X#pqcJ2zJ69~|tPOK)$ZE4|3u&ujJ3nu!vL7Gae5$=HT08O7Jr~s{=Vvt68I~r# zE)?VFAGnR?*J@UT-S*z4C~0~j74=ka_xRwfpZXxhDt9owzU}*^*TtLXKg}HPNb)`R z{)r&kB6eXBbG^QF&FJ4?9cpi}=1cC0%NLf%pW3?#Q<3augGV%WyY7(uwk$MR;^RyD z6XS=1-{qPY2N#Kt5-2OzEtl+WOh5Ccy>^bo*vA;ZH`slCcm$^8BLSoR`46dkK<>M0 zgMum0#`iwDM5Qa0-ge{ZEiA^YfV1DTp)MI$H26xoHdl7x?Byy4ZFpY~dXD_5g-9bFFPtj zV1z z2}|QS_rSx=jhE-TIFD8mFt!ddjqSt5rU7Ua z>AteMl61>RnUBlMLBZo^_qKQ(cyVmMrpIdW_W;u1jVxGJ5$X{4&|(DY#^5q$E%);z zuh6JvRI}{|hAO?FuMLGES(2GHUAw%F{@`@%j0`9GRl(BkMfb<=q(ynY9 zyVnV8egr1sBE`Y>uhqm|mzL-p`oMHO>6R;fTSB*RxVxp=-(0b`^UNV-zHf5=(85uo|CtJFFq>>m4|`vHA@C&eu=uh8D4Nh8C=2F$&SnJnS^u zUO58ff_*Ur;&Uf2FLB%UGFAT5vK$b>O?P2U|*e0xwV+r+g-R3X=u&qsSeoo$dIToo#GAQt#!3-8~$jC+Pp-!;5Mfbm;jwaFZzpzX-4&PmPwX$vnAh! 
z-%B+T=2;R3S)J{|haQ`q2j>c=K1bjaSL!aF-Q&b=(ey?1Od*V9MLwAHBptCW;w z%|-G<(36Q;L3=>QY{FYy>i1~cd3bscuLG+?#}k#BXIxf-z7pmASJ(W9n^}u)e;({& zHHn%0`SJm7fhRb&NCjnamX5^M?#!fc?t5tv-W+iD2#(O1wfY%TYoP$$KIgE}rv_58#yDsX>VHWfipgWflc1hcW!Wi%9`N1f6!p$B6Skv5zf#%9IT(?J@v4C61C#9?mmAJ+73uNV#qEt|5t zHY)Z1sqVd_qRO`IVboT;wS{X|1TkO&1r?Aa!2nofiAq$WWXY1TZKV|zEDi5wm(DoekGo!z9z z>I$nf%hvXp=4KmO{SkIHHfPSQr|@X$Y1Pk8EtE)5*C^pc7JopTTxs4 z^${2YgyU3awK#qYXl{P%MoV8cpK9r3Uoi5c;wFEVOq8HOa*5OkU^9|w5MLcy0+z6# zcjEu#6V>0Z2ivt$x;BSB`m$o4JPuP<`Zv_AN57q5CUW~A4 zK)>|F>@oBv+~NQ^99{>h-|gBZRohDb?g+x*lWvb9{W3)Pc5+t7g|U_aRU zUVrIrIzjnxftOXsFb(?s1zg|_^AfGpk2hHFaf^0t!vnh{MW!>zcm$cKZ@lc$2~<-O zs-28E-_2RrAB_62a`;9KCtCWByv0G}rs!EC8MV62{J#}&?HYdO)>>kmQShH%b3{== zwVHHr!632aD*f$*{Dc#tZDw3UOKnS0wN3S@^F3k1@AP|fbB+yvn$(T?rVHxo{Z)Fz zCXH@n#q)8qMsci*%&g-$BK-{@quJ9qDAbXWk-2XkB5xt>vm@v1(jA7KW9+0?y8jvf z5H9y}o9kHjk;Sg@ayEX%8VttWWL^|KU!!Ee5weksuN?kf8aA_Bvx@QA-}$$E9mNy3 z{Yw|xy^+v&jpgJfWQ<3uD>(HvQ~P{)lTFSatF5o?N}o>RX$-w*7m6)ucT0Sl^AxVd zPT#GiVDWugV)Q*X!6Wx`atd8!nTuakewodhG7hZmv<{^OxET9BOM8ECagDl-EoggA zj(L-FXMggJE@mXiDJQM7Z^2AT_RASRY(ToAolpN4*`hkPoQ$ol!Qz*#4#N$Fm8+8q zL-udpOfr`l%#BXxYUSH~)1(^+VXR9UBm84@rb$Z}HJA+;vo?@LonR$FjvpqL4!cZu zD!F|-)n{y+VpmQj#g>N{IOXbhTO5)L4o{5SKMA~OVN7Jv(6?Yr!5E{3=DpQ<6Q6Em zr)n4GjnB3Z{Vuqyah^N2bdtGv*#P}QhRfnoOHZ$P2775@>{3&YbLrLRHB-VI1p(vf z4#Sv4bYsvDp{I9Z^Lvw)9;J)1!X z*jD|_e>6_JGuWT&A!`+g01KvWusDBVs`}_`y=BhuTr90yB-B-8=KROS(a_Pa=?#$? 
zIRc8e=-+*7(2{aUH5hN0>%{2PQ>a5Zqi^cn$oUtg zV%TplA!$Ojh4+dZ`jQE^iXqwy_&e@$k$JrDtw`=dngkX5=y2dKRLt!Qj|q{7^wW~0 z`k+R)!f7&b$Ye43Qf_Z5}L< z4I&HS-!?rjO+}x)6e}(i0>0Jae_VIf=?E7;lU`M9qos5Z_ePzj#sHM4>gZ8s^GjD_^ z7CJhF0~Y^taa%ZJ#g?Lw_zyotlXgjvv50`5U9zc^nn9cyW=ofsUd0`MC*tf1v@q{E zPFCPb1cBTP+EVl{PB#>GHMuQx=W*$ZYdL+o9`8I$n=7u7vzW-)&mw3Rlab?iqo8@b zpO=4duRd58UZE-gg@VYHq6K{q=P)Dt<-qX4s(Jd}x>E_9=C`vS69Bk{?R;Aw= z6bLvNgPIN&69b3A0GcK|@LWbK6TQgeNexFXQgZ?V3TYaT^iY7UpQJf`d$B_^*I@XI zG2gipTu*NvF|wG1FCais*4Q8Vy|*^Y8c=E2RoF9Ag!na@4uO zFP@{%SQhz)L`J1Q1_|&`s#A1JnMNKZ)h0~?H1y}YB2U1(S!I>3))+-a(8xst`gmYcuoWHax05u8?O8rAkjUKn?W5U?`n$~$umzhEp{H$6&~ly4{4uK z(VC@Mq<)$#tfFRHkG2rpoE;!h)P0uF5rcPqEh-!rykomVN9^GyKv`jF)l*Z#WE_eGAHb{Fy0wB;W2k2-sK0dAza`we>yzA~>Q z($CMBWegRC-*0}(PiJTU{b!%w_+M0x78ScM386mKe9tV3=v5#??$FCy*c4_4=<(uF7soBqp&^?QEpje=S{=$)`UX>IwYCkk zvL%{7#%F(qcM~>F>E^+MsIwDD{0v~N8{p_gQl2p6)^%%#uHlGO}1>Ww88E_6;P5iremzr=Fu%Bf>{>I z2yq`;S+oo}rp1NSDuX_to^VRP`ky$$JTo3Q`sw&Iqg?8}P4U*x`#NGSWcQJ>MN7MLq2NJ?^p1Ni9z031 zTy95Te(p5+algW^F`*0Gq*0{Kb4zD&U0jW6yGtq#vwqD4`b zCF%Xq0@Z^w$o>v|v{8)IvN=!u&w9*#a4rrQ3hI$cg?rm=MCGO)|04oD^@<AW6cg>K5$eUTT6^7(ZG}o%A8e*n&`$DtHA&dI>g#faF z)Y2?MZ^-fMhfix^XoK`(y-BU&_!5lg`}*i7?M0q`bZwJjPu0SxNO$mdu94;y40@d{ z7Tqi%u8tn@7waP^vU#6xwIXj>lb(L4-6V+#GPI=Hw7Y*99h?>NTEeq-pKsv#Ee?IJ z>O_~>xzD$O_;0mwEzxz|gYiu&D%V)wNPOpz{(0|=wcqT{0R z(+hI`M!QhBB=~8=Vuy%TBY!t5<)hm~lXU*n7jpT#Q4Tp%=sJ$kvv0)?JulZPqKtLf zx;wJ&iN}_X6R2$L`R`#;`y$TE^@p{BZe!5vw3(UYn>PeS-5+8pUU0(2F#s-oY#dKW zPUW0JbX-LmS_wB^4t+ZHJdxwTVt--e60Gz6Nr{nfAT>&B7Qs~=I1!7lOzJTzK`IWc zhs>&3S5FLTwl@?Yl%39PhmgfW2TT77r9Tf%;avM-;4BEMwIMsn^gdi-i}3JSS4`~1 z%zi5#TZnSo5CZsGv+Td3uX#QFD*ro-_1iIm6UJzsUBU@T^Ukc+szzO;n4;_FNR=^w zr@xRxhC4stuC)r2z}eM{+a{KKu~9G`h0kX3PTOS@5N)%u%GfK zkXJ6yyp6ilYL>k?VL_oLEqrF^wQ04aP#GHH%ljFGP%2%la!U+#dX2$xbSF~DQU<$- zjQSVe)6Si+xE#Z8ouaL;Ul}aGvl%mB*btqGz=vBv^98K_>)YEK73(9)Wv5S~_8S9w zY`@uwcW4eW>QR{3Mr+Qj(&aK-bAB1!z-hn`msbC?LU6l7SR+3NGhj85JH{Tl49u67 zx^Vx`N4eqKzX83@eU^?UAKeNUh89>RQDS?i&EmpMvFf8`8n!>d05SX4!{_Qot4Li? 
zAN@WNUqo1{8#ArDi9NQT9168Jtg0mYN_aJM8jW{#(ar>L8h=0`rhPTNq=S(W#=2VK z(0_cBK`teH>vzbH?#`v6!UdCh(l!mgo{;%&w@N`kA?d4kmc7YruM|PRYLH|CO8do} z|3v_}VbL2_J2xo2aJDT^*hs-QZ%PNP%KMO>H)qPtV!&v$TB-)ErcaZaa!eY2#VpOc zmNWv;o4s!UA|Eu0T3kG3)!ZzjoE!xMn}<5uND$ zOdYFJt(kdVe8QnkI9RZuP%Bf9gm4QP*2+}GA?od!LoZfa*w6xAdvXoG9mr)qt98?u3nldsU=NrwHJq;@3-R9dv z#gkAFnhbU{Ch6+L(1*Qew6E<>0C%!es$uuSsT(J>{~Y_>0hGv{_N|}rL47VB^=q-H zQZHO|(aPyhl;-R7shu-)kHN%f+H{qBEva2dQz#`qK6)?0z3Qtzj8digv=>R>*f&4f zEr%{C-Y8iaYT7{KF%OKRj_V3*;Fs9o0-UXO>LeVHnQ(J>`&dp$HmN(a;8BC?Y|G)P`Jq(3 z!PdEnAnU$a6gMkkK;oWvF?A;6KUFCf?1D=FLy;Hc>WTNSqyp|9;ciQ{@l!|J6Wn39RsgvvK z+?343BVH3?j(OQ@vZ}*bhy7c?;jdmuugn{Wb6wNjKL!qJ44h}W9 z5)HiM)n_jc=0(UYwOo=*rDKhmsn8y&m2(T2L=miSu`plID=y{jqR4`1k^PMJ{HZt=u)z5?rM`n`J7}T8o%l z#YBHfY-gcA;KlUGy@^wNkHj|TWe42f z_Cy>UTv_+5ohEwTKNS$YgVjC8j3k&V8%2fBY`TUy>uc`dNt#vt!8^bfu z3_v?E;WBdDb?WQJS+gLKq;QC^>&B_Fn-1&LdHqo~nT!}ai&6Cf*%e_w0r6Cvm1VY> zd26J~@a#g&&I_6?3%QRdOBnka%^1l?wWeRst+X9H++HNQPdhKV4FZcbEg2N*Q%tNg z9gwF`EkxbP4La>~mM{0?Fly0Wc5#j>pIR_scWScJ0^~B%BZl)}`k9+cJSR zu8Xee&Yy42@#ALQm%sc8_$_eW>vxX{xOAmC%4}z4;M_m^)nlxdJ7L=>zx`6V=F^7s zTzgg8U>nU>IbK!HT0O&u^_fPlZ^sG7bxhGv`S_|CdDrY>XURRDOtG~P`g*)g)64T= zQeh!$?@UwbOo|-Kv;7Au)DnaGCVO?&hcs-`p1yqPV?_&yJz-MPv4|^C*IkkXr}MeA zQ!8ZT+t}6dqvXQqrpqFt`tccdULA46--`JYw~l?f;ZH83YOuTIfa>(BpQ33dNywKM zB9!N6D-1g&SK8BJu?R+>gO|*9R9;84P;CaTZI!^{%YS=}xDhY+K*3ByFZv^D5f1}T z^cqXw-O$mYe?(jBI;}b8U6!S+8+CSUg4WD$M3uE@PmGwl=5Wm^?sTv~?2es@Cpt>X zqfDEY3opTvE6rSU?F3SFOD)zO-cmRC;mngz*MKP6e2PsBZ5G?=Ye%B?jJ6#gI>4(@TVJsFf#eauCKZJWG?Gxs?G3IyMoCO zS&O#VR6%<-oZV!TYVoD?)@>ntK|S-0PV%O*V`MR|uJiQpIGL9h zF*c@=I4aEQzowcrQ?&%V-L(mYV$R7qN@UU9LE;tMI7da zbV~ebEw&s2&E~=LsfPWHyzP9gA79*DJ8q&>+r~s5iTm>Rwlg+D>6YO+ZVTD5_C32E z`bHL4_?cY1a3L|n+G^Cm<-;>ibB*jh44j-o$(OD#SIewSSv)g@{wb?YQxObudyH^s z3RIP1mE|92t=RlYr(oGKV*#Dt2OYkh+NYfpVdggN$+1&FFZX9^O;nkXv-76f>{R@w zuvb}p^L!>M_h(y0oHFKORaFz-zn3;!y_Lf!sxHO9kr$%=nX5?`^JP4CSn2fstQ z>MGfmy6?=$q7fEe9SXy*xBpw4W;_~T<4C!f339}aFwm!j&K=L#yfe#}=B 
zw2C;Hpdyy29_1up^E{n-e#RtW=gA9O7#P;g_8w4)fgwt1VJ$#-yFAQsFeIVyyENpLY}IAt&EaziH7xf$Hnw!QR=Y zeDS1^es%SjetSV*rb8-jsmiYd_Wj_wR|@t&(3y3e^f@F@V3#l<8yGpo;_7Wi{h4nNNBtYGc%r<*o! ziL8Bf)?s#rO{Xr_^nl#6w!9epU|qRPpi+kTx^+#NaxewGZ<2M@<{EV+bFS{r^uS%q z_rQK+$-8R&RDOixH}c`Hi#etInhO~EYAM35u1ynemsu#(f)`Cql zJ$6q+g5sl+dv%86#>z&`N?Ylf-b0NE=2`8@(gbbx>%DV#o}{I5Z`aijx~ubAC%LSq zCRKjoQH$p;hY#Zq<$SEhW9eOnZ~EHMVkIJF>zWNiKJWP%*DilHs4ZW8hwGvfg^K;} z%^+N6$t=ghqHOwJ^ckB{+1M}N9;chwW>{#~{jxSy-F*0){G`LEO-G+Hvv#UD``*3# zC}fSWR;;1-$+)zrYnH=&j%yH)lt^Vj=kxg|LJZf4$Jj^h9%H9cdujLE4;|KBeEBB@x7!C>MVwAhib&{o*|ZKkDJ-2rqShnqp%5wGXsw}-D|4#iIez=b zWQ7ke5}Ie+{4DzU(P#VjziDYuT_2~yF9a(>tqjd{apT(kIdjOOf-aNOFJgyklCe>y zdCt^m6^RXRElh>8)h5JQ=h_)vyztn3|Gu`@>&cVF%iZbHa@c))r}eCySwEA2cGD4$ zEu8g!4<1NLQ63!LlFlNc1rI4}{?T++GT-tHb!`0olK|7?D=Dv^Hs0G|))L(saho=q zUL_nCuI-i}q!`ur{>$r(P8hAmfbQ9(5%V%KD4-`5LekdO* zsCiR%4mRoHr_%4El8_z0ZJ4)=!{*Acj!RuqsI;0|r01^f%XeNA)l!Ko*eReS;xggS z+FNc>SeL(hH#H*g_HFWXSPz+3?QC&W)zz_qg=snQrXk)TuJDHEt^!E-YGlf;-Rb<^ zZheHRk`KK3+75kk>)toQ<5b$-tfT7PJppo+n5r_}QYB$Nz6vv&prfZMii_b7Hs!kJ z>og=~niNhxkWd;;E;SX=_PnP&SKz{?9)4@j;wa1X%&Zg@N08mBP2k8tEyd8DRz03- zsGKN%FIR*_+T?Y0{eJmn96l^tPi^S7*03Y&d2n=pOEBA~Qk>nVJr zJ2kd)qg+ofR;Cish{3GgzF-kf2AC2Aq{K!{dTrn9`C6x6JLc31v3Rv-)ezcj&7W*G zi?a$B??c&fWNczT@6s=Y^BRE}_c{9G7>E4Wk*&7vP6BqdLd=2!`S_bnS;~)FM46OH3R1Ea7iHL}(w?ICuHvS?PG#Kogp`{yli#XAub>75;(tZ8CmHe@ZbDzQT zI$Rnw%s1D?C`w?A?QQKxXuCt{^bz}7rz_;UT7PaLhD=npj7TDh4lC&Fm&fP;12qD} z4*aH77ckCKaC!#`5$Pt+{L78yUCVZ)ySWe2#MDijHhq0Ar0XCpEq%WQl32HkJ6AL8 zL|>R61mz!>p8qJMdzVA=kD3%Oamkb{Q>NFR56t)X)_hKI5F0)J@+KGgk;7ZG>IBc1 zuDmY5@pG+ZeRRifqRd-=Qf(MU>Hlj-M+eo#FzT$n$CRhq)Jq$`drumVDL6{zd~_MH z2#OmiubYaS%YU%0FN;pTbo#zPh3F=Ba0H`-?vE_TEJy zs?x=F+S*t=%%qLvF+jhTg@whz(Xq>6$MkN#tVpRz?N%m!tFu(4x~i7fdk<;FPPO&- z?at!q<8#Ap-5}Q#REXR9+$S(GJe)kwJSZ!vWEZ`Cpn85Q@AmFkNqx4QYpky&UI*m) z@;|vhy45ylF}fq@eCf2O0LQk(_)coj4%W$XdY&6MG;?4I4KyXJ8stld6Qyxc_ELkY5MR z%>H5419BlYjUmZ-1~NOB45N>p-qO+J-@Rm9UrS+SiF`Ok!Nw-7c;ap+kugp?MmFPB 
zk^k_m$+{AcIJLDT?lXM=u?$x_=f#w>)Js3XUxm9G=YcAnU3J36WBa!am#n zdKqXzD#^=x$9a(TD3bZtL(8zw{5!xw?qcbs_w6d`L4SW0DgV>tExjN1O6OmH$bEz7 zFH7jqUq2>!c*)HF^>4a^$@mbzlnaM5lEBWa-!udXr03&n>FGJVlQr;^Ly-~_X3jYL z-xRe3eB%tbeuFx4@L+49)otu|%=CN?e8}5(pO;UU1ot&Qz}hrV2oieA+0MxOTyv0ZV?27v zeff$N`sr$$nV90y#@CBxx~BW(+rrTk8I5+)Sfv=Mq#s8DhBxEf78lU15jx@=FKX8s zqcYSbq)G*+$Le*A+~-c6I)$r`RY?fIqvS?UnzT#zTmNtz2n3lt3eO}T!M23=)y2iZ z&Ot$3%6B%)s`HMP+q^B0QT18$IG%@ts0}-{0?m|CcRW3X9X3PtvguW z8gri+Te{FP%5k3{U0K2xEBd{HVXW3s0(QR-(9Pin9}T!WU#8ni)thyM$ZpEO)-78M z`c=^xxO{sBm|zD=9uD(>3f#0|7Niz3Skt?#g!b7VTIv%jo@KB|pwoXY+GBtqnZ@*Ko1J^(rB`gAzz}R?s#Lx zig#cJOl6Kr?Pj5?JVf7p)M|yKfPHL-;Da4>PJ>EGFcI0xOqG}QR{PitDwW#r5u}@F zjnOKnFJq7n`%Q{+JR~F}5@obV$wzEm=rZ=72@-Mpc+JTv5RPqg4`m!am(0y~fpyxi z2~HHmRKF=6sKfbYq!^$*IYS?iyG8vjziqHiqVgbLdT3W!&y}We(*^2p8X1cE`mV6!$nB}Xp6`9-b576m#$&O3f ziMk@u(O~O{m>J9+Vx)9IndD8aA;Qz0EQ;AcP)e^eE#&~g}zrC>V5R3%B^VHGHt2nw7ZBM$)*z7F_1y0qL zT}<=4ANUC@z#3qMZ~z~I%q~+_Qu59(^zh{bD5nSXA5?6J!H zZbn^cOe){=@bG9tuv0LA#AD|%>RVjGfG?Cko6j8w0X=pf{0?nn@I}jsxfMkYV)IPm z=5(!@_siFb8Vsr>mB1{2jgJm3E~9s!tv}YZtn963f8Dl}6(c2Q+;{Xq{KU7aHweSn zb8~aGajKkK?;dtSiy)Qt=FOYs#_7%JpgNO5`#!XR{n8vpjd<0!(9yA!QO=%HGKk7( zAdasZwAMDCU#|3UL05V>DAC&q6|EA2(A;f8-i6phI1X`;QR&4Ed~jd3B83*{CxF|| zWQ~CZ9QpR#jeUG#LIwPg`zWA`eQpwXO!pS65gMLhtsVG|e;UkI#7dJ8@Ke#Uo4@}a zjq@;_B4c5JnzxsiSKEkS)yn%yi5g9xM_Pkm)3nYDc|6$R-kzn%{BVc+0Cq#ys-|V` zN;XwEW}kR*%=A|8B8M;glwDCU{70sIxcEBos$SG~oqo1q!v@m|=uHrD%ovc#l8|Yw zL{B3|)pt>_=N}O-kG*Pb>;x+7M>hSh)L>erX*fg;5U&;FF>RE`gR7|UzP`Le*7l_1 z*cH^ijL`DWKk-hA`FrS*tRv>puQzx7VKNO2d8Xd1AO<`|f&xs$CO~aO7R>LVK?@QL zpUjtTo;Lmvu|6hfO|UE&Rou#BKd6C^?KF16QEWhmrxHFAp1d+K0kn|R2+$*{!q#SV z;y(d=mQPuYU?k`>0|Q4H+*{dWi5>d3!#-BmYK6CueQZPPEMt?d1CfDd!ug;R#JHvEgZaO1l;HTBAD|C{-_L<;j?S;A;8!u245D$>m zId<%rtl1D|3p`k3Fo;KQ_@n?FXacTtSx_%`(+@~rleNM=x&ytbl8{ZU$B1qsOiL$U zS6;0pILo+>*@ zf4VZnq`}xLx|vPsdB33Dzy%?UDySvv+{EL4_U+r%n;srG6lybI2LsE#-a2Lh?iyh$ zr-fzSykf)Tzzt-d!B(T|&N`FdwWVp9=&h&M;67o=pWH8}ceieRqGjzl;4bKZOhfb* 
z+iY;YQ%$QiX2x-xF2n4YOi@0ei$0{YK<8VvDoK>V%g6-L?9SZBCeNCxRgfpED zmxayw9xfvevk)=RgCIhRL5zals)>npd095SR90{`Q`Y?a^LtqCIs~_`OG=E<0u47C z{PN{Xa$r5i^YkmWK)}~9Ba~e^HUfhs2f4Yq6_JE!V?sm{>gn-tY=-tG1JDb*0IHo! zk+vgNe_z}jm^1fL4b3EnQ+9OTPY#9dB(9%_^UrO{rW73p5uYniL>6$G zh?foG10MiAMoYA)QBj5&{jwn^v%SSH9;M%TG8#JPZ?=Ua%CDNx82J@!#g5QYQF?Ov z<{r2BgC8|COOpy0a>yqjzz`fi{5cOaZ*iDaf7a7;MF_0vP`!+0BUNcvJnMc%1&H4)hhAUsr9~nwW^w z|LLS^gNYWrgxDunjdtB8 z-)wdy@Dv3E$~f-CRJf=`4ku*7YM)7MjcTZnVpc!A1=d9xB1!1g%dO2YQ-1{ATyW{hYiI;5Km$lB z#ZAkh#Ff?P+ouzGep}pgrsMhAX$yRvemebO!;_6~?+@+}K z)XQbctjV!8A)FQnA!@?U zq8hXLQAcb=d$DnH&Rx%#@8S*XIT$;Xi@;6c1N;oLd%HJt8T~38KPr&qoU7E}cpoeCn|TEtIG4LiVYlxWcpzObZNFiJWw$3n@?X{ZkIv(3m?iU)0b z1$yFppU;Drhio?lPTZ!A&kq|mVACjU@e~UM>WR@AWNZ@y7byw|pW{{Hv|PrOTka|s zZPrjOg?UhMbj+C;YESa^{ztAW!7o_nVRh@kepHvB&YO=RE$WC?DMGwjIAPF4K1)!~ zWW)3(S$eo<(<*y&Sla}*Y^zS({S4`)uh(o<`?)8`NI1 z8{d0Kb)4(!Cs9nP_z|NCqMN-G0n0DHpD=Gdi0AP9`E%)h%!5e@3kx@ATSrkJK7{3o zIT;F9t$ALMxazlM>#!r4#b_>m19YG4#Ar<7c67)03VJik+iXgo)J$11`06LNtj+ke zva)mU9v?eH<%VC4s#j0ZyNyM%N>9Q;|2R%b$L=K8@H9M}Im7TUdN(gdX8iv9PvpP~ zT>a#}XU`sP-I8A~9`-#vsgK%;Okj#>dx1-CgnH@9ENWc;HJsOr6C&5Xg~3Ezne=$y zR+|j?2NC~fBoCR+u#{AvUWNehQue@Arwb`g%Wmb=^3hu?Umq5hm7`)Ehme};lQjNt z_?L>7(EN~LhHV7}1@so|1HHY6VDiXv`f+M!I8{2DSBcP#Bt{1#0TVU&=V>&5z)aV= z^fvv{+bQ1?7(+JB;p3{Sysg>yxiFUT>fCYU(hiXsdG(emm>$u6<9oz}O*PSUDar@i{EEHh zL?fog5#GO~qN3v5@#N&>Y(9Y^{y~kn%DnE9(o&K{b#!+dLdVcMW**G*T-3L}|FSXY z;}BBsd#s~ieQxw+7ILgTcbxhb3U6Eq)>Gdu?=7F2nF#=6P*;}M015Y1YpW3M7v=Z! 
zt;k-M^*Xr!(EyM}68hTFwdUUE@@95dZ_Oq-)Y@5(C(F&lU~2^27V?OD&0Cj{d!;7w z9x}aXZ03ASsAY7bN68VoiMFF%GSDYd`}px=Mh5DCF#K}xe<(Z!L;LLb^wdd~3*S*7=}~LULF_zwciq?d_xGi&xa0MdWTUOORavrRa0+a@-fpK< zgVJRm1v{%2dxDj6{rYv#cBQBrB8D<SBF*Er z;5~SuoB_)KQD8SvF z4oI*rKHG!@wFvEfVk}C`X02whEiWXNyrtQEW*bCC#AnT3(OR2)tmLeinE(6t@*Ds0 zk4rec_X-OOAqeC*jeyh|Gj;h+GH1{Jjx6NP@WjLk?8FnC`D4`EUS3uGyO;A@WvadF zcQQxt-~gd78F=V>$eLn+JUKv`<}H?)Yy2LPf3UwFg>^ax^rXZ%JpfTjAfj@A@U{lF z<=Pwa7hqQhg5Na&U{y=K37)wF^f{S7zTstWmE2m!^12f)Qk2qVZaeTZIhSpKjOcdp z-l;%V;ZA+q*4Kz?#8uDSI(9@A6*_m1@yn%>LgQaAG{_X!{V3J^`KIdS{~ILu=g*Tr z|IfJM|K&s61CuWtw$t9Zb7!plli$Es6@mET%{IlbSrOzvhqrhFz_Uu%LRf~s-$zk( zR|nfU%u^~NX6PIm;zV4pj59-=F~TEm>n`bM5fP2!4D|HAuXQXxTuHTqNrWlt!I*7$ z+XxYJ$yf}9@lI_M$Idh*xS(#smYq8jk-WgF>@M^mHK~)k1UCt%6k9C8%VMjWo_u@2 z?21B&ywXt&rnJ_2?>@tgZmV_Mj*7ww)FavEw)%Q*e5k#>?hLyMNVcDVoU8|#nhJ8f zm&L`KH*9!1HfDoiUb(#z_iJB!Zq*c$j>71K$;j~Vhb+sElmT#+ITDV+%j$qA06BSg z=2+y9`+oWI1=NTUxjWghydw-vPp*0|T?xvR0cG?zZw!3fLC({w3bGaW?79!nA)t+M znjBQiy82|*uiL^7gj{>cvw~e5*BA+kvKZ&-5w(2Bbg=GUY5)Lf=>ilLV5waNGV&z2 zjV)dB)|;4_6TsYl1%Mcd^NNKLYt|y_O1t!8`8??H%W8!|qfAjinul)!k^v1Z-6 zIz*O9TeY3fBPy?K(8UFde)MKuv$N^NPJ)z&UD+eJ>14_cA#%aLW0FX8J%&xZj1Wyy zBalwz+8urU3g>7a1rgwzO*#?knQw!UjI*i4U&+ivNNx-&$|rX}Tm+8zkaZno!zzRk z$jjecvzgm%VNTi%SqZsoM(!(Nkd)X}VFocrdH9kUNEi*ek`mv+f9QG4N!U%TTe_a9 zU4malzCYzVj6ljo*H#6OJP(Lk2`M%)_bC3RjcevzONKrZJHjD?FA={7u(qZH zHAKIN!y^0-HcM3Psk-3X))4HvFl~r@TZzZG*1OJF86PnQdD(tsz;EiV0jf!ZJH$20 zA%?9?3B`9Hnu7u4K;lrAXf{7HE@0ht7rVxrR)Aeg5`9FEdsTIRB25~55MVOxMjImP zE_$m|ZtO)+zi&MX)?q(qLtKzmKPTM;!8CplAUZ24KEl&0T)Gs-YP7g8Pp}~5Ux|3Y z?0fc{#)z+?l9CU$VHnm$u(bi_h-5b;2>|Qg2hdJLK7PFI-gYi=7*c!3w@{7r1o6fb z0@VQ0B-@R6E}-NG4OPv9OavyFh~+ z1W4>f;^{_%Z~{>(hLJ+eGFJ?i576X9p(g52N zXaKz<;hV&%w-JuCG@M}&(qr|9*lm+9R3@7zMDVl-%xbb!iU7!PH!3QssnD&kc;3FZ z`ZX&nYhyDC-e!XJ1mlN|YHr41)SsK5zsG6~&b$FFoj_Q&9SjT%d&`v))ao!|G#~0O zXrD-GkONwAf;cKQpND#6@!W`9Kt@O5{5F6z_d-KMF;*SiCZO#$pH7@H_U%z#^VX|< zd|D2joA;JWhYGh{o?QxexlnL^BgPX=!d!34yXAZ^^b%D2{EX7$DT5 
z&zuB#`3{N!vKiTXq(NrojR8?CG+E{C*tjcj_^nEqqv?v?LBggNq>Ob~A!wSPxR{^< zNe+YqyW#XQJ_#XR%91nZ66>k))h z%;&6h*m64FN5gZ*NM8t`28HGGIC1Il9R!C|Y$q9R%M|25*(r|Dh3TZxf@56Z1nlO` z(U-AL2yBP`zR($wqo%=^F@;hJ>}YBf1;lK{6WDkZY5^K3q|K0J$7&ZAv>nI3Bes}Y z3ri;5kK&m+)do!Zrlq#1oy;8Czh4}Djy0%W+Td`O>m>I;%+nD~6t<8uW#{0i?4{{X zp<0`WWH<()v;S|+ZFwB`_3kF=yuKqlUiI@~{CMhoLraS~iHH^ZRb!PX)fQD&sN}kv zJOG*jI|$$VW0Q&2=V=Jx|L7v$cwp1-H3_MQ7(noF5?7~6V+#x2BD)_-Eu{)=YgXRZ zJ1k<`rxclSx9DmVM2_Q5-&~X9Je>l^5P|1u=lj z+iU-DI7iR+?slep+@-VHGw{dAj$1UR#UN{m(6C)5OAnpLu#zci)|XA0O>Q=8zCLH=?v96hu;Ufshf)UY zXK9XHt*$Ma5?02AJFx3&;Q3-vi|m0Io6U@G29@BSh@rIMRV(~BRYgTbBZapoNr_*0 zCyC-saI@j`Naqr;YwZ{WexLX3NqJ9Ebw0?$Q`g(lBU!jIYRzUgzI@UWu!g?rIu1Sl z=Fauwa`o<^Ixc?~M@o~rY2(I@=R&!1tglK+1T?yV#**7ZX*sxqB?S>ZHNr5&K^$z+ zmR7Zc;gIE}1}&Z~0iP*)4*2j+>oLhI)lxSBewov8?mpq(`GChw)*4F17FSW=Pmwm;`uOX|+Pvd>}tO!2QI|p8cBT2HSW9;lz_CPe5>#NgBQ4 z5S))pPBuVpCf$BW3%Fq~XbR;~=Ue+_Us*lYj~iSHAT-xPvU48Kzq7YD1`!giHCqw! zr7=(%WXJDJGtmR^3*nNde#JK+Zsi~F8+M*XMsED#x0QV;z5)yg$3M}>CO$Gg4x1cB z;(ZjxOi+5vXjyMacD6}KuSajf-yiR`oS)(h**g!(J)<&oQ-@? 
z@v0R#XIF-bK#R@T2oc3hG0VcIv4#G+xG0>O9(YqUU{|EM%-SJfVFMO{FiZ&-JO1^d zTvyG9u1cuIk3)L3C(xqc1GaD)aw*cS2LtX1KpkLY3{FEuON(F&t<^Q~3m|ucCeFQO z=M=!@MS6o|{Ev$vx*{;rR~jt=_JAEh<@)vO$@Ww%{oLIh1y^DOpqoIlJFk6O7FA#2 zi7fy;-2@lBGJ~v1)~cr6CVC#NsAZuOPg~C;{sFU<*8p9S<^^e41|5tRnOguWGHyzF zh6Lf6m+(w<(ZohXqHyf&s7k;}a6F&K&(a;-w&YXjDiO6DG&)Hf3f0)we7cmd(!nm%$yZ-pjq$BJw~=syGESUP$T4j{=rM*` z;4d5C*=@d-t)HHrMsb!jGi{<@vgmY%*!@1N#GL=_C9gC5gR$<W$J~Aht8BA_~A|`t_@${wN{vXn+pDO?W diff --git a/docs/benchmark-results/tflops_comparison.png b/docs/benchmark-results/tflops_comparison.png index 7f28d8d8cf120e045ec05afa13c68823a91d4e86..649c80621c0565a2e9dd27f24121a24c8ce3160a 100644 GIT binary patch literal 85572 zcmeFZcT`nb_bo^TwaWOZh!Ic;3Ia+NNhZQ2N{)g8l7l3XY#9m#$w`8sP8%6S> zA{EtU7b>b>hyK`zSJd09#qq}(J27=TB`ZTahpRT%sAR9&T{pL~GdIyYeB+vpt%;Q- zHwWh_j?-+1jqU8N+nzaj(&B&p1c#N4(aBd@_Vsv|Kd(z_*iupL-9`TU#VEQ-k?I#J zD$2!k$~PmwbvxXoR_Uyq>`mAkaQ6J4`<(XpB^+itd(l&Bg7puDos&B^oI9tWlYCV6 zEcJ#hV*l|!DjCR1n|$=PiY^aJ2*IanJqrO;9)o3CE#U$ttO~?%x^lr*L|(~=Jw0l+IF8` z-|O1`Ct0ScJ)1=kLR3^5hV_vGxT3q0rya(%Ej{iUyng+fZPhhkFZ(OanwX~k9Y#h* zdsq}N&W^M+e|mMt{@cf_uI+mllxu$N{NNd?L=*N%hB?*D{K}OpDVZ8-YHACMi#5Hy zx^Fyow3U19ZFW5rSRL1<>n_}4`!Uxnw9b`F^Zw&0!(073BqSvEu&H126`p(a>u&CvrS*iH)z6}>spPvc{I0?+}{n?umnzDy!mH7>ibq#R!Ydp$+0TN-KTS3w%&H= z^rcPP4@ugt1x?KM?{Ic@c64?gtv(k0;n~$UO>AzS-rg6+h>6*zmC;Mz}+)(}N z*A2?g4Ja*X8cHXNzu#t-dGusuY2N6^p}@QM?wKtAn5>w>W4UH!6>aLA-JaGIFRk@h zt+&**SxD}Nj+Oak`ETF8^?&=uW@TlilBTY}u9ZhV%dWr5e@2)yS#qp{*Rtz{^8CVN zCB4OX;8d=FQGKK{&Atqs63vO?X`ci9mTAwg)z&mOEBW~Nuqr3{nYLx5^Y^-Nx3n3_ z$UMxm>dk13l}N=+vtBOD>FVn09~w$}eQ!%DC6GD%v~`SScVX~ILELPI-0E@lXHUxA zBKRzxFV45;Swz(79};#_uCK4B85^vN2sM=nyUG_a`z^b{^h1)u8Jqt5M;REhbJikH zAt=2j7O!5tnw%$W-FLTiapc5?jT=WACFpZsWF3iZs}4S)mLlOx$EuZ=My}xV=chIU zHOVTes*5wt^7gYMDqeH)!m~jmt?4Hyo<2UURsIazwuL1AzQ4Dr8YACOQSI;%u>ER0 z+L~TrbM`D1LvQ~Pd?Jl#QV7Ro&EGa{e^+vAD(lgsefR$Uo0W%0d1-E}Ime`#$M&mC z$>K9l&-?e^$58^=S_RIW zsp;(03g{mn=b)T%UCjB~5N*|1-`?JS+|R!dwxgAjKKBp zpU%lf@X1~-v}<(ebv2rH4G6h=_xJ8%MZA|m3nFc8{o 
zUS>b48X=bAwfAJ}%y9F-w{MY8zZkX6R+#ghz`JmAauzyHC&)cM6Tj^spK;dCMhVLFd z{rtwm46#9D;kGiXW8I@GEOl+Rq=WOGN(QcZVWvhiq zFf1&Sy@M@kx-GiyD@^?S{IQo^T#B*j_w3%SpJvva;O#U!5_~O7vgvDGMEYjAK+nDL zZyWw9wjBA8e8zpXIP1{9ea0%r5&ePkB8^=IwiBZ_pFVv$(iHC{wi?=B|5)&h^ITeM zrfw#yN=niNkL@&_9$WY870-RGf1G;c!rw`V#rZuuNQeos?fbl&d_#4Z_7E*CHYDes z6RPiy|B%{6PoIfhr4-6>xh8(droJUrjh(KlNLyR`VVikP-1eAAZ>0_3@r>7@Zh;-2Lr)F&#`!g5Xbev@a zW(K3(HNUAP%F*7tcW?h#l;hVhHedF#to(fb)op>IiaHkpb#((pBO@ct=OQ$h;v_U< z6%60r+p>4&?QVJXJ#r+A|1k}@aFGr&4;#eJ}4?G+G$mD{`vj;f0WEL zNvpl3Vusxtib;rp*a;!V+9^s|EU_{!MQm>{cnXGtpo8kgRlo&-{^C zL=ZzO4Xd(bwqczVx6eCCM_H@Ry!6*^-guii<(jrUUw*XEjvybHn6Og$7RD2vn%;H= zk2yCmG&D5s?L;HD;k&9lbyB!KoxAYr*!oJIJ;P)E@%@j+H3Y&Uw$nAPu!m!fjg5q8K<6it+xHF7M>@-D=pZ1FA0 zrn@%9;-BYfozI`Yea2xt?a7nF`j2B{W0~cn(w;p#)&0Wy0>4$y2a-uP^ZC*Vhi5cK z^}5bqX6Qj+nNIeV(}+b_TU*mKr$mdmcJy&4;9@k(+)DKH^z2p^?Zw2zhQGCEhYLDn zkgYh}_=j4W`qTICFXP_5hEupP2Ra z>(`$ly@Y<{`MCd@8DqoA(jP}_7enK)%}p1k1~zZs&Zo1(VWOucM$GGBBTHbKc0rS= zJ3xv3c$Wr}vXX&89BN5?woyY05>VImc9q$aJ2hsR`|GcM~;ccnO$Z9L2WZ<>_-eVCU5E}KRv%;cDPxh*Jaw9q__(2 zgGR<{sPdZ25h4ryfgLZbV^M8QOf*lLfB87Jb992aKM4t(Y7{J;Hh_2ZX`;ORAz=^ zeQV0g#Y#&{M;c>2(X4ZMQ0wiV+giLl-r_-*P!Xz>WXP^X^D%A3yM_+ za;ph|AKy|hmP6#nr{8kT+ZBUZRRVl{eUSqJJ>y4OQb%*!bec?9 z%(C!sr)jd7*Rq69oq9jgJKkR%==AyCUgbE+oqLX7I=k%v&-;^Q%X;5zr}`)Ybu60ha?y;u!LxCaP_-IvSUy1#NFAZFW*qLWRU6IeBK zl8`Kw@uRpODenWkO@b4}v4BGDUHqtf;4un_T$PxMf9j_Ulji{Nx_QTrh7XR3bK(Bx zYeh_qj6bai)o4_2G=jUt1q z7Tp+APElsT`q#9#YcNR#T|$PM9ZAg`iLffoV%=AL0+5gA=JzZ5Jn4)qy4iV*vgu0` zWoxF>U+XFsfBd-VD+`i^V^K?)C?Eb?m6SEOhPT8pCsiRM;4HN-F z#EJ@R2NM>1{HA&a_cZ0%4J&#nY>0n*KIhz>jr+yyqN1V>2;F=pvbJoS>Z;^MlUUx< zqeIZfXl9vZmK!iqMX&qHY-oAjUR@UjcFmk;`@}M3!nm;Dp>7KUd+VY_MY}8IITqaR zZQi3uaD{o9UioIPhCu8^E$uBxl7S;ek?aE2C7fY0DVn({B%z^Xstb7li8mu2xV!Ti zy>==923Sox+G4A&{P^NqVWXpci&>yr948G4C})8_x5h4VM{`m8|`-Jm7NWD|N3sf>s0lzk*!LBMg0sXPQB254Su_CUELs^w{MT7Sv zt1lPdRKVhKhomYe$9Ny)9~n-J=CgEpmY=VN%{zmoPeF9?Em3s7JASFymadV_+|5af z)5|ySj^zJFzPq*j>9IqP0+Mjc;z52(cHL5Kpkrl@i;s>UJ$k-D 
zX!;>1t51;n+XWCilA8~lu`>=|T0?Dfnkbn!w=4oAf?S) z)|KVu!}zH-q;R%YtKQPff;Yas$6r3=nyGnN`1|{lXE_U~sWt1c{KJ}7)J>b@-qMvh zt=Vtw+;v}caih6rZLLV2<7)V^z`*xvW?`ucBW!= z6G>pTWn6B)^~-N;i%Wf8C)1*iQLxF{z)I|Ah7?@aR^|x{P+G}<_Yj*t8A+L@+}wS6 z(gSrMZn!y-uq~(YfFfF9cM&ys@Y$3&r1CsjjQo+wVdNOrIBbNTY zd-pgaqZPyVx-pLkAv!h0~o7++AH*DBIVDjSX(nQOXb9a=t9JJ8_5>R>N zR@0WoPIzPK(zqbyL2L`wOh!@fsajLrb{!oZ)5aLFJh=&u?j_;Sy#t{QC)A~M-Z4@J zYJ!zOAuaM;S7zIIufG1>>uAQp^k6FL@BTA}_*eFjz`arlE@M_b#Tw_%olEwydlbgf zvwPPrHcLszz6Zy~HGU77a@@Rit1>YyAX>wVhP}ysbvoLv+^@MUqBULX#Hmx4 z@VHB|ED=Qqcuj-qUXRTjM^XjNd;;D=5?*R%DVL|D5egiDij)*v9-B__58*_OhKHbH z0QxmCFXg2U<#H8-^O_}ec%TTSC@1SF6uV>&8X)(kXys>shYfuFnm`yKwjBw@30`@Nc=XzG=(DkMC>d}3-tPim!s-GqP=#R1w6dz~zO589XJ zS&WR6iN$@N((v)4I^+U5l8``^KRms#iNNwU@xv?F4D6-yLTk&@;hg$^lZBohY^cTl z2iR;nky#u=!>$?E(SY=Cl2v@ImImquXJ;SL?{00{z~J&LzB#*5Ku|FC`sdfGPCo6}?YYn)n-@Lg1W>#S=N?HB!Wd|tN zJN3D_In^w^^0b5Oy|=K3WlEfpWQV+krTcL$BEq=lML=C~JHB z-21#|&S;Xq?mJQwf(p`eJc>pA*_Dvhn~UXpb&qp!)Q=SR_4Unkm}Mpdx_Lf$;P2(- z)#5uuxB(cg;#lXaSLb?qdZuP(a*kI?N=m**l-hrK`3vO%!#C;^sxsKjH)XKLcp`cnGMdYo_ zEUfZq!C+J~aBK6!1-EbAMQz9D-g(d8usK;W6Hq>FXhz)){#&hw*wGCMIvsCbqbCnt zDx$5jcm$kr@zb3hY@T_bC+T>KUADbQ;LSglAUpATst3qRgFha3aF}O%_)!s(37=Ju zIVV@tojVs&Hrx#R+}|HZ=nse$%_uD`Eqg{Fb0DSMa}T%YUx6=Py!cog^>uw?iWDX= z3NL-jPNpdelVK*`(Ts8_y`66mT~lXU`tf6*dm`ILVV57zq(V8^-QC?w=6jq8AF;=N z-Ad100E(gRGLZ!S5I&Z9xhMfR^k6Nh6rS51gt}fM2San=V8tx+z7LSzKEl6b<8G&5cJIotm8u28h$Xe7U8-*2pVjAM}-orw5s!4W@h6 zD*2d4H32>3xUad9WLW7---a|dTQ4|47_ln%FN|tHPN~V=N^-xD;$l>*JicKw^(7Lg zI!fc?;}x^TAceytqwm7FNrF!E6jpMZ@bd9dEpuCW_}W;DNLx@Y!TwSwTCs?2h>wQ@ zN5wpo5KF4Pja&9|m`@f2sq|{x80)x}CFHhza~tg$Wm1*E9A**GBTxwxq1VkWj%Icq z8q<|p9@i#tO<=%&FDaW>4sM7hfBJ22PLOSed=Q_MbR4{f1qC4As#g{Dw9sL~GH1*7 z?X9TrHcS;K)G~$$4pW~;S!QKplfkB4950$kb7PYkhGNw+HBf6m+{9E~Ud{r65kdku zea75Gua&|}EOc#KdPtYzC* zNw=`i(CLV%<1pUUShlumrf@uRrb{kbclfNXW5{T#>}z_5k6GcpfRy+<#lg%nd6`p9M832Y-_=Q#q{3f)!;32lS;MZlNpL=oujGrh zK`i>qOL37>jv$rLUM?9MC&k3X*o}N(sf!d)0{IT;(!=WydjYqPwlg$(W3=xA!Z 
zbm+)~V^LMl^?A)dUc!dW4d>uD0mVm-Ep(bq5zF#sw4dnd9&rkf7Rj6n>~H%Lg(&T>yzO; zlO+Up z06*2cWE7Gv9r30U)1Tmw)|6G*hJV*>~Ix>N)gKAIhDp z&=Np;ZUMNY-n%yuk_gz`Wg<5qeY1;?%O3bsO)8T16W*Anw;?c7-YIUm{*eaDmcN<{ zKU))c!f&aa(iy006woWVfn)?p+J7?xwnB4zw$W_GL8~O45+_!*j7ZPDCo=$O)MZs7 zc~bP`w6YDo%Xj<@eotV4kdP43$klWl#gMyQ!d)4?sdJ4RkJ=BtKi86?qEsEgL?~mf zWw$23W!I^Wo>EsqX$gx+(0E{*qZFdhqG!XNckJX9X_40vL8nvRDai{n1y(bnp-?j0 zsznpq3L*KeY24d>Bxmy8DoK~lDeC$6l!)g_Wr9TNV8ufpKYomhHCIV%%rQ}#o#@qt z!ap@VtpwQ0mUm3liY4pOYcNcD_vNf`K8qBfhLnpBcCn)S)8d<~v7l+*^&)HI)_sT3 zNkP{|le(VrK(y88Aio?32L~&(ejS&|a-If;qF@#!aa50+T~?nfN+O{z87icfl$0QG zW}uZpP@w(D2Zb=MYs$qpt+8+oDYp(Ul!bYCcueNw8pS4k0s{kyD7Zu9$Jw&g#Z)nR zmR|ZTG;EhpCYdN8p^Hf0?4D&nZ4#*peA$YYrTI!y2eP;+Qwk!XCv`KBwDkobAsqi( zX4UEl6U`x}6=(E~f=}w|V2`&zCU$Cwc~;y9wF)|sQeo#Y`t)&<#gE@G?_qDl^ScBDB-qP{CbUX56v-$%$-b1mhxtIEh*tVCIK$4t^=c^Cm=8jV}sK)=Lw!Z zZ46nPW(lBG9gt5dyZ6qf9jv%v#7yn?Twxxb4|?sLoq~09EE412I&xFv-tOG3M3Z;z zMo>fKi=Mbc>1xkc4pjYtb@du9G=c&SxuZ#a94Zl*=1tv7$;=3Ts}TT=7RX#-qZ_|fnZEgL{;WNht5lH2vyY>nr4L!q!N{MO0D`>#t39_?5`}BaZGalP>l_P3W&;-<=mRMLi zqXW@DIvU~c*PJfqMMG1|(va#n{S|*Es>m;V83Mdcgi;ni`VoO%+~&w**7~kbV#WEf zgc~-m@$zw6^bRIP{@k*q89#cAEYPnHNXZ7STm)xpwtv{aW=MO+?v%KNH~K*8m3-3iSOYQ4G>WX)KM-~%>N?gHMwFc!#ib&5uSd&6)V6aF* zL>Mc#8)puzjv0cM%M9I(&$EXJ9awaUJ*>)?AVew0if>8aZ>yUa>@) z0p&DGy2Np9XJ~p~DI*p+`vv8Q$(eaZqA&qQc@N+x%+Sdk_GC!R!O|h3QEK^z6c99)r#_R4eBN4c@!j~Nb`x( zPkVx}XzaPtvAC*@MHmB*Pl{5lNqh?nGc4 zw(cWNeQH`-3$`8mP94V21`IeZEiIX$NYXIe>MpRAK!^MN?G0OajNcz7iZWms&s^<0 z=L=FxE7qbmOC=VgAFCyzh0$5NG0~Gw)F|-9ip9hzCn!8Nq`YE2f?3lF z-jFFXZz8{X^5d#&- ztLdWoKGK}%+p-9Nnu5jN@55da&{uY2EF2J#Jt00c4!}bNJ4oK%U8^lBvbeNAz9H4m z?>`$hZ8BW>SZ&uZFJ7b9ofgzNVWDL%J6dW`y~``+Pc4R1e8B32<3hsYweGt} z8cW1;P@X%FH9BzW%GtmBswt2Wn^5`KT34Wg4o8XP>Xx~wmUrc~UfT;jfl&4o#dy#1 z#eRnP?$M5;1K(zuL|t`WDK_1ArnF{Uju@%ig3!kE%T;ef0pM9$!@7GrF3!_?GNbV0HYpC{3F^BHIS zx&^H3LQP`_R@O-2Q#N2giH}(q@Jwh#^U`(8L|RsqqKv?Shz_kU08TF9 zbBMydQBbgUY4evaUncfD7=B38XztrI%A%A&(zH~rE0Isl7ZhsTV_)6blnQmfVyZ;P 
zOJH^)kd7tMbnE8L&mV;gE$}>Z3`_5ej-ww%bC=!o^5#9P2E5^VP8sy5vrcpEM#Ro? zU=+07a5AbF{lvxjiIdpAK%PB%n)9f=2^vB{p*{2hlRelEc_2F`GPxu;M!$c4Q*lFH zzCk(hG(UPDx@Dzelg?;@!Um85K}$+%Ilrn_(YY?cXz_z$yi_uXnfMV~)7bCV@`DEb ziqA3+IBJ9AAmGIt6<|k!;2OGeZdFZZu^1auJ8x#`iPLa=D4OuB`U_B4Pz^(L+*j;V z^SF)bc`|;Xpti|ysmuDaB# z*f%?eiLz#DipUoz&01-r5Xr;&j~QIVhGP@4^`v1BANep8wv&d&P{sI>c1@$S z63u6+2fdrnDM}>*qx#_3SUQSnGWts@z#BT@hO0?Orb!*P? z_Y>%xqI26 zbr@D>2(3j9nu{z9XKNx*5=sWCbK98)cznN9csC;{ppK^_=8d5d&bO=q9P;Enl)Qtm|J?4Q=58T@Uh3Np7fn@lGXjljy>$uLUpoP?>7nDbO zi#xY5)v^c&mWe`AChZQ~i^jtw`-Q1%!0R%ZrHd(e_0eizy`jKtkDSQuK~QqG3>EOv zeAWX($QvMFv7pnVU>-CL1IT7}5UunD@)Oi#eSr~&?(7hGrmhC{U=N4(>2BApbVBO# zqANK>pV+pa>#eD2$2W3Uf9_`97cEOc^)Oz({NnmmFSE<$=4ll^v}O;V{2C>kx z_1x4&GwFT@o>X{z#-6Q}kmp70*oVpWXkVo_h%CvVhUbYDVFQzDpm9JRWh2UYTtFpL z=Q63kNDh1U?mfmbsR_I$=eoKuNV6m~`F8jIGl(i)HgSJh3!bVV278+0mkzh=Gz;{V zbj9s>9qn+g&KUKE00s=ld*vk+F*C*7FSZ+QBEm*?|7nWI;t<0Zq1B1C)st@+loGGO zj6}=?q>%$TwNafO@*3jO_}wE;nlU9gCi*>l;z^^4+L;*EkY$UFvJx3B(1k<4R2l6Y zVv+j7{T+EA{M6Ok00CBwrC1$Ru?H-PHs~-tQ;<0gh4hPaa8Q2Y4M&?8{M1LHJHY+R3#@1GP-tI*^F-MMc=4X~0pU`!gya#L85jYc|%f<_6Dj?Uu z0eF>~^Y=}gQl)}f(R@&baLL#`tqqWB!Wqy2b})k;{(hxtA|viw7;poKbw9#1Me5{WaJ;`Ey zF&-YUDrkTI2D&0w{YoYkZ{Mm@FEE*a z#B$oG!1CD-2arJCAr(hlYlf6Z`8qc8m_H=C#X z)a^aK%LmR^;H23e=ZWgtTIqbt?&nUjs>zDwF*i9Q+!pl2BnZ(u(k@r3+sVy5h5*IuVuh~~$-~E#$sPaO@1-2q-LjscJehgXm z;Fp(|myJB#n9>lZ*O1@iB#kO4JN*N?u(-8R0CLsw)2G!TPHAWwa|4^C`ZuF`z?Xbg zMJUjwqjEU~4Pau!DK7*gjQt2>P1yXmShS#MYEfR%iB&@*rjC9vTN-`%bRBtw^*_zX zjXUTf^|RvCkozY!^HjcRq^h!~-fqVoe{_*&WMm|Yt?C+`$pcT%=i$scQiooR!6(5P zQc_%;K_CKLo9HqD}laWL-zK+buL1Gkwz(vIa#~Frf@S~U{q~T`9U#H>PA#7 z?mbF3VE^UP$jf;j9&g+ftA*< zd!+V>L2_hQe|A`jp`sKP%!*z-k^BP>oDkF%4i{J^T|c5Pt-J&*QB*z!oR_w_)zn({q z1)WfnBh)3lPh?6uvGmaIEyReXeqYWBzkTslkD zObv}_38Y8832LhjYJTJJ?V|#ul4m!!F?Rna>@AoM6R&Xh^z(0aa&}JCJiVQS z?z4+OeVSbtk`?1~942(2T1sOl1w&H?TaNwZw~fXytiho#Y3X5n-NB(PK{kTt#y52$ z)e-ByYT6^Gxo^opQTt<`Gfu*Ist)&Y+bk?jaN~)iBqyOVKq@6&l!szaiO#`4DSXQg3?$S-1Wr$i3V6KX&|8wfS(J&GenJl3cNpp$I*)BTe_zdYI`cw4`Z1AfLQ`j 
zftk?J|H9jhnzRjQ4WZnMUc&Q%h3d3OC#%i_)!dNg4Lc?1U6o;aeUCEG^6ct`V4xT@ zfo-l|_rty!Y0Jz6nN`Pis9*)TX_aoEeMTNWs_6%mYxc{9%A;+W`A%c~*C^2@7>6Nx zDY16^z3rf8F50q@L~SB2>&N z%QnHMIa%qL*MtWa`1!{un=|ux*6nQKvVAD?oU^uoMk>()<*;tRE&)ca4Aa#t6yJkw z_ps&~Z^Lwpr)o|t!rVj{ERlYYZv1gwSQ`}#hoHlAo`6yKA!gylxVA7!1b+hkNYy00 z8{nE+IN-<#2jW>X$M`AnPN3aEcx8Dh0O0)GTq|Pm+VtsYm#GJcPXo5Z$PQQJ(~4e% zX@S!P9dcLbSQ*2CVG0R;^Ii_NO`A5spqK)BOniW(JB?mMw`s4u$ihXCR8^EuOarjN z{fcr&SjKSS{t+_oQfNQ=q1e$D6F8^p+G(G^#(sZ_F0>XtgRCeqq+-jEJ!n-t`8RRr z^7q_)^R*PB22E`3+)yKfwBc>Czi)Z`RUO$py>Fd>*~r4aJOlIH{#s&}3K!prpV*O! zHo6I>4#1}_LTiYDsoq9_^{_qsi0^@Bm?)U;_yDh$jlj{aT0AK%BR}PVSRqsb1?cJc zS@JbZWq0oOIB8Da_-dIT6FtRL3|hqTv(?^&JQ zyLZPT8&ISo&$!JDH5MG&*`yC0x}vwlX*TBL`a}TNt)1%=QHOVsAt0$O`s3P0(>|9;J{^8dW_^J)B_C-HAo{PQFfF-IVP%@Zb*(6xQNyXYs^Z$o~_Z_Tgv z7pkW9csD_0Xe|C;5eDRFV@|JmSq1yB29plA{hXySUUV3*Iv-u z2#_nS5;-Oxr3}q%4>dI-W-mzR9_@89Zf4nA%8pS=Vx<8~Y;)Ohy6e}WzJkff#i`npEg1KJMdOH;&pD3uR5U+`mfIrkfr+yFn4B`K z8{QV6U1aEy7+4|dVO2P!eJJ?xs?6g*ey4i55j+TEAPI#lU|GS8-59JuO4PWa4KCJ_ ztRxX&HwT6?2Da0J)Qy=>Zi0fq@W|jL!83-FCLo>A>cN0wKYhAVGFfpDmW{yd>};^t zL=-v}R#w(Glf=h$p-^oBtLlg2@ zpVZH@;%_S|MlgvH7!eUs`lHXwT$tvc5Y{2e=RHRNlk~g@8D@P~b93_$+EDH7RDSdV z{WtK-{SvU#@UGslx36m|Wnf@nqF7p5s&+zgOC`Nt{DoB^<}L*8WHgubH+#Z=nCQ3( z#d9w)7~H#iH%T+s6ri#d(TVq#HFI!q7-OwfRc?@_N>;K9XuoY0rwN*2-JC2%x#^Uo`xc|dtS4qc}>nad!KpemxrZ>537q|WSXCpKb zCJGeHypA4NAji52l;8?%#U{xG(|9cGtcs418XAMSHz)NN__V}*>Cyq%htRxUgj{Zq zExUs5lMPr0_GAv0&$;3e*dF{@CeZbWu7rl531e^o0X4&j36er~cH(30C;P;Rhlv>I z0Z+LmzmYpVyLbrTfFL+P?^rPH6P>;i|As4+?BHsHCSq-XuA>S$SOqK|#*b9kBU!+} zf!bjn@P7X75!u|!_Xz^v8iGho{54Dzpar*(nf=fm5!fl{H*DUrMG5_(8T_kOt~0tF z1h_|joN)uX@%@4WZF$Sk?LkmI$ zkDmxGP#AKIZ80H0SvN_eyP;t|4Il(?hgs-q9^ds;8%BC}fV(Wv2GOzzDk*WzqxC{W z;_Eq{wD1f#2^kV@G0QATAP5-7YXH+Y0#hMGv(^=}FjOH)EbLb*{Ef=*7YGo9uz>J? 
zKw2gqp0M%J128-u+WZLI z>8-ovh5QFr0#Tr_xO1oNM}iQh%p^5q;)tYXyaH1V51gZ@S6Z_3Q9A31c>{5$#Ss(s z4D@V5QV7iA#Z<%6NG66whzA5&8zKNw1xf5kAE|iIeDg@;Uc?6S{{4I9r4nc$DV>Tv zv!QXY)CR;ss);L{fZHq{n0W|EtOY6nz`9{}H=(r%d^3PdveB`bnczakPA1N$fNBG& z*?nYXu8WevJ>ul@6`Hz_Z5_y7c^LH%kTY!_jwMsPL|prX|1y*jZ>-4b4}FM73S_u7?K z6SBpGv=1@)V+>VuON$DmkN2e6gyaUV1&7l5V-Y$|3dbipxD{Fo?afFrN>R^>;j`>I zPEQ}rCMt*hLXrz)Q4^5!MHq|N@-j8D#bGQ4I*6j-&`v@pf}BIJefxvoZY0`ox~i(4 zZ~3^mxR}Rr$^b1B#=1NxFz=B%lR(W)VeKOQ0Za@&!477DSq)>|^{wkuuR@XA>4c6` z78~4G88FP`RS9{Xk%CSZPb;otwA7c>I(*F=!L5U0j7!x^iFLrFpke@g;Hzu`%L13b+uC7)*cCU_? zxtQHS<|PU=^Z;0n9C@X_ zclYipNLHDi*ZIQxGaWS!L?bZY`EbLt0I!BRJunAis2Fz43_!?vW3c89x9{oHd zS6jx0_FMf;v(ZAf^5#!%5TziF(3y)LMExz-O)oJ z<^&Ax*7^TH3Qh#l+E;z8Mv%^9+rb2lY(ov%Yt{obd)Aw(aj*mnN$3&4d@J(&&(#gX zn2R)ElrlOpBQPz>MV`KDPSo$ex9*R@DpCTFV1nZ-nel}C`Kbv)d;{Ge4IU`T0FZAo zSqDRI@7FJQ_1)ALcgS22b2^G0vr&k!$+HzCM-u$67Z3Z! zVt@XRumA79WQ*fF4kI*JF}eu!UNd%yT)5C(4(%6*VYDE9{QL{mYNXBsqcYnRJY-mh z4}P8F{Gs}H8-xzy7ZJz>wCiJ69?IwxVE*1l@S%&xdYqiKJQ`Yidg?U!{em$!TLJMO z?|usyKPrS)2!{kARiCaBhfLS@Bf3Y|bBbR?)c}`|5CI}*EsJ`!eSRuCtnhOrC z``OQKd|O$0;-Bdj>{UyT-{NYZN@7rou;6cFW43~X)E-vDJCyw_+^fe}S*4KM6l7$! 
zZiudGw&4o>89R#l#0aE>_sG3-C;tJk`Xwz^vKvH~Xe6T7I{;fS|^@IGe07RnVLAF%3 z^dQNqf3uB1|K9ao@^TH08Zq+Q*4B+MT^`-Esiw=1f548TQjpYX$xZMdViE@bBO8XVsS}i&uj99Mk#FOqOWA+mD4rZLoxcLATj;W2^Y=VXGx7mq zwQD%qh>4UGKwvU2N!-N*Y@KnOigTJDPsec!{Mnz$L#Z-EU(O!{1M>Jod#yZk@_nR$ ztqnT|`INIGKlk9_wIiXm0^18{&l>U?zds8RkhtMhQ1gk}0DX73gCFq+qr56Q3HD$C zq)BgBgr?F^u$qnnj&4D3HwAEv5Q9JS##&Pk%DIJ+kvE z342JOWtk%X$e5aG;L3rSnVBi7=_zE;7oD1tO#SsMv^*U86BQ6i$iG%NjXY&Q4DON% z;?%)76bu(bc;s@q-T2j1Ahlep-k#%gL1cu_1PxHaA26aWv)*$e$Z(@~_)tyoldkRM zQV({O=Q>)=aVZvgCMb#0!1ldh{slt znp~gbS={>p9?PT?=hSQjsZTI@hQ1#kE2Ve z4L{LGb%vJHYK6P(#KuIODL1yyujy;6Qto@Td7cyXt6kZ|6>WLyJ%5mqvh zkMUX@N#F!2h>XXjig4%@Cqdp$LDs;NPyp#C!HEgF>Xb2*<=DMCH(w3q!{0@YEsKeb zo!CE$h{w4bgoNqC06mEBsrT(m!YM|?7*6(HaY@N6IZpsZnQv(kts$r}7szQG@Y9-N zjE0!=e2j=#aaKov@XAR+XFTnyzxvRV_?czOY@cP<+WLMI=aAr-zqH3?wRdeLnuIhl z5TH$IlD7>r@4%ys6rdfxPR4XmSl(MVk|fm6K==l3o!v5sO!k3-GQ(|34pt%VCb&go zFerN-047`VSB9fUal#SU!@b~3;sh9&cJVOdF zRTWFW%};PM1yRzbpqRVOfKL$ZX#nvA=pL!aL-XeDpnPAf(DsfXXJat2wAAzk;j;{F zS3tN`5@hz{l&*wSV#Vt(h8+P&vKpRjl=A=-$RelgUXmZ<^pZ$3}$$i)Ug7 zNyZf_$VhKlnb;CE--E8=5)!FsSrQ!%6q6j=5!+4l7ul2O2&3woNo zHvN)hS`Y(OZMF@W1?|WRUn}&Hn?QDn(p!QdRGP6sU!Fjr z&`umBA;!TEJ1|k$h;ayjJUZDHCpHiraZAa<*NA{!wguRQCb*MAG~xVf!A3NRcDHgp2M1U-dH?5$m=<@eFP!wixv56-_onaFs=iZRvTGP%Fn*wl_5po^y{G{6y z_%le<7;u>&@rQ}zqdz?)y0+k(WzH4$0moiIpMdMVN_`FInN`F006jgwW}wG$kd~;) zSf3iZ^`?VgOVvQtfVtyw@gI@!o~|xW%e%r^NDK*ub1X1JILe zY8ganu*i@vnziQAYlP>F!&LV5Bk7#vX4%k!w9e{d@!7tV@H?Zper9dcZii1 zEvu4vB(4#tNh%Pxtp{}iQ2eC=$dL=+GMeQ>Fqj7(lRvP+?5#cwDxW6pSqDs`9y-%o zN#`ts>__9u%=8R{%$yzMPz^L4$q_&*ThYu23OeoSuDH|{ep#SW2PC$zS-Mah~0A9~RY_;n4>kQk+^k|H@RPl$R$G<*m_ zfJHC`U_?vE%L4f@m=U6<3PgSbED887dxV@?+?@VQ#{NRJXn}LTDTj8VsSKO71B36JQ_e(;-ctceLzZ| zK@}odoR|g~lQF(UE*`$alN6Gm8HX{7Ez6q_7xA3yO{=U9g<{h*^POV-7SmtI_3gXX zbsmFb|I{a{g&hTl3bX?}Vm>JaV5{KXLq_Hx`CbS>B{WV;)yPi3YYA9b%{?v^=rUk) zkm?G`_MFKOcXYTc3926+c_J~WVf&D?Mh_i2WQWrtFunT9*oZHJ97U44f(CXbNxw+j z#EgOQ5MpLXg;RnIfv8}HRNr6)J#x4_#RYCyvY^aXRaREMF#8W#i>VX;2rTYZ19X;o 
zgVWA0iH#lesb3jP?CPO))tK{XQM;hdX$ItAx+7IYEnSn0{gatG^mhbo=nT}T}7DTy!@Mc2^u`IKQlEt;Q zK-I!5dqAo+xvG~dKgrmwWe@G~&vTckT?|#BcJ~htCqoQ3h9+CFNP1jVen$f0AY%@y z-Xs?h^&g1w031>QoOKsq$g;;dSWwjJV!9#BrOmBj(igI(X|9NgE($(H(PKL;nX&cW z&n*sfLzXE$hUwwa>t6e_`wcNPpg@o_umF%$JLktbjckR<#q~4w+l2f>pf}-Lx(aQM zUb*2?r&&`>aCfNt;M@;JV&hcjclmK$KdAoQcbG}A7MZx=WSo5oVD5)8Mu?V47>6SE zL`Yp>;Lz6GZ?S6TDB3Q3Bq|y)mW< z$U$b2Xlic|rF z!(!20r~;X+2?sBQr#GC#BbveK3PikDLvRw43OZHn(Pu z{twR3PN4RM^XJXC?~ib~th#le1AQS-3|ak(hmDw|eu`sjh=GTcICyI&irn!gLh8GA z_1OsizD0w!0U4MaTcl6s=E+s!I3FIEurVbpEwxOy0`bupJOfRr5MWAZJrd-M1JV@| zi-ri%1m`FzakQtepM`T7d^iYbF98u#3k+o3iJ2*KC=EFfDHoa|nF%I-BXp~YF`J04 z8cwCjVSEe3c&I<(B4s@=oF}I4} z0uUBCSSslhN2A*ME3dm=w!!MuQR4cOm;gAFzZN?47aXFKB046{E56&mKJP}rDOvw2 zq>=B>o(o$ZCx82|uc@v74@*4%Z@m32HgYa0PN5N+ht}LMmiYClKiZR=`V~pwKQ=Zt z=+owuCrqr zGPU&3i@#{CJJyk>fd?Mev4-+U&O;+71R&E_8%7|R=n{t;F)ZOsME#O#gXhLZTw$bE zld}!Lf(lXkj-WA?$`^pa?o_0m_J>QbC@I6;$wVQC7+|c=x?%c{KmK?RM9tpn=;)Y? zc?$vz$y63bt(@lqL!gkD{P_Nv^iJ^c9O%%HQ=%bU5o%A$F{U7pl4TIfg_-MJ^B3;W zmP|k7_}|XhIB?^*s^=0-TYb7_frT38f1J<6@n+Hm;=0XsNtt`ZjzN9|8RP@(Tr&{%_jPjV z!%z{hJ_8H0Fn4S8Y3uW3&It$X%m83sDO*JvkKt$66)36)xVZ|~aeL2;eBbt; z>mM96vQO*<3YZ1>hNNFg<|6w?MohCx;7B730a(B&SRL*0GE(M|($m4Tj7~@4fJ9<% zudo5iCtWI}QZq>KPK(2d++9WiscO3=Zpua}C*-cz>7FYSSE3U)i%kFeFO|-;&B+eD zz%0?^atRvGj{@x;7HdL1zrX*E!!Lk{a4c{vnX9mHxPJXT>9634pFjoI>qJ}VzWL-`Vj&bJmJ6s(B2o5OLUl}% zFKZR4ose!2&INp+2C9*hNqX7G-*7_a`u)HzNZLpcK-Uyo*~JJJS&Di`J>SS0U{~Cb z6Y^XmI!bozvZPdm!nXz(qIzpGs3Ejq<-kVFxE17Pl}|NSp)#Q(ze+N zpKo<8HpTp9TLaTd7hTRfxv`lCHd$rP`~+9WCv)=fxm|00<+tC)(V}gM8)xzE!LLZ$ z{QC;fDMZvYBqc{3M{-aSDnmmJ76x-8-o#eaDB_+;mm3jIpSX@73nIoE1TDp#EA=M$FW2ZU+<~bFowkd8 zE1QTv%@frK1rk=khDQ;&i{E4VVRxQ zLq=i_dQPXFo!d2D}RL-XoMNoHTz9BRPS0+Hs%2xrU#MlUl(@|{($OGcI zOZ%sxB=9v~*xnGm4YIU8Tv#pAIMV5rF^o?Cx_nnBl-uOHQKY93tq9;Q(gtj<&`u6+ zwIy&a0*}&Y(f%QuSnJawQ7uI^C6WCA@DR!MpkUKcetb9VDGY9q^XX*3_qUSP;d1%% zQR3R^*QXy$XD{T|dBoj`B=?G|=?+ScQYHzdU=sTw;MPPb3`MYpwIwizM~9Z9;+Q7Cqo=1bSU(W=V5ENcFr9@ 
zFZ@9Xzy!&ippiRqwn1S@t%jRy+6c*7ysj_LZn@%Hx+o^zR0#Ly^AOz8jO{jLbz;Ck zIw}ytlaMJ7sawNQeKKw{q0At{MtgX^8tDV)zzZC*wGbX4Sy1Uq85jKnU8pZC=Fk@n z6ft|dEC(5A5x33$rPDU30}uxu9Ffhvr_;Sq9RLUE6(R0DBuEllDiX0!lZ_)A z!zQ3l5FZd1cI}0n9QrmVEb4WkyQ&B9D9u<(y$78Z;PoMosq+9_L*h7Eo06g6AmWrJ z{_VKwcUA9|Mh)#R{!D%j~~K<>;~1* zT&|70yeDy*$U0GTpC$U3Lktp1fr)_kPe4Ag6(%#J_XK1lagIr&gk$rK2Wdm3E(FrX z7Ei>!_*Vlg0iTfEqnK0TM2abhDP$c?xx4P;=0GK$Y`wRlFk{746XhF0@r~ReEWxX| zl?oCrEjt-uj9eSGnwa?aX&`!}8&JughO^0u^XJbCRNzv%WJ&ddoD$3w=}be1gy2)} zTeo~EZ{8Lvfv)}iyvBfM%r>7pcl>32FTHsyu#g#e_wE^17$}}|6+f+8XIA%COAN21 zEV&P)CIeNQFu60N91fb)YCArDe$UEGs{64_yKG2b&GD9S?usEP5wTAvBj(l?G11~3 zi@KWZvs#5Jdk&XeuD+Q8Hj(7s0?NUWsE~2Nkd%;qC}6&|je?2YO=N_`qDo2x{#AVr zArv^wA#a}=5SJ3LGd0vNlAuxvl4vK3fk9l60)MOYn);Tz5E4t;JQ^gH9jAU9iq~gI zw`+g&V)Ys%Gbo*upjTta$L>x=2?knzPOA@JHWwb!)sxE_qM*=YPUuzOwE&v6e)g+5 zDEr9DI&kpd1X9*GIBLAjBrc5ij_}>{n?7h=-J@7cpmG4i#Fl0p{RnMim#2iMTTngc z7~v(_SD|$Ds2a3zptng!V@S5%t!QNjsconp3*Owad{v*=Y0Lc(*G)nnLLB7Br=Dbo zqURJpAsJS>^Q9nJ!QbHjWmq!0j<+WUh8-7JMcqmscH&rN^D*Yf0t=F2?=#54t%UZia|E)(X|02k5YrNzK$IzW`0 z{p?vRGPR~P9GbXu`C_3>4v6wvg5Q1o9_e$#caRMjO|9pH2ceu7rILN7E)zK{B+ti? 
zga+QarIh)4Ir*^}j{K4QiF{IDqq&DHVM0_9d7|c%TKOqNZ6Cnj8Sop2GeOxYPfRR; z8txYwtkd6H-C9q6a&-9@Aq6+_M{xq)H!$$b>3e5_{C#rzJD2a*V4IC3#1id2Z*8Fm zpu~Yac$o5T{!xuei*>1++DYo&Bx2nuPS!kZF9`$ z<7O)UbIB^je;y-ai=ZrCik(QCOnpI!+e!u8`NSI6KqLZqvhbW%-h;_HXwXMt7K0?8 z_|_o|SUo8OfE9ziES`Y5fRqT-iE%kNslVfV?q&IDP-t~ifFX^-gj3!+(hnM&L3wWYFL~#OHoTaV3ePqunXDyX( z+r=f_ivsL1GI%l<0~K5L#+7$b$BKcME*{*mf^5K$xSj;pOgJ#IUqEwTo(cYmG^(Kt zPKHBP$uk}agauUXq%5j^e1sd}q~xRLIPm3z#Qp@D7}6qwT$@Tv8@5A(OC}H@g*&@v zA4zTg1Jaepc>$cNT{aO0yCfOHZ$j?x%lN?6OI!6t3bY5HBOK?|b71v`_K(E-_wPfm zbCh(>B2B}^Y;kp+Ygt*i17)Ip7Cb=mFa*aYqYVo1&o*X{K4)E|UGV8V7zIu2m8j-z zR=XbKI0_=~B4>&UBAQ6f-f!M7cAZ5@?jq{8eJUuK0Qd$>3|F0s4dmNDi1Qd7q z_paa+a(90ueF~aSRy*^KriWI1&P60{@SpQ)@dfgQU0sRzJc?#QZD>e~g#7x{VE3Z@ zz!E?cd5K69{QL`q(oL$H7d57=t9w^Y^XD#Vvbel%lb{F&CYm@EgB^G~3n2+2fD(g*$WUqlQJH6ucnUEj%OUjA%Gd(jvOVn7w_xEK07Zrkp6$$mI&|3bUgay1i1Ej zpIIz9_7(gC=Jq&qGU9`DkD$0aUl&b0!P>zY5SMR@RV4P{RXy5p2@-q@JNN3M2(bGq z+KS|Ocm03j@NQgu3ZE?nipH?3)uPHKSTriGYP=bEgcfxbi!2x|k|22lBIfRpKcGq~u$5IS|0&$)qmf zNasMxSrYvYGES@zG8X{n(-%I6=Wqm@?&+uJu0khbnm|X(6Dr%Esc4zKnE7VKpI2nO z?DK8|XaVY)4D>KqZ!diP`X~+RW}5^aw80P*jC7)34I()>1g6%LO);wc&|C170=nP~ z06_ODs)s|MScu{UkjR>~Yon3R3vJr8iKl|YPcL4(!sngV_BhHF)CEQv3Ntwf;v7B` zBp$^aTJUDX?{bADUw^&*dxfoWkhK!IHsRX;%YJ}b)vT`f&uih5i*n0OwVv) zUH*oD_k%wkY`x(*f<`)my|@E7l~FIQH_Nx*lqawSmyWoIZT2e^@5w=uqI2K=P#2^bZ|_*XAwxg! 
zlt1F|Xe0Ty_hI#qF0vcWC$(>ojKpCz1yQ4QH0M7GDP`A=`&rP^XtUL z+Fd5LPeOv{@dX?4gPoB-zHT{O71pZFYtGn8-H9-go7|4J=+<{?bv+F2?)pR}hdnp` zfUB})cg4+HH)&}xF~SZ*R#wXuF00Ygt>f=AvNR(P{}BBT4iLhV3PL8`O!XVL$spd= zmfH>r5CRz_o~A{PjCHTMG#K0h2Zqn`jwzAH&iBcfgtHk#(~l0lZI_yz%=qbpub!A$ zC&(J^|8F27zVEv*2`}6B%{k#4G*x;3B=^dN$q;YRQxCp|2YB!Az#|-{ChC0o-8R{| z{r-9Du&+`< z7>D@a8d5zxmNNdwjX&O1vFpp4WOv7!wim2r6fRLsUPoeNLQ24I4+85tJhs3-t?vD``?7A(XUOL%>i{Jw_6F zKKtc;4chO*<=MJtCF1n@>n$c)ou*o3x+*qW_cW-|YTe$hkacx#wTqbL_lQOBM~3I9 zabiy`HL-wgpxf@Q?R4XC^8JyERQ0Rg8#N#N@?E?%s1EV=wQkb?R_-ay2tF=A91M^L7lgwR z(}?m84!g+oRLKA(Y$C*HT~>g{bP}>oB~hfJQl&*g4}$1tNjwQKIEAFk^KN(v zy_WeGSEx9yT&aHGr95M3dgS2M2*u=M`ngXI%uV(x9DW{2y}5FEO?$h047|sbjvY5< zi0WJHtmCZSP3{u3(kT0h=U-WS^ovqy`DtliM<0LM6Sz|-ptOhHjg z=8U4!mbodvH4fl%@JM^y7ARLjubL5*dGGY;ir}ar-OQeBeh;-9*M2bZ_GyS?mS!Zt z9epZyVVFKUc1yfdJ@#&RxPttvLhC)39)<3ezr&1)R`GuvZ+UHwge0%)Crs_K{lo?v zA4Y^oX^bc8rrKRp3%R0xc4N;;F#~G$x^lJCD)|?$RX-`#AUimwQNNQ_VFr2b(nXWvuRAxdplF zWwu8v>{ILJe@-&+m)*So1X5)Hcghe?f7=uElR3{Gs^p z2cMP8OGi827QXFk5Yx9eXGlByZI5fa&K-Ywem2KpGGT4j`G>g;$!0SR5u|YM)@y z@dus0SIJ+!GsAMvE~9H77v?%1UWt!)<8wbrOPAiEc7(KLI?T;G`G5bd+QaYHjKlgu zYkr7N>KGozFE!W{Nh2!Ek-^SraC#+M{Sh5-7$>`tjnN8 zSyXslq)X7QM+KB_otX=GJt5ckh+h&J;Qd{R{02L-$4gDx6jgD2Y z=lQ>*5!>)Z?dKdica&%-O~R5oG?gY_+1Wx*ss)-2qL<(#58M?m*0#P$>y0xDuH-9( z5%JT`10SlY)a}#Zy>xp5=0e9l<}_JP{w;>O}R$Q$ppv2vIvf#Z|?(%;OkKWn-vT^%Y#j}H=qem-C9X3P?E3t zw@Or%q>e%h1o0yP1{??+0N2l*-UscVSdwl`$|}Lt4)Zz2(xv4M+J`XNS@Y{9nhE=_ zY*&m{`tZ^s`|{IAHZ3W_J@(+XMFY22h22`m-**mOX5e;9R}B}$OG2YKtH|CaLHR`@ zgb8)|F%Sl)(P~ErjLf=!0kus3>;$>2Iv=kgj516;d*x+iSjVyh%uEJ<4EAQPgr#5k%V^f+3j{LzMm41xkTBJyJ&9NKmJ(-rJyw zGOr`b_zZL@%!8ssxV4Tpf@pCJel|fvWRv2OxD`sVyWB?ph@KzX?G9P;pGnV;_|wvo zGox3qd6FU@nkpdW8R@W_jz6bLMFq3#dj)ZhxpCwK_LzzSFLE3I`aC~4{7_`ttv@jf zh}{9wa)lRD13ZO>HKsf$Q%R3$VkWf2e;2rk=>2s7@+YtB@$bjTpf|$tLKAG=n6e*& zJ>@ia9BeBU(^a1~XTp~iKtUXcsXz7je?Lm(_1O1z|D$q)bF@uVPA3K_lc6G%4lp`* zjzYyX0w&gQSBT&1n z{U(A|`~Rq&O6#-#>%g2psry5GwFGm0fBNZuQYH>^_=TD2F1XC#F_(`oeEDvcqAQy( 
z%0Xw?hh2Gg>EVi^7ivGA*neP!f34at#qU=9a^lzOm46%y%1*mGxG)&ze%jxCO69f2 z>1s2P(_Nd@PVQTA{Oa-T{(D8f68YcRpw6 zs!d+EN5uq54!x|OmM(=6g_pJVw$+)vfsY$q@NCiiku)TMztz7xWm{M13PI5sOJ{Ly zcgL71+piRR zqc_O;4R7EzF*?bb31SV}#9Rp5Q>U+znCjS?6FSalACews{s%7kL4b%@QpldIZ5wbO z29DiSy7>+9Ida?y11M_og^MMWx+8iVJc9Fh9$Z_+>G)cFRkJ zcKaAgUzkYl1sw>xnQ$<1OlWZ`hCQZo2Q$`?b!k9d9D{{BUV9uW^h!It@itYV{H*uO zvWIf|iB2fgZM0YiLKU<|^g^=;%&GcBbr2+Jx;|9U7t# zDxpw3O)zx|`2+0vf)I9NV4FfL3z?y41Q+iPb27jSKu#hQH-<_hjcSD(o%Emry9WaU zDVSZf+|xyh9s6l-HAj<%B_GS4_F)%QdQ7H`Lr;P5w@~{DjrtC>PSCx{D_jNmmmWbk zz{~9z@CSQ+JAYaB9GxA4ouUhpO7acGW(s3Ldd^ILDh;YuD<2}e7^wcTjJ>S2w6p?_ z@Htbc)pSqx53>zfAX1Z#&Het}Drs3{FA7ZX_EX-7T4tYHx@7u<$b^41( z!R-!_bGksY+BFbFAuc=M%10ryGK0c{*rWjx4~9R(`&Hcp3I|gNN;}9GnZBFFMs&ot zou7jbSPf9X?cieUGO@8Tlpm@U?T23294HeBYPJ^s6Up;HD=lAJ8Y1GwtrX;_CvgG{ zE3l1J8s_9#*&YX><4YlaffC)mUGx2jT3*d1-y!mm&+`i%uwrjMKR4^E2qq$15|Fi* zUtplwWP*-)s;$W}i|0bV8C2(ocbn2o$)XSvid%TrnHet8w+L4voQ9yia2quh(^Ztc zbx>u~BH@^++2vN9=NX0*SJVCO&+|e(l}V*xyR}aDgcc6H{?y49yULOgQe|{`NQbwa*p|b$i{JtijEN9K zMTw|$2G+va_-+J7T==)cYD0iyzg3dS&z>&ykFlTZs}l*-v!y2G4(0G-*1c?Rq;D!D z2;^2_LtQUrangV4BDETluNK=<(e=A8o&ld0ful!b4Lc*qrNN$e1?=2icjJBdmMqYe zRr7_OtirW-=x{&qr34y!IGZ#|&g|Gt3`n|=16RV{SdSSQNKCfM!N}S{45@uh-nUaS zV&-@f+#^X0Tn}9#_i_XS=~pN)$Fk`2)>kHdvVfeY9BYT`QwXG?du3WK9web0AvX%4 zWag8PXDHUJ;aoE~X7XpbSQi|m^%a&< z2>RHZV#@k~feEODK6}%E8xbqochZSu1Sva#)fYiaR|8xgFQ@611wq>#RT@JHKjmx1F~U*@DM#=1k# zN5-b~i3wgk@bGXguJ(CFhakD=&mXDKRU|8l1;AqAN6~>kL!dC>)YG4-L_#ZUS6>Mm zlH!<(nO)HAR3BzEM6hsLrRArQi)UVk@lRQ>yhyx~cFeQaz20&f5A*Koq?vt*pFWMD zLjcXy=5~KT3h1?gOn3ipL`9t8^OakO3Si$Im?-_9MMWIsOyNIBM)2S;{7F4jV5mLJyCGStB(`Qc6))bUlvj(7*y#V2XoF*>%rCx* zHOKGo*bKHVo<&{&N?f%xoUcnhA>fp34b-EdJF*+ww< zpFTfm!;|irXHB}2XOr?Ln@Nbt{&0;)0&1;VbfS{;@bP4I@WRcRC%;0+EX*HD;I5k@ zNky=w!LO+AynAqEG3NJ@Jj}XXd2wMqSJ_0K+H)0>wsm3OYZV#;$Q*uSmEeU>8Jtt$ zdLYnj!r!}q4z*5CY*w}yp$6btn#eCsoT!Sa$c5g+4!ckc8n4W8nP~D2!(8Q$tGZbe z$)3u{I%^R0ND@jmG-R3W*2#&BhbI4Yd!#@4dnF00+3<+ZI$y!*!Esnvl_cz#FktOlEV74yV6pus}^Nz^~W){+(3Y4IuXt(Ts5}(wpB1dlF 
z;-~SMzVwLme{0X6k5p>RY{83=-G?oFPDp# zfmV*dEvpIcxgrZ`9eUft1qeITttm&54G_wWg;d};j&l`vS z3xN)wub#`%A$Q=r2;Fla5V1u&Kpp1~p`O*db`X1%V)R@YM$IGi$5*p20bedf9uW*V z-X0RXYmj7bRFmbZ)%!s=tD?I?QE`sXNlz=E?GZgc9^-8D_V&pH-N3?YE6ZQ-%WZTu zQsI#lV6sP*Rt@LRS_u5Ge)Pkv>u^~85q|wB_JrcRP-%B%W%p9nSjfat z=g;@kOP`wfts5lfd#Tm$jUM`W!hr7;YLiHbk!+6&!E$0cN+X+6h$dj)Fqvir;4WmW z0a~=16CAi6=MjL3L zd)9gKC>^uynW9H{vZg%EShC%nN?GmU?)2$5ZaXZs1js2W1xD-BJ%k1utLSklK`6lX z@YMv*ry49vFJuK<3g4i6t#G6sT>h{bSGKwRkDVYqX*o%e^k!QK;=PA9SHr zJ?q3{(O`D-{hc~#7c6cEJu3~t|sP&N+TEM(=g6{ zU@l6WIc><^bI5@;%)w)E_>{T0HG*!^v)eAFjvB?>osUNY4}}UpO+Ax}S!Zx=Gqti2 zUH_@rz!tqvd8`7VZkRn`NTq$Ot`4<-^@A>JxcT??`J~Z)@f90$if0V7KfSHh$=@U1 z;$|1UVm*ywUi0`!#ReHMhyI54l8yTYPZ}762c;P*Wzpm{Vj{Kjq+)m0uW_Gl&caaE zty1qZoN_+bcC~I9AHPk2FiL=mG(W3q0q2N3aBCB?Ady!gI7uoJWyMQvvy&)6JY54< zaOK{1_E8L148MF%%X4IZpPyf6t=#bO{z^}eTMehCSz}b%+AujY`YM&_*PjGe@Q1tb z&zam7H*if%`TRS%Qgq|7#)%(ddipMCb;?j}Wb9`{vEWOFEXGxPG@iY#*~@;0*Ru;- zMj2)V+{1#uyzJx;TvF{kq}&grhmxoMEPYqZo%Oye%|f5P>EAooZ>a6z{(7ol>V{E6 zfIqXqV?GseY_+@n&#g=5=gekUGrDeH>QOKKLs-So4k=={I7c%EBg!-GvtAO3HQY3X z2O*Y=LqI_PS0$3YYGidJQzXT4>?c#Gte?8iMY}OeYONc0n5o>qI~fyE@TNZj^AIG5 zvjwT@$y8t7(Q=y$wpM2coAdv+;@1;3pe4BQWo0um#aCDX&J1jGDN-`vV_QT!6B zq;zmn0>$Vi^`&U*T{;Bim`;-dbX2!_WhKDql>)*rG^zMzo!(t&Wjj$oS7LsfZc_nd zj(UzwCR7?cX6*$}a&t+Q-mZE)27f@I1RdYiYnb;VSd=$WQ9)DF9sZsr+Q}-k<9nxC z^fXWU#bb2D)YC%gQ~g)jbDoNNg4Avscaez=Qq4V5BK3)-SK3*o2A(4?YHjL-mk6k; zlwv2>;a0dG5HY8>md>($8)|#3e!FhwtI01*UrqnmlU%PvbWZ=yLJ&=6d6Xa7e-CnH z&CL1fLdB%LQEAAWvO8%?wt!Zc$XqI^xD`kfj6tc!#l{x>OXjkIclQotE~@Z9tU+P5 z7ey8+mcL{!Bl%zB&1wGqySQzu6>$U{YF=T9F-U59ZW$ zL;EU$!T?c&(3zU(C}i>D?3#$`JflKNF`w&D&{w_!%!&^0QrF}>+LT#&iJABAbEPtb z6eSF$ ztOJLW?do;R$T+3!k2gg|Vd0N7PD&P8ku9d7n7w$z$2=gX67%4JUP^oBC9pZuFIJD@~RMQ zH(Z_i10IX#Znt(BZ%lA#i;xnj0a{gt8p==fwBORi(@d{qx2=T><1yxW!v%7e*Kt{? 
z=34y)l{#uy`tSyIANEeD*2+A8!hlCnAz5+y%&a$G*cG4g)YY$L5+rF=iH0mcC}b0 z->EQ6b93&JU${(*d}fdzIv&za{yo6Pe!x^@ZMF;)ue8Lg5@k~mB!HKnqVuUj&iQ>p zntFkS=CP1n#V^dlG_%`sYhui-+6#2jtaMI0^rklX_$@m#cZnnK=A29I4r_0@q+^Ye z)X9N&4@;)l&Fo8-0b4KXeRUeGx|(~L=1Jp)baQ{Z4Ol7StZvWf zM`}gC+f@`JZf>Qvh3wp7q$0bO{B+^3OVVtnh8TH@T$vnjp7Xo72EJZ!u9WvT%<8S$ zLV`eN<5HCK$3mo)X$ksIf3Cd48=*N8y7pK0wOpx3DK|IGf#gURSUx zuVKuy-TIy*S4xHj4KJTfvTWSJ{R5w#iQd_|CqHye=Jh>s8)A*6Ay2r??$b^iD3_^; zKH?MVSSc@*r0c|xS@K-#$1lJxYDufM4xerx)H`+jZg=-=-6}g#Ay+dCM7Y+ua3O;2 zE6!^0jEeM_FUfoheegup5p55{=7grh-QhdjF84{9tIy1@!^`cUn7NWm&X5EDSn&h0 z0jg4`e>hPCWBE9B^J3-X)mn@D%q)%d6h%KN^mNxU_W?7DyGN)#0-b)TO#0h%HtET(ek;oW7kQKusl_dLy;An-0YE)jrc51cry9aK} zdR?axBH6Qu2{I})Wi=E8ybP9#!@$njR!`4Kc6DIp<*=L|Hgrk?{FoW38nIEBJVlH2 zxV5$^MmI~?XgT)nUl-;jZTf0=wb{@^C*B>LshR2S9u$ly>+w}7X^2))o?k1P>1@Mg zB@1??@dxx7880w~J3~Xlk6*gf+9wuMzioAWm-q5qS)Dbwq)%%Vrt}PEUhzLk_ebE) z#t>ujg$UKToIMi}@$7ACw%yni{v`VFjr1keTREo9vohcI=WZIm?mvT`7 z+@8r&*VB`6IK~>o&E2Qzy2~b9?BKP?aPbWf_oQXgBJ11TYbgdj{m0LDBq#tEn zxpL!Zu8%^fF2Jmqq^i-o2S+o6G-#B{Hv5{Gr3&lz&RL-*B5Cc4`+H^Q_*|__+AR^| zUm)MW+hIJSBp%hYf0=b}01n-_LUfPAvFm9SC2~;8`2!cl&!zWOZ;i-(|C{|neU>^_ zjfvY8twf14)scR%G~WLp?QDO2ucEVUK*6VLDT4c?J_mwW@&rq7VuFC(mLpo3gHvzB zFHH3hH~=SwRyS`1WQyzDIG&g5c2~S3ZL|G`WBb?(I$4g^-m_Dl>n4E;HU6I5;$m4~ z+n>nTmFZmQE$d!(Uo>ZERzqGUy6LNBk@&Kza^bR41z5V&n z(ZxOVL4fcR4MB%v$7l}yj*YYVHgx&;K(TTjZh5~;OO{fodsgz^_f>R!$?=02>HQ2> z0yO3oWKU+z^cnd?O$~cSxO>Y>ueo}_wAIOQXprG_T|W&yWNy8){Te6rLMdc%yM1c5 z)Hqq_6GH%+GfLAp2!YDAPqgZZQs98o%kN}6dD@G-ihVUw3;Doqp-LW}AB&Ld3>H}@ zTGCjfs)~^H=;KFHNj{AU3qbKWc(-o^J#Y;IOla-lZ)v@u=A%DXpgH+>OL#K)A zsO)Y(jnOudk~jJO z3YGQ^5*=nUpo!w!JB?_RM4vsv%2Vy^SD#Ew)f~2Pu zciK+dhtIjYHzw_4m{YYeeLmB}jyw`IB@Qr%N8IJjE4@AW4o~14aeAo#@^u1h)|fx5 zRA}nHZ)7{|L=OQFV>M+dJ>0mH)G4!QrKd77HD^!T_xYtEVO!mhC^7&LZa^J%GpO|^ zxwlUv1K)-&-a7rI`!YV0q>becFlw>KSNVpyFB#>P$>_mmlg70kvDw2<3ooQ5PjoBN z4_V@QpUgV&@UFPyaClaCW%7`^MO6M&lb{cH-vbf*5(WNh*opw<^s`N6L_{&5h`#zb zjaFu@uw_GHDV08-SflytY1O}yNNJ5$G_ig3qjEj@)li1qgvHPQF<*}&g>wY2=2*3D 
zkvNl_(pY~8@(`u+Vt7F)!$#WhqMpP^+kk)38b^OqKo1(A7EV82X4<;UOI}i6+i9R1VC6B z2y-<^5)iH1;2%^Bl6VvheHXrx-fkq%R`D-hRT+LJVmUpJ@C(R+ieUZa3kpjPCSgYj zE3ylIV@^oT(ZpL-BAeA>t3*1KVY1eVuF3NIJB>ygjeJZ{dxKCgDZu(5ZGIb^kRYnf z|4DTYc9CyJ7iBfT{ZP9KSzu`Tei5JrAx(;KsCED%_|!YXH){1!DsUVq*7py%~jyd!9`xCLkgt2p3WeK)?7> z;5p7(nFOl&f$3*x9goVZ^$6N=FXRqphPw2{&CE8?!@=H1qSZnt<;?zt(Lg!}HHV*n zFwhx6DqJW~7=nR6+@Ra80NAfQ$KziBwrbI@_R%7-N*-W@O*#}&>(*&I$!3(yau6^B zS4Zj)e_c#;tK#=pUrN_){_-kXX+w}vN#>E^B8*{o7{p0vl-@ohob+SK;k7B{K8pF z;2)0S5jfH(0av6~O-=}UqIuso*Kn0r^TWKny=i9U#{dp$s4r2u5wBdb{AyXaln6Mi zqtqMER^{?d`SfYj4DmBuO3qIVleY$+M(eUrSxFd~mteAwKa4mCAwVU5e#Wql3sUs- zAf%1%h3u{E^FvM|bLeBvu9`qg?L7+g2cIgMxCXeE8(!W_z~!+a$x7ol3>&k-n`=n< zo48VLdxRXDWxuC8R8_y@e~o+({2qn{7xsA{9x;c)JdPk3Na9udpk2eljDp)7p=A+r zrGJ84!M0qgq0u53-2`h(`^iJXO*S*Z?3O^EXa{!tQiLi~#xm?$qHYYs$HOm8K;?LZ+|W_v<-U|P{q@YJ z4-d-x#GZ{<@LPBIPIJ#`84`!@ zp8_B=J9Febh$31i{3_edv!Mg>rw~?_*6Fom_<+-7d`2M=mJHUsK$4L&0do~R?bScgl4=Ik7 zvCxog>(H8$ow@%?%WQTf^AkV&a+<>V|KRdcCd`ws+jP-nJF81ZbC#?vJ-c!#Dh+PF z+L_$*J;pCf#d5ckSq4^g2(QMTFX1MpB>w#`lakK0<`8BGK!r|IX8RF((JZx@*GbQx z&|T-pI~3a5Qylu;J}9y!kqZli+NreN-z7vHZM(gGJ>D*onv-;6nW~FzFPk0d)N2@e+;@p)&2ovk86yZy09Gt@%!_X>7}pu3Zu++5JNyy3(10kyIYFc0{#?)FY`FB6@ybbyw0X$&my~K4XayV+FSsw5{Osq) zstQE6A$Jye&VV~`BDT`#o#z}3APo^-C==}6}z{< z0;bGNajuDM>V~Y3>uMnUBKZRBxN`-iQgo*eW?fi+LN7FAvQ?(C)m}sWq~8+g(+n|7 zI`Q;y#8ivMj6(>Yyn6D9$HMdfo8V3(5~ie=>}+fZYDOFXhg+p%!oPc!-7X(1GvW`s zJx*Qt6mMvv?~Dy;@P=Sl0<=HLAYdF6W{0~7&HzYFJ80}>>mCAW- z`Q#j7vVjl)tpoYkcGt1;bl__-t#+z6Wk$yU+gOOvmqIJC?~W!M#uyiJMbUR{K{6uZ zxI}`?T2Od&7E_!98bk0E4b>QxMyNt6iuSasiTbQcSp0NbaXEUav`-&)F>rVAFC5R< z-FcTaH1f)qLW-T6NTN_(00-#fR)}TIjvc(-zzoMX!Pq%5R`SlIw5#9D_=@pLo^Z7O#pwId5(pYd49Z6mNQ(i9rL-`-TLCn4a^8S*QVf3c$!)v zq2f5FwxD{j@dAYrfZt4S^M!%t5ug@a9?Xc-r7uViFXQqGQzA8i*M>wG3!v@d82R1D zBNXIo6!$o!{F{X3Md5E3`8mRvV$k$(LjM;%VOqm)gJ*pzJ*4>)^nw!f5H>DU%E#Z< z$+0ylQTQ-ydJ6*8mLn*5_s*0WngZCrKW8ZD!q zeH(0!2m8(8)n|%m6JpFAY51V3qPuU$-?^|g>tcWhgAMB*DCAdt4CipMIlK|wBfcbw 
zvPg&Zx+D=wz>Sm+fwgBTjBCa)c}Wr~y9-3cirHX0L54v?{_u49dc}g#{!07)fSfxU z+&@kq6ffAl5o>n!nswcSs4;w(tt|6}7Qg|3-~$vX%%?^o{KExpPm$%e_Qsf%FXK0^ zMw-*{>$1@i*=~Y>*iXNevF%j#NIf_S4SR~*DP671Pu`Cnm56JL>x{r$= zKmf^#&>bIEy$ZjYVMzd+AQ=aX4Z~T6OXb;alSB2Q{CNv5ha>FO0dBQbL-e6`$Fb+T z2$GZ%Eggh!ChhK1%8-hU`6s)X95H}%j8|(BtaXX9Yyjdk^2Tp)Uwy70@84l>-XP?$ zW~}K?P}1m*whnt_rFt?8RTNmyzEHb)O;ULf3-VYSdXAbwVk+Y*+W2D|0s?EEz92#+ zD4j1TA`P~I77)+jS1U{NuJbj&f0ge95k;_0TB>1;3Qw6F0WakT)AK&PJLo*X8uO)W z#Ep#0Y7Ka_$0PZt%wJG+nU;t9*G0u^=zX-$42x4UC`xEbBZ@;mzsujQ{WC*WnfhJ7 zbnpmY5Mdk(3O5D{z#f|U+c(aW4Rn>6Bm3W>>Z$qocSE;P>G{B^MFf1PwgZejvSWz041N_3?a0X5W|iB2DT zuz#i`F>C1kMl7e?-2SB8RH$M@p%V2!Tj9fljg-^{M6HzDK7Zj~bLnS~&I$&DG|kRZ zjY!bR*#!{JwlB;n8c+v2ZY+GdS5&U%?bJs{IeFCb$zRcXAA zjc2K}b!fgvYfS4I))&UVX(;1m#pK&a>iRLJxEbL-#6-)@jowDST5o1m-?9#Jq(s*2 za?mEg(n9l5jBTvgD+$AZG=XIK>Gw;=w8T#PJ&lP7zjPZw>w!R7 zMQ$*iNNh=Jku_QU+_9Eqn}o3%zEo&Z2gQ4{s%P+&Y1z>?IS$TNJ@0r~RoC4^)ME_t z(;+1rup4-)lYQoefTzf(4ltC$cP5(zm$}W9@{H%ZH0a~bv<`3!eZ|$74^Ol`=pCz3 zwMl0~DSV8UKG)NEbIHoxn}|6JK{UQwB@l@WRrBqivWATsI|e9{h2G`U$?l?8r7YF6 z&b}JYWiJrI(t47Wdgzug-5}S^Ikmxn-(&vi`>tkYX?&SZzPL}K^l1PkpEYxMH)qnnTUZim8PQZ0uTC#EwtpoiPHM6mg6()k0hr3JrURzN-Zx$Yu6@K~A zu|Elmj!FeNCDY2b9EcfHbU76Z1F=&XV%Y=dvX{9vB+rE`S84%+ErZ!%|ao!?+) zqB%8{`&AgAIC(kyqim{g^~bb`a$w0d=iA*SZA$!Z^X%Ak^-gHQq8QJLUw7lQ9`X(- zd(-#N&1z_qDCo^IWBm{!oi5CPZEY^S;0stwu#ng8I1ECS zEbyCG-4kca`ma#_$m3P}LozJ`K_J9{ApL^mJ3jYDm4v`=%=2{wIv*?HStxVf(9@;+ zM9A*mWSo@HLeG+`VQ~P%*6G`y0Q$Tza$T`A2#hWsNEm760jB6iTBuM5Zh?=?d1fd< zw+MoZ)iw%uW5oKM&QL8$z%VmGFI$52ZeYekLCw_Ha@C5%*)gO2AO|Ab4*%B5=<7r? 
z#HIJo6AV%*Mk)%jl6YYG?z4XjJ5$=s9>0YK`Pc2j9j&7{;rmHSs2P3X_I5k zQ~iOY;!~kgkHM=y9=XvaP_GrdiVA2c^X~}3CMPL)oRCj7k?3-_%;>2VGMn#JB_TU_ z04#SK=|F)*euJ)iWbtOa%799l7#mKSWkndnfThvg!i1?sdruz9CR4nGZCKbDK#!$> zBpgL|FRmbc;%ZUGyHREg-or0`)Mbzo5)~sB~LA+4H1T z!LjY|nPo2BL1v>x^m$!~Leh81E4u9Mn$$WOqvF!;;i@IBA1uS?vJG6&QJOsQ}QDQ2OU4wF)*oX-#yAR1t`6xudA3L0il)DuQN%Xy%nHP;OO%`$U2piTz_=1zMq zjD_SAq8_P@L+0%7mC$q>6J60nt;QI^>@gX*2y(c5h(hBDmw=uR`ioF_ubBe2kd58I zSEqOSbdF938R9{4hPxEu5g>+`1?X0}w4;95W)TLDJv_$exz`ht_=JV`S3VbN*#{L< zM3Qu6409G{OOAXSF!1yUsndj>q}631(Odj@wbpA1#^V%*nSX5YF2a^ygX)2!AMcG0 zfwur?gZR+d>^9QKl3v7%$^#9-l?^TJC*B2ABEBAeUvjq4)u=l1_RMG?5frTs4CqSVX;gi7xr%D@hW~q} z@*RhC<(&7x&CER`I#6&CA9T`dk_MD1CNh$;jQ8Tc1$5uZvD?{GY-{@pq6333FC_*k zrPjK<3>0s64C|~Yv~_S0ebi24GtH1tkitnHV+o0B7r7%DqNzIi;c>TbB;@Qo`Zn%! z@K$rISbAnzkgTH>k=~910mlX%Dtq=oGaTtx=6Cg>$)-{$rW0g5mAoO@fQw8A8Qmz{ zEArjvOs#dAR^}^wx%bbLqK$P0WB_2pUe}#8!jx}dqXpBdl$ogmmnBKqy>a3qp#Esp zi15x=BYi}Nw}g9ditPXjKVJ&`;Tn5RlYTwgcE)ct-@=nQuWAxgL}Ez4Q~LQTXvY|@ zc&6GTgxY6RgulNU8M_SwT?3dVCeG%p>5`qIKD&R0>~N+OD!0j2CqZg~Tf2J`=C)F( zDB*&pMqW{A(BWxT=3pxsxm*>)k{`J0tN`tdu&zpXs_eK3{N_JtbyJYhRusw z4P?6J!$6uZMFgYKfehMi7|OL#Dd-L-ic0j)STjQRnjR-g=0iYTQVB%U?w;jv>+?CD z_)BdB#Y5-1yI*l-zLfq(0h4~m80`zv7_em6<~CN2X2584q|H<~75EPEhb?Gp{ErwB z(}t01C2{L?_xa)ocvZFxWSnsYStqaHa2$w!O}a^`uo2es>6xjFp(8OLJZKQ7YkS*3 z_g&#a#G+L{mjgqVNCHVjLNw@d<#;juOk)wXet+~+b`3OCCmL7Q3dL#CD^Da6eR2** zpKRddxPNFPhoWL#MA60-YmMFS-}IEzXR}v>p>6I{T?;Th1dS%L$iEUtbydmEIP|Q; zn*~a(f?%VA>YbAp8}yw_U=WW)RzvtzD8CFi*4zAw3M6ME9gOMoJ-@snotXCd6N6wP z*xn)=98>-#`alu4KJzMl=GlMYx^arn1D-&%$^bernne%GaRBiH%B>kK$kjHU{q~w% zUWqAnzgEJDT|p!II2BcqBky~VI*BA2go_4zM1(t#Uxib8KdwGNZq&ryr75$QS8T$<76qB^`%f}; zv+MqqaZ5VklUAT)2@JH%vQ91i(jYqn7k-B zl?v_){J|gp`S2A7BFSif*4+Dqb3(P>lbjd#o@suc{e;tQ28&Xg!BK`j){>-EV~U6mW@Ay}?n3BufJcS~ z=~E}vu#iwwUmzI+)p%!h;Oga{dw=DQhgk>dojf^yV_s^|{wge6yTPWF7YjxZ7EKUo zTsh{5gsxKAy_pc`K%aC>+)&w(WDY$zKNBjEAQxmTgjtw_R=CG(uiJ;^hOyD09>07t zilsNIW@GGT8A4k8Y(vD?IcZ_Ygk?K!OG>-CfqfKAh9GlDQv{C$$2;95*+9NrO@<;M_DTCNLXWsJISQScmP5e7i 
z$(JKMUz*u_Z|M1?CfLt`PlBdU0G@Ob%BWUw&8k?X!T;kdWLJwH3#cYy_XDYrU=KRk zRw%1RKYMlo5k7wP_m*~rS6f4q7IQy*ckNNe))5`_6%h;_Uf*R{>mh*pwo)-ECWthf z=EMC}1_%dfqIA_;>FE0Aje|zaFMK>-ew!D!wcR5tWIlxLOFdup=m(Xz0PJPPFqhjD zi7eEF9yCtVE9&l?#K1pKlPgv;Q)t8)*-EEw`>YnslL9f_R{|iHiUNEL%#!mbkI@Lb zh9=ZXekAmJ@v;%p&x1YJ-VKVOP3+BIH?npqd1+*94B*>H!-up0fZuLDa~zo}IzD{# zY>T(f%tP0;`QWwA-4y`fnY5Rp~2_RgUr~pj8*u>&7_eHv}Cf(hDnw-;nl`M z+F1f7CMKj=Idd}g%TGYMR-v^_fju6h7=_G4b~tZL+O-0m%W3T%b0!~O-_Ui{`SW|d zPMcv3z@V`l)m`?RG4DNnv1F}TihpVJ!rR!70F(y2J2KMvR`;59K%gI;``2&4?{bFm zTh`?^^2EqU%YXSEvI&==?w5e^e|fkh{z-N?gtg}8(l1(gtbjeZZ!9p(5fJ9O_3O5m zAbKe}usR^4g?J$fSsVn%?97Zn$V@(eVjak$Z2|>Ax-Q^Da3%Ee#~YlyZ5iL?O>khX zM93wR;>$oSa_=44SD`RBFh0;E3E!MzQXjY>dbNhTemNlC_C$ZH(oRBCB3k`l?7ewh z&UxGaf7-@iFf+EX#Sn(<*%F0e$QDIZ%9c{eUZKr0#*8II$WoSwQjzww%$O01sFXHa z5^b_nlNFm?$x(<=b7lH{+lt!4+muQ{DC}Lf?(I9Y;bC`vYxlms+|k7awXl=VVUfh?zCzkg_tDu_r*?sd+0f7cCA0PFyONlKj7s~Y|{Ww~Np%RZmm$Bw+^0?W)8=3D>o+fO3{ntr@%!0Ul zX2o5;-(f(%ewhUkpMJt-z0GE&(|=j9VLhmmpB)o-t1MYcQ^wiK-;eA4_xb^a;=LIg zWeodUa``KgjQ)Dl2S59_a-_kl|3+GfjLe)zKt`FMi{GxlcG=_FjBod@59B#cn)HY3 ze5FoO`35|`Z$GJFYhF9A$xKgcV-;=mp||K@{#$ zME>Y$42n0Ki2ikVJ?ol`ts`KHJx(#P4<}j}tt*VGlGq+1%X%m|(|)K81oBOzs&g@8D|;#MwU=L!7M!#o2&v_+%k zUL%~)+!!y=0p;+Ac*8M_Mg=Dz%ajsK2+{cGh%pw=B(~$Yza1Z?AXG( zw^!J;(Z({1y++otr}K|AA`s2mn`+y1lfwiD3{?sr%9f_Qel^t!-IjqU#P+@MTUc$i zBb_*Umj3;W%wIV0i{!X2bx5m!+(l1s-MS^Arw>_p0)rmPC{Y|`_c4v-d3Uz)8tY(($vBL+ zw7KW}V660kN{w+kG-CNaWr>52fnjatAk(W4tdj=DN1v$-x4-1KuJ*jv@ZuVJLWN#% z^>_Bxq-a3$8E%EI|<+n|vu7|)Yp~?_C=JhwdI!XdMSSL?unUi;T;ho{>3l4y| z=5=%_4oO#TuBlU}N{yd1RN>?(2^6+PKav$|h8N@D7gYeaBUN=sy`&1@&D%<84ax*c=QbsYo96Q>k9cU^5#?Fa2T^kwyn)Sn&HpQ%H) z3z6Q442K6UnyL@Z6ghVsv10zzY15PpkTf=O`$)M1ars%Or|BviBRn2=1gy7 za_$RTtBl#Ot%iO^7I5m4RCj;PG>dF>-3%0R8@4iggTo;p)}?6~=i>5_Iv$pE@UG{; zY!VY>EYv%2uZJ&evcMXOy9R{b0JZrm|XhMje2_A&Or*kkB!sf;yeZqA2z?V9~`WeK?To)e`m_b69N1!mEY zV%$=vLnDkK;#4}4Kyf4|cS;F4Dc}TDW0H`t06BS#*|=7k-%!jtivE&`oJ@TUXXaYZZ@ zortH8A6psOR}vu0s>r3k-L>#zk6yicNpx$cr<5qCfKh&R}j!0KY)$R 
z3(Z=YAQUou#|R*@G(VDlU|)0`s>&6f@A3%e`$adcF{C+WfV$Tv>-0H`^u%MBrJlG7 z+@4Gnz8~I`BUYf?YUB2hu={TqkXa_n@H&^iBCjQTT7B)a8)M2JER}v2;^j}ga_plF z0zOvu(U2=}0YbW|il|bajR=V~qa69vPLu0YGJgNf^L@o?1KGC`nK3obl4v!sGHrBG z%&mk3b?fqr7YCZ0c!JY`vQppw^hHLI2J0|` zu?Y5y=JLd&0{orR;^EoqA(!1zY?&wlUeZHXl&{ilioDK;3YXf7J+9rTmY+H5$&W&? zN-;Xszv>hfpI9&)rBv}1w61+Vlzvr=2tM;C82( zkL~Q507|mw3e*uuEqWHNMY^czV&2~nfak$DWGwn9jfqlbY7WpH^Lk<=9Z7_|bsOjJ06~_C8IFyP3_F`6?BAK_TsH@+Mne2o2=2EvC$dZl77s zljM+WAzve>;TG2iMWdlArcip@aMJR4776>l{a#eaw_YP>O=PIu70$3EjiCT$!p^Lv z(ZDkLLWr_81(#@*{grY}6e}XAHCkjLB0uTN)CC4cw4QW?I&zcZYr5oV5F)DAS82HC z9`0|bRN`HzTpm{@(kYo(NB<`2=ak~DbKpcMB}|($&8gvdNoOgM5>XZ2pTw3(r(-zP zW&oScyKCh^GE`BCCK49bxT$K8e^{5X>~LPHzHUxNUL89SH^OiDmZX2!bJ;t7?`qRs zs^_gl!+D(eSx2v<^pYE!!p7MXkO46uyHKu5ibuN6A1h}F9nwMQ12blP9~oXGVt1j*q|=%dWC*$PudwWj)r;(iw?Aaj zDDeayeHld;#zDR8zK7NB+G_ORJqeF=Pwb@e6zHscb8i#=Vn83VsXX{=kf6l#s zw2fA0g!doH;52CN1n(~??Y{cw$X-j&zJLf&hN=$RY^2{$(h=uOaiMMv+aS-}|5}M3 z8Ci9U*h4FOIh>oR7c_rCHi5ow6zyEq5a+*GEU18@0S1xT=un@A{px;!gSDC-wZB(UC&g z2v30JLm16l2pye9(4uaN+XD2k?#0r2PBur%jFEXTm={-gk!ly54CkZD4{y%&RZ+g1 z@_MEpb7<0AlwImj88R{v|q416O0BEA%l5B!ti_kw5ptzN57yPsPn_i+`u+JDk^Zb_fHvSJQ!tphtOSUp~fH<9AK(T`91cTFFQjnt%W!rNPE zIX2(61%{jBB)vwLG}V4TUOIXTV}#5~89NI8fT1APn@&_2mb*)y?pY=GMmFM>G|u7K z>UNDK>!YdzI2xgG4%SMV<T@;86lUe`T=c=46h{fn+H=jAMms~@MgldQjtj9ucy}| zC@$a+CHb{N`>(`&?q9r(e~p9&aXu)6U5*lM50H#6wE9Sm0OABGjIHMlD_%X)7GcJp zWJDuLM@y3@WIOZEI)O5E+z>*m(FAb1TQ+FhlDGMrLU{oyw?iOLiPo>kMqROhz97xw zyBw&mx;{Yw(7*cewuui8*{DjyJ#J4q0vV|&5xE1Ii2Z~ZOAjoy_)j`K`!A3xi>^zB zo*-@fLlw-$+>9`-#+k&ve``D(L?B||p$_y`j_0&nbe}LUmQTJq*2USm!ab%h5sS2W zH*i0kzgyE~Va~EkozFwh3(rjQ9j0_~tI;n`CURB|EJY8RrcB!lrOBB*i)u-^MPPwT ze%}e>X?2+Nc%G!YC{z|XmUF@Tf7dWC)UeOBi!RR|#UA~O+N5-@=pkq_vsvF7DqI_{9<0m*bD<;Lq*bm+9hX%h z9vBLLARPqrySkBR(Rr&3P71|Ef`FBuqpMhIH;MZw52A z4zW*?_z2mBthk+Co*b=fkkso??&tqV!xNZgV;kFlR6f|7Yov}NVeB#7!NA~fz|1u| zw>1}A+8cZ~$SkeM&=UJLb*feI-iQa2BJAgMQ{w!(vSh2LFFmtP|F7GFIjV3oshdLH z{Fnr2bj;U$q}2Rj>YScFsFUhRVS%(Gh6BCHk&<4y&q(1uJXI#9{RbK^R!EQJ?DZ)# 
zh<%jh!bsy*u1#d*xSQD(m3&eHad*~15i0{Xe46wqmZjnHJVx7EVSwl5pgAF_F8xSk-_Vi0J0f)gz%-p=prQF^SM!HYs;rJ*qDh0eu9D07!R zs{C>}*=Bus2O*s)MZXnORbssC&Y#--EpzK*((-wJDlcOQ$&{V6AwZ-9v~E{z`;u(c zjOEb5(yo~0&V%Dmq7W<8+Fv%33F)&RYOjLgb=*jFoeyFpBRmdEn@_fPx*O+e)3)L9 zyE`6GmC*D~Mt!z@MW2v@ACneV7tB*OOkgGvYP6^N@si%sNrpC@;Yzk+vDilxNKOU0 ziht3r`K0|`;!j@s+W4mD)_kbCpg((ow4W%{+Em;@+959q9ILFPc_KU^CjT#9JJH-SFE z8SDOIM`o?E^+~FtrT>e~v$HAk6gF>Nx%EqfAd7;7sYh$aSpR4{sas6B)Yboz?rE|Q z_r6+k5D)8e5cZ>?^8Oc%?N-|&+1VYZG)Qsf~EADkRRdQ0^t?H%1oL-y7o z*A@^=)dKX?47WrE)}FBrpo6reyrgM@p)yYz%HuEAn?3vWh1@$d)Yjcl_x6jzW)lPJ z1OW%9mq^^Mn`TN?;02&r?2j7v(dQ6Z}q9nRW@fA5Gfo93aV-JC0-}vDPR+X>BX@6 zMos_pGAdMimqv7yPRkO*N<9b3LQx;dTj4WjKwkMqjrrT}&(q6kGDrITpFMq=Q*eUy zlwD@;IR4LHQq=zTY080o?>}Vb=M{E)Stli45kayP|6MTfOd2$w?$siGdzRX#P}W$Q zOB_*&N3QeMkNGdCIeM_t?jP9gn~kPCKCtJ_v5fm?xH0G9O4{}O%R1rC2aW22Rd6OE z&Gb7MXnO3CNvWFlRQN;X%UUAL?2=ObKGCaII;<3mglk(~LT(>hx!SmZlf1H^ZTf|h zp;ql081t?3(T%k-#(~J6C%I_)eDPs!0b?`FEcz{s8f|Z?K0o__$mTF;W{dDipt?^u7f%K>L4 zUquZaPqzZW%|aqy(3Yki9Ydy`*cw((i7BFX{?$ZAe%)*Or-+At{BKOE38;%hsr^kSH%pPeXwZ4?7l{` zLT_cBo4d==w{_Y0UJ3I~nf2Xya0(p}+Wu4&WcJAX$dK!a$2V5jlpdSrbJ_LD@gaMw zipwL0q}x5rcZ@V$H+FBjZS+7wLpRDgV_6x)fUyi$$ z-|l*TOGW8$=Qf{qNd0fT*XZ;qxBlrhAG@@ap&5Vwa(4ZR(!GCtX;1#g=kT-5US}j9 z{r9i9qt0zy)Zw>xdw#l_4!b{HSodQ?)r&KtCWq}Q^4)xL+``se$Y)R3CdpL3Ht+f)EWGZofBxz0THgX_wR`D?F+-QG ze5^L0SFi1TJw7dD+_FzAF|Ps7@UuOgtdbu7`3#G`8T;pAJ`Orvw?LcyHhWm|OOZBa z^O`hnQn_hV#?)Ik&72+-cJ+Ky@BI{tfokAF7`^V4trUw!B;-jscMU;f)Y>J6);zOZ%AEMzmU%R~&idDlPo zDxCl z_e|dy6|p_5XaD1FyESkA`N=15o9DjehvN)>pIa~ZkxQ5^)5TGNoQIqDoqzc>Q@h=P zDfcfL>LXWlVdSYxnv?eX$Bpg>E?l^H@#227PHrat^jqlsA+NcslF|Lx;qTWr^ftKA z7X9OLC<^8%&!NCgX4Q+whi)+-&{<|Q@+P(r$d*Mg8`rF|dcdl8hR%ze29_vj2D53CCjdkac2b^S>kuRPv^U~sN)Wr-5bONGk zMpM+)Z~mo|;9P%haXC5?qpKS(H*D1Cezb!JPteZnLHRpkS7YD zaWsCO2UQS#Xfl)!Ni#QZ0wK)TO%v+Vr|2Yu)lX6)5tY!HiG<{I)R-ZNs^@CMGfF^I zJE_hR#H||Avb((N0u!sWi+y!QZ+zq3s+(4S9U0nAQoE^6vWb0;`%16iWoU4sC!A64 z-P(!&@@fF`ARV7Krdt{|QPn|kluEGJ?yIj(fg63Y=pi%O&HKfR7iuGC$%mDGpKC<;3^a)5D 
zC$EcZ-Q>e2#k^ZQ(K5=lr@Hl<7jTi>R3U(?5}t0@hSEiJ^_pA>S=g>)$Cn|GNS2>N zuVl<}&jFqtJaXhbN*tU;(Aq#l6CSLKZliD7wX+?cqHtxzCU<_H>b<}h z8$F@x2~HRx~H@Lpq~4+zs@L5yvo z0b|dn?r7EZR@{q0t+6;kc?pGPTRu*0c4I_R!Rr<}Dtr4G1YBGiu}*F9;Je};#;EMQ zbx6(U+K$$5Gc3Lgxn_U9Ys~k>H@c*@ess$Dy=&}!-esLm?u`)^15*^@g5&OQ|FodQ z72D`Y)I&W5(T#RRZ)L?z-1dYKy6+v)GQ5*+G{&AIS}vlLHOEr1?%9oRUw*8+Rkp~p z3reu@xRJ+@mXAMpHdosB@b%2?FrZScU7I#%=tw?+ioWydm6t9J*3p|V;Wxv*Yd*eT zqZ4_uO9hd<=bDX~2eezX>$iSdE6Z-J`xw4; zYtU^*w{CB{cd~KVirE~24@K%fXFmBpwUyT?JF`{=J6g}ycD(+mApWe@veb%Kajgf} zZC0}Z^Q=0J>svUz$&1$;=EN{rENr#L1=qkG2mCiYo1Hh+z1^mv6St_us7AYu@totX zR;aE2+H6v4t4F8o-nRzt-GJgXjjHY?G`g$HmY*9Wuu;Jpfnn0z1jDa7SGfESh|gvP-(CxyLQRX&L<~+momq* z{C_S@_;%eoJZO-P5&c96*8(AmFW>;?EqO2N^EfQ1EVf{1{XS^f@x*7Bm=A4Q=uu}J z2gbr86r#{Ru-?kxM{;MLYobW8MqTMm_^sTuBnWMwE~im5G)GU*EvCBM*u&$YORWiBxn@D&eAgIqhxJqK-Fp<&s(?wF5D+jq}W(d#9T z?ml9aoV@ydGw(W#WPf$dSG7~!`?;@t1kqv1#J17SAH$a{St3vRo^(B62AbEaZoSrR zuQ&Tz|wQCo^x3(2rEFU-hy7e2tA~>CL*cp!`+kAKm?K zjcDoh_uo@~JIHrMO6i9;34;nn=6E*-FDZ3GLv8g7SuI(C`mg_Xl47U3tKE9AlLkgPf zGt7`#hyzTcUL&vYTP3kkv%S9(FdMcuRMJm(_WYFeb_2C7WmHT0Qm+H6dH`+8DXNcV zAUU~9PWk?s$*mW*R%*3M2bpQe$KTj=Lvn|T)hmZNdVY`JLDMrpr<1z*?%#gw8KvDxD`_zquhE;{oxjD1P}EsI#x7(< zbliZR4rg~L*_}}>{9n|qPPML26i;w6n?;RJh|DOx=fdy&>8GTT--5}GC zy9-kep>WCFxku?eRB1<5%yY~2cU(b@gzue2Y zVt%W2&6QgQiQW^{hdbwYg{v^m?9d^9WZ5t^UfU|Qr1DgvB2+?R;VDZ;dkDL!RJTZX>r+kiEQnlOcy7w38QC-@yV8^Nt`qkm7{U=|` z*Z6V!b{)5`zyA78c+LCQxo+O2rKK%KKlE~`@z7{=ZLV6etA34fgVmSMcT)`Anz_ZV zonfnaoyyvd`(c)*!mMpsqj`RbiEEE{3;!}zU(s=Vv0~rs)YfK)6boC&dAWbLrr;M% z`0&xWmBD$vroEZiPj*ykhyS=oWY3d0?xn-vzxa;nJ_Bgmc)eX`CGqq)yY6J{E2}%G zk3TMdKW0bgmEq%2#@+`@gO7aI7cv?~Uq_G63MjoHAEL1M{*VBK%@=pn9ngHaS9*JV zkCOhdDm_bdTk=sf6SqPZP@x3vu1c{nNa5v*1YX^C%K10_j#tX zQF1zsRRW#3j{jmC0-11;xyCOjxP4c) zPhr-8@L0n0;I(;E+x43>1qAFzKJ$6u;s)amO+3_S-&bs&4!gQERe0>`-}Ld4{kaOW z#)hq89)A-ytij>iBx;7Oc{-7-;0#T>qFH8U-e0$AgPj>J3r-5KyzI*c@4r=-RP;M? 
zsa8y*yXB$oBj=ThhVvvxkB(Y%x_P^P&k7eeENx`oJAO^JS8J}35U&tT^Q;vsF81iz zGsGfErAgDKGAd4Jpw^RHC=8n#wi@>X&uziyYr7~GXw_fl;|cro9_zAj1Ml$`vI^}6 zE|@=i_H5Ha@}n0}JtHre8Ce{f)wX^6bI8A6kfhXu5EMWzd+L7X#@f<(FpwSJF+z7a+FPT`OSZ$l%|+2M77ds5&~DeMQ$L+f8f(3K z^yra#c3OwPnYG9vm(#eV64^I7>yaM{1}I7!sHdQEmid;w`t(Uy)1_Tsy@}8c`fwqt zI(=pyzl*@&6#K`IQofQH0;EW4=561-nC(>aA%ExY-S6&aDIPbX%;zqp%BN{$_?=75 zN2;Op$WjeocB^fe%17k!YrFKxnWIj+E0v(zj~v@PkptLw=%Fy{!%h90TeOEsRyj8(Ais}L22H+xgLuC;p{j2C_+hJWLGO+&(t(!Yugq*7;<9F#y4+b znZUcl`F-Z~N(=+s!=OGra2pkCBZY@6?$xBt&t^dLi$~Xkoy7^Lz&SIVX7Ah_*PyTroM5EijLmpC4b0ngVu~FR6Mr&;olxB z!>%p8;Z1z^aXBA9mWD@Lk`&IKs8FjL?_QzQ;qiNIwF+*_V3%X#X(gVp4QcY~O+rgfbTLnfrCf9hX%^S${l=+i( zw$+)pP+>L$CMu15BQ&u3_U+p*kl)UXb%~AAD$dL$@RwPq@5r7O@)YN?Ny-pzy@d9- zPr;Kfoi`S^RfO!w9ksIhXGl6UfDXl9eL3z6#cEsd;f>FOx@_LvJ-)%X=lm;tK8K>? zq(j&4|5)q0Z$R~xD6Qec=O$l>4~nINvLN~FF6WvL7COmpy{26)U)Nh0G}4l%Of zVRl9H^OC$4);qsmH`P>CH0`F#PoWT1Pt+v)l4G{Idvc3ZY6X;hOo5e>UVCM)9@Pdh zE9-C)ZNqHgnkw>xD~C9L++h3ap-EN6t9e)f*+3$j8EqTw)_!0`ah6YM>1tsn-LieY zVg%+)x9q6>wF3rBbY+Svg&X&6XkYVTRVzh6cjdy!3Ss=rRQT7fNv@6d%{J38YdXxa zhcRiE$|pOzy`$0Dg&)YKaDC_)H~P=N{M&#?mA#YsAI~BM#ZRj5T(tovk#mwSOpfrm zd2D2$x4y{-^XDbqI(72p*=x9-VJqt3B`mrmP#G8ORAnd59Lj3FON63Sms?54qQGCx z5dVRaF%51uWK)?{#m7M;0FI8Z@n!KWIZC%qXoew9=y|is^#r@(RwBa&d=4IL+oVa8 zzC1}(Sq4+5?sPkFAY}bZJxajNcAm_?W#~0CST7*UnD_j4qi|KCfe-=OJtplu&%G5q zxP0cJQ-w({Rn_p2QxN-d7DLQ#em$0IGovf!dzATl)AWuw0rDyW)m8hB8a3Z-4y}H4 zk|E`7Nzki5>@^A%tP_I!O+*v!Jtw;+)WkICu8xN_YpZlMxr*-V@Hb|SHE=xp^Y#^g zv#+HiSF9AKh-dcJ2&fS7mYNmqFR^+KUL8EOMj~oI%)1lU-=*51&2p3d<36t!~5>FARv*w1khGsDK!=Hm=NBKr3 zY^)8A*C5Kyrp4!{>qsvHf2ch)Q9F9z%pY}aLn+izc`H&M8JvV$GRyEfXK2!33B`NkT>(4GBv6#e{@ z?t(H`m9bW&k2pklZVl^HQz!eo@>#85sR(qAb zxOdHW&V?le%CVfH3_OsHxsTOEquU=I)z(;?I(2FUx}SvV&<5R8>Nj%SA3f^rsn`yz zYr?ltOQzZ{D69GSRu@d*hX?aDjgF+WrI!w2{65L@9Y5axWopl(XH)iW8ZS0BrqO|dmGAKy%qnU-Bgub;JK$?2;3`NPIm^7HCF~%$J$@+Afpb-eCyI1I*Y& zPnMi?otZOdR#Yk+b06u)kR+N$-SQkvp@hTi*C}pz_;X}k&hrc`#>~XE3&@h)J$mi4 zpS|@%`uc0_Q5kDQdNPew4+LLZ0#!t;hP(a_N}U2mozcU)8Z26La>S~K7g4|ZV&e<$ 
zp4!H>e+RNu*_$8a*j};EgFtH_SQ-?Yt9bkd4>zitJdqv=4SX7tON-0+o$j<_#(X?o zo)NLmr>3TA)!YkulV~b^Sd~ltZs<@wpzK`Yu{m5~S3m<@sA4dcXORd`b94CWaJi`I z=SHr##9u$K-guPV=*!xR37mO+l;i@{PzcJp*kY!{W?9E#SNve`W$cLEtB0bllWJ=0 z?P0n&ngaOxFYVS=F^I02?y87sOzxIjF6<;I1jr;D4%?;IiOQ{gNg0aOL$%n8Uwetf zGMP6i1B?3dWKF*vT%3egHgL^dP9$oYa901qjM%1fMx=QaYOYZML%j4B!J zgd{ZcipAcYZTt3kIVWeZIsw891mL0tdV_R>7Sn1OlNIuwhHu-Kq#}CECOm^K))#ysI$!@|oy|X*-2bg3L zSS^0?b=UaRw6@_{{8ToqB$M zSBdukK9pqgh3hfO}sPu5MiLv zxN)0_V6zgF(|4)g-;4PE_SUUirQJnI5F35|^5y4Q5hLvNMdl@QFL^f)#P^%)YevE_ zcf0c##p+*`Pf?CAadG#z5{-Vr5uQc}H?*mK)`_DwRhRSLD} z_=YH929x9L4}-AaIAI%SOQbZojuL*?vh zRcRI@$XXDk>_mT!{rmSz6f6RKSl?>;RoH9?xzcn@2B=G;C{jd_=9o z)#MAA`_Yncgz4L%BE|fOJqK3IJjuB|d+uC6jK}!#<0I*tAH3+-hAdp)-o1ZVb@la6 z*!!VR+B6PWnSZR`V#1(X2m0()wZwkEOmhMDS!D#BWd`=kfi1rHA{EkkI7eD^1Sm