Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tools/launcher/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ local_experiments/
# uv lock (generated, not portable)
uv.lock

# Auto-created symlink by launch.py in dev mode
modules/

# Python cache
__pycache__/

Expand Down
8 changes: 7 additions & 1 deletion tools/launcher/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters."""
"""modelopt_launcher — installable package exposing launcher scripts and examples."""

import os as _os

__all__ = ["PACKAGE_DIR"]

PACKAGE_DIR: str = _os.path.dirname(_os.path.abspath(__file__))
67 changes: 51 additions & 16 deletions tools/launcher/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,16 @@
import subprocess # nosec B404
import warnings

import modelopt_launcher as _pkg
import nemo_run as run
from core import SandboxPipeline, get_default_env, register_factory, run_jobs, set_slurm_config_type
from slurm_config import SlurmConfig, slurm_factory
from modelopt_launcher.core import (
SandboxPipeline,
get_default_env,
register_factory,
run_jobs,
set_slurm_config_type,
)
from modelopt_launcher.slurm_config import SlurmConfig, slurm_factory

set_slurm_config_type(SlurmConfig)
register_factory("slurm_factory", slurm_factory)
Expand All @@ -44,33 +51,54 @@
# Launcher-specific configuration
# ---------------------------------------------------------------------------

LAUNCHER_DIR = os.path.dirname(os.path.abspath(__file__))
LAUNCHER_DIR = _pkg.PACKAGE_DIR # tools/launcher/ (dev or installed)

# Detect dev checkout by probing the actual MODELOPT_ROOT, not the symlink
# path (which doesn't exist yet in a clean checkout). When running as an
# installed console script the cluster container already has modelopt
# pre-installed, so we skip packaging it from source.
MODELOPT_ROOT = os.path.dirname(os.path.dirname(LAUNCHER_DIR))
_has_modelopt_src = os.path.isdir(os.path.join(MODELOPT_ROOT, "modelopt"))

# Symlink path used by the PatternPackager to resolve modules/Model-Optimizer/*
# patterns; only valid in dev mode. Initialized to None so --clean in installed
# mode gets a clear error instead of a NameError.
_mo_symlink: str | None = None

# Ensure modules/Model-Optimizer symlink exists (points to parent Model-Optimizer root)
_mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer")
if not os.path.exists(_mo_symlink):
os.makedirs(os.path.join(LAUNCHER_DIR, "modules"), exist_ok=True)
os.symlink(os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink)
if _has_modelopt_src:
_mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer")
if not os.path.exists(_mo_symlink):
os.makedirs(os.path.join(LAUNCHER_DIR, "modules"), exist_ok=True)
os.symlink(
os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

_modelopt_src = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer", "modelopt")

EXPERIMENT_TITLE = "cicd"
DEFAULT_SLURM_ENV, DEFAULT_LOCAL_ENV = get_default_env(EXPERIMENT_TITLE)

packager = run.PatternPackager(
include_pattern=[
_include_pattern = ["examples/*", "common/*"]
_relative_path = [LAUNCHER_DIR, LAUNCHER_DIR]

if _has_modelopt_src:
_include_pattern = [
"modules/Megatron-LM/megatron/*",
"modules/Megatron-LM/examples/*",
"modules/Megatron-LM/*.py",
"modules/Model-Optimizer/modelopt/*",
"modules/Model-Optimizer/modelopt_recipes/*",
"modules/Model-Optimizer/examples/*",
"examples/*",
"common/*",
],
relative_path=[LAUNCHER_DIR] * 8,
*_include_pattern,
]
_relative_path = [LAUNCHER_DIR] * 6 + _relative_path

packager = run.PatternPackager(
include_pattern=_include_pattern,
relative_path=_relative_path,
)

MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt")
MODELOPT_SRC_PATH = _modelopt_src if _has_modelopt_src else None


# ---------------------------------------------------------------------------
Expand All @@ -91,6 +119,8 @@ def launch(
) -> None:
"""Launch ModelOpt jobs on Slurm or locally with Docker."""
if clean:
if _mo_symlink is None:
raise ValueError("--clean requires a dev checkout; modelopt source not found.")
examples_dir = os.path.join(_mo_symlink, "examples")
print(f"Cleaning {examples_dir} with git clean -xdf ...")
subprocess.run(["git", "clean", "-xdf", "."], cwd=examples_dir, check=True) # nosec B603 B607
Expand Down Expand Up @@ -125,5 +155,10 @@ def launch(
)


if __name__ == "__main__":
def main() -> None:
"""Console script entry point for the ``modelopt-launcher`` command."""
run.cli.main(launch)


if __name__ == "__main__":
main()
15 changes: 14 additions & 1 deletion tools/launcher/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,21 @@ dependencies = [
"pyyaml",
]

[project.scripts]
modelopt-launcher = "modelopt_launcher.launch:main"

[tool.setuptools]
py-modules = []
packages = ["modelopt_launcher"]

[tool.setuptools.package-dir]
modelopt_launcher = "."

[tool.setuptools.package-data]
modelopt_launcher = [
"common/**/*",
"examples/**/*.yaml",
"examples/**/*.jinja",
]

[tool.pytest.ini_options]
testpaths = ["tests"]
Expand Down
Loading
Loading