Skip to content

Commit b990a3a

Browse files
authored
Make sky gpunode reuse existing cluster if possible (skypilot-org#1787)
* Handle gpunode reuse
* Improve error message + enforcing same resources is too hard
1 parent bb6429b commit b990a3a

File tree

1 file changed

+29
-16
lines changed

1 file changed

+29
-16
lines changed

sky/cli.py

Lines changed: 29 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -823,6 +823,35 @@ def _create_and_ssh_into_node(
823823
f'Name {cluster_name!r} taken by a local cluster and cannot '
824824
f'be used for a {node_type}.')
825825

826+
backend = backend if backend is not None else backends.CloudVmRayBackend()
827+
if not isinstance(backend, backends.CloudVmRayBackend):
828+
raise click.UsageError('Interactive nodes are only supported for '
829+
f'{backends.CloudVmRayBackend.__name__} '
830+
f'backend. Got {type(backend).__name__}.')
831+
832+
maybe_status, handle = backend_utils.refresh_cluster_status_handle(
833+
cluster_name)
834+
if maybe_status is not None:
835+
if user_requested_resources:
836+
if not resources.less_demanding_than(handle.launched_resources):
837+
name_arg = ''
838+
if cluster_name != _default_interactive_node_name(node_type):
839+
name_arg = f' -c {cluster_name}'
840+
raise click.UsageError(
841+
f'Relaunching interactive node {cluster_name!r} with '
842+
'mismatched resources.\n '
843+
f'Requested resources: {resources}\n '
844+
f'Launched resources: {handle.launched_resources}\n'
845+
'To login to existing cluster, use '
846+
f'{colorama.Style.BRIGHT}sky {node_type}{name_arg}'
847+
f'{colorama.Style.RESET_ALL}. To launch a new cluster, '
848+
f'use {colorama.Style.BRIGHT}sky {node_type} -c NEW_NAME '
849+
f'{colorama.Style.RESET_ALL}')
850+
else:
851+
# Use existing interactive node if it exists and no user
852+
# resources were specified.
853+
resources = handle.launched_resources
854+
826855
# TODO: Add conda environment replication
827856
# should be setup =
828857
# 'conda env export | grep -v "^prefix: " > environment.yml'
@@ -834,22 +863,6 @@ def _create_and_ssh_into_node(
834863
)
835864
task.set_resources(resources)
836865

837-
backend = backend if backend is not None else backends.CloudVmRayBackend()
838-
if not isinstance(backend, backends.CloudVmRayBackend):
839-
raise click.UsageError('Interactive nodes are only supported for '
840-
f'{backends.CloudVmRayBackend.__name__} '
841-
f'backend. Got {type(backend).__name__}.')
842-
maybe_status, _ = backend_utils.refresh_cluster_status_handle(cluster_name)
843-
if maybe_status is not None and user_requested_resources:
844-
name_arg = ''
845-
if cluster_name != _default_interactive_node_name(node_type):
846-
name_arg = f' -c {cluster_name}'
847-
raise click.UsageError(
848-
'Resources cannot be specified for an existing interactive node '
849-
f'{cluster_name!r}. To login to the cluster, use: '
850-
f'{colorama.Style.BRIGHT}'
851-
f'sky {node_type}{name_arg}{colorama.Style.RESET_ALL}')
852-
853866
_launch_with_confirm(
854867
task,
855868
backend,

0 commit comments

Comments
 (0)