# Source code for schrodinger.tasks.hosts

import copy

import schrodinger.job.jobcontrol as jobcontrol  # for get_hosts()
from schrodinger.infra import gpgpu_utils
from schrodinger.models.json import JsonableClassMixin

LOCALHOST = 'localhost'
LOCALHOST_GPU = 'localhost-gpu'


def strip_gpu_from_localhost(hostname):
    """
    Map the pseudo-host name 'localhost-gpu' back to plain 'localhost'.

    Any other host name is passed through untouched.

    :param hostname: Host name to be checked
    :type hostname: str

    :return: The actual hostname value to be used in a job command.
    :rtype: str
    """
    return LOCALHOST if hostname == LOCALHOST_GPU else hostname


def get_GPGPUs(hostname):
    """
    Build the list of `Gpu` objects available on the given host.

    The plain 'localhost' entry never reports GPUs; the 'localhost-gpu'
    pseudo-host is queried through the local GPGPU lookup, and every other
    name goes through the remote lookup.

    :param hostname: Name of the host to query
    :type hostname: str

    :return: One `Gpu` per record reported for the host (possibly empty)
    :rtype: list(Gpu)
    """
    if hostname == LOCALHOST:
        return []
    if hostname == LOCALHOST_GPU:
        gpu_records = gpgpu_utils.get_local_gpgpus()
    else:
        gpu_records = gpgpu_utils.get_remote_gpgpus(hostname)
    # Each record supplies the GPU index at [0] and its description at [1].
    return [Gpu(record[0], record[1]) for record in gpu_records]


class Host(JsonableClassMixin, jobcontrol.Host):
    """
    Extension of jobcontrol.Host class with GPUs.

    In addition to what `jobcontrol.Host` provides, instances carry a GPU
    list, a GPU count, and an `ncpus` flag controlling whether processor
    counts are shown in the text label.
    """
    CPUTYPE, GPUTYPE = ('cpu', 'gpu')

    def __init__(self, name, processors=0, num_gpus=0, gpulist=None):
        """
        :param name: Name of the host
        :type name: str

        :param processors: Number of processors on the host
        :type processors: int

        :param num_gpus: Number of GPUs on the host
        :type num_gpus: int

        :param gpulist: GPUs on the host, or None if not (yet) known
        :type gpulist: list(Gpu) or None
        """
        jobcontrol.Host.__init__(self, name)
        self.processors = processors
        self.num_gpus = num_gpus
        self.gpu_list = gpulist
        self.ncpus = True  # Whether to display number of cpus in label

    def toJsonImplementation(self):
        """
        Return the constructor arguments of this host as a dict.
        """
        return {
            'name': self.name,
            'processors': self.processors,
            'num_gpus': self.num_gpus,
            'gpulist': self.gpu_list,
        }

    @classmethod
    def fromJsonImplementation(cls, json_dict):
        """
        Rebuild a host from the dict produced by `toJsonImplementation`.

        :param json_dict: Dict with the keys 'name', 'processors',
            'num_gpus' and 'gpulist'
        :type json_dict: dict

        :raises KeyError: If 'gpulist' is missing from `json_dict`
        """
        # Work on a shallow copy so the caller's dict is left unmodified.
        kwargs = dict(json_dict)
        gpulist = kwargs.pop('gpulist')
        if gpulist is not None:
            gpulist = [Gpu.fromJsonImplementation(data) for data in gpulist]
        return cls(gpulist=gpulist, **kwargs)

    @classmethod
    def fromJobControlHost(cls, jchost, ncpus):
        """
        Create a `Host` from a jobcontrol host object.

        The input object is shallow-copied and the copy is re-classed, so
        `jchost` itself is left untouched. The GPU list of the new host is
        populated by querying the host by name.

        :param jchost: The jobcontrol host to convert
        :type jchost: jobcontrol.Host

        :param ncpus: Whether the label should include processor counts
        :type ncpus: bool

        :rtype: Host
        """
        host = copy.copy(jchost)
        host.__class__ = Host
        host.ncpus = ncpus
        host.autoSetGpuList()
        return host

    def setGpuList(self, gpulist):
        """
        Store the GPU list and update the GPU count to match its length.

        :param gpulist: GPUs on this host
        :type gpulist: list(Gpu)
        """
        self.gpu_list = gpulist
        self.num_gpus = len(gpulist)

    def autoSetGpuList(self):
        """
        Query the GPUs for this host's name and store the result.
        """
        gpulist = get_GPGPUs(self.name)
        self.setGpuList(gpulist)

    def hostType(self):
        """
        Used to determine what type of host this is.

        :return: `GPUTYPE` if the host has at least one GPU, else `CPUTYPE`
        :rtype: str
        """
        if self.num_gpus > 0:
            return self.GPUTYPE
        return self.CPUTYPE

    def label(self):
        """
        Returns the string to show in controls, etc.

        The bare name is returned when `ncpus` is false. Otherwise the name
        is followed by the processor count and, for GPU hosts, the GPU
        count.
        """
        if not self.ncpus:
            return self.name
        if self.hostType() == self.GPUTYPE:
            return '%s (%d, %d)' % (self.name, self.processors, self.num_gpus)
        return '%s (%d)' % (self.name, self.processors)

    def units(self):
        """
        Return the unit string for the type of processors provided by this
        host.
        """
        if self.hostType() == self.CPUTYPE:
            return 'processors'
        else:
            return 'GPUs'

    def maxNum(self):
        """
        Returns the number of processors for the type of host - for GPU host,
        return the number of GPUs, for non-GPU hosts, return the number of
        CPUs.
        """
        if self.hostType() == self.CPUTYPE:
            return self.processors
        else:
            return self.num_gpus

    def __str__(self):
        return self.label()

    def __repr__(self):
        return self.label()

    def __eq__(self, other):
        """
        Allows equality comparisons between two different host objects that
        represent the same host. This is necessary if get_hosts() is called
        two different times and hosts from one call are to be compared with
        hosts from the other call.

        Hosts are considered equal if their labels match. The label always
        contains the name and, when `ncpus` is set, the processor count and
        (for GPU hosts) the GPU count.

        :param other: the host to be compared with self
        :type other: Host

        :return: True if `other` is a `Host` with the same label, else False
        :rtype: bool
        """
        if not isinstance(other, Host):
            # Always answer with a real bool rather than None.
            return False
        return self.label() == other.label()

    def __hash__(self):
        # Hash on the label so that hosts comparing equal hash equally.
        return hash(self.label())


class Gpu(JsonableClassMixin):
    """
    Simple record describing one GPU: its index and a description string.
    """

    def __init__(self, index, desc):
        """
        :param index: Index of the GPU
        :param desc: Description of the GPU
        """
        self.index, self.description = index, desc

    def toJsonImplementation(self):
        """
        Return the constructor arguments of this GPU as a dict.
        """
        return dict(index=self.index, desc=self.description)

    @classmethod
    def fromJsonImplementation(cls, json_dict):
        """
        Rebuild a GPU from the dict produced by `toJsonImplementation`.
        """
        return cls(**json_dict)


def get_hosts(ncpus=True, excludeGPGPUs=True):
    """
    Return a list of Host objects for use in config dialogs. Note these are
    a subclass of jobcontrol.Host which has additional features for text
    labels and accounting for GPUs.
    If schrodinger.hosts file is missing, only localhost will be returned. If
    it is unreadable, then jobcontrol.UnreadableHostsFileException will be raised.

    :param ncpus: whether host text labels should include number of processors
    :type ncpus: bool

    :param excludeGPGPUs: whether to exclude GPU hosts from the list
    :type excludeGPGPUs: bool

    :return: a list of Host objects
    :rtype: list

    :raises jobcontrol.UnreadableHostsFileException: If host file cannot be read
    """
    try:
        host_object_list = jobcontrol.get_hosts()
    except jobcontrol.MissingHostsFileException:
        print('WARNING: File at path $SCHRODINGER_HOSTS does not exist')
        # If hosts file is not found, return just the localhost PANEL-3653
        # because jobs to localhosts can still be submitted.
        host_object_list = [jobcontrol.Host(LOCALHOST)]
    # Add a 'localhost-gpu' entry right after localhost when GPUs are found.
    _insert_local_gpu_host(host_object_list)

    hosts = []
    for oldhost in host_object_list:
        host = Host.fromJobControlHost(oldhost, ncpus)
        # A non-empty GPU list marks the host as a GPU host.
        if excludeGPGPUs and host.gpu_list:
            continue
        hosts.append(host)
    return hosts


def get_host_by_name(name):
    """
    Look up a Host by name in the full hosts list (GPU hosts included).

    :param name: Name of the host to find
    :type name: str

    :return: The first host whose name equals `name`
    :rtype: Host

    :raises RuntimeError: If no host with that name exists
    """
    candidates = (
        entry for entry in get_hosts(excludeGPGPUs=False)
        if entry.name == name
    )
    match = next(candidates, None)
    if match is None:
        raise RuntimeError(f'Could not find a host named {name}.')
    return match


def _insert_local_gpu_host(host_object_list):
    """
    Add a 'localhost-gpu' entry to the given host list when appropriate.

    Only the first host named 'localhost' is considered. If local GPUs are
    reported, a shallow copy of that host, renamed to 'localhost-gpu', is
    inserted directly after it. The list is modified in place.

    :param host_object_list: List of host objects
    :type host_object_list: list(Host)
    """
    for position, host in enumerate(host_object_list):
        if host.name != LOCALHOST:
            continue
        if get_GPGPUs(LOCALHOST_GPU):
            gpu_host = copy.copy(host)
            gpu_host.name = LOCALHOST_GPU
            host_object_list.insert(position + 1, gpu_host)
        # Stop after the first localhost entry, whether or not it has GPUs.
        return