# Source code for schrodinger.test.performance.reporter

"""
Basic client-side components for performance testing.  Typical clients should
only need to use the `Test` class.

@copyright: (c) Schrodinger, LLC All rights reserved.

"""

import datetime
import getpass
import json
import numbers
import os
import platform
import re
import sys
from past.utils import old_div
from typing import Optional

import psutil
import requests

import schrodinger.job.util
import schrodinger.test.stu.client
from schrodinger.test.stu import client
from schrodinger.infra import mm
from schrodinger.utils import fileutils
from schrodinger.utils import sysinfo

# Default server address used by api_url() and performance_api_url().
HOST = 'https://stu.schrodinger.com'
# Number of bytes in one megabyte (2**20), kept as a float so that dividing a
# byte count by it is always a true division.
MB = float(1024 * 1024)
# The only build types accepted by install_information().
BUILD_TYPES = ('OB', 'NB', 'CB', 'Dev')

### PUBLIC API:
##############


class BadResponse(AssertionError):
    """
    Raised when a server response is not what was expected, e.g. the HTTP
    status code does not match the required one or a requested product does
    not exist.
    """


class Test:
    """
    A performance test. `name` and `product` must uniquely specify a test.
    `product` is required to match an existing product name in the database.
    New tests require descriptions when uploaded. The descriptions of existing
    tests are not changed by result upload.

    Individual results are added with addResult(). All results are uploaded to
    the database when report() is called.

    Instantiate with `scival` set to `True` if you are working with scival
    performance tests.

    Typical pattern::

        test = performance.Test("distribution_size", "shared components",
                ("Determine the size of the SCHRODINGER distribution and "
                 "report it to the performance database."))
        # Result with a metric name and value
        test.addResult('file count', 200000)
        # Result with a metric name, value, and units
        test.addResult('size', 20000, 'MB')
        test.report()

    """

    def __init__(self,
                 name,
                 product,
                 description=None,
                 scival=False,
                 upload=True):
        """
        :param name: Name of the test (non-empty string).
        :param product: Name of the product the test belongs to (non-empty
            string).
        :param description: (optional) description of the test; must be a
            string when given.
        :param scival: Passed on to get_or_create_test(), which tags newly
            created tests with 'scival' when this is True.
        :param upload: When false, the server is not contacted and report()
            does not upload anything.

        :raises TypeError: if an argument is missing or has the wrong type.
        """
        if not name or not product:
            raise TypeError('name and product are required')

        if not isinstance(name, str):
            raise TypeError('Name must be a string')
        if not isinstance(product, str):
            raise TypeError('Product name must be a string')
        if description and not isinstance(description, str):
            raise TypeError('Description must be a string')
        if not isinstance(scival, bool):
            raise TypeError('scival must be a boolean')

        if upload:
            self.username = client.get_stu_username()
            # URI of the (possibly newly created) test on the server.
            self.test = get_or_create_test(
                name,
                description,
                product,
                username=self.username,
                scival=scival)
        else:
            self.username = None
            self.test = None
        # Results accumulated by addResult() until report() is called.
        self.results = []

    def addResult(self, name: str, value: float, units: Optional[str] = None):
        """
        Add a result to the current test. Results are not uploaded until
        report() is called.

        :param name: Name of the metric being reported
        :param value: Current value of the metric
        :param units: (optional) units of the value.

        :raises TypeError: if validate_types() rejects one of the arguments.
        """
        # Validate data types before attempting upload to the server.
        validate_types(name, value, units)

        metric = dict(name=name, units=units)
        self.results.append(dict(metric=metric, value=value))

    def report(self, build_id=None, buildtype=None, mmshare=None, release=None):
        """
        Once all results have been added to the test, report them to the
        database.

        The four arguments are forwarded to install_information() to describe
        the build the results were measured on.

        :raises ValueError: if no results have been added.
        :raises BadResponse: if the upload succeeds with a status other than
            201.
        """
        if not self.results:
            raise ValueError("No results to report")
        if not self.test:
            # No server-side test (created with upload=False): nothing to do.
            return

        auth = schrodinger.test.stu.client.ApiKeyAuth(self.username)
        system = post_system(auth)
        build = install_information(
            build_id, buildtype, mmshare=mmshare, release=release)
        build_uri = get_or_create(api_url('build'), auth, build)
        post_data = json.dumps(
            dict(
                test=self.test,
                system=system,
                build=build_uri,
                metrics=self.results))

        response = requests.post(
            performance_api_url('result'),
            data=post_data,
            headers={'content-type': 'application/json'},
            auth=auth)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            # Show the rejected payload to help debugging, then propagate.
            sys.stderr.write('Failed while trying to upload:\n')
            sys.stderr.write(post_data + '\n')
            raise
        if response.status_code != 201:
            raise BadResponse(
                'Response %s (%s) did not match required status "%s"' %
                (response.reason, response.status_code, 201))


def validate_types(name, value, units=None):
    """
    Validate data types before attempting upload to the server.

    :param name: Name of the metric; must be a string.
    :param value: Value of the metric; must be a `numbers.Number`.
    :param units: (optional) units of the value; must be a string unless it
        is None.

    :raises TypeError: if any argument has the wrong type.
    """
    if not isinstance(name, str):
        raise TypeError(
            f'Names of metrics values must be strings (found {name})')
    if not isinstance(value, numbers.Number):
        raise TypeError(
            'Result values must be numeric (found {!r} for {})'.format(
                value, name))
    # Compare against None rather than truthiness so that falsy non-string
    # units such as 0 or [] are rejected as well.
    if units is not None and not isinstance(units, str):
        raise TypeError(f'Units must be strings (found {units!r} for {name})')


### PRIVATE/SUPPORT code
###
### Everything below here is intended to support the public API above
#####################################################################


def get_or_create_test(name,
                       description,
                       product_name,
                       username=None,
                       scival=False):
    """
    Get or create a single test from the performance database.

    Setting `scival` to `True` will add the 'scival' tag when creating a new test.

    :param name: Name of the test.
    :param description: Description of the test; only required (and only
        used) when the test does not exist yet.
    :param product_name: Name of an existing product.
    :param username: User to authenticate as; looked up with
        client.get_stu_username() when None.
    :param scival: Whether to tag a newly created test with 'scival'.

    :return: Resource URI of the test.

    :raises BadResponse: if the product does not exist or the request is
        rejected as unauthorized.
    :raises ValueError: if a new test has to be created but no description
        was given.
    """
    if username is None:
        username = client.get_stu_username()
    auth = schrodinger.test.stu.client.ApiKeyAuth(username)

    # Look up the product first: tests are identified by name + product id.
    response = requests.get(
        api_url('product'), params=dict(name=product_name), auth=auth)
    no_product_msg = ('No product named "{}". See the list of product names '
                      'at {}/products. File a JIRA case in SHARED if you need '
                      'to add a product.'.format(product_name, HOST))
    if response.status_code == 404:
        raise BadResponse(no_product_msg)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        if response.status_code == 401:
            raise BadResponse(
                f'{http_error}, please verify that the appropriate'
                f' user is making this request: {username=}') from http_error
        raise
    products = response.json()['objects']
    if not products:
        raise BadResponse(no_product_msg)
    product_id = resource_id(products[0]['resource_uri'])

    test_url = performance_api_url('test')
    test_dict = dict(name=name, product=product_id)
    # Get an existing test:
    response = requests.get(test_url, params=test_dict, auth=auth)
    # Fail with the HTTP error itself rather than a confusing JSON/KeyError
    # when the lookup is rejected.
    response.raise_for_status()
    existing = response.json()['objects']
    if existing:
        return existing[0]['resource_uri']

    # Create a new test:
    if not description:
        raise ValueError("Description is required when uploading a new test.")

    test_dict['description'] = description
    if scival:
        test_dict['tags'] = ['scival']
    response = requests.post(
        test_url,
        data=json.dumps(test_dict),
        headers={'content-type': 'application/json'},
        auth=auth)
    response.raise_for_status()
    # The server reports the new resource as an absolute URL; strip the host
    # so the result has the same form as an existing test's resource_uri.
    return response.headers['location'].replace(HOST, '')


def api_url(resource_name, item_id=None, host=None):
    """
    Get an address on the core server.

    :param resource_name: Name of the API resource, e.g. 'build'.
    :param item_id: (optional) id of a single item of that resource.
    :param host: (optional) server address; defaults to `HOST`.

    :return: URL of the resource (or of the item), ending in a slash.
    """
    base = host or HOST
    url = f'{base}/api/v1/{resource_name}/'
    if item_id is not None:
        url += f'{item_id}/'
    return url


def performance_api_url(resource_name, item_id=None, host=None):
    """
    Get an address in the performance bit of the server.

    Same as api_url(), except that '/performance' is appended to the host
    (`HOST` by default) before the API path.

    :param resource_name: Name of the API resource, e.g. 'result'.
    :param item_id: (optional) id of a single item of that resource.
    :param host: (optional) server address; defaults to `HOST`.
    """
    performance_host = (host or HOST) + '/performance'
    return api_url(resource_name, item_id, performance_host)


def resource_id(uri):
    """
    Get the resource's ID number from a uri.

    :param uri: Resource URI ending in a number, optionally followed by a
        slash, e.g. '/api/v1/product/42/'.

    :return: The trailing number, as a string.

    :raises ValueError: if the uri does not end in a number.
    """
    match = re.search(r'(\d+)/?$', uri)
    if match is None:
        raise ValueError(f'No resource id found at the end of uri {uri!r}')
    return match.group(1)


def get_or_create(url, auth, params):
    """
    Get or create a resource matching the parameters.

    :param url: URL of the resource collection.
    :param auth: Authentication object passed to `requests`.
    :param params: dict describing the resource; used as the query when
        looking for an existing one and as the JSON body when creating one.

    :return: Resource URI of the existing or newly created resource.
    """
    lookup = requests.get(url, params=params, auth=auth)
    # Fail with the HTTP error itself rather than a confusing JSON/KeyError
    # when the lookup is rejected.
    lookup.raise_for_status()
    matches = lookup.json()['objects']
    if matches:
        return matches[0]['resource_uri']

    created = requests.post(
        url,
        data=json.dumps(params),
        headers={'content-type': 'application/json'},
        auth=auth)
    created.raise_for_status()
    # The server reports the new resource as an absolute URL; strip the host
    # so the result has the same form as an existing resource_uri.
    return created.headers['location'].replace(HOST, '')


def system_information(host):
    """
    System information required to report results.

    :param host: Identifier of the host, stored unchanged under the `host`
        key of the result.

    :return: dict with the processor count and, in units of `MB` bytes
        (truncated to an integer), the total memory and the total size of
        the disks holding the home and temporary directories.
    """

    def disk_size(directory):
        # MB is a float, so "/" is a true division.
        return int(psutil.disk_usage(directory).total / MB)

    home = fileutils.get_directory_path(fileutils.HOME)
    scratch = fileutils.get_directory_path(fileutils.TEMP)

    return dict(
        host=host,
        processor_count=psutil.cpu_count(),
        memory=int(psutil.virtual_memory().total / MB),
        home_size=disk_size(home),
        scratch_size=disk_size(scratch))


def host_information():
    """
    Host information required to report results.

    :return: dict with the keys `name` (network name of this machine),
        `platform` (last path component of the MMSHARE_EXEC environment
        variable), `os_version` and `processor`.

    :raises RuntimeError: if MMSHARE_EXEC is not set in the environment.
    """
    hostname = platform.node()
    mmshare_exec = os.getenv('MMSHARE_EXEC')
    if not mmshare_exec:
        # Without this check os.path.basename(None) fails with an opaque
        # TypeError.
        raise RuntimeError(
            'The MMSHARE_EXEC environment variable must be set to determine '
            'the platform.')
    return dict(
        name=hostname,
        platform=os.path.basename(mmshare_exec),
        os_version=sysinfo.get_osname(),
        processor=sysinfo.get_cpu())


def post_system(auth):
    """
    Post the current host's system information to the performance test server.

    The host is looked up (or created) first, then the system that refers to
    it by id.

    :param auth: Authentication object passed to `requests`.

    :return: URI for the system.
    """
    host_uri = get_or_create(api_url('host'), auth, host_information())
    system_data = system_information(resource_id(host_uri))
    return get_or_create(api_url('system'), auth, system_data)


def install_information(build_id=None,
                        buildtype=None,
                        mmshare=None,
                        release=None):
    """
    Execution environment information required to report results.

    Any value that is not supplied is filled in: `mmshare` and `release` from
    the `mm` module, `build_id` and `buildtype` from
    guess_build_type_and_id(). Values that are supplied are kept as given.

    :param build_id: (optional) identifier of the build.
    :param buildtype: (optional) one of `BUILD_TYPES`.
    :param mmshare: (optional) mmshare version; required if `release` is
        given.
    :param release: (optional) release name.

    :return: dict with the keys `mmshare`, `release`, `build_id` and
        `buildtype`.

    :raises ValueError: if `release` is given without `mmshare`, or if the
        build type is not one of `BUILD_TYPES`.
    """
    if not mmshare:
        if release:
            raise ValueError('mmshare is required if release is provided')
        mmshare = mm.mmfile_get_product_version("mmshare")
    if not release:
        release = mm.mmfile_get_release_name()
    if not build_id or not buildtype:
        guessed_type, guessed_id = guess_build_type_and_id(mmshare, buildtype)
        # Only fill in what is missing, so that a caller-supplied build_id is
        # not silently replaced by the guess.
        buildtype = buildtype or guessed_type
        build_id = build_id or guessed_id

    if buildtype not in BUILD_TYPES:
        raise ValueError('Build type must be one of {}. Found "{}".'.format(
            ', '.join(BUILD_TYPES), buildtype))

    return dict(
        mmshare=mmshare,
        release=release,
        build_id=build_id,
        buildtype=buildtype)


def guess_build_type_and_id(mmshare, buildtype=None):
    """
    Provide reasonable default values for the buildtype and build_id. When
    possible, reads from the environment variables SCHRODINGER_BUILDTYPE and
    SCHRODINGER_BUILD_ID, otherwise guesses based on the date.

    :param mmshare: mmshare version; used to derive the build id of 'OB'
        builds.
    :param buildtype: (optional) build type; when given it is returned
        unchanged and SCHRODINGER_BUILDTYPE is ignored.

    :return: tuple of (buildtype, build_id).
    """
    if not buildtype:
        # An unset or empty variable falls back to 'Dev', mirroring the
        # handling of SCHRODINGER_BUILD_ID below.
        buildtype = os.environ.get('SCHRODINGER_BUILDTYPE') or 'Dev'
    build_id = os.environ.get('SCHRODINGER_BUILD_ID')
    if not build_id:
        if buildtype == 'OB':
            # Everything after the first three characters of the version;
            # presumably the build number -- TODO confirm.
            build_id = 'build' + str(mmshare)[3:]
        else:
            build_id = datetime.datetime.now().strftime('%Y-%m-%d')
    return buildtype, build_id