Source code for schrodinger.utils.pandasutils

"""
A module which contains functions to convert between Schrodinger project data
and a Pandas data frame.
"""
import collections
import re
from enum import Enum

import pandas as pd
from rdkit.Chem import PandasTools

from schrodinger import project
from schrodinger.structutils.smiles import SmilesGenerator

class WhichRows(Enum):
    """Choice of project rows to export: only the selected ones, or all."""
    SELECTED = 1
    ALL = 2


class WhichColumns(Enum):
    """Choice of project property columns to export: visible ones, or all."""
    VISIBLE = 1
    ALL = 2


def get_data_frame_from_project(pt,
                                which_rows=WhichRows.ALL,
                                which_columns=WhichColumns.ALL,
                                prop_filter=None,
                                with_rdkit=False,
                                with_smiles=False):
    """
    Return a Pandas data frame given a Schrodinger project object (as might
    be returned from maestro.get_project_table()).

    Each exported property becomes one column, keyed by its dataname, and
    each project row becomes one data frame row.

    :param pt: Project to convert
    :type pt: project.Project object, already open via Maestro or stand-alone

    :param which_rows: Which rows from the project are to be converted
        (all or selected)
    :type which_rows: WhichRows (enum)

    :param which_columns: Which columns from the project are to be converted
        (all or visible)
    :type which_columns: WhichColumns (enum)

    :param prop_filter: A regular expression which, if defined, will restrict
        the properties to datanames which match this expression. Matching is
        anchored at the start of the dataname (`re.match` semantics).
    :type prop_filter: str

    :param with_rdkit: A flag which indicates if RDKit MOL objects should be
        added (as a "RDKit Mol" column). The molecules are built from the
        "smiles" column, so this also adds the "smiles" column.
    :type with_rdkit: boolean

    :param with_smiles: A flag which indicates if a "smiles" column holding
        the SMILES string of each row's structure should be added
    :type with_smiles: boolean

    :return: A Pandas dataframe populated with data from the project
    :rtype: pandas.DataFrame
    """
    if which_columns == WhichColumns.ALL:
        prop_names = pt.getPropertyNames()
    else:
        prop_names = pt.getVisiblePropertyNames()

    if prop_filter:
        # Compile once instead of re-resolving the pattern per property name.
        pattern = re.compile(prop_filter)
        prop_names = [p for p in prop_names if pattern.match(p)]
    else:
        # Materialize so the names can safely be iterated more than once.
        prop_names = list(prop_names)

    # The RDKit molecule column is derived from the SMILES column, so SMILES
    # are needed whenever either flag is set.
    need_smiles = with_rdkit or with_smiles
    sg = SmilesGenerator() if need_smiles else None

    # Pre-seed every output column so that a project with no (selected) rows
    # still yields a frame with the expected columns. Without this, the
    # "smiles" column would be absent for an empty row set and the RDKit
    # molecule column could not be added.
    data_dict = {p: [] for p in prop_names}
    if need_smiles:
        data_dict.setdefault("smiles", [])

    rows = pt.all_rows if which_rows == WhichRows.ALL else pt.selected_rows
    for row in rows:
        for p in prop_names:
            data_dict[p].append(row.property[p])
        if need_smiles:
            data_dict["smiles"].append(sg.getSmiles(row.getStructure()))

    df = pd.DataFrame(data_dict)
    if with_rdkit:
        PandasTools.AddMoleculeColumnToFrame(df, "smiles", "RDKit Mol")
    return df