1) Cheminformatic toolkits¶

Inspect basic functionality of popular toolkits in cheminformatics:

  1. Conversion between the two most popular formats: SMILES and InChI strings
  2. Calculation of molecular descriptors

Open Babel¶

  • www.openbabel.org
In [1]:
from openbabel import pybel

def openbabel_smiles_to_inchi(smiles):
    """Convert a SMILES string to an InChI string with Open Babel.

    The input is parsed with pybel's "smi" reader and written back with
    the "inchi" writer; surrounding whitespace in the writer's output is
    stripped before returning.
    """
    parsed = pybel.readstring("smi", smiles)
    return parsed.write("inchi").strip()

def openbabel_inchi_to_smiles(inchi):
    """Convert an InChI string to a SMILES string with Open Babel.

    The input is parsed with pybel's "inchi" reader and written back with
    the "smiles" writer; surrounding whitespace in the writer's output is
    stripped before returning.
    """
    parsed = pybel.readstring("inchi", inchi)
    return parsed.write("smiles").strip()

# Round-trip check with Open Babel: SMILES -> InChI -> SMILES.
smiles1 = "CCO"
inchi = openbabel_smiles_to_inchi(smiles1)
smiles2 = openbabel_inchi_to_smiles(inchi)
print(" => ".join((smiles1, inchi, smiles2)))
CCO => InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3 => CCO

RDKit¶

  • www.rdkit.org
In [2]:
from rdkit import Chem

def rdkit_smiles_to_inchi(smiles):
    """Convert a SMILES string to an InChI string with RDKit.

    Args:
        smiles: SMILES string to convert.

    Returns:
        The InChI string produced by ``Chem.MolToInchi``.

    Raises:
        ValueError: if RDKit cannot parse ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None instead of
    # raising; fail here with a clear message rather than letting the None
    # reach MolToInchi.
    if mol is None:
        raise ValueError("RDKit could not parse SMILES: {!r}".format(smiles))
    return Chem.MolToInchi(mol)

def rdkit_inchi_to_smiles(inchi):
    """Convert an InChI string to a SMILES string with RDKit.

    Args:
        inchi: InChI string to convert.

    Returns:
        The SMILES string produced by ``Chem.MolToSmiles``.

    Raises:
        ValueError: if RDKit cannot parse ``inchi``.
    """
    mol = Chem.MolFromInchi(inchi)
    # MolFromInchi signals a parse failure by returning None instead of
    # raising; fail here with a clear message rather than letting the None
    # reach MolToSmiles.
    if mol is None:
        raise ValueError("RDKit could not parse InChI: {!r}".format(inchi))
    return Chem.MolToSmiles(mol)

# Round-trip check with RDKit: SMILES -> InChI -> SMILES.
smiles1 = "CCO"
inchi = rdkit_smiles_to_inchi(smiles1)
smiles2 = rdkit_inchi_to_smiles(inchi)
print(" => ".join((smiles1, inchi, smiles2)))
CCO => InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3 => CCO

Indigo toolkit¶

  • www.lifescience.opensource.epam.com/indigo/
In [3]:
from indigo import Indigo
from indigo import inchi as indigo_inchi

# Indigo instance shared by the conversion helpers defined below.
indigo = Indigo()
# NOTE: this rebinds `indigo_inchi` from the imported module to an
# IndigoInchi object built on the instance above; after this line the
# module itself is no longer reachable under that name.
indigo_inchi = indigo_inchi.IndigoInchi(indigo)

def indigo_smiles_to_inchi(smiles):
    """Convert a SMILES string to an InChI string with Indigo.

    The string is loaded through the module-level ``indigo`` instance and
    the resulting molecule is passed to ``indigo_inchi.getInchi``.
    """
    return indigo_inchi.getInchi(indigo.loadMolecule(smiles))

def indigo_inchi_to_smiles(inchi):
    """Convert an InChI string to a SMILES string with Indigo.

    The InChI is loaded through the module-level ``indigo_inchi`` object
    and the resulting molecule's ``smiles()`` value is returned.
    """
    return indigo_inchi.loadMolecule(inchi).smiles()

# Round-trip check with Indigo: SMILES -> InChI -> SMILES.
# The input is bound to `smiles1` (as in the other toolkit demos) so the
# print below reports this cell's own input rather than a value left over
# from an earlier cell.
smiles1 = "CCO"
inchi = indigo_smiles_to_inchi(smiles1)
smiles2 = indigo_inchi_to_smiles(inchi)
print(smiles1, inchi, smiles2, sep=" => ")
CCO => InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3 => CCO

PaDEL-Descriptor¶

  • www.yapcwsoft.com/dd/padeldescriptor
  • www.github.com/ecrl/padelpy
In [4]:
from padelpy import from_smiles

def padel_smiles_to_descriptors(smiles):
    """Compute PaDEL descriptors for a single SMILES string.

    ``from_smiles`` is called with a one-element list and the first entry
    of its result is returned.
    """
    return from_smiles([smiles])[0]

# Show only the first 1000 characters of the descriptor mapping.
descriptors = padel_smiles_to_descriptors("CCO")
preview = str(descriptors)[:1000]
print(preview, '...')
{'nAcid': '0', 'ALogP': '-0.1075999999999997', 'ALogp2': '0.011577759999999935', 'AMR': '12.5551', 'apol': '8.322758', 'naAromAtom': '0', 'nAromBond': '0', 'nAtom': '9', 'nHeavyAtom': '3', 'nH': '6', 'nB': '0', 'nC': '2', 'nN': '0', 'nO': '1', 'nS': '0', 'nP': '0', 'nF': '0.0', 'nCl': '0.0', 'nBr': '0.0', 'nI': '0.0', 'nX': '0.0', 'ATS0m': '550.5926270000002', 'ATS1m': '413.0905419999999', 'ATS2m': '301.12475700000005', 'ATS3m': '68.61657600000001', 'ATS4m': '3.0481920000000002', 'ATS5m': '0.0', 'ATS6m': '0.0', 'ATS7m': '0.0', 'ATS8m': '0.0', 'ATS0v': '1249.9270470485976', 'ATS1v': '1381.9431163032111', 'ATS2v': '1279.5114392735875', 'ATS3v': '609.447463694361', 'ATS4v': '93.25123329279079', 'ATS5v': '0.0', 'ATS6v': '0.0', 'ATS7v': '0.0', 'ATS8v': '0.0', 'ATS0e': '68.74353199999999', 'ATS1e': '62.633728', 'ATS2e': '98.55586799999999', 'ATS3e': '89.27884799999998', 'ATS4e': '20.155392', 'ATS5e': '0.0', 'ATS6e': '0.0', 'ATS7e': '0.0', 'ATS8e': '0.0', 'ATS0p': '8.888681429094001', 'ATS1p' ...

2) OpenCog Hyperon¶

Bring basic functionality of popular toolkits in cheminformatics to OpenCog Hyperon.

Load MeTTa¶

In [5]:
from hyperon import *

# MeTTa instance that the later cells register atoms on and run programs with.
metta = MeTTa()

Register functions as grounded atoms¶

In [6]:
def register_function(name, func):
    """Expose a Python callable to MeTTa under the given name.

    The callable is wrapped in an ``OperationAtom`` named ``name`` and
    registered on the module-level ``metta`` instance under that same name.
    """
    metta.register_atom(name, OperationAtom(name, func))

# Register every toolkit wrapper as a MeTTa operation, in the same order
# as the toolkits were introduced above.
for _op_name, _op_func in (
    ("openbabel-smiles-to-inchi", openbabel_smiles_to_inchi),
    ("openbabel-inchi-to-smiles", openbabel_inchi_to_smiles),
    ("rdkit-smiles-to-inchi", rdkit_smiles_to_inchi),
    ("rdkit-inchi-to-smiles", rdkit_inchi_to_smiles),
    ("indigo-smiles-to-inchi", indigo_smiles_to_inchi),
    ("indigo-inchi-to-smiles", indigo_inchi_to_smiles),
    ("padel-smiles-to-descriptors", padel_smiles_to_descriptors),
):
    register_function(_op_name, _op_func)

Use the functions individually¶

In [7]:
# Call the registered Open Babel SMILES -> InChI operation from a MeTTa program.
metta.run('''
!(openbabel-smiles-to-inchi "CCO")
''')
Out[7]:
[["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]]
In [8]:
# Call the registered RDKit InChI -> SMILES operation from a MeTTa program.
metta.run('''
!(rdkit-inchi-to-smiles "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3")
''')
Out[8]:
[["CCO"]]
In [9]:
# Call the registered Indigo SMILES -> InChI operation from a MeTTa program.
metta.run('''
!(indigo-smiles-to-inchi "CCO")
''')
Out[9]:
[["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]]
In [10]:
# Call the registered PaDEL descriptor operation from a MeTTa program.
# NOTE(review): the displayed result is the full descriptor mapping, which is
# very long; consider showing only a slice of it when sharing the notebook.
metta.run('''
!(padel-smiles-to-descriptors "CCO")
''')
Out[10]:
[[{'nAcid': '0', 'ALogP': '-0.1075999999999997', 'ALogp2': '0.011577759999999935', 'AMR': '12.5551', 'apol': '8.322758', 'naAromAtom': '0', 'nAromBond': '0', 'nAtom': '9', 'nHeavyAtom': '3', 'nH': '6', 'nB': '0', 'nC': '2', 'nN': '0', 'nO': '1', 'nS': '0', 'nP': '0', 'nF': '0.0', 'nCl': '0.0', 'nBr': '0.0', 'nI': '0.0', 'nX': '0.0', 'ATS0m': '550.5926270000002', 'ATS1m': '413.0905419999999', 'ATS2m': '301.12475700000005', 'ATS3m': '68.61657600000001', 'ATS4m': '3.0481920000000002', 'ATS5m': '0.0', 'ATS6m': '0.0', 'ATS7m': '0.0', 'ATS8m': '0.0', 'ATS0v': '1249.9270470485976', 'ATS1v': '1381.9431163032111', 'ATS2v': '1279.5114392735875', 'ATS3v': '609.447463694361', 'ATS4v': '93.25123329279079', 'ATS5v': '0.0', 'ATS6v': '0.0', 'ATS7v': '0.0', 'ATS8v': '0.0', 'ATS0e': '68.74353199999999', 'ATS1e': '62.633728', 'ATS2e': '98.55586799999999', 'ATS3e': '89.27884799999998', 'ATS4e': '20.155392', 'ATS5e': '0.0', 'ATS6e': '0.0', 'ATS7e': '0.0', 'ATS8e': '0.0', 'ATS0p': '8.888681429094001', 'ATS1p': '10.230729536', 'ATS2p': '10.868593451396002', 'ATS3p': '6.2747515067920006', 'ATS4p': '1.3338387145469999', 'ATS5p': '0.0', 'ATS6p': '0.0', 'ATS7p': '0.0', 'ATS8p': '0.0', 'ATS0i': '1548.5459101279941', 'ATS1i': '1230.9346997656498', 'ATS2i': '2182.1177761816957', 'ATS3i': '2188.016593995342', 'ATS4i': '554.7529560727469', 'ATS5i': '0.0', 'ATS6i': '0.0', 'ATS7i': '0.0', 'ATS8i': '0.0', 'ATS0s': '48.25', 'ATS1s': '27.0', 'ATS2s': '38.0', 'ATS3s': '28.0', 'ATS4s': '3.0', 'ATS5s': '0.0', 'ATS6s': '0.0', 'ATS7s': '0.0', 'ATS8s': '0.0', 'AATS0m': '61.17695855555558', 'AATS1m': '51.63631774999999', 'AATS2m': '23.16344284615385', 'AATS3m': '5.7180480000000005', 'AATS4m': '1.016064', 'AATS5m': '0.0', 'AATS6m': '0.0', 'AATS7m': '0.0', 'AATS8m': '0.0', 'AATS0v': '138.88078300539973', 'AATS1v': '172.7428895379014', 'AATS2v': '98.42395686719904', 'AATS3v': '50.787288641196746', 'AATS4v': '31.083744430930263', 'AATS5v': '0.0', 'AATS6v': '0.0', 'AATS7v': '0.0', 'AATS8v': '0.0', 'AATS0e': 
'7.638170222222221', 'AATS1e': '7.829216', 'AATS2e': '7.5812206153846144', 'AATS3e': '7.4399039999999985', 'AATS4e': '6.718464', 'AATS5e': '0.0', 'AATS6e': '0.0', 'AATS7e': '0.0', 'AATS8e': '0.0', 'AATS0p': '0.9876312698993335', 'AATS1p': '1.278841192', 'AATS2p': '0.8360456501073847', 'AATS3p': '0.5228959588993334', 'AATS4p': '0.44461290484899996', 'AATS5p': '0.0', 'AATS6p': '0.0', 'AATS7p': '0.0', 'AATS8p': '0.0', 'AATS0i': '172.06065668088823', 'AATS1i': '153.86683747070623', 'AATS2i': '167.85521355243813', 'AATS3i': '182.33471616627853', 'AATS4i': '184.91765202424895', 'AATS5i': '0.0', 'AATS6i': '0.0', 'AATS7i': '0.0', 'AATS8i': '0.0', 'AATS0s': '5.361111111111111', 'AATS1s': '3.375', 'AATS2s': '2.923076923076923', 'AATS3s': '2.3333333333333335', 'AATS4s': '1.0', 'AATS5s': '0.0', 'AATS6s': '0.0', 'AATS7s': '0.0', 'AATS8s': '0.0', 'ATSC0c': '0.21232861100284747', 'ATSC1c': '-0.09952329081055883', 'ATSC2c': '-0.015583991379693823', 'ATSC3c': '-0.0069555382483837205', 'ATSC4c': '0.015898514937212657', 'ATSC5c': '0.0', 'ATSC6c': '0.0', 'ATSC7c': '0.0', 'ATSC8c': '0.0', 'ATSC0m': '314.7756535555555', 'ATSC1m': '-63.89650893827171', 'ATSC2m': '-116.86383002469134', 'ATSC3m': '-27.322969629629505', 'ATSC4m': '50.69548181481486', 'ATSC5m': '0.0', 'ATSC6m': '0.0', 'ATSC7m': '0.0', 'ATSC8m': '0.0', 'ATSC0v': '363.45665236232037', 'ATSC1v': '-88.00132617908156', 'ATSC2v': '-193.0207684487799', 'ATSC3v': '42.54529022654171', 'ATSC4v': '56.74847822015958', 'ATSC5v': '0.0', 'ATSC6v': '0.0', 'ATSC7v': '0.0', 'ATSC8v': '0.0', 'ATSC0e': '0.9667315555555552', 'ATSC1e': '-0.13822093827160434', 'ATSC2e': '-0.1842966913580244', 'ATSC3e': '-0.23036296296296305', 'ATSC4e': '0.06951481481481424', 'ATSC5e': '0.0', 'ATSC6e': '0.0', 'ATSC7e': '0.0', 'ATSC8e': '0.0', 'ATSC0p': '1.5215138905868888', 'ATSC1p': '-0.37905875450079074', 'ATSC2p': '-0.8958158073719507', 'ATSC3p': '0.34424574592592644', 'ATSC4p': '0.16987187065337056', 'ATSC5p': '0.0', 'ATSC6p': '0.0', 'ATSC7p': '0.0', 'ATSC8p': 
'0.0', 'ATSC0i': '8.524803221453547', 'ATSC1i': '-2.0951525621655884', 'ATSC2i': '-5.003585488201571', ']]

Use the functions combined¶

In [11]:
# Chain the registered operations in one nested MeTTa expression:
# InChI -> SMILES (Open Babel) -> InChI (RDKit) -> SMILES (Indigo)
# -> descriptors (PaDEL).
metta.run('''
!(padel-smiles-to-descriptors
    (indigo-inchi-to-smiles
     (rdkit-smiles-to-inchi
       (openbabel-inchi-to-smiles "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"))))
''')
Out[11]:
[[{'nAcid': '0', 'ALogP': '-0.1075999999999997', 'ALogp2': '0.011577759999999935', 'AMR': '12.5551', 'apol': '8.322758', 'naAromAtom': '0', 'nAromBond': '0', 'nAtom': '9', 'nHeavyAtom': '3', 'nH': '6', 'nB': '0', 'nC': '2', 'nN': '0', 'nO': '1', 'nS': '0', 'nP': '0', 'nF': '0.0', 'nCl': '0.0', 'nBr': '0.0', 'nI': '0.0', 'nX': '0.0', 'ATS0m': '550.5926270000002', 'ATS1m': '413.0905419999999', 'ATS2m': '301.12475700000005', 'ATS3m': '68.61657600000001', 'ATS4m': '3.0481920000000002', 'ATS5m': '0.0', 'ATS6m': '0.0', 'ATS7m': '0.0', 'ATS8m': '0.0', 'ATS0v': '1249.9270470485976', 'ATS1v': '1381.9431163032111', 'ATS2v': '1279.5114392735875', 'ATS3v': '609.447463694361', 'ATS4v': '93.25123329279079', 'ATS5v': '0.0', 'ATS6v': '0.0', 'ATS7v': '0.0', 'ATS8v': '0.0', 'ATS0e': '68.74353199999999', 'ATS1e': '62.633728', 'ATS2e': '98.55586799999999', 'ATS3e': '89.27884799999998', 'ATS4e': '20.155392', 'ATS5e': '0.0', 'ATS6e': '0.0', 'ATS7e': '0.0', 'ATS8e': '0.0', 'ATS0p': '8.888681429094001', 'ATS1p': '10.230729536', 'ATS2p': '10.868593451396002', 'ATS3p': '6.2747515067920006', 'ATS4p': '1.3338387145469999', 'ATS5p': '0.0', 'ATS6p': '0.0', 'ATS7p': '0.0', 'ATS8p': '0.0', 'ATS0i': '1548.5459101279941', 'ATS1i': '1230.9346997656498', 'ATS2i': '2182.1177761816957', 'ATS3i': '2188.016593995342', 'ATS4i': '554.7529560727469', 'ATS5i': '0.0', 'ATS6i': '0.0', 'ATS7i': '0.0', 'ATS8i': '0.0', 'ATS0s': '48.25', 'ATS1s': '27.0', 'ATS2s': '38.0', 'ATS3s': '28.0', 'ATS4s': '3.0', 'ATS5s': '0.0', 'ATS6s': '0.0', 'ATS7s': '0.0', 'ATS8s': '0.0', 'AATS0m': '61.17695855555558', 'AATS1m': '51.63631774999999', 'AATS2m': '23.16344284615385', 'AATS3m': '5.7180480000000005', 'AATS4m': '1.016064', 'AATS5m': '0.0', 'AATS6m': '0.0', 'AATS7m': '0.0', 'AATS8m': '0.0', 'AATS0v': '138.88078300539973', 'AATS1v': '172.7428895379014', 'AATS2v': '98.42395686719904', 'AATS3v': '50.787288641196746', 'AATS4v': '31.083744430930263', 'AATS5v': '0.0', 'AATS6v': '0.0', 'AATS7v': '0.0', 'AATS8v': '0.0', 'AATS0e': 
'7.638170222222221', 'AATS1e': '7.829216', 'AATS2e': '7.5812206153846144', 'AATS3e': '7.4399039999999985', 'AATS4e': '6.718464', 'AATS5e': '0.0', 'AATS6e': '0.0', 'AATS7e': '0.0', 'AATS8e': '0.0', 'AATS0p': '0.9876312698993335', 'AATS1p': '1.278841192', 'AATS2p': '0.8360456501073847', 'AATS3p': '0.5228959588993334', 'AATS4p': '0.44461290484899996', 'AATS5p': '0.0', 'AATS6p': '0.0', 'AATS7p': '0.0', 'AATS8p': '0.0', 'AATS0i': '172.06065668088823', 'AATS1i': '153.86683747070623', 'AATS2i': '167.85521355243813', 'AATS3i': '182.33471616627853', 'AATS4i': '184.91765202424895', 'AATS5i': '0.0', 'AATS6i': '0.0', 'AATS7i': '0.0', 'AATS8i': '0.0', 'AATS0s': '5.361111111111111', 'AATS1s': '3.375', 'AATS2s': '2.923076923076923', 'AATS3s': '2.3333333333333335', 'AATS4s': '1.0', 'AATS5s': '0.0', 'AATS6s': '0.0', 'AATS7s': '0.0', 'AATS8s': '0.0', 'ATSC0c': '0.21232861100284747', 'ATSC1c': '-0.09952329081055883', 'ATSC2c': '-0.015583991379693823', 'ATSC3c': '-0.0069555382483837205', 'ATSC4c': '0.015898514937212657', 'ATSC5c': '0.0', 'ATSC6c': '0.0', 'ATSC7c': '0.0', 'ATSC8c': '0.0', 'ATSC0m': '314.7756535555555', 'ATSC1m': '-63.89650893827171', 'ATSC2m': '-116.86383002469134', 'ATSC3m': '-27.322969629629505', 'ATSC4m': '50.69548181481486', 'ATSC5m': '0.0', 'ATSC6m': '0.0', 'ATSC7m': '0.0', 'ATSC8m': '0.0', 'ATSC0v': '363.45665236232037', 'ATSC1v': '-88.00132617908156', 'ATSC2v': '-193.0207684487799', 'ATSC3v': '42.54529022654171', 'ATSC4v': '56.74847822015958', 'ATSC5v': '0.0', 'ATSC6v': '0.0', 'ATSC7v': '0.0', 'ATSC8v': '0.0', 'ATSC0e': '0.9667315555555552', 'ATSC1e': '-0.13822093827160434', 'ATSC2e': '-0.1842966913580244', 'ATSC3e': '-0.23036296296296305', 'ATSC4e': '0.06951481481481424', 'ATSC5e': '0.0', 'ATSC6e': '0.0', 'ATSC7e': '0.0', 'ATSC8e': '0.0', 'ATSC0p': '1.5215138905868888', 'ATSC1p': '-0.37905875450079074', 'ATSC2p': '-0.8958158073719507', 'ATSC3p': '0.34424574592592644', 'ATSC4p': '0.16987187065337056', 'ATSC5p': '0.0', 'ATSC6p': '0.0', 'ATSC7p': '0.0', 'ATSC8p': 
'0.0', 'ATSC0i': '8.524803221453547', 'ATSC1i': '-2.0951525621655884', 'ATSC2i': '-5.003585488201571', ']]