我尝试按照此代码库中的示例,使用 CDPL Chem 读取由 RDKit 输出的 SDF 文件。然而,我遇到了以下错误:
mol = CDPL_readSDF(temp_file)
File "~/csv2sdf.py", line 462, in CDPL_readSDF
reader = CDPLChem.MoleculeReader(input_sd_file)
Boost.Python.ArgumentError: Python argument types in
MoleculeReader.__init__(MoleculeReader, str)
did not match C++ signature:
__init__(_object* self, std::istream {lvalue} is, CDPL::Base::DataFormat fmt)
__init__(_object* self, std::istream {lvalue} is, std::string fmt)
__init__(_object* self, std::string file_name, CDPL::Base::DataFormat fmt, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
__init__(_object* self, std::string file_name, std::string fmt, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
__init__(_object* self, std::string file_name, std::_Ios_Openmode mode=CDPL.Base._base.OpenMode(12))
我的导入:
import CDPL.Chem as CDPLChem
import CDPL.ConfGen as CCfGen
from rdkit import Chem
from rdkit.Chem import AllChem
我用 RDKit 输出 SDF 文件的函数:
def prepare_mol_for_SDPL(smiles: str, columns: list, row: pd.Series, temp_file: str) -> None:
    """
    Build an RDKit molecule from a SMILES string and write it to an SD file.

    The molecule gets explicit hydrogens and 2D coordinates, every entry of
    ``columns`` is copied from ``row`` onto it as a string property, and three
    identifier properties are set before the single record is written.

    Parameters:
    - smiles (str): SMILES string of the input structure.
    - columns (list): Names of the ``row`` entries to attach as properties.
    - row (pd.Series): Data row the property values are taken from.
    - temp_file (str): Path of the SD file to write.

    Raises:
    - ValueError: if RDKit cannot parse ``smiles``.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of an opaque error inside AddHs.
        raise ValueError(f"Error: could not parse SMILES: {smiles!r}")

    # InChIKey of the structure as given, i.e. before hydrogens are added.
    input_inchikey = Chem.inchi.InchiToInchiKey(Chem.inchi.MolToInchi(parsed))

    mol = Chem.AddHs(parsed)
    AllChem.Compute2DCoords(mol)

    for col in columns:
        mol.SetProp(col, str(row[col]))

    # NOTE(review): RDKit treats property names starting with "_" as private,
    # and SDWriter presumably skips private properties - confirm whether these
    # three are expected to show up in the written file.
    mol.SetProp("_InputInChIKey", str(input_inchikey))
    mol.SetProp("_SMILES", str(Chem.MolToSmiles(mol)))
    mol.SetProp("_InChIKey", str(Chem.inchi.InchiToInchiKey(Chem.inchi.MolToInchi(mol))))

    writer = Chem.SDWriter(temp_file)
    try:
        writer.write(mol)
    finally:
        # Always flush and release the file, even when write() fails.
        writer.close()
我的读取函数:
def CDPL_readSDF(input_sd_file: str) -> "CDPLChem.BasicMolecule | None":
    """
    Read the first molecule of the provided SD file with CDPL and return it.

    Parameters:
    - input_sd_file (str): Path to the input SD file.

    Returns:
    - The first record as a ``CDPLChem.BasicMolecule``, or ``None`` when the
      reader does not deliver any record.

    Raises:
    - Exception: if reading the record fails; the original error is chained
      as the cause.

    Refs:
    - https://cdpkit.org/cdpl_python_tutorial/cdpl_python_tutorial.html#retrieving-structure-data-from-mdl-sd-files
    """
    # Generic molecule reader constructed from the file path only.
    # NOTE(review): presumably the input format is derived from the file
    # name - confirm against the CDPL documentation.
    reader = CDPLChem.MoleculeReader(input_sd_file)

    # Instance of the default implementation of the Chem.Molecule interface
    # that receives the data of the record being read.
    mol = CDPLChem.BasicMolecule()

    try:
        # Only the first record is of interest, so a single read suffices.
        if reader.read(mol):
            return mol
    except Exception as e:  # raised in case of severe read errors
        raise Exception('Error: reading molecule failed: ' + str(e)) from e

    return None
我的主函数:
for ind in df.index:
smiles = df.loc[ind, args.smi_column]
mol = None
try:
try:
os.remove(temp_file)
except:
print()
temp_file = output_file.replace('.sdf', '_temp.sdf')
prepare_mol_for_SDPL(smiles, df.columns, df.loc[ind], temp_file)
mol = CDPL_readSDF(temp_file)
os.remove(temp_file)
附注:我查阅了一些参考资料,其中的示例都可以正常运行:
请帮我解决这个问题。非常感谢。