import os
import rdkit
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
# Build a small descriptor table: read every MOL file in `path`, compute all
# RDKit descriptors for each molecule, and write the result (indexed by
# SMILES, with the molecule's `_Name` property as the first column) to CSV.

# Directory whose files are read with Chem.MolFromMolFile.
path = 'POSCAR-2'

mols = []
# os.listdir() returns entries in arbitrary order; sorting keeps the row
# order of the output CSV reproducible between runs.
files = sorted(os.listdir(path))
for file in files:
    file_path = os.path.join(path, file)
    # Sub-directories cannot be parsed as MOL files; skip them.
    if not os.path.isfile(file_path):
        continue
    mol = Chem.MolFromMolFile(file_path)
    # RDKit returns None (instead of raising) when a file cannot be parsed;
    # a None entry would crash the descriptor/SMILES calls further down.
    if mol is None:
        print('Skipping unparsable file: ' + file_path)
        continue
    mols.append(mol)

# Define the descriptor list: the first item of each Descriptors._descList
# entry is the descriptor name.
descs = [desc_name[0] for desc_name in Descriptors._descList]

# Convert the mol objects to SMILES; these strings are used as the row index.
smiles = [Chem.MolToSmiles(mol) for mol in mols]

# Compute the descriptors. Passing columns/index to the constructor (rather
# than assigning them afterwards) also works when no molecule was loaded.
desc_calc = MoleculeDescriptors.MolecularDescriptorCalculator(descs)
descriptors = pd.DataFrame(
    [desc_calc.CalcDescriptors(mol) for mol in mols],
    columns=descs,
    index=smiles,
)

# Target column: the `_Name` property stored on each molecule.
y_name = '_Name'
y = pd.DataFrame(
    {y_name: [mol.GetProp(y_name) for mol in mols]},
    index=smiles,
)

# Save the data: name column first, followed by all descriptor columns.
dataset = pd.concat([y, descriptors], axis=1)
dataset.to_csv('minidatabase.csv')