import os from math import sqrt import numpy import torch from Bio.PDB import PDBParser from torch.utils.data import Dataset, DataLoader from torch.utils.tensorboard import SummaryWriter
# Prefer the GPU, but fall back to the CPU so the script also runs on
# machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class P450Dataset(Dataset):
    """Dataset of zero-padded inter-atomic distance matrices.

    Every file found in the ``testp450`` directory is parsed with
    ``PDBParser``, the pairwise Euclidean distances between all of its atoms
    are computed, and the resulting square matrix is zero-padded at the
    bottom/right to ``pad_size`` x ``pad_size``.  Each item is a float32
    tensor of shape ``(1, pad_size, pad_size)``.

    Args:
        testp450: Directory containing the structure files to load.
        transform: Accepted for interface compatibility; currently unused.
        target_transform: Accepted for interface compatibility; currently
            unused.
        pad_size: Side length every distance matrix is padded to.

    Raises:
        ValueError: If a structure has more than ``pad_size`` atoms.
    """

    def __init__(self, testp450, transform=None, target_transform=None,
                 pad_size=4996):
        self.testp450 = testp450
        self.pad_size = pad_size
        self.data = []
        # Sorted so the dataset order is reproducible across runs and
        # platforms (os.listdir returns entries in arbitrary order).
        for filename in sorted(os.listdir(testp450)):
            self.data.append(
                self._load_structure(os.path.join(testp450, filename))
            )

    def _load_structure(self, filepath):
        """Parse one structure file into a padded distance-matrix tensor."""
        parser = PDBParser()
        structure = parser.get_structure("1fat", filepath)
        coords = [atom.get_coord() for atom in structure.get_atoms()]
        # Total number of atoms in this structure.
        atom_num = len(coords)
        print(atom_num)
        distances = self._distance_matrix(coords)
        padded = self._pad_to_tensor(distances, self.pad_size)
        print(padded.shape)
        return padded

    @staticmethod
    def _distance_matrix(coords):
        """Return the (n, n) Euclidean distance matrix of n 3-D points."""
        coords = numpy.asarray(coords, dtype=numpy.float64)
        if coords.size == 0:
            return numpy.zeros((0, 0), dtype=numpy.float64)
        count = coords.shape[0]
        squared = numpy.zeros((count, count), dtype=numpy.float64)
        # Accumulate one coordinate axis at a time so that only (n, n)
        # temporaries are allocated instead of an (n, n, 3) array.
        for axis in range(coords.shape[1]):
            column = coords[:, axis]
            delta = column[:, None] - column[None, :]
            squared += delta * delta
        return numpy.sqrt(squared)

    @staticmethod
    def _pad_to_tensor(matrix, size):
        """Zero-pad a square matrix to (size, size); return a (1, size, size) tensor."""
        count = matrix.shape[0]
        if count > size:
            raise ValueError(
                f"structure has {count} atoms, which exceeds pad_size={size}"
            )
        padded = numpy.zeros((size, size), dtype=numpy.float32)
        padded[:count, :count] = matrix
        # Leading axis of length 1 is added so each sample is (1, size, size).
        return torch.from_numpy(padded).unsqueeze(0)

    def __len__(self):
        """Return the number of loaded structures."""
        return len(self.data)

    def __getitem__(self, item):
        """Return the padded distance-matrix tensor stored at index ``item``."""
        return self.data[item]


num_epochs = 1000
batch_size = 2
learning_rate = 1e-3


def main():
    """Train the autoencoder to reconstruct the padded distance matrices."""
    # NOTE(review): `autoencoder` is neither defined nor imported in the
    # visible source; it must be provided elsewhere for this to run.
    model = autoencoder()
    model.to(device)
    criterion = torch.nn.MSELoss()
    criterion.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    dataset = P450Dataset(testp450="testp450")
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    writer = SummaryWriter("./logs_testp450")
    total_train_step = 0
    try:
        for epoch in range(num_epochs):
            for data in dataloader:
                img = data.to(device)
                # ---- forward ----
                output = model(img)
                loss = criterion(output, img)
                # ---- backward ----
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_train_step += 1
                # Report and log the loss every 100 optimisation steps.
                if total_train_step % 100 == 0:
                    print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
                    writer.add_scalar("train_loss1000", loss.item(), total_train_step)
    finally:
        # Close the event file even if training is interrupted.
        writer.close()


if __name__ == "__main__":
    main()