| import torch |
| from torch.utils.data import DataLoader |
| from torchvision import datasets |
| from torchvision import transforms |
| from torch import nn, optim |
| import torchvision.models as models |
| |
| from matplotlib import pyplot as plt |
| import numpy as np |
| |
| import mydataset |
| import myutils |
| import time |
| |
def read_data(batchsz=32):
    """Build the train/test datasets and their DataLoaders.

    Args:
        batchsz: batch size used for both loaders.

    Returns:
        Tuple ``(train_data, train_loader, test_data, test_loader)``.
    """
    # Standard ImageNet normalisation statistics (ResNet convention).
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # Random flips are data augmentation — applied to TRAINING data only.
    train_data = mydataset.MyDataset('./data/train.txt', transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
    train_loader = DataLoader(train_data, batch_size=batchsz, shuffle=True)

    # Fix: the test pipeline previously applied the same random flips and
    # shuffled batches, which made every evaluation pass non-deterministic.
    # The test set is now deterministic (no augmentation, no shuffling).
    test_data = mydataset.MyDataset('./data/test.txt', transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))
    test_loader = DataLoader(test_data, batch_size=batchsz, shuffle=False)

    # Fix: `iter(loader).next()` was removed in current Python/PyTorch;
    # the builtin next() is the supported way to pull one batch.
    x, label = next(iter(train_loader))
    print('x:', x.shape, 'label:', label.shape)
    print("数据加载完毕...")

    return train_data, train_loader, test_data, test_loader
| |
def load_model(model_path):
    """Create a ResNet-50 with a custom 3-output head, optionally resuming
    model/optimizer/bookkeeping state from a checkpoint.

    Args:
        model_path: path to a checkpoint produced by this script's
            ``torch.save`` call; if the file does not exist, training
            starts from scratch.

    Returns:
        ``(model, optimizer, loss_func, record, state)`` where ``state``
        is a dict with keys ``device``, ``epoch``, ``best_epoch`` and
        ``best_mae``.
    """
    model = models.resnet50()

    # Replace the final fully-connected layer with a small head that
    # produces 3 outputs.
    fc_inputs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(fc_inputs, 256),
        nn.ReLU(),
        nn.BatchNorm1d(256),
        nn.Dropout(0.3),
        nn.Linear(256, 3),
        # NOTE(review): Softmax outputs fed into MSELoss (below) is an
        # unusual pairing — confirm the labels really are probability-like
        # targets; for classification, CrossEntropyLoss on raw logits is
        # the conventional choice.
        nn.Softmax(1)
    )

    # Fix: the device was hard-coded to 'cuda', crashing on CPU-only
    # machines; fall back to CPU when CUDA is unavailable.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Defaults used when no checkpoint exists yet.
    epoch = 0
    record = []
    best_epoch = 0
    best_mae = 100.0

    # Resume from checkpoint if one is present.
    if myutils.fileExists(model_path):
        # map_location lets a GPU-saved checkpoint load on a CPU-only host.
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        record = checkpoint['record']
        best_epoch = checkpoint['best_epoch']
        best_mae = checkpoint['best_mae']
        # No extra .to(device) needed: load_state_dict copies the
        # checkpoint tensors into the parameters already on `device`.

    loss_func = nn.MSELoss().to(device)

    return model, optimizer, loss_func, record, {
        'device': device,
        'epoch': epoch,
        'best_epoch': best_epoch,
        'best_mae': best_mae,
    }
| |
| |
def train_and_valid(model, optimizer, loss_func, device, record=None, best_mae=100, best_epoch=0, batchsz=32, epochs=10, old_epoch=0):
    """Run ``epochs`` epochs of training followed by validation each epoch.

    Args:
        model: network to optimise (already on ``device``).
        optimizer: optimiser bound to ``model``'s parameters.
        loss_func: loss module (MSELoss in this project).
        device: ``torch.device`` to run on.
        record: per-epoch history list of
            ``[train_loss, test_loss, train_mae, test_mae]``; appended to
            in place. A fresh list is created when ``None``.
        best_mae: best validation MAE seen so far (from a resumed run).
        best_epoch: absolute epoch at which ``best_mae`` was reached.
        batchsz: batch size for the data loaders.
        epochs: number of epochs to run now.
        old_epoch: epochs already completed in earlier runs (offset for
            the absolute epoch numbering in logs).

    Returns:
        ``(model, optimizer, record, best_epoch, best_mae)``.
    """
    # Fix: the original signature used a mutable default (record=[]),
    # which is shared across calls; create a fresh list per call instead.
    if record is None:
        record = []

    train_data, train_loader, test_data, test_loader = read_data(batchsz=batchsz)

    for epoch in range(epochs):
        epoch_start = time.time()

        train_loss = 0.0
        train_mae = 0.0
        test_loss = 0.0
        test_mae = 0.0

        # ---- training pass ----
        model.train()
        for batchidx, (x, label) in enumerate(train_loader):
            x, label = x.to(device), label.to(device)

            logits = model(x)
            loss = loss_func(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate per-batch sums weighted by batch size so the
            # per-sample averages below are exact even for a short
            # final batch.
            train_loss += loss.item() * x.size(0)
            train_mae += (logits - label).abs().sum().item()

            myutils.progressBar(batchsz * (batchidx + 1), len(train_data))

        # ---- validation pass (no gradients) ----
        model.eval()
        with torch.no_grad():
            for batchidx, (x, label) in enumerate(test_loader):
                x, label = x.to(device), label.to(device)

                logits = model(x)
                loss = loss_func(logits, label)

                test_loss += loss.item() * x.size(0)
                test_mae += (logits - label).abs().sum().item()

                myutils.progressBar(batchsz * (batchidx + 1), len(test_data))

        avg_train_loss = train_loss / len(train_data)
        avg_train_mae = train_mae / len(train_data)
        avg_test_loss = test_loss / len(test_data)
        avg_test_mae = test_mae / len(test_data)

        record.append([avg_train_loss, avg_test_loss, avg_train_mae, avg_test_mae])

        # Track best validation MAE using the absolute epoch number.
        if avg_test_mae < best_mae:
            best_mae = avg_test_mae
            best_epoch = old_epoch + epoch + 1

        epoch_end = time.time()

        print("Epoch: {:03d}/{:03d} Time: {:.4f}s ============> train_loss: {:.4f} train_mae: {:.4f} / val_loss: {:.4f} val_mae {:.4f}".format(
            old_epoch + epoch + 1, old_epoch + epochs, epoch_end - epoch_start, avg_train_loss, avg_train_mae, avg_test_loss, avg_test_mae))

        print("Best MAE for validation : {:.4f} at epoch {:03d}".format(best_mae, best_epoch))

    return model, optimizer, record, best_epoch, best_mae
| |
if __name__ == '__main__':
    # Run configuration: how many epochs to add, the batch size, and the
    # checkpoint to resume from (training starts fresh if it is missing).
    epochs = 5
    batchsz = 32
    model_path = 'models/model_06071135_5.pth'

    # Build the model (resuming optimizer/history state when possible).
    model, optimizer, loss_func, record, model_dict = load_model(model_path)
    print(model_dict)

    # Train and validate, continuing the absolute epoch count from the
    # checkpoint via old_epoch.
    trained_model, optimizer, record, best_epoch, best_mae = train_and_valid(
        model=model,
        optimizer=optimizer,
        loss_func=loss_func,
        record=record,
        device=model_dict['device'],
        best_mae=model_dict['best_mae'],
        best_epoch=model_dict['best_epoch'],
        old_epoch=model_dict['epoch'],
        batchsz=batchsz,
        epochs=epochs,
    )

    # Persist a resumable checkpoint named after the current time and the
    # total number of epochs completed so far.
    total_epoch = model_dict['epoch'] + epochs
    model_save_path = 'models/model_{}_{}.pth'.format(myutils.getTime(), total_epoch)
    torch.save({
        'epoch': total_epoch,
        'model_state_dict': trained_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'record': record,
        'best_epoch': best_epoch,
        'best_mae': best_mae
    }, model_save_path)

    # Plot the accumulated loss/MAE history.
    myutils.show_loss_and_mae(record)