diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8ea998 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*.pyc +checkpoints +data +logs +.DS_Store +.ipynb_checkpoints +plots_paper_figures.py +plot_results.ipynb +plots_paper_figures_iccv.py +plot/ +slurm/ +results/ +ProcessWebVision.ipynb +results +configs/webvision_full +configs/webvision_imagenet +configs/clothing1m +script/WebVisionFull.slurm +script/WebVisionFull_ImageNet.slurm +script/Clothing1M.slurm diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6b7ecd1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 HanxunHuangLemonBear + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 3160e8f..ef3eeaf 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,68 @@ -### Code for ICML 2018 paper "Dimensionality-Driven Learning with Noisy Labels". +# Normalized Loss Functions - Active Passive Losses +Code for ICML2020 Paper ["Normalized Loss Functions for Deep Learning with Noisy Labels"](https://arxiv.org/abs/2006.13554) -#### - Update (2018.07): Issues fixed on CIFAR-10. -#### - Update (2019.10): Start training with symmetric cross entropy (SCE) loss (replacing cross entropy). +## Requirements +```console +Python >= 3.6, PyTorch >= 1.3.1, torchvision >= 0.4.1, mlconfig +``` -The Symmetric Cross Entropy (SCE) was demonstrated can improve several exisiting methods including the D2L: -ICCV2019 "Symmetric Cross Entropy for Robust Learning with Noisy Labels" -https://arxiv.org/abs/1908.06112 -https://github.com/YisenWang/symmetric_cross_entropy_for_noisy_labels +## How To Run +##### Configs for the experiment settings +Check '*.yaml' file in the config folder for each experiment. -#### - Update (2020.03): convergence issue on CIFAR-100 when using SCE loss: learning rate, data augmentation and parameters for SCE. 
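A minimal environment setup sketch based on the requirement list above (the exact pip command and version pins are an assumption, not taken from the repository):
```console
$ pip install "torch>=1.3.1" "torchvision>=0.4.1" mlconfig
```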
+##### Arguments +* noise_rate: noise rate +* asym: use if it is asymmetric noise, default is symmetric +* config_path: path to the configs folder +* version: the config file name +* exp_name: name of the experiments (as note) +* seed: random seed +Example for 0.4 symmetric noise rate with NCE+RCE loss +```console +# CIFAR-10 +$ python3 main.py --exp_name test_exp \ + --noise_rate 0.4 \ + --version nce+rce \ + --config_path configs/cifar10/sym \ + --seed 123 + -### 1. Train DNN models using command line: +# CIFAR-100 +$ python3 main.py --exp_name test_exp \ + --noise_rate 0.4 \ + --version nce+rce \ + --config_path configs/cifar100/sym \ + --seed 123 +``` +Example for plotting lid_trend_through_training with 0.4 symmetric noise rate and D2L learning +```console +# CIFAR-10 +$ python3 main.py --exp_name test_exp \ + --noise_rate 0.4 \ + --version d2l \ + --config_path configs/cifar10/sym \ + --seed 123 \ + --plot +``` -An example:
+Example for plotting the LID, accuracy, and CSR trends of different learning models throughout training, with 0.4 symmetric noise rate +```console +# CIFAR-10 +$ python3 main.py --exp_name test_exp \ + --noise_rate 0.4 \ + --config_path configs/cifar10/sym \ + --seed 123 \ + --plotall +``` -``` -python train_model.py -d mnist -m d2l -e 50 -b 128 -r 40 -``` +## Citing this work +If you use this code in your work, please cite the accompanying paper: -`-d`: dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100']
-`-m`: model in ['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l']
-`-e`: epoch, `-b`: batch size, `-r`: noise rate in [0, 100]
- - -### 2. Run with pre-set parameters in main function of train_model.py: -```python - # mnist example - args = parser.parse_args(['-d', 'mnist', '-m', 'd2l', - '-e', '50', '-b', '128', - '-r', '40']) - main(args) - - # svhn example - args = parser.parse_args(['-d', 'svhn', '-m', 'd2l', - '-e', '50', '-b', '128', - '-r', '40']) - main(args) - - # cifar-10 example - args = parser.parse_args(['-d', 'cifar-10', '-m', 'd2l', - '-e', '120', '-b', '128', - '-r', '40']) - main(args) - - # cifar-100 example - args = parser.parse_args(['-d', 'cifar-100', '-m', 'd2l', - '-e', '200', '-b', '128', - '-r', '40']) - main(args) +``` +@inproceedings{ma2020normalized, + title={Normalized Loss Functions for Deep Learning with Noisy Labels}, + author={Ma, Xingjun and Huang, Hanxun and Wang, Yisen and Romano, Simone and Erfani, Sarah and Bailey, James}, + booktitle={ICML}, + year={2020} +} ``` -#### Requirements: -tensorflow, Keras, numpy, scipy, sklearn, matplotlib diff --git a/archive/dataset.py b/archive/dataset.py new file mode 100644 index 0000000..adbf5b0 --- /dev/null +++ b/archive/dataset.py @@ -0,0 +1,586 @@ +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +from PIL import Image +from tqdm import tqdm +from numpy.testing import assert_array_almost_equal +import numpy as np +import os +import torch +import random +import collections + + +def build_for_cifar100(size, noise): + """ random flip between two random classes. + """ + assert(noise >= 0.) and (noise <= 1.) + + P = (1. - noise) * np.eye(size) + for i in np.arange(size - 1): + P[i, i+1] = noise + + # adjust last row + P[size-1, 0] = noise + + assert_array_almost_equal(P.sum(axis=1), 1, 1) + return P + + +def multiclass_noisify(y, P, random_state=0): + """ Flip classes according to transition probability matrix T. + It expects a number between 0 and the number of classes - 1. + """ + + assert P.shape[0] == P.shape[1] + assert np.max(y) < P.shape[0] + + # row stochastic matrix + assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1])) + assert (P >= 0.0).all() + + m = y.shape[0] + new_y = y.copy() + flipper = np.random.RandomState(random_state) + + for idx in np.arange(m): + i = y[idx] + # draw a vector with only an 1 + flipped = flipper.multinomial(1, P[i, :], 1)[0] + new_y[idx] = np.where(flipped == 1)[0] + + return new_y + + +def other_class(n_classes, current_class): + """ + Returns a list of class indices excluding the class indexed by class_ind + :param nb_classes: number of classes in the task + :param class_ind: the class index to be omitted + :return: one random class that != class_ind + """ + if current_class < 0 or current_class >= n_classes: + error_str = "class_ind must be within the range (0, nb_classes - 1)" + raise ValueError(error_str) + + other_class_list = list(range(n_classes)) + other_class_list.remove(current_class) + other_class = np.random.choice(other_class_list) + return other_class + + +class MNISTNoisy(datasets.MNIST): + def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0): + super(MNISTNoisy, self).__init__(root, transform=transform, target_transform=target_transform, download=download) + self.targets = self.targets.numpy() + if asym: + P = np.eye(10) + n = nosiy_rate + + P[7, 7], P[7, 1] = 1. - n, n + # 2 -> 7 + P[2, 2], P[2, 7] = 1. - n, n + + # 5 <-> 6 + P[5, 5], P[5, 6] = 1. - n, n + P[6, 6], P[6, 5] = 1. - n, n + + # 3 -> 8 + P[3, 3], P[3, 8] = 1. 
- n, n + + y_train_noisy = multiclass_noisify(self.targets, P=P, random_state=seed) + actual_noise = (y_train_noisy != self.targets).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + self.targets = y_train_noisy + + else: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)] + class_noisy = int(n_noisy / 10) + noisy_idx = [] + for d in range(10): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=10, current_class=self.targets[i]) + print(len(noisy_idx)) + + print("Print noisy label generation statistics:") + for i in range(10): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." % (i, n_noisy)) + + return + + +class cifar10Nosiy(datasets.CIFAR10): + def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False): + super(cifar10Nosiy, self).__init__(root, transform=transform, target_transform=target_transform) + if asym: + # automobile < - truck, bird -> airplane, cat <-> dog, deer -> horse + source_class = [9, 2, 3, 5, 4] + target_class = [1, 0, 5, 3, 7] + for s, t in zip(source_class, target_class): + cls_idx = np.where(np.array(self.targets) == s)[0] + n_noisy = int(nosiy_rate * cls_idx.shape[0]) + noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False) + for idx in noisy_sample_index: + self.targets[idx] = t + return + elif nosiy_rate > 0: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)] + class_noisy = int(n_noisy / 10) + noisy_idx = [] + for d in range(10): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=10, current_class=self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(10): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." % (i, n_noisy)) + return + + +class cifar100Nosiy(datasets.CIFAR100): + def __init__(self, root, train=True, transform=None, target_transform=None, download=False, nosiy_rate=0.0, asym=False, seed=0): + super(cifar100Nosiy, self).__init__(root, download=download, transform=transform, target_transform=target_transform) + if asym: + """mistakes are inside the same superclass of 10 classes, e.g. 
'fish' + """ + nb_classes = 100 + P = np.eye(nb_classes) + n = nosiy_rate + nb_superclasses = 20 + nb_subclasses = 5 + + if n > 0.0: + for i in np.arange(nb_superclasses): + init, end = i * nb_subclasses, (i+1) * nb_subclasses + P[init:end, init:end] = build_for_cifar100(nb_subclasses, n) + + y_train_noisy = multiclass_noisify(np.array(self.targets), P=P, random_state=seed) + actual_noise = (y_train_noisy != np.array(self.targets)).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + self.targets = y_train_noisy.tolist() + return + elif nosiy_rate > 0: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(100)] + class_noisy = int(n_noisy / 100) + noisy_idx = [] + for d in range(100): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=100, current_class=self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(100): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." % (i, n_noisy)) + return + + +class DatasetGenerator(): + def __init__(self, + batchSize=128, + eval_batch_size=256, + dataPath='data/', + seed=123, + numOfWorkers=4, + asym=False, + dataset_type='cifar10', + is_cifar100=False, + cutout_length=16, + noise_rate=0.4): + self.seed = seed + np.random.seed(seed) + self.batchSize = batchSize + self.eval_batch_size = eval_batch_size + self.dataPath = dataPath + self.numOfWorkers = numOfWorkers + self.cutout_length = cutout_length + self.noise_rate = noise_rate + self.dataset_type = dataset_type + self.asym = asym + self.data_loaders = self.loadData() + return + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + if self.dataset_type == 'mnist': + MEAN = [0.1307] + STD = [0.3081] + train_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(MEAN, STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(MEAN, STD)]) + + train_dataset = MNISTNoisy(root=self.dataPath, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + seed=self.seed, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.MNIST(root=self.dataPath, + train=False, + transform=test_transform, + download=True) + + elif self.dataset_type == 'cifar100': + CIFAR_MEAN = [0.5071, 0.4865, 0.4409] + CIFAR_STD = [0.2673, 0.2564, 0.2762] + + train_transform = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + train_dataset = cifar100Nosiy(root=self.dataPath, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + seed=self.seed, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.CIFAR100(root=self.dataPath, + train=False, + transform=test_transform, + download=True) + + elif self.dataset_type == 'cifar10': + CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] + CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] + + train_transform = transforms.Compose([ + 
transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + train_dataset = cifar10Nosiy(root=self.dataPath, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.CIFAR10(root=self.dataPath, + train=False, + transform=test_transform, + download=True) + else: + raise("Unknown Dataset") + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.batchSize, + shuffle=True, + pin_memory=True, + num_workers=self.numOfWorkers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.numOfWorkers) + + print("Num of train %d" % (len(train_dataset))) + print("Num of test %d" % (len(test_dataset))) + + return data_loaders + + +class Clothing1MDataset: + def __init__(self, path, type='train', transform=None, target_transform=None): + self.path = path + if type == 'test': + flist = os.path.join(path, "annotations/clean_test.txt") + elif type == 'valid': + flist = os.path.join(path, "annotations/clean_val.txt") + elif type == 'train': + flist = os.path.join(path, "annotations/noisy_train.txt") + else: + raise('Unknown type') + + self.imlist = self.flist_reader(flist) + self.transform = transform + + def __len__(self): + return len(self.imlist) + + def __getitem__(self, index): + impath, target = self.imlist[index] + img = Image.open(impath).convert("RGB") + if self.transform is not None: + img = self.transform(img) + return img, target + + def flist_reader(self, flist): + imlist = [] + with open(flist, 'r') as rf: + for line in rf.readlines(): + row = line.split(" ") + impath = self.path + row[0] + imlabel = row[1] + imlist.append((impath, int(imlabel))) + return imlist + + +class Clothing1MDatasetLoader: + def __init__(self, batchSize=128, eval_batch_size=256, dataPath='data/', numOfWorkers=4): + self.batchSize = batchSize + self.eval_batch_size = eval_batch_size + self.dataPath = dataPath + self.numOfWorkers = numOfWorkers + self.data_loaders = self.loadData() + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + MEAN = [0.6959, 0.6537, 0.6371] + STD = [0.3113, 0.3192, 0.3214] + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean=MEAN, std=STD), + ]) + test_transform = transforms.Compose([ + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=MEAN, std=STD) + ]) + + train_dataset = Clothing1MDataset(path=self.dataPath, + type='train', + transform=train_transform) + + test_dataset = Clothing1MDataset(path=self.dataPath, + type='test', + transform=test_transform) + + valid_dataset = Clothing1MDataset(path=self.dataPath, + type='valid', + transform=test_transform) + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.batchSize, + shuffle=True, + pin_memory=True, + num_workers=self.numOfWorkers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.numOfWorkers) + + data_loaders['valid_dataset'] = DataLoader(dataset=valid_dataset, + 
batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.numOfWorkers) + return data_loaders + + +class NosieImageNet(datasets.ImageNet): + def __init__(self, root, split='train', seed=999, download=None, target_class_num=200, nosiy_rate=0.4, **kwargs): + super(NosieImageNet, self).__init__(root, download=download, split=split, **kwargs) + random.seed(seed) + np.random.seed(seed) + self.new_idx = random.sample(list(range(0, 1000)), k=target_class_num) + print(len(self.new_idx), len(self.imgs)) + self.new_imgs = [] + self.new_targets = [] + + for file, cls_id in self.imgs: + if cls_id in self.new_idx: + new_idx = self.new_idx.index(cls_id) + self.new_imgs.append((file, new_idx)) + self.new_targets.append(new_idx) + self.imgs = self.new_imgs + self.targets = self.new_targets + print(min(self.targets), max(self.targets)) + # Noise + if split == 'train': + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(target_class_num)] + class_noisy = int(n_noisy / target_class_num) + noisy_idx = [] + for d in range(target_class_num): + print(len(class_index[d]), d) + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=target_class_num, current_class=self.targets[i]) + (file, old_idx) = self.imgs[i] + self.imgs[i] = (file, self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(target_class_num): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." 
% (i, n_noisy)) + + self.samples = self.imgs + + +class ImageNetDatasetLoader: + def __init__(self, + batchSize=128, + eval_batch_size=256, + dataPath='data/', + seed=999, + target_class_num=200, + nosiy_rate=0.4, + numOfWorkers=4): + self.batchSize = batchSize + self.eval_batch_size = eval_batch_size + self.dataPath = dataPath + self.numOfWorkers = numOfWorkers + self.seed = seed + self.target_class_num = target_class_num + self.nosiy_rate = nosiy_rate + self.data_loaders = self.loadData() + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + IMAGENET_MEAN = [0.485, 0.456, 0.406] + IMAGENET_STD = [0.229, 0.224, 0.225] + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.2), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + train_dataset = NosieImageNet(root=self.dataPath, + split='train', + nosiy_rate=self.nosiy_rate, + target_class_num=self.target_class_num, + seed=self.seed, + transform=train_transform, + download=True) + + test_dataset = NosieImageNet(root=self.dataPath, + split='val', + nosiy_rate=self.nosiy_rate, + target_class_num=self.target_class_num, + seed=self.seed, + transform=test_transform, + download=True) + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.batchSize, + shuffle=True, + pin_memory=True, + num_workers=self.numOfWorkers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.batchSize, + shuffle=False, + pin_memory=True, + num_workers=self.numOfWorkers) + return data_loaders + + + + + + +def online_mean_and_sd(loader): + """Compute the mean and sd in an online fashion + + Var[x] = E[X^2] - E^2[X] + """ + cnt = 0 + fst_moment = torch.empty(3) + snd_moment = torch.empty(3) + + for data, _ in tqdm(loader): + + b, c, h, w = data.shape + nb_pixels = b * h * w + sum_ = torch.sum(data, dim=[0, 2, 3]) + sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3]) + fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels) + snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels) + + cnt += nb_pixels + + return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2) + + +if __name__ == '__main__': + # train_transform = transforms.Compose([ + # transforms.Resize((224, 224)), + # transforms.ToTensor(), + # ]) + # test = Clothing1MDataset(path='../datasets/clothing1M', transform=train_transform) + # loader = DataLoader(test, + # batch_size=128, + # num_workers=12, + # shuffle=True) + # mean, std = online_mean_and_sd(loader) + # print(mean) + # print(std) + # + # ''' + # tensor([0.7215, 0.6846, 0.6679]) + # tensor([0.3021, 0.3122, 0.3167]) + # ''' + train = NosieImageNet(root='../datasets/ILSVR2012', split='train') + valid = NosieImageNet(root='../datasets/ILSVR2012', split='val') diff --git a/archive/loss.py b/archive/loss.py new file mode 100644 index 0000000..3a83488 --- /dev/null +++ b/archive/loss.py @@ -0,0 +1,495 @@ +import torch +import torch.nn.functional as F +import numpy as np + +if torch.cuda.is_available(): + torch.backends.cudnn.benchmark = True + if torch.cuda.device_count() > 1: + device = torch.device('cuda:0') + else: + device = torch.device('cuda') +else: + device = torch.device('cpu') + 
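# Illustrative usage sketch for the losses defined below (the alpha/beta values here are
# assumptions for the example, not defaults taken from this file):
#   criterion = SCELoss(alpha=0.1, beta=1.0, num_classes=10)
#   loss = criterion(model(images), labels)   # model(images) are raw logits, shape (batch, num_classes)
#   loss.backward()
# SCELoss combines standard cross entropy with a reverse cross entropy (RCE) term:
#   loss = alpha * CE + beta * mean(RCE), where RCE = -sum_k pred_k * log(onehot_k)
# and the one-hot labels are clamped to 1e-4 so that log(0) never occurs.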
+ +class SCELoss(torch.nn.Module): + def __init__(self, alpha, beta, num_classes=10): + super(SCELoss, self).__init__() + self.device = device + self.alpha = alpha + self.beta = beta + self.num_classes = num_classes + self.cross_entropy = torch.nn.CrossEntropyLoss() + + def forward(self, pred, labels): + # CCE + ce = self.cross_entropy(pred, labels) + + # RCE + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + + # Loss + loss = self.alpha * ce + self.beta * rce.mean() + return loss + + +class ReverseCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(ReverseCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * rce.mean() + normalizor = 1 / 4 * (self.num_classes - 1) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * normalizor * rce.mean() + + +class NormalizedReverseCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedReverseCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + normalizor = 1 / 4 * (self.num_classes - 1) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * normalizor * rce.mean() + + +class NormalizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.log_softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1)) + return self.scale * nce.mean() + + +class GeneralizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, q=0.7): + super(GeneralizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.q = q + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + gce = (1. 
- torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)) / self.q + return gce.mean() + + +class NormalizedGeneralizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0, q=0.7): + super(NormalizedGeneralizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.q = q + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + numerators = 1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q) + denominators = self.num_classes - pred.pow(self.q).sum(dim=1) + ngce = numerators / denominators + return self.scale * ngce.mean() + + +class MeanAbsoluteError(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(MeanAbsoluteError, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + return + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + mae = 1. - torch.sum(label_one_hot * pred, dim=1) + return self.scale * mae.mean() + + +class NormalizedMeanAbsoluteError(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedMeanAbsoluteError, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + return + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + normalizor = 1 / (2 * (self.num_classes - 1)) + mae = 1. - torch.sum(label_one_hot * pred, dim=1) + return self.scale * normalizor * mae.mean() + + +class NCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(NCEandRCE, self).__init__() + self.num_classes = num_classes + self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nce(pred, labels) + self.rce(pred, labels) + + +class NCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(NCEandMAE, self).__init__() + self.num_classes = num_classes + self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nce(pred, labels) + self.mae(pred, labels) + + +class GCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandMAE, self).__init__() + self.num_classes = num_classes + self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.mae(pred, labels) + + +class GCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandRCE, self).__init__() + self.num_classes = num_classes + self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.rce(pred, labels) + + +class GCEandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandNCE, self).__init__() + self.num_classes = num_classes + self.gce = 
GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.nce = NormalizedCrossEntropy(num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.nce(pred, labels) + + +class NGCEandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandNCE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.nce(pred, labels) + + +class NGCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandMAE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.mae(pred, labels) + + +class NGCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandRCE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.rce(pred, labels) + + +class MAEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(MAEandRCE, self).__init__() + self.num_classes = num_classes + self.mae = MeanAbsoluteError(scale=alpha, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.mae(pred, labels) + self.rce(pred, labels) + + +class NLNL(torch.nn.Module): + def __init__(self, train_loader, num_classes, ln_neg=1): + super(NLNL, self).__init__() + self.device = device + self.num_classes = num_classes + self.ln_neg = ln_neg + weight = torch.FloatTensor(num_classes).zero_() + 1. + if not hasattr(train_loader.dataset, 'targets'): + weight = [1] * num_classes + weight = torch.FloatTensor(weight) + else: + for i in range(num_classes): + weight[i] = (torch.from_numpy(np.array(train_loader.dataset.targets)) == i).sum() + weight = 1 / (weight / weight.max()) + self.weight = weight.to(self.device) + self.criterion = torch.nn.CrossEntropyLoss(weight=self.weight) + self.criterion_nll = torch.nn.NLLLoss() + + def forward(self, pred, labels): + labels_neg = (labels.unsqueeze(-1).repeat(1, self.ln_neg) + + torch.LongTensor(len(labels), self.ln_neg).to(self.device).random_(1, self.num_classes)) % self.num_classes + labels_neg = torch.autograd.Variable(labels_neg) + + assert labels_neg.max() <= self.num_classes-1 + assert labels_neg.min() >= 0 + assert (labels_neg != labels.unsqueeze(-1).repeat(1, self.ln_neg)).sum() == len(labels)*self.ln_neg + + s_neg = torch.log(torch.clamp(1. 
- F.softmax(pred, 1), min=1e-5, max=1.)) + s_neg *= self.weight[labels].unsqueeze(-1).expand(s_neg.size()).to(self.device) + labels = labels * 0 - 100 + loss = self.criterion(pred, labels) * float((labels >= 0).sum()) + loss_neg = self.criterion_nll(s_neg.repeat(self.ln_neg, 1), labels_neg.t().contiguous().view(-1)) * float((labels_neg >= 0).sum()) + loss = ((loss+loss_neg) / (float((labels >= 0).sum())+float((labels_neg[:, 0] >= 0).sum()))) + return loss + + +class FocalLoss(torch.nn.Module): + ''' + https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py + ''' + + def __init__(self, gamma=0, alpha=None, size_average=True): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + if isinstance(alpha, (float, int)): + self.alpha = torch.Tensor([alpha, 1-alpha]) + if isinstance(alpha, list): + self.alpha = torch.Tensor(alpha) + self.size_average = size_average + + def forward(self, input, target): + if input.dim() > 2: + input = input.view(input.size(0), input.size(1), -1) # N,C,H,W => N,C,H*W + input = input.transpose(1, 2) # N,C,H*W => N,H*W,C + input = input.contiguous().view(-1, input.size(2)) # N,H*W,C => N*H*W,C + target = target.view(-1, 1) + + logpt = F.log_softmax(input, dim=1) + logpt = logpt.gather(1, target) + logpt = logpt.view(-1) + pt = torch.autograd.Variable(logpt.data.exp()) + + if self.alpha is not None: + if self.alpha.type() != input.data.type(): + self.alpha = self.alpha.type_as(input.data) + at = self.alpha.gather(0, target.data.view(-1)) + logpt = logpt * torch.autograd.Variable(at) + + loss = -1 * (1-pt)**self.gamma * logpt + if self.size_average: + return loss.mean() + else: + return loss.sum() + + +class NormalizedFocalLoss(torch.nn.Module): + def __init__(self, scale=1.0, gamma=0, num_classes=10, alpha=None, size_average=True): + super(NormalizedFocalLoss, self).__init__() + self.gamma = gamma + self.size_average = size_average + self.num_classes = num_classes + self.scale = scale + + def forward(self, input, target): + target = target.view(-1, 1) + logpt = F.log_softmax(input, dim=1) + normalizor = torch.sum(-1 * (1 - logpt.data.exp()) ** self.gamma * logpt, dim=1) + logpt = logpt.gather(1, target) + logpt = logpt.view(-1) + pt = torch.autograd.Variable(logpt.data.exp()) + loss = -1 * (1-pt)**self.gamma * logpt + loss = self.scale * loss / normalizor + + if self.size_average: + return loss.mean() + else: + return loss.sum() + + +class NFLandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandNCE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.nce(pred, labels) + + +class NFLandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandMAE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.mae(pred, labels) + + +class NFLandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandRCE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.rce = 
ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.rce(pred, labels) + + +class DMILoss(torch.nn.Module): + def __init__(self, num_classes): + super(DMILoss, self).__init__() + self.num_classes = num_classes + + def forward(self, output, target): + outputs = F.softmax(output, dim=1) + targets = target.reshape(target.size(0), 1).cpu() + y_onehot = torch.FloatTensor(target.size(0), self.num_classes).zero_() + y_onehot.scatter_(1, targets, 1) + y_onehot = y_onehot.transpose(0, 1).cuda() + mat = y_onehot @ outputs + return -1.0 * torch.log(torch.abs(torch.det(mat.float())) + 0.001) + + +class BootSoftLoss(torch.nn.Module): + def __init__(self, num_classes, beta=0.95): + super(BootSoftLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.beta = beta + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + bsl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred) * torch.log(pred), dim=1) + return bsl.mean() + + +class BootHardLoss(torch.nn.Module): + def __init__(self, num_classes, beta=0.8): + super(BootSoftLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.beta = beta + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + pred_one_hot = F.one_hot(torch.argmax(pred, dim=1),self.num_classes) + bhl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred_one_hot) * torch.log(pred), dim=1) + return bhl.mean() + + +class ForwardLoss(torch.nn.Module): + def __init__(self, num_classes, noise_rate): + super(ForwardLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.noise_rate = noise_rate + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.) + P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes)) + P.diagonal().fill_(1-self.noise_rate) + P = P.to(self.device) + loss = -torch.sum(label_one_hot * torch.log(torch.matmul(pred, P)), dim=-1) + return loss.mean() + +class BackwardLoss(torch.nn.Module): + def __init__(self, num_classes, noise_rate): + super(BackwardLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.noise_rate = noise_rate + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.) 
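        # Uniform (symmetric) noise transition matrix: every diagonal entry is
        # 1 - noise_rate and every off-diagonal entry is noise_rate / (num_classes - 1).
        # Illustrative example with num_classes=3 and noise_rate=0.4:
        #   P = [[0.6, 0.2, 0.2],
        #        [0.2, 0.6, 0.2],
        #        [0.2, 0.2, 0.6]]
        # ForwardLoss above multiplies the predictions by P, whereas the backward
        # correction here multiplies the one-hot labels by P^{-1} before the log-loss.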
+ P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes)) + P.diagonal().fill_(1-self.noise_rate) + P = P.to(self.device) + P_inv = torch.inverse(P) + loss=-torch.sum((torch.matmul(label_one_hot, P_inv)) * torch.log(pred), dim=-1) + return loss.mean() + + +class LIDPacedLoss(torch.nn.Module): + def __init__(self, num_classes, alpha, beta1, beta2): + super(LIDPacedLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.alpha = alpha + self.beta1 = beta1 + self.beta2 = beta2 + self.sce = SCELoss(alpha=beta1, beta=beta2, num_classes=num_classes) + + def forward(self, pred, labels): + if self.alpha == 1.0: + return self.sce(pred, labels) + else: + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + pred_labels = F.one_hot(torch.argmax(pred, dim=1), num_classes=label_one_hot.size(1)) + y_new = self.alpha * label_one_hot + (1. - self.alpha) * pred_labels + loss = -torch.sum(y_new * torch.log(pred), dim=-1) + return loss.mean() \ No newline at end of file diff --git a/archive/model.py b/archive/model.py new file mode 100644 index 0000000..d39265e --- /dev/null +++ b/archive/model.py @@ -0,0 +1,197 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class ConvBrunch(nn.Module): + def __init__(self, in_planes, out_planes, kernel_size=3): + super(ConvBrunch, self).__init__() + padding = (kernel_size - 1) // 2 + self.out_conv = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding), + nn.BatchNorm2d(out_planes), + nn.ReLU()) + + def forward(self, x): + return self.out_conv(x) + + +class SCEModel(nn.Module): + def __init__(self, type='cifar10'): + super(SCEModel, self).__init__() + self.type = type + if type == 'cifar10': + self.block1 = nn.Sequential( + ConvBrunch(3, 64, 3), + ConvBrunch(64, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block2 = nn.Sequential( + ConvBrunch(64, 128, 3), + ConvBrunch(128, 128, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block3 = nn.Sequential( + ConvBrunch(128, 196, 3), + ConvBrunch(196, 196, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc1 = nn.Sequential( + nn.Linear(3136, 256), + nn.BatchNorm1d(256), + nn.ReLU()) + self.fc2 = nn.Linear(256, 10) + self.fc_size = 3136 + elif type == 'mnist': + self.block1 = nn.Sequential( + ConvBrunch(1, 32, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block2 = nn.Sequential( + ConvBrunch(32, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc1 = nn.Sequential( + nn.Linear(64*7*7, 128), + nn.BatchNorm1d(128), + nn.ReLU()) + self.fc2 = nn.Linear(128, 10) + self.fc_size = 64*7*7 + self._reset_prams() + + def _reset_prams(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu') + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + return + + def forward(self, x): + x = self.block1(x) + x = self.block2(x) + x = self.block3(x) if self.type == 'cifar10' else x + # x = self.global_avg_pool(x) + x = x.view(-1, self.fc_size) + x = self.fc1(x) + x = self.fc2(x) + return x + + +'''ResNet in PyTorch. +For Pre-activation ResNet, see 'preact_resnet.py'. 
+Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +''' + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + self._reset_prams() + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + def _reset_prams(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu') + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + return + + +def ResNet18(num_classes=10): + return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) + + +def ResNet34(num_classes=10): + return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes) + + +def 
ResNet50(num_classes=10): + return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) + + +def ResNet101(num_classes=10): + return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) + + +def ResNet152(num_classes=10): + return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) diff --git a/archive/train.py b/archive/train.py new file mode 100644 index 0000000..13bafc7 --- /dev/null +++ b/archive/train.py @@ -0,0 +1,382 @@ +import argparse +import torch +import time +import os +import collections +import pickle +import logging +import torchvision +from tqdm import tqdm +from model import SCEModel, ResNet34 +from dataset import DatasetGenerator, Clothing1MDatasetLoader, ImageNetDatasetLoader +from utils.utils import AverageMeter, accuracy, count_parameters_in_MB +from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR +from train_util import TrainUtil +from loss import * + +# ArgParse +parser = argparse.ArgumentParser(description='RobustLoss') +parser.add_argument('--lr', type=float, default=0.01) +parser.add_argument('--l2_reg', type=float, default=1e-4) +parser.add_argument('--grad_bound', type=float, default=5.0) +parser.add_argument('--train_log_every', type=int, default=100) +parser.add_argument('--resume', action='store_true', default=False) +parser.add_argument('--batch_size', type=int, default=128) +parser.add_argument('--data_path', default='data', type=str) +parser.add_argument('--checkpoint_path', default='checkpoints/cifar10/', type=str) +parser.add_argument('--data_nums_workers', type=int, default=4) +parser.add_argument('--epoch', type=int, default=150) +parser.add_argument('--nr', type=float, default=0.4, help='noise_rate') +parser.add_argument('--loss', type=str, default='SCE', help='SCE, CE, NCE, MAE, RCE') +parser.add_argument('--alpha', type=float, default=1.0, help='alpha scale') +parser.add_argument('--beta', type=float, default=1.0, help='beta scale') +parser.add_argument('--q', type=float, default=0.7, help='q for gce') +parser.add_argument('--gamma', type=float, default=2, help='gamma for FocalLoss') +parser.add_argument('--dataset_type', choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'], type=str, default='cifar10') +parser.add_argument('--scale_exp', action='store_true', default=False) +parser.add_argument('--alpha_beta_exp', action='store_true', default=False) +parser.add_argument('--version', type=str, default='robust_loss') +parser.add_argument('--run_version', type=str, default='run1') +parser.add_argument('--asym', action='store_true', default=False) +parser.add_argument('--seed', type=int, default=123) +args = parser.parse_args() + +if args.dataset_type == 'cifar100': + args.checkpoint_path = 'checkpoints/cifar100/' + log_dataset_type = 'cifar100' +elif args.dataset_type == 'cifar10': + args.checkpoint_path = 'checkpoints/cifar10/' + log_dataset_type = 'cifar10' +elif args.dataset_type == 'mnist': + args.checkpoint_path = 'checkpoints/mnist/' + log_dataset_type = 'mnist' +elif args.dataset_type == 'clothing1m': + args.checkpoint_path = 'checkpoints/clothing1m/' + log_dataset_type = 'clothing1m' +elif args.dataset_type == 'imagenet': + args.checkpoint_path = 'checkpoints/ILSVR2012/' + log_dataset_type = 'imagenet' +else: + raise('Unknown Dataset') + +log_sym_type = '' +if args.dataset_type == 'clothing1m': + log_dataset_type = 'clothing1m' +elif args.dataset_type == 'imagenet': + log_dataset_type = 'imagenet' +elif not args.dataset_type == 'clothing1m': + args.version = str(args.nr) + 'nr_' + args.loss.lower() 
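    # e.g. running with --nr 0.4 --loss NCEandRCE yields version "0.4nr_nceandrce";
    # the branches below append suffixes for the scale / alpha-beta experiments and for
    # asymmetric noise, and pick the matching checkpoint sub-directory.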
+ if args.scale_exp: + args.version += '_scale_' + str(args.alpha) + elif args.alpha_beta_exp: + args.version += '_ab_' + str(args.alpha) + '_' + str(args.beta) + if args.asym: + log_sym_type = 'asym' + args.version += '_asym' + args.checkpoint_path += 'asym/' + args.run_version + '/' + else: + log_sym_type = 'sym' + args.checkpoint_path += 'sym/' + args.run_version + '/' + + +if not os.path.exists(args.checkpoint_path): + os.makedirs(args.checkpoint_path) +if not os.path.exists(os.path.join('logs', log_dataset_type, log_sym_type, args.run_version)): + os.makedirs(os.path.join('logs', log_dataset_type, log_sym_type, args.run_version)) + + +def setup_logger(name, log_file, level=logging.INFO): + """To setup as many loggers as you want""" + formatter = logging.Formatter('%(asctime)s %(message)s') + handler = logging.FileHandler(log_file) + handler.setFormatter(formatter) + + logger = logging.getLogger(name) + logger.setLevel(level) + logger.addHandler(handler) + + return logger + + +log_file_name = os.path.join('logs', log_dataset_type, log_sym_type, args.run_version, args.version) +logger = setup_logger(name=args.version, log_file=log_file_name + ".log") +GLOBAL_STEP, EVAL_STEP, EVAL_BEST_ACC = 0, 0, 0 +TRAIN_HISTORY = collections.defaultdict(list) +torch.manual_seed(args.seed) + +if torch.cuda.is_available(): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + torch.backends.cudnn.deterministic = True + if torch.cuda.device_count() > 1: + device = torch.device('cuda:0') + else: + device = torch.device('cuda') +else: + device = torch.device('cpu') + + +def log_display(epoch, global_step, time_elapse, **kwargs): + display = 'epoch=' + str(epoch) + \ + '\tglobal_step=' + str(global_step) + for key, value in kwargs.items(): + display += '\t' + str(key) + '=%.5f' % value + display += '\ttime=%.2fit/s' % (1. 
/ time_elapse) + return display + + +def model_eval(epoch, fixed_cnn, data_loader, dataset_type='test_dataset'): + global EVAL_STEP + fixed_cnn.eval() + valid_loss_meters = AverageMeter() + valid_acc_meters = AverageMeter() + valid_acc5_meters = AverageMeter() + ce_loss = torch.nn.CrossEntropyLoss() + + for images, labels in tqdm(data_loader[dataset_type]): + start = time.time() + images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True) + with torch.no_grad(): + pred = fixed_cnn(images) + loss = ce_loss(pred, labels) + acc, acc5 = accuracy(pred, labels, topk=(1, 5)) + + valid_loss_meters.update(loss.item(), labels.shape[0]) + valid_acc_meters.update(acc.item(), labels.shape[0]) + valid_acc5_meters.update(acc5.item(), labels.shape[0]) + end = time.time() + + EVAL_STEP += 1 + if EVAL_STEP % args.train_log_every == 0: + display = log_display(epoch=epoch, + global_step=GLOBAL_STEP, + time_elapse=end-start, + loss=loss.item(), + test_loss_avg=valid_loss_meters.avg, + acc=acc.item(), + test_acc_avg=valid_acc_meters.avg, + test_acc_top5_avg=valid_acc5_meters.avg) + logger.info(display) + display = log_display(epoch=epoch, + global_step=GLOBAL_STEP, + time_elapse=end-start, + loss=loss.item(), + test_loss_avg=valid_loss_meters.avg, + acc=acc.item(), + test_acc_avg=valid_acc_meters.avg, + test_acc_top5_avg=valid_acc5_meters.avg) + logger.info(display) + return valid_acc_meters.avg, valid_acc5_meters.avg + + +def train_fixed(starting_epoch, data_loader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper): + global GLOBAL_STEP, reduction_arc, cell_arc, EVAL_BEST_ACC, EVAL_STEP, TRAIN_HISTORY + + for epoch in tqdm(range(starting_epoch, args.epoch)): + logger.info("=" * 20 + "Training" + "=" * 20) + fixed_cnn.train() + train_loss_meters = AverageMeter() + train_acc_meters = AverageMeter() + train_acc5_meters = AverageMeter() + + for images, labels in tqdm(data_loader["train_dataset"]): + images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True) + start = time.time() + fixed_cnn.zero_grad() + fixed_cnn_optmizer.zero_grad() + pred = fixed_cnn(images) + loss = criterion(pred, labels) + loss.backward() + grad_norm = torch.nn.utils.clip_grad_norm_(fixed_cnn.parameters(), args.grad_bound) + fixed_cnn_optmizer.step() + acc, acc5 = accuracy(pred, labels, topk=(1, 5)) + + train_loss_meters.update(loss.item(), labels.shape[0]) + train_acc_meters.update(acc.item(), labels.shape[0]) + train_acc5_meters.update(acc5.item(), labels.shape[0]) + + end = time.time() + + GLOBAL_STEP += 1 + if GLOBAL_STEP % args.train_log_every == 0: + lr = fixed_cnn_optmizer.param_groups[0]['lr'] + display = log_display(epoch=epoch, + global_step=GLOBAL_STEP, + time_elapse=end-start, + loss=loss.item(), + loss_avg=train_loss_meters.avg, + acc=acc.item(), + acc_top1_avg=train_acc_meters.avg, + acc_top5_avg=train_acc5_meters.avg, + lr=lr, + gn=grad_norm) + logger.info(display) + if fixed_cnn_scheduler is not None: + fixed_cnn_scheduler.step() + logger.info("="*20 + "Eval" + "="*20) + curr_acc, _ = model_eval(epoch, fixed_cnn, data_loader) + logger.info("curr_acc\t%.4f" % curr_acc) + logger.info("BEST_ACC\t%.4f" % EVAL_BEST_ACC) + payload = '=' * 10 + '\n' + payload = payload + ("curr_acc: %.4f\n best_acc: %.4f\n" % (curr_acc, EVAL_BEST_ACC)) + EVAL_BEST_ACC = max(curr_acc, EVAL_BEST_ACC) + TRAIN_HISTORY["train_loss"].append(train_loss_meters.avg) + TRAIN_HISTORY["train_acc"].append(train_acc_meters.avg) + TRAIN_HISTORY["test_acc"].append(curr_acc) + 
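        # TRAIN_HISTORY grows by one entry per metric per epoch and is re-pickled below
        # after every epoch, so the .pickle file under checkpoint_path always holds the
        # full training curves up to the current epoch.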
TRAIN_HISTORY["test_acc_best"] = [EVAL_BEST_ACC] + with open(args.checkpoint_path + args.version + '.pickle', 'wb') as handle: + pickle.dump(TRAIN_HISTORY, handle, protocol=pickle.HIGHEST_PROTOCOL) + logger.info("Saved!\n") + return + + +def train(): + # Dataset + if args.dataset_type == 'clothing1m': + dataset = Clothing1MDatasetLoader(batchSize=args.batch_size, + dataPath=args.data_path, + numOfWorkers=args.data_nums_workers) + elif args.dataset_type == 'imagenet': + dataset = ImageNetDatasetLoader(batchSize=args.batch_size, + dataPath=args.data_path, + seed=args.seed, + target_class_num=200, + nosiy_rate=0.4, + numOfWorkers=args.data_nums_workers) + else: + dataset = DatasetGenerator(batchSize=args.batch_size, + dataPath=args.data_path, + numOfWorkers=args.data_nums_workers, + noise_rate=args.nr, + asym=args.asym, + seed=args.seed, + dataset_type=args.dataset_type) + + dataLoader = dataset.getDataLoader() + eta_min = 0 + ln_neg = 1 + + if args.dataset_type == 'clothing1m': + # Train Clothing1M + args.epoch = 20 + args.l2_reg = 1e-3 + num_classes = 14 + fixed_cnn = torchvision.models.resnet50(num_classes=14) + # fixed_cnn.fc = torch.nn.Linear(2048, 14) + + elif args.dataset_type == 'cifar100': + # Train CIFAR100 + args.lr = 0.1 + args.epoch = 200 + num_classes = 100 + fixed_cnn = ResNet34(num_classes=num_classes) + + # NLNL + if args.loss == 'NLNL': + args.epoch = 2000 + ln_neg = 110 + + elif args.dataset_type == 'cifar10': + # Train CIFAR10 + args.epoch = 120 + num_classes = 10 + fixed_cnn = SCEModel(type='cifar10') + + # NLNL + if args.loss == 'NLNL': + args.epoch = 1000 + + elif args.dataset_type == 'mnist': + # Train mnist + args.epoch = 50 + num_classes = 10 + fixed_cnn = SCEModel(type='mnist') + eta_min = 0.001 + args.l2_reg = 1e-3 + # NLNL + if args.loss == 'NLNL': + args.epoch = 720 + + elif args.dataset_type == 'imagenet': + args.epoch = 100 + args.l2_reg = 3e-5 + num_classes = 200 + fixed_cnn = torchvision.models.resnet50(num_classes=num_classes) + + logger.info("num_classes: %s" % (num_classes)) + + loss_options = { + 'SCE': SCELoss(alpha=args.alpha, beta=args.beta, num_classes=num_classes), + 'CE': torch.nn.CrossEntropyLoss(), + 'NCE': NormalizedCrossEntropy(scale=args.alpha, num_classes=num_classes), + 'MAE': MeanAbsoluteError(scale=args.alpha, num_classes=num_classes), + 'NMAE': NormalizedMeanAbsoluteError(scale=args.alpha, num_classes=num_classes), + 'GCE': GeneralizedCrossEntropy(num_classes=num_classes, q=args.q), + 'RCE': ReverseCrossEntropy(scale=args.alpha, num_classes=num_classes), + 'NRCE': NormalizedReverseCrossEntropy(scale=args.alpha, num_classes=num_classes), + 'NGCE': NormalizedGeneralizedCrossEntropy(scale=args.alpha, num_classes=num_classes, q=args.q), + 'NCEandRCE': NCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes), + 'NCEandMAE': NCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes), + 'GCEandMAE': GCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q), + 'GCEandRCE': GCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q), + 'GCEandNCE': GCEandNCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q), + 'MAEandRCE': MAEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes), + 'NGCEandNCE': NGCEandNCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q), + 'NGCEandMAE': NGCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q), + 'NGCEandRCE': NGCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, 
q=args.q), + 'FocalLoss': FocalLoss(gamma=args.gamma), + 'NFL': NormalizedFocalLoss(scale=args.alpha, gamma=args.gamma, num_classes=num_classes), + 'NLNL': NLNL(num_classes=num_classes, train_loader=dataLoader['train_dataset'], ln_neg=ln_neg), + 'NFLandNCE': NFLandNCE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes), + 'NFLandMAE': NFLandMAE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes), + 'NFLandRCE': NFLandRCE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes), + 'DMI': DMILoss(num_classes=num_classes) + } + + if args.loss in loss_options: + criterion = loss_options[args.loss] + else: + raise ValueError("Unknown loss") + + logger.info(criterion.__class__.__name__) + logger.info("Number of Trainable Parameters %.4f" % count_parameters_in_MB(fixed_cnn)) + + fixed_cnn.to(device) + + if args.loss == 'DMI': + criterion = loss_options['CE'] + + fixed_cnn_optmizer = torch.optim.SGD(params=fixed_cnn.parameters(), + lr=args.lr, + momentum=0.9, + weight_decay=args.l2_reg) + + fixed_cnn_scheduler = CosineAnnealingLR(fixed_cnn_optmizer, + float(args.epoch), + eta_min=eta_min) + if args.dataset_type == 'clothing1m': + fixed_cnn_scheduler = MultiStepLR(fixed_cnn_optmizer, milestones=[5, 10], gamma=0.1) + elif args.dataset_type == 'imagenet': + fixed_cnn_scheduler = MultiStepLR(fixed_cnn_optmizer, milestones=[30, 60, 80], gamma=0.1) + + utilHelper = TrainUtil(checkpoint_path=args.checkpoint_path, version=args.version) + starting_epoch = 0 + + for arg in vars(args): + logger.info("%s: %s" % (arg, getattr(args, arg))) + + train_fixed(starting_epoch, dataLoader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper) + + if args.loss == 'DMI': + criterion = loss_options['DMI'] + fixed_cnn_optmizer = torch.optim.SGD(params=fixed_cnn.parameters(), + lr=1e-6, + momentum=0.9, + weight_decay=args.l2_reg) + starting_epoch = 0 + fixed_cnn_scheduler = None + train_fixed(starting_epoch, dataLoader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper) + + +if __name__ == '__main__': + train() diff --git a/archive/train_util.py b/archive/train_util.py new file mode 100644 index 0000000..9484945 --- /dev/null +++ b/archive/train_util.py @@ -0,0 +1,106 @@ +import torch +import os +import pickle + +if torch.cuda.is_available(): + torch.backends.cudnn.benchmark = True + if torch.cuda.device_count() > 1: + device = torch.device('cuda:0') + else: + device = torch.device('cuda') +else: + device = torch.device('cpu') + + +class TrainUtil(): + def __init__(self, checkpoint_path='checkpoints', version='mcts_nas_net_v1'): + self.checkpoint_path = checkpoint_path + self.version = version + return + + def save_model_fixed(self, epoch, fixed_cnn, fixed_cnn_optmizer, save_best=False, **kwargs): + filename = os.path.join(self.checkpoint_path, self.version) + '.pth' + # Torch Save State Dict + state = { + 'epoch': epoch+1, + 'shared_cnn': fixed_cnn.state_dict(), + 'shared_cnn_optmizer': fixed_cnn_optmizer.state_dict(), + } + for key, value in kwargs.items(): + state[key] = value + torch.save(state, filename) + filename = os.path.join(self.checkpoint_path, self.version) + '_best.pth' + if save_best: + torch.save(state, filename) + return + + def load_model_fixed(self, fixed_cnn, fixed_cnn_optmizer, **kwargs): + filename = os.path.join(self.checkpoint_path, self.version) + '.pth' + # Load Torch State Dict + checkpoints = torch.load(filename) + fixed_cnn.load_state_dict(checkpoints['fixed_cnn']) + 
fixed_cnn_optmizer.load_state_dict(checkpoints['fixed_cnn_optmizer']) + print(filename + " Loaded!") + return checkpoints + + def save_model(self, + mcts, + shared_cnn, + shared_cnn_optmizer, + shared_cnn_schduler, + estimator, + estimator_optmizer, + epoch, + **kwargs): + mcts_filename = os.path.join(self.checkpoint_path, self.version) + '_mcts' + '.pkl' + filename = os.path.join(self.checkpoint_path, self.version) + '.pth' + + # Torch Save State Dict + state = { + 'epoch': epoch+1, + 'shared_cnn': shared_cnn.state_dict(), + 'shared_cnn_optmizer': shared_cnn_optmizer.state_dict(), + 'shared_cnn_schduler': shared_cnn_schduler.state_dict(), + 'estimator': estimator.state_dict(), + 'estimator_optmizer': estimator_optmizer.state_dict() + } + for key, value in kwargs.items(): + state[key] = value + torch.save(state, filename) + print(filename + " saved!") + + # Save MCTS to pickle + rolloutPolicy, searchPolicy = mcts.rollout, mcts.searchPolicy + mcts.rollout, mcts.searchPolicy = None, None + with open(mcts_filename, 'wb') as handle: + pickle.dump(mcts, handle, protocol=pickle.HIGHEST_PROTOCOL) + print(mcts_filename + " Saved!") + mcts.rollout, mcts.searchPolicy = rolloutPolicy, searchPolicy + return + + def load_model(self, + shared_cnn, + shared_cnn_optmizer, + shared_cnn_schduler, + estimator, + estimator_optmizer, + **kwargs): + + filename = os.path.join(self.checkpoint_path, self.version) + '.pth' + mcts_filename = os.path.join(self.checkpoint_path, self.version) + '_mcts' + '.pkl' + + # Load Torch State Dict + checkpoints = torch.load(filename) + shared_cnn.load_state_dict(checkpoints['shared_cnn']) + shared_cnn_optmizer.load_state_dict(checkpoints['shared_cnn_optmizer']) + shared_cnn_schduler.load_state_dict(checkpoints['shared_cnn_schduler']) + shared_cnn_schduler.optimizer = shared_cnn_optmizer + estimator.load_state_dict(checkpoints['estimator']) + estimator_optmizer.load_state_dict(checkpoints['estimator_optmizer']) + print(filename + " Loaded!") + + # Load MCTS + with open(mcts_filename, 'rb') as handle: + mcts = pickle.load(handle) + print(mcts_filename + " Loaded!") + return checkpoints, mcts diff --git a/archive/utils/__init__.py b/archive/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/archive/utils/utils.py b/archive/utils/utils.py new file mode 100644 index 0000000..b72833e --- /dev/null +++ b/archive/utils/utils.py @@ -0,0 +1,85 @@ +import csv +import sys +import numpy as np + + +class CSVLogger(): + def __init__(self, args, fieldnames, filename='log.csv'): + + self.filename = filename + self.csv_file = open(filename, 'w') + + # Write model configuration at top of csv + writer = csv.writer(self.csv_file) + for arg in vars(args): + writer.writerow([arg, getattr(args, arg)]) + writer.writerow(['']) + + self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) + self.writer.writeheader() + + self.csv_file.flush() + + def writerow(self, row): + self.writer.writerow(row) + self.csv_file.flush() + + def close(self): + self.csv_file.close() + + +class Logger(object): + def __init__(self, filename): + self.terminal = sys.stdout + self.log = open(filename, 'w') + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + self.log.flush() + + def flush(self): + # this flush method is needed for python 3 compatibility. + # this handles the flush command by doing nothing. + # you might want to specify some extra behavior here. 
+ pass + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + self.max = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + self.max = max(self.max, val) + + +def accuracy(output, target, topk=(1,)): + maxk = max(topk) + + batch_size = target.size(0) + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(1/batch_size)) + return res + + +def count_parameters_in_MB(model): + return sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary_head" not in name)/1e6 diff --git a/callback_util.py b/callback_util.py index 8bd8647..2372c85 100644 --- a/callback_util.py +++ b/callback_util.py @@ -1,26 +1,19 @@ import numpy as np -import keras.backend as K -from keras.utils import np_utils -from keras.callbacks import Callback, LearningRateScheduler -from keras.optimizers import SGD +import torch from util import get_lids_random_batch -from loss import cross_entropy, lid_paced_loss -from lass_tf import lass -import tensorflow as tf +import os -class D2LCallback(Callback): - def __init__(self, model, X_train, y_train, dataset, noise_ratio, epochs=150, +class D2LCallback: + def __init__(self, model, data_loader, device, epochs=120, pace_type='d2l', init_epoch=5, epoch_win=5, lid_subset_size=1280, - lid_k=20, verbose=1): + lid_k=20, verbose=1, is_found_turning_point=False): super(D2LCallback, self).__init__() self.validation_data = None self.model = model self.turning_epoch = -1 - self.X_train = X_train - self.y_train = y_train - self.dataset = dataset - self.noise_ratio = noise_ratio + self.data_loader = data_loader + self.device = device self.epochs = epochs self.pace_type = pace_type self.mean_lid = -1. 
@@ -32,11 +25,11 @@ def __init__(self, model, X_train, y_train, dataset, noise_ratio, epochs=150, self.lid_k = lid_k self.verbose = verbose self.alpha = 1.0 + self.is_found_turning_point = is_found_turning_point def on_epoch_begin(self, epoch, logs={}): - rand_idxes = np.random.choice(self.X_train.shape[0], self.lid_subset_size, replace=False) - lid = np.mean(get_lids_random_batch(self.model, self.X_train[rand_idxes], k=self.lid_k, batch_size=128)) - + lids_tem = get_lids_random_batch(self.model, self.data_loader, self.device, k=20, batch_size=128) + lid = lids_tem.mean().item() self.p_lambda = epoch*1./self.epochs # deal with possible illegal lid value @@ -48,6 +41,7 @@ def on_epoch_begin(self, epoch, logs={}): # find the turning point where to apply lid-paced learning strategy if self.found_turning_point(self.lids): self.update_learning_pace() + self.is_found_turning_point = True if len(self.lids) > 5: print('lid = ..., ', self.lids[-5:]) @@ -56,172 +50,30 @@ def on_epoch_begin(self, epoch, logs={}): if self.verbose > 0: print('--Epoch: %s, LID: %.2f, min LID: %.2f, lid window: %s, turning epoch: %s, lambda: %.2f' % - (epoch, lid, np.min(self.lids), self.epoch_win, self.turning_epoch, self.p_lambda)) + (epoch, lid, min(self.lids), self.epoch_win, self.turning_epoch, self.p_lambda)) return def found_turning_point(self, lids): - if len(lids) > self.init_epoch + self.epoch_win: # - if self.turning_epoch > -1: # if turning point is already found, stop checking + if len(lids) > self.init_epoch + self.epoch_win: + if self.turning_epoch > -1: return True else: smooth_lids = lids[-self.epoch_win-1:-1] - # self.mean_lid = np.mean(smooth_lids) - if lids[-1] - np.mean(smooth_lids) > 2*np.std(smooth_lids): + if lids[-1] - torch.mean(torch.tensor(smooth_lids)) > 2 * torch.std(torch.tensor(smooth_lids)): self.turning_epoch = len(lids) - 2 # rollback model if you want, should be called before checkpoint callback # otherwise need to save two models - min_model_path = 'model/%s_%s_%s.hdf5' % (self.pace_type, - self.dataset, - self.noise_ratio) - self.model.load_weights(min_model_path) return True - else: - return False + return False def update_learning_pace(self): - # # this loss is not working for d2l learning, somehow, why??? - expansion = self.lids[-1] / np.min(self.lids) - self.alpha = np.exp(-self.p_lambda * expansion) + expansion = self.lids[-1] / min(self.lids) + self.alpha = torch.exp(torch.tensor(-self.p_lambda * expansion)).item() # self.alpha = np.exp(-0.1*expansion) print('## Turning epoch: %s, lambda: %.2f, expansion: %.2f, alpha: %.2f' % (self.turning_epoch, self.p_lambda, expansion, self.alpha)) # self.alpha = np.exp(-expansion) - self.model.compile(loss=lid_paced_loss(self.alpha), - optimizer=self.model.optimizer, metrics=['accuracy']) - - -class LoggerCallback(Callback): - """ - Log train/val loss and acc into file for later plots. 
- """ - def __init__(self, model, X_train, y_train, X_test, y_test, dataset, - model_name, noise_ratio, epochs): - super(LoggerCallback, self).__init__() - self.model = model - self.X_train = X_train - self.y_train = y_train - self.X_test = X_test - self.y_test = y_test - self.dataset = dataset - self.model_name = model_name - self.noise_ratio = noise_ratio - self.epochs = epochs - - self.train_loss = [] - self.test_loss = [] - self.train_acc = [] - self.test_acc = [] - # the followings are used to estimate LID - self.lid_k = 20 - self.lid_subset = 128 - self.lids = [] - - # complexity - Critical Sample Ratio (csr) - self.csr_subset = 500 - self.csr_batchsize = 100 - self.csrs = [] - - def on_epoch_end(self, epoch, logs={}): - tr_acc = logs.get('acc') - tr_loss = logs.get('loss') - val_loss = logs.get('val_loss') - val_acc = logs.get('val_acc') - # te_loss, te_acc = self.model.evaluate(self.X_test, self.y_test, batch_size=128, verbose=0) - self.train_loss.append(tr_loss) - self.test_loss.append(val_loss) - self.train_acc.append(tr_acc) - self.test_acc.append(val_acc) - - file_name = 'log/loss_%s_%s_%s.npy' % \ - (self.model_name, self.dataset, self.noise_ratio) - np.save(file_name, np.stack((np.array(self.train_loss), np.array(self.test_loss)))) - file_name = 'log/acc_%s_%s_%s.npy' % \ - (self.model_name, self.dataset, self.noise_ratio) - np.save(file_name, np.stack((np.array(self.train_acc), np.array(self.test_acc)))) - - # print('\n--Epoch %02d, train_loss: %.2f, train_acc: %.2f, val_loss: %.2f, val_acc: %.2f' % - # (epoch, tr_loss, tr_acc, val_loss, val_acc)) - - # calculate LID/CSR and save every 10 epochs - if epoch % 1 == 0: - # compute lid scores - rand_idxes = np.random.choice(self.X_train.shape[0], self.lid_subset * 10, replace=False) - lid = np.mean(get_lids_random_batch(self.model, self.X_train[rand_idxes], - k=self.lid_k, batch_size=self.lid_subset)) - self.lids.append(lid) - - file_name = 'log/lid_%s_%s_%s.npy' % \ - (self.model_name, self.dataset, self.noise_ratio) - np.save(file_name, np.array(self.lids)) - - if len(np.array(self.lids).flatten()) > 20: - print('lid = ...', self.lids[-20:]) - else: - print('lid = ', self.lids) - - # compute csr scores - # LASS to estimate the critical sample ratio - scale_factor = 255. / (np.max(self.X_test) - np.min(self.X_test)) - y = tf.placeholder(tf.float32, shape=(None,) + self.y_test.shape[1:]) - csr_model = lass(self.model.layers[0].input, self.model.layers[-1].output, y, - a=0.25 / scale_factor, - b=0.2 / scale_factor, - r=0.3 / scale_factor, - iter_max=100) - rand_idxes = np.random.choice(self.X_test.shape[0], self.csr_subset, replace=False) - X_adv, adv_ind = csr_model.find(self.X_test[rand_idxes], bs=self.csr_batchsize) - csr = np.sum(adv_ind) * 1. / self.csr_subset - self.csrs.append(csr) - - file_name = 'log/csr_%s_%s_%s.npy' % \ - (self.model_name, self.dataset, self.noise_ratio) - np.save(file_name, np.array(self.csrs)) - - if len(self.csrs) > 20: - print('csr = ...', self.csrs[-20:]) - else: - print('csr = ', self.csrs) - - return - -def get_lr_scheduler(dataset): - """ - customerized learning rate decay for training with clean labels. - For efficientcy purpose we use large lr for noisy data. 
- :param dataset: - :param noise_ratio: - :return: - """ - if dataset in ['mnist', 'svhn']: - def scheduler(epoch): - if epoch > 40: - return 0.001 - elif epoch > 20: - return 0.01 - else: - return 0.1 - return LearningRateScheduler(scheduler) - elif dataset in ['cifar-10']: - def scheduler(epoch): - if epoch > 80: - return 0.001 - elif epoch > 40: - return 0.01 - else: - return 0.1 - return LearningRateScheduler(scheduler) - elif dataset in ['cifar-100']: - def scheduler(epoch): - if epoch > 160: - return 0.0001 - elif epoch > 120: - return 0.001 - elif epoch > 80: - return 0.01 - else: - return 0.1 - return LearningRateScheduler(scheduler) diff --git a/complexity_plot.py b/complexity_plot.py deleted file mode 100644 index e3cd6c5..0000000 --- a/complexity_plot.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Train test error/accuracy/loss plot. - -Author: Xingjun Ma -""" -import os -import numpy as np -import tensorflow as tf -import keras.backend as K -from keras.datasets import mnist, cifar10 -from keras.optimizers import SGD -from keras.utils import to_categorical -import matplotlib.pyplot as plt -from datasets import get_data -from models import get_model -from loss import cross_entropy -from lass_tf import lass - -np.random.seed(1024) - -MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'd2l'] -MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L'] -COLORS = ['r', 'y', 'c', 'm', 'g', 'b'] -MARKERS = ['x', 'D', '<', '>', '^', 'o'] - -def complexity_plot(model_list, dataset='mnist', num_classes=10, noise_ratio=10, epochs=50, n_samples=500): - """ - The complexity (Critical Sample Ratio) of the hypothesis learned throughout training. - """ - print('Dataset: %s, epochs: %s, noise ratio: %s%%' % (dataset, epochs, noise_ratio)) - - # plot initialization - fig = plt.figure() # figsize=(7, 6) - ax = fig.add_subplot(111) - bins = np.arange(epochs) - xnew = np.arange(0, epochs, 5) - - # load data - _, _, X_test, Y_test = get_data(dataset) - # convert class vectors to binary class matrices - Y_test = to_categorical(Y_test, num_classes) - - shuffle = np.random.permutation(X_test.shape[0]) - X_test = X_test[shuffle] - Y_test = Y_test[shuffle] - X_test = X_test[:n_samples] - Y_test = Y_test[:n_samples] - - # load model - image_shape = X_test.shape[1:] - model = get_model(dataset, input_tensor=None, input_shape=image_shape) - sgd = SGD(lr=0.01, momentum=0.9) - y = tf.placeholder(tf.float32, shape=(None,) + Y_test.shape[1:]) - - for model_name in model_list: - file_name = "log/crs_%s_%s_%s.npy" % (model_name, dataset, noise_ratio) - if os.path.isfile(file_name): - crs = np.load(file_name) - # plot line - idx = MODELS.index(model_name) - - # z = np.polyfit(bins, crs, deg=5) - # f = np.poly1d(z) - # crs = f(xnew) - - for i in xnew: - crs[i] = np.mean(crs[i:i+5]) - - crs = crs[xnew] - - ax.plot(xnew, crs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) - continue - - crs = np.zeros(epochs) - for i in range(epochs): - # the critical sample ratio of the representations learned at every epoch - # need to save those epochs first, in this case, use separate folders for each model - model_path = 'model/%s/%s_%s.%02d.hdf5' % (model_name, dataset, noise_ratio, i) - model.load_weights(model_path) - model.compile( - loss=cross_entropy, - optimizer=sgd, - metrics=['accuracy'] - ) - - # LASS to estimate the critical sample ratio - scale_factor = 255. 
/ (np.max(X_test) - np.min(X_test)) - csr_model = lass(model.layers[0].input, model.layers[-1].output, y, - a=0.25 / scale_factor, - b=0.2 / scale_factor, - r=0.3 / scale_factor, - iter_max=100) - X_adv, adv_ind = csr_model.find(X_test, bs=500) - crs[i] = np.sum(adv_ind) * 1. / n_samples - - print('model: %s, epoch: %s, CRS: %s' % (model_name, i, crs[i])) - - # save result to avoid recomputing - np.save(file_name, crs) - print(crs) - - # plot line - idx = MODELS.index(model_name) - - z = np.polyfit(bins, crs, deg=5) - f = np.poly1d(z) - crs = f(xnew) - - ax.plot(xnew, crs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) - - # ax.set_xticks([]) - # ax.set_yticks([]) - ax.set_xlabel("Epoch", fontsize=15) - ax.set_ylabel("Hypothesis complexity (CSR score)", fontsize=15) - # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) - legend = plt.legend(loc='upper left') - plt.setp(legend.get_texts(), fontsize=15) - fig.savefig("plots/complexity_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300) - plt.show() - -if __name__ == "__main__": - # mnist: epoch=50, cifar-10: epoch=120 - complexity_plot(model_list=['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'], - dataset='cifar-10', num_classes=10, noise_ratio=60, epochs=120, n_samples=500) diff --git a/configs/cifar10/asym/bhl.yaml b/configs/cifar10/asym/bhl.yaml new file mode 100644 index 0000000..e459990 --- /dev/null +++ b/configs/cifar10/asym/bhl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.8 \ No newline at end of file diff --git a/configs/cifar10/asym/bl.yaml b/configs/cifar10/asym/bl.yaml new file mode 100644 index 0000000..959320c --- /dev/null +++ b/configs/cifar10/asym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BackwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar10/asym/bsl.yaml b/configs/cifar10/asym/bsl.yaml new file mode 100644 index 0000000..81d30bb --- /dev/null +++ b/configs/cifar10/asym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.95 \ No newline at end of file diff --git a/configs/cifar10/asym/ce.yaml b/configs/cifar10/asym/ce.yaml new file mode 100644 index 
0000000..a665689 --- /dev/null +++ b/configs/cifar10/asym/ce.yaml @@ -0,0 +1,30 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: CrossEntropyLoss diff --git a/configs/cifar10/asym/d2l.yaml b/configs/cifar10/asym/d2l.yaml new file mode 100644 index 0000000..875d393 --- /dev/null +++ b/configs/cifar10/asym/d2l.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: LIDPacedLoss + num_classes: 10 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/cifar10/asym/fl.yaml b/configs/cifar10/asym/fl.yaml new file mode 100644 index 0000000..a97025d --- /dev/null +++ b/configs/cifar10/asym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ForwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar10/asym/focal.yaml b/configs/cifar10/asym/focal.yaml new file mode 100644 index 0000000..c00eb66 --- /dev/null +++ b/configs/cifar10/asym/focal.yaml @@ -0,0 +1,31 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/cifar10/asym/gce.yaml b/configs/cifar10/asym/gce.yaml new file mode 100644 index 0000000..23338cf --- /dev/null +++ b/configs/cifar10/asym/gce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 10 + q: 0.7 diff --git a/configs/cifar10/asym/mae.yaml b/configs/cifar10/asym/mae.yaml new file mode 100644 index 0000000..69457cc --- /dev/null +++ b/configs/cifar10/asym/mae.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 
+ +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: MeanAbsoluteError + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/asym/nce+mae.yaml b/configs/cifar10/asym/nce+mae.yaml new file mode 100644 index 0000000..3d077e5 --- /dev/null +++ b/configs/cifar10/asym/nce+mae.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandMAE + num_classes: 10 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/nce+rce.yaml b/configs/cifar10/asym/nce+rce.yaml new file mode 100644 index 0000000..01a46df --- /dev/null +++ b/configs/cifar10/asym/nce+rce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandRCE + num_classes: 10 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/nce.yaml b/configs/cifar10/asym/nce.yaml new file mode 100644 index 0000000..6734a62 --- /dev/null +++ b/configs/cifar10/asym/nce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/asym/nfl+mae.yaml b/configs/cifar10/asym/nfl+mae.yaml new file mode 100644 index 0000000..d5f723c --- /dev/null +++ b/configs/cifar10/asym/nfl+mae.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandMAE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/nfl+rce.yaml b/configs/cifar10/asym/nfl+rce.yaml new file mode 100644 index 0000000..f2ee4f5 --- /dev/null +++ b/configs/cifar10/asym/nfl+rce.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + 
eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandRCE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/nfl.yaml b/configs/cifar10/asym/nfl.yaml new file mode 100644 index 0000000..5dfad2e --- /dev/null +++ b/configs/cifar10/asym/nfl.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedFocalLoss + num_classes: 10 + scale: 1.0 + gamma: 0.5 diff --git a/configs/cifar10/asym/ngce+mae.yaml b/configs/cifar10/asym/ngce+mae.yaml new file mode 100644 index 0000000..06cf547 --- /dev/null +++ b/configs/cifar10/asym/ngce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/ngce+rce.yaml b/configs/cifar10/asym/ngce+rce.yaml new file mode 100644 index 0000000..06cf547 --- /dev/null +++ b/configs/cifar10/asym/ngce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/asym/ngce.yaml b/configs/cifar10/asym/ngce.yaml new file mode 100644 index 0000000..818891e --- /dev/null +++ b/configs/cifar10/asym/ngce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedGeneralizedCrossEntropy + num_classes: 10 + q: 0.1 + scale: 1.0 diff --git a/configs/cifar10/asym/nlnl.yaml b/configs/cifar10/asym/nlnl.yaml new file mode 100644 index 0000000..9f2d4f6 --- /dev/null +++ b/configs/cifar10/asym/nlnl.yaml @@ -0,0 +1,32 @@ +epochs: 1000 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + 
data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NLNL + num_classes: 10 + ln_neg: 1 diff --git a/configs/cifar10/asym/rce.yaml b/configs/cifar10/asym/rce.yaml new file mode 100644 index 0000000..0adfafc --- /dev/null +++ b/configs/cifar10/asym/rce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ReverseCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/asym/sce.yaml b/configs/cifar10/asym/sce.yaml new file mode 100644 index 0000000..e1d97f7 --- /dev/null +++ b/configs/cifar10/asym/sce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: SCELoss + num_classes: 10 + alpha: 0.1 + beta: 1.0 diff --git a/configs/cifar10/sym/bhl.yaml b/configs/cifar10/sym/bhl.yaml new file mode 100644 index 0000000..a68e993 --- /dev/null +++ b/configs/cifar10/sym/bhl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.8 \ No newline at end of file diff --git a/configs/cifar10/sym/bl.yaml b/configs/cifar10/sym/bl.yaml new file mode 100644 index 0000000..6a42c82 --- /dev/null +++ b/configs/cifar10/sym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BackwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar10/sym/bsl.yaml b/configs/cifar10/sym/bsl.yaml new file mode 100644 index 0000000..3402c3b --- /dev/null +++ b/configs/cifar10/sym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type 
+ +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.95 \ No newline at end of file diff --git a/configs/cifar10/sym/ce.yaml b/configs/cifar10/sym/ce.yaml new file mode 100644 index 0000000..6f45e1d --- /dev/null +++ b/configs/cifar10/sym/ce.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +#scheduler: + #name: CosineAnnealingLR + #T_max: $epochs + #eta_min: 0.0 +scheduler: + name: StepLR + step_size: 40 + gamma: 0.1 + +criterion: + name: CrossEntropyLoss diff --git a/configs/cifar10/sym/d2l.yaml b/configs/cifar10/sym/d2l.yaml new file mode 100644 index 0000000..4906968 --- /dev/null +++ b/configs/cifar10/sym/d2l.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: LIDPacedLoss + num_classes: 10 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/cifar10/sym/fl.yaml b/configs/cifar10/sym/fl.yaml new file mode 100644 index 0000000..e51103f --- /dev/null +++ b/configs/cifar10/sym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ForwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar10/sym/focal.yaml b/configs/cifar10/sym/focal.yaml new file mode 100644 index 0000000..62425fd --- /dev/null +++ b/configs/cifar10/sym/focal.yaml @@ -0,0 +1,31 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/cifar10/sym/gce.yaml b/configs/cifar10/sym/gce.yaml new file mode 100644 index 0000000..62420e3 --- /dev/null +++ b/configs/cifar10/sym/gce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + 
momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 10 + q: 0.7 diff --git a/configs/cifar10/sym/mae.yaml b/configs/cifar10/sym/mae.yaml new file mode 100644 index 0000000..bb7e561 --- /dev/null +++ b/configs/cifar10/sym/mae.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: MeanAbsoluteError + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/sym/nce+mae.yaml b/configs/cifar10/sym/nce+mae.yaml new file mode 100644 index 0000000..0ccc85f --- /dev/null +++ b/configs/cifar10/sym/nce+mae.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandMAE + num_classes: 10 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/nce+rce.yaml b/configs/cifar10/sym/nce+rce.yaml new file mode 100644 index 0000000..4c58521 --- /dev/null +++ b/configs/cifar10/sym/nce+rce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandRCE + num_classes: 10 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/nce.yaml b/configs/cifar10/sym/nce.yaml new file mode 100644 index 0000000..d115054 --- /dev/null +++ b/configs/cifar10/sym/nce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/sym/nfl+mae.yaml b/configs/cifar10/sym/nfl+mae.yaml new file mode 100644 index 0000000..59b6ff8 --- /dev/null +++ b/configs/cifar10/sym/nfl+mae.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: 
NFLandMAE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/nfl+rce.yaml b/configs/cifar10/sym/nfl+rce.yaml new file mode 100644 index 0000000..8b5c97b --- /dev/null +++ b/configs/cifar10/sym/nfl+rce.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandRCE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/nfl.yaml b/configs/cifar10/sym/nfl.yaml new file mode 100644 index 0000000..3d5a934 --- /dev/null +++ b/configs/cifar10/sym/nfl.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedFocalLoss + num_classes: 10 + scale: 10.0 + gamma: 0.5 diff --git a/configs/cifar10/sym/ngce+mae.yaml b/configs/cifar10/sym/ngce+mae.yaml new file mode 100644 index 0000000..eaf3bfd --- /dev/null +++ b/configs/cifar10/sym/ngce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/ngce+rce.yaml b/configs/cifar10/sym/ngce+rce.yaml new file mode 100644 index 0000000..8f021d8 --- /dev/null +++ b/configs/cifar10/sym/ngce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandRCE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 1.0 diff --git a/configs/cifar10/sym/ngce.yaml b/configs/cifar10/sym/ngce.yaml new file mode 100644 index 0000000..ac42d1c --- /dev/null +++ b/configs/cifar10/sym/ngce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedGeneralizedCrossEntropy + 
num_classes: 10 + scale: 1.0 + q: 0.1 diff --git a/configs/cifar10/sym/nlnl.yaml b/configs/cifar10/sym/nlnl.yaml new file mode 100644 index 0000000..bb0d8b2 --- /dev/null +++ b/configs/cifar10/sym/nlnl.yaml @@ -0,0 +1,32 @@ +epochs: 1000 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NLNL + num_classes: 10 + ln_neg: 1 diff --git a/configs/cifar10/sym/rce.yaml b/configs/cifar10/sym/rce.yaml new file mode 100644 index 0000000..41dd3f3 --- /dev/null +++ b/configs/cifar10/sym/rce.yaml @@ -0,0 +1,32 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ReverseCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/cifar10/sym/sce.yaml b/configs/cifar10/sym/sce.yaml new file mode 100644 index 0000000..87a29a4 --- /dev/null +++ b/configs/cifar10/sym/sce.yaml @@ -0,0 +1,33 @@ +epochs: 120 +grad_bound: 5.0 +log_frequency: 100 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 128 + data_path: ../datasets/ + dataset_type: 'CIFAR10' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-4 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: SCELoss + num_classes: 10 + alpha: 0.1 + beta: 1.0 diff --git a/configs/cifar100/asym/bhl.yaml b/configs/cifar100/asym/bhl.yaml new file mode 100644 index 0000000..dbbdd89 --- /dev/null +++ b/configs/cifar100/asym/bhl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 100 + beta: 0.8 \ No newline at end of file diff --git a/configs/cifar100/asym/bl.yaml b/configs/cifar100/asym/bl.yaml new file mode 100644 index 0000000..0c78bbe --- /dev/null +++ b/configs/cifar100/asym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BackwardLoss + num_classes: 100 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar100/asym/bsl.yaml b/configs/cifar100/asym/bsl.yaml new file mode 100644 
index 0000000..faad8ce --- /dev/null +++ b/configs/cifar100/asym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 100 + beta: 0.95 \ No newline at end of file diff --git a/configs/cifar100/asym/ce.yaml b/configs/cifar100/asym/ce.yaml new file mode 100644 index 0000000..e99f3b4 --- /dev/null +++ b/configs/cifar100/asym/ce.yaml @@ -0,0 +1,30 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: CrossEntropyLoss diff --git a/configs/cifar100/asym/d2l.yaml b/configs/cifar100/asym/d2l.yaml new file mode 100644 index 0000000..7cb0e40 --- /dev/null +++ b/configs/cifar100/asym/d2l.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: LIDPacedLoss + num_classes: 100 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/cifar100/asym/fl.yaml b/configs/cifar100/asym/fl.yaml new file mode 100644 index 0000000..f827587 --- /dev/null +++ b/configs/cifar100/asym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ForwardLoss + num_classes: 100 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar100/asym/focal.yaml b/configs/cifar100/asym/focal.yaml new file mode 100644 index 0000000..a836fa7 --- /dev/null +++ b/configs/cifar100/asym/focal.yaml @@ -0,0 +1,31 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/cifar100/asym/gce.yaml b/configs/cifar100/asym/gce.yaml new file mode 100644 index 0000000..f6cffc5 --- /dev/null +++ b/configs/cifar100/asym/gce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + 
name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 100 + q: 0.7 diff --git a/configs/cifar100/asym/mae.yaml b/configs/cifar100/asym/mae.yaml new file mode 100644 index 0000000..4a46a93 --- /dev/null +++ b/configs/cifar100/asym/mae.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: MeanAbsoluteError + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/asym/nce+mae.yaml b/configs/cifar100/asym/nce+mae.yaml new file mode 100644 index 0000000..8c28957 --- /dev/null +++ b/configs/cifar100/asym/nce+mae.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandMAE + num_classes: 100 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/asym/nce+rce.yaml b/configs/cifar100/asym/nce+rce.yaml new file mode 100644 index 0000000..004a76b --- /dev/null +++ b/configs/cifar100/asym/nce+rce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandRCE + num_classes: 100 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/asym/nce.yaml b/configs/cifar100/asym/nce.yaml new file mode 100644 index 0000000..a54b4c5 --- /dev/null +++ b/configs/cifar100/asym/nce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedCrossEntropy + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/asym/nfl+mae.yaml b/configs/cifar100/asym/nfl+mae.yaml new file mode 100644 index 0000000..acf631d --- /dev/null +++ b/configs/cifar100/asym/nfl+mae.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' 
+ num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandMAE + num_classes: 100 + gamma: 0.5 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/asym/nfl+rce.yaml b/configs/cifar100/asym/nfl+rce.yaml new file mode 100644 index 0000000..484522b --- /dev/null +++ b/configs/cifar100/asym/nfl+rce.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandRCE + num_classes: 100 + gamma: 0.5 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/asym/nfl.yaml b/configs/cifar100/asym/nfl.yaml new file mode 100644 index 0000000..8ee3b92 --- /dev/null +++ b/configs/cifar100/asym/nfl.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedFocalLoss + num_classes: 100 + scale: 1.0 + gamma: 0.5 diff --git a/configs/cifar100/asym/ngce+mae.yaml b/configs/cifar100/asym/ngce+mae.yaml new file mode 100644 index 0000000..838e0e4 --- /dev/null +++ b/configs/cifar100/asym/ngce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 100 + q: 0.7 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/asym/ngce+rce.yaml b/configs/cifar100/asym/ngce+rce.yaml new file mode 100644 index 0000000..a35c906 --- /dev/null +++ b/configs/cifar100/asym/ngce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 100 + q: 0.7 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/asym/ngce.yaml b/configs/cifar100/asym/ngce.yaml new file mode 100644 index 0000000..63d3ef6 --- /dev/null +++ b/configs/cifar100/asym/ngce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 
100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedGeneralizedCrossEntropy + num_classes: 100 + scale: 1.0 + q: 0.7 diff --git a/configs/cifar100/asym/nlnl.yaml b/configs/cifar100/asym/nlnl.yaml new file mode 100644 index 0000000..1946dd5 --- /dev/null +++ b/configs/cifar100/asym/nlnl.yaml @@ -0,0 +1,32 @@ +epochs: 2000 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NLNL + num_classes: 100 + ln_neg: 110 diff --git a/configs/cifar100/asym/rce.yaml b/configs/cifar100/asym/rce.yaml new file mode 100644 index 0000000..4cc8114 --- /dev/null +++ b/configs/cifar100/asym/rce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ReverseCrossEntropy + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/asym/sce.yaml b/configs/cifar100/asym/sce.yaml new file mode 100644 index 0000000..005e078 --- /dev/null +++ b/configs/cifar100/asym/sce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: SCELoss + num_classes: 100 + alpha: 6.0 + beta: 0.1 diff --git a/configs/cifar100/sym/bhl.yaml b/configs/cifar100/sym/bhl.yaml new file mode 100644 index 0000000..8f73ef3 --- /dev/null +++ b/configs/cifar100/sym/bhl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 100 + beta: 0.8 \ No newline at end of file diff --git a/configs/cifar100/sym/bl.yaml b/configs/cifar100/sym/bl.yaml new file mode 100644 index 0000000..3ce7c37 --- /dev/null +++ b/configs/cifar100/sym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + 
+criterion: + name: BackwardLoss + num_classes: 100 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar100/sym/bsl.yaml b/configs/cifar100/sym/bsl.yaml new file mode 100644 index 0000000..346c324 --- /dev/null +++ b/configs/cifar100/sym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: BootSoftLoss + num_classes: 100 + beta: 0.95 \ No newline at end of file diff --git a/configs/cifar100/sym/ce.yaml b/configs/cifar100/sym/ce.yaml new file mode 100644 index 0000000..ece8d22 --- /dev/null +++ b/configs/cifar100/sym/ce.yaml @@ -0,0 +1,30 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: CrossEntropyLoss diff --git a/configs/cifar100/sym/d2l.yaml b/configs/cifar100/sym/d2l.yaml new file mode 100644 index 0000000..167f494 --- /dev/null +++ b/configs/cifar100/sym/d2l.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: LIDPacedLoss + num_classes: 100 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/cifar100/sym/fl.yaml b/configs/cifar100/sym/fl.yaml new file mode 100644 index 0000000..9261c73 --- /dev/null +++ b/configs/cifar100/sym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ForwardLoss + num_classes: 100 + noise_rate: 0 \ No newline at end of file diff --git a/configs/cifar100/sym/focal.yaml b/configs/cifar100/sym/focal.yaml new file mode 100644 index 0000000..d7724f9 --- /dev/null +++ b/configs/cifar100/sym/focal.yaml @@ -0,0 +1,31 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/cifar100/sym/gce.yaml 
b/configs/cifar100/sym/gce.yaml new file mode 100644 index 0000000..0182704 --- /dev/null +++ b/configs/cifar100/sym/gce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 100 + q: 0.7 diff --git a/configs/cifar100/sym/mae.yaml b/configs/cifar100/sym/mae.yaml new file mode 100644 index 0000000..453c535 --- /dev/null +++ b/configs/cifar100/sym/mae.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: MeanAbsoluteError + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/sym/nce+mae.yaml b/configs/cifar100/sym/nce+mae.yaml new file mode 100644 index 0000000..f78c570 --- /dev/null +++ b/configs/cifar100/sym/nce+mae.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandMAE + num_classes: 100 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/sym/nce+rce.yaml b/configs/cifar100/sym/nce+rce.yaml new file mode 100644 index 0000000..02bf826 --- /dev/null +++ b/configs/cifar100/sym/nce+rce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NCEandRCE + num_classes: 100 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/sym/nce.yaml b/configs/cifar100/sym/nce.yaml new file mode 100644 index 0000000..23d4724 --- /dev/null +++ b/configs/cifar100/sym/nce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedCrossEntropy + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/sym/nfl+mae.yaml b/configs/cifar100/sym/nfl+mae.yaml new file mode 100644 index 0000000..e6fb00c --- /dev/null +++ b/configs/cifar100/sym/nfl+mae.yaml @@ -0,0 +1,34 @@ 
+epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandMAE + num_classes: 100 + gamma: 0.5 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/sym/nfl+rce.yaml b/configs/cifar100/sym/nfl+rce.yaml new file mode 100644 index 0000000..86eca40 --- /dev/null +++ b/configs/cifar100/sym/nfl+rce.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NFLandRCE + num_classes: 100 + gamma: 0.5 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/sym/nfl.yaml b/configs/cifar100/sym/nfl.yaml new file mode 100644 index 0000000..67af0a2 --- /dev/null +++ b/configs/cifar100/sym/nfl.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedFocalLoss + num_classes: 100 + scale: 1.0 + gamma: 0.5 diff --git a/configs/cifar100/sym/ngce+mae.yaml b/configs/cifar100/sym/ngce+mae.yaml new file mode 100644 index 0000000..26ca360 --- /dev/null +++ b/configs/cifar100/sym/ngce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 100 + q: 0.7 + alpha: 10.0 + beta: 1.0 diff --git a/configs/cifar100/sym/ngce+rce.yaml b/configs/cifar100/sym/ngce+rce.yaml new file mode 100644 index 0000000..e1907de --- /dev/null +++ b/configs/cifar100/sym/ngce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NGCEandMAE + num_classes: 100 + q: 0.7 + alpha: 10.0 + beta: 0.1 diff --git a/configs/cifar100/sym/ngce.yaml b/configs/cifar100/sym/ngce.yaml new file mode 100644 index 0000000..cfc67f8 --- /dev/null +++ b/configs/cifar100/sym/ngce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: 
DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NormalizedGeneralizedCrossEntropy + num_classes: 100 + scale: 1.0 + q: 0.7 diff --git a/configs/cifar100/sym/nlnl.yaml b/configs/cifar100/sym/nlnl.yaml new file mode 100644 index 0000000..00729a2 --- /dev/null +++ b/configs/cifar100/sym/nlnl.yaml @@ -0,0 +1,32 @@ +epochs: 2000 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: NLNL + num_classes: 100 + ln_neg: 110 diff --git a/configs/cifar100/sym/rce.yaml b/configs/cifar100/sym/rce.yaml new file mode 100644 index 0000000..48d1f56 --- /dev/null +++ b/configs/cifar100/sym/rce.yaml @@ -0,0 +1,32 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: ReverseCrossEntropy + num_classes: 100 + scale: 1.0 diff --git a/configs/cifar100/sym/sce.yaml b/configs/cifar100/sym/sce.yaml new file mode 100644 index 0000000..f54b9d9 --- /dev/null +++ b/configs/cifar100/sym/sce.yaml @@ -0,0 +1,33 @@ +epochs: 200 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 256 + data_path: ../datasets/ + dataset_type: 'CIFAR100' + num_of_workers: 8 + +model: + name: ResNet50 + num_classes: 100 + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-5 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.0 + +criterion: + name: SCELoss + num_classes: 100 + alpha: 6.0 + beta: 0.1 diff --git a/configs/mnist/asym/bhl.yaml b/configs/mnist/asym/bhl.yaml new file mode 100644 index 0000000..e12a278 --- /dev/null +++ b/configs/mnist/asym/bhl.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.8 \ No newline at end of file diff --git a/configs/mnist/asym/bl.yaml b/configs/mnist/asym/bl.yaml new file mode 100644 index 0000000..92fb2ac --- /dev/null +++ b/configs/mnist/asym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 51 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + 
+model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: BackwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/mnist/asym/bsl.yaml b/configs/mnist/asym/bsl.yaml new file mode 100644 index 0000000..fc2aa20 --- /dev/null +++ b/configs/mnist/asym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.95 \ No newline at end of file diff --git a/configs/mnist/asym/ce.yaml b/configs/mnist/asym/ce.yaml new file mode 100644 index 0000000..1984309 --- /dev/null +++ b/configs/mnist/asym/ce.yaml @@ -0,0 +1,31 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: CrossEntropyLoss diff --git a/configs/mnist/asym/d2l.yaml b/configs/mnist/asym/d2l.yaml new file mode 100644 index 0000000..1bf7aac --- /dev/null +++ b/configs/mnist/asym/d2l.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: LIDPacedLoss + num_classes: 10 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/mnist/asym/fl.yaml b/configs/mnist/asym/fl.yaml new file mode 100644 index 0000000..2f28524 --- /dev/null +++ b/configs/mnist/asym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: ForwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/mnist/asym/focal.yaml b/configs/mnist/asym/focal.yaml new file mode 100644 index 0000000..666c446 --- /dev/null +++ b/configs/mnist/asym/focal.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + 
lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/mnist/asym/gce.yaml b/configs/mnist/asym/gce.yaml new file mode 100644 index 0000000..edc4d35 --- /dev/null +++ b/configs/mnist/asym/gce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 10 + q: 0.7 diff --git a/configs/mnist/asym/mae.yaml b/configs/mnist/asym/mae.yaml new file mode 100644 index 0000000..e5762a1 --- /dev/null +++ b/configs/mnist/asym/mae.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: MeanAbsoluteError + num_classes: 10 + scale: 1.0 diff --git a/configs/mnist/asym/nce+mae.yaml b/configs/mnist/asym/nce+mae.yaml new file mode 100644 index 0000000..5ee86e6 --- /dev/null +++ b/configs/mnist/asym/nce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NCEandMAE + num_classes: 10 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/nce+rce.yaml b/configs/mnist/asym/nce+rce.yaml new file mode 100644 index 0000000..f72ccfa --- /dev/null +++ b/configs/mnist/asym/nce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NCEandRCE + num_classes: 10 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/nce.yaml b/configs/mnist/asym/nce.yaml new file mode 100644 index 0000000..5fd6d38 --- /dev/null +++ b/configs/mnist/asym/nce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: 
CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedCrossEntropy + num_classes: 10 + scale: 10.0 diff --git a/configs/mnist/asym/nfl+mae.yaml b/configs/mnist/asym/nfl+mae.yaml new file mode 100644 index 0000000..9d5e6af --- /dev/null +++ b/configs/mnist/asym/nfl+mae.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NFLandMAE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/nfl+rce.yaml b/configs/mnist/asym/nfl+rce.yaml new file mode 100644 index 0000000..a5b20c2 --- /dev/null +++ b/configs/mnist/asym/nfl+rce.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NFLandRCE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/nfl.yaml b/configs/mnist/asym/nfl.yaml new file mode 100644 index 0000000..480e034 --- /dev/null +++ b/configs/mnist/asym/nfl.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedFocalLoss + num_classes: 10 + scale: 1.0 + gamma: 0.5 diff --git a/configs/mnist/asym/ngce+mae.yaml b/configs/mnist/asym/ngce+mae.yaml new file mode 100644 index 0000000..c959118 --- /dev/null +++ b/configs/mnist/asym/ngce+mae.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NGCEandMAE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/ngce+rce.yaml b/configs/mnist/asym/ngce+rce.yaml new file mode 100644 index 0000000..71c983a --- /dev/null +++ b/configs/mnist/asym/ngce+rce.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + 
+scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NGCEandRCE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/asym/ngce.yaml b/configs/mnist/asym/ngce.yaml new file mode 100644 index 0000000..f63ddff --- /dev/null +++ b/configs/mnist/asym/ngce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedGeneralizedCrossEntropy + num_classes: 10 + scale: 1.0 + q: 0.1 diff --git a/configs/mnist/asym/nlnl.yaml b/configs/mnist/asym/nlnl.yaml new file mode 100644 index 0000000..7bda8da --- /dev/null +++ b/configs/mnist/asym/nlnl.yaml @@ -0,0 +1,33 @@ +epochs: 720 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-3 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NLNL + num_classes: 10 + ln_neg: 1 diff --git a/configs/mnist/asym/rce.yaml b/configs/mnist/asym/rce.yaml new file mode 100644 index 0000000..71f6b2a --- /dev/null +++ b/configs/mnist/asym/rce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: ReverseCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/mnist/asym/sce.yaml b/configs/mnist/asym/sce.yaml new file mode 100644 index 0000000..1040027 --- /dev/null +++ b/configs/mnist/asym/sce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: True + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: SCELoss + alpha: 0.01 + beta: 1.0 + num_classes: 10 diff --git a/configs/mnist/sym/bhl.yaml b/configs/mnist/sym/bhl.yaml new file mode 100644 index 0000000..487ec01 --- /dev/null +++ b/configs/mnist/sym/bhl.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 
+ +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.8 \ No newline at end of file diff --git a/configs/mnist/sym/bl.yaml b/configs/mnist/sym/bl.yaml new file mode 100644 index 0000000..b98061b --- /dev/null +++ b/configs/mnist/sym/bl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 51 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: BackwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/mnist/sym/bsl.yaml b/configs/mnist/sym/bsl.yaml new file mode 100644 index 0000000..5f00f06 --- /dev/null +++ b/configs/mnist/sym/bsl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: BootSoftLoss + num_classes: 10 + beta: 0.95 \ No newline at end of file diff --git a/configs/mnist/sym/ce.yaml b/configs/mnist/sym/ce.yaml new file mode 100644 index 0000000..953817b --- /dev/null +++ b/configs/mnist/sym/ce.yaml @@ -0,0 +1,35 @@ +epochs: 1000 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + #weight_decay: 1.e-4 + momentum: 0.9 + #nesterov: True + +#scheduler: + #name: CosineAnnealingLR + #T_max: $epochs + #eta_min: 0.001 +scheduler: + name: StepLR + step_size: 100 + gamma: 0.7 + +criterion: + name: CrossEntropyLoss diff --git a/configs/mnist/sym/d2l.yaml b/configs/mnist/sym/d2l.yaml new file mode 100644 index 0000000..ba340b5 --- /dev/null +++ b/configs/mnist/sym/d2l.yaml @@ -0,0 +1,39 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.1 + weight_decay: 1.e-4 + momentum: 0.9 + +#scheduler: + #name: CosineAnnealingLR + #T_max: $epochs + #eta_min: 0.001 + +scheduler: + name: StepLR + step_size: 20 + gamma: 0.1 + +criterion: + name: LIDPacedLoss + num_classes: 10 + alpha: 1.0 + beta1: 0.1 + beta2: 1.0 \ No newline at end of file diff --git a/configs/mnist/sym/fl.yaml b/configs/mnist/sym/fl.yaml new file mode 100644 index 0000000..97f60af --- /dev/null +++ b/configs/mnist/sym/fl.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets/ + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + +scheduler: + name: CosineAnnealingLR + T_max: 
$epochs + eta_min: 0.001 + +criterion: + name: ForwardLoss + num_classes: 10 + noise_rate: 0 \ No newline at end of file diff --git a/configs/mnist/sym/focal.yaml b/configs/mnist/sym/focal.yaml new file mode 100644 index 0000000..eb9de2a --- /dev/null +++ b/configs/mnist/sym/focal.yaml @@ -0,0 +1,32 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: FocalLoss + gamma: 0.5 diff --git a/configs/mnist/sym/gce.yaml b/configs/mnist/sym/gce.yaml new file mode 100644 index 0000000..23d4c02 --- /dev/null +++ b/configs/mnist/sym/gce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 10 + q: 0.7 diff --git a/configs/mnist/sym/mae.yaml b/configs/mnist/sym/mae.yaml new file mode 100644 index 0000000..2b455ce --- /dev/null +++ b/configs/mnist/sym/mae.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: MeanAbsoluteError + num_classes: 10 + scale: 1.0 diff --git a/configs/mnist/sym/nce+mae.yaml b/configs/mnist/sym/nce+mae.yaml new file mode 100644 index 0000000..e7c7ee0 --- /dev/null +++ b/configs/mnist/sym/nce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NCEandMAE + num_classes: 10 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/sym/nce+rce.yaml b/configs/mnist/sym/nce+rce.yaml new file mode 100644 index 0000000..547ec5f --- /dev/null +++ b/configs/mnist/sym/nce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NCEandRCE + num_classes: 10 + alpha: 
1.0 + beta: 10.0 diff --git a/configs/mnist/sym/nce.yaml b/configs/mnist/sym/nce.yaml new file mode 100644 index 0000000..7230460 --- /dev/null +++ b/configs/mnist/sym/nce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedCrossEntropy + num_classes: 10 + scale: 10.0 diff --git a/configs/mnist/sym/nfl+mae.yaml b/configs/mnist/sym/nfl+mae.yaml new file mode 100644 index 0000000..76c7824 --- /dev/null +++ b/configs/mnist/sym/nfl+mae.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NFLandMAE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/sym/nfl+rce.yaml b/configs/mnist/sym/nfl+rce.yaml new file mode 100644 index 0000000..cda7b56 --- /dev/null +++ b/configs/mnist/sym/nfl+rce.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NFLandRCE + num_classes: 10 + gamma: 0.5 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/sym/nfl.yaml b/configs/mnist/sym/nfl.yaml new file mode 100644 index 0000000..3622737 --- /dev/null +++ b/configs/mnist/sym/nfl.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedFocalLoss + num_classes: 10 + scale: 1.0 + gamma: 0.5 diff --git a/configs/mnist/sym/ngce+mae.yaml b/configs/mnist/sym/ngce+mae.yaml new file mode 100644 index 0000000..265016f --- /dev/null +++ b/configs/mnist/sym/ngce+mae.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NGCEandMAE + num_classes: 10 + q: 0.1 + alpha: 1.0 + 
beta: 10.0 diff --git a/configs/mnist/sym/ngce+rce.yaml b/configs/mnist/sym/ngce+rce.yaml new file mode 100644 index 0000000..ac29154 --- /dev/null +++ b/configs/mnist/sym/ngce+rce.yaml @@ -0,0 +1,35 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NGCEandRCE + num_classes: 10 + q: 0.1 + alpha: 1.0 + beta: 10.0 diff --git a/configs/mnist/sym/ngce.yaml b/configs/mnist/sym/ngce.yaml new file mode 100644 index 0000000..4dfbca4 --- /dev/null +++ b/configs/mnist/sym/ngce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NormalizedGeneralizedCrossEntropy + num_classes: 10 + scale: 10.0 + q: 0.1 diff --git a/configs/mnist/sym/nlnl.yaml b/configs/mnist/sym/nlnl.yaml new file mode 100644 index 0000000..269d7fc --- /dev/null +++ b/configs/mnist/sym/nlnl.yaml @@ -0,0 +1,33 @@ +epochs: 720 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-3 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: NLNL + num_classes: 10 + ln_neg: 1 diff --git a/configs/mnist/sym/rce.yaml b/configs/mnist/sym/rce.yaml new file mode 100644 index 0000000..6ba45c2 --- /dev/null +++ b/configs/mnist/sym/rce.yaml @@ -0,0 +1,33 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: ReverseCrossEntropy + num_classes: 10 + scale: 1.0 diff --git a/configs/mnist/sym/sce.yaml b/configs/mnist/sym/sce.yaml new file mode 100644 index 0000000..7b8b748 --- /dev/null +++ b/configs/mnist/sym/sce.yaml @@ -0,0 +1,34 @@ +epochs: 50 +grad_bound: 5.0 +log_frequency: 200 + +dataset: + name: DatasetGenerator + asym: False + train_batch_size: 128 + eval_batch_size: 512 + data_path: ../datasets + dataset_type: 'MNIST' + num_of_workers: 8 + +model: + name: ToyModel + type: $dataset.dataset_type + +optimizer: + name: SGD + lr: 0.01 + weight_decay: 1.e-2 + momentum: 0.9 + nesterov: True + +scheduler: + name: CosineAnnealingLR + T_max: $epochs + eta_min: 0.001 + +criterion: + name: SCELoss + alpha: 0.01 + beta: 1.0 + num_classes: 10 diff --git a/configs/webvision_mini/ce.yaml 
b/configs/webvision_mini/ce.yaml new file mode 100644 index 0000000..fa81b00 --- /dev/null +++ b/configs/webvision_mini/ce.yaml @@ -0,0 +1,31 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: CrossEntropyLoss diff --git a/configs/webvision_mini/gce.yaml b/configs/webvision_mini/gce.yaml new file mode 100644 index 0000000..2ae3ef5 --- /dev/null +++ b/configs/webvision_mini/gce.yaml @@ -0,0 +1,33 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: GeneralizedCrossEntropy + num_classes: 50 + q: 0.7 diff --git a/configs/webvision_mini/nce+mae.yaml b/configs/webvision_mini/nce+mae.yaml new file mode 100644 index 0000000..ca7cbaa --- /dev/null +++ b/configs/webvision_mini/nce+mae.yaml @@ -0,0 +1,34 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: NCEandMAE + num_classes: 50 + alpha: 50.0 + beta: 1.0 diff --git a/configs/webvision_mini/nce+rce.yaml b/configs/webvision_mini/nce+rce.yaml new file mode 100644 index 0000000..782e8ad --- /dev/null +++ b/configs/webvision_mini/nce+rce.yaml @@ -0,0 +1,34 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: NCEandRCE + num_classes: 50 + alpha: 50.0 + beta: 0.1 diff --git a/configs/webvision_mini/nfl+mae.yaml b/configs/webvision_mini/nfl+mae.yaml new file mode 100644 index 0000000..6c62f2b --- /dev/null +++ b/configs/webvision_mini/nfl+mae.yaml @@ -0,0 +1,35 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + 
step_size: 1 + gamma: 0.97 + +criterion: + name: NFLandMAE + num_classes: 50 + gamma: 0.5 + alpha: 50.0 + beta: 1.0 diff --git a/configs/webvision_mini/nfl+rce.yaml b/configs/webvision_mini/nfl+rce.yaml new file mode 100644 index 0000000..250af5b --- /dev/null +++ b/configs/webvision_mini/nfl+rce.yaml @@ -0,0 +1,35 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: NFLandRCE + num_classes: 50 + gamma: 0.5 + alpha: 50.0 + beta: 0.1 diff --git a/configs/webvision_mini/sce.yaml b/configs/webvision_mini/sce.yaml new file mode 100644 index 0000000..6702128 --- /dev/null +++ b/configs/webvision_mini/sce.yaml @@ -0,0 +1,34 @@ +epochs: 250 +grad_bound: 5.0 +log_frequency: 50 + +dataset: + name: WebVisionDatasetLoader + setting: 'mini' + train_batch_size: 512 + eval_batch_size: 1024 + train_data_path: '/var/local/tmp/datasets/' + valid_data_path: '/var/local/tmp/datasets/ILSVR2012' + num_of_workers: 8 + +model: + name: resnet50 + num_classes: 50 + +optimizer: + name: SGD + lr: 0.4 + weight_decay: 3.e-5 + momentum: 0.9 + nesterov: True + +scheduler: + name: StepLR + step_size: 1 + gamma: 0.97 + +criterion: + name: SCELoss + num_classes: 50 + alpha: 10.0 + beta: 1.0 diff --git a/dataset.py b/dataset.py new file mode 100644 index 0000000..c96cf6e --- /dev/null +++ b/dataset.py @@ -0,0 +1,720 @@ +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +from PIL import Image +from tqdm import tqdm +from numpy.testing import assert_array_almost_equal +import numpy as np +import os +import torch +import random +import mlconfig + + +def build_for_cifar100(size, noise): + """ random flip between two random classes. + """ + assert(noise >= 0.) and (noise <= 1.) + + P = (1. - noise) * np.eye(size) + for i in np.arange(size - 1): + P[i, i+1] = noise + + # adjust last row + P[size-1, 0] = noise + + assert_array_almost_equal(P.sum(axis=1), 1, 1) + return P + + +def multiclass_noisify(y, P, random_state=0): + """ Flip classes according to transition probability matrix T. + It expects a number between 0 and the number of classes - 1. 
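+
+    Illustrative example: with 3 classes and
+        P = [[0.7, 0.3, 0.0],
+             [0.0, 1.0, 0.0],
+             [0.0, 0.0, 1.0]]
+    roughly 30% of class-0 labels are redrawn as class 1 (one multinomial
+    draw per sample from the row P[y]), while classes 1 and 2 stay unchanged.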
+ """ + + assert P.shape[0] == P.shape[1] + assert np.max(y) < P.shape[0] + + # row stochastic matrix + assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1])) + assert (P >= 0.0).all() + + m = y.shape[0] + new_y = y.copy() + flipper = np.random.RandomState(random_state) + + for idx in np.arange(m): + i = y[idx] + # draw a vector with only an 1 + flipped = flipper.multinomial(1, P[i, :], 1)[0] + new_y[idx] = np.where(flipped == 1)[0] + + return new_y + + +def other_class(n_classes, current_class): + """ + Returns a list of class indices excluding the class indexed by class_ind + :param nb_classes: number of classes in the task + :param class_ind: the class index to be omitted + :return: one random class that != class_ind + """ + if current_class < 0 or current_class >= n_classes: + error_str = "class_ind must be within the range (0, nb_classes - 1)" + raise ValueError(error_str) + + other_class_list = list(range(n_classes)) + other_class_list.remove(current_class) + other_class = np.random.choice(other_class_list) + return other_class + + +class MNISTNoisy(datasets.MNIST): + def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0): + super(MNISTNoisy, self).__init__(root, transform=transform, target_transform=target_transform, download=download) + self.targets = self.targets.numpy() + if asym: + P = np.eye(10) + n = nosiy_rate + + P[7, 7], P[7, 1] = 1. - n, n + # 2 -> 7 + P[2, 2], P[2, 7] = 1. - n, n + + # 5 <-> 6 + P[5, 5], P[5, 6] = 1. - n, n + P[6, 6], P[6, 5] = 1. - n, n + + # 3 -> 8 + P[3, 3], P[3, 8] = 1. - n, n + + y_train_noisy = multiclass_noisify(self.targets, P=P, random_state=seed) + actual_noise = (y_train_noisy != self.targets).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + self.targets = y_train_noisy + + else: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)] + class_noisy = int(n_noisy / 10) + noisy_idx = [] + for d in range(10): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=10, current_class=self.targets[i]) + print(len(noisy_idx)) + + print("Print noisy label generation statistics:") + for i in range(10): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." 
% (i, n_noisy)) + + return + + +class cifar10Nosiy(datasets.CIFAR10): + def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False): + super(cifar10Nosiy, self).__init__(root, transform=transform, target_transform=target_transform) + self.download = download + if asym: + # automobile < - truck, bird -> airplane, cat <-> dog, deer -> horse + source_class = [9, 2, 3, 5, 4] + target_class = [1, 0, 5, 3, 7] + for s, t in zip(source_class, target_class): + cls_idx = np.where(np.array(self.targets) == s)[0] + n_noisy = int(nosiy_rate * cls_idx.shape[0]) + noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False) + for idx in noisy_sample_index: + self.targets[idx] = t + return + elif nosiy_rate > 0: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)] + class_noisy = int(n_noisy / 10) + noisy_idx = [] + for d in range(10): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=10, current_class=self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(10): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." % (i, n_noisy)) + return + + +class cifar100Nosiy(datasets.CIFAR100): + def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0): + super(cifar100Nosiy, self).__init__(root, download=download, transform=transform, target_transform=target_transform) + self.download = download + if asym: + """mistakes are inside the same superclass of 10 classes, e.g. 'fish' + """ + nb_classes = 100 + P = np.eye(nb_classes) + n = nosiy_rate + nb_superclasses = 20 + nb_subclasses = 5 + + if n > 0.0: + for i in np.arange(nb_superclasses): + init, end = i * nb_subclasses, (i+1) * nb_subclasses + P[init:end, init:end] = build_for_cifar100(nb_subclasses, n) + + y_train_noisy = multiclass_noisify(np.array(self.targets), P=P, random_state=seed) + actual_noise = (y_train_noisy != np.array(self.targets)).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + self.targets = y_train_noisy.tolist() + return + elif nosiy_rate > 0: + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(100)] + class_noisy = int(n_noisy / 100) + noisy_idx = [] + for d in range(100): + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=100, current_class=self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(100): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." 
% (i, n_noisy)) + return + + +@mlconfig.register +class DatasetGenerator(): + def __init__(self, + train_batch_size=128, + eval_batch_size=256, + data_path='data/', + seed=123, + num_of_workers=4, + asym=False, + dataset_type='CIFAR10', + is_cifar100=False, + cutout_length=16, + noise_rate=0.4): + self.seed = seed + np.random.seed(seed) + self.train_batch_size = train_batch_size + self.eval_batch_size = eval_batch_size + self.data_path = data_path + self.num_of_workers = num_of_workers + self.cutout_length = cutout_length + self.noise_rate = noise_rate + self.dataset_type = dataset_type + self.asym = asym + self.data_loaders = self.loadData() + return + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + if self.dataset_type == 'MNIST': + MEAN = [0.1307] + STD = [0.3081] + train_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(MEAN, STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(MEAN, STD)]) + + train_dataset = MNISTNoisy(root=self.data_path, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + seed=self.seed, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.MNIST(root=self.data_path, + train=False, + transform=test_transform, + download=True) + + elif self.dataset_type == 'CIFAR100': + CIFAR_MEAN = [0.5071, 0.4865, 0.4409] + CIFAR_STD = [0.2673, 0.2564, 0.2762] + + train_transform = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + train_dataset = cifar100Nosiy(root=self.data_path, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + seed=self.seed, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.CIFAR100(root=self.data_path, + train=False, + transform=test_transform, + download=True) + + elif self.dataset_type == 'CIFAR10': + CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] + CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] + + train_transform = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + test_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(CIFAR_MEAN, CIFAR_STD)]) + + train_dataset = cifar10Nosiy(root=self.data_path, + train=True, + transform=train_transform, + download=True, + asym=self.asym, + nosiy_rate=self.noise_rate) + + test_dataset = datasets.CIFAR10(root=self.data_path, + train=False, + transform=test_transform, + download=True) + else: + raise("Unknown Dataset") + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.train_batch_size, + shuffle=True, + pin_memory=True, + num_workers=self.num_of_workers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.num_of_workers) + + print("Num of train %d" % (len(train_dataset))) + print("Num of test %d" % (len(test_dataset))) + + return data_loaders + + +class Clothing1MDataset: + def __init__(self, path, type='train', transform=None, target_transform=None): + self.path = path + if type == 'test': + flist = os.path.join(path, "annotations/clean_test.txt") + elif type 
== 'valid': + flist = os.path.join(path, "annotations/clean_val.txt") + elif type == 'train': + flist = os.path.join(path, "annotations/noisy_train.txt") + else: + raise('Unknown type') + + self.imlist = self.flist_reader(flist) + self.transform = transform + + def __len__(self): + return len(self.imlist) + + def __getitem__(self, index): + impath, target = self.imlist[index] + img = Image.open(impath).convert("RGB") + if self.transform is not None: + img = self.transform(img) + return img, target + + def flist_reader(self, flist): + imlist = [] + with open(flist, 'r') as rf: + for line in rf.readlines(): + row = line.split(" ") + impath = self.path + row[0] + imlabel = row[1] + imlist.append((impath, int(imlabel))) + return imlist + + +@mlconfig.register +class Clothing1MDatasetLoader: + def __init__(self, train_batch_size=128, eval_batch_size=256, data_path='data/', num_of_workers=4, use_cutout=True, cutout_length=112): + self.train_batch_size = train_batch_size + self.eval_batch_size = eval_batch_size + self.data_path = data_path + self.num_of_workers = num_of_workers + self.use_cutout = use_cutout + self.cutout_length = cutout_length + self.data_loaders = self.loadData() + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + MEAN = [0.485, 0.456, 0.406] + STD = [0.229, 0.224, 0.225] + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2), + transforms.ToTensor(), + transforms.Normalize(mean=MEAN, std=STD), + ]) + test_transform = transforms.Compose([ + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=MEAN, std=STD) + ]) + if self.use_cutout: + print('Using Cutout') + train_transform.transforms.append(Cutout(self.cutout_length)) + + train_dataset = Clothing1MDataset(path=self.data_path, + type='train', + transform=train_transform) + + test_dataset = Clothing1MDataset(path=self.data_path, + type='test', + transform=test_transform) + + valid_dataset = Clothing1MDataset(path=self.data_path, + type='valid', + transform=test_transform) + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.train_batch_size, + shuffle=True, + pin_memory=True, + num_workers=self.num_of_workers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.num_of_workers) + + data_loaders['valid_dataset'] = DataLoader(dataset=valid_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.num_of_workers) + return data_loaders + + +class WebVisionDataset: + def __init__(self, path, file_name='webvision_mini_train', transform=None, target_transform=None): + self.target_list = [] + self.path = path + self.load_file(os.path.join(path, file_name)) + self.transform = transform + self.target_transform = target_transform + return + + def load_file(self, filename): + f = open(filename, "r") + for line in f: + train_file, label = line.split() + self.target_list.append((train_file, int(label))) + f.close() + return + + def __len__(self): + return len(self.target_list) + + def __getitem__(self, index): + impath, target = self.target_list[index] + img = Image.open(os.path.join(self.path, impath)).convert("RGB") + if self.transform is not None: + img = self.transform(img) + return img, target + + 
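The loaders in this file are registered with mlconfig, so the `dataset:` block of the YAML configs shown earlier is what constructs them. A minimal usage sketch follows (not part of the diff; it assumes mlconfig's usual load-and-instantiate-by-`name` behaviour and the config keys shown above):

```python
# Hedged sketch: build the data loaders and criterion named in one of the configs above.
import mlconfig
import dataset  # noqa: F401 -- importing runs the @mlconfig.register decorators for the loaders
import loss     # noqa: F401 -- likewise registers the criteria (SCELoss, NFLandRCE, ...)

config = mlconfig.load('configs/webvision_mini/nfl+rce.yaml')
data_loaders = config.dataset().getDataLoader()   # {'train_dataset': ..., 'test_dataset': ...}
criterion = config.criterion()                    # e.g. NFLandRCE(alpha=50.0, beta=0.1, ...)

for images, labels in data_loaders['train_dataset']:
    break  # in the real training loop, feed each batch to the model and criterion
```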
+@mlconfig.register +class WebVisionDatasetLoader: + def __init__(self, setting='mini', train_batch_size=128, eval_batch_size=256, train_data_path='data/', valid_data_path='data/', num_of_workers=4): + self.train_batch_size = train_batch_size + self.eval_batch_size = eval_batch_size + self.train_data_path = train_data_path + self.valid_data_path = valid_data_path + self.num_of_workers = num_of_workers + self.setting = setting + self.data_loaders = self.loadData() + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + IMAGENET_MEAN = [0.485, 0.456, 0.406] + IMAGENET_STD = [0.229, 0.224, 0.225] + train_transform = transforms.Compose([transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.2), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + test_transform = transforms.Compose([transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + if self.setting == 'mini': + train_dataset = WebVisionDataset(path=self.train_data_path, + file_name='webvision_mini_train.txt', + transform=train_transform) + + test_dataset = ImageNetMini(root=self.valid_data_path, + split='val', + transform=test_transform) + + elif self.setting == 'full': + train_dataset = WebVisionDataset(path=self.train_data_path, + file_name='train_filelist_google.txt', + transform=train_transform) + + test_dataset = WebVisionDataset(path=self.valid_data_path, + file_name='val_filelist.txt', + transform=test_transform) + + elif self.setting == 'full_imagenet': + train_dataset = WebVisionDataset(path=self.train_data_path, + file_name='train_filelist_google', + transform=train_transform) + + test_dataset = datasets.ImageNet(root=self.valid_data_path, + split='val', + transform=test_transform) + + else: + raise(NotImplementedError) + + data_loaders = {} + + print('Training Set Size %d' % (len(train_dataset))) + print('Test Set Size %d' % (len(test_dataset))) + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.train_batch_size, + shuffle=True, + pin_memory=True, + num_workers=self.num_of_workers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.eval_batch_size, + shuffle=False, + pin_memory=True, + num_workers=self.num_of_workers) + + return data_loaders + + +class ImageNetMini(datasets.ImageNet): + def __init__(self, root, split='val', download=False, **kwargs): + super(ImageNetMini, self).__init__(root, download=download, split=split, **kwargs) + self.new_targets = [] + self.new_images = [] + for i, (file, cls_id) in enumerate(self.imgs): + if cls_id <= 49: + self.new_targets.append(cls_id) + self.new_images.append((file, cls_id)) + print((file, cls_id)) + self.imgs = self.new_images + self.targets = self.new_targets + self.samples = self.imgs + print(len(self.samples)) + print(len(self.targets)) + return + + +class NosieImageNet(datasets.ImageNet): + def __init__(self, root, split='train', seed=999, download=False, target_class_num=200, nosiy_rate=0.4, **kwargs): + super(NosieImageNet, self).__init__(root, download=download, split=split, **kwargs) + random.seed(seed) + np.random.seed(seed) + self.new_idx = random.sample(list(range(0, 1000)), k=target_class_num) + print(len(self.new_idx), len(self.imgs)) + self.new_imgs = [] + self.new_targets = [] + + for file, cls_id in self.imgs: + if cls_id in self.new_idx: + new_idx = 
self.new_idx.index(cls_id) + self.new_imgs.append((file, new_idx)) + self.new_targets.append(new_idx) + self.imgs = self.new_imgs + self.targets = self.new_targets + print(min(self.targets), max(self.targets)) + # Noise + if split == 'train': + n_samples = len(self.targets) + n_noisy = int(nosiy_rate * n_samples) + print("%d Noisy samples" % (n_noisy)) + class_index = [np.where(np.array(self.targets) == i)[0] for i in range(target_class_num)] + class_noisy = int(n_noisy / target_class_num) + noisy_idx = [] + for d in range(target_class_num): + print(len(class_index[d]), d) + noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False) + noisy_idx.extend(noisy_class_index) + print("Class %d, number of noisy % d" % (d, len(noisy_class_index))) + for i in noisy_idx: + self.targets[i] = other_class(n_classes=target_class_num, current_class=self.targets[i]) + (file, old_idx) = self.imgs[i] + self.imgs[i] = (file, self.targets[i]) + print(len(noisy_idx)) + print("Print noisy label generation statistics:") + for i in range(target_class_num): + n_noisy = np.sum(np.array(self.targets) == i) + print("Noisy class %s, has %s samples." % (i, n_noisy)) + + self.samples = self.imgs + + +class ImageNetDatasetLoader: + def __init__(self, + batchSize=128, + eval_batch_size=256, + dataPath='data/', + seed=999, + target_class_num=200, + nosiy_rate=0.4, + numOfWorkers=4): + self.batchSize = batchSize + self.eval_batch_size = eval_batch_size + self.dataPath = dataPath + self.numOfWorkers = numOfWorkers + self.seed = seed + self.target_class_num = target_class_num + self.nosiy_rate = nosiy_rate + self.data_loaders = self.loadData() + + def getDataLoader(self): + return self.data_loaders + + def loadData(self): + IMAGENET_MEAN = [0.485, 0.456, 0.406] + IMAGENET_STD = [0.229, 0.224, 0.225] + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.2), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + train_dataset = NosieImageNet(root=self.dataPath, + split='train', + nosiy_rate=self.nosiy_rate, + target_class_num=self.target_class_num, + seed=self.seed, + transform=train_transform, + download=True) + + test_dataset = NosieImageNet(root=self.dataPath, + split='val', + nosiy_rate=self.nosiy_rate, + target_class_num=self.target_class_num, + seed=self.seed, + transform=test_transform, + download=True) + + data_loaders = {} + + data_loaders['train_dataset'] = DataLoader(dataset=train_dataset, + batch_size=self.batchSize, + shuffle=True, + pin_memory=True, + num_workers=self.numOfWorkers) + + data_loaders['test_dataset'] = DataLoader(dataset=test_dataset, + batch_size=self.batchSize, + shuffle=False, + pin_memory=True, + num_workers=self.numOfWorkers) + return data_loaders + + +def online_mean_and_sd(loader): + """Compute the mean and sd in an online fashion + + Var[x] = E[X^2] - E^2[X] + """ + cnt = 0 + fst_moment = torch.empty(3) + snd_moment = torch.empty(3) + + for data, _ in tqdm(loader): + + b, c, h, w = data.shape + nb_pixels = b * h * w + sum_ = torch.sum(data, dim=[0, 2, 3]) + sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3]) + fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels) + snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + 
nb_pixels) + + cnt += nb_pixels + + return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2) + + +class Cutout(object): + def __init__(self, length): + self.length = length + + def __call__(self, img): + h, w = img.size(1), img.size(2) + mask = np.ones((h, w), np.float32) + y = np.random.randint(h) + x = np.random.randint(w) + + y1 = np.clip(y - self.length // 2, 0, h) + y2 = np.clip(y + self.length // 2, 0, h) + x1 = np.clip(x - self.length // 2, 0, w) + x2 = np.clip(x + self.length // 2, 0, w) + + mask[y1: y2, x1: x2] = 0. + mask = torch.from_numpy(mask) + mask = mask.expand_as(img) + img *= mask + return img diff --git a/datasets.py b/datasets.py deleted file mode 100644 index 5deb88a..0000000 --- a/datasets.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -import multiprocessing as mp -from subprocess import call -import warnings -import numpy as np -import scipy.io as sio -import numpy as np -import keras.backend as K -from keras.datasets import mnist, cifar10, cifar100 -from keras.utils import np_utils -from util import other_class - -# Set random seed -np.random.seed(123) - -NUM_CLASSES = {'mnist': 10, 'svhn': 10, 'cifar-10': 10, 'cifar-100': 100} - -def get_data(dataset='mnist', noise_ratio=0, random_shuffle=False): - """ - Get training images with specified ratio of label noise - :param dataset: - :param noise_ratio: 0 - 100 (%) - :param random_shuffle: - :return: - """ - if dataset == 'mnist': - (X_train, y_train), (X_test, y_test) = mnist.load_data() - - X_train = X_train.reshape(-1, 28, 28, 1) - X_test = X_test.reshape(-1, 28, 28, 1) - - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - elif dataset == 'svhn': - if not os.path.isfile("data/svhn_train.mat"): - print('Downloading SVHN train set...') - call( - "curl -o data/svhn_train.mat " - "http://ufldl.stanford.edu/housenumbers/train_32x32.mat", - shell=True - ) - if not os.path.isfile("data/svhn_test.mat"): - print('Downloading SVHN test set...') - call( - "curl -o data/svhn_test.mat " - "http://ufldl.stanford.edu/housenumbers/test_32x32.mat", - shell=True - ) - train = sio.loadmat('data/svhn_train.mat') - test = sio.loadmat('data/svhn_test.mat') - X_train = np.transpose(train['X'], axes=[3, 0, 1, 2]) - X_test = np.transpose(test['X'], axes=[3, 0, 1, 2]) - - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - means = X_train.mean(axis=0) - # std = np.std(X_train) - X_train = (X_train - means) # / std - X_test = (X_test - means) # / std - - # reshape (n_samples, 1) to (n_samples,) and change 1-index - # to 0-index - y_train = np.reshape(train['y'], (-1,)) - 1 - y_test = np.reshape(test['y'], (-1,)) - 1 - - elif dataset == 'cifar-10': - (X_train, y_train), (X_test, y_test) = cifar10.load_data() - - X_train = X_train.reshape(-1, 32, 32, 3) - X_test = X_test.reshape(-1, 32, 32, 3) - - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - means = X_train.mean(axis=0) - # std = np.std(X_train) - X_train = (X_train - means) # / std - X_test = (X_test - means) # / std - - # they are 2D originally in cifar - y_train = y_train.ravel() - y_test = y_test.ravel() - - elif dataset == 'cifar-100': - # num_classes = 100 - (X_train, y_train), (X_test, y_test) = cifar100.load_data() - - X_train = X_train.reshape(-1, 32, 32, 3) - X_test = X_test.reshape(-1, 32, 32, 3) - - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - means = X_train.mean(axis=0) - # std = np.std(X_train) - X_train = (X_train - means) # / std - X_test = (X_test - means) # / std - - # they are 2D originally in cifar - y_train = y_train.ravel() - y_test = 
y_test.ravel() - else: - return None, None, None, None - - - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - - # generate random noisy labels - if noise_ratio > 0: - data_file = "data/%s_train_labels_%s.npy" % (dataset, noise_ratio) - if os.path.isfile(data_file): - y_train = np.load(data_file) - else: - n_samples = y_train.shape[0] - n_noisy = int(noise_ratio*n_samples/100) - noisy_idx = np.random.choice(n_samples, n_noisy, replace=False) - for i in noisy_idx: - y_train[i] = other_class(n_classes=NUM_CLASSES[dataset], current_class=y_train[i]) - np.save(data_file, y_train) - - if random_shuffle: - # random shuffle - idx_perm = np.random.permutation(X_train.shape[0]) - X_train, y_train = X_train[idx_perm], y_train[idx_perm] - - # one-hot-encode the labels - y_train = np_utils.to_categorical(y_train, NUM_CLASSES[dataset]) - y_test = np_utils.to_categorical(y_test, NUM_CLASSES[dataset]) - - print("X_train:", X_train.shape) - print("y_train:", y_train.shape) - print("X_test:", X_test.shape) - print("y_test", y_test.shape) - - return X_train, y_train, X_test, y_test - - -def validatation_split(X, y, split=0.1): - """ - split data to train and validation set, based on the split ratios - :param X: - :param y: - :param split: - :return: - """ - idx_val = np.round(split * X.shape[0]).astype(int) - X_val, y_val = X[:idx_val], y[:idx_val] - X_train, y_train = X[idx_val:], y[idx_val:] - return X_train, y_train, X_val, y_val - - -if __name__ == "__main__": - X_train, Y_train, X_test, Y_test = get_data(dataset='mnist', noise_ratio=40) - Y_train = np.argmax(Y_train, axis=1) - (_, Y_clean_train), (_, Y_clean_test) = mnist.load_data() - clean_selected = np.argwhere(Y_train == Y_clean_train).reshape((-1,)) - noisy_selected = np.argwhere(Y_train != Y_clean_train).reshape((-1,)) - print("#correct labels: %s, #incorrect labels: %s" % (len(clean_selected), len(noisy_selected))) \ No newline at end of file diff --git a/evaluator.py b/evaluator.py new file mode 100644 index 0000000..180e6b4 --- /dev/null +++ b/evaluator.py @@ -0,0 +1,88 @@ +import time +import torch +import os +from util import log_display, accuracy, AverageMeter + +if torch.cuda.is_available(): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + torch.backends.cudnn.deterministic = True + device = torch.device('cuda') +else: + device = torch.device('cpu') + + +class Evaluator(): + def __init__(self, data_loader, logger, config, name='Evaluator', metrics='classfication', summary_writer=None): + self.data_loader = data_loader + self.logger = logger + self.name = name + self.summary_writer = summary_writer + self.step = 0 + self.config = config + self.log_frequency = config.log_frequency + self.loss_meters = AverageMeter() + self.acc_meters = AverageMeter() + self.acc5_meters = AverageMeter() + self.report_metrics = self.classfication_metrics if metrics == 'classfication' else self.regression_metrics + return + + def log(self, epoch, GLOBAL_STEP): + display = log_display(epoch=epoch, + global_step=GLOBAL_STEP, + time_elapse=self.time_used, + **self.logger_payload) + self.logger.info(display) + + def eval(self, epoch, GLOBAL_STEP, model, criterion): + for i, (images, labels) in enumerate(self.data_loader): + self.eval_batch(x=images, y=labels, model=model, criterion=criterion) + self.log(epoch, GLOBAL_STEP) + return + + def eval_batch(self, x, y, model, criterion): + model.eval() + x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True) + start = time.time() + with 
torch.no_grad(): + pred, _ = model(x) + loss = criterion(pred, y) + end = time.time() + self.time_used = end - start + self.step += 1 + self.report_metrics(pred, y, loss) + return + + def classfication_metrics(self, x, y, loss): + acc, acc5 = accuracy(x, y, topk=(1, 5)) + self.loss_meters.update(loss.item(), y.shape[0]) + self.acc_meters.update(acc.item(), y.shape[0]) + self.acc5_meters.update(acc5.item(), y.shape[0]) + self.logger_payload = {"acc": acc, + "acc_avg": self.acc_meters.avg, + "top5_acc": acc5, + "top5_acc_avg": self.acc5_meters.avg, + "loss": loss, + "loss_avg": self.loss_meters.avg} + + if self.summary_writer is not None: + self.summary_writer.add_scalar(os.path.join(self.name, 'acc'), acc, self.step) + self.summary_writer.add_scalar(os.path.join(self.name, 'loss'), loss, self.step) + + def regression_metrics(self, x, y, loss): + diff = abs((x - y).mean().detach().item()) + self.loss_meters.update(loss.item(), y.shape[0]) + self.acc_meters.update(diff, y.shape[0]) + self.logger_payload = {"|diff|": diff, + "|diff_avg|": self.acc_meters.avg, + "loss": loss, + "loss_avg": self.loss_meters.avg} + + if self.summary_writer is not None: + self.summary_writer.add_scalar(os.path.join(self.name, 'diff'), diff, self.step) + self.summary_writer.add_scalar(os.path.join(self.name, 'loss'), loss, self.step) + + def _reset_stats(self): + self.loss_meters.reset() + self.acc_meters.reset() + self.acc5_meters.reset() diff --git a/lass.py b/lass.py new file mode 100644 index 0000000..4f68dd1 --- /dev/null +++ b/lass.py @@ -0,0 +1,68 @@ +import torch +import torch.nn.functional as F + +class lass(object): + def __init__(self, model, device, a=0.25/255., b=0.2/255., r=0.3/255., iter_max=100, clip_min=-1.0e8, clip_max=1.0e8): + # x and y_target are tensorflow placeholders, y_pred is the model output tensorflow tensor + # SEARCH PARAMETERS: a- gradient sign coefficient; b- noise coefficient; r- search radius per pixel; iter- max number of iters + self.a = a + self.b = b + self.r = r + self.model = model + self.device = device + self.iter_max = iter_max + self.clip_min = clip_min + self.clip_max = clip_max + + def find(self, X): + # elements of X in [0,1] for using default params a,b,r; otherwise scale accordingly + # generate max output label + X.requires_grad_(True) + pred, _ = self.model(X) + pred = F.softmax(pred, dim=1) + Y_pred_vec = torch.argmax(pred, dim=1) + Y_pred = F.one_hot(Y_pred_vec, pred.shape[1]).float() + + X_adv = 1.*X + adv_ind = torch.zeros(X.shape[0],dtype=torch.bool,device=self.device) + converged = False + converged_label_thres = 3 + adv_num_old = 0 + i = 0 + Y_pred_adv = pred + while i < self.iter_max and converged == False: + # I would recommend annealing the noise coefficient b gradually in this while loop + #print('on iter %s' % i) + i += 1 + #X_adv.requires_grad_(True) + loss = F.cross_entropy(Y_pred_adv, Y_pred_vec) + if i == 1: + grad = torch.autograd.grad(loss, X)[0] + else: + grad = torch.autograd.grad(loss, X_adv)[0] + X_adv = X_adv.detach() + + + step = self.a * torch.sign(grad) + self.b * torch.randn(*grad.shape, device=self.device) + X_adv += step + diff = X_adv - X + abs_diff = torch.abs(diff) + ind = abs_diff > self.r + X_adv[ind] = X[ind] + self.r * torch.sign(diff[ind]) + X_adv = torch.clamp(X_adv, self.clip_min , self.clip_max ) + + X_adv.requires_grad_(True) + Y_pred_adv, _ = self.model(X_adv) + Y_pred_adv = F.softmax(Y_pred_adv, dim=1) + Y_pred_adv_vec = torch.argmax(Y_pred_adv, dim=1) + # if we ever identify a sample as critical sample, record it + 
adv_ind = adv_ind | ~torch.eq(Y_pred_vec, Y_pred_adv_vec).to(self.device) + adv_num_new = torch.sum(adv_ind) + #print('number of adv samples: %s' % adv_num_new) + + if adv_num_new - adv_num_old < converged_label_thres: + converged = True + + adv_num_old = adv_num_new + + return X_adv, adv_ind \ No newline at end of file diff --git a/lass_tf.py b/lass_tf.py deleted file mode 100644 index f7d061c..0000000 --- a/lass_tf.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Code from Devansh Arpit -2017 - icml - A Closer Look at Memorization in Deep Networks -Adapted by Xingjun Ma to this tensorflow version. -""" - -import numpy as np -import keras.backend as K - -class lass(object): - def __init__(self, x, y_pred, y_target, a=0.25/255., b=0.2/255., r=0.3/255., iter_max=100, clip_min=-np.inf, clip_max=np.inf): - # x and y_target are tensorflow placeholders, y_pred is the model output tensorflow tensor - # SEARCH PARAMETERS: a- gradient sign coefficient; b- noise coefficient; r- search radius per pixel; iter- max number of iters - self.a = a - self.b = b - self.r = r - self.iter_max = iter_max - self.clip_min = clip_min - self.clip_max = clip_max - - loss = K.categorical_crossentropy(y_pred, y_target) - grads = K.gradients(K.mean(loss), x)[0] # this will return a list of tensors not one tensor - - self.grad_fn = K.function(inputs=[x, y_target] + [K.learning_phase()], - outputs=[grads]) - self.pred_fn = K.function(inputs=[x] + [K.learning_phase()], - outputs=[y_pred]) - - def find(self, X, bs=500): - # elements of X in [0,1] for using default params a,b,r; otherwise scale accordingly - # generate max output label - for batch in range(int(X.shape[0] / bs)): - pred_this = self.pred_fn([X[bs * batch: bs * (batch + 1)], 0])[0] - if not hasattr(self, 'Y_pred_exists'): - self.Y_pred_exists=True - Y_pred = np.zeros(shape=(X.shape[0], pred_this.shape[1]), dtype=np.float32) - Y_pred[bs * batch: bs * (batch + 1)] = (pred_this // np.max(pred_this, axis=1)[:, None]) - - Y_pred_vec = np.argmax(Y_pred, axis=1) - - X_adv = 1.*X - adv_ind = np.asarray(np.zeros((X.shape[0],)), dtype='bool') - converged = False - converged_label_thres = 20 - adv_num_old = 0 - i = 0 - while i < self.iter_max and converged == False: - # I would recommend annealing the noise coefficient b gradually in this while loop - # print('on iter %s' % i) - i += 1 - pred_adv = [] - for batch in range(int(X.shape[0] / bs)): - grad_this = self.grad_fn([X_adv[bs * batch: bs * (batch + 1)], Y_pred[bs * batch: bs * (batch + 1)], 0])[0] - - step = self.a * np.sign(grad_this) + self.b * np.random.randn(*grad_this.shape) - X_adv[bs * batch: bs * (batch + 1)] += step - diff = X_adv[bs * batch: bs * (batch + 1)] - X[bs * batch: bs * (batch + 1)] - abs_diff = np.abs(diff) - ind = abs_diff > self.r - X_adv[bs * batch: bs * (batch + 1)][ind] = X[bs * batch: bs * (batch + 1)][ind] + self.r * np.sign( - diff[ind]) - X_adv[bs * batch: bs * (batch + 1)] = np.clip(X_adv[bs * batch: bs * (batch + 1)], \ - self.clip_min , self.clip_max ) - - X_adv_this = X_adv[bs * batch: bs * (batch + 1)] - pred_this_adv = self.pred_fn([X_adv_this, 0])[0] - pred_this_adv = np.argmax(pred_this_adv, axis=1) - pred_adv.extend(list(pred_this_adv)) - - pred_adv = np.asarray(pred_adv) - - # if we ever identify a sample as critical sample, record it - adv_ind = adv_ind + (Y_pred_vec != pred_adv) - adv_num_new = np.sum(adv_ind) - # print('number of adv samples: %s' % adv_num_new) - - if adv_num_new - adv_num_old < converged_label_thres: - converged = True - - adv_num_old = adv_num_new - - return 
X_adv, adv_ind \ No newline at end of file diff --git a/lid.py b/lid.py new file mode 100644 index 0000000..38c8a2c --- /dev/null +++ b/lid.py @@ -0,0 +1,50 @@ +import torch +from scipy.spatial.distance import cdist + +def gmean(input_x, dim=0): + log_x = torch.log(input_x) + return torch.exp(torch.mean(log_x, dim=dim)) + +def get_lid_r(data, reference): + b = data.shape[0] + data = torch.flatten(data, start_dim=1) + reference = torch.flatten(reference, start_dim=1) + r = torch.cdist(data, reference, p=2) + a, idx = torch.sort(r, dim=1) + return r, a, idx + +def lid_mle(data, reference, k=20, get_idx=False, compute_mode='use_mm_for_euclid_dist_if_necessary'): + data = torch.flatten(data, start_dim=1) + reference = torch.flatten(reference, start_dim=1) + r = torch.cdist(reference, data, p=2, compute_mode=compute_mode) + a, idx = torch.sort(r, dim=1) + lids = -k / torch.sum(torch.log(a[:, 1:k+1] / a[:, k+1].view(-1,1)), dim=1) + if get_idx: + return idx, lids + return lids + +def lid_mom_est(data, reference, k, get_idx=False, compute_mode='use_mm_for_euclid_dist_if_necessary'): + b = data.shape[0] + k = min(k, b-2) + data = torch.flatten(data, start_dim=1) + reference = torch.flatten(reference, start_dim=1) + r = torch.cdist(data, reference, p=2, compute_mode=compute_mode) + a, idx = torch.sort(r, dim=1) + m = torch.mean(a[:, 1:k], dim=1) + lids = m / (a[:, k] - m) + if get_idx: + return idx, lids + return lids + +def lid_mom_est_eps(data, reference, k, get_idx=False): + b = data.shape[0] + k = min(k, b-2) + data = torch.flatten(data, start_dim=1) + reference = torch.flatten(reference, start_dim=1) + r = torch.cdist(data, reference, p=2) + a, idx = torch.sort(r, dim=1) + m = torch.mean(a[:, 1:k], dim=1) + lids = m / ((a[:, k] - m) + 1.e-4) + if get_idx: + return idx, lids + return lids \ No newline at end of file diff --git a/lid_plot.py b/lid_plot.py deleted file mode 100644 index 1229fdb..0000000 --- a/lid_plot.py +++ /dev/null @@ -1,177 +0,0 @@ -""" -Date: 28/07/2017 -LID exploration and visualization - -Author: Xingjun Ma -""" -import os -import numpy as np -import keras.backend as K -from keras.datasets import mnist, cifar10 -import matplotlib.pyplot as plt -from sklearn.decomposition import PCA -from keras.optimizers import SGD -from keras.utils import np_utils, to_categorical -from util import get_lids_random_batch, mle_batch -from datasets import get_data, validatation_split -from models import get_model -from loss import cross_entropy, boot_soft, boot_hard -from scipy.interpolate import spline, interp1d - -np.random.seed(1024) - -MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'lid_dataset'] -MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L'] -COLORS = ['r', 'y', 'c', 'm', 'g', 'b'] -MARKERS = ['x', 'D', '<', '>', '^', 'o'] - - -def lid_trend_through_training(model_name='ce', dataset='mnist', noise_type='sym', noise_ratio=0.): - """ - plot the lid trend for clean vs noisy samples through training. - This can provide some information about manifold learning dynamics through training. 
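For reference, the estimators implemented in lid.py above correspond, up to the exact indexing of the zero self-distance in the sorted neighbour list, to the standard maximum-likelihood and method-of-moments LID estimates over the $k$ nearest-neighbour distances $r_1(x) \le \dots \le r_k(x)$ of a point $x$ (a hedged restatement, not taken verbatim from the code):

$$
\widehat{\mathrm{LID}}_{\mathrm{MLE}}(x) = -\left(\frac{1}{k}\sum_{i=1}^{k}\log\frac{r_i(x)}{r_k(x)}\right)^{-1},
\qquad
\widehat{\mathrm{LID}}_{\mathrm{MoM}}(x) = \frac{\bar{r}(x)}{r_k(x)-\bar{r}(x)},\quad
\bar{r}(x)=\frac{1}{k-1}\sum_{i=1}^{k-1} r_i(x).
$$

`lid_mom_est_eps` is the same method-of-moments estimate with a small constant added to the denominator for numerical stability.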
- """ - print('Dataset: %s, noise type: %s, noise ratio: %.1f' % (dataset, noise_type, noise_ratio)) - - lids, acc_train, acc_test = None, None, None - - # get LID of raw inputs - lid_subset = 128 - k = 20 - X_train, Y_train, X_test, Y_test = get_data(dataset) - rand_idxes = np.random.choice(X_train.shape[0], lid_subset * 10, replace=False) - X_train = X_train[rand_idxes] - X_train = X_train.reshape((X_train.shape[0], -1)) - - lid_tmp = [] - for i in range(10): - s = i * 128 - e = (i+1)*128 - lid_tmp.extend(mle_batch(X_train[s:e], X_train[s:e], k=k)) - lid_X = np.mean(lid_tmp) - print('LID of input X: ', lid_X) - - # load pre-saved to avoid recomputing - lid_saved = "log/lid_%s_%s_%s%s.npy" % (model_name, dataset, noise_type, noise_ratio) - acc_saved = "log/acc_%s_%s_%s%s.npy" % (model_name, dataset, noise_type, noise_ratio) - if os.path.isfile(lid_saved): - lids = np.load(lid_saved) - lids = np.insert(lids, 0, lid_X) - print(lids) - - if os.path.isfile(acc_saved): - data = np.load(acc_saved) - acc_train = data[0][:] - acc_test = data[1][:] - - acc_train = np.insert(acc_train, 0, 0.) - acc_test = np.insert(acc_test, 0, 0.) - - plot(model_name, dataset, noise_ratio, lids, acc_train, acc_test) - - -def plot(model_name, dataset, noise_ratio, lids, acc_train, acc_test): - """ - plot function - """ - # plot - fig = plt.figure() # figsize=(7, 6) - xnew = np.arange(0, len(lids), 1) - - lids = lids[xnew] - acc_train = acc_train[xnew] - acc_test = acc_test[xnew] - - ax = fig.add_subplot(111) - ax.plot(xnew, lids, c='r', marker='o', markersize=3, linewidth=2, label='LID score') - - ax2 = ax.twinx() - ax2.plot(xnew, acc_train, c='b', marker='x', markersize=3, linewidth=2, label='Train acc') - ax2.plot(xnew, acc_test, c='c', marker='^', markersize=3, linewidth=2, label='Test acc') - - # ax.set_xticks([]) - # ax.set_yticks([]) - ax.set_xlabel("Epoch", fontsize=15) - ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15) - ax2.set_ylabel("Train/test accuracy", fontsize=15) - # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) - - if dataset == 'mnist': - ax.set_ylim((4, 22)) # for mnist - ax2.set_ylim((0.2, 1.2)) - elif dataset == 'svhn': - ax.set_ylim((7, 20)) # for svhn - ax2.set_ylim((0.2, 1.2)) - elif dataset == 'cifar-10': - ax.set_ylim((2.5, 12.5)) # for cifar-10 - ax2.set_ylim((0.2, 1.2)) - elif dataset == 'cifar-100': - ax.set_ylim((3, 12)) # for cifar-100 - ax2.set_ylim((0., 1.)) - - legend = ax.legend(loc='upper left') - plt.setp(legend.get_texts(), fontsize=15) - legend2 = ax2.legend(loc='upper right') - plt.setp(legend2.get_texts(), fontsize=15) - fig.savefig("plots/lid_trend_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300) - plt.show() - - -def lid_trend_of_learning_models(model_list=['ce'], dataset='mnist', noise_ratio=0): - """ - The LID trend of different learning models throughout. 
- """ - # plot initialization - fig = plt.figure() # figsize=(7, 6) - ax = fig.add_subplot(111) - - # get LID of raw inputs - lid_subset = 128 - k = 20 - X_train, Y_train, X_test, Y_test = get_data(dataset) - rand_idxes = np.random.choice(X_train.shape[0], lid_subset * 10, replace=False) - X_train = X_train[rand_idxes] - X_train = X_train.reshape((X_train.shape[0], -1)) - - lid_tmp = [] - for i in range(10): - s = i * 128 - e = (i + 1) * 128 - lid_tmp.extend(mle_batch(X_train[s:e], X_train[s:e], k=k)) - lid_X = np.mean(lid_tmp) - print('LID of input X: ', lid_X) - - for model_name in model_list: - file_name = "log/lid_%s_%s_%s.npy" % (model_name, dataset, noise_ratio) - if os.path.isfile(file_name): - lids = np.load(file_name) - # insert lid of raw input X - lids = np.insert(lids, 0, lid_X) - print(lids) - - # Find indicies that you need to replace - inds = np.where(np.isnan(lids)) - lids[inds] = np.nanmean(lids) - # smooth for plot - lids[lids < 0] = 0 - lids[lids > 10] = 10 - - xnew = np.arange(0, len(lids), 1) - lids = lids[xnew] - - # plot line - idx = MODELS.index(model_name) - ax.plot(xnew, lids, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) - - ax.set_xlabel("Epoch", fontsize=15) - ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15) - # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) - legend = plt.legend(loc='lower center', ncol=2) - plt.setp(legend.get_texts(), fontsize=15) - fig.savefig("plots/lid_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300) - plt.show() - -if __name__ == "__main__": - lid_trend_through_training(model_name='ce', dataset='cifar-100', noise_type='sym', noise_ratio=0.) - # lid_trend_of_learning_models(model_list=['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'lid_dataset'], - # dataset='cifar-10', noise_ratio=60) diff --git a/loss.py b/loss.py index 60b5aa4..73e0e59 100644 --- a/loss.py +++ b/loss.py @@ -1,146 +1,526 @@ +import torch +import torch.nn.functional as F import numpy as np -from keras import backend as K -import tensorflow as tf +import mlconfig +mlconfig.register(torch.nn.CrossEntropyLoss) - -def symmetric_cross_entropy(alpha, beta): - """ - Symmetric Cross Entropy: - ICCV2019 "Symmetric Cross Entropy for Robust Learning with Noisy Labels" - https://arxiv.org/abs/1908.06112 - """ - def loss(y_true, y_pred): - y_true_1 = y_true - y_pred_1 = y_pred - - y_true_2 = y_true - y_pred_2 = y_pred - - y_pred_1 = tf.clip_by_value(y_pred_1, 1e-7, 1.0) - y_true_2 = tf.clip_by_value(y_true_2, 1e-4, 1.0) - - return alpha*tf.reduce_mean(-tf.reduce_sum(y_true_1 * tf.log(y_pred_1), axis = -1)) + beta*tf.reduce_mean(-tf.reduce_sum(y_pred_2 * tf.log(y_true_2), axis = -1)) - return loss - -def cross_entropy(y_true, y_pred): - return K.categorical_crossentropy(y_true, y_pred) - - -def boot_soft(y_true, y_pred): - """ - 2015 - iclrws - Training deep neural networks on noisy labels with bootstrapping. - https://arxiv.org/abs/1412.6596 - - :param y_true: - :param y_pred: - :return: - """ - beta = 0.95 - - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()) - return -K.sum((beta * y_true + (1. - beta) * y_pred) * - K.log(y_pred), axis=-1) - - -def boot_hard(y_true, y_pred): - """ - 2015 - iclrws - Training deep neural networks on noisy labels with bootstrapping. 
- https://arxiv.org/abs/1412.6596 - - :param y_true: - :param y_pred: - :return: - """ - beta = 0.8 - - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()) - pred_labels = K.one_hot(K.argmax(y_pred, 1), num_classes=K.shape(y_true)[1]) - return -K.sum((beta * y_true + (1. - beta) * pred_labels) * - K.log(y_pred), axis=-1) - - -def forward(P): - """ - Making Deep Neural Networks Robust to Label Noise: a Loss Correction Approach - CVPR17 https://arxiv.org/abs/1609.03683 - :param P: noise model, a noisy label transition probability matrix - :return: - """ - P = K.constant(P) - - def loss(y_true, y_pred): - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()) - return -K.sum(y_true * K.log(K.dot(y_pred, P)), axis=-1) - - return loss - - -def backward(P): - """ - Making Deep Neural Networks Robust to Label Noise: a Loss Correction Approach - CVPR17 https://arxiv.org/abs/1609.03683 - :param P: noise model, a noisy label transition probability matrix - :return: - """ - P_inv = K.constant(np.linalg.inv(P)) - - def loss(y_true, y_pred): - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()) - return -K.sum(K.dot(y_true, P_inv) * K.log(y_pred), axis=-1) - - return loss - - -def lid(logits, k=20): - """ - Calculate LID for each data point in the array. - - :param logits: - :param k: - :return: - """ - batch_size = tf.shape(logits)[0] - # n_samples = logits.get_shape().as_list() - # calculate pairwise distance - r = tf.reduce_sum(logits * logits, 1) - # turn r into column vector - r1 = tf.reshape(r, [-1, 1]) - D = r1 - 2 * tf.matmul(logits, tf.transpose(logits)) + tf.transpose(r1) + \ - tf.ones([batch_size, batch_size]) - - # find the k nearest neighbor - D1 = -tf.sqrt(D) - D2, _ = tf.nn.top_k(D1, k=k, sorted=True) - D3 = -D2[:, 1:] # skip the x-to-x distance 0 by using [,1:] - - m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1])) - v_log = tf.reduce_sum(tf.log(m + K.epsilon()), axis=1) # to avoid nan - lids = -k / v_log - - return lids - - -def lid_paced_loss(alpha=1.0, beta1=0.1, beta2=1.0): - """TO_DO - Class wise lid pace learning, targeting classwise asymetric label noise. - - Args: - alpha: lid based adjustment paramter: this needs real-time update. - Returns: - Loss tensor of type float. - """ - if alpha == 1.0: - return symmetric_cross_entropy(alpha=beta1, beta=beta2) +if torch.cuda.is_available(): + torch.backends.cudnn.benchmark = True + if torch.cuda.device_count() > 1: + device = torch.device('cuda:0') else: - def loss(y_true, y_pred): - pred_labels = K.one_hot(K.argmax(y_pred, 1), num_classes=K.shape(y_true)[1]) - y_new = alpha * y_true + (1. 
- alpha) * pred_labels - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()) - return -K.sum(y_new * K.log(y_pred), axis=-1) + device = torch.device('cuda') +else: + device = torch.device('cpu') + + +@mlconfig.register +class SCELoss(torch.nn.Module): + def __init__(self, alpha, beta, num_classes=10): + super(SCELoss, self).__init__() + self.device = device + self.alpha = alpha + self.beta = beta + self.num_classes = num_classes + self.cross_entropy = torch.nn.CrossEntropyLoss() + + def forward(self, pred, labels): + # CCE + ce = self.cross_entropy(pred, labels) + + # RCE + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + + # Loss + loss = self.alpha * ce + self.beta * rce.mean() + return loss + +@mlconfig.register +class ReverseCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(ReverseCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * rce.mean() + + +@mlconfig.register +class NormalizedReverseCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedReverseCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + normalizor = 1 / 4 * (self.num_classes - 1) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * normalizor * rce.mean() + + +@mlconfig.register +class NormalizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.log_softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1)) + return self.scale * nce.mean() + + +@mlconfig.register +class GeneralizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, q=0.7): + super(GeneralizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.q = q + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + gce = (1. 
- torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)) / self.q + return gce.mean() + + +@mlconfig.register +class NormalizedGeneralizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0, q=0.7): + super(NormalizedGeneralizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.q = q + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + numerators = 1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q) + denominators = self.num_classes - pred.pow(self.q).sum(dim=1) + ngce = numerators / denominators + return self.scale * ngce.mean() + + +@mlconfig.register +class MeanAbsoluteError(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(MeanAbsoluteError, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + return + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + mae = 1. - torch.sum(label_one_hot * pred, dim=1) + # Note: Reduced MAE + # Original: torch.abs(pred - label_one_hot).sum(dim=1) + # $MAE = \sum_{k=1}^{K} |\bm{p}(k|\bm{x}) - \bm{q}(k|\bm{x})|$ + # $MAE = \sum_{k=1}^{K}\bm{p}(k|\bm{x}) - p(y|\bm{x}) + (1 - p(y|\bm{x}))$ + # $MAE = 2 - 2p(y|\bm{x})$ + # + return self.scale * mae.mean() + + +@mlconfig.register +class NormalizedMeanAbsoluteError(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedMeanAbsoluteError, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + return + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + normalizor = 1 / (2 * (self.num_classes - 1)) + mae = 1. 
- torch.sum(label_one_hot * pred, dim=1) + return self.scale * normalizor * mae.mean() + + +@mlconfig.register +class NCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(NCEandRCE, self).__init__() + self.num_classes = num_classes + self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nce(pred, labels) + self.rce(pred, labels) + + +@mlconfig.register +class NCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(NCEandMAE, self).__init__() + self.num_classes = num_classes + self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nce(pred, labels) + self.mae(pred, labels) + + +@mlconfig.register +class GCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandMAE, self).__init__() + self.num_classes = num_classes + self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.mae(pred, labels) + + +@mlconfig.register +class GCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandRCE, self).__init__() + self.num_classes = num_classes + self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.rce(pred, labels) + + +@mlconfig.register +class GCEandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(GCEandNCE, self).__init__() + self.num_classes = num_classes + self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q) + self.nce = NormalizedCrossEntropy(num_classes=num_classes) + + def forward(self, pred, labels): + return self.gce(pred, labels) + self.nce(pred, labels) + + +@mlconfig.register +class NGCEandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandNCE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.nce(pred, labels) + + +@mlconfig.register +class NGCEandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandMAE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.mae(pred, labels) + + +@mlconfig.register +class NGCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, q=0.7): + super(NGCEandRCE, self).__init__() + self.num_classes = num_classes + self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.ngce(pred, labels) + self.rce(pred, labels) + + +@mlconfig.register +class MAEandRCE(torch.nn.Module): + def __init__(self, alpha, 
beta, num_classes): + super(MAEandRCE, self).__init__() + self.num_classes = num_classes + self.mae = MeanAbsoluteError(scale=alpha, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.mae(pred, labels) + self.rce(pred, labels) + + +@mlconfig.register +class NLNL(torch.nn.Module): + def __init__(self, train_loader, num_classes, ln_neg=1): + super(NLNL, self).__init__() + self.device = device + self.num_classes = num_classes + self.ln_neg = ln_neg + weight = torch.FloatTensor(num_classes).zero_() + 1. + if not hasattr(train_loader.dataset, 'targets'): + weight = [1] * num_classes + weight = torch.FloatTensor(weight) + else: + for i in range(num_classes): + weight[i] = (torch.from_numpy(np.array(train_loader.dataset.targets)) == i).sum() + weight = 1 / (weight / weight.max()) + self.weight = weight.to(self.device) + self.criterion = torch.nn.CrossEntropyLoss(weight=self.weight) + self.criterion_nll = torch.nn.NLLLoss() + + def forward(self, pred, labels): + labels_neg = (labels.unsqueeze(-1).repeat(1, self.ln_neg) + + torch.LongTensor(len(labels), self.ln_neg).to(self.device).random_(1, self.num_classes)) % self.num_classes + labels_neg = torch.autograd.Variable(labels_neg) + + assert labels_neg.max() <= self.num_classes-1 + assert labels_neg.min() >= 0 + assert (labels_neg != labels.unsqueeze(-1).repeat(1, self.ln_neg)).sum() == len(labels)*self.ln_neg + + s_neg = torch.log(torch.clamp(1. - F.softmax(pred, 1), min=1e-5, max=1.)) + s_neg *= self.weight[labels].unsqueeze(-1).expand(s_neg.size()).to(self.device) + labels = labels * 0 - 100 + loss = self.criterion(pred, labels) * float((labels >= 0).sum()) + loss_neg = self.criterion_nll(s_neg.repeat(self.ln_neg, 1), labels_neg.t().contiguous().view(-1)) * float((labels_neg >= 0).sum()) + loss = ((loss+loss_neg) / (float((labels >= 0).sum())+float((labels_neg[:, 0] >= 0).sum()))) return loss + + +@mlconfig.register +class FocalLoss(torch.nn.Module): + ''' + https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py + ''' + + def __init__(self, gamma=0, alpha=None, size_average=True): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + if isinstance(alpha, (float, int)): + self.alpha = torch.Tensor([alpha, 1-alpha]) + if isinstance(alpha, list): + self.alpha = torch.Tensor(alpha) + self.size_average = size_average + + def forward(self, input, target): + if input.dim() > 2: + input = input.view(input.size(0), input.size(1), -1) # N,C,H,W => N,C,H*W + input = input.transpose(1, 2) # N,C,H*W => N,H*W,C + input = input.contiguous().view(-1, input.size(2)) # N,H*W,C => N*H*W,C + target = target.view(-1, 1) + + logpt = F.log_softmax(input, dim=1) + logpt = logpt.gather(1, target) + logpt = logpt.view(-1) + pt = torch.autograd.Variable(logpt.data.exp()) + + if self.alpha is not None: + if self.alpha.type() != input.data.type(): + self.alpha = self.alpha.type_as(input.data) + at = self.alpha.gather(0, target.data.view(-1)) + logpt = logpt * torch.autograd.Variable(at) + + loss = -1 * (1-pt)**self.gamma * logpt + if self.size_average: + return loss.mean() + else: + return loss.sum() + + +@mlconfig.register +class NormalizedFocalLoss(torch.nn.Module): + def __init__(self, scale=1.0, gamma=0, num_classes=10, alpha=None, size_average=True): + super(NormalizedFocalLoss, self).__init__() + self.gamma = gamma + self.size_average = size_average + self.num_classes = num_classes + self.scale = scale + + def forward(self, input, 
target): + target = target.view(-1, 1) + logpt = F.log_softmax(input, dim=1) + normalizor = torch.sum(-1 * (1 - logpt.data.exp()) ** self.gamma * logpt, dim=1) + logpt = logpt.gather(1, target) + logpt = logpt.view(-1) + pt = torch.autograd.Variable(logpt.data.exp()) + loss = -1 * (1-pt)**self.gamma * logpt + loss = self.scale * loss / normalizor + + if self.size_average: + return loss.mean() + else: + return loss.sum() + + +@mlconfig.register +class NFLandNCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandNCE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.nce(pred, labels) + + +@mlconfig.register +class NFLandMAE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandMAE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.mae(pred, labels) + + +@mlconfig.register +class NFLandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes, gamma=0.5): + super(NFLandRCE, self).__init__() + self.num_classes = num_classes + self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nfl(pred, labels) + self.rce(pred, labels) + + +@mlconfig.register +class DMILoss(torch.nn.Module): + def __init__(self, num_classes): + super(DMILoss, self).__init__() + self.num_classes = num_classes + + def forward(self, output, target): + outputs = F.softmax(output, dim=1) + targets = target.reshape(target.size(0), 1).cpu() + y_onehot = torch.FloatTensor(target.size(0), self.num_classes).zero_() + y_onehot.scatter_(1, targets, 1) + y_onehot = y_onehot.transpose(0, 1).cuda() + mat = y_onehot @ outputs + return -1.0 * torch.log(torch.abs(torch.det(mat.float())) + 0.001) + +@mlconfig.register +class BootSoftLoss(torch.nn.Module): + def __init__(self, num_classes, beta=0.95): + super(BootSoftLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.beta = beta + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + bsl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred) * torch.log(pred), dim=1) + return bsl.mean() + +@mlconfig.register +class BootHardLoss(torch.nn.Module): + def __init__(self, num_classes, beta=0.8): + super(BootSoftLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.beta = beta + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + pred_one_hot = F.one_hot(torch.argmax(pred, dim=1),self.num_classes) + bhl = -torch.sum((self.beta * label_one_hot + (1. 
- self.beta) * pred_one_hot) * torch.log(pred), dim=1) + return bhl.mean() + +@mlconfig.register +class ForwardLoss(torch.nn.Module): + def __init__(self, num_classes, noise_rate): + super(ForwardLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.noise_rate = noise_rate + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.) + P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes)) + P.diagonal().fill_(1-self.noise_rate) + P = P.to(self.device) + loss=-torch.sum(label_one_hot * torch.log(torch.matmul(pred, P)), dim=-1) + return loss.mean() + + +@mlconfig.register +class BackwardLoss(torch.nn.Module): + def __init__(self, num_classes, noise_rate): + super(BackwardLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.noise_rate = noise_rate + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.) + P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes)) + P.diagonal().fill_(1-self.noise_rate) + P = P.to(self.device) + P_inv = torch.inverse(P) + loss=-torch.sum((torch.matmul(label_one_hot, P_inv)) * torch.log(pred), dim=-1) + return loss.mean() + +@mlconfig.register +class LIDPacedLoss(torch.nn.Module): + def __init__(self, num_classes, alpha, beta1, beta2): + super(LIDPacedLoss, self).__init__() + self.device = device + self.num_classes = num_classes + self.alpha = alpha + self.beta1 = beta1 + self.beta2 = beta2 + self.sce = SCELoss(alpha=beta1, beta=beta2, num_classes=num_classes) + + def forward(self, pred, labels): + if self.alpha == 1.0: + return self.sce(pred, labels) + else: + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + pred_labels = F.one_hot(torch.argmax(pred, dim=1), num_classes=label_one_hot.size(1)) + y_new = self.alpha * label_one_hot + (1. - self.alpha) * pred_labels + loss = -torch.sum(y_new * torch.log(pred), dim=-1) + return loss.mean() \ No newline at end of file diff --git a/loss_acc_plot.py b/loss_acc_plot.py deleted file mode 100644 index 58259d3..0000000 --- a/loss_acc_plot.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Train test error/accuracy/loss plot. 
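For reference, a minimal usage sketch of the combined losses registered in the loss.py hunk above (here NFLandRCE). The alpha/beta values, batch size and class count are illustrative placeholders rather than values taken from the shipped `*.yaml` configs, and the module-level `device` used by some of the classes is assumed to resolve to the machine in use.

```python
import torch
from loss import NFLandRCE  # class added in the loss.py hunk above

# illustrative hyper-parameters; actual runs take them from the config files
criterion = NFLandRCE(alpha=1.0, beta=1.0, num_classes=10, gamma=0.5)

logits = torch.randn(32, 10, requires_grad=True)  # raw model outputs, no softmax applied
labels = torch.randint(0, 10, (32,))              # (possibly noisy) integer class labels

loss = criterion(logits, labels)                  # normalized focal term + reverse CE term
loss.backward()
```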
- -Author: Xingjun Ma -""" -import os -import numpy as np -import tensorflow as tf -import keras.backend as K -from keras.datasets import mnist, cifar10 -from keras.optimizers import SGD -from keras.utils import to_categorical -import matplotlib.pyplot as plt -from sklearn.decomposition import PCA -from util import get_lids_random_batch -from datasets import get_data, validatation_split -from models import get_model -from loss import cross_entropy, boot_soft, boot_hard -from lass_tf import lass - -np.random.seed(1024) - -# MODELS = ['ce', 'd2l', 'backward', 'boot_soft', 'boot_hard', 'forward'] - -MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'd2l'] -MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L'] -COLORS = ['r', 'y', 'c', 'm', 'g', 'b'] -MARKERS = ['x', 'D', '<', '>', '^', 'o'] - -def test_acc(model_list, dataset='mnist', noise_ratio=0.): - """ - Test acc throughout training. - """ - print('Dataset: %s, noise ratio: %s%%' % (dataset, noise_ratio)) - - # plot initialization - fig = plt.figure() # figsize=(7, 6) - ax = fig.add_subplot(111) - - for model_name in model_list: - file_name = 'log/acc_%s_%s_%s.npy' % \ - (model_name, dataset, noise_ratio) - if os.path.isfile(file_name): - accs = np.load(file_name) - train_accs = accs[0] - test_accs = accs[1] - # print(test_accs) - - # plot line - idx = MODELS.index(model_name) - - xnew = np.arange(0, len(test_accs), 1) - test_accs = test_accs[xnew] - ax.plot(xnew, test_accs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) - - # ax.set_xticks([]) - # ax.set_yticks([]) - ax.set_xlabel("Epoch", fontsize=15) - ax.set_ylabel("Test accuracy", fontsize=15) - # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) - legend = plt.legend(loc='lower right', ncol=2) - plt.setp(legend.get_texts(), fontsize=15) - fig.savefig("plots/test_acc_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300) - plt.show() - - -def test_acc_last_epoch(model_list, dataset='mnist', num_classes=10, noise_ratio=10, epochs=50): - """ - Test acc throughout training. - """ - print('Dataset: %s, epochs: %s, noise ratio: %s%%' % (dataset, epochs, noise_ratio)) - - # load data - _, _, X_test, Y_test = get_data(dataset) - # convert class vectors to binary class matrices - Y_test = to_categorical(Y_test, num_classes) - - # load model - image_shape = X_test.shape[1:] - model = get_model(dataset, input_tensor=None, input_shape=image_shape) - sgd = SGD(lr=0.01, momentum=0.9) - - for model_name in model_list: - # the critical sample ratio of the representations learned at every epoch - model_path = 'model/%s_%s_%s.hdf5' % (model_name, dataset, noise_ratio) - model.load_weights(model_path) - model.compile( - loss=cross_entropy, - optimizer=sgd, - metrics=['accuracy'] - ) - - _, test_acc = model.evaluate(X_test, Y_test, batch_size=128, verbose=0) - print('model: %s, epoch: %s, test_acc: %s' % (model_name, epochs-1, test_acc)) - -def print_loss_acc_log(model_list, dataset='mnist', noise_ratio=0.1): - """ - Test acc throughout training. 
- - :param model_list: - :param dataset: - :param noise_ratio: - :return: - """ - print('Dataset: %s, noise ratio: %s' % (dataset, noise_ratio)) - - for model_name in model_list: - loss_file = 'log/loss_%s_%s_%s.npy' % \ - (model_name, dataset, noise_ratio) - acc_file = 'log/acc_%s_%s_%s.npy' % \ - (model_name, dataset, noise_ratio) - if os.path.isfile(loss_file): - losses = np.load(loss_file) - # print(losses) - val_loss = losses[1, -5:] - print('--------- val loss ---------') - print(val_loss) - if os.path.isfile(acc_file): - accs = np.load(acc_file) - print('ecpos: ', len(accs[1])) - val_acc = accs[1, -5:] - print('--------- val acc ---------') - print(val_acc) - -if __name__ == "__main__": - # mnist: epoch=50, cifar-10: epoch=120 - # test_acc(model_list=['ce'], dataset='cifar-10', noise_ratio=40) - - # test_acc_last_epoch(model_list=['ce'], - # dataset='cifar-10', num_classes=10, noise_ratio=40, epochs=120) - print_loss_acc_log(model_list=['boot_hard'], dataset='cifar-100', noise_ratio=0) diff --git a/main.py b/main.py new file mode 100644 index 0000000..885aca1 --- /dev/null +++ b/main.py @@ -0,0 +1,206 @@ +import torch +import argparse +import util +import os +import datetime +import random +import mlconfig +import loss +import models +import dataset +import shutil +from evaluator import Evaluator +from trainer import Trainer +from util import get_lids_random_batch,get_csr_random_batch +from callback_util import D2LCallback +import numpy as np +from plot import lid_trend_through_training, lid_trend_of_learning_models, test_acc_trend_of_learning_models, csr_trend_of_learning_models + +# ArgParse +parser = argparse.ArgumentParser(description='Normalized Loss Functions for Deep Learning with Noisy Labels') +# Training +parser.add_argument('--resume', action='store_true', default=False) +parser.add_argument('--seed', type=int, default=0) +parser.add_argument('--config_path', type=str, default='configs') +parser.add_argument('--version', type=str, default='ce') +parser.add_argument('--exp_name', type=str, default="run1") +parser.add_argument('--load_model', action='store_true', default=False) +parser.add_argument('--data_parallel', action='store_true', default=False) +parser.add_argument('--asym', action='store_true', default=False) +parser.add_argument('--noise_rate', type=float, default=0.0) +parser.add_argument('--plot', action='store_true', default=False) +parser.add_argument('--plotall', action='store_true', default=False) +args = parser.parse_args() + +# Set up +if args.exp_name == '' or args.exp_name is None: + args.exp_name = 'exp_' + datetime.datetime.now() +exp_path = os.path.join(args.exp_name, args.version) +log_file_path = os.path.join(exp_path, args.version) +checkpoint_path = os.path.join(exp_path, 'checkpoints') +checkpoint_path_file = os.path.join(checkpoint_path, args.version) +util.build_dirs(exp_path) +util.build_dirs(checkpoint_path) + +logger = util.setup_logger(name=args.version, log_file=log_file_path + ".log") +for arg in vars(args): + logger.info("%s: %s" % (arg, getattr(args, arg))) + +random.seed(args.seed) +if torch.cuda.is_available(): + torch.cuda.manual_seed(args.seed) + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + device = torch.device('cuda') + logger.info("Using CUDA!") + device_list = [torch.cuda.get_device_name(i) for i in range(0, torch.cuda.device_count())] + logger.info("GPU List: %s" % (device_list)) +else: + device = torch.device('cpu') + +logger.info("PyTorch Version: %s" % (torch.__version__)) +config_file = 
os.path.join(args.config_path, args.version) + '.yaml' +config = mlconfig.load(config_file) +if args.version == 'fl' or args.version == 'bl': + config['criterion']['noise_rate']=args.noise_rate +if args.version != 'd2l': + config.set_immutable() +shutil.copyfile(config_file, os.path.join(exp_path, args.version+'.yaml')) +for key in config: + logger.info("%s: %s" % (key, config[key])) + + +def train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, callback, mode): + for epoch in range(starting_epoch, config.epochs): + if args.version == 'd2l': + if mode == 'stage2': + config['criterion']['alpha'] = callback.alpha + criterion=config.criterion() + + logger.info("="*20 + "Training" + "="*20) + + # Train + ENV['global_step'] = trainer.train(epoch, ENV['global_step'], model, optimizer, criterion) + scheduler.step() + + if args.version == 'd2l': + callback.on_epoch_begin(epoch) + if mode == 'stage1': + if callback.is_found_turning_point == True: + break + + # Eval + logger.info("="*20 + "Eval" + "="*20) + evaluator.eval(epoch, ENV['global_step'], model, torch.nn.CrossEntropyLoss()) + payload = ('Eval Loss:%.4f\tEval acc: %.2f' % (evaluator.loss_meters.avg, evaluator.acc_meters.avg*100)) + logger.info(payload) + # LID + lids = get_lids_random_batch(model, data_loader, device, k=20, batch_size=128) + lid = lids.mean() + logger.info('LID:%f'%(lid)) + # CSR + csr = get_csr_random_batch(model, data_loader, device) + logger.info('CSR:%f'%(csr)) + + ENV['train_history'].append(trainer.acc_meters.avg*100) + ENV['eval_history'].append(evaluator.acc_meters.avg*100) + ENV['curren_acc'] = evaluator.acc_meters.avg*100 + ENV['best_acc'] = max(ENV['curren_acc'], ENV['best_acc']) + ENV['lid'].append(lid) + ENV['csr'].append(csr) + + + # Reset Stats + trainer._reset_stats() + evaluator._reset_stats() + + # Save Model + target_model = model.module if args.data_parallel else model + util.save_model(ENV=ENV, + epoch=epoch, + model=target_model, + optimizer=optimizer, + scheduler=scheduler, + filename=checkpoint_path_file) + logger.info('Model Saved at %s', checkpoint_path_file) + torch.cuda.empty_cache() + return + + +def main(): + if config.dataset.name == 'DatasetGenerator': + data_loader = config.dataset(seed=args.seed, noise_rate=args.noise_rate, asym=args.asym) + else: + data_loader = config.dataset() + + model = config.model() + if isinstance(data_loader, dataset.Clothing1MDatasetLoader): + model.fc = torch.nn.Linear(2048, 14) + model = model.to(device) + + data_loader = data_loader.getDataLoader() + logger.info("param size = %fMB", util.count_parameters_in_MB(model)) + if args.data_parallel: + model = torch.nn.DataParallel(model) + #data_train = data_loader['train_dataset'].dataset + #tensor_list = [] + #for j in range(len(data_train)): + #tensor_list.append(data_train[j][0]) + + optimizer = config.optimizer(model.parameters()) + scheduler = config.scheduler(optimizer) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 40, gamma = 0.1) + if config.criterion.name == 'NLNL': + criterion = config.criterion(train_loader=data_loader['train_dataset']) + else: + criterion = config.criterion() + trainer = Trainer(data_loader['train_dataset'], logger, config) + evaluator = Evaluator(data_loader['test_dataset'], logger, config) + + starting_epoch = 0 + ENV = {'global_step': 0, + 'best_acc': 0.0, + 'current_acc': 0.0, + 'train_history': [], + 'eval_history': [], + 'lid':[], + 'csr':[]} + + if args.load_model: + checkpoint = 
util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + starting_epoch = checkpoint['epoch'] + ENV = checkpoint['ENV'] + trainer.global_step = ENV['global_step'] + logger.info("File %s loaded!" % (checkpoint_path_file)) + + idx = -5 if args.asym else -4 + if args.plot: + lid_trend_through_training(exp_name=args.exp_name, dataset=args.config_path[8:idx], data_loader=data_loader, device=device, model=model, optimizer=optimizer, scheduler=scheduler, model_name=args.version, noise_type='sym', noise_ratio=args.noise_rate) + elif args.plotall: + lid_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate) + test_acc_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate) + csr_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate) + else: + d2l_callback = D2LCallback(model, data_loader, device) + train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, d2l_callback, mode='stage1') + if args.version == 'd2l': + checkpoint = util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + starting_epoch = checkpoint['epoch'] + ENV = checkpoint['ENV'] + trainer.global_step = ENV['global_step'] + logger.info("File %s loaded!" % (checkpoint_path_file)) + + train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, d2l_callback, mode='stage2') + + + return + + +if __name__ == '__main__': + main() diff --git a/models.py b/models.py index ec6a834..be3e50e 100644 --- a/models.py +++ b/models.py @@ -1,127 +1,268 @@ -import numpy as np -import keras.backend as K -from keras.models import Model -from keras.regularizers import l2 -from keras.layers import Input, Conv2D, Dense, MaxPooling2D, Flatten, Activation, BatchNormalization -from resnet import cifar100_resnet - -def get_model(dataset='mnist', input_tensor=None, input_shape=None, num_classes=10): - """ - Takes in a parameter indicating which model type to use ('mnist', - 'cifar-10' or 'cifar-100') and returns the appropriate Keras model. - :param dataset: A string indicating which dataset we are building - a model for. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple - :return: The model; a Keras 'Model' instance. 
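As a sketch of how the main.py added above consumes a config through mlconfig: the path below simply follows the `--config_path`/`--version` convention used when assembling `config_file`, and the calls mirror those in `main()`; the exact YAML contents of the repo's configs are not reproduced here.

```python
import mlconfig
import models  # importing registers the model, optimizer and scheduler classes
import loss    # importing registers the loss classes

# hypothetical path assembled the same way as in main.py: config_path + version + '.yaml'
config = mlconfig.load('configs/cifar10/sym/nce+rce.yaml')

model = config.model()                            # e.g. a registered ToyModel / ResNet
optimizer = config.optimizer(model.parameters())  # a registered torch.optim class
scheduler = config.scheduler(optimizer)           # a registered lr_scheduler
criterion = config.criterion()                    # a registered loss from loss.py
print(config.epochs)                              # plain values are exposed as attributes
```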
- """ - assert dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100'], \ - "dataset parameter must be either 'mnist', 'svhn', 'cifar-10' or 'cifar-100'" - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_shape): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if dataset == 'mnist': - # ## LeNet-5 like 4-layer CNN - x = Conv2D(32, (3, 3), padding='same', kernel_initializer="he_normal", name='conv1')(img_input) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x) - - x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='conv2')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x) - - x = Flatten()(x) - - x = Dense(128, kernel_initializer="he_normal", name='fc1')(x) - x = BatchNormalization()(x) - x = Activation('relu', name='lid')(x) - # x = Dropout(0.2)(x) - - x = Dense(num_classes, kernel_initializer="he_normal")(x) - x = Activation('softmax')(x) - - model = Model(img_input, x) - - elif dataset == 'svhn': - # ## LeNet-5 like 5-layer CNN - x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal', name='conv1')(img_input) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x) - - x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal', name='conv2')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x) - - x = Flatten()(x) - - x = Dense(512, kernel_initializer='he_normal', name='fc1')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - - x = Dense(128, kernel_initializer="he_normal", name='fc2')(x) - x = BatchNormalization()(x) - x = Activation('relu', name='lid')(x) - # x = Dropout(0.2)(x) - - x = Dense(num_classes, kernel_initializer="he_normal")(x) - x = Activation('softmax')(x) - - model = Model(img_input, x) - - elif dataset == 'cifar-10': - # VGG-like 8-layer CNN - # Block 1 - x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='block1_conv1')(img_input) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='block1_conv2')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), padding='same', kernel_initializer="he_normal", name='block2_conv1')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = Conv2D(128, (3, 3), padding='same', kernel_initializer="he_normal", name='block2_conv2')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(196, (3, 3), padding='same', kernel_initializer="he_normal", name='block3_conv1')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = Conv2D(196, (3, 3), padding='same', kernel_initializer="he_normal", name='block3_conv2')(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - x = Flatten(name='flatten')(x) - - x = Dense(256, kernel_initializer="he_normal", kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), name='fc1')(x) - x = BatchNormalization()(x) - x = Activation('relu', name='lid')(x) - - x = 
Dense(num_classes, kernel_initializer="he_normal")(x) - x = Activation('softmax')(x) - - # Create model. - model = Model(img_input, x) - - elif dataset == 'cifar-100': - # resnet - model = cifar100_resnet(depth=7, num_classes=num_classes) - - return model +import torch +import torch.nn as nn +import torch.nn.functional as F +import mlconfig +import torchvision +mlconfig.register(torchvision.models.resnet50) +mlconfig.register(torch.optim.SGD) +mlconfig.register(torch.optim.Adam) +mlconfig.register(torch.optim.lr_scheduler.MultiStepLR) +mlconfig.register(torch.optim.lr_scheduler.CosineAnnealingLR) +mlconfig.register(torch.optim.lr_scheduler.StepLR) +mlconfig.register(torch.optim.lr_scheduler.ExponentialLR) + + +class ConvBrunch(nn.Module): + def __init__(self, in_planes, out_planes, kernel_size=3): + super(ConvBrunch, self).__init__() + padding = (kernel_size - 1) // 2 + self.out_conv = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding), + nn.BatchNorm2d(out_planes), + nn.ReLU()) + + def forward(self, x): + return self.out_conv(x) + + +@mlconfig.register +class ToyModel(nn.Module): + def __init__(self, type='CIFAR10'): + super(ToyModel, self).__init__() + self.type = type + """ + if type == 'CIFAR10': + self.block1 = nn.Sequential( + ConvBrunch(3, 64, 3), + ConvBrunch(64, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block2 = nn.Sequential( + ConvBrunch(64, 128, 3), + ConvBrunch(128, 128, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block3 = nn.Sequential( + ConvBrunch(128, 196, 3), + ConvBrunch(196, 196, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Sequential( + nn.Linear(4*4*196, 256), + nn.BatchNorm1d(256), + nn.ReLU()) + self.fc2 = nn.Linear(256, 10) + self.fc_size = 4*4*196 + """ + if type == 'CIFAR10': + self.block1 = nn.Sequential( + ConvBrunch(3, 32, 3), + ConvBrunch(32, 32, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block2 = nn.Sequential( + ConvBrunch(32, 64, 3), + ConvBrunch(64, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block3 = nn.Sequential( + ConvBrunch(64, 128, 3), + ConvBrunch(128, 128, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Sequential( + nn.Flatten(), + nn.Dropout(0.5), + nn.Linear(4*4*128, 1024), + nn.ReLU(), + nn.BatchNorm1d(1024), + nn.Dropout(0.5), + nn.Linear(1024,512), + nn.ReLU(), + nn.BatchNorm1d(512) + ) + self.fc2 = nn.Sequential( + nn.Dropout(0.5), + nn.Linear(512,10) + ) + self.fc_size = 4*4*128 + + + elif type == 'MNIST': + self.block1 = nn.Sequential( + ConvBrunch(1, 64, 3), + ) + self.block2 = nn.Sequential( + ConvBrunch(64, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Dropout(0.5)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Sequential( + nn.Flatten(), + nn.Linear(64*14*14, 128), + nn.BatchNorm1d(128), + nn.ReLU(), + nn.Dropout(0.5)) + self.fc2 = nn.Linear(128, 10) + self.fc_size = 64*14*14 + + """ + elif type == 'MNIST': + self.block1 = nn.Sequential( + ConvBrunch(1, 32, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + self.block2 = nn.Sequential( + ConvBrunch(32, 64, 3), + nn.MaxPool2d(kernel_size=2, stride=2)) + # self.global_avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Sequential( + nn.Linear(64*7*7, 128), + nn.BatchNorm1d(128), + nn.ReLU()) + self.fc2 = nn.Linear(128, 10) + self.fc_size = 64*7*7 + """ + self._reset_prams() + + def _reset_prams(self): + for m in self.modules(): + if isinstance(m, 
nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu') + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + return + + def forward(self, x): + x = self.block1(x) + x = self.block2(x) + x = self.block3(x) if self.type == 'CIFAR10' else x + # x = self.global_avg_pool(x) + # x = x.view(x.shape[0], -1) + x = x.view(-1, self.fc_size) + x_fc1 = self.fc1(x) + x = self.fc2(x_fc1) + return x, x_fc1 + + +'''ResNet in PyTorch. +For Pre-activation ResNet, see 'preact_resnet.py'. +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +''' + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + self._reset_prams() + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = 
out.view(out.size(0), -1) + out = self.linear(out) + return out + + def _reset_prams(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu') + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + return + + +@mlconfig.register +def ResNet18(num_classes=10): + return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes) + + +@mlconfig.register +def ResNet34(num_classes=10): + return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes) + + +@mlconfig.register +def ResNet50(num_classes=10): + return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes) + + +@mlconfig.register +def ResNet101(num_classes=10): + return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes) + + +@mlconfig.register +def ResNet152(num_classes=10): + return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes) diff --git a/plot.py b/plot.py new file mode 100644 index 0000000..eafd327 --- /dev/null +++ b/plot.py @@ -0,0 +1,214 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +from lid import lid_mle +import torch +import util + +np.random.seed(1024) + +MODELS = ['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'] +MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L'] +COLORS = ['r', 'y', 'c', 'm', 'g', 'b'] +MARKERS = ['x', 'D', '<', '>', '^', 'o'] + + +def lid_trend_through_training(exp_name, dataset, data_loader, device, model, optimizer, scheduler, model_name='d2l', noise_type='sym', noise_ratio=0.): + """ + plot the lid trend for clean vs noisy samples through training. + This can provide some information about manifold learning dynamics through training. + """ + + lids, train_accs, test_accs = None, None, None + + # get LID of raw inputs + k = 20 + lids = [] + for j, (images,labels) in enumerate(data_loader['train_dataset']): + images = images.to(device, non_blocking = True) + lids.extend(lid_mle(images, images, k=k)) + + lids = torch.stack(lids, dim=0).type(torch.float32) + lid_X = lids.mean() + print('LID of input X: ', lid_X) + + exp_path = os.path.join(exp_name, model_name) + checkpoint_path = os.path.join(exp_path, 'checkpoints') + checkpoint_path_file = os.path.join(checkpoint_path, model_name) + checkpoint = util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + ENV = checkpoint['ENV'] + train_accs = ENV['train_history'] + train_accs.insert(0,0) + test_accs = ENV['eval_history'] + test_accs.insert(0,0) + lids = ENV['lid'] + lids.insert(0,lid_X) + lids = torch.stack(lids, dim=0).type(torch.float32) + + plot(dataset, model_name, noise_ratio, lids, train_accs, test_accs) + + +def plot(dataset, model_name, noise_ratio, lids, train_accs, test_accs): + """ + plot function + """ + # plot + fig = plt.figure() # figsize=(7, 6) + xnew = np.arange(0, len(lids), 5) + + lids = lids.cpu().numpy() + train_accs = np.array(train_accs) / 100 + test_accs = np.array(test_accs) / 100 + print(train_accs) + lids = lids[xnew] + train_accs = train_accs[xnew] + test_accs = test_accs[xnew] + + ax = fig.add_subplot(111) + ax.plot(xnew, lids, c='r', marker='o', markersize=3, linewidth=2, label='LID score') + + ax2 = ax.twinx() + ax2.plot(xnew, train_accs, c='b', marker='x', markersize=3, linewidth=2, label='Train acc') + ax2.plot(xnew, test_accs, c='c', marker='^', markersize=3, linewidth=2, label='Test acc') + + # ax.set_xticks([]) + # ax.set_yticks([]) + ax.set_xlabel("Epoch", fontsize=15) + ax.set_ylabel("Subspace 
dimensionality (LID score)", fontsize=15) + ax2.set_ylabel("Train/test accuracy", fontsize=15) + # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) + + if dataset == 'mnist': + ax.set_ylim((4, 22)) # for mnist + ax2.set_ylim((0.2, 1.2)) + elif dataset == 'svhn': + ax.set_ylim((7, 20)) # for svhn + ax2.set_ylim((0.2, 1.2)) + elif dataset == 'cifar10': + ax.set_ylim((2.5, 12.5)) # for cifar-10 + #ax.set_ylim((3.5, 20.5)) + ax2.set_ylim((0., 1.2)) + elif dataset == 'cifar100': + ax.set_ylim((3, 12)) # for cifar-100 + ax2.set_ylim((0., 1.)) + + legend = ax.legend(loc='upper left') + plt.setp(legend.get_texts(), fontsize=15) + legend2 = ax2.legend(loc='upper right') + plt.setp(legend2.get_texts(), fontsize=15) + fig.savefig("plots/lid_trend_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300) + plt.show() + + +def lid_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0): + """ + The LID trend of different learning models throughout. + """ + # plot initialization + fig = plt.figure() # figsize=(7, 6) + ax = fig.add_subplot(111) + + for model_name in model_list: + exp_path = os.path.join(exp_name, model_name) + checkpoint_path = os.path.join(exp_path, 'checkpoints') + checkpoint_path_file = os.path.join(checkpoint_path, model_name) + checkpoint = util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + ENV = checkpoint['ENV'] + lids = ENV['lid'] + lids = torch.stack(lids, dim=0).type(torch.float32) + lids = lids.cpu().numpy() + # smooth for plot + lids[lids < 0] = 0 + lids[lids > 10] = 10 + + xnew = np.arange(0, len(lids), 5) + lids = lids[xnew] + + # plot line + idx = MODELS.index(model_name) + ax.plot(xnew, lids, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) + + ax.set_xlabel("Epoch", fontsize=15) + ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15) + # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) + legend = plt.legend(loc='lower center', ncol=2) + plt.setp(legend.get_texts(), fontsize=15) + fig.savefig("plots/lid_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300) + plt.show() + +def test_acc_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0): + """ + The test_acc trend of different learning models throughout. 
+ """ + # plot initialization + fig = plt.figure() # figsize=(7, 6) + ax = fig.add_subplot(111) + + for model_name in model_list: + exp_path = os.path.join(exp_name, model_name) + checkpoint_path = os.path.join(exp_path, 'checkpoints') + checkpoint_path_file = os.path.join(checkpoint_path, model_name) + checkpoint = util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + ENV = checkpoint['ENV'] + test_accs = ENV['eval_history'] + test_accs = np.array(test_accs) / 100 + + xnew = np.arange(0, len(test_accs), 5) + test_accs = test_accs[xnew] + + # plot line + idx = MODELS.index(model_name) + ax.plot(xnew, test_accs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) + + ax.set_xlabel("Epoch", fontsize=15) + ax.set_ylabel("Test Accuracy", fontsize=15) + # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) + legend = plt.legend(loc='lower center', ncol=2) + plt.setp(legend.get_texts(), fontsize=15) + fig.savefig("plots/test_accs_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300) + plt.show() + +def csr_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0): + """ + The CSR trend of different learning models throughout. + """ + # plot initialization + fig = plt.figure() # figsize=(7, 6) + ax = fig.add_subplot(111) + + for model_name in model_list: + exp_path = os.path.join(exp_name, model_name) + checkpoint_path = os.path.join(exp_path, 'checkpoints') + checkpoint_path_file = os.path.join(checkpoint_path, model_name) + checkpoint = util.load_model(filename=checkpoint_path_file, + model=model, + optimizer=optimizer, + scheduler=scheduler) + ENV = checkpoint['ENV'] + csr = ENV['csr'] + csr = torch.stack(csr, dim=0).type(torch.float32) + csr = csr.cpu().numpy() + + xnew = np.arange(0, len(csr), 5) + csr = csr[xnew] + + # plot line + idx = MODELS.index(model_name) + ax.plot(xnew, csr, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx]) + + ax.set_xlabel("Epoch", fontsize=15) + ax.set_ylabel("CRS", fontsize=15) + # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15) + legend = plt.legend(loc='lower center', ncol=2) + plt.setp(legend.get_texts(), fontsize=15) + fig.savefig("plots/crs_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300) + plt.show() \ No newline at end of file diff --git a/representation_plot.py b/representation_plot.py deleted file mode 100644 index cf2b242..0000000 --- a/representation_plot.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Date: 28/07/2017 -feature exploration and visualization - -Author: Xingjun Ma -""" -import os -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec -from sklearn.manifold import TSNE -from keras.optimizers import SGD -from util import get_deep_representations -from datasets import get_data -from models import get_model -from loss import cross_entropy - -np.random.seed(1234) - -CLASSES = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] - -def feature_visualization(model_name='ce', dataset='mnist', - num_classes=10, noise_ratio=40, n_samples=100): - """ - This is to show how features of incorretly labeled images are overffited to the wrong class. - plot t-SNE 2D-projected deep features (right before logits). - This will generate 3 plots in a grid (3x1). 
- The first shows the raw features projections of two classes of images (clean label + noisy label) - The second shows the deep features learned by cross-entropy after training. - The third shows the deep features learned using a new loss after training. - - :param model_name: a new model other than crossentropy(ce), can be: boot_hard, boot_soft, forward, backward, lid - :param dataset: - :param num_classes: - :param noise_type; - :param noise_ratio: - :param epochs: to find the last epoch - :param n_samples: - :return: - """ - print('Dataset: %s, model_name: ce/%s, noise ratio: %s%%' % (model_name, dataset, noise_ratio)) - features_ce = np.array([None, None]) - features_other = np.array([None, None]) - - # # load pre-saved to avoid recomputing - # feature_tmp = "lof/representation_%s_%s.npy" % (dataset, noise_ratio) - # if os.path.isfile(feature_tmp): - # data = np.load(feature_tmp) - # features_input = data[0] - # features_ce = data[1] - # features_other = data[2] - # - # plot(model_name, dataset, noise_ratio, features_input, features_ce, features_other) - # return - - # load data - X_train, Y_train, X_test, Y_test = get_data(dataset) - Y_noisy = np.load("data/noisy_label_%s_%s.npy" % (dataset, noise_ratio)) - Y_noisy = Y_noisy.reshape(-1) - - # sample training set - cls_a = 0 - cls_b = 3 - - # find smaples labeled to class A and B - cls_a_idx = np.where(Y_noisy == cls_a)[0] - cls_b_idx = np.where(Y_noisy == cls_b)[0] - - # sampling for efficiency purpose - cls_a_idx = np.random.choice(cls_a_idx, n_samples, replace=False) - cls_b_idx = np.random.choice(cls_b_idx, n_samples, replace=False) - - X_a = X_train[cls_a_idx] - X_b = X_train[cls_b_idx] - - image_shape = X_train.shape[1:] - model = get_model(dataset, input_tensor=None, input_shape=image_shape) - sgd = SGD(lr=0.01, momentum=0.9) - - - #### get deep representations of ce model - model_path = 'model/ce_%s_%s.hdf5' % (dataset, noise_ratio) - model.load_weights(model_path) - model.compile( - loss=cross_entropy, - optimizer=sgd, - metrics=['accuracy'] - ) - - rep_a = get_deep_representations(model, X_a, batch_size=100).reshape((X_a.shape[0], -1)) - rep_b = get_deep_representations(model, X_b, batch_size=100).reshape((X_b.shape[0], -1)) - - rep_a = TSNE(n_components=2).fit_transform(rep_a) - rep_b = TSNE(n_components=2).fit_transform(rep_b) - features_ce[0] = rep_a - features_ce[1] = rep_b - - #### get deep representations of other model - model_path = 'model/%s_%s_%s.hdf5' % (model_name, dataset, noise_ratio) - model.load_weights(model_path) - model.compile( - loss=cross_entropy, - optimizer=sgd, - metrics=['accuracy'] - ) - - rep_a = get_deep_representations(model, X_a, batch_size=100).reshape((X_a.shape[0], -1)) - rep_b = get_deep_representations(model, X_b, batch_size=100).reshape((X_b.shape[0], -1)) - - rep_a = TSNE(n_components=2).fit_transform(rep_a) - rep_b = TSNE(n_components=2).fit_transform(rep_b) - features_other[0] = rep_a - features_other[1] = rep_b - - # plot - fig = plt.figure(figsize=(12, 5)) - gs = gridspec.GridSpec(1, 2, wspace=0.15) - - a_clean_idx = Y_train[cls_a_idx] == Y_noisy[cls_a_idx] - a_noisy_idx = Y_train[cls_a_idx] != Y_noisy[cls_a_idx] - b_clean_idx = Y_train[cls_b_idx] == Y_noisy[cls_b_idx] - b_noisy_idx = Y_train[cls_b_idx] != Y_noisy[cls_b_idx] - - ## plot features learned by cross-entropy - ax = fig.add_subplot(gs[0, 0]) - A = features_ce[0] - B = features_ce[1] - # clean labeld class A samples plot - ax.scatter(A[a_clean_idx][:, 0].ravel(), A[a_clean_idx][:, 1].ravel(), c='b', marker='o', s=10, 
label='class A: clean') - ax.scatter(A[a_noisy_idx][:, 0].ravel(), A[a_noisy_idx][:, 1].ravel(), c='m', marker='x', s=30, label='class A: noisy') - ax.scatter(B[b_clean_idx][:, 0].ravel(), B[b_clean_idx][:, 1].ravel(), c='r', marker='o', s=10, label='class B: clean') - ax.scatter(B[b_noisy_idx][:, 0].ravel(), B[b_noisy_idx][:, 1].ravel(), c='c', marker='x', s=30, label='class B: noisy') - - ax.set_title("cross-entropy", fontsize=15) - legend = ax.legend(loc='lower center', ncol=2) - plt.setp(legend.get_texts(), fontsize=15) - - ax = fig.add_subplot(gs[0, 1]) - A = features_other[0] - B = features_other[1] - ax.scatter(A[a_clean_idx][:, 0].ravel(), A[a_clean_idx][:, 1].ravel(), c='b', marker='o', s=10, label='class A: clean') - ax.scatter(A[a_noisy_idx][:, 0].ravel(), A[a_noisy_idx][:, 1].ravel(), c='m', marker='x', s=30, label='class A: noisy') - ax.scatter(B[b_clean_idx][:, 0].ravel(), B[b_clean_idx][:, 1].ravel()-5, c='r', marker='o', s=10, label='class B: clean') - ax.scatter(B[b_noisy_idx][:, 0].ravel(), B[b_noisy_idx][:, 1].ravel(), c='c', marker='x', s=30, label='class B: noisy') - - ax.set_title("D2L", fontsize=15) - legend = ax.legend(loc='lower center', ncol=2) - plt.setp(legend.get_texts(), fontsize=15) - - fig.savefig("plots/representations_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300) - plt.show() - -if __name__ == "__main__": - feature_visualization(model_name='d2l', dataset='cifar-10', num_classes=10, noise_ratio=60, n_samples=500) \ No newline at end of file diff --git a/resnet.py b/resnet.py deleted file mode 100644 index 665e09b..0000000 --- a/resnet.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Some code sections are taken from -https://github.com/raghakot/keras-resnet -""" - -import sys - -import numpy as np - -from keras.models import Model -from keras.layers import Input, Activation, merge, Dense, Flatten -from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D -from keras.layers.convolutional import AveragePooling2D -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras.layers.merge import add -from keras import backend as K - -sys.setrecursionlimit(10000) - -BN_AXIS = 3 - - -def cifar100_resnet(depth, num_classes): - # how many layers this is going to create? 
- # 2 + 6 * depth - - img_channels = 3 - img_rows = 32 - img_cols = 32 - num_conv = 3 - decay = 2e-3 - - input = Input(shape=(img_rows, img_cols, img_channels)) - - # 1 conv + BN + relu - filters = 16 - b = Conv2D(filters=filters, kernel_size=(num_conv, num_conv), - kernel_initializer="he_normal", padding="same", - kernel_regularizer=l2(decay), bias_regularizer=l2(0))(input) - b = BatchNormalization(axis=BN_AXIS)(b) - b = Activation("relu")(b) - - # 1 res, no striding - b = residual(num_conv, filters, decay, first=True)(b) # 2 layers inside - for _ in np.arange(1, depth): # start from 1 => 2 * depth in total - b = residual(num_conv, filters, decay)(b) - - filters *= 2 - - # 2 res, with striding - b = residual(num_conv, filters, decay, more_filters=True)(b) - for _ in np.arange(1, depth): - b = residual(num_conv, filters, decay)(b) - - filters *= 2 - - # 3 res, with striding - b = residual(num_conv, filters, decay, more_filters=True)(b) - for _ in np.arange(1, depth): - b = residual(num_conv, filters, decay)(b) - - b = BatchNormalization(axis=BN_AXIS)(b) - b = Activation("relu")(b) - - b = AveragePooling2D(pool_size=(8, 8), strides=(1, 1), - padding="valid")(b) - - out = Flatten(name='lid')(b) - - dense = Dense(units=num_classes, kernel_initializer="he_normal", - kernel_regularizer=l2(decay), bias_regularizer=l2(0))(out) - - act = Activation("softmax")(dense) - - return Model(inputs=input, outputs=act) - - -def residual(num_conv, filters, decay, more_filters=False, first=False): - def f(input): - # in_channel = input._keras_shape[1] - out_channel = filters - - if more_filters and not first: - # out_channel = in_channel * 2 - stride = 2 - else: - # out_channel = in_channel - stride = 1 - - if not first: - b = BatchNormalization(axis=BN_AXIS)(input) - b = Activation("relu")(b) - else: - b = input - - b = Conv2D(filters=out_channel, - kernel_size=(num_conv, num_conv), - strides=(stride, stride), - kernel_initializer="he_normal", padding="same", - kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b) - b = BatchNormalization(axis=BN_AXIS)(b) - b = Activation("relu")(b) - res = Conv2D(filters=out_channel, - kernel_size=(num_conv, num_conv), - kernel_initializer="he_normal", padding="same", - kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b) - - # check and match number of filter for the shortcut - input_shape = K.int_shape(input) - residual_shape = K.int_shape(res) - if not input_shape[3] == residual_shape[3]: - stride_width = int(round(input_shape[1] / residual_shape[1])) - stride_height = int(round(input_shape[2] / residual_shape[2])) - - input = Conv2D(filters=residual_shape[3], kernel_size=(1, 1), - strides=(stride_width, stride_height), - kernel_initializer="he_normal", - padding="valid", kernel_regularizer=l2(decay))(input) - - return add([input, res]) - - return f \ No newline at end of file diff --git a/script/CIFAR10.slurm b/script/CIFAR10.slurm new file mode 100644 index 0000000..8fc12ca --- /dev/null +++ b/script/CIFAR10.slurm @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --nodes 1 +#SBATCH --partition gpgpu +#SBATCH --gres=gpu:1 + +# The project ID which this job should run under: +#SBATCH --account="punim0784" + +# Maximum number of tasks/CPU cores used by the job: +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 + +# The amount of memory in megabytes per process in the job: +#SBATCH --mem=64G + +# The maximum running time of the job in days-hours:mins:sec +#SBATCH --time 96:00:00 + +# check that the script is launched with sbatch +if [ "x$SLURM_JOB_ID" == "x" ]; then + echo "You 
need to submit your job to the queuing system with sbatch" + exit 1 +fi + + +# Run the job from this directory: +cd /data/cephfs/punim0784/robust_loss_nips + +# The modules to load: +module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10 +nvidia-smi + +exp_name=$1 +seed=$2 +loss=$3 + +# Sym +declare -a nr_arr=("0.0" + "0.2" + "0.4" + "0.6" + "0.8") + +for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/cifar10/sym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/cifar10/sym/$i --seed $seed --noise_rate $i --config_path configs/cifar10/sym --version ${loss} +done + +# Asym +declare -a nr_arr=( + "0.1" + "0.2" + "0.3" + "0.4" + ) +for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/cifar10/asym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/cifar10/asym/$i --seed $seed --noise_rate $i --config_path configs/cifar10/asym --version ${loss} +done diff --git a/script/CIFAR100.slurm b/script/CIFAR100.slurm new file mode 100644 index 0000000..8b36be6 --- /dev/null +++ b/script/CIFAR100.slurm @@ -0,0 +1,63 @@ +#!/bin/bash +#SBATCH --nodes 1 +#SBATCH --partition gpgpu +#SBATCH --gres=gpu:1 + +# The project ID which this job should run under: +#SBATCH --account="punim0784" + +# Maximum number of tasks/CPU cores used by the job: +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 + +# The amount of memory in megabytes per process in the job: +#SBATCH --mem=64G + +# The maximum running time of the job in days-hours:mins:sec +#SBATCH --time 168:00:00 + +# check that the script is launched with sbatch +if [ "x$SLURM_JOB_ID" == "x" ]; then + echo "You need to submit your job to the queuing system with sbatch" + exit 1 +fi + + +# Run the job from this directory: +cd /data/cephfs/punim0784/robust_loss_nips + + +# The modules to load: +module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10 +nvidia-smi + +exp_name=$1 +seed=$2 +loss=$3 + + +# Sym +declare -a nr_arr=("0.0" + "0.2" + "0.4" + "0.6" + "0.8") + +for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/cifar100/sym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/cifar100/sym/$i --seed $seed --noise_rate $i --config_path configs/cifar100/sym --version ${loss} +done + +# Asym +declare -a nr_arr=( + "0.1" + "0.2" + "0.3" + "0.4" + ) +for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/cifar100/asym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/cifar100/asym/$i --seed $seed --noise_rate $i --config_path configs/cifar100/asym --version ${loss} --asym +done diff --git a/script/MNIST.slurm b/script/MNIST.slurm new file mode 100644 index 0000000..f6d35c8 --- /dev/null +++ b/script/MNIST.slurm @@ -0,0 +1,63 @@ +#!/bin/bash +#SBATCH --nodes 1 +#SBATCH --partition gpgpu +#SBATCH --gres=gpu:1 + +# The project ID which this job should run under: +#SBATCH --account="punim0784" + +# Maximum number of tasks/CPU cores used by the job: +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 + +# The amount of memory in megabytes per process in the job: +#SBATCH --mem=64G + +# The maximum running time of the job in days-hours:mins:sec +#SBATCH --time 48:00:00 + +# check that the script is launched with sbatch +if [ "x$SLURM_JOB_ID" == "x" ]; then + echo "You need to submit your job to the queuing system with sbatch" + exit 1 +fi + + +# Run the job from this directory: +# cd /data/gpfs/users/hanxunh/robust_loss_nips +cd /data/cephfs/punim0784/robust_loss_nips + +# The modules to load: +module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10 +nvidia-smi + +exp_name=$1 +seed=$2 +loss=$3 + + +# Sym +declare -a nr_arr=("0.0" + "0.2" + "0.4" + "0.6" + "0.8") + 
+for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/mnist/sym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/mnist/sym/$i --seed $seed --noise_rate $i --config_path configs/mnist/sym --version ${loss} +done + +# Asym +declare -a nr_arr=( + "0.1" + "0.2" + "0.3" + "0.4" + ) +for i in "${nr_arr[@]}" + do + rm -rf ${exp_name}/mnist/asym/$i/${loss}/* + python3 -u main.py --exp_name ${exp_name}/mnist/asym/$i --seed $seed --noise_rate $i --config_path configs/mnist/asym --version ${loss} --asym +done diff --git a/script/WebVisionMini.slurm b/script/WebVisionMini.slurm new file mode 100644 index 0000000..3f06a08 --- /dev/null +++ b/script/WebVisionMini.slurm @@ -0,0 +1,60 @@ +#!/bin/bash +#SBATCH --nodes 1 +#SBATCH --partition gpgpu +#SBATCH --gres=gpu:4 + +# The project ID which this job should run under: +#SBATCH --account="punim0784" + +# Maximum number of tasks/CPU cores used by the job: +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=24 + +# The amount of memory in megabytes per process in the job: +#SBATCH --mem=120G + +# The maximum running time of the job in days-hours:mins:sec +#SBATCH --time 168:00:00 + +# check that the script is launched with sbatch +if [ "x$SLURM_JOB_ID" == "x" ]; then + echo "You need to submit your job to the queuing system with sbatch" + exit 1 +fi + + +# Copy Data to local node +cd /var/local/tmp/ +mkdir -p datasets +mkdir -p datasets/ILSVR2012 + +pwd +echo 'rsync datasets' + +rsync -avzh --progress /data/cephfs/punim0784/datasets/google_resized_256.tar datasets/ +rsync -avzh --progress /data/cephfs/punim0784/datasets/webvision_mini_train.txt datasets/ +rsync -avzh --progress /data/cephfs/punim0784/datasets/train_filelist_google.txt datasets/ + +rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/ILSVRC2012_img_val.tar datasets/ILSVR2012/ +rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/meta.bin datasets/ILSVR2012/ +rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/ILSVRC2012_devkit_t12.tar.gz datasets/ILSVR2012/ + +cd datasets +pwd +echo 'untar google_resized_256' +tar -xvf google_resized_256.tar + +# Run the job from this directory: +cd /data/cephfs/punim0784/robust_loss_nips + +# The modules to load: +module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10 +nvidia-smi + +exp_name=$1 +seed=$2 +loss=$3 + +rm -rf ${exp_name}/web_vision_mini/${loss}/* +rm -rf ${exp_name}/web_vision_mini/${loss} +python3 -u main.py --data_parallel --exp_name ${exp_name}/webvision_mini/ --seed $seed --config_path configs/webvision_mini --version ${loss} diff --git a/script/script.sh b/script/script.sh new file mode 100644 index 0000000..b4e1566 --- /dev/null +++ b/script/script.sh @@ -0,0 +1,91 @@ +# +# # CIFAR100 +# declare -a loss=( "ce" +# "focal" +# "gce" +# "mae" +# "nce" +# "nce+mae" +# "nce+rce" +# "nfl" +# "nfl+mae" +# "nfl+rce" +# "ngce" +# "ngce+mae" +# "ngce+rce" +# "nlnl" +# "rce" +# "sce" ) +# +# declare -a run_version=( +# "run1" +# "run2" +# "run3" +# ) +# +# seed=0 +# for i in "${run_version[@]}" +# do +# for j in "${loss[@]}" +# do +# job_name=C100_${i}_${j} +# echo $job_name +# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR100.slurm $i $seed $j +# done +# seed=$((seed+1)) +# done + + +# # WebVision Full +# declare -a loss=( +# "ce" +# "gce" +# "nce+mae" +# "nce+rce" +# "nfl+mae" +# "nfl+rce" +# "sce" +# ) +# +# declare -a run_version=( +# "webvision_full" +# ) +# +# seed=0 +# for i in "${run_version[@]}" +# do +# for j in "${loss[@]}" +# do +# 
job_name=WebVisionFull_${i}_${j} +# echo $job_name +# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=12 --gres=gpu:4 WebVisionFull.slurm $i $seed $j +# done +# seed=$((seed+1)) +# done + +# # WebVision Mini +# declare -a loss=( +# "ce" +# "gce" +# "nce+mae" +# "nce+rce" +# "nfl+mae" +# "nfl+rce" +# "sce" +# ) +# +# declare -a run_version=( +# "webvision_mini" +# ) +# +# seed=0 +# for i in "${run_version[@]}" +# do +# for j in "${loss[@]}" +# do +# job_name=WebVisionMini${i}_${j} +# echo $job_name +# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=24 --gres=gpu:4 WebVisionMini.slurm $i $seed $j +# done +# seed=$((seed+1)) +# done diff --git a/script/submit_c10.sh b/script/submit_c10.sh new file mode 100644 index 0000000..249647a --- /dev/null +++ b/script/submit_c10.sh @@ -0,0 +1,39 @@ +# CIFAR10 +declare -a loss=( + "ce" + "focal" + "gce" + "mae" + "nce" + "nce+mae" + "nce+rce" + "nfl" + "nfl+mae" + "nfl+rce" + "ngce" + "ngce+mae" + "ngce+rce" + # "nlnl" + "rce" + "sce" + ) + +declare -a run_version=( + "run1" + "run2" + "run3" + ) + +seed=0 +for i in "${run_version[@]}" +do + for j in "${loss[@]}" + do + echo C10_${i}_${j} + job_name=${j}_C10_${i} + # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR10.slurm $i $seed $j + sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR10.slurm $i $seed $j + # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR10.slurm $i $seed $j + done + seed=$((seed+1)) +done diff --git a/script/submit_c100.sh b/script/submit_c100.sh new file mode 100644 index 0000000..6288727 --- /dev/null +++ b/script/submit_c100.sh @@ -0,0 +1,39 @@ +# CIFAR10 +declare -a loss=( + "ce" + "focal" + "gce" + "mae" + "nce" + "nce+mae" + "nce+rce" + "nfl" + "nfl+mae" + "nfl+rce" + "ngce" + "ngce+mae" + "ngce+rce" + # "nlnl" + "rce" + "sce" + ) + +declare -a run_version=( + "run1" + "run2" + "run3" + ) + +seed=0 +for i in "${run_version[@]}" +do + for j in "${loss[@]}" + do + echo C100_${i}_${j} + job_name=${j}_C100_${i} + # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR10.slurm $i $seed $j + sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR100.slurm $i $seed $j + # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR10.slurm $i $seed $j + done + seed=$((seed+1)) +done diff --git a/script/submit_clothing1m.sh b/script/submit_clothing1m.sh new file mode 100644 index 0000000..c6b3f37 --- /dev/null +++ b/script/submit_clothing1m.sh @@ -0,0 +1,25 @@ +# Clothing1M +declare -a loss=( "ce" + "gce" + "nce+mae" + "nce+rce" + "nfl+mae" + "nfl+rce" + "sce" ) + +declare -a run_version=( + "clothing1m" + ) + +seed=0 +for i in "${run_version[@]}" +do + for j in "${loss[@]}" + do + job_name=Clothing1M_${i}_${j} + echo $job_name + sbatch --partition gpgpu --cpus-per-task=8 --gres=gpu:4 Clothing1M.slurm $i $seed $j + # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --cpus-per-task=24 --gres=gpu:4 Clothing1M.slurm $i $seed $j + done + seed=$((seed+1)) +done diff --git a/script/submit_mnist.sh b/script/submit_mnist.sh new file mode 100644 index 0000000..8ec47d1 --- /dev/null +++ b/script/submit_mnist.sh @@ -0,0 +1,38 @@ +# MNIST +declare -a loss=( + "ce" + "focal" + "gce" + 
"mae" + "nce" + "nce+mae" + "nce+rce" + "nfl" + "nfl+mae" + "nfl+rce" + "ngce" + "ngce+mae" + "ngce+rce" + # "nlnl" + "rce" + "sce" + ) + +declare -a run_version=( + "run1" + "run2" + "run3" + ) + +seed=0 +for i in "${run_version[@]}" +do + for j in "${loss[@]}" + do + job_name=${j}_MNIST_${i} + echo $job_name + # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 MNIST.slurm $i $seed $j + sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=16G MNIST.slurm $i $seed $j + done + seed=$((seed+1)) +done diff --git a/script/submit_webvision_mini.sh b/script/submit_webvision_mini.sh new file mode 100644 index 0000000..2c1e55d --- /dev/null +++ b/script/submit_webvision_mini.sh @@ -0,0 +1,28 @@ +# WebVision Mini +declare -a loss=( + "ce" + "gce" + # "nce+mae" + # "nce+rce" + # "nfl+mae" + # "nfl+rce" + "sce" + ) + +declare -a run_version=( + "webvision_mini" + ) + +seed=0 +for i in "${run_version[@]}" +do + for j in "${loss[@]}" + do + job_name=WebVisionMini${i}_${j} + echo $job_name + sbatch --partition gpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:4 --mem=96G WebVisionMini.slurm $i $seed $j + # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=8 --gres=gpu:4 --mem=96G WebVisionMini.slurm $i $seed $j + # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=24 --gres=gpu:4 WebVisionMini.slurm $i $seed $j + done + seed=$((seed+1)) +done diff --git a/train_models.py b/train_models.py deleted file mode 100644 index a7165e1..0000000 --- a/train_models.py +++ /dev/null @@ -1,199 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -import os -import keras.backend as K -import argparse - -from keras.preprocessing.image import ImageDataGenerator -from keras.optimizers import SGD -from keras.callbacks import ModelCheckpoint - -from util import get_lr_scheduler, uniform_noise_model_P -from datasets import get_data, validatation_split -from models import get_model -from loss import cross_entropy, boot_soft, boot_hard, forward, backward, lid_paced_loss -from callback_util import D2LCallback, LoggerCallback - -D2L = {'mnist': {'init_epoch': 5, 'epoch_win': 5}, 'svhn': {'init_epoch': 20, 'epoch_win': 5}, - 'cifar-10': {'init_epoch': 40, 'epoch_win': 5}, 'cifar-100': {'init_epoch': 60, 'epoch_win': 5}} - -# prepare folders -folders = ['data', 'model', 'log'] -for folder in folders: - path = os.path.join('./', folder) - if not os.path.exists(path): - os.makedirs(path) - -def train(dataset='mnist', model_name='d2l', batch_size=128, epochs=50, noise_ratio=0): - """ - Train one model with data augmentation: random padding+cropping and horizontal flip - :param dataset: - :param model_name: - :param batch_size: - :param epochs: - :param noise_ratio: - :return: - """ - print('Dataset: %s, model: %s, batch: %s, epochs: %s, noise ratio: %s%%' % - (dataset, model_name, batch_size, epochs, noise_ratio)) - - # load data - X_train, y_train, X_test, y_test = get_data(dataset, noise_ratio, random_shuffle=True) - # X_train, y_train, X_val, y_val = validatation_split(X_train, y_train, split=0.1) - n_images = X_train.shape[0] - image_shape = X_train.shape[1:] - num_classes = y_train.shape[1] - print("n_images", n_images, "num_classes", num_classes, "image_shape:", image_shape) - - # load model - model = get_model(dataset, input_tensor=None, input_shape=image_shape, num_classes=num_classes) - # model.summary() - - if dataset 
== 'cifar-100': - optimizer = SGD(lr=0.1, decay=5e-3, momentum=0.9) - else: - optimizer = SGD(lr=0.1, decay=1e-4, momentum=0.9) - - # for backward, forward loss - # suppose the model knows noise ratio - P = uniform_noise_model_P(num_classes, noise_ratio/100.) - # create loss - if model_name == 'forward': - P = uniform_noise_model_P(num_classes, noise_ratio / 100.) - loss = forward(P) - elif model_name == 'backward': - P = uniform_noise_model_P(num_classes, noise_ratio / 100.) - loss = backward(P) - elif model_name == 'boot_hard': - loss = boot_hard - elif model_name == 'boot_soft': - loss = boot_soft - elif model_name == 'd2l': - if dataset == 'cifar-100': - loss = lid_paced_loss(beta1=6.0, beta2=0.1) - else: - loss = lid_paced_loss(beta1=0.1, beta2=1.0) - else: - loss = cross_entropy - - # model - model.compile( - loss=loss, - optimizer=optimizer, - metrics=['accuracy'] - ) - - ## do real-time updates using callbakcs - callbacks = [] - if model_name == 'd2l': - init_epoch = D2L[dataset]['init_epoch'] - epoch_win = D2L[dataset]['epoch_win'] - d2l_learning = D2LCallback(model, X_train, y_train, - dataset, noise_ratio, - epochs=epochs, - pace_type=model_name, - init_epoch=init_epoch, - epoch_win=epoch_win) - - callbacks.append(d2l_learning) - - cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio), - monitor='val_loss', - verbose=0, - save_best_only=False, - save_weights_only=True, - period=1) - callbacks.append(cp_callback) - - else: - cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio), - monitor='val_loss', - verbose=0, - save_best_only=False, - save_weights_only=True, - period=epochs) - callbacks.append(cp_callback) - - # learning rate scheduler if use sgd - lr_scheduler = get_lr_scheduler(dataset) - callbacks.append(lr_scheduler) - - # acc, loss, lid - log_callback = LoggerCallback(model, X_train, y_train, X_test, y_test, dataset, - model_name, noise_ratio, epochs) - callbacks.append(log_callback) - - # data augmentation - if dataset in ['mnist', 'svhn']: - datagen = ImageDataGenerator() - elif dataset in ['cifar-10']: - datagen = ImageDataGenerator( - width_shift_range=0.2, - height_shift_range=0.2, - horizontal_flip=True) - else: - datagen = ImageDataGenerator( - rotation_range=20, - width_shift_range=0.2, - height_shift_range=0.2, - horizontal_flip=True) - datagen.fit(X_train) - - # train model - model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size), - steps_per_epoch=len(X_train) / batch_size, epochs=epochs, - validation_data=(X_test, y_test), - verbose=1, - callbacks=callbacks - ) - -def main(args): - assert args.dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100'], \ - "dataset parameter must be either 'mnist', 'svhn', 'cifar-10', 'cifar-100'" - assert args.model_name in ['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'], \ - "dataset parameter must be either 'ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'" - train(args.dataset, args.model_name, args.batch_size, args.epochs, args.noise_ratio) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - '-d', '--dataset', - help="Dataset to use; either 'mnist', 'svhn', 'cifar-10', 'cifar-100'", - required=True, type=str - ) - parser.add_argument( - '-m', '--model_name', - help="Model name: 'ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'.", - required=True, type=str - ) - parser.add_argument( - '-e', '--epochs', - help="The number of epochs to train for.", - required=False, 
type=int - ) - parser.add_argument( - '-b', '--batch_size', - help="The batch size to use for training.", - required=False, type=int - ) - parser.add_argument( - '-r', '--noise_ratio', - help="The percentage of noisy labels [0, 100].", - required=False, type=int - ) - parser.set_defaults(epochs=150) - parser.set_defaults(batch_size=128) - parser.set_defaults(noise_ratio=0) - - os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -# args = parser.parse_args() -# main(args) - - args = parser.parse_args(['-d', 'cifar-10', '-m', 'd2l', - '-e', '120', '-b', '128', - '-r', '60']) - main(args) - - K.clear_session() diff --git a/trainer.py b/trainer.py new file mode 100644 index 0000000..322d843 --- /dev/null +++ b/trainer.py @@ -0,0 +1,83 @@ +import time +import torch +import os +from util import log_display, accuracy, AverageMeter + +if torch.cuda.is_available(): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + torch.backends.cudnn.deterministic = True + device = torch.device('cuda') +else: + device = torch.device('cpu') + + +class Trainer(): + def __init__(self, data_loader, logger, config, name='Trainer', metrics='classfication'): + self.data_loader = data_loader + self.logger = logger + self.name = name + self.step = 0 + self.config = config + self.log_frequency = config.log_frequency + self.loss_meters = AverageMeter() + self.acc_meters = AverageMeter() + self.acc5_meters = AverageMeter() + self.report_metrics = self.classfication_metrics if metrics == 'classfication' else self.regression_metrics + + def train(self, epoch, GLOBAL_STEP, model, optimizer, criterion): + model.train() + for images, labels in self.data_loader: + images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True) + self.train_batch(images, labels, model, criterion, optimizer) + self.log(epoch, GLOBAL_STEP) + GLOBAL_STEP += 1 + return GLOBAL_STEP + + def train_batch(self, x, y, model, criterion, optimizer): + start = time.time() + model.zero_grad() + optimizer.zero_grad() + pred, _ = model(x) + loss = criterion(pred, y) + loss.backward() + grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), self.config.grad_bound) + optimizer.step() + self.report_metrics(pred, y, loss) + self.logger_payload['lr'] = optimizer.param_groups[0]['lr'], + self.logger_payload['|gn|'] = grad_norm + end = time.time() + self.step += 1 + self.time_used = end - start + + def log(self, epoch, GLOBAL_STEP): + if GLOBAL_STEP % self.log_frequency == 0: + display = log_display(epoch=epoch, + global_step=GLOBAL_STEP, + time_elapse=self.time_used, + **self.logger_payload) + self.logger.info(display) + + def classfication_metrics(self, x, y, loss): + acc, acc5 = accuracy(x, y, topk=(1, 5)) + self.loss_meters.update(loss.item(), y.shape[0]) + self.acc_meters.update(acc.item(), y.shape[0]) + self.acc5_meters.update(acc5.item(), y.shape[0]) + self.logger_payload = {"acc": acc, + "acc_avg": self.acc_meters.avg, + "loss": loss, + "loss_avg": self.loss_meters.avg} + + def regression_metrics(self, x, y, loss): + diff = abs((x - y).mean().detach().item()) + self.loss_meters.update(loss.item(), y.shape[0]) + self.acc_meters.update(diff, y.shape[0]) + self.logger_payload = {"|diff|": diff, + "|diff_avg|": self.acc_meters.avg, + "loss": loss, + "loss_avg": self.loss_meters.avg} + + def _reset_stats(self): + self.loss_meters.reset() + self.acc_meters.reset() + self.acc5_meters.reset() diff --git a/util.py b/util.py index 4ed9a45..c537396 100644 --- a/util.py +++ b/util.py @@ -1,245 +1,147 @@ -from __future__ 
import absolute_import -from __future__ import print_function - +import logging import os -import multiprocessing as mp -from subprocess import call -import warnings +import torch import numpy as np -from numpy.testing import assert_array_almost_equal -from sklearn.preprocessing import MinMaxScaler -import keras.backend as K -from scipy.spatial.distance import pdist, cdist, squareform -from keras.callbacks import ModelCheckpoint, Callback -from keras.callbacks import LearningRateScheduler -import tensorflow as tf - -# Set random seed -np.random.seed(123) - - -def lid(logits, k=20): - """ - Calculate LID for a minibatch of training samples based on the outputs of the network. - - :param logits: - :param k: - :return: - """ - epsilon = 1e-12 - batch_size = tf.shape(logits)[0] - # n_samples = logits.get_shape().as_list() - # calculate pairwise distance - r = tf.reduce_sum(logits * logits, 1) - # turn r into column vector - r1 = tf.reshape(r, [-1, 1]) - D = r1 - 2 * tf.matmul(logits, tf.transpose(logits)) + tf.transpose(r1) + \ - tf.ones([batch_size, batch_size]) - - # find the k nearest neighbor - D1 = -tf.sqrt(D) - D2, _ = tf.nn.top_k(D1, k=k, sorted=True) - D3 = -D2[:, 1:] # skip the x-to-x distance 0 by using [,1:] - - m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1])) - v_log = tf.reduce_sum(tf.log(m + epsilon), axis=1) # to avoid nan - lids = -k / v_log - return lids - - -def mle_single(data, x, k): - """ - lid of a single query point x. - numpy implementation. - - :param data: - :param x: - :param k: - :return: - """ - data = np.asarray(data, dtype=np.float32) - x = np.asarray(x, dtype=np.float32) - if x.ndim == 1: - x = x.reshape((-1, x.shape[0])) - # dim = x.shape[1] - - k = min(k, len(data) - 1) - f = lambda v: - k / np.sum(np.log(v / v[-1] + 1e-8)) - a = cdist(x, data) - a = np.apply_along_axis(np.sort, axis=1, arr=a)[:, 1:k + 1] - a = np.apply_along_axis(f, axis=1, arr=a) - return a[0] - - -def mle_batch(data, batch, k): - """ - lid of a batch of query points X. - numpy implementation. 
- - :param data: - :param batch: - :param k: - :return: - """ - data = np.asarray(data, dtype=np.float32) - batch = np.asarray(batch, dtype=np.float32) - - k = min(k, len(data) - 1) - f = lambda v: - k / np.sum(np.log(v / v[-1] + 1e-8)) - a = cdist(batch, data) - a = np.apply_along_axis(np.sort, axis=1, arr=a)[:, 1:k + 1] - a = np.apply_along_axis(f, axis=1, arr=a) - return a - - -def other_class(n_classes, current_class): - """ - Returns a list of class indices excluding the class indexed by class_ind - :param nb_classes: number of classes in the task - :param class_ind: the class index to be omitted - :return: one random class that != class_ind - """ - if current_class < 0 or current_class >= n_classes: - error_str = "class_ind must be within the range (0, nb_classes - 1)" - raise ValueError(error_str) - - other_class_list = list(range(n_classes)) - other_class_list.remove(current_class) - other_class = np.random.choice(other_class_list) - return other_class - - -def get_lids_random_batch(model, X, k=20, batch_size=128): +import torch.nn.functional as F +from lid import lid_mle +from lass import lass + +class AverageMeter(object): + def __init__(self): + self.reset() + + def reset(self): + self.avg = 0 + self.sum = 0 + self.cnt = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.cnt += n + self.avg = self.sum / self.cnt + + +def accuracy(output, target, topk=(1,)): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].contiguous().view(-1).float().sum(0) + res.append(correct_k.mul_(1.0/batch_size)) + return res + + +def log_display(epoch, global_step, time_elapse, **kwargs): + display = 'epoch=' + str(epoch) + \ + '\tglobal_step=' + str(global_step) + for key, value in kwargs.items(): + display += '\t' + str(key) + '=%.5f' % value + display += '\ttime=%.2fit/s' % (1. 
/ time_elapse) + return display + + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + + +def setup_logger(name, log_file, level=logging.INFO): + """To setup as many loggers as you want""" + formatter = logging.Formatter('%(asctime)s %(message)s') + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(formatter) + logger = logging.getLogger(name) + logger.setLevel(level) + logger.addHandler(file_handler) + logger.addHandler(console_handler) + return logger + + +def build_dirs(path): + if not os.path.exists(path): + os.makedirs(path) + return + + +def save_model(filename, model, optimizer, scheduler, epoch, **kwargs): + # Torch Save State Dict + state = { + 'epoch': epoch+1, + 'model': model.state_dict() if model is not None else None, + 'optimizer': optimizer.state_dict() if optimizer is not None else None, + 'scheduler': scheduler.state_dict() if scheduler is not None else None, + } + for key, value in kwargs.items(): + state[key] = value + torch.save(state, filename+'.pth') + return + + +def load_model(filename, model, optimizer, scheduler, **kwargs): + checkpoints = torch.load(filename + '.pth') + if model is not None and checkpoints['model'] is not None: + model.load_state_dict(checkpoints['model']) + if optimizer is not None and checkpoints['optimizer'] is not None: + optimizer.load_state_dict(checkpoints['optimizer']) + if scheduler is not None and checkpoints['scheduler'] is not None: + scheduler.load_state_dict(checkpoints['scheduler']) + print("%s Loaded!" % (filename)) + return checkpoints + + +def count_parameters_in_MB(model): + return sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary_head" not in name)/1e6 + + +def get_lids_random_batch(model, data_loader, device, k=20, batch_size=128, batch_num=10): """ Get the local intrinsic dimensionality of each Xi in X_adv estimated by k close neighbours in the random batch it lies in. 
- :param model: if None: lid of raw inputs, otherwise LID of deep representations - :param X: normal images - :param k: the number of nearest neighbours for LID estimation - :param batch_size: default 100 - :return: lids: LID of normal images of shape (num_examples, lid_dim) - lids_adv: LID of advs images of shape (num_examples, lid_dim) """ - if model is None: - lids = [] - n_batches = int(np.ceil(X.shape[0] / float(batch_size))) - for i_batch in range(n_batches): - start = i_batch * batch_size - end = np.minimum(len(X), (i_batch + 1) * batch_size) - X_batch = X[start:end].reshape((end - start, -1)) - - # Maximum likelihood estimation of local intrinsic dimensionality (LID) - lid_batch = mle_batch(X_batch, X_batch, k=k) - lids.extend(lid_batch) - - lids = np.asarray(lids, dtype=np.float32) - return lids - - # get deep representations - funcs = [K.function([model.layers[0].input, K.learning_phase()], [out]) - for out in [model.get_layer("lid").output]] - lid_dim = len(funcs) - - # print("Number of layers to estimate: ", lid_dim) - - def estimate(i_batch): - start = i_batch * batch_size - end = np.minimum(len(X), (i_batch + 1) * batch_size) - n_feed = end - start - lid_batch = np.zeros(shape=(n_feed, lid_dim)) - for i, func in enumerate(funcs): - X_act = func([X[start:end], 0])[0] - X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1)) - - # Maximum likelihood estimation of local intrinsic dimensionality (LID) - lid_batch[:, i] = mle_batch(X_act, X_act, k=k) - - return lid_batch lids = [] - n_batches = int(np.ceil(X.shape[0] / float(batch_size))) - for i_batch in range(n_batches): - lid_batch = estimate(i_batch) - lids.extend(lid_batch) + model.eval() + + def estimate(images): + images = images.to(device, non_blocking = True) + #get the output of the second-to-last layer of the network + with torch.no_grad(): + _, X_act = model(images) + + lid_batch = lid_mle(X_act, X_act, k=k) + return lid_batch - lids = np.asarray(lids, dtype=np.float32) + + for j, (images,labels) in enumerate(data_loader['train_dataset']): + if j < batch_num: + lid_batch = estimate(images) + lids.extend(lid_batch) + lids = torch.stack(lids, dim=0).type(torch.float32) return lids - -def get_lr_scheduler(dataset): - """ - customerized learning rate decay for training with clean labels. - For efficientcy purpose we use large lr for noisy data. - :param dataset: - :param noise_ratio: - :return: - """ - if dataset in ['mnist', 'svhn']: - def scheduler(epoch): - if epoch > 40: - return 0.001 - elif epoch > 20: - return 0.01 - else: - return 0.1 - - return LearningRateScheduler(scheduler) - elif dataset in ['cifar-10']: - def scheduler(epoch): - if epoch > 80: - return 0.001 - elif epoch > 40: - return 0.01 - else: - return 0.1 - - return LearningRateScheduler(scheduler) - elif dataset in ['cifar-100']: - def scheduler(epoch): - if epoch > 120: - return 0.001 - elif epoch > 80: - return 0.01 - else: - return 0.1 - - return LearningRateScheduler(scheduler) - - -def uniform_noise_model_P(num_classes, noise): - """ The noise matrix flips any class to any other with probability - noise / (num_classes - 1). - """ - - assert (noise >= 0.) and (noise <= 1.) - - P = noise / (num_classes - 1) * np.ones((num_classes, num_classes)) - np.fill_diagonal(P, (1 - noise) * np.ones(num_classes)) - - assert_array_almost_equal(P.sum(axis=1), 1, 1) - return P - - -def get_deep_representations(model, X, batch_size=128): - """ - Get the deep representations before logits. 
- :param model: - :param X: - :param batch_size: - :return: - """ - # last hidden layer is always at index -4 - output_dim = model.layers[-3].output.shape[-1].value - get_encoding = K.function( - [model.layers[0].input, K.learning_phase()], - [model.layers[-3].output] - ) - - n_batches = int(np.ceil(X.shape[0] / float(batch_size))) - output = np.zeros(shape=(len(X), output_dim)) - for i in range(n_batches): - output[i * batch_size:(i + 1) * batch_size] = \ - get_encoding([X[i * batch_size:(i + 1) * batch_size], 0])[0] - - return output +def get_csr_random_batch(model, data_loader, device, batch_size=128, batch_num=4): + model.eval() + adv_ind_sum = 0 + for j, (images,labels) in enumerate(data_loader['test_dataset']): + if j < batch_num: + images = images.to(device, non_blocking = True) + scale_factor = 255. / (torch.max(images) - torch.min(images)) + #scale_factor = 1 + csr_model = lass(model, device, a=0.25 / scale_factor, b=0.2 / scale_factor, r=0.3 / scale_factor, iter_max=100) + X_adv, adv_ind = csr_model.find(images) + adv_ind_sum += torch.sum(adv_ind) + + samples_num = batch_num * batch_size + csr = adv_ind_sum * 1. / samples_num + return csr + \ No newline at end of file
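
The patch adds `trainer.py` and the rewritten `util.py` but no standalone example of how they are driven. Below is a minimal, hypothetical usage sketch, not part of the patch: `ToyNet`, the random tensors, and the `SimpleNamespace` config values are invented for illustration. The only assumptions taken from the code above are that the model's forward returns `(logits, features)`, that the config object exposes `log_frequency` and `grad_bound`, and that `util.py`'s own imports (`lid.py`, `lass.py`) resolve from the repository root. In the repository itself these objects are built by the config-driven entry point; the sketch only shows the calling convention.

```python
# Hypothetical sketch of driving the new Trainer and LID utilities.
import logging
from types import SimpleNamespace

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from trainer import Trainer                # new module added by this patch
from util import get_lids_random_batch     # rewritten in this patch


class ToyNet(nn.Module):
    """Tiny classifier returning (logits, penultimate features), matching the
    `pred, _ = model(x)` convention that Trainer and util.py rely on."""

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(nn.Flatten(),
                                      nn.Linear(3 * 32 * 32, 64),
                                      nn.ReLU())
        self.classifier = nn.Linear(64, num_classes)

    def forward(self, x):
        feat = self.features(x)
        return self.classifier(feat), feat


if __name__ == '__main__':
    # Random CIFAR-shaped tensors stand in for a real (noisy-label) dataset.
    images = torch.randn(256, 3, 32, 32)
    labels = torch.randint(0, 10, (256,))
    loader = DataLoader(TensorDataset(images, labels), batch_size=32, shuffle=True)

    # Trainer only reads these two attributes from the config object.
    config = SimpleNamespace(log_frequency=2, grad_bound=5.0)
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('sketch')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = ToyNet().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    trainer = Trainer(loader, logger, config)
    global_step = 0
    for epoch in range(2):
        global_step = trainer.train(epoch, global_step, model, optimizer, criterion)

    # LID of the learned representations over the first two training batches;
    # get_lids_random_batch expects a dict of loaders keyed by 'train_dataset'.
    lids = get_lids_random_batch(model, {'train_dataset': loader}, device,
                                 k=20, batch_num=2)
    logger.info('mean LID estimate: %.3f' % lids.mean().item())
```

Replace the random tensors with the real data loader and the `SimpleNamespace` with the experiment config to reproduce the actual training loop.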