diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..dfe0770
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e8ea998
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,20 @@
+*.pyc
+checkpoints
+data
+logs
+.DS_Store
+.ipynb_checkpoints
+plots_paper_figures.py
+plot_results.ipynb
+plots_paper_figures_iccv.py
+plot/
+slurm/
+results/
+ProcessWebVision.ipynb
+results
+configs/webvision_full
+configs/webvision_imagenet
+configs/clothing1m
+script/WebVisionFull.slurm
+script/WebVisionFull_ImageNet.slurm
+script/Clothing1M.slurm
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6b7ecd1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 HanxunHuangLemonBear
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 3160e8f..ef3eeaf 100644
--- a/README.md
+++ b/README.md
@@ -1,55 +1,68 @@
-### Code for ICML 2018 paper "Dimensionality-Driven Learning with Noisy Labels".
+# Normalized Loss Functions - Active Passive Losses
+Code for ICML2020 Paper ["Normalized Loss Functions for Deep Learning with Noisy Labels"](https://arxiv.org/abs/2006.13554)
-#### - Update (2018.07): Issues fixed on CIFAR-10.
-#### - Update (2019.10): Start training with symmetric cross entropy (SCE) loss (replacing cross entropy).
+## Requirements
+```console
+Python >= 3.6, PyTorch >= 1.3.1, torchvision >= 0.4.1, mlconfig
+```
-The Symmetric Cross Entropy (SCE) was demonstrated can improve several exisiting methods including the D2L:
-ICCV2019 "Symmetric Cross Entropy for Robust Learning with Noisy Labels"
-https://arxiv.org/abs/1908.06112
-https://github.com/YisenWang/symmetric_cross_entropy_for_noisy_labels
+## How To Run
+##### Configs for the experiment settings
+Check the '*.yaml' files in the configs folder for each experiment.
-#### - Update (2020.03): convergence issue on CIFAR-100 when using SCE loss: learning rate, data augmentation and parameters for SCE.
+##### Arguments
+* noise_rate: noise rate
+* asym: use asymmetric noise (default is symmetric)
+* config_path: path to the configs folder
+* version: the config file name
+* exp_name: name of the experiment (used as a note)
+* seed: random seed
+Example for 0.4 symmetric noise rate with the NCE+RCE loss:
+```console
+# CIFAR-10
+$ python3 main.py --exp_name test_exp \
+ --noise_rate 0.4 \
+ --version nce+rce \
+ --config_path configs/cifar10/sym \
+ --seed 123
+
-### 1. Train DNN models using command line:
+# CIFAR-100
+$ python3 main.py --exp_name test_exp \
+ --noise_rate 0.4 \
+ --version nce+rce \
+ --config_path configs/cifar100/sym \
+ --seed 123
+```
+Example for plotting the lid_trend_through_training curve at 0.4 symmetric noise rate with D2L learning:
+```console
+# CIFAR-10
+$ python3 main.py --exp_name test_exp \
+ --noise_rate 0.4 \
+ --version d2l \
+ --config_path configs/cifar10/sym \
+ --seed 123 \
+    --plot
+```
-An example:
+Example for plotting the LID, accuracy, and CSR trends of different learning methods throughout training at 0.4 symmetric noise rate:
+```console
+# CIFAR-10
+$ python3 main.py --exp_name test_exp \
+ --noise_rate 0.4 \
+ --config_path configs/cifar10/sym \
+ --seed 123 \
+    --plotall
+```
-```
-python train_model.py -d mnist -m d2l -e 50 -b 128 -r 40
-```
+## Citing this work
+If you use this code in your work, please cite the accompanying paper:
-`-d`: dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100']
-`-m`: model in ['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l']
-`-e`: epoch, `-b`: batch size, `-r`: noise rate in [0, 100]
-
-
-### 2. Run with pre-set parameters in main function of train_model.py:
-```python
- # mnist example
- args = parser.parse_args(['-d', 'mnist', '-m', 'd2l',
- '-e', '50', '-b', '128',
- '-r', '40'])
- main(args)
-
- # svhn example
- args = parser.parse_args(['-d', 'svhn', '-m', 'd2l',
- '-e', '50', '-b', '128',
- '-r', '40'])
- main(args)
-
- # cifar-10 example
- args = parser.parse_args(['-d', 'cifar-10', '-m', 'd2l',
- '-e', '120', '-b', '128',
- '-r', '40'])
- main(args)
-
- # cifar-100 example
- args = parser.parse_args(['-d', 'cifar-100', '-m', 'd2l',
- '-e', '200', '-b', '128',
- '-r', '40'])
- main(args)
+```
+@inproceedings{ma2020normalized,
+ title={Normalized Loss Functions for Deep Learning with Noisy Labels},
+ author={Ma, Xingjun and Huang, Hanxun and Wang, Yisen and Romano, Simone and Erfani, Sarah and Bailey, James},
+ booktitle={ICML},
+ year={2020}
+}
```
-#### Requirements:
-tensorflow, Keras, numpy, scipy, sklearn, matplotlib
diff --git a/archive/dataset.py b/archive/dataset.py
new file mode 100644
index 0000000..adbf5b0
--- /dev/null
+++ b/archive/dataset.py
@@ -0,0 +1,586 @@
+from torchvision import datasets, transforms
+from torch.utils.data import DataLoader
+from PIL import Image
+from tqdm import tqdm
+from numpy.testing import assert_array_almost_equal
+import numpy as np
+import os
+import torch
+import random
+import collections
+
+
+def build_for_cifar100(size, noise):
+ """ random flip between two random classes.
+ """
+ assert(noise >= 0.) and (noise <= 1.)
+
+ P = (1. - noise) * np.eye(size)
+ for i in np.arange(size - 1):
+ P[i, i+1] = noise
+
+ # adjust last row
+ P[size-1, 0] = noise
+
+ assert_array_almost_equal(P.sum(axis=1), 1, 1)
+ return P
+
+
+def multiclass_noisify(y, P, random_state=0):
+ """ Flip classes according to transition probability matrix T.
+ It expects a number between 0 and the number of classes - 1.
+ """
+
+ assert P.shape[0] == P.shape[1]
+ assert np.max(y) < P.shape[0]
+
+ # row stochastic matrix
+ assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1]))
+ assert (P >= 0.0).all()
+
+ m = y.shape[0]
+ new_y = y.copy()
+ flipper = np.random.RandomState(random_state)
+
+ for idx in np.arange(m):
+ i = y[idx]
+ # draw a vector with only an 1
+ flipped = flipper.multinomial(1, P[i, :], 1)[0]
+ new_y[idx] = np.where(flipped == 1)[0]
+
+ return new_y
+
+
+def other_class(n_classes, current_class):
+ """
+    Return a random class index different from current_class.
+    :param n_classes: number of classes in the task
+    :param current_class: the class index to be excluded
+    :return: one random class != current_class
+ """
+ if current_class < 0 or current_class >= n_classes:
+ error_str = "class_ind must be within the range (0, nb_classes - 1)"
+ raise ValueError(error_str)
+
+ other_class_list = list(range(n_classes))
+ other_class_list.remove(current_class)
+ other_class = np.random.choice(other_class_list)
+ return other_class
+
+
+class MNISTNoisy(datasets.MNIST):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0):
+        super(MNISTNoisy, self).__init__(root, train=train, transform=transform, target_transform=target_transform, download=download)
+ self.targets = self.targets.numpy()
+ if asym:
+ P = np.eye(10)
+ n = nosiy_rate
+
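+            # 7 -> 1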
+ P[7, 7], P[7, 1] = 1. - n, n
+ # 2 -> 7
+ P[2, 2], P[2, 7] = 1. - n, n
+
+ # 5 <-> 6
+ P[5, 5], P[5, 6] = 1. - n, n
+ P[6, 6], P[6, 5] = 1. - n, n
+
+ # 3 -> 8
+ P[3, 3], P[3, 8] = 1. - n, n
+
+ y_train_noisy = multiclass_noisify(self.targets, P=P, random_state=seed)
+ actual_noise = (y_train_noisy != self.targets).mean()
+ assert actual_noise > 0.0
+ print('Actual noise %.2f' % actual_noise)
+ self.targets = y_train_noisy
+
+ else:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)]
+ class_noisy = int(n_noisy / 10)
+ noisy_idx = []
+ for d in range(10):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=10, current_class=self.targets[i])
+ print(len(noisy_idx))
+
+ print("Print noisy label generation statistics:")
+ for i in range(10):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+
+ return
+
+
+class cifar10Nosiy(datasets.CIFAR10):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False):
+        super(cifar10Nosiy, self).__init__(root, train=train, download=download, transform=transform, target_transform=target_transform)
+ if asym:
+ # automobile < - truck, bird -> airplane, cat <-> dog, deer -> horse
+ source_class = [9, 2, 3, 5, 4]
+ target_class = [1, 0, 5, 3, 7]
+ for s, t in zip(source_class, target_class):
+ cls_idx = np.where(np.array(self.targets) == s)[0]
+ n_noisy = int(nosiy_rate * cls_idx.shape[0])
+ noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False)
+ for idx in noisy_sample_index:
+ self.targets[idx] = t
+ return
+ elif nosiy_rate > 0:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)]
+ class_noisy = int(n_noisy / 10)
+ noisy_idx = []
+ for d in range(10):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=10, current_class=self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(10):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+ return
+
+
+class cifar100Nosiy(datasets.CIFAR100):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=False, nosiy_rate=0.0, asym=False, seed=0):
+        super(cifar100Nosiy, self).__init__(root, train=train, download=download, transform=transform, target_transform=target_transform)
+ if asym:
+ """mistakes are inside the same superclass of 10 classes, e.g. 'fish'
+ """
+ nb_classes = 100
+ P = np.eye(nb_classes)
+ n = nosiy_rate
+ nb_superclasses = 20
+ nb_subclasses = 5
+
+ if n > 0.0:
+ for i in np.arange(nb_superclasses):
+ init, end = i * nb_subclasses, (i+1) * nb_subclasses
+ P[init:end, init:end] = build_for_cifar100(nb_subclasses, n)
+
+ y_train_noisy = multiclass_noisify(np.array(self.targets), P=P, random_state=seed)
+ actual_noise = (y_train_noisy != np.array(self.targets)).mean()
+ assert actual_noise > 0.0
+ print('Actual noise %.2f' % actual_noise)
+ self.targets = y_train_noisy.tolist()
+ return
+ elif nosiy_rate > 0:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(100)]
+ class_noisy = int(n_noisy / 100)
+ noisy_idx = []
+ for d in range(100):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=100, current_class=self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(100):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+ return
+
+
+class DatasetGenerator():
+ def __init__(self,
+ batchSize=128,
+ eval_batch_size=256,
+ dataPath='data/',
+ seed=123,
+ numOfWorkers=4,
+ asym=False,
+ dataset_type='cifar10',
+ is_cifar100=False,
+ cutout_length=16,
+ noise_rate=0.4):
+ self.seed = seed
+ np.random.seed(seed)
+ self.batchSize = batchSize
+ self.eval_batch_size = eval_batch_size
+ self.dataPath = dataPath
+ self.numOfWorkers = numOfWorkers
+ self.cutout_length = cutout_length
+ self.noise_rate = noise_rate
+ self.dataset_type = dataset_type
+ self.asym = asym
+ self.data_loaders = self.loadData()
+ return
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ if self.dataset_type == 'mnist':
+ MEAN = [0.1307]
+ STD = [0.3081]
+ train_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(MEAN, STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(MEAN, STD)])
+
+ train_dataset = MNISTNoisy(root=self.dataPath,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ seed=self.seed,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.MNIST(root=self.dataPath,
+ train=False,
+ transform=test_transform,
+ download=True)
+
+ elif self.dataset_type == 'cifar100':
+ CIFAR_MEAN = [0.5071, 0.4865, 0.4409]
+ CIFAR_STD = [0.2673, 0.2564, 0.2762]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.RandomRotation(20),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ train_dataset = cifar100Nosiy(root=self.dataPath,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ seed=self.seed,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.CIFAR100(root=self.dataPath,
+ train=False,
+ transform=test_transform,
+ download=True)
+
+ elif self.dataset_type == 'cifar10':
+ CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
+ CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ train_dataset = cifar10Nosiy(root=self.dataPath,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.CIFAR10(root=self.dataPath,
+ train=False,
+ transform=test_transform,
+ download=True)
+ else:
+ raise("Unknown Dataset")
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.batchSize,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ print("Num of train %d" % (len(train_dataset)))
+ print("Num of test %d" % (len(test_dataset)))
+
+ return data_loaders
+
+
+class Clothing1MDataset:
+ def __init__(self, path, type='train', transform=None, target_transform=None):
+ self.path = path
+ if type == 'test':
+ flist = os.path.join(path, "annotations/clean_test.txt")
+ elif type == 'valid':
+ flist = os.path.join(path, "annotations/clean_val.txt")
+ elif type == 'train':
+ flist = os.path.join(path, "annotations/noisy_train.txt")
+ else:
+            raise ValueError('Unknown type')
+
+ self.imlist = self.flist_reader(flist)
+ self.transform = transform
+
+ def __len__(self):
+ return len(self.imlist)
+
+ def __getitem__(self, index):
+ impath, target = self.imlist[index]
+ img = Image.open(impath).convert("RGB")
+ if self.transform is not None:
+ img = self.transform(img)
+ return img, target
+
+ def flist_reader(self, flist):
+ imlist = []
+ with open(flist, 'r') as rf:
+ for line in rf.readlines():
+ row = line.split(" ")
+ impath = self.path + row[0]
+ imlabel = row[1]
+ imlist.append((impath, int(imlabel)))
+ return imlist
+
+
+class Clothing1MDatasetLoader:
+ def __init__(self, batchSize=128, eval_batch_size=256, dataPath='data/', numOfWorkers=4):
+ self.batchSize = batchSize
+ self.eval_batch_size = eval_batch_size
+ self.dataPath = dataPath
+ self.numOfWorkers = numOfWorkers
+ self.data_loaders = self.loadData()
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ MEAN = [0.6959, 0.6537, 0.6371]
+ STD = [0.3113, 0.3192, 0.3214]
+ train_transform = transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=MEAN, std=STD),
+ ])
+ test_transform = transforms.Compose([
+ transforms.Resize((224, 224)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=MEAN, std=STD)
+ ])
+
+ train_dataset = Clothing1MDataset(path=self.dataPath,
+ type='train',
+ transform=train_transform)
+
+ test_dataset = Clothing1MDataset(path=self.dataPath,
+ type='test',
+ transform=test_transform)
+
+ valid_dataset = Clothing1MDataset(path=self.dataPath,
+ type='valid',
+ transform=test_transform)
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.batchSize,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ data_loaders['valid_dataset'] = DataLoader(dataset=valid_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+ return data_loaders
+
+
+class NosieImageNet(datasets.ImageNet):
+ def __init__(self, root, split='train', seed=999, download=None, target_class_num=200, nosiy_rate=0.4, **kwargs):
+ super(NosieImageNet, self).__init__(root, download=download, split=split, **kwargs)
+ random.seed(seed)
+ np.random.seed(seed)
+ self.new_idx = random.sample(list(range(0, 1000)), k=target_class_num)
+ print(len(self.new_idx), len(self.imgs))
+ self.new_imgs = []
+ self.new_targets = []
+
+ for file, cls_id in self.imgs:
+ if cls_id in self.new_idx:
+ new_idx = self.new_idx.index(cls_id)
+ self.new_imgs.append((file, new_idx))
+ self.new_targets.append(new_idx)
+ self.imgs = self.new_imgs
+ self.targets = self.new_targets
+ print(min(self.targets), max(self.targets))
+ # Noise
+ if split == 'train':
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(target_class_num)]
+ class_noisy = int(n_noisy / target_class_num)
+ noisy_idx = []
+ for d in range(target_class_num):
+ print(len(class_index[d]), d)
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=target_class_num, current_class=self.targets[i])
+ (file, old_idx) = self.imgs[i]
+ self.imgs[i] = (file, self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(target_class_num):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+
+ self.samples = self.imgs
+
+
+class ImageNetDatasetLoader:
+ def __init__(self,
+ batchSize=128,
+ eval_batch_size=256,
+ dataPath='data/',
+ seed=999,
+ target_class_num=200,
+ nosiy_rate=0.4,
+ numOfWorkers=4):
+ self.batchSize = batchSize
+ self.eval_batch_size = eval_batch_size
+ self.dataPath = dataPath
+ self.numOfWorkers = numOfWorkers
+ self.seed = seed
+ self.target_class_num = target_class_num
+ self.nosiy_rate = nosiy_rate
+ self.data_loaders = self.loadData()
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+
+ train_transform = transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ColorJitter(brightness=0.4,
+ contrast=0.4,
+ saturation=0.4,
+ hue=0.2),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ train_dataset = NosieImageNet(root=self.dataPath,
+ split='train',
+ nosiy_rate=self.nosiy_rate,
+ target_class_num=self.target_class_num,
+ seed=self.seed,
+ transform=train_transform,
+ download=True)
+
+ test_dataset = NosieImageNet(root=self.dataPath,
+ split='val',
+ nosiy_rate=self.nosiy_rate,
+ target_class_num=self.target_class_num,
+ seed=self.seed,
+ transform=test_transform,
+ download=True)
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.batchSize,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.batchSize,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+ return data_loaders
+
+
+
+
+
+
+def online_mean_and_sd(loader):
+ """Compute the mean and sd in an online fashion
+
+ Var[x] = E[X^2] - E^2[X]
+ """
+ cnt = 0
+ fst_moment = torch.empty(3)
+ snd_moment = torch.empty(3)
+
+ for data, _ in tqdm(loader):
+
+ b, c, h, w = data.shape
+ nb_pixels = b * h * w
+ sum_ = torch.sum(data, dim=[0, 2, 3])
+ sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3])
+ fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
+ snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)
+
+ cnt += nb_pixels
+
+ return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2)
+
+
+if __name__ == '__main__':
+ # train_transform = transforms.Compose([
+ # transforms.Resize((224, 224)),
+ # transforms.ToTensor(),
+ # ])
+ # test = Clothing1MDataset(path='../datasets/clothing1M', transform=train_transform)
+ # loader = DataLoader(test,
+ # batch_size=128,
+ # num_workers=12,
+ # shuffle=True)
+ # mean, std = online_mean_and_sd(loader)
+ # print(mean)
+ # print(std)
+ #
+ # '''
+ # tensor([0.7215, 0.6846, 0.6679])
+ # tensor([0.3021, 0.3122, 0.3167])
+ # '''
+ train = NosieImageNet(root='../datasets/ILSVR2012', split='train')
+ valid = NosieImageNet(root='../datasets/ILSVR2012', split='val')
diff --git a/archive/loss.py b/archive/loss.py
new file mode 100644
index 0000000..3a83488
--- /dev/null
+++ b/archive/loss.py
@@ -0,0 +1,495 @@
+import torch
+import torch.nn.functional as F
+import numpy as np
+
+if torch.cuda.is_available():
+ torch.backends.cudnn.benchmark = True
+ if torch.cuda.device_count() > 1:
+ device = torch.device('cuda:0')
+ else:
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
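+# The Normalized* losses and the paired "...and..." losses below implement the Active
+# Passive Loss (APL) framework of the paper: a loss L is "normalized" by dividing
+# L(f(x), y) by the sum of L(f(x), j) over all classes j, and an APL loss pairs an
+# active term (e.g. NCE, NFL) with a passive term (e.g. RCE, MAE), as in NCEandRCE
+# and NFLandRCE. The remaining losses are baselines used for comparison.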
+class SCELoss(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes=10):
+ super(SCELoss, self).__init__()
+ self.device = device
+ self.alpha = alpha
+ self.beta = beta
+ self.num_classes = num_classes
+ self.cross_entropy = torch.nn.CrossEntropyLoss()
+
+ def forward(self, pred, labels):
+ # CCE
+ ce = self.cross_entropy(pred, labels)
+
+ # RCE
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
+
+ # Loss
+ loss = self.alpha * ce + self.beta * rce.mean()
+ return loss
+
+
+class ReverseCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(ReverseCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
+ return self.scale * rce.mean()
+
+
+class NormalizedReverseCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedReverseCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ normalizor = 1 / 4 * (self.num_classes - 1)
+ rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
+ return self.scale * normalizor * rce.mean()
+
+
+class NormalizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.log_softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
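+        # NCE = CE(p, y) / sum_j CE(p, j); with log-softmax outputs this is
+        # (-sum_k y_k log p_k) / (-sum_j log p_j)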
+ nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1))
+ return self.scale * nce.mean()
+
+
+class GeneralizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, q=0.7):
+ super(GeneralizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.q = q
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ gce = (1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)) / self.q
+ return gce.mean()
+
+
+class NormalizedGeneralizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0, q=0.7):
+ super(NormalizedGeneralizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.q = q
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
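+        # NGCE = GCE(p, y) / sum_j GCE(p, j); the common 1/q factor cancels, giving
+        # (1 - p_y^q) / (K - sum_j p_j^q)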
+ numerators = 1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)
+ denominators = self.num_classes - pred.pow(self.q).sum(dim=1)
+ ngce = numerators / denominators
+ return self.scale * ngce.mean()
+
+
+class MeanAbsoluteError(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(MeanAbsoluteError, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+ return
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ mae = 1. - torch.sum(label_one_hot * pred, dim=1)
+ return self.scale * mae.mean()
+
+
+class NormalizedMeanAbsoluteError(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedMeanAbsoluteError, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+ return
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
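+        # normalizing constant: summing (1 - p_j) over all K classes gives K - 1,
+        # so the 1 / (2 * (K - 1)) factor keeps NMAE bounded regardless of num_classes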
+ normalizor = 1 / (2 * (self.num_classes - 1))
+ mae = 1. - torch.sum(label_one_hot * pred, dim=1)
+ return self.scale * normalizor * mae.mean()
+
+
+class NCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(NCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nce(pred, labels) + self.rce(pred, labels)
+
+
+class NCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(NCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nce(pred, labels) + self.mae(pred, labels)
+
+
+class GCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.mae(pred, labels)
+
+
+class GCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.rce(pred, labels)
+
+
+class GCEandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.nce = NormalizedCrossEntropy(num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.nce(pred, labels)
+
+
+class NGCEandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.nce(pred, labels)
+
+
+class NGCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.mae(pred, labels)
+
+
+class NGCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.rce(pred, labels)
+
+
+class MAEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(MAEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.mae = MeanAbsoluteError(scale=alpha, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.mae(pred, labels) + self.rce(pred, labels)
+
+
+class NLNL(torch.nn.Module):
+ def __init__(self, train_loader, num_classes, ln_neg=1):
+ super(NLNL, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.ln_neg = ln_neg
+ weight = torch.FloatTensor(num_classes).zero_() + 1.
+ if not hasattr(train_loader.dataset, 'targets'):
+ weight = [1] * num_classes
+ weight = torch.FloatTensor(weight)
+ else:
+ for i in range(num_classes):
+ weight[i] = (torch.from_numpy(np.array(train_loader.dataset.targets)) == i).sum()
+ weight = 1 / (weight / weight.max())
+ self.weight = weight.to(self.device)
+ self.criterion = torch.nn.CrossEntropyLoss(weight=self.weight)
+ self.criterion_nll = torch.nn.NLLLoss()
+
+ def forward(self, pred, labels):
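+        # negative learning: draw ln_neg complementary labels per sample, each guaranteed
+        # to differ from the given (possibly noisy) label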
+ labels_neg = (labels.unsqueeze(-1).repeat(1, self.ln_neg)
+ + torch.LongTensor(len(labels), self.ln_neg).to(self.device).random_(1, self.num_classes)) % self.num_classes
+ labels_neg = torch.autograd.Variable(labels_neg)
+
+ assert labels_neg.max() <= self.num_classes-1
+ assert labels_neg.min() >= 0
+ assert (labels_neg != labels.unsqueeze(-1).repeat(1, self.ln_neg)).sum() == len(labels)*self.ln_neg
+
+ s_neg = torch.log(torch.clamp(1. - F.softmax(pred, 1), min=1e-5, max=1.))
+ s_neg *= self.weight[labels].unsqueeze(-1).expand(s_neg.size()).to(self.device)
+ labels = labels * 0 - 100
+ loss = self.criterion(pred, labels) * float((labels >= 0).sum())
+ loss_neg = self.criterion_nll(s_neg.repeat(self.ln_neg, 1), labels_neg.t().contiguous().view(-1)) * float((labels_neg >= 0).sum())
+ loss = ((loss+loss_neg) / (float((labels >= 0).sum())+float((labels_neg[:, 0] >= 0).sum())))
+ return loss
+
+
+class FocalLoss(torch.nn.Module):
+ '''
+ https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
+ '''
+
+ def __init__(self, gamma=0, alpha=None, size_average=True):
+ super(FocalLoss, self).__init__()
+ self.gamma = gamma
+ self.alpha = alpha
+ if isinstance(alpha, (float, int)):
+ self.alpha = torch.Tensor([alpha, 1-alpha])
+ if isinstance(alpha, list):
+ self.alpha = torch.Tensor(alpha)
+ self.size_average = size_average
+
+ def forward(self, input, target):
+ if input.dim() > 2:
+ input = input.view(input.size(0), input.size(1), -1) # N,C,H,W => N,C,H*W
+ input = input.transpose(1, 2) # N,C,H*W => N,H*W,C
+ input = input.contiguous().view(-1, input.size(2)) # N,H*W,C => N*H*W,C
+ target = target.view(-1, 1)
+
+ logpt = F.log_softmax(input, dim=1)
+ logpt = logpt.gather(1, target)
+ logpt = logpt.view(-1)
+ pt = torch.autograd.Variable(logpt.data.exp())
+
+ if self.alpha is not None:
+ if self.alpha.type() != input.data.type():
+ self.alpha = self.alpha.type_as(input.data)
+ at = self.alpha.gather(0, target.data.view(-1))
+ logpt = logpt * torch.autograd.Variable(at)
+
+ loss = -1 * (1-pt)**self.gamma * logpt
+ if self.size_average:
+ return loss.mean()
+ else:
+ return loss.sum()
+
+
+class NormalizedFocalLoss(torch.nn.Module):
+ def __init__(self, scale=1.0, gamma=0, num_classes=10, alpha=None, size_average=True):
+ super(NormalizedFocalLoss, self).__init__()
+ self.gamma = gamma
+ self.size_average = size_average
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, input, target):
+ target = target.view(-1, 1)
+ logpt = F.log_softmax(input, dim=1)
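+        # normalizing constant: the focal term -(1 - p_j)^gamma * log p_j summed over all classes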
+ normalizor = torch.sum(-1 * (1 - logpt.data.exp()) ** self.gamma * logpt, dim=1)
+ logpt = logpt.gather(1, target)
+ logpt = logpt.view(-1)
+ pt = torch.autograd.Variable(logpt.data.exp())
+ loss = -1 * (1-pt)**self.gamma * logpt
+ loss = self.scale * loss / normalizor
+
+ if self.size_average:
+ return loss.mean()
+ else:
+ return loss.sum()
+
+
+class NFLandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.nce(pred, labels)
+
+
+class NFLandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.mae(pred, labels)
+
+
+class NFLandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.rce(pred, labels)
+
+
+class DMILoss(torch.nn.Module):
+ def __init__(self, num_classes):
+ super(DMILoss, self).__init__()
+ self.num_classes = num_classes
+
+ def forward(self, output, target):
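+        # DMI loss: -log |det(U)|, where U is the batch estimate of the joint matrix
+        # between one-hot labels and softmax predictions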
+ outputs = F.softmax(output, dim=1)
+ targets = target.reshape(target.size(0), 1).cpu()
+ y_onehot = torch.FloatTensor(target.size(0), self.num_classes).zero_()
+ y_onehot.scatter_(1, targets, 1)
+ y_onehot = y_onehot.transpose(0, 1).cuda()
+ mat = y_onehot @ outputs
+ return -1.0 * torch.log(torch.abs(torch.det(mat.float())) + 0.001)
+
+
+class BootSoftLoss(torch.nn.Module):
+ def __init__(self, num_classes, beta=0.95):
+ super(BootSoftLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.beta = beta
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ bsl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred) * torch.log(pred), dim=1)
+ return bsl.mean()
+
+
+class BootHardLoss(torch.nn.Module):
+ def __init__(self, num_classes, beta=0.8):
+        super(BootHardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.beta = beta
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+        pred_one_hot = F.one_hot(torch.argmax(pred, dim=1), self.num_classes)
+ bhl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred_one_hot) * torch.log(pred), dim=1)
+ return bhl.mean()
+
+
+class ForwardLoss(torch.nn.Module):
+ def __init__(self, num_classes, noise_rate):
+ super(ForwardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.noise_rate = noise_rate
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.)
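+        # symmetric-noise transition matrix: 1 - noise_rate on the diagonal,
+        # noise_rate / (K - 1) elsewhere; forward correction applies it to the
+        # softmax predictions before taking the cross entropy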
+ P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes))
+ P.diagonal().fill_(1-self.noise_rate)
+ P = P.to(self.device)
+ loss = -torch.sum(label_one_hot * torch.log(torch.matmul(pred, P)), dim=-1)
+ return loss.mean()
+
+class BackwardLoss(torch.nn.Module):
+ def __init__(self, num_classes, noise_rate):
+ super(BackwardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.noise_rate = noise_rate
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.)
+ P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes))
+ P.diagonal().fill_(1-self.noise_rate)
+ P = P.to(self.device)
+ P_inv = torch.inverse(P)
+        loss = -torch.sum((torch.matmul(label_one_hot, P_inv)) * torch.log(pred), dim=-1)
+ return loss.mean()
+
+
+class LIDPacedLoss(torch.nn.Module):
+ def __init__(self, num_classes, alpha, beta1, beta2):
+ super(LIDPacedLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.alpha = alpha
+ self.beta1 = beta1
+ self.beta2 = beta2
+ self.sce = SCELoss(alpha=beta1, beta=beta2, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ if self.alpha == 1.0:
+ return self.sce(pred, labels)
+ else:
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ pred_labels = F.one_hot(torch.argmax(pred, dim=1), num_classes=label_one_hot.size(1))
+ y_new = self.alpha * label_one_hot + (1. - self.alpha) * pred_labels
+ loss = -torch.sum(y_new * torch.log(pred), dim=-1)
+ return loss.mean()
\ No newline at end of file
diff --git a/archive/model.py b/archive/model.py
new file mode 100644
index 0000000..d39265e
--- /dev/null
+++ b/archive/model.py
@@ -0,0 +1,197 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ConvBrunch(nn.Module):
+ def __init__(self, in_planes, out_planes, kernel_size=3):
+ super(ConvBrunch, self).__init__()
+ padding = (kernel_size - 1) // 2
+ self.out_conv = nn.Sequential(
+ nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding),
+ nn.BatchNorm2d(out_planes),
+ nn.ReLU())
+
+ def forward(self, x):
+ return self.out_conv(x)
+
+
+class SCEModel(nn.Module):
+ def __init__(self, type='cifar10'):
+ super(SCEModel, self).__init__()
+ self.type = type
+ if type == 'cifar10':
+ self.block1 = nn.Sequential(
+ ConvBrunch(3, 64, 3),
+ ConvBrunch(64, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block2 = nn.Sequential(
+ ConvBrunch(64, 128, 3),
+ ConvBrunch(128, 128, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block3 = nn.Sequential(
+ ConvBrunch(128, 196, 3),
+ ConvBrunch(196, 196, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
+ self.fc1 = nn.Sequential(
+ nn.Linear(3136, 256),
+ nn.BatchNorm1d(256),
+ nn.ReLU())
+ self.fc2 = nn.Linear(256, 10)
+ self.fc_size = 3136
+ elif type == 'mnist':
+ self.block1 = nn.Sequential(
+ ConvBrunch(1, 32, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block2 = nn.Sequential(
+ ConvBrunch(32, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
+ self.fc1 = nn.Sequential(
+ nn.Linear(64*7*7, 128),
+ nn.BatchNorm1d(128),
+ nn.ReLU())
+ self.fc2 = nn.Linear(128, 10)
+ self.fc_size = 64*7*7
+ self._reset_prams()
+
+ def _reset_prams(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, nn.Linear):
+ nn.init.xavier_uniform_(m.weight)
+ return
+
+ def forward(self, x):
+ x = self.block1(x)
+ x = self.block2(x)
+ x = self.block3(x) if self.type == 'cifar10' else x
+ # x = self.global_avg_pool(x)
+ x = x.view(-1, self.fc_size)
+ x = self.fc1(x)
+ x = self.fc2(x)
+ return x
+
+
+'''ResNet in PyTorch.
+For Pre-activation ResNet, see 'preact_resnet.py'.
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, in_planes, planes, stride=1):
+ super(BasicBlock, self).__init__()
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion*planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(self.expansion*planes)
+ )
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.bn2(self.conv2(out))
+ out += self.shortcut(x)
+ out = F.relu(out)
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, in_planes, planes, stride=1):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(self.expansion*planes)
+
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion*planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(self.expansion*planes)
+ )
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = F.relu(self.bn2(self.conv2(out)))
+ out = self.bn3(self.conv3(out))
+ out += self.shortcut(x)
+ out = F.relu(out)
+ return out
+
+
+class ResNet(nn.Module):
+ def __init__(self, block, num_blocks, num_classes=10):
+ super(ResNet, self).__init__()
+ self.in_planes = 64
+
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+ self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+ self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+ self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+ self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+ self.linear = nn.Linear(512*block.expansion, num_classes)
+ self._reset_prams()
+
+ def _make_layer(self, block, planes, num_blocks, stride):
+ strides = [stride] + [1]*(num_blocks-1)
+ layers = []
+ for stride in strides:
+ layers.append(block(self.in_planes, planes, stride))
+ self.in_planes = planes * block.expansion
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.layer1(out)
+ out = self.layer2(out)
+ out = self.layer3(out)
+ out = self.layer4(out)
+ out = F.avg_pool2d(out, 4)
+ out = out.view(out.size(0), -1)
+ out = self.linear(out)
+ return out
+
+ def _reset_prams(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, nn.Linear):
+ nn.init.xavier_uniform_(m.weight)
+ return
+
+
+def ResNet18(num_classes=10):
+ return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
+
+
+def ResNet34(num_classes=10):
+ return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
+
+
+def ResNet50(num_classes=10):
+ return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
+
+
+def ResNet101(num_classes=10):
+ return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
+
+
+def ResNet152(num_classes=10):
+ return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
diff --git a/archive/train.py b/archive/train.py
new file mode 100644
index 0000000..13bafc7
--- /dev/null
+++ b/archive/train.py
@@ -0,0 +1,382 @@
+import argparse
+import torch
+import time
+import os
+import collections
+import pickle
+import logging
+import torchvision
+from tqdm import tqdm
+from model import SCEModel, ResNet34
+from dataset import DatasetGenerator, Clothing1MDatasetLoader, ImageNetDatasetLoader
+from utils.utils import AverageMeter, accuracy, count_parameters_in_MB
+from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR
+from train_util import TrainUtil
+from loss import *
+
+# ArgParse
+parser = argparse.ArgumentParser(description='RobustLoss')
+parser.add_argument('--lr', type=float, default=0.01)
+parser.add_argument('--l2_reg', type=float, default=1e-4)
+parser.add_argument('--grad_bound', type=float, default=5.0)
+parser.add_argument('--train_log_every', type=int, default=100)
+parser.add_argument('--resume', action='store_true', default=False)
+parser.add_argument('--batch_size', type=int, default=128)
+parser.add_argument('--data_path', default='data', type=str)
+parser.add_argument('--checkpoint_path', default='checkpoints/cifar10/', type=str)
+parser.add_argument('--data_nums_workers', type=int, default=4)
+parser.add_argument('--epoch', type=int, default=150)
+parser.add_argument('--nr', type=float, default=0.4, help='noise_rate')
+parser.add_argument('--loss', type=str, default='SCE', help='SCE, CE, NCE, MAE, RCE')
+parser.add_argument('--alpha', type=float, default=1.0, help='alpha scale')
+parser.add_argument('--beta', type=float, default=1.0, help='beta scale')
+parser.add_argument('--q', type=float, default=0.7, help='q for gce')
+parser.add_argument('--gamma', type=float, default=2, help='gamma for FocalLoss')
+parser.add_argument('--dataset_type', choices=['mnist', 'cifar10', 'cifar100', 'clothing1m', 'imagenet'], type=str, default='cifar10')
+parser.add_argument('--scale_exp', action='store_true', default=False)
+parser.add_argument('--alpha_beta_exp', action='store_true', default=False)
+parser.add_argument('--version', type=str, default='robust_loss')
+parser.add_argument('--run_version', type=str, default='run1')
+parser.add_argument('--asym', action='store_true', default=False)
+parser.add_argument('--seed', type=int, default=123)
+args = parser.parse_args()
+
+if args.dataset_type == 'cifar100':
+ args.checkpoint_path = 'checkpoints/cifar100/'
+ log_dataset_type = 'cifar100'
+elif args.dataset_type == 'cifar10':
+ args.checkpoint_path = 'checkpoints/cifar10/'
+ log_dataset_type = 'cifar10'
+elif args.dataset_type == 'mnist':
+ args.checkpoint_path = 'checkpoints/mnist/'
+ log_dataset_type = 'mnist'
+elif args.dataset_type == 'clothing1m':
+ args.checkpoint_path = 'checkpoints/clothing1m/'
+ log_dataset_type = 'clothing1m'
+elif args.dataset_type == 'imagenet':
+ args.checkpoint_path = 'checkpoints/ILSVR2012/'
+ log_dataset_type = 'imagenet'
+else:
+    raise ValueError('Unknown Dataset')
+
+log_sym_type = ''
+if args.dataset_type == 'clothing1m':
+ log_dataset_type = 'clothing1m'
+elif args.dataset_type == 'imagenet':
+ log_dataset_type = 'imagenet'
+elif not args.dataset_type == 'clothing1m':
+ args.version = str(args.nr) + 'nr_' + args.loss.lower()
+ if args.scale_exp:
+ args.version += '_scale_' + str(args.alpha)
+ elif args.alpha_beta_exp:
+ args.version += '_ab_' + str(args.alpha) + '_' + str(args.beta)
+ if args.asym:
+ log_sym_type = 'asym'
+ args.version += '_asym'
+ args.checkpoint_path += 'asym/' + args.run_version + '/'
+ else:
+ log_sym_type = 'sym'
+ args.checkpoint_path += 'sym/' + args.run_version + '/'
+
+
+if not os.path.exists(args.checkpoint_path):
+ os.makedirs(args.checkpoint_path)
+if not os.path.exists(os.path.join('logs', log_dataset_type, log_sym_type, args.run_version)):
+ os.makedirs(os.path.join('logs', log_dataset_type, log_sym_type, args.run_version))
+
+
+def setup_logger(name, log_file, level=logging.INFO):
+ """To setup as many loggers as you want"""
+ formatter = logging.Formatter('%(asctime)s %(message)s')
+ handler = logging.FileHandler(log_file)
+ handler.setFormatter(formatter)
+
+ logger = logging.getLogger(name)
+ logger.setLevel(level)
+ logger.addHandler(handler)
+
+ return logger
+
+
+log_file_name = os.path.join('logs', log_dataset_type, log_sym_type, args.run_version, args.version)
+logger = setup_logger(name=args.version, log_file=log_file_name + ".log")
+GLOBAL_STEP, EVAL_STEP, EVAL_BEST_ACC = 0, 0, 0
+TRAIN_HISTORY = collections.defaultdict(list)
+torch.manual_seed(args.seed)
+
+if torch.cuda.is_available():
+ torch.backends.cudnn.enabled = True
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cudnn.deterministic = True
+ if torch.cuda.device_count() > 1:
+ device = torch.device('cuda:0')
+ else:
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
+def log_display(epoch, global_step, time_elapse, **kwargs):
+ display = 'epoch=' + str(epoch) + \
+ '\tglobal_step=' + str(global_step)
+ for key, value in kwargs.items():
+ display += '\t' + str(key) + '=%.5f' % value
+ display += '\ttime=%.2fit/s' % (1. / time_elapse)
+ return display
+
+
+def model_eval(epoch, fixed_cnn, data_loader, dataset_type='test_dataset'):
+ global EVAL_STEP
+ fixed_cnn.eval()
+ valid_loss_meters = AverageMeter()
+ valid_acc_meters = AverageMeter()
+ valid_acc5_meters = AverageMeter()
+ ce_loss = torch.nn.CrossEntropyLoss()
+
+ for images, labels in tqdm(data_loader[dataset_type]):
+ start = time.time()
+ images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
+ with torch.no_grad():
+ pred = fixed_cnn(images)
+ loss = ce_loss(pred, labels)
+ acc, acc5 = accuracy(pred, labels, topk=(1, 5))
+
+ valid_loss_meters.update(loss.item(), labels.shape[0])
+ valid_acc_meters.update(acc.item(), labels.shape[0])
+ valid_acc5_meters.update(acc5.item(), labels.shape[0])
+ end = time.time()
+
+ EVAL_STEP += 1
+ if EVAL_STEP % args.train_log_every == 0:
+ display = log_display(epoch=epoch,
+ global_step=GLOBAL_STEP,
+ time_elapse=end-start,
+ loss=loss.item(),
+ test_loss_avg=valid_loss_meters.avg,
+ acc=acc.item(),
+ test_acc_avg=valid_acc_meters.avg,
+ test_acc_top5_avg=valid_acc5_meters.avg)
+ logger.info(display)
+ display = log_display(epoch=epoch,
+ global_step=GLOBAL_STEP,
+ time_elapse=end-start,
+ loss=loss.item(),
+ test_loss_avg=valid_loss_meters.avg,
+ acc=acc.item(),
+ test_acc_avg=valid_acc_meters.avg,
+ test_acc_top5_avg=valid_acc5_meters.avg)
+ logger.info(display)
+ return valid_acc_meters.avg, valid_acc5_meters.avg
+
+
+def train_fixed(starting_epoch, data_loader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper):
+ global GLOBAL_STEP, reduction_arc, cell_arc, EVAL_BEST_ACC, EVAL_STEP, TRAIN_HISTORY
+
+ for epoch in tqdm(range(starting_epoch, args.epoch)):
+ logger.info("=" * 20 + "Training" + "=" * 20)
+ fixed_cnn.train()
+ train_loss_meters = AverageMeter()
+ train_acc_meters = AverageMeter()
+ train_acc5_meters = AverageMeter()
+
+ for images, labels in tqdm(data_loader["train_dataset"]):
+ images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
+ start = time.time()
+ fixed_cnn.zero_grad()
+ fixed_cnn_optmizer.zero_grad()
+ pred = fixed_cnn(images)
+ loss = criterion(pred, labels)
+ loss.backward()
+ grad_norm = torch.nn.utils.clip_grad_norm_(fixed_cnn.parameters(), args.grad_bound)
+ fixed_cnn_optmizer.step()
+ acc, acc5 = accuracy(pred, labels, topk=(1, 5))
+
+ train_loss_meters.update(loss.item(), labels.shape[0])
+ train_acc_meters.update(acc.item(), labels.shape[0])
+ train_acc5_meters.update(acc5.item(), labels.shape[0])
+
+ end = time.time()
+
+ GLOBAL_STEP += 1
+ if GLOBAL_STEP % args.train_log_every == 0:
+ lr = fixed_cnn_optmizer.param_groups[0]['lr']
+ display = log_display(epoch=epoch,
+ global_step=GLOBAL_STEP,
+ time_elapse=end-start,
+ loss=loss.item(),
+ loss_avg=train_loss_meters.avg,
+ acc=acc.item(),
+ acc_top1_avg=train_acc_meters.avg,
+ acc_top5_avg=train_acc5_meters.avg,
+ lr=lr,
+ gn=grad_norm)
+ logger.info(display)
+ if fixed_cnn_scheduler is not None:
+ fixed_cnn_scheduler.step()
+ logger.info("="*20 + "Eval" + "="*20)
+ curr_acc, _ = model_eval(epoch, fixed_cnn, data_loader)
+ logger.info("curr_acc\t%.4f" % curr_acc)
+ logger.info("BEST_ACC\t%.4f" % EVAL_BEST_ACC)
+ payload = '=' * 10 + '\n'
+ payload = payload + ("curr_acc: %.4f\n best_acc: %.4f\n" % (curr_acc, EVAL_BEST_ACC))
+ EVAL_BEST_ACC = max(curr_acc, EVAL_BEST_ACC)
+ TRAIN_HISTORY["train_loss"].append(train_loss_meters.avg)
+ TRAIN_HISTORY["train_acc"].append(train_acc_meters.avg)
+ TRAIN_HISTORY["test_acc"].append(curr_acc)
+ TRAIN_HISTORY["test_acc_best"] = [EVAL_BEST_ACC]
+ with open(args.checkpoint_path + args.version + '.pickle', 'wb') as handle:
+ pickle.dump(TRAIN_HISTORY, handle, protocol=pickle.HIGHEST_PROTOCOL)
+ logger.info("Saved!\n")
+ return
+
+
+def train():
+ # Dataset
+ if args.dataset_type == 'clothing1m':
+ dataset = Clothing1MDatasetLoader(batchSize=args.batch_size,
+ dataPath=args.data_path,
+ numOfWorkers=args.data_nums_workers)
+ elif args.dataset_type == 'imagenet':
+ dataset = ImageNetDatasetLoader(batchSize=args.batch_size,
+ dataPath=args.data_path,
+ seed=args.seed,
+ target_class_num=200,
+ nosiy_rate=0.4,
+ numOfWorkers=args.data_nums_workers)
+ else:
+ dataset = DatasetGenerator(batchSize=args.batch_size,
+ dataPath=args.data_path,
+ numOfWorkers=args.data_nums_workers,
+ noise_rate=args.nr,
+ asym=args.asym,
+ seed=args.seed,
+ dataset_type=args.dataset_type)
+
+ dataLoader = dataset.getDataLoader()
+ eta_min = 0
+ ln_neg = 1
+
+ if args.dataset_type == 'clothing1m':
+ # Train Clothing1M
+ args.epoch = 20
+ args.l2_reg = 1e-3
+ num_classes = 14
+ fixed_cnn = torchvision.models.resnet50(num_classes=14)
+ # fixed_cnn.fc = torch.nn.Linear(2048, 14)
+
+ elif args.dataset_type == 'cifar100':
+ # Train CIFAR100
+ args.lr = 0.1
+ args.epoch = 200
+ num_classes = 100
+ fixed_cnn = ResNet34(num_classes=num_classes)
+
+ # NLNL
+ if args.loss == 'NLNL':
+ args.epoch = 2000
+ ln_neg = 110
+
+ elif args.dataset_type == 'cifar10':
+ # Train CIFAR10
+ args.epoch = 120
+ num_classes = 10
+ fixed_cnn = SCEModel(type='cifar10')
+
+ # NLNL
+ if args.loss == 'NLNL':
+ args.epoch = 1000
+
+ elif args.dataset_type == 'mnist':
+ # Train mnist
+ args.epoch = 50
+ num_classes = 10
+ fixed_cnn = SCEModel(type='mnist')
+ eta_min = 0.001
+ args.l2_reg = 1e-3
+ # NLNL
+ if args.loss == 'NLNL':
+ args.epoch = 720
+
+ elif args.dataset_type == 'imagenet':
+ args.epoch = 100
+ args.l2_reg = 3e-5
+ num_classes = 200
+ fixed_cnn = torchvision.models.resnet50(num_classes=num_classes)
+
+ logger.info("num_classes: %s" % (num_classes))
+
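+    # All supported loss functions; alpha, beta, gamma and q are taken from the command-line arguments.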
+ loss_options = {
+ 'SCE': SCELoss(alpha=args.alpha, beta=args.beta, num_classes=num_classes),
+ 'CE': torch.nn.CrossEntropyLoss(),
+ 'NCE': NormalizedCrossEntropy(scale=args.alpha, num_classes=num_classes),
+ 'MAE': MeanAbsoluteError(scale=args.alpha, num_classes=num_classes),
+ 'NMAE': NormalizedMeanAbsoluteError(scale=args.alpha, num_classes=num_classes),
+ 'GCE': GeneralizedCrossEntropy(num_classes=num_classes, q=args.q),
+ 'RCE': ReverseCrossEntropy(scale=args.alpha, num_classes=num_classes),
+ 'NRCE': NormalizedReverseCrossEntropy(scale=args.alpha, num_classes=num_classes),
+ 'NGCE': NormalizedGeneralizedCrossEntropy(scale=args.alpha, num_classes=num_classes, q=args.q),
+ 'NCEandRCE': NCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes),
+ 'NCEandMAE': NCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes),
+ 'GCEandMAE': GCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'GCEandRCE': GCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'GCEandNCE': GCEandNCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'MAEandRCE': MAEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes),
+ 'NGCEandNCE': NGCEandNCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'NGCEandMAE': NGCEandMAE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'NGCEandRCE': NGCEandRCE(alpha=args.alpha, beta=args.beta, num_classes=num_classes, q=args.q),
+ 'FocalLoss': FocalLoss(gamma=args.gamma),
+ 'NFL': NormalizedFocalLoss(scale=args.alpha, gamma=args.gamma, num_classes=num_classes),
+ 'NLNL': NLNL(num_classes=num_classes, train_loader=dataLoader['train_dataset'], ln_neg=ln_neg),
+ 'NFLandNCE': NFLandNCE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes),
+ 'NFLandMAE': NFLandMAE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes),
+ 'NFLandRCE': NFLandRCE(alpha=args.alpha, beta=args.beta, gamma=args.gamma, num_classes=num_classes),
+ 'DMI': DMILoss(num_classes=num_classes)
+ }
+
+ if args.loss in loss_options:
+ criterion = loss_options[args.loss]
+ else:
+        raise ValueError("Unknown loss")
+
+ logger.info(criterion.__class__.__name__)
+    logger.info("Number of Trainable Parameters (M): %.4f" % count_parameters_in_MB(fixed_cnn))
+
+ fixed_cnn.to(device)
+
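+    # DMI is trained in two stages: warm up with standard CE first, then fine-tune with the DMI loss (second stage below).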
+ if args.loss == 'DMI':
+ criterion = loss_options['CE']
+
+ fixed_cnn_optmizer = torch.optim.SGD(params=fixed_cnn.parameters(),
+ lr=args.lr,
+ momentum=0.9,
+ weight_decay=args.l2_reg)
+
+ fixed_cnn_scheduler = CosineAnnealingLR(fixed_cnn_optmizer,
+ float(args.epoch),
+ eta_min=eta_min)
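+    # Clothing1M and the ImageNet-based setting use step decay instead of the default cosine schedule.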
+ if args.dataset_type == 'clothing1m':
+ fixed_cnn_scheduler = MultiStepLR(fixed_cnn_optmizer, milestones=[5, 10], gamma=0.1)
+ elif args.dataset_type == 'imagenet':
+ fixed_cnn_scheduler = MultiStepLR(fixed_cnn_optmizer, milestones=[30, 60, 80], gamma=0.1)
+
+ utilHelper = TrainUtil(checkpoint_path=args.checkpoint_path, version=args.version)
+ starting_epoch = 0
+
+ for arg in vars(args):
+ logger.info("%s: %s" % (arg, getattr(args, arg)))
+
+ train_fixed(starting_epoch, dataLoader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper)
+
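+    # Second DMI stage: keep the CE-trained weights and fine-tune with the DMI loss at a small learning rate.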
+ if args.loss == 'DMI':
+ criterion = loss_options['DMI']
+ fixed_cnn_optmizer = torch.optim.SGD(params=fixed_cnn.parameters(),
+ lr=1e-6,
+ momentum=0.9,
+ weight_decay=args.l2_reg)
+ starting_epoch = 0
+ fixed_cnn_scheduler = None
+ train_fixed(starting_epoch, dataLoader, fixed_cnn, criterion, fixed_cnn_optmizer, fixed_cnn_scheduler, utilHelper)
+
+
+if __name__ == '__main__':
+ train()
diff --git a/archive/train_util.py b/archive/train_util.py
new file mode 100644
index 0000000..9484945
--- /dev/null
+++ b/archive/train_util.py
@@ -0,0 +1,106 @@
+import torch
+import os
+import pickle
+
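+# Select the compute device once at import time; cuDNN benchmark mode speeds up training with fixed-size inputs.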
+if torch.cuda.is_available():
+ torch.backends.cudnn.benchmark = True
+ if torch.cuda.device_count() > 1:
+ device = torch.device('cuda:0')
+ else:
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
+class TrainUtil():
+ def __init__(self, checkpoint_path='checkpoints', version='mcts_nas_net_v1'):
+ self.checkpoint_path = checkpoint_path
+ self.version = version
+ return
+
+ def save_model_fixed(self, epoch, fixed_cnn, fixed_cnn_optmizer, save_best=False, **kwargs):
+ filename = os.path.join(self.checkpoint_path, self.version) + '.pth'
+ # Torch Save State Dict
+ state = {
+ 'epoch': epoch+1,
+ 'shared_cnn': fixed_cnn.state_dict(),
+ 'shared_cnn_optmizer': fixed_cnn_optmizer.state_dict(),
+ }
+ for key, value in kwargs.items():
+ state[key] = value
+ torch.save(state, filename)
+ filename = os.path.join(self.checkpoint_path, self.version) + '_best.pth'
+ if save_best:
+ torch.save(state, filename)
+ return
+
+ def load_model_fixed(self, fixed_cnn, fixed_cnn_optmizer, **kwargs):
+ filename = os.path.join(self.checkpoint_path, self.version) + '.pth'
+ # Load Torch State Dict
+ checkpoints = torch.load(filename)
+        # Keys must match those written by save_model_fixed above.
+        fixed_cnn.load_state_dict(checkpoints['shared_cnn'])
+        fixed_cnn_optmizer.load_state_dict(checkpoints['shared_cnn_optmizer'])
+ print(filename + " Loaded!")
+ return checkpoints
+
+ def save_model(self,
+ mcts,
+ shared_cnn,
+ shared_cnn_optmizer,
+ shared_cnn_schduler,
+ estimator,
+ estimator_optmizer,
+ epoch,
+ **kwargs):
+ mcts_filename = os.path.join(self.checkpoint_path, self.version) + '_mcts' + '.pkl'
+ filename = os.path.join(self.checkpoint_path, self.version) + '.pth'
+
+ # Torch Save State Dict
+ state = {
+ 'epoch': epoch+1,
+ 'shared_cnn': shared_cnn.state_dict(),
+ 'shared_cnn_optmizer': shared_cnn_optmizer.state_dict(),
+ 'shared_cnn_schduler': shared_cnn_schduler.state_dict(),
+ 'estimator': estimator.state_dict(),
+ 'estimator_optmizer': estimator_optmizer.state_dict()
+ }
+ for key, value in kwargs.items():
+ state[key] = value
+ torch.save(state, filename)
+ print(filename + " saved!")
+
+ # Save MCTS to pickle
+ rolloutPolicy, searchPolicy = mcts.rollout, mcts.searchPolicy
+ mcts.rollout, mcts.searchPolicy = None, None
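+        # The rollout/search policies may hold unpicklable callables, so they are detached before dumping and restored afterwards.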
+ with open(mcts_filename, 'wb') as handle:
+ pickle.dump(mcts, handle, protocol=pickle.HIGHEST_PROTOCOL)
+ print(mcts_filename + " Saved!")
+ mcts.rollout, mcts.searchPolicy = rolloutPolicy, searchPolicy
+ return
+
+ def load_model(self,
+ shared_cnn,
+ shared_cnn_optmizer,
+ shared_cnn_schduler,
+ estimator,
+ estimator_optmizer,
+ **kwargs):
+
+ filename = os.path.join(self.checkpoint_path, self.version) + '.pth'
+ mcts_filename = os.path.join(self.checkpoint_path, self.version) + '_mcts' + '.pkl'
+
+ # Load Torch State Dict
+ checkpoints = torch.load(filename)
+ shared_cnn.load_state_dict(checkpoints['shared_cnn'])
+ shared_cnn_optmizer.load_state_dict(checkpoints['shared_cnn_optmizer'])
+ shared_cnn_schduler.load_state_dict(checkpoints['shared_cnn_schduler'])
+ shared_cnn_schduler.optimizer = shared_cnn_optmizer
+ estimator.load_state_dict(checkpoints['estimator'])
+ estimator_optmizer.load_state_dict(checkpoints['estimator_optmizer'])
+ print(filename + " Loaded!")
+
+ # Load MCTS
+ with open(mcts_filename, 'rb') as handle:
+ mcts = pickle.load(handle)
+ print(mcts_filename + " Loaded!")
+ return checkpoints, mcts
diff --git a/archive/utils/__init__.py b/archive/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/archive/utils/utils.py b/archive/utils/utils.py
new file mode 100644
index 0000000..b72833e
--- /dev/null
+++ b/archive/utils/utils.py
@@ -0,0 +1,85 @@
+import csv
+import sys
+import numpy as np
+
+
+class CSVLogger():
+ def __init__(self, args, fieldnames, filename='log.csv'):
+
+ self.filename = filename
+ self.csv_file = open(filename, 'w')
+
+ # Write model configuration at top of csv
+ writer = csv.writer(self.csv_file)
+ for arg in vars(args):
+ writer.writerow([arg, getattr(args, arg)])
+ writer.writerow([''])
+
+ self.writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames)
+ self.writer.writeheader()
+
+ self.csv_file.flush()
+
+ def writerow(self, row):
+ self.writer.writerow(row)
+ self.csv_file.flush()
+
+ def close(self):
+ self.csv_file.close()
+
+
+class Logger(object):
+ def __init__(self, filename):
+ self.terminal = sys.stdout
+ self.log = open(filename, 'w')
+
+ def write(self, message):
+ self.terminal.write(message)
+ self.log.write(message)
+ self.log.flush()
+
+ def flush(self):
+ # this flush method is needed for python 3 compatibility.
+ # this handles the flush command by doing nothing.
+ # you might want to specify some extra behavior here.
+ pass
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+ self.max = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+ self.max = max(self.max, val)
+
+
+def accuracy(output, target, topk=(1,)):
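+    # Computes top-k accuracy for each k in topk, returned as a fraction of the batch.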
+ maxk = max(topk)
+
+ batch_size = target.size(0)
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+        correct_k = correct[:k].reshape(-1).float().sum(0)
+ res.append(correct_k.mul_(1/batch_size))
+ return res
+
+
+def count_parameters_in_MB(model):
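+    # Parameter count in millions (despite the "MB" name), excluding any auxiliary head.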
+ return sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary_head" not in name)/1e6
diff --git a/callback_util.py b/callback_util.py
index 8bd8647..2372c85 100644
--- a/callback_util.py
+++ b/callback_util.py
@@ -1,26 +1,19 @@
import numpy as np
-import keras.backend as K
-from keras.utils import np_utils
-from keras.callbacks import Callback, LearningRateScheduler
-from keras.optimizers import SGD
+import torch
from util import get_lids_random_batch
-from loss import cross_entropy, lid_paced_loss
-from lass_tf import lass
-import tensorflow as tf
+import os
-class D2LCallback(Callback):
- def __init__(self, model, X_train, y_train, dataset, noise_ratio, epochs=150,
+class D2LCallback:
+ def __init__(self, model, data_loader, device, epochs=120,
pace_type='d2l', init_epoch=5, epoch_win=5, lid_subset_size=1280,
- lid_k=20, verbose=1):
+ lid_k=20, verbose=1, is_found_turning_point=False):
super(D2LCallback, self).__init__()
self.validation_data = None
self.model = model
self.turning_epoch = -1
- self.X_train = X_train
- self.y_train = y_train
- self.dataset = dataset
- self.noise_ratio = noise_ratio
+ self.data_loader = data_loader
+ self.device = device
self.epochs = epochs
self.pace_type = pace_type
self.mean_lid = -1.
@@ -32,11 +25,11 @@ def __init__(self, model, X_train, y_train, dataset, noise_ratio, epochs=150,
self.lid_k = lid_k
self.verbose = verbose
self.alpha = 1.0
+ self.is_found_turning_point = is_found_turning_point
def on_epoch_begin(self, epoch, logs={}):
- rand_idxes = np.random.choice(self.X_train.shape[0], self.lid_subset_size, replace=False)
- lid = np.mean(get_lids_random_batch(self.model, self.X_train[rand_idxes], k=self.lid_k, batch_size=128))
-
+ lids_tem = get_lids_random_batch(self.model, self.data_loader, self.device, k=20, batch_size=128)
+ lid = lids_tem.mean().item()
self.p_lambda = epoch*1./self.epochs
# deal with possible illegal lid value
@@ -48,6 +41,7 @@ def on_epoch_begin(self, epoch, logs={}):
# find the turning point where to apply lid-paced learning strategy
if self.found_turning_point(self.lids):
self.update_learning_pace()
+ self.is_found_turning_point = True
if len(self.lids) > 5:
print('lid = ..., ', self.lids[-5:])
@@ -56,172 +50,30 @@ def on_epoch_begin(self, epoch, logs={}):
if self.verbose > 0:
print('--Epoch: %s, LID: %.2f, min LID: %.2f, lid window: %s, turning epoch: %s, lambda: %.2f' %
- (epoch, lid, np.min(self.lids), self.epoch_win, self.turning_epoch, self.p_lambda))
+ (epoch, lid, min(self.lids), self.epoch_win, self.turning_epoch, self.p_lambda))
return
def found_turning_point(self, lids):
- if len(lids) > self.init_epoch + self.epoch_win: #
- if self.turning_epoch > -1: # if turning point is already found, stop checking
+ if len(lids) > self.init_epoch + self.epoch_win:
+ if self.turning_epoch > -1:
return True
else:
smooth_lids = lids[-self.epoch_win-1:-1]
- # self.mean_lid = np.mean(smooth_lids)
- if lids[-1] - np.mean(smooth_lids) > 2*np.std(smooth_lids):
+ if lids[-1] - torch.mean(torch.tensor(smooth_lids)) > 2 * torch.std(torch.tensor(smooth_lids)):
self.turning_epoch = len(lids) - 2
# rollback model if you want, should be called before checkpoint callback
# otherwise need to save two models
- min_model_path = 'model/%s_%s_%s.hdf5' % (self.pace_type,
- self.dataset,
- self.noise_ratio)
- self.model.load_weights(min_model_path)
return True
- else:
- return False
+ return False
def update_learning_pace(self):
- # # this loss is not working for d2l learning, somehow, why???
- expansion = self.lids[-1] / np.min(self.lids)
- self.alpha = np.exp(-self.p_lambda * expansion)
+ expansion = self.lids[-1] / min(self.lids)
+ self.alpha = torch.exp(torch.tensor(-self.p_lambda * expansion)).item()
# self.alpha = np.exp(-0.1*expansion)
print('## Turning epoch: %s, lambda: %.2f, expansion: %.2f, alpha: %.2f' %
(self.turning_epoch, self.p_lambda, expansion, self.alpha))
# self.alpha = np.exp(-expansion)
- self.model.compile(loss=lid_paced_loss(self.alpha),
- optimizer=self.model.optimizer, metrics=['accuracy'])
-
-
-class LoggerCallback(Callback):
- """
- Log train/val loss and acc into file for later plots.
- """
- def __init__(self, model, X_train, y_train, X_test, y_test, dataset,
- model_name, noise_ratio, epochs):
- super(LoggerCallback, self).__init__()
- self.model = model
- self.X_train = X_train
- self.y_train = y_train
- self.X_test = X_test
- self.y_test = y_test
- self.dataset = dataset
- self.model_name = model_name
- self.noise_ratio = noise_ratio
- self.epochs = epochs
-
- self.train_loss = []
- self.test_loss = []
- self.train_acc = []
- self.test_acc = []
- # the followings are used to estimate LID
- self.lid_k = 20
- self.lid_subset = 128
- self.lids = []
-
- # complexity - Critical Sample Ratio (csr)
- self.csr_subset = 500
- self.csr_batchsize = 100
- self.csrs = []
-
- def on_epoch_end(self, epoch, logs={}):
- tr_acc = logs.get('acc')
- tr_loss = logs.get('loss')
- val_loss = logs.get('val_loss')
- val_acc = logs.get('val_acc')
- # te_loss, te_acc = self.model.evaluate(self.X_test, self.y_test, batch_size=128, verbose=0)
- self.train_loss.append(tr_loss)
- self.test_loss.append(val_loss)
- self.train_acc.append(tr_acc)
- self.test_acc.append(val_acc)
-
- file_name = 'log/loss_%s_%s_%s.npy' % \
- (self.model_name, self.dataset, self.noise_ratio)
- np.save(file_name, np.stack((np.array(self.train_loss), np.array(self.test_loss))))
- file_name = 'log/acc_%s_%s_%s.npy' % \
- (self.model_name, self.dataset, self.noise_ratio)
- np.save(file_name, np.stack((np.array(self.train_acc), np.array(self.test_acc))))
-
- # print('\n--Epoch %02d, train_loss: %.2f, train_acc: %.2f, val_loss: %.2f, val_acc: %.2f' %
- # (epoch, tr_loss, tr_acc, val_loss, val_acc))
-
- # calculate LID/CSR and save every 10 epochs
- if epoch % 1 == 0:
- # compute lid scores
- rand_idxes = np.random.choice(self.X_train.shape[0], self.lid_subset * 10, replace=False)
- lid = np.mean(get_lids_random_batch(self.model, self.X_train[rand_idxes],
- k=self.lid_k, batch_size=self.lid_subset))
- self.lids.append(lid)
-
- file_name = 'log/lid_%s_%s_%s.npy' % \
- (self.model_name, self.dataset, self.noise_ratio)
- np.save(file_name, np.array(self.lids))
-
- if len(np.array(self.lids).flatten()) > 20:
- print('lid = ...', self.lids[-20:])
- else:
- print('lid = ', self.lids)
-
- # compute csr scores
- # LASS to estimate the critical sample ratio
- scale_factor = 255. / (np.max(self.X_test) - np.min(self.X_test))
- y = tf.placeholder(tf.float32, shape=(None,) + self.y_test.shape[1:])
- csr_model = lass(self.model.layers[0].input, self.model.layers[-1].output, y,
- a=0.25 / scale_factor,
- b=0.2 / scale_factor,
- r=0.3 / scale_factor,
- iter_max=100)
- rand_idxes = np.random.choice(self.X_test.shape[0], self.csr_subset, replace=False)
- X_adv, adv_ind = csr_model.find(self.X_test[rand_idxes], bs=self.csr_batchsize)
- csr = np.sum(adv_ind) * 1. / self.csr_subset
- self.csrs.append(csr)
-
- file_name = 'log/csr_%s_%s_%s.npy' % \
- (self.model_name, self.dataset, self.noise_ratio)
- np.save(file_name, np.array(self.csrs))
-
- if len(self.csrs) > 20:
- print('csr = ...', self.csrs[-20:])
- else:
- print('csr = ', self.csrs)
-
- return
-
-def get_lr_scheduler(dataset):
- """
- customerized learning rate decay for training with clean labels.
- For efficientcy purpose we use large lr for noisy data.
- :param dataset:
- :param noise_ratio:
- :return:
- """
- if dataset in ['mnist', 'svhn']:
- def scheduler(epoch):
- if epoch > 40:
- return 0.001
- elif epoch > 20:
- return 0.01
- else:
- return 0.1
- return LearningRateScheduler(scheduler)
- elif dataset in ['cifar-10']:
- def scheduler(epoch):
- if epoch > 80:
- return 0.001
- elif epoch > 40:
- return 0.01
- else:
- return 0.1
- return LearningRateScheduler(scheduler)
- elif dataset in ['cifar-100']:
- def scheduler(epoch):
- if epoch > 160:
- return 0.0001
- elif epoch > 120:
- return 0.001
- elif epoch > 80:
- return 0.01
- else:
- return 0.1
- return LearningRateScheduler(scheduler)
diff --git a/complexity_plot.py b/complexity_plot.py
deleted file mode 100644
index e3cd6c5..0000000
--- a/complexity_plot.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""
-Train test error/accuracy/loss plot.
-
-Author: Xingjun Ma
-"""
-import os
-import numpy as np
-import tensorflow as tf
-import keras.backend as K
-from keras.datasets import mnist, cifar10
-from keras.optimizers import SGD
-from keras.utils import to_categorical
-import matplotlib.pyplot as plt
-from datasets import get_data
-from models import get_model
-from loss import cross_entropy
-from lass_tf import lass
-
-np.random.seed(1024)
-
-MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'd2l']
-MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L']
-COLORS = ['r', 'y', 'c', 'm', 'g', 'b']
-MARKERS = ['x', 'D', '<', '>', '^', 'o']
-
-def complexity_plot(model_list, dataset='mnist', num_classes=10, noise_ratio=10, epochs=50, n_samples=500):
- """
- The complexity (Critical Sample Ratio) of the hypothesis learned throughout training.
- """
- print('Dataset: %s, epochs: %s, noise ratio: %s%%' % (dataset, epochs, noise_ratio))
-
- # plot initialization
- fig = plt.figure() # figsize=(7, 6)
- ax = fig.add_subplot(111)
- bins = np.arange(epochs)
- xnew = np.arange(0, epochs, 5)
-
- # load data
- _, _, X_test, Y_test = get_data(dataset)
- # convert class vectors to binary class matrices
- Y_test = to_categorical(Y_test, num_classes)
-
- shuffle = np.random.permutation(X_test.shape[0])
- X_test = X_test[shuffle]
- Y_test = Y_test[shuffle]
- X_test = X_test[:n_samples]
- Y_test = Y_test[:n_samples]
-
- # load model
- image_shape = X_test.shape[1:]
- model = get_model(dataset, input_tensor=None, input_shape=image_shape)
- sgd = SGD(lr=0.01, momentum=0.9)
- y = tf.placeholder(tf.float32, shape=(None,) + Y_test.shape[1:])
-
- for model_name in model_list:
- file_name = "log/crs_%s_%s_%s.npy" % (model_name, dataset, noise_ratio)
- if os.path.isfile(file_name):
- crs = np.load(file_name)
- # plot line
- idx = MODELS.index(model_name)
-
- # z = np.polyfit(bins, crs, deg=5)
- # f = np.poly1d(z)
- # crs = f(xnew)
-
- for i in xnew:
- crs[i] = np.mean(crs[i:i+5])
-
- crs = crs[xnew]
-
- ax.plot(xnew, crs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
- continue
-
- crs = np.zeros(epochs)
- for i in range(epochs):
- # the critical sample ratio of the representations learned at every epoch
- # need to save those epochs first, in this case, use separate folders for each model
- model_path = 'model/%s/%s_%s.%02d.hdf5' % (model_name, dataset, noise_ratio, i)
- model.load_weights(model_path)
- model.compile(
- loss=cross_entropy,
- optimizer=sgd,
- metrics=['accuracy']
- )
-
- # LASS to estimate the critical sample ratio
- scale_factor = 255. / (np.max(X_test) - np.min(X_test))
- csr_model = lass(model.layers[0].input, model.layers[-1].output, y,
- a=0.25 / scale_factor,
- b=0.2 / scale_factor,
- r=0.3 / scale_factor,
- iter_max=100)
- X_adv, adv_ind = csr_model.find(X_test, bs=500)
- crs[i] = np.sum(adv_ind) * 1. / n_samples
-
- print('model: %s, epoch: %s, CRS: %s' % (model_name, i, crs[i]))
-
- # save result to avoid recomputing
- np.save(file_name, crs)
- print(crs)
-
- # plot line
- idx = MODELS.index(model_name)
-
- z = np.polyfit(bins, crs, deg=5)
- f = np.poly1d(z)
- crs = f(xnew)
-
- ax.plot(xnew, crs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
-
- # ax.set_xticks([])
- # ax.set_yticks([])
- ax.set_xlabel("Epoch", fontsize=15)
- ax.set_ylabel("Hypothesis complexity (CSR score)", fontsize=15)
- # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
- legend = plt.legend(loc='upper left')
- plt.setp(legend.get_texts(), fontsize=15)
- fig.savefig("plots/complexity_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300)
- plt.show()
-
-if __name__ == "__main__":
- # mnist: epoch=50, cifar-10: epoch=120
- complexity_plot(model_list=['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'],
- dataset='cifar-10', num_classes=10, noise_ratio=60, epochs=120, n_samples=500)
diff --git a/configs/cifar10/asym/bhl.yaml b/configs/cifar10/asym/bhl.yaml
new file mode 100644
index 0000000..e459990
--- /dev/null
+++ b/configs/cifar10/asym/bhl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/cifar10/asym/bl.yaml b/configs/cifar10/asym/bl.yaml
new file mode 100644
index 0000000..959320c
--- /dev/null
+++ b/configs/cifar10/asym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BackwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar10/asym/bsl.yaml b/configs/cifar10/asym/bsl.yaml
new file mode 100644
index 0000000..81d30bb
--- /dev/null
+++ b/configs/cifar10/asym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/cifar10/asym/ce.yaml b/configs/cifar10/asym/ce.yaml
new file mode 100644
index 0000000..a665689
--- /dev/null
+++ b/configs/cifar10/asym/ce.yaml
@@ -0,0 +1,30 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/cifar10/asym/d2l.yaml b/configs/cifar10/asym/d2l.yaml
new file mode 100644
index 0000000..875d393
--- /dev/null
+++ b/configs/cifar10/asym/d2l.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 10
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/cifar10/asym/fl.yaml b/configs/cifar10/asym/fl.yaml
new file mode 100644
index 0000000..a97025d
--- /dev/null
+++ b/configs/cifar10/asym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ForwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar10/asym/focal.yaml b/configs/cifar10/asym/focal.yaml
new file mode 100644
index 0000000..c00eb66
--- /dev/null
+++ b/configs/cifar10/asym/focal.yaml
@@ -0,0 +1,31 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/cifar10/asym/gce.yaml b/configs/cifar10/asym/gce.yaml
new file mode 100644
index 0000000..23338cf
--- /dev/null
+++ b/configs/cifar10/asym/gce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 10
+ q: 0.7
diff --git a/configs/cifar10/asym/mae.yaml b/configs/cifar10/asym/mae.yaml
new file mode 100644
index 0000000..69457cc
--- /dev/null
+++ b/configs/cifar10/asym/mae.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/asym/nce+mae.yaml b/configs/cifar10/asym/nce+mae.yaml
new file mode 100644
index 0000000..3d077e5
--- /dev/null
+++ b/configs/cifar10/asym/nce+mae.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandMAE
+ num_classes: 10
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/nce+rce.yaml b/configs/cifar10/asym/nce+rce.yaml
new file mode 100644
index 0000000..01a46df
--- /dev/null
+++ b/configs/cifar10/asym/nce+rce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandRCE
+ num_classes: 10
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/nce.yaml b/configs/cifar10/asym/nce.yaml
new file mode 100644
index 0000000..6734a62
--- /dev/null
+++ b/configs/cifar10/asym/nce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/asym/nfl+mae.yaml b/configs/cifar10/asym/nfl+mae.yaml
new file mode 100644
index 0000000..d5f723c
--- /dev/null
+++ b/configs/cifar10/asym/nfl+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandMAE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/nfl+rce.yaml b/configs/cifar10/asym/nfl+rce.yaml
new file mode 100644
index 0000000..f2ee4f5
--- /dev/null
+++ b/configs/cifar10/asym/nfl+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandRCE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/nfl.yaml b/configs/cifar10/asym/nfl.yaml
new file mode 100644
index 0000000..5dfad2e
--- /dev/null
+++ b/configs/cifar10/asym/nfl.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 10
+ scale: 1.0
+ gamma: 0.5
diff --git a/configs/cifar10/asym/ngce+mae.yaml b/configs/cifar10/asym/ngce+mae.yaml
new file mode 100644
index 0000000..06cf547
--- /dev/null
+++ b/configs/cifar10/asym/ngce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/ngce+rce.yaml b/configs/cifar10/asym/ngce+rce.yaml
new file mode 100644
index 0000000..06cf547
--- /dev/null
+++ b/configs/cifar10/asym/ngce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+  name: NGCEandRCE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/asym/ngce.yaml b/configs/cifar10/asym/ngce.yaml
new file mode 100644
index 0000000..818891e
--- /dev/null
+++ b/configs/cifar10/asym/ngce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 10
+ q: 0.1
+ scale: 1.0
diff --git a/configs/cifar10/asym/nlnl.yaml b/configs/cifar10/asym/nlnl.yaml
new file mode 100644
index 0000000..9f2d4f6
--- /dev/null
+++ b/configs/cifar10/asym/nlnl.yaml
@@ -0,0 +1,32 @@
+epochs: 1000
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NLNL
+ num_classes: 10
+ ln_neg: 1
diff --git a/configs/cifar10/asym/rce.yaml b/configs/cifar10/asym/rce.yaml
new file mode 100644
index 0000000..0adfafc
--- /dev/null
+++ b/configs/cifar10/asym/rce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/asym/sce.yaml b/configs/cifar10/asym/sce.yaml
new file mode 100644
index 0000000..e1d97f7
--- /dev/null
+++ b/configs/cifar10/asym/sce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: SCELoss
+ num_classes: 10
+ alpha: 0.1
+ beta: 1.0
diff --git a/configs/cifar10/sym/bhl.yaml b/configs/cifar10/sym/bhl.yaml
new file mode 100644
index 0000000..a68e993
--- /dev/null
+++ b/configs/cifar10/sym/bhl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/cifar10/sym/bl.yaml b/configs/cifar10/sym/bl.yaml
new file mode 100644
index 0000000..6a42c82
--- /dev/null
+++ b/configs/cifar10/sym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BackwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar10/sym/bsl.yaml b/configs/cifar10/sym/bsl.yaml
new file mode 100644
index 0000000..3402c3b
--- /dev/null
+++ b/configs/cifar10/sym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/cifar10/sym/ce.yaml b/configs/cifar10/sym/ce.yaml
new file mode 100644
index 0000000..6f45e1d
--- /dev/null
+++ b/configs/cifar10/sym/ce.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+#scheduler:
+ #name: CosineAnnealingLR
+ #T_max: $epochs
+ #eta_min: 0.0
+scheduler:
+ name: StepLR
+ step_size: 40
+ gamma: 0.1
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/cifar10/sym/d2l.yaml b/configs/cifar10/sym/d2l.yaml
new file mode 100644
index 0000000..4906968
--- /dev/null
+++ b/configs/cifar10/sym/d2l.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 10
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/cifar10/sym/fl.yaml b/configs/cifar10/sym/fl.yaml
new file mode 100644
index 0000000..e51103f
--- /dev/null
+++ b/configs/cifar10/sym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ForwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar10/sym/focal.yaml b/configs/cifar10/sym/focal.yaml
new file mode 100644
index 0000000..62425fd
--- /dev/null
+++ b/configs/cifar10/sym/focal.yaml
@@ -0,0 +1,31 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/cifar10/sym/gce.yaml b/configs/cifar10/sym/gce.yaml
new file mode 100644
index 0000000..62420e3
--- /dev/null
+++ b/configs/cifar10/sym/gce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 10
+ q: 0.7
diff --git a/configs/cifar10/sym/mae.yaml b/configs/cifar10/sym/mae.yaml
new file mode 100644
index 0000000..bb7e561
--- /dev/null
+++ b/configs/cifar10/sym/mae.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/sym/nce+mae.yaml b/configs/cifar10/sym/nce+mae.yaml
new file mode 100644
index 0000000..0ccc85f
--- /dev/null
+++ b/configs/cifar10/sym/nce+mae.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandMAE
+ num_classes: 10
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/nce+rce.yaml b/configs/cifar10/sym/nce+rce.yaml
new file mode 100644
index 0000000..4c58521
--- /dev/null
+++ b/configs/cifar10/sym/nce+rce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandRCE
+ num_classes: 10
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/nce.yaml b/configs/cifar10/sym/nce.yaml
new file mode 100644
index 0000000..d115054
--- /dev/null
+++ b/configs/cifar10/sym/nce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/sym/nfl+mae.yaml b/configs/cifar10/sym/nfl+mae.yaml
new file mode 100644
index 0000000..59b6ff8
--- /dev/null
+++ b/configs/cifar10/sym/nfl+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandMAE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/nfl+rce.yaml b/configs/cifar10/sym/nfl+rce.yaml
new file mode 100644
index 0000000..8b5c97b
--- /dev/null
+++ b/configs/cifar10/sym/nfl+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandRCE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/nfl.yaml b/configs/cifar10/sym/nfl.yaml
new file mode 100644
index 0000000..3d5a934
--- /dev/null
+++ b/configs/cifar10/sym/nfl.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 10
+ scale: 10.0
+ gamma: 0.5
diff --git a/configs/cifar10/sym/ngce+mae.yaml b/configs/cifar10/sym/ngce+mae.yaml
new file mode 100644
index 0000000..eaf3bfd
--- /dev/null
+++ b/configs/cifar10/sym/ngce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/ngce+rce.yaml b/configs/cifar10/sym/ngce+rce.yaml
new file mode 100644
index 0000000..8f021d8
--- /dev/null
+++ b/configs/cifar10/sym/ngce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NGCEandRCE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 1.0
diff --git a/configs/cifar10/sym/ngce.yaml b/configs/cifar10/sym/ngce.yaml
new file mode 100644
index 0000000..ac42d1c
--- /dev/null
+++ b/configs/cifar10/sym/ngce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 10
+ scale: 1.0
+ q: 0.1
diff --git a/configs/cifar10/sym/nlnl.yaml b/configs/cifar10/sym/nlnl.yaml
new file mode 100644
index 0000000..bb0d8b2
--- /dev/null
+++ b/configs/cifar10/sym/nlnl.yaml
@@ -0,0 +1,32 @@
+epochs: 1000
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NLNL
+ num_classes: 10
+ ln_neg: 1
diff --git a/configs/cifar10/sym/rce.yaml b/configs/cifar10/sym/rce.yaml
new file mode 100644
index 0000000..41dd3f3
--- /dev/null
+++ b/configs/cifar10/sym/rce.yaml
@@ -0,0 +1,32 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/cifar10/sym/sce.yaml b/configs/cifar10/sym/sce.yaml
new file mode 100644
index 0000000..87a29a4
--- /dev/null
+++ b/configs/cifar10/sym/sce.yaml
@@ -0,0 +1,33 @@
+epochs: 120
+grad_bound: 5.0
+log_frequency: 100
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 128
+ data_path: ../datasets/
+ dataset_type: 'CIFAR10'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: SCELoss
+ num_classes: 10
+ alpha: 0.1
+ beta: 1.0
diff --git a/configs/cifar100/asym/bhl.yaml b/configs/cifar100/asym/bhl.yaml
new file mode 100644
index 0000000..dbbdd89
--- /dev/null
+++ b/configs/cifar100/asym/bhl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 100
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/cifar100/asym/bl.yaml b/configs/cifar100/asym/bl.yaml
new file mode 100644
index 0000000..0c78bbe
--- /dev/null
+++ b/configs/cifar100/asym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BackwardLoss
+ num_classes: 100
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar100/asym/bsl.yaml b/configs/cifar100/asym/bsl.yaml
new file mode 100644
index 0000000..faad8ce
--- /dev/null
+++ b/configs/cifar100/asym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 100
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/cifar100/asym/ce.yaml b/configs/cifar100/asym/ce.yaml
new file mode 100644
index 0000000..e99f3b4
--- /dev/null
+++ b/configs/cifar100/asym/ce.yaml
@@ -0,0 +1,30 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/cifar100/asym/d2l.yaml b/configs/cifar100/asym/d2l.yaml
new file mode 100644
index 0000000..7cb0e40
--- /dev/null
+++ b/configs/cifar100/asym/d2l.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 100
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/cifar100/asym/fl.yaml b/configs/cifar100/asym/fl.yaml
new file mode 100644
index 0000000..f827587
--- /dev/null
+++ b/configs/cifar100/asym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+  name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ForwardLoss
+ num_classes: 100
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar100/asym/focal.yaml b/configs/cifar100/asym/focal.yaml
new file mode 100644
index 0000000..a836fa7
--- /dev/null
+++ b/configs/cifar100/asym/focal.yaml
@@ -0,0 +1,31 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/cifar100/asym/gce.yaml b/configs/cifar100/asym/gce.yaml
new file mode 100644
index 0000000..f6cffc5
--- /dev/null
+++ b/configs/cifar100/asym/gce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 100
+ q: 0.7
diff --git a/configs/cifar100/asym/mae.yaml b/configs/cifar100/asym/mae.yaml
new file mode 100644
index 0000000..4a46a93
--- /dev/null
+++ b/configs/cifar100/asym/mae.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/asym/nce+mae.yaml b/configs/cifar100/asym/nce+mae.yaml
new file mode 100644
index 0000000..8c28957
--- /dev/null
+++ b/configs/cifar100/asym/nce+mae.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandMAE
+ num_classes: 100
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/asym/nce+rce.yaml b/configs/cifar100/asym/nce+rce.yaml
new file mode 100644
index 0000000..004a76b
--- /dev/null
+++ b/configs/cifar100/asym/nce+rce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandRCE
+ num_classes: 100
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/asym/nce.yaml b/configs/cifar100/asym/nce.yaml
new file mode 100644
index 0000000..a54b4c5
--- /dev/null
+++ b/configs/cifar100/asym/nce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/asym/nfl+mae.yaml b/configs/cifar100/asym/nfl+mae.yaml
new file mode 100644
index 0000000..acf631d
--- /dev/null
+++ b/configs/cifar100/asym/nfl+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandMAE
+ num_classes: 100
+ gamma: 0.5
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/asym/nfl+rce.yaml b/configs/cifar100/asym/nfl+rce.yaml
new file mode 100644
index 0000000..484522b
--- /dev/null
+++ b/configs/cifar100/asym/nfl+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandRCE
+ num_classes: 100
+ gamma: 0.5
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/asym/nfl.yaml b/configs/cifar100/asym/nfl.yaml
new file mode 100644
index 0000000..8ee3b92
--- /dev/null
+++ b/configs/cifar100/asym/nfl.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 100
+ scale: 1.0
+ gamma: 0.5
diff --git a/configs/cifar100/asym/ngce+mae.yaml b/configs/cifar100/asym/ngce+mae.yaml
new file mode 100644
index 0000000..838e0e4
--- /dev/null
+++ b/configs/cifar100/asym/ngce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 100
+ q: 0.7
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/asym/ngce+rce.yaml b/configs/cifar100/asym/ngce+rce.yaml
new file mode 100644
index 0000000..a35c906
--- /dev/null
+++ b/configs/cifar100/asym/ngce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+  name: NGCEandRCE
+ num_classes: 100
+ q: 0.7
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/asym/ngce.yaml b/configs/cifar100/asym/ngce.yaml
new file mode 100644
index 0000000..63d3ef6
--- /dev/null
+++ b/configs/cifar100/asym/ngce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 100
+ scale: 1.0
+ q: 0.7
diff --git a/configs/cifar100/asym/nlnl.yaml b/configs/cifar100/asym/nlnl.yaml
new file mode 100644
index 0000000..1946dd5
--- /dev/null
+++ b/configs/cifar100/asym/nlnl.yaml
@@ -0,0 +1,32 @@
+epochs: 2000
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NLNL
+ num_classes: 100
+ ln_neg: 110
diff --git a/configs/cifar100/asym/rce.yaml b/configs/cifar100/asym/rce.yaml
new file mode 100644
index 0000000..4cc8114
--- /dev/null
+++ b/configs/cifar100/asym/rce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/asym/sce.yaml b/configs/cifar100/asym/sce.yaml
new file mode 100644
index 0000000..005e078
--- /dev/null
+++ b/configs/cifar100/asym/sce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: SCELoss
+ num_classes: 100
+ alpha: 6.0
+ beta: 0.1
diff --git a/configs/cifar100/sym/bhl.yaml b/configs/cifar100/sym/bhl.yaml
new file mode 100644
index 0000000..8f73ef3
--- /dev/null
+++ b/configs/cifar100/sym/bhl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 100
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/cifar100/sym/bl.yaml b/configs/cifar100/sym/bl.yaml
new file mode 100644
index 0000000..3ce7c37
--- /dev/null
+++ b/configs/cifar100/sym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BackwardLoss
+ num_classes: 100
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar100/sym/bsl.yaml b/configs/cifar100/sym/bsl.yaml
new file mode 100644
index 0000000..346c324
--- /dev/null
+++ b/configs/cifar100/sym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 100
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/cifar100/sym/ce.yaml b/configs/cifar100/sym/ce.yaml
new file mode 100644
index 0000000..ece8d22
--- /dev/null
+++ b/configs/cifar100/sym/ce.yaml
@@ -0,0 +1,30 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/cifar100/sym/d2l.yaml b/configs/cifar100/sym/d2l.yaml
new file mode 100644
index 0000000..167f494
--- /dev/null
+++ b/configs/cifar100/sym/d2l.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 100
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/cifar100/sym/fl.yaml b/configs/cifar100/sym/fl.yaml
new file mode 100644
index 0000000..9261c73
--- /dev/null
+++ b/configs/cifar100/sym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+  name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ForwardLoss
+ num_classes: 100
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/cifar100/sym/focal.yaml b/configs/cifar100/sym/focal.yaml
new file mode 100644
index 0000000..d7724f9
--- /dev/null
+++ b/configs/cifar100/sym/focal.yaml
@@ -0,0 +1,31 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/cifar100/sym/gce.yaml b/configs/cifar100/sym/gce.yaml
new file mode 100644
index 0000000..0182704
--- /dev/null
+++ b/configs/cifar100/sym/gce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 100
+ q: 0.7
diff --git a/configs/cifar100/sym/mae.yaml b/configs/cifar100/sym/mae.yaml
new file mode 100644
index 0000000..453c535
--- /dev/null
+++ b/configs/cifar100/sym/mae.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/sym/nce+mae.yaml b/configs/cifar100/sym/nce+mae.yaml
new file mode 100644
index 0000000..f78c570
--- /dev/null
+++ b/configs/cifar100/sym/nce+mae.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandMAE
+ num_classes: 100
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/sym/nce+rce.yaml b/configs/cifar100/sym/nce+rce.yaml
new file mode 100644
index 0000000..02bf826
--- /dev/null
+++ b/configs/cifar100/sym/nce+rce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NCEandRCE
+ num_classes: 100
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/sym/nce.yaml b/configs/cifar100/sym/nce.yaml
new file mode 100644
index 0000000..23d4724
--- /dev/null
+++ b/configs/cifar100/sym/nce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/sym/nfl+mae.yaml b/configs/cifar100/sym/nfl+mae.yaml
new file mode 100644
index 0000000..e6fb00c
--- /dev/null
+++ b/configs/cifar100/sym/nfl+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandMAE
+ num_classes: 100
+ gamma: 0.5
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/sym/nfl+rce.yaml b/configs/cifar100/sym/nfl+rce.yaml
new file mode 100644
index 0000000..86eca40
--- /dev/null
+++ b/configs/cifar100/sym/nfl+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NFLandRCE
+ num_classes: 100
+ gamma: 0.5
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/sym/nfl.yaml b/configs/cifar100/sym/nfl.yaml
new file mode 100644
index 0000000..67af0a2
--- /dev/null
+++ b/configs/cifar100/sym/nfl.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 100
+ scale: 1.0
+ gamma: 0.5
diff --git a/configs/cifar100/sym/ngce+mae.yaml b/configs/cifar100/sym/ngce+mae.yaml
new file mode 100644
index 0000000..26ca360
--- /dev/null
+++ b/configs/cifar100/sym/ngce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 100
+ q: 0.7
+ alpha: 10.0
+ beta: 1.0
diff --git a/configs/cifar100/sym/ngce+rce.yaml b/configs/cifar100/sym/ngce+rce.yaml
new file mode 100644
index 0000000..e1907de
--- /dev/null
+++ b/configs/cifar100/sym/ngce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+  name: NGCEandRCE
+ num_classes: 100
+ q: 0.7
+ alpha: 10.0
+ beta: 0.1
diff --git a/configs/cifar100/sym/ngce.yaml b/configs/cifar100/sym/ngce.yaml
new file mode 100644
index 0000000..cfc67f8
--- /dev/null
+++ b/configs/cifar100/sym/ngce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 100
+ scale: 1.0
+ q: 0.7
diff --git a/configs/cifar100/sym/nlnl.yaml b/configs/cifar100/sym/nlnl.yaml
new file mode 100644
index 0000000..00729a2
--- /dev/null
+++ b/configs/cifar100/sym/nlnl.yaml
@@ -0,0 +1,32 @@
+epochs: 2000
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: NLNL
+ num_classes: 100
+ ln_neg: 110
diff --git a/configs/cifar100/sym/rce.yaml b/configs/cifar100/sym/rce.yaml
new file mode 100644
index 0000000..48d1f56
--- /dev/null
+++ b/configs/cifar100/sym/rce.yaml
@@ -0,0 +1,32 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 100
+ scale: 1.0
diff --git a/configs/cifar100/sym/sce.yaml b/configs/cifar100/sym/sce.yaml
new file mode 100644
index 0000000..f54b9d9
--- /dev/null
+++ b/configs/cifar100/sym/sce.yaml
@@ -0,0 +1,33 @@
+epochs: 200
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 256
+ data_path: ../datasets/
+ dataset_type: 'CIFAR100'
+ num_of_workers: 8
+
+model:
+ name: ResNet50
+ num_classes: 100
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-5
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.0
+
+criterion:
+ name: SCELoss
+ num_classes: 100
+ alpha: 6.0
+ beta: 0.1
diff --git a/configs/mnist/asym/bhl.yaml b/configs/mnist/asym/bhl.yaml
new file mode 100644
index 0000000..e12a278
--- /dev/null
+++ b/configs/mnist/asym/bhl.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/mnist/asym/bl.yaml b/configs/mnist/asym/bl.yaml
new file mode 100644
index 0000000..92fb2ac
--- /dev/null
+++ b/configs/mnist/asym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+  eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BackwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/mnist/asym/bsl.yaml b/configs/mnist/asym/bsl.yaml
new file mode 100644
index 0000000..fc2aa20
--- /dev/null
+++ b/configs/mnist/asym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/mnist/asym/ce.yaml b/configs/mnist/asym/ce.yaml
new file mode 100644
index 0000000..1984309
--- /dev/null
+++ b/configs/mnist/asym/ce.yaml
@@ -0,0 +1,31 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/mnist/asym/d2l.yaml b/configs/mnist/asym/d2l.yaml
new file mode 100644
index 0000000..1bf7aac
--- /dev/null
+++ b/configs/mnist/asym/d2l.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 10
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/mnist/asym/fl.yaml b/configs/mnist/asym/fl.yaml
new file mode 100644
index 0000000..2f28524
--- /dev/null
+++ b/configs/mnist/asym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: ForwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/mnist/asym/focal.yaml b/configs/mnist/asym/focal.yaml
new file mode 100644
index 0000000..666c446
--- /dev/null
+++ b/configs/mnist/asym/focal.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/mnist/asym/gce.yaml b/configs/mnist/asym/gce.yaml
new file mode 100644
index 0000000..edc4d35
--- /dev/null
+++ b/configs/mnist/asym/gce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 10
+ q: 0.7
diff --git a/configs/mnist/asym/mae.yaml b/configs/mnist/asym/mae.yaml
new file mode 100644
index 0000000..e5762a1
--- /dev/null
+++ b/configs/mnist/asym/mae.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/mnist/asym/nce+mae.yaml b/configs/mnist/asym/nce+mae.yaml
new file mode 100644
index 0000000..5ee86e6
--- /dev/null
+++ b/configs/mnist/asym/nce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NCEandMAE
+ num_classes: 10
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/nce+rce.yaml b/configs/mnist/asym/nce+rce.yaml
new file mode 100644
index 0000000..f72ccfa
--- /dev/null
+++ b/configs/mnist/asym/nce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NCEandRCE
+ num_classes: 10
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/nce.yaml b/configs/mnist/asym/nce.yaml
new file mode 100644
index 0000000..5fd6d38
--- /dev/null
+++ b/configs/mnist/asym/nce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 10
+ scale: 10.0
diff --git a/configs/mnist/asym/nfl+mae.yaml b/configs/mnist/asym/nfl+mae.yaml
new file mode 100644
index 0000000..9d5e6af
--- /dev/null
+++ b/configs/mnist/asym/nfl+mae.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NFLandMAE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/nfl+rce.yaml b/configs/mnist/asym/nfl+rce.yaml
new file mode 100644
index 0000000..a5b20c2
--- /dev/null
+++ b/configs/mnist/asym/nfl+rce.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NFLandRCE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/nfl.yaml b/configs/mnist/asym/nfl.yaml
new file mode 100644
index 0000000..480e034
--- /dev/null
+++ b/configs/mnist/asym/nfl.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 10
+ scale: 1.0
+ gamma: 0.5
diff --git a/configs/mnist/asym/ngce+mae.yaml b/configs/mnist/asym/ngce+mae.yaml
new file mode 100644
index 0000000..c959118
--- /dev/null
+++ b/configs/mnist/asym/ngce+mae.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/ngce+rce.yaml b/configs/mnist/asym/ngce+rce.yaml
new file mode 100644
index 0000000..71c983a
--- /dev/null
+++ b/configs/mnist/asym/ngce+rce.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NGCEandRCE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/asym/ngce.yaml b/configs/mnist/asym/ngce.yaml
new file mode 100644
index 0000000..f63ddff
--- /dev/null
+++ b/configs/mnist/asym/ngce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 10
+ scale: 1.0
+ q: 0.1
diff --git a/configs/mnist/asym/nlnl.yaml b/configs/mnist/asym/nlnl.yaml
new file mode 100644
index 0000000..7bda8da
--- /dev/null
+++ b/configs/mnist/asym/nlnl.yaml
@@ -0,0 +1,33 @@
+epochs: 720
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-3
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NLNL
+ num_classes: 10
+ ln_neg: 1
diff --git a/configs/mnist/asym/rce.yaml b/configs/mnist/asym/rce.yaml
new file mode 100644
index 0000000..71f6b2a
--- /dev/null
+++ b/configs/mnist/asym/rce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/mnist/asym/sce.yaml b/configs/mnist/asym/sce.yaml
new file mode 100644
index 0000000..1040027
--- /dev/null
+++ b/configs/mnist/asym/sce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: True
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: SCELoss
+ alpha: 0.01
+ beta: 1.0
+ num_classes: 10
diff --git a/configs/mnist/sym/bhl.yaml b/configs/mnist/sym/bhl.yaml
new file mode 100644
index 0000000..487ec01
--- /dev/null
+++ b/configs/mnist/sym/bhl.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.8
\ No newline at end of file
diff --git a/configs/mnist/sym/bl.yaml b/configs/mnist/sym/bl.yaml
new file mode 100644
index 0000000..b98061b
--- /dev/null
+++ b/configs/mnist/sym/bl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+  eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BackwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/mnist/sym/bsl.yaml b/configs/mnist/sym/bsl.yaml
new file mode 100644
index 0000000..5f00f06
--- /dev/null
+++ b/configs/mnist/sym/bsl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: BootSoftLoss
+ num_classes: 10
+ beta: 0.95
\ No newline at end of file
diff --git a/configs/mnist/sym/ce.yaml b/configs/mnist/sym/ce.yaml
new file mode 100644
index 0000000..953817b
--- /dev/null
+++ b/configs/mnist/sym/ce.yaml
@@ -0,0 +1,35 @@
+epochs: 1000
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ #weight_decay: 1.e-4
+ momentum: 0.9
+ #nesterov: True
+
+#scheduler:
+ #name: CosineAnnealingLR
+ #T_max: $epochs
+ #eta_min: 0.001
+scheduler:
+ name: StepLR
+ step_size: 100
+ gamma: 0.7
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/mnist/sym/d2l.yaml b/configs/mnist/sym/d2l.yaml
new file mode 100644
index 0000000..ba340b5
--- /dev/null
+++ b/configs/mnist/sym/d2l.yaml
@@ -0,0 +1,39 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.1
+ weight_decay: 1.e-4
+ momentum: 0.9
+
+#scheduler:
+ #name: CosineAnnealingLR
+ #T_max: $epochs
+ #eta_min: 0.001
+
+scheduler:
+ name: StepLR
+ step_size: 20
+ gamma: 0.1
+
+criterion:
+ name: LIDPacedLoss
+ num_classes: 10
+ alpha: 1.0
+ beta1: 0.1
+ beta2: 1.0
\ No newline at end of file
diff --git a/configs/mnist/sym/fl.yaml b/configs/mnist/sym/fl.yaml
new file mode 100644
index 0000000..97f60af
--- /dev/null
+++ b/configs/mnist/sym/fl.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets/
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: ForwardLoss
+ num_classes: 10
+ noise_rate: 0
\ No newline at end of file
diff --git a/configs/mnist/sym/focal.yaml b/configs/mnist/sym/focal.yaml
new file mode 100644
index 0000000..eb9de2a
--- /dev/null
+++ b/configs/mnist/sym/focal.yaml
@@ -0,0 +1,32 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: FocalLoss
+ gamma: 0.5
diff --git a/configs/mnist/sym/gce.yaml b/configs/mnist/sym/gce.yaml
new file mode 100644
index 0000000..23d4c02
--- /dev/null
+++ b/configs/mnist/sym/gce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 10
+ q: 0.7
diff --git a/configs/mnist/sym/mae.yaml b/configs/mnist/sym/mae.yaml
new file mode 100644
index 0000000..2b455ce
--- /dev/null
+++ b/configs/mnist/sym/mae.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: MeanAbsoluteError
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/mnist/sym/nce+mae.yaml b/configs/mnist/sym/nce+mae.yaml
new file mode 100644
index 0000000..e7c7ee0
--- /dev/null
+++ b/configs/mnist/sym/nce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NCEandMAE
+ num_classes: 10
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/nce+rce.yaml b/configs/mnist/sym/nce+rce.yaml
new file mode 100644
index 0000000..547ec5f
--- /dev/null
+++ b/configs/mnist/sym/nce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NCEandRCE
+ num_classes: 10
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/nce.yaml b/configs/mnist/sym/nce.yaml
new file mode 100644
index 0000000..7230460
--- /dev/null
+++ b/configs/mnist/sym/nce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedCrossEntropy
+ num_classes: 10
+ scale: 10.0
diff --git a/configs/mnist/sym/nfl+mae.yaml b/configs/mnist/sym/nfl+mae.yaml
new file mode 100644
index 0000000..76c7824
--- /dev/null
+++ b/configs/mnist/sym/nfl+mae.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NFLandMAE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/nfl+rce.yaml b/configs/mnist/sym/nfl+rce.yaml
new file mode 100644
index 0000000..cda7b56
--- /dev/null
+++ b/configs/mnist/sym/nfl+rce.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NFLandRCE
+ num_classes: 10
+ gamma: 0.5
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/nfl.yaml b/configs/mnist/sym/nfl.yaml
new file mode 100644
index 0000000..3622737
--- /dev/null
+++ b/configs/mnist/sym/nfl.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedFocalLoss
+ num_classes: 10
+ scale: 1.0
+ gamma: 0.5
diff --git a/configs/mnist/sym/ngce+mae.yaml b/configs/mnist/sym/ngce+mae.yaml
new file mode 100644
index 0000000..265016f
--- /dev/null
+++ b/configs/mnist/sym/ngce+mae.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NGCEandMAE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/ngce+rce.yaml b/configs/mnist/sym/ngce+rce.yaml
new file mode 100644
index 0000000..ac29154
--- /dev/null
+++ b/configs/mnist/sym/ngce+rce.yaml
@@ -0,0 +1,35 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NGCEandRCE
+ num_classes: 10
+ q: 0.1
+ alpha: 1.0
+ beta: 10.0
diff --git a/configs/mnist/sym/ngce.yaml b/configs/mnist/sym/ngce.yaml
new file mode 100644
index 0000000..4dfbca4
--- /dev/null
+++ b/configs/mnist/sym/ngce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NormalizedGeneralizedCrossEntropy
+ num_classes: 10
+ scale: 10.0
+ q: 0.1
diff --git a/configs/mnist/sym/nlnl.yaml b/configs/mnist/sym/nlnl.yaml
new file mode 100644
index 0000000..269d7fc
--- /dev/null
+++ b/configs/mnist/sym/nlnl.yaml
@@ -0,0 +1,33 @@
+epochs: 720
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-3
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: NLNL
+ num_classes: 10
+ ln_neg: 1
diff --git a/configs/mnist/sym/rce.yaml b/configs/mnist/sym/rce.yaml
new file mode 100644
index 0000000..6ba45c2
--- /dev/null
+++ b/configs/mnist/sym/rce.yaml
@@ -0,0 +1,33 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: ReverseCrossEntropy
+ num_classes: 10
+ scale: 1.0
diff --git a/configs/mnist/sym/sce.yaml b/configs/mnist/sym/sce.yaml
new file mode 100644
index 0000000..7b8b748
--- /dev/null
+++ b/configs/mnist/sym/sce.yaml
@@ -0,0 +1,34 @@
+epochs: 50
+grad_bound: 5.0
+log_frequency: 200
+
+dataset:
+ name: DatasetGenerator
+ asym: False
+ train_batch_size: 128
+ eval_batch_size: 512
+ data_path: ../datasets
+ dataset_type: 'MNIST'
+ num_of_workers: 8
+
+model:
+ name: ToyModel
+ type: $dataset.dataset_type
+
+optimizer:
+ name: SGD
+ lr: 0.01
+ weight_decay: 1.e-2
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: CosineAnnealingLR
+ T_max: $epochs
+ eta_min: 0.001
+
+criterion:
+ name: SCELoss
+ alpha: 0.01
+ beta: 1.0
+ num_classes: 10
diff --git a/configs/webvision_mini/ce.yaml b/configs/webvision_mini/ce.yaml
new file mode 100644
index 0000000..fa81b00
--- /dev/null
+++ b/configs/webvision_mini/ce.yaml
@@ -0,0 +1,31 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: CrossEntropyLoss
diff --git a/configs/webvision_mini/gce.yaml b/configs/webvision_mini/gce.yaml
new file mode 100644
index 0000000..2ae3ef5
--- /dev/null
+++ b/configs/webvision_mini/gce.yaml
@@ -0,0 +1,33 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: GeneralizedCrossEntropy
+ num_classes: 50
+ q: 0.7
diff --git a/configs/webvision_mini/nce+mae.yaml b/configs/webvision_mini/nce+mae.yaml
new file mode 100644
index 0000000..ca7cbaa
--- /dev/null
+++ b/configs/webvision_mini/nce+mae.yaml
@@ -0,0 +1,34 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: NCEandMAE
+ num_classes: 50
+ alpha: 50.0
+ beta: 1.0
diff --git a/configs/webvision_mini/nce+rce.yaml b/configs/webvision_mini/nce+rce.yaml
new file mode 100644
index 0000000..782e8ad
--- /dev/null
+++ b/configs/webvision_mini/nce+rce.yaml
@@ -0,0 +1,34 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: NCEandRCE
+ num_classes: 50
+ alpha: 50.0
+ beta: 0.1
diff --git a/configs/webvision_mini/nfl+mae.yaml b/configs/webvision_mini/nfl+mae.yaml
new file mode 100644
index 0000000..6c62f2b
--- /dev/null
+++ b/configs/webvision_mini/nfl+mae.yaml
@@ -0,0 +1,35 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: NFLandMAE
+ num_classes: 50
+ gamma: 0.5
+ alpha: 50.0
+ beta: 1.0
diff --git a/configs/webvision_mini/nfl+rce.yaml b/configs/webvision_mini/nfl+rce.yaml
new file mode 100644
index 0000000..250af5b
--- /dev/null
+++ b/configs/webvision_mini/nfl+rce.yaml
@@ -0,0 +1,35 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: NFLandRCE
+ num_classes: 50
+ gamma: 0.5
+ alpha: 50.0
+ beta: 0.1
diff --git a/configs/webvision_mini/sce.yaml b/configs/webvision_mini/sce.yaml
new file mode 100644
index 0000000..6702128
--- /dev/null
+++ b/configs/webvision_mini/sce.yaml
@@ -0,0 +1,34 @@
+epochs: 250
+grad_bound: 5.0
+log_frequency: 50
+
+dataset:
+ name: WebVisionDatasetLoader
+ setting: 'mini'
+ train_batch_size: 512
+ eval_batch_size: 1024
+ train_data_path: '/var/local/tmp/datasets/'
+ valid_data_path: '/var/local/tmp/datasets/ILSVR2012'
+ num_of_workers: 8
+
+model:
+ name: resnet50
+ num_classes: 50
+
+optimizer:
+ name: SGD
+ lr: 0.4
+ weight_decay: 3.e-5
+ momentum: 0.9
+ nesterov: True
+
+scheduler:
+ name: StepLR
+ step_size: 1
+ gamma: 0.97
+
+criterion:
+ name: SCELoss
+ num_classes: 50
+ alpha: 10.0
+ beta: 1.0
diff --git a/dataset.py b/dataset.py
new file mode 100644
index 0000000..c96cf6e
--- /dev/null
+++ b/dataset.py
@@ -0,0 +1,720 @@
+from torchvision import datasets, transforms
+from torch.utils.data import DataLoader
+from PIL import Image
+from tqdm import tqdm
+from numpy.testing import assert_array_almost_equal
+import numpy as np
+import os
+import torch
+import random
+import mlconfig
+
+
+def build_for_cifar100(size, noise):
+    """ Flip each class i to the next class i+1 (circular) with probability `noise`.
+    """
+ assert(noise >= 0.) and (noise <= 1.)
+
+ P = (1. - noise) * np.eye(size)
+ for i in np.arange(size - 1):
+ P[i, i+1] = noise
+
+ # adjust last row
+ P[size-1, 0] = noise
+
+ assert_array_almost_equal(P.sum(axis=1), 1, 1)
+ return P
+
+
+def multiclass_noisify(y, P, random_state=0):
+ """ Flip classes according to transition probability matrix T.
+ It expects a number between 0 and the number of classes - 1.
+ """
+
+ assert P.shape[0] == P.shape[1]
+ assert np.max(y) < P.shape[0]
+
+ # row stochastic matrix
+ assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1]))
+ assert (P >= 0.0).all()
+
+ m = y.shape[0]
+ new_y = y.copy()
+ flipper = np.random.RandomState(random_state)
+
+ for idx in np.arange(m):
+ i = y[idx]
+ # draw a vector with only an 1
+ flipped = flipper.multinomial(1, P[i, :], 1)[0]
+ new_y[idx] = np.where(flipped == 1)[0]
+
+ return new_y
+
+
+def other_class(n_classes, current_class):
+    """
+    Returns one random class index that differs from the given class.
+    :param n_classes: number of classes in the task
+    :param current_class: the class index to be excluded
+    :return: one random class != current_class
+    """
+    if current_class < 0 or current_class >= n_classes:
+        error_str = "current_class must be within the range [0, n_classes - 1]"
+        raise ValueError(error_str)
+
+ other_class_list = list(range(n_classes))
+ other_class_list.remove(current_class)
+ other_class = np.random.choice(other_class_list)
+ return other_class
+
+
+class MNISTNoisy(datasets.MNIST):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0):
+ super(MNISTNoisy, self).__init__(root, transform=transform, target_transform=target_transform, download=download)
+ self.targets = self.targets.numpy()
+ if asym:
+ P = np.eye(10)
+ n = nosiy_rate
+
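+            # 7 -> 1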
+ P[7, 7], P[7, 1] = 1. - n, n
+ # 2 -> 7
+ P[2, 2], P[2, 7] = 1. - n, n
+
+ # 5 <-> 6
+ P[5, 5], P[5, 6] = 1. - n, n
+ P[6, 6], P[6, 5] = 1. - n, n
+
+ # 3 -> 8
+ P[3, 3], P[3, 8] = 1. - n, n
+
+ y_train_noisy = multiclass_noisify(self.targets, P=P, random_state=seed)
+ actual_noise = (y_train_noisy != self.targets).mean()
+ assert actual_noise > 0.0
+ print('Actual noise %.2f' % actual_noise)
+ self.targets = y_train_noisy
+
+ else:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)]
+ class_noisy = int(n_noisy / 10)
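+            # distribute the symmetric noise evenly: flip class_noisy samples from every class to a uniformly chosen different class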
+ noisy_idx = []
+ for d in range(10):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=10, current_class=self.targets[i])
+ print(len(noisy_idx))
+
+ print("Print noisy label generation statistics:")
+ for i in range(10):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+
+ return
+
+
+class cifar10Nosiy(datasets.CIFAR10):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False):
+        super(cifar10Nosiy, self).__init__(root, download=download, transform=transform, target_transform=target_transform)
+ self.download = download
+ if asym:
+            # automobile <- truck, bird -> airplane, cat <-> dog, deer -> horse
+ source_class = [9, 2, 3, 5, 4]
+ target_class = [1, 0, 5, 3, 7]
+ for s, t in zip(source_class, target_class):
+ cls_idx = np.where(np.array(self.targets) == s)[0]
+ n_noisy = int(nosiy_rate * cls_idx.shape[0])
+ noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False)
+ for idx in noisy_sample_index:
+ self.targets[idx] = t
+ return
+ elif nosiy_rate > 0:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(10)]
+ class_noisy = int(n_noisy / 10)
+ noisy_idx = []
+ for d in range(10):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=10, current_class=self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(10):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+ return
+
+
+class cifar100Nosiy(datasets.CIFAR100):
+ def __init__(self, root, train=True, transform=None, target_transform=None, download=True, nosiy_rate=0.0, asym=False, seed=0):
+ super(cifar100Nosiy, self).__init__(root, download=download, transform=transform, target_transform=target_transform)
+ self.download = download
+ if asym:
+            """mistakes are inside the same superclass (5 classes each), e.g. 'fish'
+            """
+ nb_classes = 100
+ P = np.eye(nb_classes)
+ n = nosiy_rate
+ nb_superclasses = 20
+ nb_subclasses = 5
+
+ if n > 0.0:
+ for i in np.arange(nb_superclasses):
+ init, end = i * nb_subclasses, (i+1) * nb_subclasses
+ P[init:end, init:end] = build_for_cifar100(nb_subclasses, n)
+
+ y_train_noisy = multiclass_noisify(np.array(self.targets), P=P, random_state=seed)
+ actual_noise = (y_train_noisy != np.array(self.targets)).mean()
+ assert actual_noise > 0.0
+ print('Actual noise %.2f' % actual_noise)
+ self.targets = y_train_noisy.tolist()
+ return
+ elif nosiy_rate > 0:
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(100)]
+ class_noisy = int(n_noisy / 100)
+ noisy_idx = []
+ for d in range(100):
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=100, current_class=self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(100):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+ return
+
+
+@mlconfig.register
+class DatasetGenerator():
+ def __init__(self,
+ train_batch_size=128,
+ eval_batch_size=256,
+ data_path='data/',
+ seed=123,
+ num_of_workers=4,
+ asym=False,
+ dataset_type='CIFAR10',
+ is_cifar100=False,
+ cutout_length=16,
+ noise_rate=0.4):
+ self.seed = seed
+ np.random.seed(seed)
+ self.train_batch_size = train_batch_size
+ self.eval_batch_size = eval_batch_size
+ self.data_path = data_path
+ self.num_of_workers = num_of_workers
+ self.cutout_length = cutout_length
+ self.noise_rate = noise_rate
+ self.dataset_type = dataset_type
+ self.asym = asym
+ self.data_loaders = self.loadData()
+ return
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ if self.dataset_type == 'MNIST':
+ MEAN = [0.1307]
+ STD = [0.3081]
+ train_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(MEAN, STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(MEAN, STD)])
+
+ train_dataset = MNISTNoisy(root=self.data_path,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ seed=self.seed,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.MNIST(root=self.data_path,
+ train=False,
+ transform=test_transform,
+ download=True)
+
+ elif self.dataset_type == 'CIFAR100':
+ CIFAR_MEAN = [0.5071, 0.4865, 0.4409]
+ CIFAR_STD = [0.2673, 0.2564, 0.2762]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.RandomRotation(20),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ train_dataset = cifar100Nosiy(root=self.data_path,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ seed=self.seed,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.CIFAR100(root=self.data_path,
+ train=False,
+ transform=test_transform,
+ download=True)
+
+ elif self.dataset_type == 'CIFAR10':
+ CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
+ CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
+
+ train_transform = transforms.Compose([
+ transforms.RandomCrop(32, padding=4),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(CIFAR_MEAN, CIFAR_STD)])
+
+ train_dataset = cifar10Nosiy(root=self.data_path,
+ train=True,
+ transform=train_transform,
+ download=True,
+ asym=self.asym,
+ nosiy_rate=self.noise_rate)
+
+ test_dataset = datasets.CIFAR10(root=self.data_path,
+ train=False,
+ transform=test_transform,
+ download=True)
+ else:
+            raise ValueError("Unknown Dataset")
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.train_batch_size,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ print("Num of train %d" % (len(train_dataset)))
+ print("Num of test %d" % (len(test_dataset)))
+
+ return data_loaders
+
+
+class Clothing1MDataset:
+ def __init__(self, path, type='train', transform=None, target_transform=None):
+ self.path = path
+ if type == 'test':
+ flist = os.path.join(path, "annotations/clean_test.txt")
+ elif type == 'valid':
+ flist = os.path.join(path, "annotations/clean_val.txt")
+ elif type == 'train':
+ flist = os.path.join(path, "annotations/noisy_train.txt")
+ else:
+            raise ValueError('Unknown type')
+
+ self.imlist = self.flist_reader(flist)
+ self.transform = transform
+
+ def __len__(self):
+ return len(self.imlist)
+
+ def __getitem__(self, index):
+ impath, target = self.imlist[index]
+ img = Image.open(impath).convert("RGB")
+ if self.transform is not None:
+ img = self.transform(img)
+ return img, target
+
+ def flist_reader(self, flist):
+ imlist = []
+ with open(flist, 'r') as rf:
+ for line in rf.readlines():
+ row = line.split(" ")
+ impath = self.path + row[0]
+ imlabel = row[1]
+ imlist.append((impath, int(imlabel)))
+ return imlist
+
+
+@mlconfig.register
+class Clothing1MDatasetLoader:
+ def __init__(self, train_batch_size=128, eval_batch_size=256, data_path='data/', num_of_workers=4, use_cutout=True, cutout_length=112):
+ self.train_batch_size = train_batch_size
+ self.eval_batch_size = eval_batch_size
+ self.data_path = data_path
+ self.num_of_workers = num_of_workers
+ self.use_cutout = use_cutout
+ self.cutout_length = cutout_length
+ self.data_loaders = self.loadData()
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ MEAN = [0.485, 0.456, 0.406]
+ STD = [0.229, 0.224, 0.225]
+ train_transform = transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.RandomRotation(20),
+ transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=MEAN, std=STD),
+ ])
+ test_transform = transforms.Compose([
+ transforms.Resize((224, 224)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=MEAN, std=STD)
+ ])
+ if self.use_cutout:
+ print('Using Cutout')
+ train_transform.transforms.append(Cutout(self.cutout_length))
+
+ train_dataset = Clothing1MDataset(path=self.data_path,
+ type='train',
+ transform=train_transform)
+
+ test_dataset = Clothing1MDataset(path=self.data_path,
+ type='test',
+ transform=test_transform)
+
+ valid_dataset = Clothing1MDataset(path=self.data_path,
+ type='valid',
+ transform=test_transform)
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.train_batch_size,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ data_loaders['valid_dataset'] = DataLoader(dataset=valid_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+ return data_loaders
+
+
+class WebVisionDataset:
+ def __init__(self, path, file_name='webvision_mini_train', transform=None, target_transform=None):
+ self.target_list = []
+ self.path = path
+ self.load_file(os.path.join(path, file_name))
+ self.transform = transform
+ self.target_transform = target_transform
+ return
+
+ def load_file(self, filename):
+ f = open(filename, "r")
+ for line in f:
+ train_file, label = line.split()
+ self.target_list.append((train_file, int(label)))
+ f.close()
+ return
+
+ def __len__(self):
+ return len(self.target_list)
+
+ def __getitem__(self, index):
+ impath, target = self.target_list[index]
+ img = Image.open(os.path.join(self.path, impath)).convert("RGB")
+ if self.transform is not None:
+ img = self.transform(img)
+ return img, target
+
+
+@mlconfig.register
+class WebVisionDatasetLoader:
+ def __init__(self, setting='mini', train_batch_size=128, eval_batch_size=256, train_data_path='data/', valid_data_path='data/', num_of_workers=4):
+ self.train_batch_size = train_batch_size
+ self.eval_batch_size = eval_batch_size
+ self.train_data_path = train_data_path
+ self.valid_data_path = valid_data_path
+ self.num_of_workers = num_of_workers
+ self.setting = setting
+ self.data_loaders = self.loadData()
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+ train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ColorJitter(brightness=0.4,
+ contrast=0.4,
+ saturation=0.4,
+ hue=0.2),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ test_transform = transforms.Compose([transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ if self.setting == 'mini':
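+            # 'mini': train on the first 50 WebVision classes and validate on the matching ImageNet classes (ImageNetMini)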
+ train_dataset = WebVisionDataset(path=self.train_data_path,
+ file_name='webvision_mini_train.txt',
+ transform=train_transform)
+
+ test_dataset = ImageNetMini(root=self.valid_data_path,
+ split='val',
+ transform=test_transform)
+
+ elif self.setting == 'full':
+ train_dataset = WebVisionDataset(path=self.train_data_path,
+ file_name='train_filelist_google.txt',
+ transform=train_transform)
+
+ test_dataset = WebVisionDataset(path=self.valid_data_path,
+ file_name='val_filelist.txt',
+ transform=test_transform)
+
+ elif self.setting == 'full_imagenet':
+ train_dataset = WebVisionDataset(path=self.train_data_path,
+                                             file_name='train_filelist_google.txt',
+ transform=train_transform)
+
+ test_dataset = datasets.ImageNet(root=self.valid_data_path,
+ split='val',
+ transform=test_transform)
+
+ else:
+            raise NotImplementedError
+
+ data_loaders = {}
+
+ print('Training Set Size %d' % (len(train_dataset)))
+ print('Test Set Size %d' % (len(test_dataset)))
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.train_batch_size,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+ batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.num_of_workers)
+
+ return data_loaders
+
+
+class ImageNetMini(datasets.ImageNet):
+ def __init__(self, root, split='val', download=False, **kwargs):
+ super(ImageNetMini, self).__init__(root, download=download, split=split, **kwargs)
+ self.new_targets = []
+ self.new_images = []
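+        # keep only ImageNet classes 0-49 so the validation labels line up with the 50-class WebVision-mini setting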
+ for i, (file, cls_id) in enumerate(self.imgs):
+ if cls_id <= 49:
+ self.new_targets.append(cls_id)
+ self.new_images.append((file, cls_id))
+ print((file, cls_id))
+ self.imgs = self.new_images
+ self.targets = self.new_targets
+ self.samples = self.imgs
+ print(len(self.samples))
+ print(len(self.targets))
+ return
+
+
+class NosieImageNet(datasets.ImageNet):
+ def __init__(self, root, split='train', seed=999, download=False, target_class_num=200, nosiy_rate=0.4, **kwargs):
+ super(NosieImageNet, self).__init__(root, download=download, split=split, **kwargs)
+ random.seed(seed)
+ np.random.seed(seed)
+ self.new_idx = random.sample(list(range(0, 1000)), k=target_class_num)
+ print(len(self.new_idx), len(self.imgs))
+ self.new_imgs = []
+ self.new_targets = []
+
+ for file, cls_id in self.imgs:
+ if cls_id in self.new_idx:
+ new_idx = self.new_idx.index(cls_id)
+ self.new_imgs.append((file, new_idx))
+ self.new_targets.append(new_idx)
+ self.imgs = self.new_imgs
+ self.targets = self.new_targets
+ print(min(self.targets), max(self.targets))
+ # Noise
+ if split == 'train':
+ n_samples = len(self.targets)
+ n_noisy = int(nosiy_rate * n_samples)
+ print("%d Noisy samples" % (n_noisy))
+ class_index = [np.where(np.array(self.targets) == i)[0] for i in range(target_class_num)]
+ class_noisy = int(n_noisy / target_class_num)
+ noisy_idx = []
+ for d in range(target_class_num):
+ print(len(class_index[d]), d)
+ noisy_class_index = np.random.choice(class_index[d], class_noisy, replace=False)
+ noisy_idx.extend(noisy_class_index)
+ print("Class %d, number of noisy % d" % (d, len(noisy_class_index)))
+ for i in noisy_idx:
+ self.targets[i] = other_class(n_classes=target_class_num, current_class=self.targets[i])
+ (file, old_idx) = self.imgs[i]
+ self.imgs[i] = (file, self.targets[i])
+ print(len(noisy_idx))
+ print("Print noisy label generation statistics:")
+ for i in range(target_class_num):
+ n_noisy = np.sum(np.array(self.targets) == i)
+ print("Noisy class %s, has %s samples." % (i, n_noisy))
+
+ self.samples = self.imgs
+
+
+class ImageNetDatasetLoader:
+ def __init__(self,
+ batchSize=128,
+ eval_batch_size=256,
+ dataPath='data/',
+ seed=999,
+ target_class_num=200,
+ nosiy_rate=0.4,
+ numOfWorkers=4):
+ self.batchSize = batchSize
+ self.eval_batch_size = eval_batch_size
+ self.dataPath = dataPath
+ self.numOfWorkers = numOfWorkers
+ self.seed = seed
+ self.target_class_num = target_class_num
+ self.nosiy_rate = nosiy_rate
+ self.data_loaders = self.loadData()
+
+ def getDataLoader(self):
+ return self.data_loaders
+
+ def loadData(self):
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+
+ train_transform = transforms.Compose([
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ColorJitter(brightness=0.4,
+ contrast=0.4,
+ saturation=0.4,
+ hue=0.2),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ test_transform = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
+
+ train_dataset = NosieImageNet(root=self.dataPath,
+ split='train',
+ nosiy_rate=self.nosiy_rate,
+ target_class_num=self.target_class_num,
+ seed=self.seed,
+ transform=train_transform,
+ download=True)
+
+ test_dataset = NosieImageNet(root=self.dataPath,
+ split='val',
+ nosiy_rate=self.nosiy_rate,
+ target_class_num=self.target_class_num,
+ seed=self.seed,
+ transform=test_transform,
+ download=True)
+
+ data_loaders = {}
+
+ data_loaders['train_dataset'] = DataLoader(dataset=train_dataset,
+ batch_size=self.batchSize,
+ shuffle=True,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+
+ data_loaders['test_dataset'] = DataLoader(dataset=test_dataset,
+                                                   batch_size=self.eval_batch_size,
+ shuffle=False,
+ pin_memory=True,
+ num_workers=self.numOfWorkers)
+ return data_loaders
+
+
+def online_mean_and_sd(loader):
+ """Compute the mean and sd in an online fashion
+
+ Var[x] = E[X^2] - E^2[X]
+ """
+ cnt = 0
+ fst_moment = torch.empty(3)
+ snd_moment = torch.empty(3)
+
+ for data, _ in tqdm(loader):
+
+ b, c, h, w = data.shape
+ nb_pixels = b * h * w
+ sum_ = torch.sum(data, dim=[0, 2, 3])
+ sum_of_square = torch.sum(data ** 2, dim=[0, 2, 3])
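+        # update the per-channel running first and second moments, weighted by the number of pixels seen so far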
+ fst_moment = (cnt * fst_moment + sum_) / (cnt + nb_pixels)
+ snd_moment = (cnt * snd_moment + sum_of_square) / (cnt + nb_pixels)
+
+ cnt += nb_pixels
+
+ return fst_moment, torch.sqrt(snd_moment - fst_moment ** 2)
+
+
+class Cutout(object):
+ def __init__(self, length):
+ self.length = length
+
+ def __call__(self, img):
+ h, w = img.size(1), img.size(2)
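+        # mask out a randomly centred square of side `length` (clipped at the image border) across all channels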
+ mask = np.ones((h, w), np.float32)
+ y = np.random.randint(h)
+ x = np.random.randint(w)
+
+ y1 = np.clip(y - self.length // 2, 0, h)
+ y2 = np.clip(y + self.length // 2, 0, h)
+ x1 = np.clip(x - self.length // 2, 0, w)
+ x2 = np.clip(x + self.length // 2, 0, w)
+
+ mask[y1: y2, x1: x2] = 0.
+ mask = torch.from_numpy(mask)
+ mask = mask.expand_as(img)
+ img *= mask
+ return img
diff --git a/datasets.py b/datasets.py
deleted file mode 100644
index 5deb88a..0000000
--- a/datasets.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import os
-import multiprocessing as mp
-from subprocess import call
-import warnings
-import numpy as np
-import scipy.io as sio
-import numpy as np
-import keras.backend as K
-from keras.datasets import mnist, cifar10, cifar100
-from keras.utils import np_utils
-from util import other_class
-
-# Set random seed
-np.random.seed(123)
-
-NUM_CLASSES = {'mnist': 10, 'svhn': 10, 'cifar-10': 10, 'cifar-100': 100}
-
-def get_data(dataset='mnist', noise_ratio=0, random_shuffle=False):
- """
- Get training images with specified ratio of label noise
- :param dataset:
- :param noise_ratio: 0 - 100 (%)
- :param random_shuffle:
- :return:
- """
- if dataset == 'mnist':
- (X_train, y_train), (X_test, y_test) = mnist.load_data()
-
- X_train = X_train.reshape(-1, 28, 28, 1)
- X_test = X_test.reshape(-1, 28, 28, 1)
-
- X_train = X_train / 255.0
- X_test = X_test / 255.0
-
- elif dataset == 'svhn':
- if not os.path.isfile("data/svhn_train.mat"):
- print('Downloading SVHN train set...')
- call(
- "curl -o data/svhn_train.mat "
- "http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
- shell=True
- )
- if not os.path.isfile("data/svhn_test.mat"):
- print('Downloading SVHN test set...')
- call(
- "curl -o data/svhn_test.mat "
- "http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
- shell=True
- )
- train = sio.loadmat('data/svhn_train.mat')
- test = sio.loadmat('data/svhn_test.mat')
- X_train = np.transpose(train['X'], axes=[3, 0, 1, 2])
- X_test = np.transpose(test['X'], axes=[3, 0, 1, 2])
-
- X_train = X_train / 255.0
- X_test = X_test / 255.0
-
- means = X_train.mean(axis=0)
- # std = np.std(X_train)
- X_train = (X_train - means) # / std
- X_test = (X_test - means) # / std
-
- # reshape (n_samples, 1) to (n_samples,) and change 1-index
- # to 0-index
- y_train = np.reshape(train['y'], (-1,)) - 1
- y_test = np.reshape(test['y'], (-1,)) - 1
-
- elif dataset == 'cifar-10':
- (X_train, y_train), (X_test, y_test) = cifar10.load_data()
-
- X_train = X_train.reshape(-1, 32, 32, 3)
- X_test = X_test.reshape(-1, 32, 32, 3)
-
- X_train = X_train / 255.0
- X_test = X_test / 255.0
-
- means = X_train.mean(axis=0)
- # std = np.std(X_train)
- X_train = (X_train - means) # / std
- X_test = (X_test - means) # / std
-
- # they are 2D originally in cifar
- y_train = y_train.ravel()
- y_test = y_test.ravel()
-
- elif dataset == 'cifar-100':
- # num_classes = 100
- (X_train, y_train), (X_test, y_test) = cifar100.load_data()
-
- X_train = X_train.reshape(-1, 32, 32, 3)
- X_test = X_test.reshape(-1, 32, 32, 3)
-
- X_train = X_train / 255.0
- X_test = X_test / 255.0
-
- means = X_train.mean(axis=0)
- # std = np.std(X_train)
- X_train = (X_train - means) # / std
- X_test = (X_test - means) # / std
-
- # they are 2D originally in cifar
- y_train = y_train.ravel()
- y_test = y_test.ravel()
- else:
- return None, None, None, None
-
-
- X_train = X_train.astype('float32')
- X_test = X_test.astype('float32')
-
- # generate random noisy labels
- if noise_ratio > 0:
- data_file = "data/%s_train_labels_%s.npy" % (dataset, noise_ratio)
- if os.path.isfile(data_file):
- y_train = np.load(data_file)
- else:
- n_samples = y_train.shape[0]
- n_noisy = int(noise_ratio*n_samples/100)
- noisy_idx = np.random.choice(n_samples, n_noisy, replace=False)
- for i in noisy_idx:
- y_train[i] = other_class(n_classes=NUM_CLASSES[dataset], current_class=y_train[i])
- np.save(data_file, y_train)
-
- if random_shuffle:
- # random shuffle
- idx_perm = np.random.permutation(X_train.shape[0])
- X_train, y_train = X_train[idx_perm], y_train[idx_perm]
-
- # one-hot-encode the labels
- y_train = np_utils.to_categorical(y_train, NUM_CLASSES[dataset])
- y_test = np_utils.to_categorical(y_test, NUM_CLASSES[dataset])
-
- print("X_train:", X_train.shape)
- print("y_train:", y_train.shape)
- print("X_test:", X_test.shape)
- print("y_test", y_test.shape)
-
- return X_train, y_train, X_test, y_test
-
-
-def validatation_split(X, y, split=0.1):
- """
- split data to train and validation set, based on the split ratios
- :param X:
- :param y:
- :param split:
- :return:
- """
- idx_val = np.round(split * X.shape[0]).astype(int)
- X_val, y_val = X[:idx_val], y[:idx_val]
- X_train, y_train = X[idx_val:], y[idx_val:]
- return X_train, y_train, X_val, y_val
-
-
-if __name__ == "__main__":
- X_train, Y_train, X_test, Y_test = get_data(dataset='mnist', noise_ratio=40)
- Y_train = np.argmax(Y_train, axis=1)
- (_, Y_clean_train), (_, Y_clean_test) = mnist.load_data()
- clean_selected = np.argwhere(Y_train == Y_clean_train).reshape((-1,))
- noisy_selected = np.argwhere(Y_train != Y_clean_train).reshape((-1,))
- print("#correct labels: %s, #incorrect labels: %s" % (len(clean_selected), len(noisy_selected)))
\ No newline at end of file
diff --git a/evaluator.py b/evaluator.py
new file mode 100644
index 0000000..180e6b4
--- /dev/null
+++ b/evaluator.py
@@ -0,0 +1,88 @@
+import time
+import torch
+import os
+from util import log_display, accuracy, AverageMeter
+
+if torch.cuda.is_available():
+ torch.backends.cudnn.enabled = True
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cudnn.deterministic = True
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
+class Evaluator():
+ def __init__(self, data_loader, logger, config, name='Evaluator', metrics='classfication', summary_writer=None):
+ self.data_loader = data_loader
+ self.logger = logger
+ self.name = name
+ self.summary_writer = summary_writer
+ self.step = 0
+ self.config = config
+ self.log_frequency = config.log_frequency
+ self.loss_meters = AverageMeter()
+ self.acc_meters = AverageMeter()
+ self.acc5_meters = AverageMeter()
+ self.report_metrics = self.classfication_metrics if metrics == 'classfication' else self.regression_metrics
+ return
+
+ def log(self, epoch, GLOBAL_STEP):
+ display = log_display(epoch=epoch,
+ global_step=GLOBAL_STEP,
+ time_elapse=self.time_used,
+ **self.logger_payload)
+ self.logger.info(display)
+
+ def eval(self, epoch, GLOBAL_STEP, model, criterion):
+ for i, (images, labels) in enumerate(self.data_loader):
+ self.eval_batch(x=images, y=labels, model=model, criterion=criterion)
+ self.log(epoch, GLOBAL_STEP)
+ return
+
+ def eval_batch(self, x, y, model, criterion):
+ model.eval()
+ x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
+ start = time.time()
+ with torch.no_grad():
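+            # the model is expected to return a (logits, extra) tuple; only the logits are scored against the labels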
+ pred, _ = model(x)
+ loss = criterion(pred, y)
+ end = time.time()
+ self.time_used = end - start
+ self.step += 1
+ self.report_metrics(pred, y, loss)
+ return
+
+ def classfication_metrics(self, x, y, loss):
+ acc, acc5 = accuracy(x, y, topk=(1, 5))
+ self.loss_meters.update(loss.item(), y.shape[0])
+ self.acc_meters.update(acc.item(), y.shape[0])
+ self.acc5_meters.update(acc5.item(), y.shape[0])
+ self.logger_payload = {"acc": acc,
+ "acc_avg": self.acc_meters.avg,
+ "top5_acc": acc5,
+ "top5_acc_avg": self.acc5_meters.avg,
+ "loss": loss,
+ "loss_avg": self.loss_meters.avg}
+
+ if self.summary_writer is not None:
+ self.summary_writer.add_scalar(os.path.join(self.name, 'acc'), acc, self.step)
+ self.summary_writer.add_scalar(os.path.join(self.name, 'loss'), loss, self.step)
+
+ def regression_metrics(self, x, y, loss):
+ diff = abs((x - y).mean().detach().item())
+ self.loss_meters.update(loss.item(), y.shape[0])
+ self.acc_meters.update(diff, y.shape[0])
+ self.logger_payload = {"|diff|": diff,
+ "|diff_avg|": self.acc_meters.avg,
+ "loss": loss,
+ "loss_avg": self.loss_meters.avg}
+
+ if self.summary_writer is not None:
+ self.summary_writer.add_scalar(os.path.join(self.name, 'diff'), diff, self.step)
+ self.summary_writer.add_scalar(os.path.join(self.name, 'loss'), loss, self.step)
+
+ def _reset_stats(self):
+ self.loss_meters.reset()
+ self.acc_meters.reset()
+ self.acc5_meters.reset()
diff --git a/lass.py b/lass.py
new file mode 100644
index 0000000..4f68dd1
--- /dev/null
+++ b/lass.py
@@ -0,0 +1,68 @@
+import torch
+import torch.nn.functional as F
+
+class lass(object):
+ def __init__(self, model, device, a=0.25/255., b=0.2/255., r=0.3/255., iter_max=100, clip_min=-1.0e8, clip_max=1.0e8):
+        # model: the PyTorch model under attack (returns a (logits, auxiliary) tuple); device: torch device used for the search
+        # SEARCH PARAMETERS: a - gradient sign coefficient; b - noise coefficient; r - search radius per pixel; iter_max - max number of iterations
+ self.a = a
+ self.b = b
+ self.r = r
+ self.model = model
+ self.device = device
+ self.iter_max = iter_max
+ self.clip_min = clip_min
+ self.clip_max = clip_max
+
+ def find(self, X):
+ # elements of X in [0,1] for using default params a,b,r; otherwise scale accordingly
+ # generate max output label
+ X.requires_grad_(True)
+ pred, _ = self.model(X)
+ pred = F.softmax(pred, dim=1)
+ Y_pred_vec = torch.argmax(pred, dim=1)
+ Y_pred = F.one_hot(Y_pred_vec, pred.shape[1]).float()
+
+ X_adv = 1.*X
+ adv_ind = torch.zeros(X.shape[0],dtype=torch.bool,device=self.device)
+ converged = False
+ converged_label_thres = 3
+ adv_num_old = 0
+ i = 0
+ Y_pred_adv = pred
+ while i < self.iter_max and converged == False:
+ # I would recommend annealing the noise coefficient b gradually in this while loop
+ #print('on iter %s' % i)
+ i += 1
+ #X_adv.requires_grad_(True)
+ loss = F.cross_entropy(Y_pred_adv, Y_pred_vec)
+ if i == 1:
+ grad = torch.autograd.grad(loss, X)[0]
+ else:
+ grad = torch.autograd.grad(loss, X_adv)[0]
+ X_adv = X_adv.detach()
+
+
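+            # signed-gradient step plus Gaussian exploration noise; the perturbation is then projected back into the
+            # L-infinity ball of radius r around the original X and clamped to the valid value range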
+ step = self.a * torch.sign(grad) + self.b * torch.randn(*grad.shape, device=self.device)
+ X_adv += step
+ diff = X_adv - X
+ abs_diff = torch.abs(diff)
+ ind = abs_diff > self.r
+ X_adv[ind] = X[ind] + self.r * torch.sign(diff[ind])
+            X_adv = torch.clamp(X_adv, self.clip_min, self.clip_max)
+
+ X_adv.requires_grad_(True)
+ Y_pred_adv, _ = self.model(X_adv)
+ Y_pred_adv = F.softmax(Y_pred_adv, dim=1)
+ Y_pred_adv_vec = torch.argmax(Y_pred_adv, dim=1)
+ # if we ever identify a sample as critical sample, record it
+ adv_ind = adv_ind | ~torch.eq(Y_pred_vec, Y_pred_adv_vec).to(self.device)
+ adv_num_new = torch.sum(adv_ind)
+ #print('number of adv samples: %s' % adv_num_new)
+
+ if adv_num_new - adv_num_old < converged_label_thres:
+ converged = True
+
+ adv_num_old = adv_num_new
+
+ return X_adv, adv_ind
\ No newline at end of file
diff --git a/lass_tf.py b/lass_tf.py
deleted file mode 100644
index f7d061c..0000000
--- a/lass_tf.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""
-Code from Devansh Arpit
-2017 - icml - A Closer Look at Memorization in Deep Networks
-Adapted by Xingjun Ma to this tensorflow version.
-"""
-
-import numpy as np
-import keras.backend as K
-
-class lass(object):
- def __init__(self, x, y_pred, y_target, a=0.25/255., b=0.2/255., r=0.3/255., iter_max=100, clip_min=-np.inf, clip_max=np.inf):
- # x and y_target are tensorflow placeholders, y_pred is the model output tensorflow tensor
- # SEARCH PARAMETERS: a- gradient sign coefficient; b- noise coefficient; r- search radius per pixel; iter- max number of iters
- self.a = a
- self.b = b
- self.r = r
- self.iter_max = iter_max
- self.clip_min = clip_min
- self.clip_max = clip_max
-
- loss = K.categorical_crossentropy(y_pred, y_target)
- grads = K.gradients(K.mean(loss), x)[0] # this will return a list of tensors not one tensor
-
- self.grad_fn = K.function(inputs=[x, y_target] + [K.learning_phase()],
- outputs=[grads])
- self.pred_fn = K.function(inputs=[x] + [K.learning_phase()],
- outputs=[y_pred])
-
- def find(self, X, bs=500):
- # elements of X in [0,1] for using default params a,b,r; otherwise scale accordingly
- # generate max output label
- for batch in range(int(X.shape[0] / bs)):
- pred_this = self.pred_fn([X[bs * batch: bs * (batch + 1)], 0])[0]
- if not hasattr(self, 'Y_pred_exists'):
- self.Y_pred_exists=True
- Y_pred = np.zeros(shape=(X.shape[0], pred_this.shape[1]), dtype=np.float32)
- Y_pred[bs * batch: bs * (batch + 1)] = (pred_this // np.max(pred_this, axis=1)[:, None])
-
- Y_pred_vec = np.argmax(Y_pred, axis=1)
-
- X_adv = 1.*X
- adv_ind = np.asarray(np.zeros((X.shape[0],)), dtype='bool')
- converged = False
- converged_label_thres = 20
- adv_num_old = 0
- i = 0
- while i < self.iter_max and converged == False:
- # I would recommend annealing the noise coefficient b gradually in this while loop
- # print('on iter %s' % i)
- i += 1
- pred_adv = []
- for batch in range(int(X.shape[0] / bs)):
- grad_this = self.grad_fn([X_adv[bs * batch: bs * (batch + 1)], Y_pred[bs * batch: bs * (batch + 1)], 0])[0]
-
- step = self.a * np.sign(grad_this) + self.b * np.random.randn(*grad_this.shape)
- X_adv[bs * batch: bs * (batch + 1)] += step
- diff = X_adv[bs * batch: bs * (batch + 1)] - X[bs * batch: bs * (batch + 1)]
- abs_diff = np.abs(diff)
- ind = abs_diff > self.r
- X_adv[bs * batch: bs * (batch + 1)][ind] = X[bs * batch: bs * (batch + 1)][ind] + self.r * np.sign(
- diff[ind])
- X_adv[bs * batch: bs * (batch + 1)] = np.clip(X_adv[bs * batch: bs * (batch + 1)], \
- self.clip_min , self.clip_max )
-
- X_adv_this = X_adv[bs * batch: bs * (batch + 1)]
- pred_this_adv = self.pred_fn([X_adv_this, 0])[0]
- pred_this_adv = np.argmax(pred_this_adv, axis=1)
- pred_adv.extend(list(pred_this_adv))
-
- pred_adv = np.asarray(pred_adv)
-
- # if we ever identify a sample as critical sample, record it
- adv_ind = adv_ind + (Y_pred_vec != pred_adv)
- adv_num_new = np.sum(adv_ind)
- # print('number of adv samples: %s' % adv_num_new)
-
- if adv_num_new - adv_num_old < converged_label_thres:
- converged = True
-
- adv_num_old = adv_num_new
-
- return X_adv, adv_ind
\ No newline at end of file
diff --git a/lid.py b/lid.py
new file mode 100644
index 0000000..38c8a2c
--- /dev/null
+++ b/lid.py
@@ -0,0 +1,50 @@
+import torch
+from scipy.spatial.distance import cdist
+
+def gmean(input_x, dim=0):
+ log_x = torch.log(input_x)
+ return torch.exp(torch.mean(log_x, dim=dim))
+
+def get_lid_r(data, reference):
+ b = data.shape[0]
+ data = torch.flatten(data, start_dim=1)
+ reference = torch.flatten(reference, start_dim=1)
+ r = torch.cdist(data, reference, p=2)
+ a, idx = torch.sort(r, dim=1)
+ return r, a, idx
+
+def lid_mle(data, reference, k=20, get_idx=False, compute_mode='use_mm_for_euclid_dist_if_necessary'):
+ data = torch.flatten(data, start_dim=1)
+ reference = torch.flatten(reference, start_dim=1)
+ r = torch.cdist(reference, data, p=2, compute_mode=compute_mode)
+ a, idx = torch.sort(r, dim=1)
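+    # MLE (Hill) estimator: LID = -k / sum_i log(d_i / d_ref); column 0 is the zero self-distance,
+    # and the (k+1)-th neighbour distance serves as the reference radius d_ref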
+ lids = -k / torch.sum(torch.log(a[:, 1:k+1] / a[:, k+1].view(-1,1)), dim=1)
+ if get_idx:
+ return idx, lids
+ return lids
+
+def lid_mom_est(data, reference, k, get_idx=False, compute_mode='use_mm_for_euclid_dist_if_necessary'):
+ b = data.shape[0]
+ k = min(k, b-2)
+ data = torch.flatten(data, start_dim=1)
+ reference = torch.flatten(reference, start_dim=1)
+ r = torch.cdist(data, reference, p=2, compute_mode=compute_mode)
+ a, idx = torch.sort(r, dim=1)
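+    # method-of-moments estimate: LID = m / (d_k - m), where m is the mean distance to neighbours 1..k-1
+    # (the zero self-distance at index 0 is excluded)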
+ m = torch.mean(a[:, 1:k], dim=1)
+ lids = m / (a[:, k] - m)
+ if get_idx:
+ return idx, lids
+ return lids
+
+def lid_mom_est_eps(data, reference, k, get_idx=False):
+ b = data.shape[0]
+ k = min(k, b-2)
+ data = torch.flatten(data, start_dim=1)
+ reference = torch.flatten(reference, start_dim=1)
+ r = torch.cdist(data, reference, p=2)
+ a, idx = torch.sort(r, dim=1)
+ m = torch.mean(a[:, 1:k], dim=1)
+ lids = m / ((a[:, k] - m) + 1.e-4)
+ if get_idx:
+ return idx, lids
+ return lids
\ No newline at end of file
diff --git a/lid_plot.py b/lid_plot.py
deleted file mode 100644
index 1229fdb..0000000
--- a/lid_plot.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-Date: 28/07/2017
-LID exploration and visualization
-
-Author: Xingjun Ma
-"""
-import os
-import numpy as np
-import keras.backend as K
-from keras.datasets import mnist, cifar10
-import matplotlib.pyplot as plt
-from sklearn.decomposition import PCA
-from keras.optimizers import SGD
-from keras.utils import np_utils, to_categorical
-from util import get_lids_random_batch, mle_batch
-from datasets import get_data, validatation_split
-from models import get_model
-from loss import cross_entropy, boot_soft, boot_hard
-from scipy.interpolate import spline, interp1d
-
-np.random.seed(1024)
-
-MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'lid_dataset']
-MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L']
-COLORS = ['r', 'y', 'c', 'm', 'g', 'b']
-MARKERS = ['x', 'D', '<', '>', '^', 'o']
-
-
-def lid_trend_through_training(model_name='ce', dataset='mnist', noise_type='sym', noise_ratio=0.):
- """
- plot the lid trend for clean vs noisy samples through training.
- This can provide some information about manifold learning dynamics through training.
- """
- print('Dataset: %s, noise type: %s, noise ratio: %.1f' % (dataset, noise_type, noise_ratio))
-
- lids, acc_train, acc_test = None, None, None
-
- # get LID of raw inputs
- lid_subset = 128
- k = 20
- X_train, Y_train, X_test, Y_test = get_data(dataset)
- rand_idxes = np.random.choice(X_train.shape[0], lid_subset * 10, replace=False)
- X_train = X_train[rand_idxes]
- X_train = X_train.reshape((X_train.shape[0], -1))
-
- lid_tmp = []
- for i in range(10):
- s = i * 128
- e = (i+1)*128
- lid_tmp.extend(mle_batch(X_train[s:e], X_train[s:e], k=k))
- lid_X = np.mean(lid_tmp)
- print('LID of input X: ', lid_X)
-
- # load pre-saved to avoid recomputing
- lid_saved = "log/lid_%s_%s_%s%s.npy" % (model_name, dataset, noise_type, noise_ratio)
- acc_saved = "log/acc_%s_%s_%s%s.npy" % (model_name, dataset, noise_type, noise_ratio)
- if os.path.isfile(lid_saved):
- lids = np.load(lid_saved)
- lids = np.insert(lids, 0, lid_X)
- print(lids)
-
- if os.path.isfile(acc_saved):
- data = np.load(acc_saved)
- acc_train = data[0][:]
- acc_test = data[1][:]
-
- acc_train = np.insert(acc_train, 0, 0.)
- acc_test = np.insert(acc_test, 0, 0.)
-
- plot(model_name, dataset, noise_ratio, lids, acc_train, acc_test)
-
-
-def plot(model_name, dataset, noise_ratio, lids, acc_train, acc_test):
- """
- plot function
- """
- # plot
- fig = plt.figure() # figsize=(7, 6)
- xnew = np.arange(0, len(lids), 1)
-
- lids = lids[xnew]
- acc_train = acc_train[xnew]
- acc_test = acc_test[xnew]
-
- ax = fig.add_subplot(111)
- ax.plot(xnew, lids, c='r', marker='o', markersize=3, linewidth=2, label='LID score')
-
- ax2 = ax.twinx()
- ax2.plot(xnew, acc_train, c='b', marker='x', markersize=3, linewidth=2, label='Train acc')
- ax2.plot(xnew, acc_test, c='c', marker='^', markersize=3, linewidth=2, label='Test acc')
-
- # ax.set_xticks([])
- # ax.set_yticks([])
- ax.set_xlabel("Epoch", fontsize=15)
- ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15)
- ax2.set_ylabel("Train/test accuracy", fontsize=15)
- # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
-
- if dataset == 'mnist':
- ax.set_ylim((4, 22)) # for mnist
- ax2.set_ylim((0.2, 1.2))
- elif dataset == 'svhn':
- ax.set_ylim((7, 20)) # for svhn
- ax2.set_ylim((0.2, 1.2))
- elif dataset == 'cifar-10':
- ax.set_ylim((2.5, 12.5)) # for cifar-10
- ax2.set_ylim((0.2, 1.2))
- elif dataset == 'cifar-100':
- ax.set_ylim((3, 12)) # for cifar-100
- ax2.set_ylim((0., 1.))
-
- legend = ax.legend(loc='upper left')
- plt.setp(legend.get_texts(), fontsize=15)
- legend2 = ax2.legend(loc='upper right')
- plt.setp(legend2.get_texts(), fontsize=15)
- fig.savefig("plots/lid_trend_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300)
- plt.show()
-
-
-def lid_trend_of_learning_models(model_list=['ce'], dataset='mnist', noise_ratio=0):
- """
- The LID trend of different learning models throughout.
- """
- # plot initialization
- fig = plt.figure() # figsize=(7, 6)
- ax = fig.add_subplot(111)
-
- # get LID of raw inputs
- lid_subset = 128
- k = 20
- X_train, Y_train, X_test, Y_test = get_data(dataset)
- rand_idxes = np.random.choice(X_train.shape[0], lid_subset * 10, replace=False)
- X_train = X_train[rand_idxes]
- X_train = X_train.reshape((X_train.shape[0], -1))
-
- lid_tmp = []
- for i in range(10):
- s = i * 128
- e = (i + 1) * 128
- lid_tmp.extend(mle_batch(X_train[s:e], X_train[s:e], k=k))
- lid_X = np.mean(lid_tmp)
- print('LID of input X: ', lid_X)
-
- for model_name in model_list:
- file_name = "log/lid_%s_%s_%s.npy" % (model_name, dataset, noise_ratio)
- if os.path.isfile(file_name):
- lids = np.load(file_name)
- # insert lid of raw input X
- lids = np.insert(lids, 0, lid_X)
- print(lids)
-
- # Find indicies that you need to replace
- inds = np.where(np.isnan(lids))
- lids[inds] = np.nanmean(lids)
- # smooth for plot
- lids[lids < 0] = 0
- lids[lids > 10] = 10
-
- xnew = np.arange(0, len(lids), 1)
- lids = lids[xnew]
-
- # plot line
- idx = MODELS.index(model_name)
- ax.plot(xnew, lids, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
-
- ax.set_xlabel("Epoch", fontsize=15)
- ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15)
- # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
- legend = plt.legend(loc='lower center', ncol=2)
- plt.setp(legend.get_texts(), fontsize=15)
- fig.savefig("plots/lid_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300)
- plt.show()
-
-if __name__ == "__main__":
- lid_trend_through_training(model_name='ce', dataset='cifar-100', noise_type='sym', noise_ratio=0.)
- # lid_trend_of_learning_models(model_list=['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'lid_dataset'],
- # dataset='cifar-10', noise_ratio=60)
diff --git a/loss.py b/loss.py
index 60b5aa4..73e0e59 100644
--- a/loss.py
+++ b/loss.py
@@ -1,146 +1,526 @@
+import torch
+import torch.nn.functional as F
import numpy as np
-from keras import backend as K
-import tensorflow as tf
+import mlconfig
+mlconfig.register(torch.nn.CrossEntropyLoss)
-
-def symmetric_cross_entropy(alpha, beta):
- """
- Symmetric Cross Entropy:
- ICCV2019 "Symmetric Cross Entropy for Robust Learning with Noisy Labels"
- https://arxiv.org/abs/1908.06112
- """
- def loss(y_true, y_pred):
- y_true_1 = y_true
- y_pred_1 = y_pred
-
- y_true_2 = y_true
- y_pred_2 = y_pred
-
- y_pred_1 = tf.clip_by_value(y_pred_1, 1e-7, 1.0)
- y_true_2 = tf.clip_by_value(y_true_2, 1e-4, 1.0)
-
- return alpha*tf.reduce_mean(-tf.reduce_sum(y_true_1 * tf.log(y_pred_1), axis = -1)) + beta*tf.reduce_mean(-tf.reduce_sum(y_pred_2 * tf.log(y_true_2), axis = -1))
- return loss
-
-def cross_entropy(y_true, y_pred):
- return K.categorical_crossentropy(y_true, y_pred)
-
-
-def boot_soft(y_true, y_pred):
- """
- 2015 - iclrws - Training deep neural networks on noisy labels with bootstrapping.
- https://arxiv.org/abs/1412.6596
-
- :param y_true:
- :param y_pred:
- :return:
- """
- beta = 0.95
-
- y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
- y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
- return -K.sum((beta * y_true + (1. - beta) * y_pred) *
- K.log(y_pred), axis=-1)
-
-
-def boot_hard(y_true, y_pred):
- """
- 2015 - iclrws - Training deep neural networks on noisy labels with bootstrapping.
- https://arxiv.org/abs/1412.6596
-
- :param y_true:
- :param y_pred:
- :return:
- """
- beta = 0.8
-
- y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
- y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
- pred_labels = K.one_hot(K.argmax(y_pred, 1), num_classes=K.shape(y_true)[1])
- return -K.sum((beta * y_true + (1. - beta) * pred_labels) *
- K.log(y_pred), axis=-1)
-
-
-def forward(P):
- """
- Making Deep Neural Networks Robust to Label Noise: a Loss Correction Approach
- CVPR17 https://arxiv.org/abs/1609.03683
- :param P: noise model, a noisy label transition probability matrix
- :return:
- """
- P = K.constant(P)
-
- def loss(y_true, y_pred):
- y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
- y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
- return -K.sum(y_true * K.log(K.dot(y_pred, P)), axis=-1)
-
- return loss
-
-
-def backward(P):
- """
- Making Deep Neural Networks Robust to Label Noise: a Loss Correction Approach
- CVPR17 https://arxiv.org/abs/1609.03683
- :param P: noise model, a noisy label transition probability matrix
- :return:
- """
- P_inv = K.constant(np.linalg.inv(P))
-
- def loss(y_true, y_pred):
- y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
- y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
- return -K.sum(K.dot(y_true, P_inv) * K.log(y_pred), axis=-1)
-
- return loss
-
-
-def lid(logits, k=20):
- """
- Calculate LID for each data point in the array.
-
- :param logits:
- :param k:
- :return:
- """
- batch_size = tf.shape(logits)[0]
- # n_samples = logits.get_shape().as_list()
- # calculate pairwise distance
- r = tf.reduce_sum(logits * logits, 1)
- # turn r into column vector
- r1 = tf.reshape(r, [-1, 1])
- D = r1 - 2 * tf.matmul(logits, tf.transpose(logits)) + tf.transpose(r1) + \
- tf.ones([batch_size, batch_size])
-
- # find the k nearest neighbor
- D1 = -tf.sqrt(D)
- D2, _ = tf.nn.top_k(D1, k=k, sorted=True)
- D3 = -D2[:, 1:] # skip the x-to-x distance 0 by using [,1:]
-
- m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1]))
- v_log = tf.reduce_sum(tf.log(m + K.epsilon()), axis=1) # to avoid nan
- lids = -k / v_log
-
- return lids
-
-
-def lid_paced_loss(alpha=1.0, beta1=0.1, beta2=1.0):
- """TO_DO
- Class wise lid pace learning, targeting classwise asymetric label noise.
-
- Args:
- alpha: lid based adjustment paramter: this needs real-time update.
- Returns:
- Loss tensor of type float.
- """
- if alpha == 1.0:
- return symmetric_cross_entropy(alpha=beta1, beta=beta2)
+if torch.cuda.is_available():
+ torch.backends.cudnn.benchmark = True
+ if torch.cuda.device_count() > 1:
+ device = torch.device('cuda:0')
else:
- def loss(y_true, y_pred):
- pred_labels = K.one_hot(K.argmax(y_pred, 1), num_classes=K.shape(y_true)[1])
- y_new = alpha * y_true + (1. - alpha) * pred_labels
- y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
- y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
- return -K.sum(y_new * K.log(y_pred), axis=-1)
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
+@mlconfig.register
+class SCELoss(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes=10):
+ super(SCELoss, self).__init__()
+ self.device = device
+ self.alpha = alpha
+ self.beta = beta
+ self.num_classes = num_classes
+ self.cross_entropy = torch.nn.CrossEntropyLoss()
+
+ def forward(self, pred, labels):
+ # CCE
+ ce = self.cross_entropy(pred, labels)
+
+ # RCE
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
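+        # clamp the zero entries of the one-hot target to 1e-4 so torch.log(label_one_hot) in the reverse term stays finite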
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
+
+ # Loss
+ loss = self.alpha * ce + self.beta * rce.mean()
+ return loss
+
+@mlconfig.register
+class ReverseCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(ReverseCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
+ return self.scale * rce.mean()
+
+
+@mlconfig.register
+class NormalizedReverseCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedReverseCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+        normalizer = 1 / 4 * (self.num_classes - 1)
+        rce = (-1 * torch.sum(pred * torch.log(label_one_hot), dim=1))
+        return self.scale * normalizer * rce.mean()
+
+
+@mlconfig.register
+class NormalizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.log_softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
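+        # NCE: the cross-entropy with the given label, normalized by the sum of cross-entropies over all K candidate labels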
+ nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1))
+ return self.scale * nce.mean()
+
+
+@mlconfig.register
+class GeneralizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, q=0.7):
+ super(GeneralizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.q = q
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
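+        # GCE: (1 - p_y^q) / q, which tends to the cross-entropy as q -> 0 and to the MAE-style loss 1 - p_y at q = 1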
+ gce = (1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)) / self.q
+ return gce.mean()
+
+
+@mlconfig.register
+class NormalizedGeneralizedCrossEntropy(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0, q=0.7):
+ super(NormalizedGeneralizedCrossEntropy, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.q = q
+ self.scale = scale
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ numerators = 1. - torch.pow(torch.sum(label_one_hot * pred, dim=1), self.q)
+ denominators = self.num_classes - pred.pow(self.q).sum(dim=1)
+ ngce = numerators / denominators
+ return self.scale * ngce.mean()
+
+
+@mlconfig.register
+class MeanAbsoluteError(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(MeanAbsoluteError, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+ return
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+ mae = 1. - torch.sum(label_one_hot * pred, dim=1)
+ # Note: Reduced MAE
+ # Original: torch.abs(pred - label_one_hot).sum(dim=1)
+ # $MAE = \sum_{k=1}^{K} |\bm{p}(k|\bm{x}) - \bm{q}(k|\bm{x})|$
+ # $MAE = \sum_{k=1}^{K}\bm{p}(k|\bm{x}) - p(y|\bm{x}) + (1 - p(y|\bm{x}))$
+ # $MAE = 2 - 2p(y|\bm{x})$
+ #
+ return self.scale * mae.mean()
+
+
+@mlconfig.register
+class NormalizedMeanAbsoluteError(torch.nn.Module):
+ def __init__(self, num_classes, scale=1.0):
+ super(NormalizedMeanAbsoluteError, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.scale = scale
+ return
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
+        normalizer = 1 / (2 * (self.num_classes - 1))
+        mae = 1. - torch.sum(label_one_hot * pred, dim=1)
+        return self.scale * normalizer * mae.mean()
+
+
+@mlconfig.register
+class NCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(NCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nce(pred, labels) + self.rce(pred, labels)
+
+
+@mlconfig.register
+class NCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(NCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nce(pred, labels) + self.mae(pred, labels)
+
+
+@mlconfig.register
+class GCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.mae(pred, labels)
+
+
+@mlconfig.register
+class GCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.rce(pred, labels)
+
+
+@mlconfig.register
+class GCEandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(GCEandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.gce = GeneralizedCrossEntropy(num_classes=num_classes, q=q)
+ self.nce = NormalizedCrossEntropy(num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.gce(pred, labels) + self.nce(pred, labels)
+
+
+@mlconfig.register
+class NGCEandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.nce(pred, labels)
+
+
+@mlconfig.register
+class NGCEandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.mae(pred, labels)
+
+
+@mlconfig.register
+class NGCEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, q=0.7):
+ super(NGCEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.ngce = NormalizedGeneralizedCrossEntropy(scale=alpha, q=q, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.ngce(pred, labels) + self.rce(pred, labels)
+
+
+@mlconfig.register
+class MAEandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes):
+ super(MAEandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.mae = MeanAbsoluteError(scale=alpha, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.mae(pred, labels) + self.rce(pred, labels)
+
+
+@mlconfig.register
+class NLNL(torch.nn.Module):
+ def __init__(self, train_loader, num_classes, ln_neg=1):
+ super(NLNL, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.ln_neg = ln_neg
+ weight = torch.FloatTensor(num_classes).zero_() + 1.
+ if not hasattr(train_loader.dataset, 'targets'):
+ weight = [1] * num_classes
+ weight = torch.FloatTensor(weight)
+ else:
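+ # per-class weights: inverse of the class frequency, normalized by the largest class count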
+ for i in range(num_classes):
+ weight[i] = (torch.from_numpy(np.array(train_loader.dataset.targets)) == i).sum()
+ weight = 1 / (weight / weight.max())
+ self.weight = weight.to(self.device)
+ self.criterion = torch.nn.CrossEntropyLoss(weight=self.weight)
+ self.criterion_nll = torch.nn.NLLLoss()
+
+ def forward(self, pred, labels):
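+ # negative learning: sample ln_neg complementary labels per example, each guaranteed to differ from the given label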
+ labels_neg = (labels.unsqueeze(-1).repeat(1, self.ln_neg)
+ + torch.LongTensor(len(labels), self.ln_neg).to(self.device).random_(1, self.num_classes)) % self.num_classes
+ labels_neg = torch.autograd.Variable(labels_neg)
+
+ assert labels_neg.max() <= self.num_classes-1
+ assert labels_neg.min() >= 0
+ assert (labels_neg != labels.unsqueeze(-1).repeat(1, self.ln_neg)).sum() == len(labels)*self.ln_neg
+
+ s_neg = torch.log(torch.clamp(1. - F.softmax(pred, 1), min=1e-5, max=1.))
+ s_neg *= self.weight[labels].unsqueeze(-1).expand(s_neg.size()).to(self.device)
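+ # all given labels are replaced by the CE ignore_index (-100), disabling the positive-learning term in this phase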
+ labels = labels * 0 - 100
+ loss = self.criterion(pred, labels) * float((labels >= 0).sum())
+ loss_neg = self.criterion_nll(s_neg.repeat(self.ln_neg, 1), labels_neg.t().contiguous().view(-1)) * float((labels_neg >= 0).sum())
+ loss = ((loss+loss_neg) / (float((labels >= 0).sum())+float((labels_neg[:, 0] >= 0).sum())))
return loss
+
+
+@mlconfig.register
+class FocalLoss(torch.nn.Module):
+ '''
+ https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
+ '''
+
+ def __init__(self, gamma=0, alpha=None, size_average=True):
+ super(FocalLoss, self).__init__()
+ self.gamma = gamma
+ self.alpha = alpha
+ if isinstance(alpha, (float, int)):
+ self.alpha = torch.Tensor([alpha, 1-alpha])
+ if isinstance(alpha, list):
+ self.alpha = torch.Tensor(alpha)
+ self.size_average = size_average
+
+ def forward(self, input, target):
+ if input.dim() > 2:
+ input = input.view(input.size(0), input.size(1), -1) # N,C,H,W => N,C,H*W
+ input = input.transpose(1, 2) # N,C,H*W => N,H*W,C
+ input = input.contiguous().view(-1, input.size(2)) # N,H*W,C => N*H*W,C
+ target = target.view(-1, 1)
+
+ logpt = F.log_softmax(input, dim=1)
+ logpt = logpt.gather(1, target)
+ logpt = logpt.view(-1)
+ pt = torch.autograd.Variable(logpt.data.exp())
+
+ if self.alpha is not None:
+ if self.alpha.type() != input.data.type():
+ self.alpha = self.alpha.type_as(input.data)
+ at = self.alpha.gather(0, target.data.view(-1))
+ logpt = logpt * torch.autograd.Variable(at)
+
+ loss = -1 * (1-pt)**self.gamma * logpt
+ if self.size_average:
+ return loss.mean()
+ else:
+ return loss.sum()
+
+
+@mlconfig.register
+class NormalizedFocalLoss(torch.nn.Module):
+ def __init__(self, scale=1.0, gamma=0, num_classes=10, alpha=None, size_average=True):
+ super(NormalizedFocalLoss, self).__init__()
+ self.gamma = gamma
+ self.size_average = size_average
+ self.num_classes = num_classes
+ self.scale = scale
+
+ def forward(self, input, target):
+ target = target.view(-1, 1)
+ logpt = F.log_softmax(input, dim=1)
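+ # per-sample normalizer: the focal term summed over all classes (denominator of the normalized focal loss)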
+ normalizor = torch.sum(-1 * (1 - logpt.data.exp()) ** self.gamma * logpt, dim=1)
+ logpt = logpt.gather(1, target)
+ logpt = logpt.view(-1)
+ pt = torch.autograd.Variable(logpt.data.exp())
+ loss = -1 * (1-pt)**self.gamma * logpt
+ loss = self.scale * loss / normalizor
+
+ if self.size_average:
+ return loss.mean()
+ else:
+ return loss.sum()
+
+
+@mlconfig.register
+class NFLandNCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandNCE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.nce = NormalizedCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.nce(pred, labels)
+
+
+@mlconfig.register
+class NFLandMAE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandMAE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.mae(pred, labels)
+
+
+@mlconfig.register
+class NFLandRCE(torch.nn.Module):
+ def __init__(self, alpha, beta, num_classes, gamma=0.5):
+ super(NFLandRCE, self).__init__()
+ self.num_classes = num_classes
+ self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
+ self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ return self.nfl(pred, labels) + self.rce(pred, labels)
+
+
+@mlconfig.register
+class DMILoss(torch.nn.Module):
+ def __init__(self, num_classes):
+ super(DMILoss, self).__init__()
+ self.num_classes = num_classes
+
+ def forward(self, output, target):
+ outputs = F.softmax(output, dim=1)
+ targets = target.reshape(target.size(0), 1).cpu()
+ y_onehot = torch.FloatTensor(target.size(0), self.num_classes).zero_()
+ y_onehot.scatter_(1, targets, 1)
+ y_onehot = y_onehot.transpose(0, 1).to(output.device)
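+ # empirical joint matrix between one-hot labels and softmax predictions; L_DMI = -log|det(joint)|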
+ mat = y_onehot @ outputs
+ return -1.0 * torch.log(torch.abs(torch.det(mat.float())) + 0.001)
+
+@mlconfig.register
+class BootSoftLoss(torch.nn.Module):
+ def __init__(self, num_classes, beta=0.95):
+ super(BootSoftLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.beta = beta
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
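+ # soft bootstrapping target: beta * given (possibly noisy) label + (1 - beta) * predicted distribution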
+ bsl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred) * torch.log(pred), dim=1)
+ return bsl.mean()
+
+@mlconfig.register
+class BootHardLoss(torch.nn.Module):
+ def __init__(self, num_classes, beta=0.8):
+ super(BootHardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.beta = beta
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
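+ # hard bootstrapping target: mix the given label with the one-hot argmax prediction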
+ pred_one_hot = F.one_hot(torch.argmax(pred, dim=1), self.num_classes).float()
+ bhl = -torch.sum((self.beta * label_one_hot + (1. - self.beta) * pred_one_hot) * torch.log(pred), dim=1)
+ return bhl.mean()
+
+@mlconfig.register
+class ForwardLoss(torch.nn.Module):
+ def __init__(self, num_classes, noise_rate):
+ super(ForwardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.noise_rate = noise_rate
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.)
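+ # symmetric-noise transition matrix P: (1 - noise_rate) on the diagonal, noise_rate / (K - 1) elsewhere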
+ P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes))
+ P.diagonal().fill_(1-self.noise_rate)
+ P = P.to(self.device)
+ loss = -torch.sum(label_one_hot * torch.log(torch.matmul(pred, P)), dim=-1)
+ return loss.mean()
+
+
+@mlconfig.register
+class BackwardLoss(torch.nn.Module):
+ def __init__(self, num_classes, noise_rate):
+ super(BackwardLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.noise_rate = noise_rate
+
+ def forward(self, pred, labels):
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
+ assert (self.noise_rate >= 0.) and (self.noise_rate <= 1.)
+ P = self.noise_rate / (self.num_classes - 1) * torch.ones((self.num_classes, self.num_classes))
+ P.diagonal().fill_(1-self.noise_rate)
+ P = P.to(self.device)
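+ # backward correction: multiply the label distribution by P^{-1} before taking the CE term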
+ P_inv = torch.inverse(P)
+ loss = -torch.sum(torch.matmul(label_one_hot, P_inv) * torch.log(pred), dim=-1)
+ return loss.mean()
+
+@mlconfig.register
+class LIDPacedLoss(torch.nn.Module):
+ def __init__(self, num_classes, alpha, beta1, beta2):
+ super(LIDPacedLoss, self).__init__()
+ self.device = device
+ self.num_classes = num_classes
+ self.alpha = alpha
+ self.beta1 = beta1
+ self.beta2 = beta2
+ self.sce = SCELoss(alpha=beta1, beta=beta2, num_classes=num_classes)
+
+ def forward(self, pred, labels):
+ if self.alpha == 1.0:
+ return self.sce(pred, labels)
+ else:
+ pred = F.softmax(pred, dim=1)
+ pred = torch.clamp(pred, min=1e-7, max=1.0)
+ label_one_hot = F.one_hot(labels, self.num_classes).float().to(self.device)
+ label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
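+ # D2L-style pacing: interpolate between the given label and the model's current one-hot prediction with weight alpha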
+ pred_labels = F.one_hot(torch.argmax(pred, dim=1), num_classes=label_one_hot.size(1))
+ y_new = self.alpha * label_one_hot + (1. - self.alpha) * pred_labels
+ loss = -torch.sum(y_new * torch.log(pred), dim=-1)
+ return loss.mean()
\ No newline at end of file
diff --git a/loss_acc_plot.py b/loss_acc_plot.py
deleted file mode 100644
index 58259d3..0000000
--- a/loss_acc_plot.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""
-Train test error/accuracy/loss plot.
-
-Author: Xingjun Ma
-"""
-import os
-import numpy as np
-import tensorflow as tf
-import keras.backend as K
-from keras.datasets import mnist, cifar10
-from keras.optimizers import SGD
-from keras.utils import to_categorical
-import matplotlib.pyplot as plt
-from sklearn.decomposition import PCA
-from util import get_lids_random_batch
-from datasets import get_data, validatation_split
-from models import get_model
-from loss import cross_entropy, boot_soft, boot_hard
-from lass_tf import lass
-
-np.random.seed(1024)
-
-# MODELS = ['ce', 'd2l', 'backward', 'boot_soft', 'boot_hard', 'forward']
-
-MODELS = ['ce', 'forward', 'backward', 'boot_soft', 'boot_hard', 'd2l']
-MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L']
-COLORS = ['r', 'y', 'c', 'm', 'g', 'b']
-MARKERS = ['x', 'D', '<', '>', '^', 'o']
-
-def test_acc(model_list, dataset='mnist', noise_ratio=0.):
- """
- Test acc throughout training.
- """
- print('Dataset: %s, noise ratio: %s%%' % (dataset, noise_ratio))
-
- # plot initialization
- fig = plt.figure() # figsize=(7, 6)
- ax = fig.add_subplot(111)
-
- for model_name in model_list:
- file_name = 'log/acc_%s_%s_%s.npy' % \
- (model_name, dataset, noise_ratio)
- if os.path.isfile(file_name):
- accs = np.load(file_name)
- train_accs = accs[0]
- test_accs = accs[1]
- # print(test_accs)
-
- # plot line
- idx = MODELS.index(model_name)
-
- xnew = np.arange(0, len(test_accs), 1)
- test_accs = test_accs[xnew]
- ax.plot(xnew, test_accs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
-
- # ax.set_xticks([])
- # ax.set_yticks([])
- ax.set_xlabel("Epoch", fontsize=15)
- ax.set_ylabel("Test accuracy", fontsize=15)
- # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
- legend = plt.legend(loc='lower right', ncol=2)
- plt.setp(legend.get_texts(), fontsize=15)
- fig.savefig("plots/test_acc_trend_all_models_%s_%s.png" % (dataset, noise_ratio), dpi=300)
- plt.show()
-
-
-def test_acc_last_epoch(model_list, dataset='mnist', num_classes=10, noise_ratio=10, epochs=50):
- """
- Test acc throughout training.
- """
- print('Dataset: %s, epochs: %s, noise ratio: %s%%' % (dataset, epochs, noise_ratio))
-
- # load data
- _, _, X_test, Y_test = get_data(dataset)
- # convert class vectors to binary class matrices
- Y_test = to_categorical(Y_test, num_classes)
-
- # load model
- image_shape = X_test.shape[1:]
- model = get_model(dataset, input_tensor=None, input_shape=image_shape)
- sgd = SGD(lr=0.01, momentum=0.9)
-
- for model_name in model_list:
- # the critical sample ratio of the representations learned at every epoch
- model_path = 'model/%s_%s_%s.hdf5' % (model_name, dataset, noise_ratio)
- model.load_weights(model_path)
- model.compile(
- loss=cross_entropy,
- optimizer=sgd,
- metrics=['accuracy']
- )
-
- _, test_acc = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)
- print('model: %s, epoch: %s, test_acc: %s' % (model_name, epochs-1, test_acc))
-
-def print_loss_acc_log(model_list, dataset='mnist', noise_ratio=0.1):
- """
- Test acc throughout training.
-
- :param model_list:
- :param dataset:
- :param noise_ratio:
- :return:
- """
- print('Dataset: %s, noise ratio: %s' % (dataset, noise_ratio))
-
- for model_name in model_list:
- loss_file = 'log/loss_%s_%s_%s.npy' % \
- (model_name, dataset, noise_ratio)
- acc_file = 'log/acc_%s_%s_%s.npy' % \
- (model_name, dataset, noise_ratio)
- if os.path.isfile(loss_file):
- losses = np.load(loss_file)
- # print(losses)
- val_loss = losses[1, -5:]
- print('--------- val loss ---------')
- print(val_loss)
- if os.path.isfile(acc_file):
- accs = np.load(acc_file)
- print('ecpos: ', len(accs[1]))
- val_acc = accs[1, -5:]
- print('--------- val acc ---------')
- print(val_acc)
-
-if __name__ == "__main__":
- # mnist: epoch=50, cifar-10: epoch=120
- # test_acc(model_list=['ce'], dataset='cifar-10', noise_ratio=40)
-
- # test_acc_last_epoch(model_list=['ce'],
- # dataset='cifar-10', num_classes=10, noise_ratio=40, epochs=120)
- print_loss_acc_log(model_list=['boot_hard'], dataset='cifar-100', noise_ratio=0)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..885aca1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,206 @@
+import torch
+import argparse
+import util
+import os
+import datetime
+import random
+import mlconfig
+import loss
+import models
+import dataset
+import shutil
+from evaluator import Evaluator
+from trainer import Trainer
+from util import get_lids_random_batch,get_csr_random_batch
+from callback_util import D2LCallback
+import numpy as np
+from plot import lid_trend_through_training, lid_trend_of_learning_models, test_acc_trend_of_learning_models, csr_trend_of_learning_models
+
+# ArgParse
+parser = argparse.ArgumentParser(description='Normalized Loss Functions for Deep Learning with Noisy Labels')
+# Training
+parser.add_argument('--resume', action='store_true', default=False)
+parser.add_argument('--seed', type=int, default=0)
+parser.add_argument('--config_path', type=str, default='configs')
+parser.add_argument('--version', type=str, default='ce')
+parser.add_argument('--exp_name', type=str, default="run1")
+parser.add_argument('--load_model', action='store_true', default=False)
+parser.add_argument('--data_parallel', action='store_true', default=False)
+parser.add_argument('--asym', action='store_true', default=False)
+parser.add_argument('--noise_rate', type=float, default=0.0)
+parser.add_argument('--plot', action='store_true', default=False)
+parser.add_argument('--plotall', action='store_true', default=False)
+args = parser.parse_args()
+
+# Set up
+if args.exp_name == '' or args.exp_name is None:
+ args.exp_name = 'exp_' + str(datetime.datetime.now())
+exp_path = os.path.join(args.exp_name, args.version)
+log_file_path = os.path.join(exp_path, args.version)
+checkpoint_path = os.path.join(exp_path, 'checkpoints')
+checkpoint_path_file = os.path.join(checkpoint_path, args.version)
+util.build_dirs(exp_path)
+util.build_dirs(checkpoint_path)
+
+logger = util.setup_logger(name=args.version, log_file=log_file_path + ".log")
+for arg in vars(args):
+ logger.info("%s: %s" % (arg, getattr(args, arg)))
+
+random.seed(args.seed)
+if torch.cuda.is_available():
+ torch.cuda.manual_seed(args.seed)
+ torch.backends.cudnn.enabled = True
+ torch.backends.cudnn.benchmark = True
+ device = torch.device('cuda')
+ logger.info("Using CUDA!")
+ device_list = [torch.cuda.get_device_name(i) for i in range(0, torch.cuda.device_count())]
+ logger.info("GPU List: %s" % (device_list))
+else:
+ device = torch.device('cpu')
+
+logger.info("PyTorch Version: %s" % (torch.__version__))
+config_file = os.path.join(args.config_path, args.version) + '.yaml'
+config = mlconfig.load(config_file)
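+# the forward ('fl') and backward ('bl') corrections need the noise rate injected into the criterion config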
+if args.version == 'fl' or args.version == 'bl':
+ config['criterion']['noise_rate'] = args.noise_rate
+if args.version != 'd2l':
+ config.set_immutable()
+shutil.copyfile(config_file, os.path.join(exp_path, args.version+'.yaml'))
+for key in config:
+ logger.info("%s: %s" % (key, config[key]))
+
+
+def train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, callback, mode):
+ for epoch in range(starting_epoch, config.epochs):
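+ # for D2L, stage 2 rebuilds the criterion each epoch with the pacing weight alpha estimated by the LID callback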
+ if args.version == 'd2l':
+ if mode == 'stage2':
+ config['criterion']['alpha'] = callback.alpha
+ criterion = config.criterion()
+
+ logger.info("="*20 + "Training" + "="*20)
+
+ # Train
+ ENV['global_step'] = trainer.train(epoch, ENV['global_step'], model, optimizer, criterion)
+ scheduler.step()
+
+ if args.version == 'd2l':
+ callback.on_epoch_begin(epoch)
+ if mode == 'stage1':
+ if callback.is_found_turning_point:
+ break
+
+ # Eval
+ logger.info("="*20 + "Eval" + "="*20)
+ evaluator.eval(epoch, ENV['global_step'], model, torch.nn.CrossEntropyLoss())
+ payload = ('Eval Loss:%.4f\tEval acc: %.2f' % (evaluator.loss_meters.avg, evaluator.acc_meters.avg*100))
+ logger.info(payload)
+ # LID
+ lids = get_lids_random_batch(model, data_loader, device, k=20, batch_size=128)
+ lid = lids.mean()
+ logger.info('LID:%f'%(lid))
+ # CSR
+ csr = get_csr_random_batch(model, data_loader, device)
+ logger.info('CSR:%f'%(csr))
+
+ ENV['train_history'].append(trainer.acc_meters.avg*100)
+ ENV['eval_history'].append(evaluator.acc_meters.avg*100)
+ ENV['current_acc'] = evaluator.acc_meters.avg*100
+ ENV['best_acc'] = max(ENV['current_acc'], ENV['best_acc'])
+ ENV['lid'].append(lid)
+ ENV['csr'].append(csr)
+
+
+ # Reset Stats
+ trainer._reset_stats()
+ evaluator._reset_stats()
+
+ # Save Model
+ target_model = model.module if args.data_parallel else model
+ util.save_model(ENV=ENV,
+ epoch=epoch,
+ model=target_model,
+ optimizer=optimizer,
+ scheduler=scheduler,
+ filename=checkpoint_path_file)
+ logger.info('Model Saved at %s', checkpoint_path_file)
+ torch.cuda.empty_cache()
+ return
+
+
+def main():
+ if config.dataset.name == 'DatasetGenerator':
+ data_loader = config.dataset(seed=args.seed, noise_rate=args.noise_rate, asym=args.asym)
+ else:
+ data_loader = config.dataset()
+
+ model = config.model()
+ if isinstance(data_loader, dataset.Clothing1MDatasetLoader):
+ model.fc = torch.nn.Linear(2048, 14)
+ model = model.to(device)
+
+ data_loader = data_loader.getDataLoader()
+ logger.info("param size = %fMB", util.count_parameters_in_MB(model))
+ if args.data_parallel:
+ model = torch.nn.DataParallel(model)
+ #data_train = data_loader['train_dataset'].dataset
+ #tensor_list = []
+ #for j in range(len(data_train)):
+ #tensor_list.append(data_train[j][0])
+
+ optimizer = config.optimizer(model.parameters())
+ scheduler = config.scheduler(optimizer)
+ scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)
+ if config.criterion.name == 'NLNL':
+ criterion = config.criterion(train_loader=data_loader['train_dataset'])
+ else:
+ criterion = config.criterion()
+ trainer = Trainer(data_loader['train_dataset'], logger, config)
+ evaluator = Evaluator(data_loader['test_dataset'], logger, config)
+
+ starting_epoch = 0
+ ENV = {'global_step': 0,
+ 'best_acc': 0.0,
+ 'current_acc': 0.0,
+ 'train_history': [],
+ 'eval_history': [],
+ 'lid':[],
+ 'csr':[]}
+
+ if args.load_model:
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ starting_epoch = checkpoint['epoch']
+ ENV = checkpoint['ENV']
+ trainer.global_step = ENV['global_step']
+ logger.info("File %s loaded!" % (checkpoint_path_file))
+
+ idx = -5 if args.asym else -4
+ if args.plot:
+ lid_trend_through_training(exp_name=args.exp_name, dataset=args.config_path[8:idx], data_loader=data_loader, device=device, model=model, optimizer=optimizer, scheduler=scheduler, model_name=args.version, noise_type='sym', noise_ratio=args.noise_rate)
+ elif args.plotall:
+ lid_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate)
+ test_acc_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate)
+ csr_trend_of_learning_models(exp_name=args.exp_name, dataset=args.config_path[8:idx], model=model, optimizer=optimizer, scheduler=scheduler, model_list=['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l'], noise_ratio=args.noise_rate)
+ else:
+ d2l_callback = D2LCallback(model, data_loader, device)
+ train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, d2l_callback, mode='stage1')
+ if args.version == 'd2l':
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ starting_epoch = checkpoint['epoch']
+ ENV = checkpoint['ENV']
+ trainer.global_step = ENV['global_step']
+ logger.info("File %s loaded!" % (checkpoint_path_file))
+
+ train(starting_epoch, model, data_loader, optimizer, scheduler, criterion, trainer, evaluator, ENV, d2l_callback, mode='stage2')
+
+
+ return
+
+
+if __name__ == '__main__':
+ main()
diff --git a/models.py b/models.py
index ec6a834..be3e50e 100644
--- a/models.py
+++ b/models.py
@@ -1,127 +1,268 @@
-import numpy as np
-import keras.backend as K
-from keras.models import Model
-from keras.regularizers import l2
-from keras.layers import Input, Conv2D, Dense, MaxPooling2D, Flatten, Activation, BatchNormalization
-from resnet import cifar100_resnet
-
-def get_model(dataset='mnist', input_tensor=None, input_shape=None, num_classes=10):
- """
- Takes in a parameter indicating which model type to use ('mnist',
- 'cifar-10' or 'cifar-100') and returns the appropriate Keras model.
- :param dataset: A string indicating which dataset we are building
- a model for.
- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
- to use as image input for the model.
- input_shape: optional shape tuple
- :return: The model; a Keras 'Model' instance.
- """
- assert dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100'], \
- "dataset parameter must be either 'mnist', 'svhn', 'cifar-10' or 'cifar-100'"
-
- if input_tensor is None:
- img_input = Input(shape=input_shape)
- else:
- if not K.is_keras_tensor(input_shape):
- img_input = Input(tensor=input_tensor, shape=input_shape)
- else:
- img_input = input_tensor
-
- if dataset == 'mnist':
- # ## LeNet-5 like 4-layer CNN
- x = Conv2D(32, (3, 3), padding='same', kernel_initializer="he_normal", name='conv1')(img_input)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)
-
- x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='conv2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x)
-
- x = Flatten()(x)
-
- x = Dense(128, kernel_initializer="he_normal", name='fc1')(x)
- x = BatchNormalization()(x)
- x = Activation('relu', name='lid')(x)
- # x = Dropout(0.2)(x)
-
- x = Dense(num_classes, kernel_initializer="he_normal")(x)
- x = Activation('softmax')(x)
-
- model = Model(img_input, x)
-
- elif dataset == 'svhn':
- # ## LeNet-5 like 5-layer CNN
- x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal', name='conv1')(img_input)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)
-
- x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal', name='conv2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x)
-
- x = Flatten()(x)
-
- x = Dense(512, kernel_initializer='he_normal', name='fc1')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
-
- x = Dense(128, kernel_initializer="he_normal", name='fc2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu', name='lid')(x)
- # x = Dropout(0.2)(x)
-
- x = Dense(num_classes, kernel_initializer="he_normal")(x)
- x = Activation('softmax')(x)
-
- model = Model(img_input, x)
-
- elif dataset == 'cifar-10':
- # VGG-like 8-layer CNN
- # Block 1
- x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='block1_conv1')(img_input)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='block1_conv2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
-
- # Block 2
- x = Conv2D(128, (3, 3), padding='same', kernel_initializer="he_normal", name='block2_conv1')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = Conv2D(128, (3, 3), padding='same', kernel_initializer="he_normal", name='block2_conv2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
-
- # Block 3
- x = Conv2D(196, (3, 3), padding='same', kernel_initializer="he_normal", name='block3_conv1')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = Conv2D(196, (3, 3), padding='same', kernel_initializer="he_normal", name='block3_conv2')(x)
- x = BatchNormalization()(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
-
- x = Flatten(name='flatten')(x)
-
- x = Dense(256, kernel_initializer="he_normal", kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), name='fc1')(x)
- x = BatchNormalization()(x)
- x = Activation('relu', name='lid')(x)
-
- x = Dense(num_classes, kernel_initializer="he_normal")(x)
- x = Activation('softmax')(x)
-
- # Create model.
- model = Model(img_input, x)
-
- elif dataset == 'cifar-100':
- # resnet
- model = cifar100_resnet(depth=7, num_classes=num_classes)
-
- return model
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import mlconfig
+import torchvision
+mlconfig.register(torchvision.models.resnet50)
+mlconfig.register(torch.optim.SGD)
+mlconfig.register(torch.optim.Adam)
+mlconfig.register(torch.optim.lr_scheduler.MultiStepLR)
+mlconfig.register(torch.optim.lr_scheduler.CosineAnnealingLR)
+mlconfig.register(torch.optim.lr_scheduler.StepLR)
+mlconfig.register(torch.optim.lr_scheduler.ExponentialLR)
+
+
+class ConvBrunch(nn.Module):
+ def __init__(self, in_planes, out_planes, kernel_size=3):
+ super(ConvBrunch, self).__init__()
+ padding = (kernel_size - 1) // 2
+ self.out_conv = nn.Sequential(
+ nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding),
+ nn.BatchNorm2d(out_planes),
+ nn.ReLU())
+
+ def forward(self, x):
+ return self.out_conv(x)
+
+
+@mlconfig.register
+class ToyModel(nn.Module):
+ def __init__(self, type='CIFAR10'):
+ super(ToyModel, self).__init__()
+ self.type = type
+ """
+ if type == 'CIFAR10':
+ self.block1 = nn.Sequential(
+ ConvBrunch(3, 64, 3),
+ ConvBrunch(64, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block2 = nn.Sequential(
+ ConvBrunch(64, 128, 3),
+ ConvBrunch(128, 128, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block3 = nn.Sequential(
+ ConvBrunch(128, 196, 3),
+ ConvBrunch(196, 196, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
+ self.fc1 = nn.Sequential(
+ nn.Linear(4*4*196, 256),
+ nn.BatchNorm1d(256),
+ nn.ReLU())
+ self.fc2 = nn.Linear(256, 10)
+ self.fc_size = 4*4*196
+ """
+ if type == 'CIFAR10':
+ self.block1 = nn.Sequential(
+ ConvBrunch(3, 32, 3),
+ ConvBrunch(32, 32, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block2 = nn.Sequential(
+ ConvBrunch(32, 64, 3),
+ ConvBrunch(64, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block3 = nn.Sequential(
+ ConvBrunch(64, 128, 3),
+ ConvBrunch(128, 128, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
+ self.fc1 = nn.Sequential(
+ nn.Flatten(),
+ nn.Dropout(0.5),
+ nn.Linear(4*4*128, 1024),
+ nn.ReLU(),
+ nn.BatchNorm1d(1024),
+ nn.Dropout(0.5),
+ nn.Linear(1024,512),
+ nn.ReLU(),
+ nn.BatchNorm1d(512)
+ )
+ self.fc2 = nn.Sequential(
+ nn.Dropout(0.5),
+ nn.Linear(512,10)
+ )
+ self.fc_size = 4*4*128
+
+
+ elif type == 'MNIST':
+ self.block1 = nn.Sequential(
+ ConvBrunch(1, 64, 3),
+ )
+ self.block2 = nn.Sequential(
+ ConvBrunch(64, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2),
+ nn.Dropout(0.5))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
+ self.fc1 = nn.Sequential(
+ nn.Flatten(),
+ nn.Linear(64*14*14, 128),
+ nn.BatchNorm1d(128),
+ nn.ReLU(),
+ nn.Dropout(0.5))
+ self.fc2 = nn.Linear(128, 10)
+ self.fc_size = 64*14*14
+
+ """
+ elif type == 'MNIST':
+ self.block1 = nn.Sequential(
+ ConvBrunch(1, 32, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ self.block2 = nn.Sequential(
+ ConvBrunch(32, 64, 3),
+ nn.MaxPool2d(kernel_size=2, stride=2))
+ # self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
+ self.fc1 = nn.Sequential(
+ nn.Linear(64*7*7, 128),
+ nn.BatchNorm1d(128),
+ nn.ReLU())
+ self.fc2 = nn.Linear(128, 10)
+ self.fc_size = 64*7*7
+ """
+ self._reset_prams()
+
+ def _reset_prams(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, nn.Linear):
+ nn.init.xavier_uniform_(m.weight)
+ return
+
+ def forward(self, x):
+ x = self.block1(x)
+ x = self.block2(x)
+ x = self.block3(x) if self.type == 'CIFAR10' else x
+ # x = self.global_avg_pool(x)
+ # x = x.view(x.shape[0], -1)
+ x = x.view(-1, self.fc_size)
+ x_fc1 = self.fc1(x)
+ x = self.fc2(x_fc1)
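+ # return both the logits and the penultimate-layer features (e.g. for LID estimation)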
+ return x, x_fc1
+
+
+'''ResNet in PyTorch.
+For Pre-activation ResNet, see 'preact_resnet.py'.
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+ Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, in_planes, planes, stride=1):
+ super(BasicBlock, self).__init__()
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion*planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(self.expansion*planes)
+ )
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.bn2(self.conv2(out))
+ out += self.shortcut(x)
+ out = F.relu(out)
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, in_planes, planes, stride=1):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(self.expansion*planes)
+
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion*planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(self.expansion*planes)
+ )
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = F.relu(self.bn2(self.conv2(out)))
+ out = self.bn3(self.conv3(out))
+ out += self.shortcut(x)
+ out = F.relu(out)
+ return out
+
+
+class ResNet(nn.Module):
+ def __init__(self, block, num_blocks, num_classes=10):
+ super(ResNet, self).__init__()
+ self.in_planes = 64
+
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+ self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+ self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+ self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+ self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+ self.linear = nn.Linear(512*block.expansion, num_classes)
+ self._reset_prams()
+
+ def _make_layer(self, block, planes, num_blocks, stride):
+ strides = [stride] + [1]*(num_blocks-1)
+ layers = []
+ for stride in strides:
+ layers.append(block(self.in_planes, planes, stride))
+ self.in_planes = planes * block.expansion
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.layer1(out)
+ out = self.layer2(out)
+ out = self.layer3(out)
+ out = self.layer4(out)
+ out = F.avg_pool2d(out, 4)
+ out = out.view(out.size(0), -1)
+ out = self.linear(out)
+ return out
+
+ def _reset_prams(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
+ elif isinstance(m, nn.Linear):
+ nn.init.xavier_uniform_(m.weight)
+ return
+
+
+@mlconfig.register
+def ResNet18(num_classes=10):
+ return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
+
+
+@mlconfig.register
+def ResNet34(num_classes=10):
+ return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
+
+
+@mlconfig.register
+def ResNet50(num_classes=10):
+ return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
+
+
+@mlconfig.register
+def ResNet101(num_classes=10):
+ return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
+
+
+@mlconfig.register
+def ResNet152(num_classes=10):
+ return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
diff --git a/plot.py b/plot.py
new file mode 100644
index 0000000..eafd327
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,214 @@
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from lid import lid_mle
+import torch
+import util
+
+np.random.seed(1024)
+
+MODELS = ['ce', 'fl', 'bl', 'bsl', 'bhl', 'd2l']
+MODEL_LABELS = ['cross-entropy', 'forward', 'backward', 'boot-soft', 'boot-hard', 'D2L']
+COLORS = ['r', 'y', 'c', 'm', 'g', 'b']
+MARKERS = ['x', 'D', '<', '>', '^', 'o']
+
+
+def lid_trend_through_training(exp_name, dataset, data_loader, device, model, optimizer, scheduler, model_name='d2l', noise_type='sym', noise_ratio=0.):
+ """
+ plot the lid trend for clean vs noisy samples through training.
+ This can provide some information about manifold learning dynamics through training.
+ """
+
+ lids, train_accs, test_accs = None, None, None
+
+ # get LID of raw inputs
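+ # (MLE LID estimate of the raw inputs against themselves, used as the epoch-0 point of the trend)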
+ k = 20
+ lids = []
+ for j, (images, labels) in enumerate(data_loader['train_dataset']):
+ images = images.to(device, non_blocking=True)
+ lids.extend(lid_mle(images, images, k=k))
+
+ lids = torch.stack(lids, dim=0).type(torch.float32)
+ lid_X = lids.mean()
+ print('LID of input X: ', lid_X)
+
+ exp_path = os.path.join(exp_name, model_name)
+ checkpoint_path = os.path.join(exp_path, 'checkpoints')
+ checkpoint_path_file = os.path.join(checkpoint_path, model_name)
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ ENV = checkpoint['ENV']
+ train_accs = ENV['train_history']
+ train_accs.insert(0,0)
+ test_accs = ENV['eval_history']
+ test_accs.insert(0,0)
+ lids = ENV['lid']
+ lids.insert(0,lid_X)
+ lids = torch.stack(lids, dim=0).type(torch.float32)
+
+ plot(dataset, model_name, noise_ratio, lids, train_accs, test_accs)
+
+
+def plot(dataset, model_name, noise_ratio, lids, train_accs, test_accs):
+ """
+ plot function
+ """
+ # plot
+ fig = plt.figure() # figsize=(7, 6)
+ xnew = np.arange(0, len(lids), 5)
+
+ lids = lids.cpu().numpy()
+ train_accs = np.array(train_accs) / 100
+ test_accs = np.array(test_accs) / 100
+ print(train_accs)
+ lids = lids[xnew]
+ train_accs = train_accs[xnew]
+ test_accs = test_accs[xnew]
+
+ ax = fig.add_subplot(111)
+ ax.plot(xnew, lids, c='r', marker='o', markersize=3, linewidth=2, label='LID score')
+
+ ax2 = ax.twinx()
+ ax2.plot(xnew, train_accs, c='b', marker='x', markersize=3, linewidth=2, label='Train acc')
+ ax2.plot(xnew, test_accs, c='c', marker='^', markersize=3, linewidth=2, label='Test acc')
+
+ # ax.set_xticks([])
+ # ax.set_yticks([])
+ ax.set_xlabel("Epoch", fontsize=15)
+ ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15)
+ ax2.set_ylabel("Train/test accuracy", fontsize=15)
+ # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
+
+ if dataset == 'mnist':
+ ax.set_ylim((4, 22)) # for mnist
+ ax2.set_ylim((0.2, 1.2))
+ elif dataset == 'svhn':
+ ax.set_ylim((7, 20)) # for svhn
+ ax2.set_ylim((0.2, 1.2))
+ elif dataset == 'cifar10':
+ ax.set_ylim((2.5, 12.5)) # for cifar-10
+ #ax.set_ylim((3.5, 20.5))
+ ax2.set_ylim((0., 1.2))
+ elif dataset == 'cifar100':
+ ax.set_ylim((3, 12)) # for cifar-100
+ ax2.set_ylim((0., 1.))
+
+ legend = ax.legend(loc='upper left')
+ plt.setp(legend.get_texts(), fontsize=15)
+ legend2 = ax2.legend(loc='upper right')
+ plt.setp(legend2.get_texts(), fontsize=15)
+ fig.savefig("plots/lid_trend_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300)
+ plt.show()
+
+
+def lid_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0):
+ """
+ The LID trend of different learning models throughout training.
+ """
+ # plot initialization
+ fig = plt.figure() # figsize=(7, 6)
+ ax = fig.add_subplot(111)
+
+ for model_name in model_list:
+ exp_path = os.path.join(exp_name, model_name)
+ checkpoint_path = os.path.join(exp_path, 'checkpoints')
+ checkpoint_path_file = os.path.join(checkpoint_path, model_name)
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ ENV = checkpoint['ENV']
+ lids = ENV['lid']
+ lids = torch.stack(lids, dim=0).type(torch.float32)
+ lids = lids.cpu().numpy()
+ # smooth for plot
+ lids[lids < 0] = 0
+ lids[lids > 10] = 10
+
+ xnew = np.arange(0, len(lids), 5)
+ lids = lids[xnew]
+
+ # plot line
+ idx = MODELS.index(model_name)
+ ax.plot(xnew, lids, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
+
+ ax.set_xlabel("Epoch", fontsize=15)
+ ax.set_ylabel("Subspace dimensionality (LID score)", fontsize=15)
+ # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
+ legend = plt.legend(loc='lower center', ncol=2)
+ plt.setp(legend.get_texts(), fontsize=15)
+ fig.savefig("plots/lid_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300)
+ plt.show()
+
+def test_acc_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0):
+ """
+ The test accuracy trend of different learning models throughout training.
+ """
+ # plot initialization
+ fig = plt.figure() # figsize=(7, 6)
+ ax = fig.add_subplot(111)
+
+ for model_name in model_list:
+ exp_path = os.path.join(exp_name, model_name)
+ checkpoint_path = os.path.join(exp_path, 'checkpoints')
+ checkpoint_path_file = os.path.join(checkpoint_path, model_name)
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ ENV = checkpoint['ENV']
+ test_accs = ENV['eval_history']
+ test_accs = np.array(test_accs) / 100
+
+ xnew = np.arange(0, len(test_accs), 5)
+ test_accs = test_accs[xnew]
+
+ # plot line
+ idx = MODELS.index(model_name)
+ ax.plot(xnew, test_accs, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
+
+ ax.set_xlabel("Epoch", fontsize=15)
+ ax.set_ylabel("Test Accuracy", fontsize=15)
+ # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
+ legend = plt.legend(loc='lower center', ncol=2)
+ plt.setp(legend.get_texts(), fontsize=15)
+ fig.savefig("plots/test_accs_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300)
+ plt.show()
+
+def csr_trend_of_learning_models(exp_name, dataset, model, optimizer, scheduler, model_list=['ce'], noise_ratio=0):
+ """
+ The CSR trend of different learning models throughout training.
+ """
+ # plot initialization
+ fig = plt.figure() # figsize=(7, 6)
+ ax = fig.add_subplot(111)
+
+ for model_name in model_list:
+ exp_path = os.path.join(exp_name, model_name)
+ checkpoint_path = os.path.join(exp_path, 'checkpoints')
+ checkpoint_path_file = os.path.join(checkpoint_path, model_name)
+ checkpoint = util.load_model(filename=checkpoint_path_file,
+ model=model,
+ optimizer=optimizer,
+ scheduler=scheduler)
+ ENV = checkpoint['ENV']
+ csr = ENV['csr']
+ csr = torch.stack(csr, dim=0).type(torch.float32)
+ csr = csr.cpu().numpy()
+
+ xnew = np.arange(0, len(csr), 5)
+ csr = csr[xnew]
+
+ # plot line
+ idx = MODELS.index(model_name)
+ ax.plot(xnew, csr, c=COLORS[idx], marker=MARKERS[idx], markersize=3, linewidth=2, label=MODEL_LABELS[idx])
+
+ ax.set_xlabel("Epoch", fontsize=15)
+ ax.set_ylabel("CRS", fontsize=15)
+ # ax.set_title("%s with %s%% noisy labels" % (dataset.upper(), noise_ratio), fontsize=15)
+ legend = plt.legend(loc='lower center', ncol=2)
+ plt.setp(legend.get_texts(), fontsize=15)
+ fig.savefig("plots/crs_trend_all_models_%s_%s_%s.png" % (exp_name, dataset, noise_ratio), dpi=300)
+ plt.show()
\ No newline at end of file
diff --git a/representation_plot.py b/representation_plot.py
deleted file mode 100644
index cf2b242..0000000
--- a/representation_plot.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""
-Date: 28/07/2017
-feature exploration and visualization
-
-Author: Xingjun Ma
-"""
-import os
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.gridspec as gridspec
-from sklearn.manifold import TSNE
-from keras.optimizers import SGD
-from util import get_deep_representations
-from datasets import get_data
-from models import get_model
-from loss import cross_entropy
-
-np.random.seed(1234)
-
-CLASSES = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
-
-def feature_visualization(model_name='ce', dataset='mnist',
- num_classes=10, noise_ratio=40, n_samples=100):
- """
- This is to show how features of incorretly labeled images are overffited to the wrong class.
- plot t-SNE 2D-projected deep features (right before logits).
- This will generate 3 plots in a grid (3x1).
- The first shows the raw features projections of two classes of images (clean label + noisy label)
- The second shows the deep features learned by cross-entropy after training.
- The third shows the deep features learned using a new loss after training.
-
- :param model_name: a new model other than crossentropy(ce), can be: boot_hard, boot_soft, forward, backward, lid
- :param dataset:
- :param num_classes:
- :param noise_type;
- :param noise_ratio:
- :param epochs: to find the last epoch
- :param n_samples:
- :return:
- """
- print('Dataset: %s, model_name: ce/%s, noise ratio: %s%%' % (model_name, dataset, noise_ratio))
- features_ce = np.array([None, None])
- features_other = np.array([None, None])
-
- # # load pre-saved to avoid recomputing
- # feature_tmp = "lof/representation_%s_%s.npy" % (dataset, noise_ratio)
- # if os.path.isfile(feature_tmp):
- # data = np.load(feature_tmp)
- # features_input = data[0]
- # features_ce = data[1]
- # features_other = data[2]
- #
- # plot(model_name, dataset, noise_ratio, features_input, features_ce, features_other)
- # return
-
- # load data
- X_train, Y_train, X_test, Y_test = get_data(dataset)
- Y_noisy = np.load("data/noisy_label_%s_%s.npy" % (dataset, noise_ratio))
- Y_noisy = Y_noisy.reshape(-1)
-
- # sample training set
- cls_a = 0
- cls_b = 3
-
- # find smaples labeled to class A and B
- cls_a_idx = np.where(Y_noisy == cls_a)[0]
- cls_b_idx = np.where(Y_noisy == cls_b)[0]
-
- # sampling for efficiency purpose
- cls_a_idx = np.random.choice(cls_a_idx, n_samples, replace=False)
- cls_b_idx = np.random.choice(cls_b_idx, n_samples, replace=False)
-
- X_a = X_train[cls_a_idx]
- X_b = X_train[cls_b_idx]
-
- image_shape = X_train.shape[1:]
- model = get_model(dataset, input_tensor=None, input_shape=image_shape)
- sgd = SGD(lr=0.01, momentum=0.9)
-
-
- #### get deep representations of ce model
- model_path = 'model/ce_%s_%s.hdf5' % (dataset, noise_ratio)
- model.load_weights(model_path)
- model.compile(
- loss=cross_entropy,
- optimizer=sgd,
- metrics=['accuracy']
- )
-
- rep_a = get_deep_representations(model, X_a, batch_size=100).reshape((X_a.shape[0], -1))
- rep_b = get_deep_representations(model, X_b, batch_size=100).reshape((X_b.shape[0], -1))
-
- rep_a = TSNE(n_components=2).fit_transform(rep_a)
- rep_b = TSNE(n_components=2).fit_transform(rep_b)
- features_ce[0] = rep_a
- features_ce[1] = rep_b
-
- #### get deep representations of other model
- model_path = 'model/%s_%s_%s.hdf5' % (model_name, dataset, noise_ratio)
- model.load_weights(model_path)
- model.compile(
- loss=cross_entropy,
- optimizer=sgd,
- metrics=['accuracy']
- )
-
- rep_a = get_deep_representations(model, X_a, batch_size=100).reshape((X_a.shape[0], -1))
- rep_b = get_deep_representations(model, X_b, batch_size=100).reshape((X_b.shape[0], -1))
-
- rep_a = TSNE(n_components=2).fit_transform(rep_a)
- rep_b = TSNE(n_components=2).fit_transform(rep_b)
- features_other[0] = rep_a
- features_other[1] = rep_b
-
- # plot
- fig = plt.figure(figsize=(12, 5))
- gs = gridspec.GridSpec(1, 2, wspace=0.15)
-
- a_clean_idx = Y_train[cls_a_idx] == Y_noisy[cls_a_idx]
- a_noisy_idx = Y_train[cls_a_idx] != Y_noisy[cls_a_idx]
- b_clean_idx = Y_train[cls_b_idx] == Y_noisy[cls_b_idx]
- b_noisy_idx = Y_train[cls_b_idx] != Y_noisy[cls_b_idx]
-
- ## plot features learned by cross-entropy
- ax = fig.add_subplot(gs[0, 0])
- A = features_ce[0]
- B = features_ce[1]
- # clean labeld class A samples plot
- ax.scatter(A[a_clean_idx][:, 0].ravel(), A[a_clean_idx][:, 1].ravel(), c='b', marker='o', s=10, label='class A: clean')
- ax.scatter(A[a_noisy_idx][:, 0].ravel(), A[a_noisy_idx][:, 1].ravel(), c='m', marker='x', s=30, label='class A: noisy')
- ax.scatter(B[b_clean_idx][:, 0].ravel(), B[b_clean_idx][:, 1].ravel(), c='r', marker='o', s=10, label='class B: clean')
- ax.scatter(B[b_noisy_idx][:, 0].ravel(), B[b_noisy_idx][:, 1].ravel(), c='c', marker='x', s=30, label='class B: noisy')
-
- ax.set_title("cross-entropy", fontsize=15)
- legend = ax.legend(loc='lower center', ncol=2)
- plt.setp(legend.get_texts(), fontsize=15)
-
- ax = fig.add_subplot(gs[0, 1])
- A = features_other[0]
- B = features_other[1]
- ax.scatter(A[a_clean_idx][:, 0].ravel(), A[a_clean_idx][:, 1].ravel(), c='b', marker='o', s=10, label='class A: clean')
- ax.scatter(A[a_noisy_idx][:, 0].ravel(), A[a_noisy_idx][:, 1].ravel(), c='m', marker='x', s=30, label='class A: noisy')
- ax.scatter(B[b_clean_idx][:, 0].ravel(), B[b_clean_idx][:, 1].ravel()-5, c='r', marker='o', s=10, label='class B: clean')
- ax.scatter(B[b_noisy_idx][:, 0].ravel(), B[b_noisy_idx][:, 1].ravel(), c='c', marker='x', s=30, label='class B: noisy')
-
- ax.set_title("D2L", fontsize=15)
- legend = ax.legend(loc='lower center', ncol=2)
- plt.setp(legend.get_texts(), fontsize=15)
-
- fig.savefig("plots/representations_%s_%s_%s.png" % (model_name, dataset, noise_ratio), dpi=300)
- plt.show()
-
-if __name__ == "__main__":
- feature_visualization(model_name='d2l', dataset='cifar-10', num_classes=10, noise_ratio=60, n_samples=500)
\ No newline at end of file
diff --git a/resnet.py b/resnet.py
deleted file mode 100644
index 665e09b..0000000
--- a/resnet.py
+++ /dev/null
@@ -1,122 +0,0 @@
-"""Some code sections are taken from
-https://github.com/raghakot/keras-resnet
-"""
-
-import sys
-
-import numpy as np
-
-from keras.models import Model
-from keras.layers import Input, Activation, merge, Dense, Flatten
-from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
-from keras.layers.convolutional import AveragePooling2D
-from keras.layers.normalization import BatchNormalization
-from keras.regularizers import l2
-from keras.layers.merge import add
-from keras import backend as K
-
-sys.setrecursionlimit(10000)
-
-BN_AXIS = 3
-
-
-def cifar100_resnet(depth, num_classes):
- # how many layers this is going to create?
- # 2 + 6 * depth
-
- img_channels = 3
- img_rows = 32
- img_cols = 32
- num_conv = 3
- decay = 2e-3
-
- input = Input(shape=(img_rows, img_cols, img_channels))
-
- # 1 conv + BN + relu
- filters = 16
- b = Conv2D(filters=filters, kernel_size=(num_conv, num_conv),
- kernel_initializer="he_normal", padding="same",
- kernel_regularizer=l2(decay), bias_regularizer=l2(0))(input)
- b = BatchNormalization(axis=BN_AXIS)(b)
- b = Activation("relu")(b)
-
- # 1 res, no striding
- b = residual(num_conv, filters, decay, first=True)(b) # 2 layers inside
- for _ in np.arange(1, depth): # start from 1 => 2 * depth in total
- b = residual(num_conv, filters, decay)(b)
-
- filters *= 2
-
- # 2 res, with striding
- b = residual(num_conv, filters, decay, more_filters=True)(b)
- for _ in np.arange(1, depth):
- b = residual(num_conv, filters, decay)(b)
-
- filters *= 2
-
- # 3 res, with striding
- b = residual(num_conv, filters, decay, more_filters=True)(b)
- for _ in np.arange(1, depth):
- b = residual(num_conv, filters, decay)(b)
-
- b = BatchNormalization(axis=BN_AXIS)(b)
- b = Activation("relu")(b)
-
- b = AveragePooling2D(pool_size=(8, 8), strides=(1, 1),
- padding="valid")(b)
-
- out = Flatten(name='lid')(b)
-
- dense = Dense(units=num_classes, kernel_initializer="he_normal",
- kernel_regularizer=l2(decay), bias_regularizer=l2(0))(out)
-
- act = Activation("softmax")(dense)
-
- return Model(inputs=input, outputs=act)
-
-
-def residual(num_conv, filters, decay, more_filters=False, first=False):
- def f(input):
- # in_channel = input._keras_shape[1]
- out_channel = filters
-
- if more_filters and not first:
- # out_channel = in_channel * 2
- stride = 2
- else:
- # out_channel = in_channel
- stride = 1
-
- if not first:
- b = BatchNormalization(axis=BN_AXIS)(input)
- b = Activation("relu")(b)
- else:
- b = input
-
- b = Conv2D(filters=out_channel,
- kernel_size=(num_conv, num_conv),
- strides=(stride, stride),
- kernel_initializer="he_normal", padding="same",
- kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b)
- b = BatchNormalization(axis=BN_AXIS)(b)
- b = Activation("relu")(b)
- res = Conv2D(filters=out_channel,
- kernel_size=(num_conv, num_conv),
- kernel_initializer="he_normal", padding="same",
- kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b)
-
- # check and match number of filter for the shortcut
- input_shape = K.int_shape(input)
- residual_shape = K.int_shape(res)
- if not input_shape[3] == residual_shape[3]:
- stride_width = int(round(input_shape[1] / residual_shape[1]))
- stride_height = int(round(input_shape[2] / residual_shape[2]))
-
- input = Conv2D(filters=residual_shape[3], kernel_size=(1, 1),
- strides=(stride_width, stride_height),
- kernel_initializer="he_normal",
- padding="valid", kernel_regularizer=l2(decay))(input)
-
- return add([input, res])
-
- return f
\ No newline at end of file
diff --git a/script/CIFAR10.slurm b/script/CIFAR10.slurm
new file mode 100644
index 0000000..8fc12ca
--- /dev/null
+++ b/script/CIFAR10.slurm
@@ -0,0 +1,61 @@
+#!/bin/bash
+#SBATCH --nodes 1
+#SBATCH --partition gpgpu
+#SBATCH --gres=gpu:1
+
+# The project ID which this job should run under:
+#SBATCH --account="punim0784"
+
+# Maximum number of tasks/CPU cores used by the job:
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=8
+
+# The amount of memory in megabytes per process in the job:
+#SBATCH --mem=64G
+
+# The maximum running time of the job in days-hours:mins:sec
+#SBATCH --time 96:00:00
+
+# check that the script is launched with sbatch
+if [ "x$SLURM_JOB_ID" == "x" ]; then
+ echo "You need to submit your job to the queuing system with sbatch"
+ exit 1
+fi
+
+
+# Run the job from this directory:
+cd /data/cephfs/punim0784/robust_loss_nips
+
+# The modules to load:
+module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10
+nvidia-smi
+
+exp_name=$1
+seed=$2
+loss=$3
+
+# Sym
+declare -a nr_arr=("0.0"
+ "0.2"
+ "0.4"
+ "0.6"
+ "0.8")
+
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/cifar10/sym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/cifar10/sym/$i --seed $seed --noise_rate $i --config_path configs/cifar10/sym --version ${loss}
+done
+
+# Asym
+declare -a nr_arr=(
+ "0.1"
+ "0.2"
+ "0.3"
+ "0.4"
+ )
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/cifar10/asym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/cifar10/asym/$i --seed $seed --noise_rate $i --config_path configs/cifar10/asym --version ${loss}
+done
diff --git a/script/CIFAR100.slurm b/script/CIFAR100.slurm
new file mode 100644
index 0000000..8b36be6
--- /dev/null
+++ b/script/CIFAR100.slurm
@@ -0,0 +1,63 @@
+#!/bin/bash
+#SBATCH --nodes 1
+#SBATCH --partition gpgpu
+#SBATCH --gres=gpu:1
+
+# The project ID which this job should run under:
+#SBATCH --account="punim0784"
+
+# Maximum number of tasks/CPU cores used by the job:
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=8
+
+# The amount of memory in megabytes per process in the job:
+#SBATCH --mem=64G
+
+# The maximum running time of the job in days-hours:mins:sec
+#SBATCH --time 168:00:00
+
+# check that the script is launched with sbatch
+if [ "x$SLURM_JOB_ID" == "x" ]; then
+ echo "You need to submit your job to the queuing system with sbatch"
+ exit 1
+fi
+
+
+# Run the job from this directory:
+cd /data/cephfs/punim0784/robust_loss_nips
+
+
+# The modules to load:
+module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10
+nvidia-smi
+
+exp_name=$1
+seed=$2
+loss=$3
+
+
+# Sym
+declare -a nr_arr=("0.0"
+ "0.2"
+ "0.4"
+ "0.6"
+ "0.8")
+
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/cifar100/sym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/cifar100/sym/$i --seed $seed --noise_rate $i --config_path configs/cifar100/sym --version ${loss}
+done
+
+# Asym
+declare -a nr_arr=(
+ "0.1"
+ "0.2"
+ "0.3"
+ "0.4"
+ )
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/cifar100/asym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/cifar100/asym/$i --seed $seed --noise_rate $i --config_path configs/cifar100/asym --version ${loss} --asym
+done
diff --git a/script/MNIST.slurm b/script/MNIST.slurm
new file mode 100644
index 0000000..f6d35c8
--- /dev/null
+++ b/script/MNIST.slurm
@@ -0,0 +1,63 @@
+#!/bin/bash
+#SBATCH --nodes 1
+#SBATCH --partition gpgpu
+#SBATCH --gres=gpu:1
+
+# The project ID which this job should run under:
+#SBATCH --account="punim0784"
+
+# Maximum number of tasks/CPU cores used by the job:
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=8
+
+# The amount of memory in megabytes per process in the job:
+#SBATCH --mem=64G
+
+# The maximum running time of the job in days-hours:mins:sec
+#SBATCH --time 48:00:00
+
+# check that the script is launched with sbatch
+if [ "x$SLURM_JOB_ID" == "x" ]; then
+ echo "You need to submit your job to the queuing system with sbatch"
+ exit 1
+fi
+
+
+# Run the job from this directory:
+# cd /data/gpfs/users/hanxunh/robust_loss_nips
+cd /data/cephfs/punim0784/robust_loss_nips
+
+# The modules to load:
+module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10
+nvidia-smi
+
+exp_name=$1
+seed=$2
+loss=$3
+
+
+# Sym
+declare -a nr_arr=("0.0"
+ "0.2"
+ "0.4"
+ "0.6"
+ "0.8")
+
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/mnist/sym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/mnist/sym/$i --seed $seed --noise_rate $i --config_path configs/mnist/sym --version ${loss}
+done
+
+# Asym
+declare -a nr_arr=(
+ "0.1"
+ "0.2"
+ "0.3"
+ "0.4"
+ )
+for i in "${nr_arr[@]}"
+ do
+ rm -rf ${exp_name}/mnist/asym/$i/${loss}/*
+ python3 -u main.py --exp_name ${exp_name}/mnist/asym/$i --seed $seed --noise_rate $i --config_path configs/mnist/asym --version ${loss} --asym
+done
diff --git a/script/WebVisionMini.slurm b/script/WebVisionMini.slurm
new file mode 100644
index 0000000..3f06a08
--- /dev/null
+++ b/script/WebVisionMini.slurm
@@ -0,0 +1,60 @@
+#!/bin/bash
+#SBATCH --nodes 1
+#SBATCH --partition gpgpu
+#SBATCH --gres=gpu:4
+
+# The project ID which this job should run under:
+#SBATCH --account="punim0784"
+
+# Maximum number of tasks/CPU cores used by the job:
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=24
+
+# The amount of memory for the job:
+#SBATCH --mem=120G
+
+# The maximum running time of the job in hours:mins:secs
+#SBATCH --time 168:00:00
+
+# check that the script is launched with sbatch
+if [ "x$SLURM_JOB_ID" == "x" ]; then
+ echo "You need to submit your job to the queuing system with sbatch"
+ exit 1
+fi
+
+
+# Copy Data to local node
+cd /var/local/tmp/
+mkdir -p datasets
+mkdir -p datasets/ILSVR2012
+
+pwd
+echo 'rsync datasets'
+
+rsync -avzh --progress /data/cephfs/punim0784/datasets/google_resized_256.tar datasets/
+rsync -avzh --progress /data/cephfs/punim0784/datasets/webvision_mini_train.txt datasets/
+rsync -avzh --progress /data/cephfs/punim0784/datasets/train_filelist_google.txt datasets/
+
+rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/ILSVRC2012_img_val.tar datasets/ILSVR2012/
+rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/meta.bin datasets/ILSVR2012/
+rsync -avzh --progress /data/cephfs/punim0784/datasets/ILSVR2012/ILSVRC2012_devkit_t12.tar.gz datasets/ILSVR2012/
+
+cd datasets
+pwd
+echo 'untar google_resized_256'
+tar -xvf google_resized_256.tar
+
+# Run the job from this directory:
+cd /data/cephfs/punim0784/robust_loss_nips
+
+# The modules to load:
+module load Python/3.6.4-intel-2017.u2-GCC-6.2.0-CUDA10
+nvidia-smi
+
+exp_name=$1
+seed=$2
+loss=$3
+
+rm -rf ${exp_name}/webvision_mini/${loss}/*
+rm -rf ${exp_name}/webvision_mini/${loss}
+python3 -u main.py --data_parallel --exp_name ${exp_name}/webvision_mini/ --seed $seed --config_path configs/webvision_mini --version ${loss}
diff --git a/script/script.sh b/script/script.sh
new file mode 100644
index 0000000..b4e1566
--- /dev/null
+++ b/script/script.sh
@@ -0,0 +1,91 @@
+# Reference sbatch submission loops for CIFAR-100, WebVision Full and WebVision Mini (kept commented out).
+# # CIFAR100
+# declare -a loss=( "ce"
+# "focal"
+# "gce"
+# "mae"
+# "nce"
+# "nce+mae"
+# "nce+rce"
+# "nfl"
+# "nfl+mae"
+# "nfl+rce"
+# "ngce"
+# "ngce+mae"
+# "ngce+rce"
+# "nlnl"
+# "rce"
+# "sce" )
+#
+# declare -a run_version=(
+# "run1"
+# "run2"
+# "run3"
+# )
+#
+# seed=0
+# for i in "${run_version[@]}"
+# do
+# for j in "${loss[@]}"
+# do
+# job_name=C100_${i}_${j}
+# echo $job_name
+# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR100.slurm $i $seed $j
+# done
+# seed=$((seed+1))
+# done
+
+
+# # WebVision Full
+# declare -a loss=(
+# "ce"
+# "gce"
+# "nce+mae"
+# "nce+rce"
+# "nfl+mae"
+# "nfl+rce"
+# "sce"
+# )
+#
+# declare -a run_version=(
+# "webvision_full"
+# )
+#
+# seed=0
+# for i in "${run_version[@]}"
+# do
+# for j in "${loss[@]}"
+# do
+# job_name=WebVisionFull_${i}_${j}
+# echo $job_name
+# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=12 --gres=gpu:4 WebVisionFull.slurm $i $seed $j
+# done
+# seed=$((seed+1))
+# done
+
+# # WebVision Mini
+# declare -a loss=(
+# "ce"
+# "gce"
+# "nce+mae"
+# "nce+rce"
+# "nfl+mae"
+# "nfl+rce"
+# "sce"
+# )
+#
+# declare -a run_version=(
+# "webvision_mini"
+# )
+#
+# seed=0
+# for i in "${run_version[@]}"
+# do
+# for j in "${loss[@]}"
+# do
+# job_name=WebVisionMini${i}_${j}
+# echo $job_name
+# sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=24 --gres=gpu:4 WebVisionMini.slurm $i $seed $j
+# done
+# seed=$((seed+1))
+# done
diff --git a/script/submit_c10.sh b/script/submit_c10.sh
new file mode 100644
index 0000000..249647a
--- /dev/null
+++ b/script/submit_c10.sh
@@ -0,0 +1,39 @@
+# CIFAR10
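+# Submit one sbatch job per (run, loss) combination; the seed is incremented for each run.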
+declare -a loss=(
+ "ce"
+ "focal"
+ "gce"
+ "mae"
+ "nce"
+ "nce+mae"
+ "nce+rce"
+ "nfl"
+ "nfl+mae"
+ "nfl+rce"
+ "ngce"
+ "ngce+mae"
+ "ngce+rce"
+ # "nlnl"
+ "rce"
+ "sce"
+ )
+
+declare -a run_version=(
+ "run1"
+ "run2"
+ "run3"
+ )
+
+seed=0
+for i in "${run_version[@]}"
+do
+ for j in "${loss[@]}"
+ do
+ echo C10_${i}_${j}
+ job_name=${j}_C10_${i}
+ # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR10.slurm $i $seed $j
+ sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR10.slurm $i $seed $j
+ # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR10.slurm $i $seed $j
+ done
+ seed=$((seed+1))
+done
diff --git a/script/submit_c100.sh b/script/submit_c100.sh
new file mode 100644
index 0000000..6288727
--- /dev/null
+++ b/script/submit_c100.sh
@@ -0,0 +1,39 @@
+# CIFAR100
+declare -a loss=(
+ "ce"
+ "focal"
+ "gce"
+ "mae"
+ "nce"
+ "nce+mae"
+ "nce+rce"
+ "nfl"
+ "nfl+mae"
+ "nfl+rce"
+ "ngce"
+ "ngce+mae"
+ "ngce+rce"
+ # "nlnl"
+ "rce"
+ "sce"
+ )
+
+declare -a run_version=(
+ "run1"
+ "run2"
+ "run3"
+ )
+
+seed=0
+for i in "${run_version[@]}"
+do
+ for j in "${loss[@]}"
+ do
+ echo C100_${i}_${j}
+ job_name=${j}_C100_${i}
+        # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 CIFAR100.slurm $i $seed $j
+        sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR100.slurm $i $seed $j
+        # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=32G CIFAR100.slurm $i $seed $j
+ done
+ seed=$((seed+1))
+done
diff --git a/script/submit_clothing1m.sh b/script/submit_clothing1m.sh
new file mode 100644
index 0000000..c6b3f37
--- /dev/null
+++ b/script/submit_clothing1m.sh
@@ -0,0 +1,25 @@
+# Clothing1M
+declare -a loss=( "ce"
+ "gce"
+ "nce+mae"
+ "nce+rce"
+ "nfl+mae"
+ "nfl+rce"
+ "sce" )
+
+declare -a run_version=(
+ "clothing1m"
+ )
+
+seed=0
+for i in "${run_version[@]}"
+do
+ for j in "${loss[@]}"
+ do
+ job_name=Clothing1M_${i}_${j}
+ echo $job_name
+ sbatch --partition gpgpu --cpus-per-task=8 --gres=gpu:4 Clothing1M.slurm $i $seed $j
+ # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --cpus-per-task=24 --gres=gpu:4 Clothing1M.slurm $i $seed $j
+ done
+ seed=$((seed+1))
+done
diff --git a/script/submit_mnist.sh b/script/submit_mnist.sh
new file mode 100644
index 0000000..8ec47d1
--- /dev/null
+++ b/script/submit_mnist.sh
@@ -0,0 +1,38 @@
+# MNIST
+declare -a loss=(
+ "ce"
+ "focal"
+ "gce"
+ "mae"
+ "nce"
+ "nce+mae"
+ "nce+rce"
+ "nfl"
+ "nfl+mae"
+ "nfl+rce"
+ "ngce"
+ "ngce+mae"
+ "ngce+rce"
+ # "nlnl"
+ "rce"
+ "sce"
+ )
+
+declare -a run_version=(
+ "run1"
+ "run2"
+ "run3"
+ )
+
+seed=0
+for i in "${run_version[@]}"
+do
+ for j in "${loss[@]}"
+ do
+ job_name=${j}_MNIST_${i}
+ echo $job_name
+ # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:1 MNIST.slurm $i $seed $j
+ sbatch --partition gpgpu --job-name $job_name --cpus-per-task=4 --gres=gpu:1 --mem=16G MNIST.slurm $i $seed $j
+ done
+ seed=$((seed+1))
+done
diff --git a/script/submit_webvision_mini.sh b/script/submit_webvision_mini.sh
new file mode 100644
index 0000000..2c1e55d
--- /dev/null
+++ b/script/submit_webvision_mini.sh
@@ -0,0 +1,28 @@
+# WebVision Mini
+declare -a loss=(
+ "ce"
+ "gce"
+ # "nce+mae"
+ # "nce+rce"
+ # "nfl+mae"
+ # "nfl+rce"
+ "sce"
+ )
+
+declare -a run_version=(
+ "webvision_mini"
+ )
+
+seed=0
+for i in "${run_version[@]}"
+do
+ for j in "${loss[@]}"
+ do
+        job_name=WebVisionMini_${i}_${j}
+ echo $job_name
+ sbatch --partition gpgpu --job-name $job_name --cpus-per-task=8 --gres=gpu:4 --mem=96G WebVisionMini.slurm $i $seed $j
+ # sbatch --partition deeplearn --qos gpgpudeeplearn --job-name $job_name --cpus-per-task=8 --gres=gpu:4 --mem=96G WebVisionMini.slurm $i $seed $j
+ # sbatch --partition gpgputest --qos=gpgpuhpcadmingpgpu --job-name $job_name --cpus-per-task=24 --gres=gpu:4 WebVisionMini.slurm $i $seed $j
+ done
+ seed=$((seed+1))
+done
diff --git a/train_models.py b/train_models.py
deleted file mode 100644
index a7165e1..0000000
--- a/train_models.py
+++ /dev/null
@@ -1,199 +0,0 @@
-from __future__ import absolute_import
-from __future__ import print_function
-
-import os
-import keras.backend as K
-import argparse
-
-from keras.preprocessing.image import ImageDataGenerator
-from keras.optimizers import SGD
-from keras.callbacks import ModelCheckpoint
-
-from util import get_lr_scheduler, uniform_noise_model_P
-from datasets import get_data, validatation_split
-from models import get_model
-from loss import cross_entropy, boot_soft, boot_hard, forward, backward, lid_paced_loss
-from callback_util import D2LCallback, LoggerCallback
-
-D2L = {'mnist': {'init_epoch': 5, 'epoch_win': 5}, 'svhn': {'init_epoch': 20, 'epoch_win': 5},
- 'cifar-10': {'init_epoch': 40, 'epoch_win': 5}, 'cifar-100': {'init_epoch': 60, 'epoch_win': 5}}
-
-# prepare folders
-folders = ['data', 'model', 'log']
-for folder in folders:
- path = os.path.join('./', folder)
- if not os.path.exists(path):
- os.makedirs(path)
-
-def train(dataset='mnist', model_name='d2l', batch_size=128, epochs=50, noise_ratio=0):
- """
- Train one model with data augmentation: random padding+cropping and horizontal flip
- :param dataset:
- :param model_name:
- :param batch_size:
- :param epochs:
- :param noise_ratio:
- :return:
- """
- print('Dataset: %s, model: %s, batch: %s, epochs: %s, noise ratio: %s%%' %
- (dataset, model_name, batch_size, epochs, noise_ratio))
-
- # load data
- X_train, y_train, X_test, y_test = get_data(dataset, noise_ratio, random_shuffle=True)
- # X_train, y_train, X_val, y_val = validatation_split(X_train, y_train, split=0.1)
- n_images = X_train.shape[0]
- image_shape = X_train.shape[1:]
- num_classes = y_train.shape[1]
- print("n_images", n_images, "num_classes", num_classes, "image_shape:", image_shape)
-
- # load model
- model = get_model(dataset, input_tensor=None, input_shape=image_shape, num_classes=num_classes)
- # model.summary()
-
- if dataset == 'cifar-100':
- optimizer = SGD(lr=0.1, decay=5e-3, momentum=0.9)
- else:
- optimizer = SGD(lr=0.1, decay=1e-4, momentum=0.9)
-
- # for backward, forward loss
- # suppose the model knows noise ratio
- P = uniform_noise_model_P(num_classes, noise_ratio/100.)
- # create loss
- if model_name == 'forward':
- P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
- loss = forward(P)
- elif model_name == 'backward':
- P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
- loss = backward(P)
- elif model_name == 'boot_hard':
- loss = boot_hard
- elif model_name == 'boot_soft':
- loss = boot_soft
- elif model_name == 'd2l':
- if dataset == 'cifar-100':
- loss = lid_paced_loss(beta1=6.0, beta2=0.1)
- else:
- loss = lid_paced_loss(beta1=0.1, beta2=1.0)
- else:
- loss = cross_entropy
-
- # model
- model.compile(
- loss=loss,
- optimizer=optimizer,
- metrics=['accuracy']
- )
-
- ## do real-time updates using callbakcs
- callbacks = []
- if model_name == 'd2l':
- init_epoch = D2L[dataset]['init_epoch']
- epoch_win = D2L[dataset]['epoch_win']
- d2l_learning = D2LCallback(model, X_train, y_train,
- dataset, noise_ratio,
- epochs=epochs,
- pace_type=model_name,
- init_epoch=init_epoch,
- epoch_win=epoch_win)
-
- callbacks.append(d2l_learning)
-
- cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio),
- monitor='val_loss',
- verbose=0,
- save_best_only=False,
- save_weights_only=True,
- period=1)
- callbacks.append(cp_callback)
-
- else:
- cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio),
- monitor='val_loss',
- verbose=0,
- save_best_only=False,
- save_weights_only=True,
- period=epochs)
- callbacks.append(cp_callback)
-
- # learning rate scheduler if use sgd
- lr_scheduler = get_lr_scheduler(dataset)
- callbacks.append(lr_scheduler)
-
- # acc, loss, lid
- log_callback = LoggerCallback(model, X_train, y_train, X_test, y_test, dataset,
- model_name, noise_ratio, epochs)
- callbacks.append(log_callback)
-
- # data augmentation
- if dataset in ['mnist', 'svhn']:
- datagen = ImageDataGenerator()
- elif dataset in ['cifar-10']:
- datagen = ImageDataGenerator(
- width_shift_range=0.2,
- height_shift_range=0.2,
- horizontal_flip=True)
- else:
- datagen = ImageDataGenerator(
- rotation_range=20,
- width_shift_range=0.2,
- height_shift_range=0.2,
- horizontal_flip=True)
- datagen.fit(X_train)
-
- # train model
- model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
- steps_per_epoch=len(X_train) / batch_size, epochs=epochs,
- validation_data=(X_test, y_test),
- verbose=1,
- callbacks=callbacks
- )
-
-def main(args):
- assert args.dataset in ['mnist', 'svhn', 'cifar-10', 'cifar-100'], \
- "dataset parameter must be either 'mnist', 'svhn', 'cifar-10', 'cifar-100'"
- assert args.model_name in ['ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'], \
- "dataset parameter must be either 'ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'"
- train(args.dataset, args.model_name, args.batch_size, args.epochs, args.noise_ratio)
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '-d', '--dataset',
- help="Dataset to use; either 'mnist', 'svhn', 'cifar-10', 'cifar-100'",
- required=True, type=str
- )
- parser.add_argument(
- '-m', '--model_name',
- help="Model name: 'ce', 'forward', 'backward', 'boot_hard', 'boot_soft', 'd2l'.",
- required=True, type=str
- )
- parser.add_argument(
- '-e', '--epochs',
- help="The number of epochs to train for.",
- required=False, type=int
- )
- parser.add_argument(
- '-b', '--batch_size',
- help="The batch size to use for training.",
- required=False, type=int
- )
- parser.add_argument(
- '-r', '--noise_ratio',
- help="The percentage of noisy labels [0, 100].",
- required=False, type=int
- )
- parser.set_defaults(epochs=150)
- parser.set_defaults(batch_size=128)
- parser.set_defaults(noise_ratio=0)
-
- os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-# args = parser.parse_args()
-# main(args)
-
- args = parser.parse_args(['-d', 'cifar-10', '-m', 'd2l',
- '-e', '120', '-b', '128',
- '-r', '60'])
- main(args)
-
- K.clear_session()
diff --git a/trainer.py b/trainer.py
new file mode 100644
index 0000000..322d843
--- /dev/null
+++ b/trainer.py
@@ -0,0 +1,83 @@
+import time
+import torch
+import os
+from util import log_display, accuracy, AverageMeter
+
+if torch.cuda.is_available():
+ torch.backends.cudnn.enabled = True
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cudnn.deterministic = True
+ device = torch.device('cuda')
+else:
+ device = torch.device('cpu')
+
+
+class Trainer():
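+    """Runs one training epoch per call to train(), tracking loss/accuracy meters and logging every log_frequency steps."""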
+ def __init__(self, data_loader, logger, config, name='Trainer', metrics='classfication'):
+ self.data_loader = data_loader
+ self.logger = logger
+ self.name = name
+ self.step = 0
+ self.config = config
+ self.log_frequency = config.log_frequency
+ self.loss_meters = AverageMeter()
+ self.acc_meters = AverageMeter()
+ self.acc5_meters = AverageMeter()
+ self.report_metrics = self.classfication_metrics if metrics == 'classfication' else self.regression_metrics
+
+ def train(self, epoch, GLOBAL_STEP, model, optimizer, criterion):
+ model.train()
+ for images, labels in self.data_loader:
+ images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
+ self.train_batch(images, labels, model, criterion, optimizer)
+ self.log(epoch, GLOBAL_STEP)
+ GLOBAL_STEP += 1
+ return GLOBAL_STEP
+
+ def train_batch(self, x, y, model, criterion, optimizer):
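+        # Forward pass, compute the loss, backprop, clip gradients to config.grad_bound, then step the optimizer.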
+ start = time.time()
+ model.zero_grad()
+ optimizer.zero_grad()
+ pred, _ = model(x)
+ loss = criterion(pred, y)
+ loss.backward()
+ grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), self.config.grad_bound)
+ optimizer.step()
+ self.report_metrics(pred, y, loss)
+        self.logger_payload['lr'] = optimizer.param_groups[0]['lr']
+ self.logger_payload['|gn|'] = grad_norm
+ end = time.time()
+ self.step += 1
+ self.time_used = end - start
+
+ def log(self, epoch, GLOBAL_STEP):
+ if GLOBAL_STEP % self.log_frequency == 0:
+ display = log_display(epoch=epoch,
+ global_step=GLOBAL_STEP,
+ time_elapse=self.time_used,
+ **self.logger_payload)
+ self.logger.info(display)
+
+ def classfication_metrics(self, x, y, loss):
+ acc, acc5 = accuracy(x, y, topk=(1, 5))
+ self.loss_meters.update(loss.item(), y.shape[0])
+ self.acc_meters.update(acc.item(), y.shape[0])
+ self.acc5_meters.update(acc5.item(), y.shape[0])
+ self.logger_payload = {"acc": acc,
+ "acc_avg": self.acc_meters.avg,
+ "loss": loss,
+ "loss_avg": self.loss_meters.avg}
+
+ def regression_metrics(self, x, y, loss):
+ diff = abs((x - y).mean().detach().item())
+ self.loss_meters.update(loss.item(), y.shape[0])
+ self.acc_meters.update(diff, y.shape[0])
+ self.logger_payload = {"|diff|": diff,
+ "|diff_avg|": self.acc_meters.avg,
+ "loss": loss,
+ "loss_avg": self.loss_meters.avg}
+
+ def _reset_stats(self):
+ self.loss_meters.reset()
+ self.acc_meters.reset()
+ self.acc5_meters.reset()
diff --git a/util.py b/util.py
index 4ed9a45..c537396 100644
--- a/util.py
+++ b/util.py
@@ -1,245 +1,147 @@
-from __future__ import absolute_import
-from __future__ import print_function
-
+import logging
import os
-import multiprocessing as mp
-from subprocess import call
-import warnings
+import torch
import numpy as np
-from numpy.testing import assert_array_almost_equal
-from sklearn.preprocessing import MinMaxScaler
-import keras.backend as K
-from scipy.spatial.distance import pdist, cdist, squareform
-from keras.callbacks import ModelCheckpoint, Callback
-from keras.callbacks import LearningRateScheduler
-import tensorflow as tf
-
-# Set random seed
-np.random.seed(123)
-
-
-def lid(logits, k=20):
- """
- Calculate LID for a minibatch of training samples based on the outputs of the network.
-
- :param logits:
- :param k:
- :return:
- """
- epsilon = 1e-12
- batch_size = tf.shape(logits)[0]
- # n_samples = logits.get_shape().as_list()
- # calculate pairwise distance
- r = tf.reduce_sum(logits * logits, 1)
- # turn r into column vector
- r1 = tf.reshape(r, [-1, 1])
- D = r1 - 2 * tf.matmul(logits, tf.transpose(logits)) + tf.transpose(r1) + \
- tf.ones([batch_size, batch_size])
-
- # find the k nearest neighbor
- D1 = -tf.sqrt(D)
- D2, _ = tf.nn.top_k(D1, k=k, sorted=True)
- D3 = -D2[:, 1:] # skip the x-to-x distance 0 by using [,1:]
-
- m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1]))
- v_log = tf.reduce_sum(tf.log(m + epsilon), axis=1) # to avoid nan
- lids = -k / v_log
- return lids
-
-
-def mle_single(data, x, k):
- """
- lid of a single query point x.
- numpy implementation.
-
- :param data:
- :param x:
- :param k:
- :return:
- """
- data = np.asarray(data, dtype=np.float32)
- x = np.asarray(x, dtype=np.float32)
- if x.ndim == 1:
- x = x.reshape((-1, x.shape[0]))
- # dim = x.shape[1]
-
- k = min(k, len(data) - 1)
- f = lambda v: - k / np.sum(np.log(v / v[-1] + 1e-8))
- a = cdist(x, data)
- a = np.apply_along_axis(np.sort, axis=1, arr=a)[:, 1:k + 1]
- a = np.apply_along_axis(f, axis=1, arr=a)
- return a[0]
-
-
-def mle_batch(data, batch, k):
- """
- lid of a batch of query points X.
- numpy implementation.
-
- :param data:
- :param batch:
- :param k:
- :return:
- """
- data = np.asarray(data, dtype=np.float32)
- batch = np.asarray(batch, dtype=np.float32)
-
- k = min(k, len(data) - 1)
- f = lambda v: - k / np.sum(np.log(v / v[-1] + 1e-8))
- a = cdist(batch, data)
- a = np.apply_along_axis(np.sort, axis=1, arr=a)[:, 1:k + 1]
- a = np.apply_along_axis(f, axis=1, arr=a)
- return a
-
-
-def other_class(n_classes, current_class):
- """
- Returns a list of class indices excluding the class indexed by class_ind
- :param nb_classes: number of classes in the task
- :param class_ind: the class index to be omitted
- :return: one random class that != class_ind
- """
- if current_class < 0 or current_class >= n_classes:
- error_str = "class_ind must be within the range (0, nb_classes - 1)"
- raise ValueError(error_str)
-
- other_class_list = list(range(n_classes))
- other_class_list.remove(current_class)
- other_class = np.random.choice(other_class_list)
- return other_class
-
-
-def get_lids_random_batch(model, X, k=20, batch_size=128):
+import torch.nn.functional as F
+from lid import lid_mle
+from lass import lass
+
+class AverageMeter(object):
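+    """Keeps a running sum, count and average of a scalar metric."""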
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.avg = 0
+ self.sum = 0
+ self.cnt = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.cnt += n
+ self.avg = self.sum / self.cnt
+
+
+def accuracy(output, target, topk=(1,)):
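+    """Top-k accuracy of `output` logits against `target` labels, returned as fractions in [0, 1]."""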
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].contiguous().view(-1).float().sum(0)
+ res.append(correct_k.mul_(1.0/batch_size))
+ return res
+
+
+def log_display(epoch, global_step, time_elapse, **kwargs):
+ display = 'epoch=' + str(epoch) + \
+ '\tglobal_step=' + str(global_step)
+ for key, value in kwargs.items():
+ display += '\t' + str(key) + '=%.5f' % value
+ display += '\ttime=%.2fit/s' % (1. / time_elapse)
+ return display
+
+
+def chunks(l, n):
+ """Yield successive n-sized chunks from l."""
+ for i in range(0, len(l), n):
+ yield l[i:i + n]
+
+
+def setup_logger(name, log_file, level=logging.INFO):
+ """To setup as many loggers as you want"""
+ formatter = logging.Formatter('%(asctime)s %(message)s')
+ console_handler = logging.StreamHandler()
+ console_handler.setFormatter(formatter)
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setFormatter(formatter)
+ logger = logging.getLogger(name)
+ logger.setLevel(level)
+ logger.addHandler(file_handler)
+ logger.addHandler(console_handler)
+ return logger
+
+
+def build_dirs(path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+ return
+
+
+def save_model(filename, model, optimizer, scheduler, epoch, **kwargs):
+ # Torch Save State Dict
+ state = {
+ 'epoch': epoch+1,
+ 'model': model.state_dict() if model is not None else None,
+ 'optimizer': optimizer.state_dict() if optimizer is not None else None,
+ 'scheduler': scheduler.state_dict() if scheduler is not None else None,
+ }
+ for key, value in kwargs.items():
+ state[key] = value
+ torch.save(state, filename+'.pth')
+ return
+
+
+def load_model(filename, model, optimizer, scheduler, **kwargs):
+ checkpoints = torch.load(filename + '.pth')
+ if model is not None and checkpoints['model'] is not None:
+ model.load_state_dict(checkpoints['model'])
+ if optimizer is not None and checkpoints['optimizer'] is not None:
+ optimizer.load_state_dict(checkpoints['optimizer'])
+ if scheduler is not None and checkpoints['scheduler'] is not None:
+ scheduler.load_state_dict(checkpoints['scheduler'])
+ print("%s Loaded!" % (filename))
+ return checkpoints
+
+
+def count_parameters_in_MB(model):
+ return sum(np.prod(v.size()) for name, v in model.named_parameters() if "auxiliary_head" not in name)/1e6
+
+
+def get_lids_random_batch(model, data_loader, device, k=20, batch_size=128, batch_num=10):
"""
Get the local intrinsic dimensionality of each Xi in X_adv
estimated by k close neighbours in the random batch it lies in.
- :param model: if None: lid of raw inputs, otherwise LID of deep representations
- :param X: normal images
- :param k: the number of nearest neighbours for LID estimation
- :param batch_size: default 100
- :return: lids: LID of normal images of shape (num_examples, lid_dim)
- lids_adv: LID of advs images of shape (num_examples, lid_dim)
"""
- if model is None:
- lids = []
- n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
- for i_batch in range(n_batches):
- start = i_batch * batch_size
- end = np.minimum(len(X), (i_batch + 1) * batch_size)
- X_batch = X[start:end].reshape((end - start, -1))
-
- # Maximum likelihood estimation of local intrinsic dimensionality (LID)
- lid_batch = mle_batch(X_batch, X_batch, k=k)
- lids.extend(lid_batch)
-
- lids = np.asarray(lids, dtype=np.float32)
- return lids
-
- # get deep representations
- funcs = [K.function([model.layers[0].input, K.learning_phase()], [out])
- for out in [model.get_layer("lid").output]]
- lid_dim = len(funcs)
-
- # print("Number of layers to estimate: ", lid_dim)
-
- def estimate(i_batch):
- start = i_batch * batch_size
- end = np.minimum(len(X), (i_batch + 1) * batch_size)
- n_feed = end - start
- lid_batch = np.zeros(shape=(n_feed, lid_dim))
- for i, func in enumerate(funcs):
- X_act = func([X[start:end], 0])[0]
- X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1))
-
- # Maximum likelihood estimation of local intrinsic dimensionality (LID)
- lid_batch[:, i] = mle_batch(X_act, X_act, k=k)
-
- return lid_batch
lids = []
- n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
- for i_batch in range(n_batches):
- lid_batch = estimate(i_batch)
- lids.extend(lid_batch)
+ model.eval()
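+    # LID is estimated with lid_mle on the second-to-last layer activations of the
+    # first batch_num batches drawn from data_loader['train_dataset'].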
+
+ def estimate(images):
+        images = images.to(device, non_blocking=True)
+        # get the output of the second-to-last layer of the network
+ with torch.no_grad():
+ _, X_act = model(images)
+
+ lid_batch = lid_mle(X_act, X_act, k=k)
+ return lid_batch
- lids = np.asarray(lids, dtype=np.float32)
+
+    for j, (images, labels) in enumerate(data_loader['train_dataset']):
+ if j < batch_num:
+ lid_batch = estimate(images)
+ lids.extend(lid_batch)
+ lids = torch.stack(lids, dim=0).type(torch.float32)
return lids
-
-def get_lr_scheduler(dataset):
- """
- customerized learning rate decay for training with clean labels.
- For efficientcy purpose we use large lr for noisy data.
- :param dataset:
- :param noise_ratio:
- :return:
- """
- if dataset in ['mnist', 'svhn']:
- def scheduler(epoch):
- if epoch > 40:
- return 0.001
- elif epoch > 20:
- return 0.01
- else:
- return 0.1
-
- return LearningRateScheduler(scheduler)
- elif dataset in ['cifar-10']:
- def scheduler(epoch):
- if epoch > 80:
- return 0.001
- elif epoch > 40:
- return 0.01
- else:
- return 0.1
-
- return LearningRateScheduler(scheduler)
- elif dataset in ['cifar-100']:
- def scheduler(epoch):
- if epoch > 120:
- return 0.001
- elif epoch > 80:
- return 0.01
- else:
- return 0.1
-
- return LearningRateScheduler(scheduler)
-
-
-def uniform_noise_model_P(num_classes, noise):
- """ The noise matrix flips any class to any other with probability
- noise / (num_classes - 1).
- """
-
- assert (noise >= 0.) and (noise <= 1.)
-
- P = noise / (num_classes - 1) * np.ones((num_classes, num_classes))
- np.fill_diagonal(P, (1 - noise) * np.ones(num_classes))
-
- assert_array_almost_equal(P.sum(axis=1), 1, 1)
- return P
-
-
-def get_deep_representations(model, X, batch_size=128):
- """
- Get the deep representations before logits.
- :param model:
- :param X:
- :param batch_size:
- :return:
- """
- # last hidden layer is always at index -4
- output_dim = model.layers[-3].output.shape[-1].value
- get_encoding = K.function(
- [model.layers[0].input, K.learning_phase()],
- [model.layers[-3].output]
- )
-
- n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
- output = np.zeros(shape=(len(X), output_dim))
- for i in range(n_batches):
- output[i * batch_size:(i + 1) * batch_size] = \
- get_encoding([X[i * batch_size:(i + 1) * batch_size], 0])[0]
-
- return output
+def get_csr_random_batch(model, data_loader, device, batch_size=128, batch_num=4):
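+    """Estimate the critical sample ratio (csr): the fraction of samples in the first
+    batch_num test batches for which lass.find flags a nearby perturbed input."""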
+ model.eval()
+ adv_ind_sum = 0
+    for j, (images, labels) in enumerate(data_loader['test_dataset']):
+        if j < batch_num:
+            images = images.to(device, non_blocking=True)
+ scale_factor = 255. / (torch.max(images) - torch.min(images))
+ #scale_factor = 1
+ csr_model = lass(model, device, a=0.25 / scale_factor, b=0.2 / scale_factor, r=0.3 / scale_factor, iter_max=100)
+ X_adv, adv_ind = csr_model.find(images)
+ adv_ind_sum += torch.sum(adv_ind)
+
+ samples_num = batch_num * batch_size
+ csr = adv_ind_sum * 1. / samples_num
+ return csr
+
\ No newline at end of file