Commit 9352f90c authored by Raul Marichal

Upload progress on the estimator study, not just nas_wot

parent 31dd805e
Showing with 283 additions and 16 deletions
@@ -4,7 +4,9 @@ MASTER_MODEL_FILENAME_TEMPLATE = 'iter_{}_best_model.pth.tar'
# Worker-related filenames.
WORKER_MODEL_FILENAME_TEMPLATE = 'iter_{}_block_{}_model.pth.tar'
WORKER_ACCURACY_FILENAME_TEMPLATE = 'iter_{}_block_{}_accuracy.txt'
WORKER_TEST_ACCURACY_FILENAME_TEMPLATE = 'iter_{}_block_{}_test_accuracy.txt'
WORKER_RESOURCE_FILENAME_TEMPLATE = 'iter_{}_block_{}_resource.txt'
WORKER_LOG_FILENAME_TEMPLATE = 'iter_{}_block_{}_log.txt'
WORKER_FINISH_FILENAME_TEMPLATE = 'iter_{}_block_{}_finish.signal'
WORKER_SCORE_FILENAME_TEMPLATE = 'iter_{}_block_{}_score.txt'
WORKER_TIMES_FILENAME_TEMPLATE = 'iter_{}_block_{}_times.txt'
\ No newline at end of file
# Configuration file for netAdapt.
DATASET_TO_USE = "cifar10"
IMAGENET_PATH = "~/datasets/imagenet"
# Uncomment the batch size matching the architecture being adapted; with all
# three active, only the last assignment took effect.
# GLOBAL_BS = 8  # Mobilenet
# GLOBAL_BS = 64  # Mobilenet
GLOBAL_BS = 16  # VGG
# GLOBAL_BS = 128 # Alexnet
CUDA_VISIBLE_DEVICES=0,1,2 nohup bash -c 'time python master.py /tmp/rmarichal/hcl-netadapt/nas-wot/mobilenet/ 3 224 224 -im ../../netadapt/models/mobilenet/model-trained-5-epochs.pth.tar -gp 1 -bur 0.50 -rt LATENCY -irr 0.025 -rd 0.96 -lr 0.001 -st 10 -lt ~/hcl-ai/na/latency_lut/lut_mobilenet_xavier.pkl -dp data/ --arch mobilenet --sc nas_wot & ' > mobilenet-nas-wot.out
arch="alexnet"
irr="0.025"
score="nas_wot"
# score="latency"
# score="accuracy"
CUDA_VISIBLE_DEVICES=0,1 nohup bash -c "time python master.py /tmp/rmarichal/hcl-netadapt/$score/$irr/$arch/ 3 224 224 -im models/$arch/$arch-cifar10.pth.tar -gp 0 1 -mi 500 -bur 0.50 -rt LATENCY -irr $irr -rd 0.96 -lr 0.001 -st 100 -lt ~/hcl-ai/hcl-netadapt/latency_lut/lut_$arch\_xavier.pkl -dp data/ --arch $arch --sc $score & " >> $arch-$score-$irr.out
# CUDA_VISIBLE_DEVICES=0,1 nohup bash -c "time python master.py /tmp/rmarichal/hcl-netadapt/$score-con-accu/$irr/$arch/ 3 224 224 -im models/$arch/$arch-cifar10.pth.tar -gp 0 1 -mi 500 -bur 0.50 -rt LATENCY -irr $irr -rd 0.96 -lr 0.001 -st 100 -lt ~/hcl-ai/hcl-netadapt/latency_lut/lut_$arch\_xavier.pkl -dp data/ --arch $arch --sc $score & " > $arch-$score.out
# CUDA_VISIBLE_DEVICES=0,1 nohup bash -c "time python master.py /tmp/rmarichal/hcl-netadapt/$score-con-accu-untrained-new-ratio-2/$irr/$arch/ 3 224 224 -im models/$arch/$arch-cifar10-untrained.pth.tar -gp 0 1 -mi 500 -bur 0.50 -rt LATENCY -irr $irr -rd 0.96 -lr 0.001 -st 100 -lt ~/hcl-ai/hcl-netadapt/latency_lut/lut_$arch\_xavier.pkl -dp data/ --arch $arch --sc $score & " > $arch-$score.out
# CUDA_VISIBLE_DEVICES=0,1 nohup bash -c "time python master.py /tmp/rmarichal/hcl-netadapt/-con-accu/$irr/$arch/ 3 224 224 -im models/$arch/$arch-cifar10.pth.tar -gp 0 1 -mi 500 -bur 0.50 -rt LATENCY -irr $irr -rd 0.96 -lr 0.001 -st 100 -lt ~/hcl-ai/hcl-netadapt/latency_lut/lut_$arch\_xavier.pkl -dp data/ --arch $arch & " > $arch-.out
from argparse import ArgumentParser
import os
import time
import math
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.backends.cudnn as cudnn
import pickle
import nets as models
import functions as fns
_NUM_CLASSES = 100
model_names = sorted(name for name in models.__dict__
if name.islower() and not name.startswith("__")
and callable(models.__dict__[name]))
def compute_topk_accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)  # reshape: safe even if the slice is non-contiguous
res.append(correct_k.mul_(100.0 / batch_size))
return res
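# Example (illustrative, on random tensors):
#   logits = torch.randn(8, _NUM_CLASSES)
#   labels = torch.randint(0, _NUM_CLASSES, (8,))
#   top1, top5 = compute_topk_accuracy(logits, labels, topk=(1, 5))
#   # -> one-element tensors holding percentages in [0, 100]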
def compute_accuracy(output, target):
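"""Top-1 accuracy (%) of `output` logits against integer class labels in `target`."""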
output = output.argmax(dim=1)
acc = torch.sum(target == output).item()
acc = acc / output.size(0) * 100
return acc
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def get_avg(self):
return self.avg
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
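# Example (illustrative):
#   meter = AverageMeter()
#   meter.update(batch_acc, n=images.size(0))  # weight by batch size
#   meter.get_avg()                            # running weighted average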
def eval(test_loader, model, args):
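"""Evaluate `model` on `test_loader` and return the average top-1 accuracy (%)."""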
batch_time = AverageMeter()
acc = AverageMeter()
# switch to eval mode
model.eval()
end = time.time()
for i, (images, target) in enumerate(test_loader):
if not args.no_cuda:
images = images.cuda()
target = target.cuda()
output = model(images)
batch_acc = compute_accuracy(output, target)
acc.update(batch_acc, images.size(0))
batch_time.update(time.time() - end)
end = time.time()
# Update statistics
estimated_time_remained = batch_time.get_avg()*(len(test_loader)-i-1)
fns.update_progress(i, len(test_loader),
ESA='{:8.2f}'.format(estimated_time_remained)+'s',
acc='{:4.2f}'.format(float(batch_acc))
)
print()
print('Test accuracy: {:4.2f}% (time = {:8.2f}s)'.format(
float(acc.get_avg()), batch_time.get_avg()*len(test_loader)))
print('===================================================================')
return float(acc.get_avg())
if __name__ == '__main__':
# Parse the input arguments.
arg_parser = ArgumentParser()
arg_parser.add_argument('data', metavar='DIR', help='path to dataset')
arg_parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
arg_parser.add_argument('-a', '--arch', metavar='ARCH', default='alexnet',
choices=model_names,
help='model architecture: ' +
' | '.join(model_names) +
' (default: alexnet)')
arg_parser.add_argument('-b', '--batch-size', default=128, type=int,
metavar='N',
help='batch size (default: 128)')
arg_parser.add_argument('--dir', type=str, default='models/', dest='save_dir',
help='path to the saved model to evaluate (default: models/)')
arg_parser.add_argument('--no-cuda', action='store_true', default=False, dest='no_cuda',
help='disables training on GPU')
args = arg_parser.parse_args()
print(args)
# Data loader
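# CIFAR-100 images are upscaled to 224x224 to match the input size the models
# are built for (see the `3 224 224` arguments in the run scripts). The
# normalization constants below are the widely used CIFAR-10 statistics,
# presumably matching how the checkpoints were trained.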
test_dataset = datasets.CIFAR100(root=args.data, train=False, download=True,
transform=transforms.Compose([
transforms.Resize(224),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
]))
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True)
# Network
model_arch = args.arch
cudnn.benchmark = True
num_classes = _NUM_CLASSES
model = models.__dict__[model_arch](num_classes=num_classes)
# Evaluation: the checkpoint stores a whole model object, so it replaces the
# freshly constructed network above.
filename = args.save_dir
model = torch.load(filename)
if not args.no_cuda:
model = model.cuda()
print(model)
best_acc = eval(test_loader, model, args)
print('Testing accuracy:', best_acc)
@@ -6,6 +6,7 @@ import pickle
import numpy as np
import warnings
from scipy.interpolate import Rbf
import os
from collections import OrderedDict
from constants import *
@@ -300,7 +301,32 @@ def measure_latency(model, input_data_shape, runtimes=500):
return total_time/float(runtimes)
def compute_interpolation_per_layer(network_def, lookup_table_path, file_reuse=False):
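# When `file_reuse` is True, `lookup_table_path` is the already-loaded
# lookup table dict rather than a path to a pickle on disk.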
if file_reuse:
lookup_table = lookup_table_path
else:
with open(lookup_table_path, 'rb') as file_id:
lookup_table = pickle.load(file_id)
interpolation_per_layer = {}
for layer_name in network_def.keys():
feature_samples = np.array(list(lookup_table[layer_name][KEY_LATENCY].keys()))
feature_samples_in = feature_samples[:, 0]
feature_samples_out = feature_samples[:, 1]
measurement = np.array(list(lookup_table[layer_name][KEY_LATENCY].values()))
assert feature_samples_in.shape == feature_samples_out.shape
assert feature_samples_in.shape == measurement.shape
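# Subsample every other (in, out) measurement to keep the RBF fit cheap.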
feature_samples_in = feature_samples_in[::2]
feature_samples_out = feature_samples_out[::2]
measurement = measurement[::2]
print(layer_name, feature_samples_in.shape)
rbf = Rbf(feature_samples_in, feature_samples_out, \
measurement, function='cubic')
interpolation_per_layer[layer_name] = rbf
return interpolation_per_layer
def compute_latency_from_lookup_table(network_def, lookup_table_path, interpolated_lut_path=None, file_reuse=False):
'''
Compute the latency of all layers defined in `network_def` (only including Conv and FC).
@@ -314,6 +340,10 @@ def compute_latency_from_lookup_table(network_def, lookup_table_path):
`latency`: (float) latency
'''
latency = 0.0
if file_reuse:
lookup_table = lookup_table_path
# print('Reusing lookup table')
else:
with open(lookup_table_path, 'rb') as file_id:
lookup_table = pickle.load(file_id)
for layer_name, layer_properties in network_def.items():
@@ -325,18 +355,32 @@ def compute_latency_from_lookup_table(network_def, lookup_table_path):
if (num_in_channels, num_out_channels) in lookup_table[layer_name][KEY_LATENCY].keys():
latency += lookup_table[layer_name][KEY_LATENCY][(num_in_channels, num_out_channels)]
else:
print('using interpolation')
# Not found in the lookup table, then interpolate the latency
if file_reuse:
interpolated_lut = interpolated_lut_path
# print('Reusing interpolated LUT')
else:
directory = os.path.dirname(lookup_table_path)
filename = os.path.basename(lookup_table_path)
interpolated_lut_filename = 'interpolated_' + filename
interpolated_lut_path = os.path.join(directory, interpolated_lut_filename)
if os.path.exists(interpolated_lut_path):
# if interpolated_lut_path != None:
with open(interpolated_lut_path,'rb') as file_id:
interpolated_lut = pickle.load(file_id)
else:
feature_samples = np.array(list(lookup_table[layer_name][KEY_LATENCY].keys()))
feature_samples_in = feature_samples[:, 0]
feature_samples_out = feature_samples[:, 1]
measurement = np.array(list(lookup_table[layer_name][KEY_LATENCY].values()))
assert feature_samples_in.shape == feature_samples_out.shape
assert feature_samples_in.shape == measurement.shape
# Wrap the freshly fitted RBF in a per-layer dict so the lookup below
# works for both the loaded and the just-built interpolation.
interpolated_lut = {layer_name: Rbf(feature_samples_in, feature_samples_out,
measurement, function='cubic')}
num_in_channels = np.array([num_in_channels])
num_out_channels = np.array([num_out_channels])
estimated_latency = interpolated_lut[layer_name](num_in_channels, num_out_channels)
latency += estimated_latency[0]
return latency
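# Illustrative use (hypothetical path; assumes `network_def` was produced by
# this repo's network utilities and its layer names match the pickled LUT):
#   latency_s = compute_latency_from_lookup_table(
#       network_def, 'latency_lut/lut_alexnet_xavier.pkl')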
import torch
import torch.nn as nn
import numpy as np
class DimensionAndSumRecorder:
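"""Forward hook that records each hooked module's output shape and accumulates the total flattened element count."""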
def __init__(self):
self.dimensions = []
self.sum_of_flattened = 0
def __call__(self, module, input, output):
self.dimensions.append(output.shape)
self.sum_of_flattened += np.prod(output.shape)
def count_relu_layers_and_record_dimensions_and_sum(model, input_tensor):
relu_count = 0
recorder = DimensionAndSumRecorder()
hook_handles = []
for module in model.modules():
if isinstance(module, nn.ReLU):
hook_handles.append(module.register_forward_hook(recorder))
relu_count += 1
# Run a forward pass to record dimensions and compute sum of flattened outputs
with torch.no_grad():
model(input_tensor)
# Remove the hooks so repeated calls do not double-count
for handle in hook_handles:
handle.remove()
return relu_count, recorder.dimensions, recorder.sum_of_flattened
# Example usage:
if __name__ == "__main__":
# Load the trained model (saved as a whole model object, so no separate
# state_dict loading is needed)
current_model_path = 'models/mobilenet/mobilenet-cifar100.pth.tar'
model = torch.load(current_model_path)
# Create a dummy input tensor with appropriate dimensions
input_tensor = torch.randn(1, 3, 224, 224) # Adjust based on your model's expected input size
input_tensor = input_tensor.cuda()
# Count ReLU layers, record their output dimensions, and compute sum of flattened outputs
relu_count, relu_dimensions, relu_flattened_sum = count_relu_layers_and_record_dimensions_and_sum(model, input_tensor)
print(f"The model has {relu_count} ReLU layers.")
suma = 0
for i, dims in enumerate(relu_dimensions):
print(f"ReLU layer {i+1} output dimensions: {np.prod(dims)}")
suma += np.prod(dims)
print(f"The sum of the flattened outputs of all ReLU layers is: {relu_flattened_sum} y {suma}")
def generate_interpolated_lut(network_def, lookup_table_path):
if lookup_table_path is not None:
directory = os.path.dirname(lookup_table_path)
filename = os.path.basename(lookup_table_path)
interpolated_lut_filename = 'interpolated_' + filename
interpolated_lut_path = os.path.join(directory, interpolated_lut_filename)
interpolated_lut = fns.compute_interpolation_per_layer(network_def,lookup_table_path)
with open(interpolated_lut_path, 'wb') as file_id:
pickle.dump(interpolated_lut,file_id)
del interpolated_lut
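# Illustrative one-off call (hypothetical path); writes `interpolated_<name>.pkl`
# next to the raw lookup table, where compute_latency_from_lookup_table finds it:
#   generate_interpolated_lut(network_def, 'latency_lut/lut_alexnet_xavier.pkl')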
File added
File added
File added
File added
File added
File added
../../../hcl-ai/na/latency_lut/lut_alexnet_xavier_0.pkl
\ No newline at end of file
../../../hcl-ai/na/latency_lut/lut_alexnet_100_xavier_3_bs-32.pkl
\ No newline at end of file
../../../hcl-ai/na/latency_lut/lut_mobilenet_xavier.pkl
\ No newline at end of file
../../../hcl-ai/na/latency_lut/lut_mobilenet_100_xavier_3_bs-32.pkl
\ No newline at end of file
File added
../../na/latency_lut/lut_vgg_xavier_0_bs-32.pkl
\ No newline at end of file
lut_vggin_100_xavier_3_bs_16.pkl
\ No newline at end of file