Commit 37024f66 authored by Quan

Add code for encode, decode and model

parent c35334a1
Face-Detector-1MB-with-landmark/
Subproject commit 2b075657aef954b9426f938ac7fce100b6910fe6
@@ -79,7 +79,7 @@ class FaceDataset(data.Dataset):
        landms = target[:, 4:14]
        # TODO write landms to target_transforms
        if self.target_transform:
-            boxes, labels = self.target_transform(boxes, labels)
+            boxes, landms, labels = self.target_transform(boxes, landms, labels)
        return torch.from_numpy(img), target

    @staticmethod
...
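For reference, each annotation row that FaceDataset slices above appears to pack box, landmark and label columns into one target array; a minimal sketch of that assumed layout (only the 4:14 landmark slice is shown in the diff, the label column index is an assumption):

    import numpy as np

    # Editor's sketch of the assumed per-face annotation layout, not repo code.
    # cols 0-3  -> box corners (x1, y1, x2, y2)
    # cols 4-13 -> five landmark points, (x, y) each  (matches target[:, 4:14])
    # col 14    -> class label (assumed)
    target = np.zeros((1, 15), dtype=np.float32)
    boxes = target[:, :4]
    landms = target[:, 4:14]
    labels = target[:, 14]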
@@ -45,9 +45,16 @@ def create_rfb_tiny_mb_ssd(num_classes, is_test=False, device="cuda"):
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=9 * num_classes, kernel_size=3, padding=1)
    ])
+    landmark_headers = ModuleList([
+        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=9 * 10, kernel_size=3, padding=1),
+        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=6 * 10, kernel_size=3, padding=1),
+        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=6 * 10, kernel_size=3, padding=1),
+        Conv2d(in_channels=base_net.base_channel * 16, out_channels=9 * 10, kernel_size=3, padding=1)
+    ])
    return SSD(num_classes, base_net_model, source_layer_indexes,
-               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)
+               extras, classification_headers, regression_headers, landmark_headers, is_test=is_test, config=config, device=device)

def create_rfb_tiny_mb_ssd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
...
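Each landmark header mirrors its regression counterpart, with 10 outputs per anchor (5 points x 2 coordinates) in place of 4 box offsets. A hedged sketch of the channel arithmetic, using a plain Conv2d as a stand-in for the repo's SeperableConv2d and an invented channel count:

    import torch
    import torch.nn as nn

    # Editor's sketch: why out_channels = anchors_per_cell * 10.
    anchors_per_cell = 6                        # e.g. the "6 * 10" heads above
    head = nn.Conv2d(128, anchors_per_cell * 10, kernel_size=3, padding=1)

    feat = torch.randn(1, 128, 10, 10)          # one hypothetical feature map
    out = head(feat)                            # (1, 60, 10, 10)
    out = out.permute(0, 2, 3, 1).contiguous()  # channels last
    out = out.view(out.size(0), -1, 10)         # (1, 600, 10): one 10-vector per anchor
    print(out.shape)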
@@ -13,7 +13,7 @@ GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1'])

class SSD(nn.Module):
    def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int],
                 extras: nn.ModuleList, classification_headers: nn.ModuleList,
-                 regression_headers: nn.ModuleList, is_test=False, config=None, device=None):
+                 regression_headers: nn.ModuleList, landmark_headers: nn.ModuleList, is_test=False, config=None, device=None):
        """Compose an SSD model using the given components.
        """
        super(SSD, self).__init__()
@@ -24,6 +24,7 @@ class SSD(nn.Module):
        self.extras = extras
        self.classification_headers = classification_headers
        self.regression_headers = regression_headers
+        self.landmark_headers = landmark_headers
        self.is_test = is_test
        self.config = config
@@ -41,6 +42,7 @@ class SSD(nn.Module):
-    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        confidences = []
        locations = []
+        landmarks = []
        start_layer_index = 0
        header_index = 0
        for end_layer_index in self.source_layer_indexes:
@@ -70,33 +72,38 @@ class SSD(nn.Module):
                x = layer(x)
            end_layer_index += 1
            start_layer_index = end_layer_index
-            confidence, location = self.compute_header(header_index, y)
+            confidence, location, landmark = self.compute_header(header_index, y)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)
+            landmarks.append(landmark)
        for layer in self.base_net[end_layer_index:]:
            x = layer(x)
        for layer in self.extras:
            x = layer(x)
-            confidence, location = self.compute_header(header_index, x)
+            confidence, location, landmark = self.compute_header(header_index, x)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)
+            landmarks.append(landmark)
        confidences = torch.cat(confidences, 1)
        locations = torch.cat(locations, 1)
+        landmarks = torch.cat(landmarks, 1)
        if self.is_test:
            confidences = F.softmax(confidences, dim=2)
            boxes = box_utils.convert_locations_to_boxes(
                locations, self.priors, self.config.center_variance, self.config.size_variance
            )
+            landmarks = box_utils.decode_landm(landmarks.data.squeeze(0), self.priors, self.config.center_variance, self.config.size_variance)
            boxes = box_utils.center_form_to_corner_form(boxes)
-            return confidences, boxes
+            return confidences, boxes, landmarks
        else:
-            return confidences, locations
+            return confidences, locations, landmarks
    def compute_header(self, i, x):
        confidence = self.classification_headers[i](x)
@@ -106,8 +113,12 @@ class SSD(nn.Module):
        location = self.regression_headers[i](x)
        location = location.permute(0, 2, 3, 1).contiguous()
        location = location.view(location.size(0), -1, 4)
+        landmark = self.landmark_headers[i](x)
+        landmark = landmark.permute(0, 2, 3, 1).contiguous()
+        landmark = landmark.view(landmark.size(0), -1, 10)
-        return confidence, location
+        return confidence, location, landmark

    def init_from_base_net(self, model):
        self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True)
@@ -147,16 +158,17 @@ class MatchPrior(object):
        self.size_variance = size_variance
        self.iou_threshold = iou_threshold

-    def __call__(self, gt_boxes, gt_labels):
+    def __call__(self, gt_boxes, gt_landmarks, gt_labels):
        if type(gt_boxes) is np.ndarray:
            gt_boxes = torch.from_numpy(gt_boxes)
        if type(gt_labels) is np.ndarray:
            gt_labels = torch.from_numpy(gt_labels)
-        boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
-                                                self.corner_form_priors, self.iou_threshold)
+        boxes, labels, landmarks = box_utils.assign_priors(gt_boxes, gt_labels, gt_landmarks,
+                                                           self.corner_form_priors, self.iou_threshold)
        boxes = box_utils.corner_form_to_center_form(boxes)
        locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
-        return locations, labels
+        landmarks = box_utils.encode_landm(landmarks, self.center_form_priors, self.center_variance, self.size_variance)
+        return locations, landmarks, labels

def _xavier_init_(m: nn.Module):
...
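With MatchPrior calling encode_landm, the training target for each matched prior is the landmark's offset from the prior center, scaled by center_variance and the prior size (size_variance is accepted but unused by the landmark encoding). A standalone re-derivation for illustration; encode_landmarks is the editor's name, not the repo function:

    import torch

    # Editor's sketch of the landmark encoding, not repo code.
    def encode_landmarks(landms, priors, center_variance):
        # landms: (P, 10) absolute point coords; priors: (P, 4) center form (cx, cy, w, h)
        pts = landms.reshape(-1, 5, 2)              # (P, 5, 2)
        cxcy = priors[:, :2].unsqueeze(1)           # (P, 1, 2), broadcast over the 5 points
        wh = priors[:, 2:].unsqueeze(1)             # (P, 1, 2)
        offsets = (pts - cxcy) / (center_variance * wh)
        return offsets.reshape(landms.size(0), -1)  # (P, 10)

    priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])
    landms = torch.full((1, 10), 0.5)               # every point on the prior center
    print(encode_landmarks(landms, priors, 0.1))    # all-zero offsets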
import sys
-sys.path.append('/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector')
+sys.path.append('/home/quannm/ssd_landmarks')
from model.mb_ssd_lite_f19 import create_mb_ssd_lite_f19, create_mb_ssd_lite_f19_predictor
from model.mb_ssd_lite_f38 import create_mb_ssd_lite_f38, create_mb_ssd_lite_f38_predictor
from model.mb_ssd_lite_f38_person import create_mb_ssd_lite_f38_person, create_mb_ssd_lite_f38_person_predictor
from model.rfb_tiny_mb_ssd import create_rfb_tiny_mb_ssd, create_rfb_tiny_mb_ssd_predictor
from utils.misc import Timer
-from torchscope import scope
+#from torchscope import scope
import argparse
import cv2
import sys
@@ -16,12 +16,12 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector predictor with PyTorch')
parser.add_argument("--net_type", default="rfb_tiny_mb2_ssd", type=str, help='mb2-ssd-lite_f19, mb2-ssd-lite_f38, rfb_tiny_mb2_ssd')
-parser.add_argument('--model_path', default = '/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector/app/person/rfb_tiny_mb2_ssd_c32/rfb_tiny_mb2_ssd_c32_63_208_222.pth',
+parser.add_argument('--model_path', default = '',
                    help='model weight')
-parser.add_argument('--label_path', default = '/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector/utils/labels/person.txt', help='class names label')
+parser.add_argument('--label_path', default = '/home/quannm/ssd_landmarks/utils/labels/head.txt', help='class names label')
parser.add_argument('--result_path', default = 'detect_results', help='result path to save')
-parser.add_argument('--test_path', default = "/media/ducanh/DATA/tienln/data/test_data/mall", help='path of folder test')
+parser.add_argument('--test_path', default = "/home/quannm/ssd_landmarks/testdata", help='path of folder test')
-parser.add_argument('--test_device', default="cuda:0", type=str, help='cuda:0 or cpu')
+parser.add_argument('--test_device', default="cpu", type=str, help='cuda:0 or cpu')
args = parser.parse_args()
def load_model():
@@ -41,12 +41,12 @@ def load_model():
        net.load(args.model_path)
    elif args.net_type == 'rfb_tiny_mb2_ssd':
        net = create_rfb_tiny_mb_ssd(len(class_names), is_test=True, device=args.test_device)
-        net.load(args.model_path)
+        #net.load(args.model_path)
        predictor = create_rfb_tiny_mb_ssd_predictor(net, candidate_size=5000, device=args.test_device)
    else:
        print("The net type is wrong. It should be one of mb2-ssd-lite_f19, mb2-ssd-lite_f38 or rfb_tiny_mb2_ssd.")
        sys.exit(1)
-    scope(net, (3, 300, 300))
+    #scope(net, (3, 300, 300))
    return predictor
if __name__ == "__main__": if __name__ == "__main__":
...@@ -64,14 +64,20 @@ if __name__ == "__main__": ...@@ -64,14 +64,20 @@ if __name__ == "__main__":
image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
import time import time
t1 = time.time() t1 = time.time()
boxes, labels, probs = predictor.predict(image, 2000,0.5) boxes,landmarks, labels, probs = predictor.predict(image, 2000,0.5)
tt_time += (time.time()-t1) tt_time += (time.time()-t1)
probs = probs.numpy() probs = probs.numpy()
sum += boxes.size(0) sum += boxes.size(0)
for i in range(boxes.size(0)): for i in range(boxes.size(0)):
box = boxes[i, :] box = boxes[i, :]
cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (0,0,255), 2) landmark = landmarks[i, :]
cv2.putText(orig_image, str(probs[i]), (box[0], box[1]+20),cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255)) cv2.rectangle(orig_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0,0,255), 2)
cv2.putText(orig_image, str(probs[i]), (int(box[0]), int(box[1]+20)),cv2.FONT_HERSHEY_DUPLEX, 0.3, (255, 255, 255))
cv2.circle(orig_image, (int(landmark[0]), int(landmark[1])), 1, (0, 0, 255), 4)
cv2.circle(orig_image, (int(landmark[2]), int(landmark[3])), 1, (0, 255, 255), 4)
cv2.circle(orig_image, (int(landmark[4]), int(landmark[5])), 1, (255, 0, 255), 4)
cv2.circle(orig_image, (int(landmark[6]), int(landmark[7])), 1, (0, 255, 0), 4)
cv2.circle(orig_image, (int(landmark[8]), int(landmark[9])), 1, (255, 0, 0), 4)
cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(orig_image, str(boxes.size(0)), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
cv2.imwrite(os.path.join(args.result_path, image_path), orig_image) cv2.imwrite(os.path.join(args.result_path, image_path), orig_image)
print(f"Found {len(probs)} object. The output image is {args.result_path}") print(f"Found {len(probs)} object. The output image is {args.result_path}")
......
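The five per-point circles above could also be driven from a color table; a hedged refactor sketch with the same BGR colors (draw_landmarks is an illustrative helper, not part of the commit):

    import cv2

    # Editor's sketch: one color per facial point, same BGR values as above.
    LANDMARK_COLORS = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                       (0, 255, 0), (255, 0, 0)]

    def draw_landmarks(image, landmark):
        # landmark: flat (x0, y0, ..., x4, y4), as returned by predictor.predict
        for k, color in enumerate(LANDMARK_COLORS):
            x, y = int(landmark[2 * k]), int(landmark[2 * k + 1])
            cv2.circle(image, (x, y), 1, color, 4)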
import sys
-sys.path.append('/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector/')
+sys.path.append('/home/quannm/ssd_landmarks/')
from utils.misc import str2bool, Timer, freeze_net_layers, store_labels
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR
import os
@@ -8,8 +8,8 @@ import logging
import sys
import itertools
import torch
-from torchscope import scope
-from torchsummary import summary
+#from torchscope import scope
+#from torchsummary import summary
from utils.loss import MultiboxLoss, FocalLoss
from utils.argument import _argument
from train import train, test, data_loader, create_network
...
from utils.argument import _argument
import logging
import sys
@@ -9,19 +10,16 @@ from module.ssd import MatchPrior
from datasets.data_preprocessing import TrainAugmentation, TestTransform
from torch.utils.data import DataLoader, ConcatDataset
from utils.loss import MultiboxLoss, FocalLoss
-from torchsummary import summary
+#from torchsummary import summary
import torch
-from torchscope import scope
+#from torchscope import scope
import sys
-sys.path.append('/media/ducanh/DATA/tienln/ai_camera/detector/')
+sys.path.append('/home/quannm/ssd_landmarks/')
from utils.misc import str2bool, Timer, freeze_net_layers, store_labels

timer = Timer()
args = _argument()
def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    net.train(True)
    running_loss = 0.0
@@ -62,7 +60,6 @@ def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    return training_loss

def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
@@ -85,10 +82,9 @@ def test(loader, net, criterion, device):
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num

def data_loader(config):
    train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance, config.size_variance, config.iou_threshold)
    test_transform = TestTransform(config.image_size, config.image_mean, config.image_std)
    logging.info("Prepare training datasets.")
@@ -96,42 +92,41 @@ def data_loader(config):
    Data_Valid = []
    datasets = []
-    path_dataset = open("/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector/datasets/train_dataset.txt", "r")
+    path_dataset = open("/home/quannm/ssd_landmarks/datasets/train_dataset.txt", "r")
    for line in path_dataset:
        data = line.split('+')
        Data_Train.append([data[0], data[1][:-1]])
    # training datasets
    # dataset_paths = [Data_Train[0],Data_Train[1],Data_Train[2],Data_Train[3],Data_Train[4],Data_Train[5]]
-    dataset_paths = [Data_Train[3]]
+    dataset_paths = [Data_Train[0], Data_Train[1]]
    for dataset_path in dataset_paths:
        print(dataset_path)
        dataset = _DataLoader(dataset_path, transform=train_transform, target_transform=target_transform)
        print(len(dataset.ids))
        datasets.append(dataset)
        num_classes = len(dataset.class_names)
    train_dataset = ConcatDataset(datasets)
    logging.info("Train dataset size: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset, args.batch_size, num_workers=args.num_workers, shuffle=True)
    if args.valid:
        # Validation datasets
-        path_dataset = open("/media/ducanh/DATA/tienln/ai_camera/ai_camera_detector/datasets/valid_dataset.txt", "r")
+        path_dataset = open("/home/quannm/ssd_landmarks/datasets/valid_dataset.txt", "r")
        for line in path_dataset:
            data = line.split('+')
            Data_Valid.append([data[0], data[1][:-1]])
        # print(Data_Valid)
        logging.info("Prepare Validation datasets.")
        valid_dataset_paths = [Data_Valid[0]]
        for dataset_path in valid_dataset_paths:
            val_dataset = _DataLoader(dataset_path, transform=test_transform, target_transform=target_transform)
        val_loader = DataLoader(val_dataset, args.batch_size, num_workers=args.num_workers, shuffle=True)
        return train_loader, val_loader, num_classes
    else:
        return train_loader, num_classes
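data_loader expects every line of train_dataset.txt / valid_dataset.txt to be two paths joined by '+', with the trailing newline stripped via [:-1]; a minimal sketch of that parse (the example line is hypothetical):

    # Editor's sketch of the list-file format used by data_loader above.
    # Hypothetical line: "/data/faces/images+/data/faces/labels.txt\n"
    def read_dataset_list(path):
        pairs = []
        with open(path, "r") as f:
            for line in f:
                data = line.split('+')
                pairs.append([data[0], data[1][:-1]])  # drop trailing newline
        return pairs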
def create_network(create_net, num_classes, DEVICE):
    logging.info("Build network.")
    net = create_net(num_classes)
    # print(net)
@@ -200,7 +195,7 @@ def create_network(create_net, num_classes, DEVICE):
        logging.info("Uses MultiStepLR scheduler.")
        milestones = [int(v.strip()) for v in args.milestones.split(",")]
        scheduler = MultiStepLR(optimizer, milestones=milestones,
                                gamma=0.1, last_epoch=last_epoch)
    elif args.scheduler == 'cosine':
        logging.info("Uses CosineAnnealingLR scheduler.")
        scheduler = CosineAnnealingLR(optimizer, args.t_max, last_epoch=last_epoch)
@@ -208,5 +203,5 @@ def create_network(create_net, num_classes, DEVICE):
        logging.fatal(f"Unsupported Scheduler: {args.scheduler}.")
        parser.print_help(sys.stderr)
        sys.exit(1)
    return net, criterion, optimizer, scheduler
@@ -95,6 +95,22 @@ def convert_locations_to_boxes(locations, priors, center_variance,
    Returns:
        boxes: priors: [[center_x, center_y, h, w]]. All the values
            are relative to the image size.
+    """
+    """
+    The following is the decode used for face landmarks, kept for reference:
+    boxes = torch.cat((
+        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
+    boxes[:, :2] -= boxes[:, 2:] / 2
+    boxes[:, 2:] += boxes[:, :2]
+    return boxes
+
+    torch.cat([
+        locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
+        torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
+    ], dim=locations.dim() - 1)
    """
    # priors can have one dimension less.
    if priors.dim() + 1 == locations.dim():
@@ -104,9 +120,45 @@ def convert_locations_to_boxes(locations, priors, center_variance,
        torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
    ], dim=locations.dim() - 1)
+def encode_landm(matched, priors, center_variance, size_variance):
+    """Encode the variances from the priorbox layers into the ground truth boxes
+    we have matched (based on jaccard overlap) with the prior boxes.
+    Args:
+        matched: (tensor) Coords of ground truth for each prior in point-form
+            Shape: [num_priors, 10].
+        priors: (tensor) Prior boxes in center-offset form
+            Shape: [num_priors, 4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        encoded landm (tensor), Shape: [num_priors, 10]
+    """
+    variances = [center_variance, size_variance]
+    # distance between each matched point and its prior's center
+    matched = torch.reshape(matched, (matched.size(0), 5, 2))
+    priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+    priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+    priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+    priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+    priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
+    g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+    # encode variance
+    g_cxcy /= (variances[0] * priors[:, :, 2:])
+    # g_cxcy /= priors[:, :, 2:]
+    g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
+    # return target for smooth_l1_loss
+    return g_cxcy
def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
    # priors can have one dimension less
+    # g_cxcy = (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance
+    # g_cxcy = ((matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]) / variances[0] / priors[:, 2:]
+    '''
+    return torch.cat([
+        (boxes[..., :2] + boxes[..., 2:]) / 2,
+        boxes[..., 2:] - boxes[..., :2]
+    ], boxes.dim() - 1)
+    '''
    if center_form_priors.dim() + 1 == center_form_boxes.dim():
        center_form_priors = center_form_priors.unsqueeze(0)
    return torch.cat([
@@ -115,6 +167,34 @@ def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance,
    ], dim=center_form_boxes.dim() - 1)
+def decode_landm(landmarks, priors, center_variance, size_variance):
+    """Decode landmarks from predictions using priors to undo
+    the encoding we did for offset regression at train time.
+    Args:
+        landmarks (tensor): landmark predictions for loc layers,
+            Shape: [num_priors, 10]
+        priors (tensor): Prior boxes in center-offset form.
+            Shape: [num_priors, 4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        decoded landmark predictions
+    Notation relative to the reference decode above:
+        loc[:, :2] -> landmarks[..., :2], variances[0] -> center_variance,
+        priors[:, 2:] -> priors[..., 2:]
+    """
+    if priors.dim() + 1 == landmarks.dim():
+        priors = priors.unsqueeze(0)
+    #landmarks = landmarks.squeeze(0)
+    landms = torch.cat((priors[..., :2] + landmarks[..., :2] * center_variance * priors[..., 2:],
+                        priors[..., :2] + landmarks[..., 2:4] * center_variance * priors[..., 2:],
+                        priors[..., :2] + landmarks[..., 4:6] * center_variance * priors[..., 2:],
+                        priors[..., :2] + landmarks[..., 6:8] * center_variance * priors[..., 2:],
+                        priors[..., :2] + landmarks[..., 8:10] * center_variance * priors[..., 2:],
+                        ), dim=landmarks.dim() - 1)
+    return landms
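decode_landm is the exact inverse of encode_landm's per-point mapping, so a round trip should reproduce the input; a self-contained numeric check re-deriving both formulas from this diff:

    import torch

    # Editor's sanity check: decode(encode(x)) == x for the landmark mapping.
    torch.manual_seed(0)
    P, var0 = 8, 0.1
    priors = torch.rand(P, 4) * 0.5 + 0.25   # (cx, cy, w, h), strictly positive w/h
    landms = torch.rand(P, 10)               # x1, y1, ..., x5, y5

    cxcy = priors[:, :2].repeat(1, 5)        # (P, 10): cx, cy per point
    wh = priors[:, 2:].repeat(1, 5)          # (P, 10): w, h per point
    encoded = (landms - cxcy) / (var0 * wh)  # encode_landm's offsets
    decoded = cxcy + encoded * var0 * wh     # decode_landm's reconstruction
    assert torch.allclose(decoded, landms, atol=1e-6)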
def area_of(left_top, right_bottom) -> torch.Tensor:
    """Compute the areas of rectangles given two corners.
@@ -148,15 +228,17 @@ def iou_of(boxes0, boxes1, eps=1e-5):
    return overlap_area / (area0 + area1 - overlap_area + eps)

-def assign_priors(gt_boxes, gt_labels, corner_form_priors,
-                  iou_threshold):
+def assign_priors(gt_boxes, gt_labels, gt_landmarks, corner_form_priors,
+                  iou_threshold):
    """Assign ground truth boxes and targets to priors.
    Args:
        gt_boxes (num_targets, 4): ground truth boxes.
        gt_labels (num_targets): labels of targets.
+        gt_landmarks (num_targets, 10): ground truth landmarks.
        priors (num_priors, 4): corner form priors
    Returns:
        boxes (num_priors, 4): real values for priors.
        labels (num_priors): labels for priors.
+        landmarks (num_priors, 10): landmarks for priors.
    """
@@ -175,7 +257,8 @@ def assign_priors(gt_boxes, gt_labels, corner_form_priors,
    labels = gt_labels[best_target_per_prior_index]
    labels[best_target_per_prior < iou_threshold] = 0  # the background id
    boxes = gt_boxes[best_target_per_prior_index]
-    return boxes, labels
+    landmarks = gt_landmarks[best_target_per_prior_index]
+    return boxes, labels, landmarks
def hard_negative_mining(loss, labels, neg_pos_ratio):
@@ -215,7 +298,7 @@ def corner_form_to_center_form(boxes):
    ], boxes.dim() - 1)

-def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+def hard_nms(box_scores, landmark_scores, iou_threshold, top_k=-1, candidate_size=200):
    """
    Args:
@@ -245,15 +328,15 @@ def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
        )
        indexes = indexes[iou <= iou_threshold]
-    return box_scores[picked, :]
+    return box_scores[picked, :], landmark_scores[picked, :]

-def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None,
+def nms(box_scores, landmark_scores, nms_method=None, score_threshold=None, iou_threshold=None,
        sigma=0.5, top_k=-1, candidate_size=200):
    if nms_method == "soft":
        return soft_nms(box_scores, score_threshold, sigma, top_k)
    else:
-        return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size)
+        return hard_nms(box_scores, landmark_scores, iou_threshold, top_k, candidate_size=candidate_size)

def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1):
...
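The key invariant in the new hard_nms is that boxes and landmarks stay row-aligned: both tensors are gathered with the same picked indices (note the soft path above still returns a single tensor, so landmarks survive only with hard NMS). A minimal sketch of that pairing, indexing only, no IoU logic:

    import torch

    # Editor's sketch: synchronized gathering keeps row i of each tensor paired.
    box_scores = torch.arange(20.0).reshape(4, 5)        # 4 boxes + score column
    landmark_scores = torch.arange(44.0).reshape(4, 11)  # 4 landmark rows + score
    picked = [2, 0]                                      # indices NMS kept

    kept_boxes = box_scores[picked, :]
    kept_landmarks = landmark_scores[picked, :]
    assert kept_boxes.size(0) == kept_landmarks.size(0)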
@@ -35,19 +35,26 @@ class Predictor:
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            self.timer.start()
-            scores, boxes = self.net.forward(images)
+            scores, boxes, landmarks = self.net.forward(images)
        # print("Inference time: ", self.timer.end())
        boxes = boxes[0]
        scores = scores[0]
+        #landmarks = landmarks[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
+        landmarks = landmarks.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
+        picked_landmarks_probs = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
@@ -55,20 +62,36 @@ class Predictor:
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
+            subset_landmarks = landmarks[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
-            box_probs = box_utils.nms(box_probs, self.nms_method,
+            landmark_probs = torch.cat([subset_landmarks, probs.reshape(-1, 1)], dim=1)
+            box_probs, landmark_probs = box_utils.nms(box_probs, landmark_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
+            picked_landmarks_probs.append(landmark_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
-            return torch.tensor([]), torch.tensor([]), torch.tensor([])
+            return torch.tensor([]), torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
+        picked_landmarks_probs = torch.cat(picked_landmarks_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
-        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
\ No newline at end of file
+        picked_landmarks_probs[:, 0] *= width
+        picked_landmarks_probs[:, 1] *= height
+        picked_landmarks_probs[:, 2] *= width
+        picked_landmarks_probs[:, 3] *= height
+        picked_landmarks_probs[:, 4] *= width
+        picked_landmarks_probs[:, 5] *= height
+        picked_landmarks_probs[:, 6] *= width
+        picked_landmarks_probs[:, 7] *= height
+        picked_landmarks_probs[:, 8] *= width
+        picked_landmarks_probs[:, 9] *= height
+        return picked_box_probs[:, :4], picked_landmarks_probs[:, :10], torch.tensor(picked_labels), picked_box_probs[:, 4]
\ No newline at end of file
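The ten coordinate scalings above can be collapsed with strided slices, since even columns are x and odd columns are y; a hedged equivalent helper (scale_landmarks is the editor's name, not part of the commit):

    import torch

    # Editor's sketch: equivalent to the ten per-column assignments above.
    def scale_landmarks(landmarks, width, height):
        # landmarks: (N, >=10), normalized coords; even cols x, odd cols y.
        landmarks = landmarks.clone()
        landmarks[:, 0:10:2] *= width
        landmarks[:, 1:10:2] *= height
        return landmarks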