Be honest with yourself.

YOLOv1 decode method

Posted on 2019-08-20 In deep learning

YOLOv1 encode and decode methods
YOLO一代的target编码,解码

Code实现

Encode原理

Imgur

Encode代码

# Preconditioning
# Rescale the box coordinates by the image size before they are encoded:
# columns 0 and 2 of `boxes` are divided by w, columns 1 and 3 by h, where
# h and w are the first two entries of img.shape.
# NOTE(review): assumes `boxes` is an Nx4 float tensor of [x1, y1, x2, y2]
# in pixel units -- confirm against the caller.
h,w,_ = img.shape  # requires img.shape to have exactly three entries; the third is ignored
boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)  # in-place division: `boxes` itself is modified

# Function
def encoder(self, boxes, labels, grid_num=14):
    '''
    Encode ground-truth boxes and labels into a YOLOv1-style target grid.

    Args:
        boxes (tensor): Nx4 rows of [x1, y1, x2, y2]. Centres are divided by
            cell_size = 1 / grid_num to find the owning cell, so coordinates
            are expected to be normalized to [0, 1] already.
        labels (tensor): N class labels. Label k sets channel k + 9 to 1.
            NOTE(review): this implies labels start at 1 so that the class
            channels begin at index 10 -- confirm against the dataset code.
        grid_num (int): number of grid cells per side. Defaults to 14.

    Returns:
        tensor: grid_num x grid_num x 30 target indexed [row, col, channel].
        For every cell that owns a box centre, channels 0-3 and 5-8 both hold
        (dx, dy, w, h), channels 4 and 9 hold confidence 1, and the channel
        label + 9 is set to 1. dx, dy are the centre offsets from the cell's
        top-left corner measured in cell units; w, h are the box width and
        height in the same units as `boxes`. If several boxes fall into the
        same cell, later boxes overwrite the box channels of earlier ones.
    '''
    # 30 = 2 boxes x 5 values + class channels.
    target = torch.zeros((grid_num, grid_num, 30))
    cell_size = 1. / grid_num
    wh = boxes[:, 2:] - boxes[:, :2]          # ground-truth width and height, Nx2
    cxcy = (boxes[:, 2:] + boxes[:, :2]) / 2  # ground-truth centre x, y, Nx2
    for i, center in enumerate(cxcy):
        # ceil() - 1 assigns a centre lying on a cell border to the cell on
        # its left/top. A centre at exactly 0 would yield -1 and silently wrap
        # around to the last row/column, so clamp it into the first cell.
        col_row = ((center / cell_size).ceil() - 1).clamp(min=0)
        col, row = int(col_row[0]), int(col_row[1])
        cell = target[row, col]  # view into target; writes below land in target
        cell[4] = 1  # confidence of box slot 1
        cell[9] = 1  # confidence of box slot 2
        cell[int(labels[i]) + 9] = 1  # class indicator
        # Offset of the centre from the matched cell's top-left corner.
        delta_xy = center / cell_size - col_row
        cell[2:4] = wh[i]
        cell[:2] = delta_xy
        cell[7:9] = wh[i]
        cell[5:7] = delta_xy
    return target

Decode代码

def decoder(pred, grid_num=14):
    '''
    Decode a YOLOv1-style prediction grid into boxes, class indices and scores.

    Args:
        pred (tensor): 1 x grid_num x grid_num x 30 prediction, indexed
            [batch, row, col, channel]. Channels 0-4 and 5-9 are two box
            slots of (dx, dy, w, h, confidence); channels 10 onward are the
            class scores. The caller's tensor is not modified.
        grid_num (int): number of grid cells per side. Defaults to 14.

    Returns:
        tuple: (boxes, cls_indexs, probs) after non-maximum suppression.
        boxes is (k, 4) as [x1, y1, x2, y2] relative to the image, cls_indexs
        is (k,) with the arg-max over channels 10 onward, and probs is (k,)
        with confidence * best class score. When nothing passes the
        thresholds, a single all-zero placeholder entry is passed to NMS.

    NOTE(review): `nms` is not defined in this snippet; it is expected to be
    provided elsewhere and to return the indices of the boxes to keep --
    confirm its signature.
    '''
    boxes = []
    cls_indexs = []
    probs = []
    cell_size = 1. / grid_num
    pred = pred.data
    pred = pred.squeeze(0)  # grid_num x grid_num x 30
    # Confidence of both box slots, stacked along a new last axis.
    contain = torch.stack((pred[:, :, 4], pred[:, :, 9]), 2)
    # Keep every slot above the threshold, and always keep the slot(s) with
    # the highest confidence even when that confidence is below it.
    mask = (contain > 0.1) | (contain == contain.max())
    for i in range(grid_num):
        for j in range(grid_num):
            for b in range(2):
                if not mask[i, j, b]:
                    continue
                # Clone so the arithmetic below never writes back into the
                # caller's prediction tensor (a plain slice is a view).
                box = pred[i, j, b * 5:b * 5 + 4].clone()
                contain_prob = pred[i, j, b * 5 + 4].reshape(1).float()
                xy = torch.FloatTensor([j, i]) * cell_size  # top-left corner of the cell
                center = box[:2] * cell_size + xy  # centre relative to the image
                half_wh = 0.5 * box[2:]
                # Convert [cx, cy, w, h] to [x1, y1, x2, y2].
                box_xy = torch.cat((center - half_wh, center + half_wh)).float()
                max_prob, cls_index = torch.max(pred[i, j, 10:], 0)
                prob = contain_prob * max_prob
                if float(prob[0]) > 0.1:
                    boxes.append(box_xy.view(1, 4))
                    # torch.max returns 0-dim tensors here, which torch.cat
                    # refuses to concatenate; give the index one dimension.
                    cls_indexs.append(cls_index.reshape(1))
                    probs.append(prob)
    if len(boxes) == 0:
        boxes = torch.zeros((1, 4))
        probs = torch.zeros(1)
        cls_indexs = torch.zeros(1)
    else:
        boxes = torch.cat(boxes, 0)  # (n, 4)
        probs = torch.cat(probs, 0)  # (n,)
        cls_indexs = torch.cat(cls_indexs, 0)  # (n,)
    keep = nms(boxes, probs)
    return boxes[keep], cls_indexs[keep], probs[keep]