threshold map图gt标签
红色框为gt标签,蓝色框为膨胀(dilate)图,绿色框为缩小(shrink), 膨胀距离和缩小距离均为D,计算膨胀区域内所有的点与gt的距离,并做归一化,绿色框与蓝色框之间的区域取值处于[0,1]之间,绿色框区域内的取值大于1。
膨胀和缩小距离 D 的计算公式如下:$D = \frac{A\,(1 - r^{2})}{L}$,其中 A 为文本框多边形的面积,L 为多边形的周长,r 为缩放比例(shrink ratio)。
在实践中,r设置为0.4
代码部分
class MakeBorderMap():
    """Build the threshold-map ground truth and its mask for text polygons.

    For every non-ignored polygon the polygon is dilated by an offset
    ``D = area * (1 - shrink_ratio ** 2) / perimeter``; inside the dilated
    region each pixel receives ``1 - clip(dist / D, 0, 1)`` where ``dist`` is
    the value returned by :meth:`distance` for the closest polygon edge.
    The final map is rescaled to ``[thresh_min, thresh_max]``.
    """

    def __init__(self, shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7):
        self.shrink_ratio = shrink_ratio
        self.thresh_min = thresh_min
        self.thresh_max = thresh_max

    def __call__(self, data: dict) -> dict:
        """
        Generate the threshold map and threshold mask and store them in ``data``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with two extra keys, ``'threshold_map'``
            (float32, values in [thresh_min, thresh_max]) and
            ``'threshold_mask'`` (float32, 1 inside dilated polygons).
        """
        im = data['img']
        text_polys = data['text_polys']
        ignore_tags = data['ignore_tags']

        canvas = np.zeros(im.shape[:2], dtype=np.float32)
        mask = np.zeros(im.shape[:2], dtype=np.float32)

        for i, polygon in enumerate(text_polys):
            if ignore_tags[i]:
                continue
            self.draw_border_map(polygon, canvas, mask=mask)

        # Values drawn so far lie in [0, 1]; rescale them to
        # [thresh_min, thresh_max] (0.3 .. 0.7 with the defaults).
        canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min

        data['threshold_map'] = canvas
        data['threshold_mask'] = mask
        return data

    def draw_border_map(self, polygon, canvas, mask):
        """Draw one polygon's border values into ``canvas`` and ``mask`` in place.

        :param polygon: sequence of (x, y) vertices, shape (num_points, 2)
        :param canvas: 2-D float array updated with the element-wise maximum
            of its current content and this polygon's border values
        :param mask: 2-D float array; the dilated polygon is filled with 1.0
        :return: None. Returns early without drawing when the polygon has no
            area or when the dilation yields no polygon.
        """
        polygon = np.array(polygon)
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        if polygon_shape.area <= 0:
            return
        # Offset distance D = A * (1 - r^2) / L.
        offset = polygon_shape.area * (1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
        subject = [tuple(vertex) for vertex in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)

        solution = padding.Execute(offset)
        if not solution:
            # Nothing to draw; indexing [0] on an empty result would raise.
            return
        padded_polygon = np.array(solution[0])
        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

        # Bounding box of the dilated polygon; distances are only computed
        # inside this window.
        xmin = padded_polygon[:, 0].min()
        xmax = padded_polygon[:, 0].max()
        ymin = padded_polygon[:, 1].min()
        ymax = padded_polygon[:, 1].max()
        width = xmax - xmin + 1
        height = ymax - ymin + 1

        # Shift the polygon into window-local coordinates.
        polygon[:, 0] = polygon[:, 0] - xmin
        polygon[:, 1] = polygon[:, 1] - ymin

        xs = np.broadcast_to(
            np.linspace(0, width - 1, num=width).reshape(1, width), (height, width))
        ys = np.broadcast_to(
            np.linspace(0, height - 1, num=height).reshape(height, 1), (height, width))

        # One normalised distance plane per polygon edge, then take the
        # per-pixel minimum over all edges.
        distance_map = np.zeros(
            (polygon.shape[0], height, width), dtype=np.float32)
        for i in range(polygon.shape[0]):
            j = (i + 1) % polygon.shape[0]
            absolute_distance = self.distance(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / offset, 0, 1)
        distance_map = distance_map.min(axis=0)

        # Clamp the window to the canvas extent.
        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
            1 - distance_map[
                ymin_valid - ymin:ymax_valid - ymax + height,
                xmin_valid - xmin:xmax_valid - xmax + width],
            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])

    def distance(self, xs, ys, point_1, point_2):
        '''
        Compute a per-pixel distance to the segment point_1-point_2 using the
        law of cosines / sines.

        ys: coordinates in the first axis
        xs: coordinates in the second axis
        point_1, point_2: (x, y), the end of the line
        returns: array with the same shape as xs. Where the angle at the pixel
            between the two end points is acute, the distance to the nearer
            end point is returned instead of the perpendicular distance.
        '''
        square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
        square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
        square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[1] - point_2[1])

        if square_distance == 0:
            # Zero-length edge (duplicate vertices): the general formula would
            # divide by zero and yield NaN, so fall back to the point distance.
            return np.sqrt(square_distance_1)

        # A pixel that coincides with an end point gives 0 / 0 here; the
        # resulting NaN is neutralised by nan_to_num below.
        with np.errstate(divide='ignore', invalid='ignore'):
            # cosin is the negated cosine of the angle at the pixel.
            cosin = (square_distance - square_distance_1 - square_distance_2) / (
                2 * np.sqrt(square_distance_1 * square_distance_2))
            use_end_point = cosin < 0
        square_sin = np.nan_to_num(1 - np.square(cosin))
        # Rounding can push 1 - cos^2 marginally below zero, which would make
        # the square root NaN; clamp to zero instead.
        square_sin = np.clip(square_sin, 0, None)

        result = np.sqrt(square_distance_1 * square_distance_2 * square_sin / square_distance)
        # Acute angle at the pixel: use the shorter side as the distance.
        result[use_end_point] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[use_end_point]
        return result

    def extend_line(self, point_1, point_2, result):
        """Draw the extensions of segment point_1-point_2 onto ``result``.

        Each end point is extended away from the other one by
        ``(1 + shrink_ratio)`` times the segment vector and a line of value
        4096.0 is drawn from the extended point to the end point.
        This helper is not called by any other method of this class.

        :return: the two extended points as integer (x, y) tuples
        """
        ex_point_1 = (int(round(point_1[0] + (point_1[0] - point_2[0]) * (1 + self.shrink_ratio))),
                      int(round(point_1[1] + (point_1[1] - point_2[1]) * (1 + self.shrink_ratio))))
        cv2.line(result, tuple(ex_point_1), tuple(point_1), 4096.0, 1, lineType=cv2.LINE_AA, shift=0)
        ex_point_2 = (int(round(point_2[0] + (point_2[0] - point_1[0]) * (1 + self.shrink_ratio))),
                      int(round(point_2[1] + (point_2[1] - point_1[1]) * (1 + self.shrink_ratio))))
        cv2.line(result, tuple(ex_point_2), tuple(point_2), 4096.0, 1, lineType=cv2.LINE_AA, shift=0)
        return ex_point_1, ex_point_2
if __name__ == '__main__':
    # Demo: build the threshold map for four hand-labelled boxes of one image.
    import numpy as np

    make_border_map = MakeBorderMap()
    img_path = "../../datasets/train/img/img_41.jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; fail here
        # with a clear message instead of crashing later on img.copy().
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    points = np.array([[[533, 134], [562, 133], [561, 145], [532, 146]],
                       [[564, 131], [617, 129], [617, 145], [564, 146]],
                       [[620, 126], [657, 127], [656, 143], [618, 143]],
                       [[153, 150], [209, 144], [210, 159], [154, 165]]])
    draw_img = img.copy()
    for pt in points:
        # cv2.polylines only accepts int32 points; np.array of Python ints is
        # int64 on most platforms.
        cv2.polylines(draw_img, [pt.astype(np.int32)], True, color=(0, 255, 0))
    print(draw_img)
    texts = ['EW15', 'Tanjong', 'Pagar', 'CAUTION']
    ignore_tags = [False, False, False, False]
    data = {"img": img, "img_41": "img_41", "text_polys": points, "texts": texts, "ignore_tags": ignore_tags}
    data = make_border_map(data)
    print(data)
probability map图gt
代码部分
def shrink_polygon_pyclipper(polygon, shrink_ratio):
    """Shrink ``polygon`` inwards with pyclipper.

    The inward offset is ``area * (1 - shrink_ratio ** 2) / perimeter``.

    :param polygon: sequence of (x, y) vertices
    :param shrink_ratio: ratio r used in the offset formula
    :return: array of shape (num_points, 2) holding the first polygon of the
        offset result, or an empty array when the offset result is empty
    """
    from shapely.geometry import Polygon
    import pyclipper

    shape = Polygon(polygon)
    offset = shape.area * (1 - np.power(shrink_ratio, 2)) / shape.length

    clipper = pyclipper.PyclipperOffset()
    clipper.AddPath([tuple(vertex) for vertex in polygon],
                    pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)

    # A negative offset moves the outline inwards.
    solution = clipper.Execute(-offset)
    if solution != []:
        return np.array(solution[0]).reshape(-1, 2)
    return np.array(solution)
class MakeShrinkMap():
    r'''
    Making binary mask from detection data with ICDAR format.
    Typically following the process of class `MakeICDARData`.
    '''

    def __init__(self, min_text_size=8, shrink_ratio=0.4, shrink_type='pyclipper'):
        # Maps the shrink_type name to the function that shrinks one polygon.
        shrink_func_dict = {'py': shrink_polygon_py, 'pyclipper': shrink_polygon_pyclipper}
        self.shrink_func = shrink_func_dict[shrink_type]
        self.min_text_size = min_text_size
        self.shrink_ratio = shrink_ratio

    def __call__(self, data: dict) -> dict:
        """
        Generate the shrink (probability) map and its mask and store them in ``data``.

        Polygons that are ignored, smaller than ``min_text_size`` in width or
        height, or that vanish when shrunk are zeroed in the mask and flagged
        in ``ignore_tags`` (the list is modified in place); every other
        polygon is shrunk and filled with 1 in the shrink map.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with two extra keys, ``'shrink_map'`` and
            ``'shrink_mask'`` (both float32 arrays of the image height/width)
        """
        image = data['img']
        text_polys = data['text_polys']
        ignore_tags = data['ignore_tags']

        h, w = image.shape[:2]
        text_polys, ignore_tags = self.validate_polygons(text_polys, ignore_tags, h, w)
        gt = np.zeros((h, w), dtype=np.float32)
        mask = np.ones((h, w), dtype=np.float32)
        for i, polygon in enumerate(text_polys):
            height = max(polygon[:, 1]) - min(polygon[:, 1])
            width = max(polygon[:, 0]) - min(polygon[:, 0])
            if ignore_tags[i] or min(height, width) < self.min_text_size:
                cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
            else:
                shrinked = self.shrink_func(polygon, self.shrink_ratio)
                if shrinked.size == 0:
                    # Shrinking removed the polygon entirely: mask it out.
                    cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
                    ignore_tags[i] = True
                    continue
                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)

        data['shrink_map'] = gt
        data['shrink_mask'] = mask
        return data

    def validate_polygons(self, polygons, ignore_tags, h, w):
        '''
        Clip polygons to the image and flag degenerate ones as ignored.

        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
        ignore_tags: per-polygon flags, modified in place
        h, w: image height and width used as clipping bounds
        returns: (polygons, ignore_tags); polygons are modified in place
        '''
        if len(polygons) == 0:
            return polygons, ignore_tags
        assert len(polygons) == len(ignore_tags)
        for polygon in polygons:
            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

        for i in range(len(polygons)):
            area = self.polygon_area(polygons[i])
            if abs(area) < 1:
                ignore_tags[i] = True
            # NOTE(review): polygon_area returns an unsigned area, so this
            # reverses the point order of every polygon with non-zero area,
            # not only those of one orientation - confirm this is intended.
            if area > 0:
                polygons[i] = polygons[i][::-1, :]
        return polygons, ignore_tags

    def polygon_area(self, polygon):
        """Return the (unsigned) area of ``polygon`` via cv2.contourArea."""
        # cv2.contourArea only accepts int32 / float32 point arrays; cast so
        # that int64 or float64 polygons do not raise.
        return cv2.contourArea(np.asarray(polygon, dtype=np.float32))
if __name__ == '__main__':
    # Demo: build the shrink map for four hand-labelled boxes of one image.
    import numpy as np

    make_shrink_map = MakeShrinkMap()
    img_path = "../../datasets/train/img/img_41.jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; fail here
        # with a clear message instead of crashing later on img.copy().
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    points = np.array([[[533, 134], [562, 133], [561, 145], [532, 146]],
                       [[564, 131], [617, 129], [617, 145], [564, 146]],
                       [[620, 126], [657, 127], [656, 143], [618, 143]],
                       [[153, 150], [209, 144], [210, 159], [154, 165]]])
    draw_img = img.copy()
    for pt in points:
        # cv2.polylines only accepts int32 points; np.array of Python ints is
        # int64 on most platforms.
        cv2.polylines(draw_img, [pt.astype(np.int32)], True, color=(0, 255, 0))
    print(draw_img)
    texts = ['EW15', 'Tanjong', 'Pagar', 'CAUTION']
    ignore_tags = [False, False, False, False]
    data = {"img": img, "img_41": "img_41", "text_polys": points, "texts": texts, "ignore_tags": ignore_tags}
    data = make_shrink_map(data)
    print(data)
概率和阈值图的计算