faster_rcnn_meta_arch.py
前面看到的model.py/DetectionModel
是所有检测模型的基类。而在faster_rcnn_meta_arch.py/FasterRCNNMetaArch
就是DetectionModel
的子类。
在object_detection\meta_architectures\faster_rcnn_meta_arch.py中定义了两个类:
先看faster_rcnn_meta_arch.py导入的文件:
#faster_rcnn_meta_arch.py
from abc import abstractmethod
from functools import partial
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import box_predictor
from object_detection.core import losses
from object_detection.core import model
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
from object_detection.utils import shape_utils
导入了core中的基本构件:
faster_rcnn_meta_arch.py文件中定义的类:
#faster_rcnn_meta_arch.py
class FasterRCNNFeatureExtractor(object):
"""Faster R-CNN Feature Extractor definition."""
.....
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
.....
这里只有两个类:
- FasterRCNNMetaArch(model.DetectionModel):
是DetectionModel的子类,实现了FasterRCNN的所有流程:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
,是这个文件的重点。 - FasterRCNNFeatureExtractor(object):
是一个基类,从名字上就知道是做特征提取的。它有很多方法并未具体实现(抽象方法),这些方法由object_detection\models中各个具体的特征提取器类来实现。
例如在object_detection\models\faster_rcnn_inception_resnet_v2_feature_extractor.py中就实现了FasterRCNNFeatureExtractor的具体方法。
#faster_rcnn_inception_resnet_v2_feature_extractor.py
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_resnet_v2
slim = tf.contrib.slim
class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN with Inception Resnet v2 feature extractor implementation."""
这里FasterRCNNInceptionResnetV2FeatureExtractor是model_builder.py中使用的其中一个提取器。
class FasterRCNNFeatureExtractor(object):
    """Faster R-CNN Feature Extractor definition.

    Base class for the feature extractors used by the Faster R-CNN
    meta-architecture. Subclasses are expected to provide `preprocess`,
    `_extract_proposal_features` and `_extract_box_classifier_features`.

    NOTE: the class derives from plain `object` (no `abc.ABCMeta`), so the
    `@abstractmethod` markers are not enforced at instantiation time.
    """

    def __init__(self,
                 is_training,
                 first_stage_features_stride,
                 batch_norm_trainable=False,
                 reuse_weights=None,
                 weight_decay=0.0):
        """Store the extractor configuration on the instance.

        Args:
            is_training: whether the training version of the graph is built.
            first_stage_features_stride: stored as-is; not used further by
                this base class.
            batch_norm_trainable: whether batch norm should be trained; only
                takes effect together with `is_training`.
            reuse_weights: stored as-is; not used further by this base class.
            weight_decay: stored as-is (default: 0.0).
        """
        self._is_training = is_training
        self._first_stage_features_stride = first_stage_features_stride
        # Batch norm is trained only when requested AND in training mode.
        self._train_batch_norm = (batch_norm_trainable and is_training)
        self._reuse_weights = reuse_weights
        self._weight_decay = weight_decay

    @abstractmethod
    def preprocess(self, resized_inputs):
        """Feature-extractor specific preprocessing; supplied by subclasses."""
        pass

    def extract_proposal_features(self, preprocessed_inputs, scope):
        """Call `_extract_proposal_features` inside `tf.variable_scope(scope)`.

        Args:
            preprocessed_inputs: tensor forwarded to the subclass hook.
            scope: variable scope name, also forwarded to the hook.

        Returns:
            Whatever the subclass hook returns.
        """
        with tf.variable_scope(scope, values=[preprocessed_inputs]):
            return self._extract_proposal_features(preprocessed_inputs, scope)

    @abstractmethod
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Subclass hook that extracts the first-stage features."""
        pass

    def extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Call `_extract_box_classifier_features` inside `tf.variable_scope(scope)`.

        Args:
            proposal_feature_maps: tensor forwarded to the subclass hook.
            scope: variable scope name, also forwarded to the hook.

        Returns:
            Whatever the subclass hook returns.
        """
        with tf.variable_scope(scope, values=[proposal_feature_maps]):
            return self._extract_box_classifier_features(
                proposal_feature_maps, scope)

    @abstractmethod
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Subclass hook that extracts the second-stage features."""
        pass

    @staticmethod
    def _strip_scope_prefix(name, scope_name):
        """Return `name` without a leading `scope_name + '/'`, if present.

        Only the leading prefix is removed. A plain `str.replace` would also
        delete the same text wherever else it occurs in the name, e.g. turn
        'Extractor/MyExtractor/w' into 'Myw'.
        """
        prefix = scope_name + '/'
        if name.startswith(prefix):
            return name[len(prefix):]
        return name

    def restore_from_classification_checkpoint_fn(
            self,
            first_stage_feature_extractor_scope,
            second_stage_feature_extractor_scope):
        """Map checkpoint variable names to the variables in the graph.

        Every global variable whose op name starts with one of the two scope
        names is included, keyed by its op name with the leading
        `<scope>/` prefix removed.

        Args:
            first_stage_feature_extractor_scope: scope name of the first-stage
                feature extractor.
            second_stage_feature_extractor_scope: scope name of the
                second-stage feature extractor.

        Returns:
            A dict mapping variable names (to load from a checkpoint) to
            variables in the model graph.
        """
        scope_names = (first_stage_feature_extractor_scope,
                       second_stage_feature_extractor_scope)
        variables_to_restore = {}
        for variable in tf.global_variables():
            op_name = variable.op.name
            for scope_name in scope_names:
                if op_name.startswith(scope_name):
                    var_name = self._strip_scope_prefix(op_name, scope_name)
                    variables_to_restore[var_name] = variable
        return variables_to_restore
FasterRCNNFeatureExtractor的方法解析
- __init__()方法:
Args:
is_training: 构建图的训练版本
first_stage_features_stride: 提取出的RPN特征图的输出步长(output stride)。
batch_norm_trainable: 训练时是否更新BN参数(只有is_training同时为True时才生效)
reuse_weights: 重用参数
weight_decay: 训练参数衰减率 (default: 0.0). - preprocess(self, resized_inputs)方法:
特征提取器特定的预处理方法(不包含图像缩放:传入的已是缩放后的resized_inputs)。这里是特征提取器对输入图像的预处理,与DetectionModel的预处理不同。这是抽象方法,会在FasterRCNNInceptionResnetV2FeatureExtractor中实现。具体请查看object_detectionAPI源码阅读笔记(8-faster_rcnn_inception_resnet_v2_feature_extractor.py) - extract_proposal_features(self, preprocessed_inputs, scope)方法:
提取用于第一阶段RPN的特征。
该方法负责从preprocessed的图像中提取特征图。提取出来的特征图会被送到 RPN。region proposal network(RPN)使用这些特征来预测proposal。
Args:
preprocessed_inputs: 预处理后的一批图像,shape=[batch, height, width, channels]
scope: 参数空间的name
Returns:
rpn_feature_map:提取的特征图shape= [batch, height, width, depth] - extract_box_classifier_features(self,proposal_feature_maps, scope)方法:
提取用于第二阶段分类的特征图。
Args:
proposal_feature_maps: [batch_size * self.max_num_proposals, crop_height, crop_width, depth]这里是代表剪裁到特定大小的特征图
scope: 参数空间
Returns:
proposal_classifier_features: [batch_size * self.max_num_proposals, height, width, depth]每个建议框的分类器特征。 - restore_from_classification_checkpoint_fn()方法:
Args:
first_stage_feature_extractor_scope: 第一阶段特征提取器(feature extractor)的变量作用域名。
second_stage_feature_extractor_scope: 第二阶段特征提取器(feature extractor)的变量作用域名。
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
FasterRCNNMetaArch()方法解析
这个类在faster_rcnn_meta_arch.py中是一个大头,也是整个检测模型的精华所在。单从代码实现的行数来看,包括注解一共有1400行代码。先缓缓!!
- FasterRCNNMetaArch 的结构
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
@property
def first_stage_feature_extractor_scope(self):
return 'FirstStageFeatureExtractor'
@property
def second_stage_feature_extractor_scope(self):
return 'SecondStageFeatureExtractor'
@property
def first_stage_box_predictor_scope(self):
return 'FirstStageBoxPredictor'
@property
def second_stage_box_predictor_scope(self):
return 'SecondStageBoxPredictor'
@property
def max_num_proposals(self):
if self._is_training and not self._hard_example_miner:
return self._second_stage_batch_size
return self._first_stage_max_proposals
def preprocess(self, inputs):
........
return self._feature_extractor.preprocess(resized_inputs)
def predict(self, preprocessed_inputs):
......
return prediction_dict
def _predict_second_stage(self, rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop,
anchors,
image_shape):
.....
return prediction_dict
def _extract_rpn_feature_maps(self, preprocessed_inputs):
......
return (rpn_box_predictor_features, rpn_features_to_crop,
anchors, image_shape)
def _predict_rpn_proposals(self, rpn_box_predictor_features):
......
return (tf.squeeze(box_encodings, axis=2),
objectness_predictions_with_background)
def _remove_invalid_anchors_and_predictions(
self,
box_encodings,
objectness_predictions_with_background,
anchors_boxlist,
clip_window):
......
return (_batch_gather_kept_indices(box_encodings),
_batch_gather_kept_indices(objectness_predictions_with_background),
pruned_anchors_boxlist)
def _flatten_first_two_dimensions(self, inputs):
.......
return tf.reshape(inputs, flattened_shape)
def postprocess(self, prediction_dict):
with tf.name_scope('FirstStagePostprocessor'):
image_shape = prediction_dict['image_shape']
if self._first_stage_only:
proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
image_shape)
return {
'detection_boxes': proposal_boxes,
'detection_scores': proposal_scores,
'num_detections': tf.to_float(num_proposals)
}
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shape,
mask_predictions=mask_predictions)
return detections_dict
def _postprocess_rpn(self,
rpn_box_encodings_batch,
rpn_objectness_predictions_with_background_batch,
anchors,
image_shape):
......
return proposal_boxes, proposal_scores, num_proposals
def _unpad_proposals_and_sample_box_classifier_batch(
self,
proposal_boxes,
proposal_scores,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list):
.......
return (tf.stack(single_image_proposal_box_sample),
tf.stack(single_image_proposal_score_sample),
tf.stack(single_image_num_proposals_sample))
def _format_groundtruth_data(self, image_shape):
.....
return (groundtruth_boxlists, groundtruth_classes_with_background_list,
groundtruth_masks_list)
def _sample_box_classifier_minibatch(self,
proposal_boxlist,
groundtruth_boxlist,
groundtruth_classes_with_background):
.......
return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized):
.......
return slim.max_pool2d(
cropped_regions,
[self._maxpool_kernel_size, self._maxpool_kernel_size],
stride=self._maxpool_stride)
def _postprocess_box_classifier(self,
refined_box_encodings,
class_predictions_with_background,
proposal_boxes,
num_proposals,
image_shape,
mask_predictions=None):
.......
return detections
def _batch_decode_boxes(self, box_encodings, anchor_boxes):
......
return tf.reshape(decoded_boxes.get(),
tf.stack([combined_shape[0], combined_shape[1],
num_classes, 4]))
def loss(self, prediction_dict, scope=None):
.......
return loss_dict
def _loss_rpn(self,
rpn_box_encodings,
rpn_objectness_predictions_with_background,
anchors,
groundtruth_boxlists,
groundtruth_classes_with_background_list):
.......
return loss_dict
def _loss_box_classifier(self,
refined_box_encodings,
class_predictions_with_background,
proposal_boxes,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list,
image_shape,
prediction_masks=None,
groundtruth_masks_list=None):
.......
return loss_dict
def _padded_batched_proposals_indicator(self,
num_proposals,
max_num_proposals):
......
return tf.greater(tiled_num_proposals, tiled_proposal_index)
def _unpad_proposals_and_apply_hard_mining(self,
proposal_boxlists,
second_stage_loc_losses,
second_stage_cls_losses,
num_proposals):
for (proposal_boxlist, single_image_loc_loss, single_image_cls_loss,
single_image_num_proposals) in zip(
proposal_boxlists,
tf.unstack(second_stage_loc_losses),
tf.unstack(second_stage_cls_losses),
tf.unstack(num_proposals)):
proposal_boxlist = box_list.BoxList(
tf.slice(proposal_boxlist.get(),
[0, 0], [single_image_num_proposals, -1]))
single_image_loc_loss = tf.slice(single_image_loc_loss,
[0], [single_image_num_proposals])
single_image_cls_loss = tf.slice(single_image_cls_loss,
[0], [single_image_num_proposals])
return self._hard_example_miner(
location_losses=tf.expand_dims(single_image_loc_loss, 0),
cls_losses=tf.expand_dims(single_image_cls_loss, 0),
decoded_boxlist_list=[proposal_boxlist])
def restore_map(self, from_detection_checkpoint=True):
.......
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
return {var.op.name: var for var in feature_extractor_variables}
虽然有这么多的方法但是可供调用的却只有少数几个
- 主要方法
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
@property
def first_stage_feature_extractor_scope(self):
return 'FirstStageFeatureExtractor'
@property
def second_stage_feature_extractor_scope(self):
return 'SecondStageFeatureExtractor'
@property
def first_stage_box_predictor_scope(self):
return 'FirstStageBoxPredictor'
@property
def second_stage_box_predictor_scope(self):
return 'SecondStageBoxPredictor'
@property
def max_num_proposals(self):
if self._is_training and not self._hard_example_miner:
return self._second_stage_batch_size
return self._first_stage_max_proposals
def preprocess(self, inputs):
........
return self._feature_extractor.preprocess(resized_inputs)
def predict(self, preprocessed_inputs):
......
return prediction_dict
def postprocess(self, prediction_dict):
with tf.name_scope('FirstStagePostprocessor'):
image_shape = prediction_dict['image_shape']
if self._first_stage_only:
proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
image_shape)
return {
'detection_boxes': proposal_boxes,
'detection_scores': proposal_scores,
'num_detections': tf.to_float(num_proposals)
}
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shape,
mask_predictions=mask_predictions)
return detections_dict
def loss(self, prediction_dict, scope=None):
.......
return loss_dict
def restore_map(self, from_detection_checkpoint=True):
.......
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
return {var.op.name: var for var in feature_extractor_variables}
从这里可以看出加上基类的方法,其实这个类也就只有7-8个是供外部调用的方法,一般供外部调用的方法才是主要方法。
0.max_num_proposals,1.preprocess,2.predict,3.postprocess,4.loss,5.restore_map,6.provide_groundtruth(基类),7.groundtruth_lists(基类)
看一下具体的流程图:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
这里再次引用haixwang总结的流程图:
这里需要牢记这张图,因为这就是整个FasterRCNNMetaArch类大概的实现流程了。
- 主要方法逐一介绍
-
init():
参数提前介绍:
Args:
is_training: 是否构建训练版本的计算图。
num_classes: Number of classes. 不包括背景类。
image_resizer_fn: 一个对图片进行resize的可调用对象,输入单张图片[height, width, channels],返回的图片空间尺寸(高和宽)可能会改变。 See
builders/image_resizer_builder.py.
feature_extractor: A FasterRCNNFeatureExtractor object.就是上面介绍的FasterRCNNFeatureExtractor的一个子类。
first_stage_only: 是否仅仅构建Region Proposal Network(RPN)这一部分.
first_stage_anchor_generator: An anchor_generator.AnchorGenerator object (note that currently we only support grid_anchor_generator.GridAnchorGenerator objects)
first_stage_atrous_rate: (This should typically be set to 1).这是空洞卷积(atrous convolution)的一个参数,如果设置为1,表示普通卷积。
first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,separable_conv2d and fully_connected ops for the RPN box predictor.用于设置这些op的默认参数(超参数),并不是一个名字。
first_stage_box_predictor_kernel_size: 在RPN框预测之前的用于卷积运算的卷积核大小。
first_stage_box_predictor_depth: 在RPN框预测之前的用于卷积运算的输出维度。
first_stage_minibatch_size: 用于计算区域建议网络的内容是前景还是背景和location loss 的“batch size”。 这个“batch size”是指在图像批次中为给定的图像计算损失函数的锚点的数量。
first_stage_positive_balance_fraction: 正样本的比例
first_stage_nms_score_threshold: 第一阶段非极大抑制(NMS)使用的得分阈值,取值范围[0,1]。The recommended value for Faster R-CNN is 0.
first_stage_nms_iou_threshold: 对RPN预测出来的box应用非极大抑制时的IOU阈值 (与得分最高的框的IOU超过该阈值的框都会被删除)
first_stage_max_proposals: 在区域提议网络(RPN)预测的方框上执行非最大抑制(NMS)后要保留的框的最大数量。
first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float
initial_crop_size: 在ROI进行裁剪的时候的尺寸
maxpool_kernel_size: A single integer indicating the kernel size of the max pool op on the cropped feature map during ROI pooling.
maxpool_stride: A single integer indicating the stride of the max pool op on the cropped feature map during ROI pooling.
second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for the second stage.
second_stage_batch_size: 第二阶段的进行classification and refined location loss的时候的batch_size。
second_stage_balance_fraction: 每张图片中的正样本的数量的比例。
second_stage_non_max_suppression_fn: 第二阶段使用的非极大值抑制函数。
second_stage_score_conversion_fn: 非线性函数,把 logits转换为probabilities.
second_stage_localization_loss_weight: second stage localization loss的比例因子.
second_stage_classification_loss_weight: second stage classification loss的比例因子.
second_stage_classification_loss: 第二阶段使用的分类损失对象,可以是losses.WeightedSigmoidClassificationLoss 或
losses.WeightedSoftmaxClassificationLoss.
second_stage_mask_prediction_loss_weight: mask的一个loss权重
hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run in parallel for calls to tf.map_fn.
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at training time.
ValueError: If first_stage_anchor_generator is not of type grid_anchor_generator.GridAnchorGenerator.
init函数的解释
def __init__(self,
             is_training,
             num_classes,
             image_resizer_fn,
             feature_extractor,
             first_stage_only,
             first_stage_anchor_generator,
             first_stage_atrous_rate,
             first_stage_box_predictor_arg_scope,
             first_stage_box_predictor_kernel_size,
             first_stage_box_predictor_depth,
             first_stage_minibatch_size,
             first_stage_positive_balance_fraction,
             first_stage_nms_score_threshold,
             first_stage_nms_iou_threshold,
             first_stage_max_proposals,
             first_stage_localization_loss_weight,
             first_stage_objectness_loss_weight,
             initial_crop_size,
             maxpool_kernel_size,
             maxpool_stride,
             second_stage_mask_rcnn_box_predictor,
             second_stage_batch_size,
             second_stage_balance_fraction,
             second_stage_non_max_suppression_fn,
             second_stage_score_conversion_fn,
             second_stage_localization_loss_weight,
             second_stage_classification_loss_weight,
             second_stage_classification_loss,
             second_stage_mask_prediction_loss_weight=1.0,
             hard_example_miner=None,
             parallel_iterations=16):
    """Configure a FasterRCNNMetaArch.

    Validates the arguments, then builds the target assigners, the
    minibatch samplers, the first-stage box predictor and the loss objects,
    and stores every remaining setting on the instance.

    Args:
        is_training: stored, and forwarded to the first-stage box predictor.
        num_classes: forwarded to the base-class constructor.
        image_resizer_fn, feature_extractor, first_stage_only: stored as-is.
        first_stage_anchor_generator: must be a
            `grid_anchor_generator.GridAnchorGenerator`; stored as-is.
        first_stage_atrous_rate, first_stage_box_predictor_kernel_size,
            first_stage_box_predictor_depth, first_stage_minibatch_size:
            stored as-is.
        first_stage_box_predictor_arg_scope: stored, and used as
            `conv_hyperparams` of the first-stage box predictor.
        first_stage_positive_balance_fraction: `positive_fraction` of the
            first-stage sampler.
        first_stage_nms_score_threshold, first_stage_nms_iou_threshold,
            first_stage_max_proposals: stored as-is.
        first_stage_localization_loss_weight,
            first_stage_objectness_loss_weight: stored as-is.
        initial_crop_size, maxpool_kernel_size, maxpool_stride: stored as-is.
        second_stage_mask_rcnn_box_predictor: stored as the second-stage
            box predictor.
        second_stage_batch_size: stored as-is.
        second_stage_balance_fraction: `positive_fraction` of the
            second-stage sampler.
        second_stage_non_max_suppression_fn,
            second_stage_score_conversion_fn: stored as-is.
        second_stage_localization_loss_weight,
            second_stage_classification_loss_weight,
            second_stage_classification_loss,
            second_stage_mask_prediction_loss_weight: stored as-is.
        hard_example_miner: stored as-is (may be None).
        parallel_iterations: stored as-is.

    Raises:
        ValueError: if `is_training` is set and `second_stage_batch_size`
            exceeds `first_stage_max_proposals`.
        ValueError: if `first_stage_anchor_generator` is not a
            `grid_anchor_generator.GridAnchorGenerator`.
    """
    super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

    # Validate the arguments before anything is built.
    if is_training and second_stage_batch_size > first_stage_max_proposals:
        raise ValueError('second_stage_batch_size should be no greater than '
                         'first_stage_max_proposals.')
    required_generator_type = grid_anchor_generator.GridAnchorGenerator
    if not isinstance(first_stage_anchor_generator, required_generator_type):
        raise ValueError('first_stage_anchor_generator must be of type '
                         'grid_anchor_generator.GridAnchorGenerator.')

    # General settings.
    self._is_training = is_training
    self._image_resizer_fn = image_resizer_fn  # image resize callable
    self._feature_extractor = feature_extractor  # feature extractor object
    self._first_stage_only = first_stage_only  # build only the RPN stage?

    # The first class is reserved as background: a leading 1 followed by
    # one 0 per class.
    # NOTE(review): `self._num_classes` is presumably set by the base-class
    # constructor from `num_classes` - confirm.
    background_one_hot = tf.constant(
        [1] + [0] * self._num_classes, dtype=tf.float32)

    # Target assigners for the proposal stage and the detection stage.
    create_assigner = target_assigner.create_target_assigner
    self._proposal_target_assigner = create_assigner('FasterRCNN', 'proposal')
    self._detector_target_assigner = create_assigner(
        'FasterRCNN', 'detection', unmatched_cls_target=background_one_hot)
    # Both proposal and detector target assigners use the same box coder.
    self._box_coder = self._proposal_target_assigner.box_coder

    # (First stage) Region proposal network parameters.
    self._first_stage_anchor_generator = first_stage_anchor_generator
    self._first_stage_atrous_rate = first_stage_atrous_rate
    self._first_stage_box_predictor_arg_scope = (
        first_stage_box_predictor_arg_scope)
    self._first_stage_box_predictor_kernel_size = (
        first_stage_box_predictor_kernel_size)
    self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
    self._first_stage_minibatch_size = first_stage_minibatch_size
    # Sampler that balances positive and negative examples.
    self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=first_stage_positive_balance_fraction)
    self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
        self._is_training,
        num_classes=1,
        conv_hyperparams=first_stage_box_predictor_arg_scope,
        min_depth=0,
        max_depth=0,
        num_layers_before_predictor=0,
        use_dropout=False,
        dropout_keep_prob=1.0,
        kernel_size=1,
        box_code_size=self._box_coder.code_size)

    # First-stage NMS score threshold, IOU threshold and proposal cap.
    self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
    self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
    self._first_stage_max_proposals = first_stage_max_proposals

    # First-stage losses: smooth-L1 localization and softmax objectness.
    smooth_l1_loss = losses.WeightedSmoothL1LocalizationLoss
    self._first_stage_localization_loss = smooth_l1_loss(
        anchorwise_output=True)
    self._first_stage_objectness_loss = (
        losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True))
    self._first_stage_loc_loss_weight = first_stage_localization_loss_weight
    self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight

    # Per-region cropping parameters (crop size and max-pool settings).
    self._initial_crop_size = initial_crop_size
    self._maxpool_kernel_size = maxpool_kernel_size
    self._maxpool_stride = maxpool_stride

    self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor

    # Second-stage batch size and sampler.
    self._second_stage_batch_size = second_stage_batch_size
    self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
        positive_fraction=second_stage_balance_fraction)

    # Second-stage NMS function and score conversion function.
    self._second_stage_nms_fn = second_stage_non_max_suppression_fn
    self._second_stage_score_conversion_fn = second_stage_score_conversion_fn

    # Second-stage losses and their weights.
    self._second_stage_localization_loss = smooth_l1_loss(
        anchorwise_output=True)
    self._second_stage_classification_loss = second_stage_classification_loss
    self._second_stage_mask_loss = (
        losses.WeightedSigmoidClassificationLoss(anchorwise_output=True))
    self._second_stage_loc_loss_weight = second_stage_localization_loss_weight
    self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
    self._second_stage_mask_loss_weight = (
        second_stage_mask_prediction_loss_weight)

    self._hard_example_miner = hard_example_miner
    self._parallel_iterations = parallel_iterations
为防止篇幅引起不适,分为两篇。请看object_detectionAPI源码阅读笔记(7-FasterRCNNMetaArch类的详解)