深度估计学习(单个图像的预测)

1.安装环境

conda install pytorch=0.4.1 torchvision=0.2.1 -c pytorch
pip install tensorboardX==1.4
conda install opencv=3.3.1 

模型

--model_name Training modality Imagenet pretrained? Model resolution KITTI abs. rel. error delta < 1.25
mono_640x192 Mono Yes 640 x 192 0.115 0.877
stereo_640x192 Stereo Yes 640 x 192 0.109 0.864
mono+stereo_640x192 Mono + Stereo Yes 640 x 192 0.106 0.874
mono_1024x320 Mono Yes 1024 x 320 0.115 0.879
stereo_1024x320 Stereo Yes 1024 x 320 0.107 0.874
mono+stereo_1024x320 Mono + Stereo Yes 1024 x 320 0.106 0.876
mono_no_pt_640x192 Mono No 640 x 192 0.132 0.845
stereo_no_pt_640x192 Stereo No 640 x 192 0.130 0.831
mono+stereo_no_pt_640x192 Mono + Stereo No 640 x 192 0.127 0.836

2.测试代码:

from __future__ import absolute_import, division, print_function

import os
import sys
import glob
import argparse
import numpy as np
import PIL.Image as pil
import matplotlib as mpl
import matplotlib.cm as cm

import torch
from torchvision import transforms, datasets

import networks
from layers import disp_to_depth
from utils import download_model_if_doesnt_exist


def parse_args():
    parser = argparse.ArgumentParser(
        description='Simple testing funtion for Monodepthv2 models.')

    parser.add_argument('--image_path', type=str,
                        help='path to a test image or folder of images', required=True)
    parser.add_argument('--model_name', type=str,
                        help='name of a pretrained model to use',
                        choices=[
                            "mono_640x192",
                            "stereo_640x192",
                            "mono+stereo_640x192",
                            "mono_no_pt_640x192",
                            "stereo_no_pt_640x192",
                            "mono+stereo_no_pt_640x192",
                            "mono_1024x320",
                            "stereo_1024x320",
                            "mono+stereo_1024x320"])
    parser.add_argument('--ext', type=str,
                        help='image extension to search for in folder', default="jpg")
    parser.add_argument("--no_cuda",
                        help='if set, disables CUDA',
                        action='store_true')

    return parser.parse_args()


def test_simple(args):
    """Function to predict for a single image or folder of images
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Can not find args.image_path: {}".format(args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            if image_path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear", align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(os.path.basename(image_path))[0]
            name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory, "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print("   Processed {:d} of {:d} images - saved prediction to {}".format(
                idx + 1, len(paths), name_dest_im))

    print('-> Done!')


if __name__ == '__main__':
    args = parse_args()
    test_simple(args)

3.测试图片:

在这里插入图片描述

在这里插入图片描述

4.测试效果
a.利用mono_640x192模型来预测
在这里插入图片描述

在这里插入图片描述

b.利用stereo_640x192模型来预测
在这里插入图片描述

在这里插入图片描述

在这里插入图片描述

c.利用mono+stereo_640x192模型来预测
在这里插入图片描述

在这里插入图片描述

在这里插入图片描述

剩下的模型就没测试了
总结:从效果图来看颜色的深浅反应了深度,颜色越深,表示深度大,表示距离远

github:https://github.com/nianticlabs/monodepth2

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • 6270 a1da b5e7 b8aa 4d60 60c6 dea2 9fcc66b3 1acd 0596 77b...
    BossOx阅读 9,218评论 0 0
  • 504b 0304 1400 0008 0800 fa8c 963d 50740baa dffc 0e00 6cd...
    BossOx阅读 3,537评论 0 0
  • 选择题部分 1.(),只有在发生短路事故时或者在负荷电流较大时,变流器中才会有足够的二次电流作为继电保护跳闸之用。...
    skystarwuwei阅读 13,349评论 0 7
  • 首页 资讯 文章 资源 小组 相亲 登录 注册 首页 最新文章 IT 职场 前端 后端 移动端 数据库 运维 其他...
    Helen_Cat阅读 3,926评论 1 10
  • 我读大学时,寝室有位兄弟家里很穷。 穷到什么程度? 他的衣服不超过三件。 夏天冬天都是一双解放鞋。 在助学金,贷学...
    兔子怪力阅读 283评论 2 4