将mnist数据集转换为JPG图片--附带mnist数据集

注意事项

一定要注意:数据文件名是 train-images-idx3-ubyte,其中 images 与 idx3 之间是连字符“-”,不是点“.”(即不要写成 train-images.idx3-ubyte),否则会找不到文件。

# coding=utf-8
'''将二进制格式的MNIST数据集转成.jpg图片格式并保存,图片标签包含在图片名中'''

import numpy as np
import cv2
import os
import struct

def save_mnist_to_jpg(mnist_image_file, mnist_label_file, save_dir):
    """Convert a binary MNIST (IDX) image/label file pair into .jpg images.

    Every image is written to ``save_dir`` as ``{prefix}_{index}_{label}.jpg``,
    where ``prefix`` is ``'train'`` when the base name of ``mnist_image_file``
    contains ``'train'`` and ``'test'`` otherwise, ``index`` is the zero-based
    position of the image in the file and ``label`` is its class label.

    Args:
        mnist_image_file: Path to an uncompressed IDX3 image file.
        mnist_label_file: Path to the matching uncompressed IDX1 label file.
        save_dir: Directory that receives the images; it must already exist.

    Raises:
        ValueError: If either file has the wrong magic number, if the two
            files declare a different number of items, or if a file holds
            less data than its header announces.
        struct.error: If a file is too short to contain its header.
    """
    # IDX magic numbers: 0x00000801 = unsigned-byte vector (labels),
    # 0x00000803 = unsigned-byte 3-D tensor (images).
    label_magic_expected = 2049
    image_magic_expected = 2051
    label_header = '>II'
    image_header = '>IIII'

    prefix = 'train' if 'train' in os.path.basename(mnist_image_file) else 'test'

    # Context managers guarantee both handles are closed, even on error.
    with open(mnist_label_file, 'rb') as label_fp:
        label_data = label_fp.read()
    with open(mnist_image_file, 'rb') as image_fp:
        image_data = image_fp.read()

    label_magic, num_labels = struct.unpack_from(label_header, label_data, 0)
    if label_magic != label_magic_expected:
        raise ValueError(
            'not an IDX1 label file (magic {}): {}'.format(
                label_magic, mnist_label_file))

    image_magic, num_images, num_rows, num_cols = struct.unpack_from(
        image_header, image_data, 0)
    if image_magic != image_magic_expected:
        raise ValueError(
            'not an IDX3 image file (magic {}): {}'.format(
                image_magic, mnist_image_file))

    # Pairing images with labels from a different split would silently
    # produce wrongly labelled files, so refuse mismatched inputs.
    if num_labels != num_images:
        raise ValueError(
            'label count {} does not match image count {}'.format(
                num_labels, num_images))

    # Zero-copy views over the raw bytes; the image shape comes from the
    # header instead of being hard-coded to 28x28.
    labels = np.frombuffer(
        label_data, dtype=np.uint8, count=num_labels,
        offset=struct.calcsize(label_header))
    images = np.frombuffer(
        image_data, dtype=np.uint8, count=num_images * num_rows * num_cols,
        offset=struct.calcsize(image_header),
    ).reshape(num_images, num_rows, num_cols)

    for index in range(num_images):
        file_name = '{}_{}_{}.jpg'.format(prefix, index, int(labels[index]))
        cv2.imwrite(os.path.join(save_dir, file_name), images[index])


if __name__ == '__main__':
    # Script entry point: convert both MNIST splits to .jpg files.
    # Locations of the raw (uncompressed) MNIST IDX files.
    train_images = 'E:/study/MachineLearning/data/MNIST/raw/train-images-idx3-ubyte'  # training-set images
    train_labels = 'E:/study/MachineLearning/data/MNIST/raw/train-labels-idx1-ubyte'  # training-set labels
    test_images = 'E:/study/MachineLearning/data/MNIST/raw/t10k-images-idx3-ubyte'  # test-set images
    test_labels = 'E:/study/MachineLearning/data/MNIST/raw/t10k-labels-idx1-ubyte'  # test-set labels

    # Output directories for the generated .jpg files.
    save_train_dir = './data/mk/train_images/'
    save_test_dir = './data/mk/test_images/'

    # Create any output directory that does not exist yet.
    for out_dir in (save_train_dir, save_test_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Convert the test split first, then the training split.
    conversions = (
        (test_images, test_labels, save_test_dir),
        (train_images, train_labels, save_train_dir),
    )
    for image_path, label_path, out_dir in conversions:
        save_mnist_to_jpg(image_path, label_path, out_dir)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容