AI Meets Beauty
Perfect Half Million Beauty Product Image Recognition Challenge
分析:比赛提供的数据集是一个csv文件,每行包含图像名称和对应的下载地址,因此首先要根据下载地址把图像下载到本地。
数据集总共包含520k张图像,来自14个电商网站。
数据下载脚本如下:
# -*- coding: utf-8 -*-
# download.py -- download the dataset
from __future__ import print_function
import os, csv
from skimage import io
from multiprocessing import Pool
def signal_handler(signum, frame, img_url=None):
    """Abort the current operation by raising a time-out exception.

    Args:
        signum: Signal number (unused; present to fit the handler signature).
        frame: Current stack frame (unused).
        img_url: URL being fetched when the time-out fired. Optional, because
            the ``signal`` module invokes handlers with only
            ``(signum, frame)``; pass it explicitly (e.g. through
            ``functools.partial``) to have it included in the error.

    Raises:
        Exception: always, with args ``('Time out--', img_url)``.
    """
    raise Exception('Time out--', img_url)
def _record_failure(entry):
    """Print *entry* and append it as one line to ``failure.txt``."""
    print(entry)
    # Open/close per failure so the handle is never leaked in a pool worker.
    with open('failure.txt', 'a') as failures:
        failures.write(entry + '\n')


def getFile(url):
    """Download the image described by one CSV row into ``./dataset``.

    Args:
        url: A raw line of the CSV file, ``name,image_url``. The URL may be
            wrapped in double quotes and the line may end with a newline.

    Returns:
        None. The image is written to ``./dataset/<name>.jpg``. Blank rows
        are skipped; rows that cannot be parsed or downloaded are printed
        and appended to ``failure.txt``.
    """
    row = url.strip()
    if not row:
        # Blank line (e.g. trailing newline at end of file): nothing to do.
        return

    # Parse with the csv module so a quoted URL that itself contains commas
    # is kept intact and the surrounding quotes are removed.
    try:
        fields = next(csv.reader([row]))
    except (csv.Error, StopIteration):
        fields = []
    if len(fields) < 2:
        # Malformed row: record it rather than crashing the whole pool.
        _record_failure(row)
        return

    imgname = fields[0].strip()
    # Also drop any stray quotes/whitespace the parser left in place.
    imgurl = fields[1].strip().strip('"')
    imgformat = '.jpg'
    try:
        image = io.imread(imgurl)
        io.imsave("./dataset/" + imgname + imgformat, image)
    except Exception:
        # Best effort: network or decode errors must not stop the other
        # downloads, so log "<name> <url>" for a later retry and move on.
        _record_failure(imgname + ' ' + imgurl)
if __name__ == "__main__":
    # Entry point: read every row of data.csv and download the images in
    # parallel into ./dataset (created on first run).
    if not os.path.exists('./dataset'):
        os.mkdir('./dataset')

    with open('data.csv', 'r') as f:
        lines = f.readlines()

    parallel_num = 10
    p = Pool(parallel_num)
    try:
        # One map over all rows: the previous fixed-size batching dropped the
        # final len(lines) % parallel_num rows and made every batch wait for
        # its slowest download.
        p.map(getFile, lines)
    finally:
        # Always release the worker processes, even if a worker raised.
        p.close()
        p.join()