#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2018-05-02 18:24:37
# @Author : Zhu Robin (robin.zhu@nokia-sbell.com)
import hashlib
import md5
import os
import re
import shutil
from concurrent.futures import ThreadPoolExecutor
from json import loads
from os import path as osp
from zipfile import ZipFile

import fire
import requests
from bs4 import BeautifulSoup

from scm_tools.logger import get_logger
class PhraseArt(object):
    """Parse an Artifactory URL and expose its storage-API metadata.

    The constructor accepts either the plain artifact URL
    (``.../artifactory/<repo>/<path>``) or the storage-API URL
    (``.../artifactory/api/storage/<repo>/<path>``), derives the other
    form, and sends a GET request to the storage-API URL.  On a 200
    response every top-level key of the returned JSON document is set as
    an instance attribute, and the leading "/" is removed from ``path``.
    For any other status no metadata attribute is set.

    api return of a dir:
    {
      "repo" : "******",
      "path" : "/aa/aa",
      "created" : "2018-05-02T20:04:33.880+08:00",
      "createdBy" : "**",
      "lastModified" : "2018-05-02T20:04:33.880+08:00",
      "modifiedBy" : "**",
      "lastUpdated" : "2018-05-02T20:04:33.880+08:00",
      "children" : [ {
        "uri" : "/__init__.py",
        "folder" : false
      }, {
        "uri" : "/__init__.pyc",
        "folder" : false
      }, {
        "uri" : "/a.py",
        "folder" : false
      }, {
        "uri" : "/a.pyc",
        "folder" : false
      } ],
      "uri" : "https://******:443/artifactory/api/storage/******/aa/aa"
    }

    api return of a file:
    {
      "repo" : "******",
      "path" : "/aa/aa/__init__.py",
      "created" : "2018-05-02T20:04:33.881+08:00",
      "createdBy" : "**",
      "lastModified" : "2018-05-02T20:04:33.000+08:00",
      "modifiedBy" : "**",
      "lastUpdated" : "2018-05-02T20:04:33.000+08:00",
      "downloadUri" : "https://******:443/artifactory/******/aa/aa/__init__.py",
      "mimeType" : "text/x-python",
      "size" : "0",
      "checksums" : {
        "sha1" : "da39a3ee5e6b4b0d3255bfef95601890afd80709",
        "md5" : "d41d8cd98f00b204e9800998ecf8427e",
        "sha256" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
      },
      "originalChecksums" : {
        "sha1" : "da39a3ee5e6b4b0d3255bfef95601890afd80709",
        "md5" : "d41d8cd98f00b204e9800998ecf8427e",
        "sha256" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
      },
      "uri" : "https://******:443/artifactory/api/storage/******/aa/aa/__init__.py"
    }
    """

    # Path fragment that distinguishes a storage-API URL from a plain one.
    api_str = "/artifactory/api/storage/"

    def __init__(self, url):
        """Normalise *url*, query the storage API and copy its fields.

        Args:
            url: plain or storage-API Artifactory URL; one trailing "/"
                is dropped.
        """
        if url.endswith("/"):
            url = url[:-1]
        if self.api_str in url:
            self.api_url = url
            self.url = url.replace(self.api_str, "/artifactory/")
        else:
            self.url = url
            self.api_url = url.replace("/artifactory/", self.api_str)

        r = requests.get(self.api_url)
        if r.status_code == 200:
            self.data = loads(r.content)
            for k, v in self.data.items():
                setattr(self, k, v)
            # The API reports "/aa/aa"; keep the repo-relative form "aa/aa".
            self.path = self.path[1:]
        # When true, the sub_* properties return absolute URLs; otherwise
        # they return the bare child names.
        self.return_full_path = True

    def _child_links(self, base, folder=None):
        """Return the ``uri`` of each child, rewritten relative to *base*.

        Args:
            base: URL the child names are appended to when
                ``return_full_path`` is true.
            folder: ``True`` keeps only folders, ``False`` only files,
                ``None`` keeps every child.
        """
        # Child URIs look like "/name"; the "/" is replaced by the prefix.
        prefix = base + "/" if self.return_full_path else ""
        return [
            child["uri"].replace("/", prefix)
            for child in self.children
            if folder is None or child["folder"] == folder
        ]

    @property
    def md5(self):
        """MD5 checksum taken from the ``checksums`` attribute."""
        return self.checksums["md5"]

    @property
    def sha1(self):
        """SHA-1 checksum taken from the ``checksums`` attribute."""
        return self.checksums["sha1"]

    @property
    def sub_file(self):
        """Children whose ``folder`` flag is false, based on ``url``."""
        return self._child_links(self.url, folder=False)

    @property
    def sub_dir(self):
        """Children whose ``folder`` flag is true, based on ``url``."""
        return self._child_links(self.url, folder=True)

    @property
    def sub_api_guys(self):
        """All children, based on the storage-API URL ``api_url``."""
        return self._child_links(self.api_url)

    @property
    def sub_guys(self):
        """All children, based on the plain URL ``url``."""
        return self._child_links(self.url)
class DownloadArt(PhraseArt):
    """Walk an Artifactory URL and collect every file found below it.

    Attributes set by the constructor:
        art_project: the part of ``url`` up to and including
            ``/artifactory/<repo>/``.
        art_dir: ``url`` with ``art_project`` removed; when its last
            segment contains a ".", that segment is dropped as well.
        temp: stack of storage-API URLs still to be visited.
        sub: ``PhraseArt`` objects for every entry without ``children``.
    """

    def __init__(self, url):
        """Query *url*, derive the repo prefix and collect all files."""
        super(DownloadArt, self).__init__(url)
        p = re.compile(r'(.*/artifactory/[^\/]*/)')
        self.art_project = p.findall(self.url)[0]
        # art_project is derived from the normalised self.url, so strip it
        # from self.url too: the raw argument may carry a trailing "/" or
        # be the storage-API form, in which case the prefix would not match.
        self.art_dir = self.url.replace(self.art_project, "")
        # A last segment containing "." is taken to be a file name, so the
        # directory part is everything before it.
        if "." in self.art_dir.split("/")[-1]:
            self.art_dir = "/".join(self.art_dir.split("/")[:-1])
        self.temp = list()
        self.sub = list()
        self.temp.append(self.uri)
        self.walk_once()

    def walk_once(self):
        """Drain ``temp``, filling ``sub`` with every entry lacking children.

        Entries are visited in last-in, first-out order.  A loop is used
        instead of one recursive call per entry so that large trees cannot
        exceed the interpreter's recursion limit.
        """
        while self.temp:
            one = self.temp.pop()
            p = PhraseArt(one)
            if not hasattr(p, "children"):
                self.sub.append(p)
            else:
                self.temp += p.sub_api_guys

    def download_artifactory_dir(self, dir_path=None):
        """Placeholder; currently does nothing."""
        pass

    @property
    def all_sub_files(self):
        """``path`` of every collected entry."""
        return [p.path for p in self.sub]

    @property
    def all_sub_urls(self):
        """``url`` of every collected entry."""
        return [p.url for p in self.sub]
class DlArtTool(DownloadArt):
    """Download every file collected by ``DownloadArt`` using a thread pool."""

    def __init__(self, url):
        super(DlArtTool, self).__init__(url)

    def download_art(self, dir_path=None):
        """Download all entries of ``self.sub`` with ``download_file``.

        Args:
            dir_path: when not ``None``, the first occurrence of
                ``art_dir`` in each entry's ``path`` is replaced by this
                directory before downloading.  The entries' ``path``
                attributes are updated in place.

        Raises:
            Whatever ``download_file`` raises in a worker, including its
            checksum failure.
        """
        if dir_path is not None:
            # An explicit loop: map() is lazy on Python 3, so the attribute
            # assignments would never run there.
            for item in self.sub:
                if self.art_dir:
                    item.path = item.path.replace(self.art_dir, dir_path, 1)
                else:
                    # Replacing "" would prepend dir_path without any
                    # path separator.
                    item.path = osp.join(dir_path, item.path)
        datas = [(one.url, one.path, one.md5) for one in self.sub]
        with ThreadPoolExecutor(max_workers=6) as executor:
            # Results must be consumed: map() re-raises a worker's
            # exception only when its result is retrieved.
            list(executor.map(download_file, *zip(*datas)))
def download_artifactory(url, dir_path=""):
    """Download the file or directory tree behind an Artifactory URL.

    Args:
        url: Artifactory URL of a file or a folder.
        dir_path: local target directory; the default "" resolves to the
            current working directory.

    Returns:
        For a file, the absolute path of the downloaded file.  For a
        folder, the absolute path of the target directory.
    """
    artdl = DlArtTool(url)
    # Resolve in both branches.  In the folder branch an empty dir_path
    # would otherwise turn "aa/aa/x" into "/x", i.e. the filesystem root.
    dir_path = osp.realpath(dir_path)
    # The storage API lists "children" only for folders, so a folder that
    # holds a single file is not mistaken for a file.
    if not hasattr(artdl, "children") and len(artdl.sub) == 1:
        _info("I guess this is a file...")
        artdl.download_art(dir_path)
        # rstrip so a trailing "/" does not yield an empty base name.
        return osp.join(dir_path, url.rstrip("/").split("/")[-1])
    _info("This is a art dir...")
    artdl.download_art(dir_path)
    return dir_path
def download_file(url, file_name=None, md5_value=None):
    """Stream *url* to a local file and optionally verify its MD5.

    Args:
        url: address to download; passed through ``format_url`` first.
        file_name: target file; defaults to the base name returned by
            ``format_url`` inside the current working directory.
        md5_value: expected MD5 hex digest, or ``None`` to skip the check.

    Returns:
        The real path of the written file.

    Raises:
        SystemError: the file was written but ``check_md5sum`` failed.
    """
    basename, url = format_url(url)
    if file_name is None:
        file_name = osp.join(os.getcwd(), basename)
    file_path = osp.dirname(file_name)
    # dirname is "" for a bare file name, and makedirs("") would fail.
    if file_path and not osp.exists(file_path):
        try:
            os.makedirs(file_path)
        except OSError:
            # Another pool worker may have created the directory between
            # the check and the call; only a real failure is re-raised.
            if not osp.isdir(file_path):
                raise
    # Request before opening, so a failed request does not leave an empty
    # or truncated file behind.
    r = requests.get(url, stream=True, proxies=no_proxies)
    try:
        with open(file_name, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # A streamed response holds its connection until it is closed.
        r.close()
    full_name = osp.realpath(file_name)
    _info("download {0} to {1} finished".format(url, full_name))
    if md5_value is None or check_md5sum(full_name, md5_value):
        return full_name
    raise SystemError("check_md5sum failed")
def check_md5sum(file_name, old_value=None):
    """Compare the MD5 hex digest of *file_name* with *old_value*.

    Args:
        file_name: path of the file to hash; must not be a directory.
        old_value: expected hex digest; ``None`` skips the check.

    Returns:
        ``True`` when *old_value* is ``None`` or matches, else ``False``.

    Raises:
        AssertionError: *file_name* is a directory.
    """
    # Raised explicitly so the guard survives ``python -O``, which strips
    # assert statements; the exception type is unchanged.
    if osp.isdir(file_name):
        raise AssertionError("{} must not a dir".format(file_name))
    if old_value is None:
        return True
    digest = hashlib.md5()
    with open(file_name, "rb") as f:
        # Fixed-size chunks: iterating a binary file by line can pull the
        # whole file into memory when it contains no newline bytes.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    real_value = digest.hexdigest()
    if real_value == old_value:
        _info("check_md5sum of {} success".format(file_name))
        return True
    _error("check_md5sum of {} failed".format(file_name))
    return False
if __name__ == '__main__':
    # The previous call used the undefined names url and some_path and so
    # raised NameError. Expose download_artifactory(url, dir_path="") on
    # the command line through the already imported fire package instead.
    fire.Fire(download_artifactory)
功能简介:
自动递归遍历artifactory里面的文件夹,组成文件数据列表映射到download_file 功能函数里面。
download_file 根据是否传入校验值来决定是否进行 md5 校验。
Tips:
- JFrog Artifactory 有个 temp 缓存的坑,jfrog 下载文件的时候默认是缓存到 /temp/ 目录下的,如果下载的文件有几个 G,temp 又很小的话,你懂的...。可以通过 export TMPDIR="/large_dir" 来缓解尴尬。
- 上传的功能下次做...