import requests
def progress_bar(some_iter):
    """Wrap *some_iter* in a tqdm progress bar when tqdm is usable.

    Args:
        some_iter: Any iterable.

    Returns:
        ``tqdm(some_iter)`` if ``tqdm`` can be imported, otherwise
        *some_iter* itself, unchanged.
    """
    # tqdm is an optional nicety. Catch ImportError (the parent class of
    # ModuleNotFoundError) so that a present-but-broken installation also
    # falls back to the plain iterable instead of aborting the caller.
    # Only the import is guarded; errors from tqdm() itself still surface.
    try:
        from tqdm import tqdm
    except ImportError:
        return some_iter
    return tqdm(some_iter)
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file by ID and write it to *destination*.

    A first request is sent to the Drive download endpoint. If the response
    carries a cookie whose name starts with ``download_warning``, its value
    is sent back as the ``confirm`` parameter in a second request, and that
    second response is the one saved.

    Args:
        id: Drive file ID (taken from the shareable link). The name shadows
            the ``id`` builtin; it is kept so existing callers keep working.
        destination: Path of the local file to write (opened in ``"wb"``
            mode, so an existing file is overwritten).

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
            This is checked before *destination* is opened, so an error
            page is never saved under the target file name.
    """
    print("Trying to fetch {}".format(destination))

    def get_confirm_token(response):
        """Return the value of the first ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the response body to *destination* in fixed-size chunks."""
        CHUNK_SIZE = 32768
        with open(destination, "wb") as f:
            for chunk in progress_bar(response.iter_content(CHUNK_SIZE)):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # on every exit path, including exceptions.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        try:
            token = get_confirm_token(response)
            if token:
                # The first streamed body is never read; close it so its
                # connection is released before issuing the confirmed request.
                response.close()
                params = {'id': id, 'confirm': token}
                response = session.get(URL, params=params, stream=True)

            # Fail loudly on HTTP errors rather than writing the error body
            # to disk, where it could later be mistaken for a good download.
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()
if __name__ == "__main__":
    # Command-line entry point:
    #   python download.py drive_file_id destination_file_path
    import sys

    # Compare the argument count by value. `is not 3` tests object identity,
    # which only works by accident of CPython's small-int caching and emits
    # a SyntaxWarning on Python 3.8+.
    if len(sys.argv) != 3:
        print("Usage: python download.py drive_file_id destination_file_path")
    else:
        # TAKE ID FROM SHAREABLE LINK
        file_id = sys.argv[1]
        # DESTINATION FILE ON YOUR DISK
        destination = sys.argv[2]
        download_file_from_google_drive(file_id, destination)
# Yelp Reviews dataset
# Fetch each Yelp CSV into "$HERE/yelp" via download.py, skipping any file
# that already exists.
# NOTE(review): HERE is not assigned in this snippet; presumably it is set
# earlier in the script -- confirm before running.
# All expansions are quoted so a path containing spaces or glob characters
# is passed as a single argument.
# NOTE(review): the trailing numbers after each command are kept from the
# original; their meaning is not stated here.
mkdir -p "$HERE/yelp"
if [ ! -f "$HERE/yelp/raw_train.csv" ]; then
    python download.py 1xeUnqkhuzGGzZKThzPeXe2Vf6Uu_g_xM "$HERE/yelp/raw_train.csv" # 12536
fi
if [ ! -f "$HERE/yelp/raw_test.csv" ]; then
    python download.py 1G42LXv72DrhK4QKJoFhabVL4IU6v2ZvB "$HERE/yelp/raw_test.csv" # 4
fi
if [ ! -f "$HERE/yelp/reviews_with_splits_lite.csv" ]; then
    python download.py 1Lmv4rsJiCWVs1nzs4ywA9YI-ADsTf6WB "$HERE/yelp/reviews_with_splits_lite.csv" # 1217
fi
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。