最近在把简书的内容导出, 以后简书只会用来转帖(也不发布). 它的好处是可以随意粘贴网上的内容, 十分方便. 缺点是内容管理, 而且至今没有改善:
- 没有分级目录: 目录多了会乱, 没有分级目录就很傻..
- 不便于内容搜索: 简书只适于发布, 然后你要搜索自己的内容? Google吧..
- 没有桌面客户端. 不知道为啥一直不弄个桌面客户端, 手机端倒是有了.
我还是继续用VSCode作笔记管理吧~
附带脚本 `download_jianshu_Image.py`: 指定输入目录(简书导出的文件)以及输出目录(图片文件存放位置)后, 即可直接下载.
该脚本不会替换 md 文件中的链接, 也不会更改图片后缀.
#! /usr/bin/env python
# -*- coding: UTF-8 -*-
# * Download images file from given website
# * Setup INPUT/OUTPUT firstly.
# * Modify from : https://cloud.tencent.com/developer/article/1599063
import re, requests, os
# Input directory: where the .md files exported from Jianshu are located.
INPUT_DIR = '/home/hom/Nutstore/VNote/Jianshu'
# Output directory: where the images referenced by the articles are stored
# after they have been downloaded.
OUTPUT_DIR = '/home/hom/Nutstore/VNote/Zimgs/Jianshu'
def ensure_dir_exist(dir_name):
    """Make sure the directory ``dir_name`` exists.

    Missing parent directories are created as well, and a directory that is
    already present is left untouched.

    Args:
        dir_name: Path of the directory that must exist afterwards.

    Raises:
        OSError: If the directory cannot be created, for example because the
            path is occupied by a regular file or permissions are missing.
    """
    # A single makedirs(exist_ok=True) call avoids the check-then-create race
    # of exists() + mkdir() and also works when intermediate directories are
    # missing.
    os.makedirs(dir_name, exist_ok=True)
def start_a_file(a_markdown_file, output_dir):
    """Read one markdown file and pass each of its lines to ``process_line``.

    Args:
        a_markdown_file: Path of the markdown file to scan.
        output_dir: Directory handed to ``process_line`` as the download
            target for the images referenced by this file.

    Returns:
        None.
    """
    # The with-block closes the file even when processing a line raises.
    # NOTE(review): the exported notes are assumed to be UTF-8 encoded.
    with open(a_markdown_file, encoding='utf-8') as f:
        # Iterating the file visits every line, including the very first one.
        for line in f:
            # Remove only the line terminator: a final line without a
            # trailing newline must keep its last character (often the
            # closing ")" of an image link).
            process_line(line.rstrip('\n'), output_dir)
def process_line(line, output_dir):
    """Download every Jianshu-hosted image referenced in a markdown line.

    Each ``![alt](target)`` occurrence is inspected. The query string of the
    target is dropped, and targets that are http(s) URLs containing
    ``jianshu`` are downloaded into ``output_dir`` (created on demand). Any
    other target is only reported on stdout.

    Args:
        line: One line of markdown text.
        output_dir: Directory that receives the downloaded images.

    Returns:
        None.
    """
    if line == '':
        return None
    for match in re.finditer(r"\!\[[^\]]*\]\((.+?)\)", line, re.S):
        # Keep only the part in front of the first "?".
        target = match.group(1).partition('?')[0]
        print('[Process:]' + target)
        is_remote = target.startswith(('http://', 'https://'))
        if not (is_remote and 'jianshu' in target):
            print("[ 不合法的 image url]:" + target)
            continue
        ensure_dir_exist(output_dir)
        download_image_file(target, output_dir)
    return None
def download_image_file(url, output_dir):
    """Download ``url`` into ``output_dir`` under the URL's base name.

    A request that fails (connection error, timeout or HTTP error status) is
    reported on stdout and skipped, so that an error page is never stored as
    if it were an image.

    Args:
        url: Address of the image to fetch.
        output_dir: Existing directory in which the image file is written.

    Returns:
        None.

    Raises:
        OSError: If the image file cannot be written.
    """
    print("# 准备下载...", end=' ')
    try:
        # Without a timeout a stalled connection would block the whole run.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException as err:
        print("下载失败: {}".format(err))
        return
    print("准备写入...", end=' ')
    new_name = os.path.join(output_dir, os.path.basename(url))
    with open(new_name, 'wb') as f:
        f.write(r.content)
    print("写入完成!")
    return
def walk_dir(dir_name):
    """Process every ``.md`` file found below ``dir_name``.

    The directory layout below ``dir_name`` is mirrored below ``OUTPUT_DIR``.
    For each markdown file a folder named after the file (without its
    ``.md`` suffix) is used as the image directory and passed to
    ``start_a_file`` together with the file's path.

    Args:
        dir_name: Root directory that is searched recursively.

    Returns:
        None.
    """
    for root, _dirs, files in os.walk(dir_name):
        # os.walk builds every root from the path it was given, so slicing
        # off that prefix yields the part relative to dir_name ('' for the
        # top level, '/sub' for nested directories).
        relative_name = root[len(dir_name):]
        print(' root={}'.format(relative_name))
        output_root = os.path.join(OUTPUT_DIR, relative_name.lstrip(os.sep))
        ensure_dir_exist(output_root)
        for f in files:
            print(' file = {}'.format(f))
            # splitext keeps dots inside the title, so "a.b.md" and "a.c.md"
            # get separate image folders.
            stem, ext = os.path.splitext(f)
            if ext != '.md':
                continue
            a_markdown_file = os.path.join(root, f)
            # Folder that receives the images of this markdown file.
            this_file_output_dir = os.path.join(output_root, stem)
            print(' this_file_output_dir = {}'.format(this_file_output_dir))
            # Process the file.
            start_a_file(a_markdown_file, this_file_output_dir)
# Script entry point: create the output root, then process every note found
# below INPUT_DIR.
# NOTE: there is no __main__ guard, so these calls also run on import.
ensure_dir_exist(OUTPUT_DIR)
walk_dir(INPUT_DIR)
再附带一个脚本, 可以将简书导出的笔记转为本地笔记(包括下载图片, 并把图片链接替换为本地地址). 支持单文件使用, 批处理请自行修改.
#! /usr/bin/env python
# -*- coding: UTF-8 -*-
#
# * 根据简书的md 文件, 下载图片文件, 用imghdr推断类型
import re, requests, os, sys, imghdr
OUTDIR = '/imgs/Jianshu' # Local directory that replaces the image links in the md file.
DIRECTOUTDIR = False # False: download images into the md file's directory; True is meant to download them into OUTDIR.
def process_line(line, output_dir):
    """Download the Jianshu images referenced in one markdown line.

    Each ``![alt](target)`` occurrence is inspected. Targets that are http(s)
    URLs containing ``upload-images.jianshu`` are downloaded (query string
    removed) into ``output_dir``. A downloaded file without a suffix is
    renamed to carry the image type detected by ``imghdr``. Other targets
    are only reported on stdout.

    Args:
        line: One line of markdown text.
        output_dir: Directory that receives the downloaded images.

    Returns:
        None for an empty line. Otherwise a list of
        ``(original_target, local_file)`` tuples, one per stored image,
        where ``original_target`` is the link text exactly as it appears in
        the line (query string included).
    """
    if line == '':
        return
    img_list = re.findall(r"\!\[[^\]]*\]\((.+?)\)", line, re.S)
    outfnames = []
    for iu in img_list:
        img_url = iu.split('?')[0]
        print('[Process:]' + img_url)
        is_remote = img_url.startswith(('http://', 'https://'))
        if not (is_remote and 'upload-images.jianshu' in img_url):
            print("[ 非简书 image url]:" + img_url)
            continue
        dfile = download_image_file(img_url, output_dir)
        if not dfile:
            continue
        if not os.path.splitext(dfile)[1]:
            # imghdr.what() returns None for data it does not recognise; in
            # that case the file keeps its suffix-less name instead of
            # failing on '.' + None.
            kind = imghdr.what(dfile)
            if kind:
                ofile = dfile + '.' + kind
                os.replace(dfile, ofile)
                dfile = ofile
        outfnames.append((iu, dfile))
    return outfnames
def download_image_file(url, output_dir):
    """Download ``url`` into ``output_dir`` under the URL's base name.

    A request that fails (connection error, timeout or HTTP error status) is
    reported on stdout and no file is written, so that an error page is never
    stored as if it were an image.

    Args:
        url: Address of the image to fetch.
        output_dir: Existing directory in which the image file is written.

    Returns:
        The path of the written file, or None when the download failed.

    Raises:
        OSError: If the image file cannot be written.
    """
    print("# 准备下载...", end=' ')
    try:
        # Without a timeout a stalled connection would block the whole run.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException as err:
        print("下载失败: {}".format(err))
        return None
    print("准备写入...", end=' ')
    new_name = os.path.join(output_dir, os.path.basename(url))
    with open(new_name, 'wb') as f:
        f.write(r.content)
    print("写入完成!")
    return new_name
def main():
    """Localise the images of the markdown file named by ``sys.argv[1]``.

    Every line of the file is passed to ``process_line``. For each image it
    reports, the original link in that line is replaced by
    ``OUTDIR + os.sep + <file name>``. The result is written to
    ``<file>.tmp``, which then replaces the original file.

    Images are downloaded into ``OUTDIR`` when ``DIRECTOUTDIR`` is true,
    otherwise into the directory that contains the markdown file.

    Raises:
        SystemExit: If no markdown file was given on the command line.
        OSError: If the markdown file cannot be read or rewritten.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} <markdown-file>'.format(sys.argv[0]))
    md_path = sys.argv[1]

    if DIRECTOUTDIR:
        # The flag is only a switch; the download target is OUTDIR itself.
        download_dir = OUTDIR
        os.makedirs(download_dir, exist_ok=True)
    else:
        # A bare file name has no directory part: use the current directory.
        download_dir = os.path.dirname(md_path) or '.'

    # NOTE(review): the exported notes are assumed to be UTF-8 encoded.
    with open(md_path, encoding='utf-8') as f:
        lines = f.readlines()

    tmp_path = md_path + '.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        for line in lines:
            out = process_line(line, download_dir)
            # Replace markdown contents: point each link at the local copy.
            for original, local_file in out or ():
                local_link = OUTDIR + os.sep + os.path.basename(local_file)
                line = line.replace(original, local_link)
            f.write(line)
    # os.replace overwrites the destination on every platform.
    os.replace(tmp_path, md_path)
# Run the conversion only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()