如何批量下载飞书文档

飞书的云文档只支持单个文件下载,如果选择文件夹下载的话,只会保留文件目录结构,其中的文档会被下载成.url快捷方式文件,格式如下:

[InternetShortcut]
URL=https://www.feishu.cn/${type}/${token}
Object=${token}

如果需要批量下载飞书云文档,则需要使用飞书的云文档API,https://open.feishu.cn/document/server-docs/docs/drive-v1/export_task/export-user-guide
其中的三个接口:

  1. 创建导出任务:https://open.feishu.cn/open-apis/drive/v1/export_tasks
  2. 查询导出任务结果:https://open.feishu.cn/open-apis/drive/v1/export_tasks/${ticket}?token=${token}
  3. 下载导出文件:https://open.feishu.cn/open-apis/drive/v1/export_tasks/file/${file_token}/download

想要合理地下载飞书文档库中的文档,这里需要在飞书后台建立一个机器人应用,使用应用中的user_access_token,如果有其他的办法能拿到也是可以的。

首先,下载文件夹
然后,遍历文件夹中的文件,提取url后缀的文件,读取其中的token和类型
URL=https://www.feishu.cn/${type}/${token}
然后,再逐步调用导出任务,查询结果,下载文件即可实现批量下载

下面是用nodejs实现的一段批量下载代码,其中只支持docx和doc(URL中的类型为docs或doc)两种文档类型的下载
另外:不知道为什么,创建导出任务之后如果在5秒内就查询结果,获取到的结果是空的,所以延迟了5秒再查询

const axios = require('axios');
const process = require('process');
const path = require('path')
const fs = require('fs')

// Access token sent as `Bearer <token>` in the Authorization header of every
// request config built in this script. The value below is a placeholder
// (presumably a Feishu user_access_token, given the `u-` prefix) and must be
// replaced before running.
let Authorization = "u-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

/**
 * Sends one HTTP request through axios and hands back the response body.
 *
 * Failures are best-effort: the error is logged and the function resolves to
 * `undefined` instead of rejecting, so callers must check the result.
 *
 * @param {object} config - axios request config.
 * @returns {Promise<*>} `response.data`, or `undefined` when the request threw.
 */
async function httpRequest(config) {
    let body;
    try {
        const reply = await axios(config);
        body = reply.data;
    } catch (requestError) {
        console.error(`Error making request: ${requestError}`);
    }
    return body;
}

/**
 * Builds the axios request config that creates an export task.
 *
 * @param {string} token - Token of the cloud document to export.
 * @param {string} fileExt - Target file extension, sent as `file_extension`.
 * @param {string} type - Document type, sent as `type`.
 * @param {string} [sub_id] - Optional sub-resource id, sent as `sub_id` when given.
 *   NOTE(review): per the Feishu export-task API this selects the sheet/table
 *   for CSV exports — confirm against the API reference.
 * @returns {object} axios config (POST, JSON body, Bearer authorization).
 */
function getExportURL(token, fileExt, type, sub_id) {
    const payload = {
        "file_extension": fileExt,
        "token": token,
        "type": type
    };
    // sub_id used to be accepted and silently dropped. It is attached only
    // when provided, so the request body for existing three-argument callers
    // stays byte-identical.
    if (sub_id !== undefined && sub_id !== null && sub_id !== "") {
        payload.sub_id = sub_id;
    }

    return {
        method: 'POST',
        url: 'https://open.feishu.cn/open-apis/drive/v1/export_tasks',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${Authorization}`
        },
        data: JSON.stringify(payload)
    };
}

/**
 * Builds the axios request config that queries the result of an export task.
 *
 * @param {string} ticket - Export task ticket, placed in the URL path.
 * @param {string} token - Document token, placed in the `token` query parameter.
 * @returns {object} axios config (GET with Bearer authorization).
 */
function getTicketURL(ticket, token) {
    const endpoint = `https://open.feishu.cn/open-apis/drive/v1/export_tasks/${ticket}?token=${token}`;
    const headers = { 'Authorization': `Bearer ${Authorization}` };
    return { method: 'GET', url: endpoint, headers: headers };
}

/**
 * Builds the axios request config that downloads an exported file.
 *
 * @param {string} file_token - Token of the exported file, placed in the URL path.
 * @returns {object} axios config (GET, Bearer authorization, streamed response).
 */
function getDownloadURL(file_token) {
    const endpoint = `https://open.feishu.cn/open-apis/drive/v1/export_tasks/file/${file_token}/download`;
    const headers = { 'Authorization': `Bearer ${Authorization}` };
    // The body is requested as a stream so it can be piped to disk by the caller.
    return { method: 'GET', url: endpoint, headers: headers, responseType: 'stream' };
}

/**
 * Streams an exported file to `localPath`.
 *
 * Missing parent folders are created first. Any failure (request, source
 * stream, or disk write) is logged and reported through the return value
 * instead of being thrown.
 *
 * @param {string} file_token - Token of the exported file to download.
 * @param {string} localPath - Destination path on disk.
 * @returns {Promise<boolean>} `true` once the file is fully written, `false` on failure.
 */
async function downloadFile(file_token, localPath) {
    try {
        const response = await axios(getDownloadURL(file_token));

        // The target folder (e.g. ./download) may not exist yet; without this
        // the write stream fails with ENOENT.
        fs.mkdirSync(path.dirname(localPath), { recursive: true });

        await new Promise((resolve, reject) => {
            const writer = fs.createWriteStream(localPath);
            writer.on('finish', resolve);
            writer.on('error', reject);
            // pipe() does not forward source errors to the destination, so a
            // failing response stream needs its own listener.
            response.data.on('error', (streamError) => {
                writer.destroy();
                reject(streamError);
            });
            response.data.pipe(writer);
        });
        return true;
    } catch (error) {
        console.error(`Error downloading file: ${error}`);
        return false;
    }
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Exports one cloud document as .docx and saves it under `<cwd>/download`.
 *
 * Steps: create the export task, poll for its result until a file token is
 * available, then download the file. Every outcome is logged.
 *
 * @param {string} token - Token of the document to export.
 * @param {string} type - Document type passed to the export task.
 * @param {object} [options]
 * @param {number} [options.pollDelayMs=5000] - Wait before each result query.
 *   The first query still happens 5 s after creation by default, because
 *   earlier queries were observed to return an empty result.
 * @param {number} [options.maxPolls=3] - Maximum number of result queries.
 * @returns {Promise<boolean>} `true` when the file was downloaded, otherwise `false`.
 */
async function downloadDocx(token, type, options = {}) {
    const { pollDelayMs = 5000, maxPolls = 3 } = options;

    const created = await httpRequest(getExportURL(token, "docx", type));
    const ticket = created?.data?.ticket;
    if (!ticket) {
        console.error(`创建导出任务失败, token: ${token}`);
        return false;
    }
    console.log(`请求ticket完成, ticket: ${ticket}`);

    // Poll until the export result carries a file token, or give up.
    let result;
    for (let attempt = 0; attempt < maxPolls; attempt++) {
        await sleep(pollDelayMs);
        const queried = await httpRequest(getTicketURL(ticket, token));
        result = queried?.data?.result;
        if (result?.file_token) {
            break;
        }
    }
    if (!result?.file_token) {
        console.error(`查询导出结果失败, ticket: ${ticket}`);
        return false;
    }

    const file_token = result.file_token;
    console.log(`请求file_token完成, file_token: ${file_token}`);

    // Replace every "/" (not just the first) so the document title cannot
    // turn into nested folders; fall back to the token if no title came back.
    const safeName = String(result.file_name ?? token).replaceAll("/", "-");
    const file_name = `${safeName}.${result.file_extension}`;
    const localPath = path.join(process.cwd(), "download", file_name);

    const downloaded = await downloadFile(file_token, localPath);
    if (downloaded) {
        console.log(`下载${file_name}完成,保存在${localPath}`);
    } else {
        console.error(`下载${file_name}失败`);
    }
    return downloaded;
}

/**
 * Lists every entry under `dirPath`, searched recursively, whose extension is `.url`.
 *
 * @param {string} dirPath - Folder to scan.
 * @returns {string[]} Matching entries exactly as `fs.readdirSync` reports them.
 */
function readAllUrlFiles(dirPath) {
    const entries = fs.readdirSync(dirPath, { recursive: true });
    return entries.filter((entry) => path.extname(entry) === '.url');
}

// Folder holding the downloaded .url shortcut files; taken from the first CLI argument.
const dirPath = process.argv[2];
// Work queue of .url entries found under dirPath; empty when no folder was given.
const files = dirPath ? readAllUrlFiles(dirPath) : [];
// Captures the document token from the `Object=` line. Excluding \r and \n
// keeps the carriage return of CRLF-encoded .url files out of the token.
const tokenRegex = /Object=([^\r\n]+)/;
// Captures the first path segment of the document URL, used as the document type.
const typeRegex = /URL=https:\/\/www\.feishu\.cn\/([^\/\s]+)\//;

/**
 * Drains the `files` queue one entry at a time: reads each .url file, extracts
 * the document token and type, and downloads supported types through downloadDocx.
 *
 * Files without a recognisable token or type, and unsupported types, are
 * logged and skipped. A failure on one file is logged and does not stop the
 * remaining files.
 *
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function downloadByUrl() {
    // A loop with awaited downloads replaces the earlier un-awaited recursion,
    // which left floating promises whose rejections were never handled.
    while (files.length > 0) {
        const docPath = files.shift();
        try {
            const fullPath = path.join(dirPath, docPath);
            const content = fs.readFileSync(fullPath, 'utf-8');
            const tokenMatch = content.match(tokenRegex);
            const typeMatch = content.match(typeRegex);
            // Both matches are required; typeMatch used to be dereferenced unchecked.
            if (!tokenMatch || !typeMatch) {
                console.log(`${docPath} 中未找到token或类型, 已跳过`);
                continue;
            }

            const token = tokenMatch[1].trim();
            const docType = typeMatch[1];
            if (docType === 'docx') {
                console.log(`分析 ${docPath} 完成,token:${token}, 剩余文件数:${files.length}`);
                await downloadDocx(token, 'docx');
            } else if (docType === 'docs' || docType === 'doc') {
                console.log(`分析 ${docPath} 完成,token:${token}, 剩余文件数:${files.length}`);
                await downloadDocx(token, 'doc');
            } else {
                console.log(`${docPath} 的类型是 ${docType}, 暂时不支持`);
            }
        } catch (error) {
            console.error(`处理 ${docPath} 失败: ${error}`);
        }
    }
}

if (!dirPath) {
    // Without a folder argument there is nothing to scan.
    console.error('用法: node <脚本> <飞书下载的文件夹路径>');
    process.exitCode = 1;
} else {
    downloadByUrl().catch((error) => {
        console.error(`批量下载失败: ${error}`);
        process.exitCode = 1;
    });
}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容