在JavaScript中,可以通过读取文件的二进制数据并检查文件的 “魔术数字”(magic number) 来确定文件类型。魔术数字是指文件开始部分的一组特定的字节序列,不同的文件类型会有不同的魔术数字。
JavaScript中实现
以下是一个简单的示例,展示如何使用 FileReader API 读取文件的二进制数据,并根据文件开头的几个字节判断文件类型:
/**
 * Detects the type of the file chosen in the `#fileInput` element and writes
 * the outcome into the `#result` element.
 *
 * Only the leading bytes of the file are read, because signature matching
 * never looks past the header. If the read fails, the error is logged and
 * the type is reported as 'Unknown'.
 */
function detectFileType() {
  // Number of leading bytes handed to getFileType; comfortably longer than
  // the longest signature it checks.
  const HEADER_BYTES = 16;

  const input = document.getElementById('fileInput');
  const file = input && input.files ? input.files[0] : undefined;
  if (!file) {
    console.log('No file selected.');
    return;
  }

  const showResult = function (fileType) {
    document.getElementById('result').innerText = `Detected file type: ${fileType}`;
  };

  const reader = new FileReader();
  // onload fires only for a successful read, so reader.result is an ArrayBuffer here.
  reader.onload = function () {
    showResult(getFileType(new Uint8Array(reader.result)));
  };
  reader.onerror = function () {
    console.error('Failed to read file:', reader.error);
    showResult('Unknown');
  };
  // Slice first so a large file is not loaded into memory just to inspect its header.
  reader.readAsArrayBuffer(file.slice(0, HEADER_BYTES));
}
function getFileType(byteArray) {
// 文件类型及其对应的魔术数字,下面这些魔术数字不保证准确
const fileTypes = [
{ signature: [0xFF, 0xD8, 0xFF], type: 'JPEG' },
{ signature: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], type: 'PNG' },
{ signature: [0x47, 0x49, 0x46, 0x38], type: 'GIF' },
{ signature: [0x52, 0x49, 0x46, 0x46], type: 'WEBP' },
{ signature: [0x42, 0x4D], type: 'BMP' },
];
for (const type of fileTypes) {
let match = true;
for (let i = 0; i < type.signature.length; i++) {
if (byteArray[i] !== type.signature[i]) {
match = false;
break;
}
}
if (match) {
return type.type;
}
}
return 'Unknown';
}
微信小程序中实现
那么,如何在微信小程序里实现类似的功能呢?
- 起初以为小程序里无法实现,因为不确定小程序中是否支持 `Uint8Array`;经过验证,小程序中可以使用它。
- 在小程序中使用 `fs.readFileSync(filePath)`(不传 encoding)可以获得文件的 `ArrayBuffer`,这样就能完美解决问题。
  - `filePath` 必须是小程序本地地址才行,不能使用网络地址。
  - 其中 `const fs = uni.getFileSystemManager()`(原生小程序中为 `wx.getFileSystemManager()`)。
完整代码
// pages/index/index.js
// Page that lets the user pick a file and shows the type detected from its magic number.
Page({
  data: {
    // Last detected type name; '' until a file has been analysed.
    fileType: ''
  },

  /** Opens the message-file picker for a single file and starts detection on it. */
  chooseFile: function() {
    wx.chooseMessageFile({
      count: 1,
      success: (res) => {
        const picked = res.tempFiles && res.tempFiles[0];
        if (!picked) {
          console.error('No file selected.');
          return;
        }
        this.readFile(picked.path);
      }
    });
  },

  /**
   * Reads the file at `filePath` and stores the detected type in `data.fileType`.
   *
   * @param filePath Local file path (for example the temp path returned by the picker).
   */
  readFile: function(filePath) {
    const fs = wx.getFileSystemManager();
    fs.readFile({
      filePath: filePath,
      // `encoding` is deliberately omitted: without it the file content is
      // delivered as an ArrayBuffer. With an encoding such as 'binary' the
      // data is a string, which cannot be viewed through a Uint8Array.
      success: (res) => {
        this.setData({
          fileType: this.detectFileType(res.data)
        });
      },
      fail: (err) => {
        console.error('Failed to read file:', err);
      }
    });
  },

  /**
   * Identifies a file type from the first bytes of its content.
   *
   * @param buffer ArrayBuffer holding the file content (or at least its first 12 bytes).
   * @returns 'JPEG', 'PNG', 'GIF', 'WEBP', 'BMP', or 'Unknown' when nothing
   *   matches or `buffer` is not an ArrayBuffer.
   */
  detectFileType: function(buffer) {
    if (Object.prototype.toString.call(buffer) !== '[object ArrayBuffer]') {
      return 'Unknown';
    }
    // Each type lists the byte runs that must all be present at the given offsets.
    // The list is a small sample and is not exhaustive.
    const fileTypes = [
      { type: 'JPEG', parts: [{ offset: 0, bytes: [0xFF, 0xD8, 0xFF] }] },
      { type: 'PNG', parts: [{ offset: 0, bytes: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] }] },
      { type: 'GIF', parts: [{ offset: 0, bytes: [0x47, 0x49, 0x46, 0x38] }] },
      {
        // "RIFF" alone is shared with WAV, AVI and other RIFF files, so the
        // "WEBP" tag at offset 8 is required as well.
        type: 'WEBP',
        parts: [
          { offset: 0, bytes: [0x52, 0x49, 0x46, 0x46] },
          { offset: 8, bytes: [0x57, 0x45, 0x42, 0x50] }
        ]
      },
      { type: 'BMP', parts: [{ offset: 0, bytes: [0x42, 0x4D] }] }
    ];
    // 12 bytes cover the longest check above (the WEBP tag ends at byte 11).
    const header = new Uint8Array(buffer.slice(0, 12));
    const partMatches = (part) =>
      part.bytes.every((expected, i) => header[part.offset + i] === expected);
    const found = fileTypes.find((candidate) => candidate.parts.every(partMatches));
    return found ? found.type : 'Unknown';
  }
});
注意
需要取出前 12 个字节作为头部。
对于 MP4 格式的文件,前 4 个字节是 `ftyp` 原子(box)的长度,第 5~8 字节是 `ftyp`,第 9~12 字节是主品牌(major brand),常见的有 `isom`、`mp42` 等。
这里的 `ftyp` 是一个原子(box),它定义了文件的兼容性品牌和版本。`isom` 表示该文件遵循 ISO 基础媒体文件格式(ISO Base Media File Format),这是 MP4 格式的标准基础。
// Take the first 12 bytes so the header spans the 4 leading bytes, "ftyp" (bytes 4-7)
// and the brand that follows it, such as "isom" (bytes 8-11).
const header = new Uint8Array(buffer.slice(0, 12));
相对准确一点的魔术数字
下面是相对全面和准确的魔术数字,及相关代码
const fs = uni.getFileSystemManager()
const logger = log()

// Signature tables.
// Every entry is a byte prefix compared against the start of the file. The
// lookup in this file scans the image table, then the video table, then the
// "other" table, and returns the FIRST entry whose prefix matches, so order
// matters: more specific prefixes must come before shorter ones they share.

const imageFileTypes = [
  { signature: [0xFF, 0xD8, 0xFF], type: 'JPEG' },
  { signature: [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], type: 'PNG' },
  { signature: [0x47, 0x49, 0x46, 0x38], type: 'GIF' },
  // "RIFF" is a generic container prefix; the real WebP marker ("WEBP") sits
  // at offset 8, which a plain prefix cannot express because bytes 4-7 vary.
  { signature: [0x52, 0x49, 0x46, 0x46], type: 'WEBP' },
  { signature: [0x42, 0x4D], type: 'BMP' },
  // Netpbm family: ASCII "P" followed by a digit 1-6. The magic is followed
  // by whitespace, which is not checked here.
  { signature: [0x50, 0x31], type: 'PBM' }, // "P1" plain bitmap
  { signature: [0x50, 0x32], type: 'PGM' }, // "P2" plain graymap
  { signature: [0x50, 0x33], type: 'PPM' }, // "P3" plain pixmap
  { signature: [0x50, 0x34], type: 'PBM' }, // "P4" raw bitmap
  { signature: [0x50, 0x35], type: 'PGM' }, // "P5" raw graymap
  { signature: [0x50, 0x36], type: 'PPM' }, // "P6" raw pixmap
  { signature: [0x00, 0x00, 0x01, 0x00], type: 'ICO' },
]

const videoFileTypes = [
  // ISO base media files: 4-byte box size, "ftyp", then the 4-byte major brand.
  { signature: [0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D], type: 'MP4' }, // ftyp isom
  { signature: [0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D], type: 'MP4' }, // ftyp isom
  { signature: [0x00, 0x00, 0x00, 0x1C, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D], type: 'MP4' }, // ftyp isom
  { signature: [0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32], type: 'MP4' }, // ftyp mp42
  { signature: [0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32], type: 'MP4' }, // ftyp mp42
  { signature: [0x00, 0x00, 0x00, 0x1C, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32], type: 'MP4' }, // ftyp mp42
  // QuickTime brand "qt  "; listed before the size-only 3GP entry below,
  // which would otherwise claim 0x14-sized QuickTime headers.
  { signature: [0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70, 0x71, 0x74, 0x20, 0x20], type: 'MOV' },
  // Unreachable while the image table (scanned first) maps bare "RIFF" to WEBP.
  { signature: [0x52, 0x49, 0x46, 0x46], type: 'AVI' },
  { signature: [0x00, 0x00, 0x01, 0xBA], type: 'MPEG-1' },
  { signature: [0x00, 0x00, 0x01, 0xB3], type: 'MPEG-2' },
  // Size-only fallback: any other "ftyp" box of length 0x18.
  { signature: [0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70], type: 'MOV' },
  { signature: [0x46, 0x4C, 0x56, 0x01], type: 'FLV' },
  { signature: [0x1A, 0x45, 0xDF, 0xA3], type: 'MKV/WebM' },
  { signature: [0x30, 0x26, 0xB2, 0x75], type: 'WMV' },
  { signature: [0x4F, 0x67, 0x67, 0x53], type: 'OGG' },
  // Single sync byte: a very weak check that matches any file starting with
  // 0x47 ("G") that no earlier entry has claimed.
  { signature: [0x47], type: 'TS' },
  // Size-only fallback: any other "ftyp" box of length 0x14.
  { signature: [0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70], type: '3GP' },
  { signature: [0x2E, 0x52, 0x4D, 0x46], type: 'RMVB' },
]

const oherFileTypes = [
  { signature: [0x50, 0x4B, 0x03, 0x04], type: 'ZIP' },
  // "Rar!" 0x1A 0x07 is shared by the RAR 4.x and RAR 5 headers; the byte
  // after it differs between the two versions, so it is not part of the prefix.
  { signature: [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07], type: 'RAR' },
  { signature: [0x37, 0x7A, 0xBC, 0xAF], type: '7-Zip' },
  { signature: [0x25, 0x50, 0x44, 0x46], type: 'PDF' },
  // The three entries below share the 4-byte ZIP prefix and come after it, so
  // a first-match lookup never reaches them.
  // NOTE(review): bytes 4-7 of these signatures could not be verified; OOXML
  // documents are ZIP archives and may not be distinguishable from the header
  // alone. Confirm before relying on them.
  { signature: [0x50, 0x4B, 0x03, 0x04, 0x31, 0x57, 0x52, 0x64], type: 'DOCX' },
  { signature: [0x50, 0x4B, 0x03, 0x04, 0x58, 0x53, 0x06, 0x00], type: 'XLSX' },
  { signature: [0x50, 0x4B, 0x03, 0x04, 0x50, 0x50, 0x54, 0x06], type: 'PPTX' },
]

// Distinct type names per category, in first-appearance order.
const imageTypes = Array.from(new Set(imageFileTypes.map(item => item.type)))
const videoTypes = Array.from(new Set(videoFileTypes.map(item => item.type)))
const oherTypes = Array.from(new Set(oherFileTypes.map(item => item.type)))
一些简单用法
/**
 * Resolves the type name for a file header.
 *
 * RIFF and "ftyp" containers are refined first using the four-character tag
 * at offset 8, because their format cannot be told from a fixed byte prefix.
 * Everything else is looked up in the signature tables (image, then video,
 * then other), returning the first entry whose signature is a prefix of
 * `header`.
 *
 * @param header Leading bytes of the file; 12 bytes are needed for the
 *   container refinement, shorter input falls back to the tables alone.
 * @returns The matched type name, or 'Unknown' when nothing matches.
 */
const getType = (header: Uint8Array): string => {
  // Four ASCII characters starting at `offset`, or '' if the header is too short.
  const tagAt = (offset: number): string =>
    header.length >= offset + 4
      ? String.fromCharCode(header[offset], header[offset + 1], header[offset + 2], header[offset + 3])
      : ''

  const containerTag = tagAt(8)
  if (containerTag !== '') {
    if (tagAt(0) === 'RIFF') {
      // The form type tells WebP and AVI apart; other RIFF files (e.g. WAVE)
      // are not in the tables and must not be reported as WEBP.
      if (containerTag === 'WEBP') return 'WEBP'
      if (containerTag === 'AVI ') return 'AVI'
      return 'Unknown'
    }
    if (tagAt(4) === 'ftyp') {
      // The major brand identifies the format regardless of the box size.
      if (containerTag === 'qt  ') return 'MOV'
      if (containerTag.startsWith('3gp') || containerTag.startsWith('3g2')) return '3GP'
      if (['isom', 'iso2', 'mp41', 'mp42', 'avc1', 'M4V ', 'dash'].includes(containerTag)) return 'MP4'
      // Unrecognised brand: fall through to the size-keyed table entries.
    }
  }

  const types = [...imageFileTypes, ...videoFileTypes, ...oherFileTypes]
  const found = types.find(candidate =>
    candidate.signature.every((expected, i) => header[i] === expected)
  )
  return found ? found.type : 'Unknown'
}

/**
 * Reads a local file, detects its real type from the header bytes and warns
 * the user when that type does not belong to the expected category.
 *
 * @param tempFilePath Local path of the file to inspect.
 * @param judgeType Category the caller expects the file to be.
 * @returns The detected type name, or 'Unknown' when the file content is not
 *   an ArrayBuffer or no signature matches. When the type is outside the
 *   expected category a toast is shown and the promise resolves 2500 ms
 *   later. Rejects if reading the file throws.
 */
const detectFileType = async (tempFilePath: string, judgeType: 'image' | 'video'): Promise<string> => {
  const arraybuffer = fs.readFileSync(tempFilePath) as ArrayBuffer
  if (Object.prototype.toString.call(arraybuffer) !== '[object ArrayBuffer]') return 'Unknown'
  const header = new Uint8Array(arraybuffer.slice(0, 12)); // first 12 bytes form the header
  console.log(`文件头部`, header.toString(), '16进制', [...header].map(item => item.toString(16)).toString())
  const type = getType(header)

  // Allowed type names and the label used in the warning, per expected category.
  let allowed: string[] | undefined
  let label = ''
  if (judgeType === 'image') {
    allowed = imageTypes
    label = '图片'
  } else if (judgeType === 'video') {
    allowed = videoTypes
    label = '视频'
  }

  if (allowed !== undefined && !allowed.includes(type)) {
    uni.showToast({
      icon: 'none',
      title: `~~当前文件真实类型可能是${type}~~,请确认是否为${label}文件~~`
    })
    // Hold the result back for 2500 ms after showing the toast, as the original code does.
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 2500)
    })
  }
  return type
}
export default detectFileType