1.新建项目
①npm init -y 生成package.json
②tsc --init 生成 tsconfig.json
③安装 ts-node:npm i ts-node -D(作为开发依赖安装,用于直接运行 .ts 文件)
④package.json 配置scripts,安装typescript (npm i typescript -D)
2.爬取网页内容与数据
① src/crowller.ts(爬虫入口:抓取页面、调用分析器、写入文件)
/*
* @Author: itw_liuchao04 itw_liuchao04@tkmail.com
* @Date: 2022-10-21 15:07:19
* @LastEditors: itw_liuchao04 itw_liuchao04@tkmail.com
* @LastEditTime: 2022-10-22 15:13:06
* @FilePath: \TypeScript\src\crowller.ts
* @Description:
*
* Copyright (c) 2022 by itw_liuchao04 itw_liuchao04@tkmail.com, All Rights Reserved.
*/
import fs from "fs";
import path from "path";
// TypeScript cannot type-check a plain JS library directly; a .d.ts declaration file (here @types/superagent) bridges TS -> JS.
import superagent from "superagent";
import DellAnalyzer from "./analizer";
/**
 * Contract for the pluggable page analyzers accepted by the crawler.
 *
 * The member name `anylaze` (sic) is part of the public contract and is kept
 * as-is so existing implementers and callers keep compiling.
 */
export interface Analyzer {
  /**
   * Converts fetched HTML into the text that should be stored.
   *
   * @param html - Raw HTML of the fetched page.
   * @param filePath - Path of the output file, supplied by the caller.
   * @returns The string content to persist.
   */
  anylaze(html: string, filePath: string): string;
}
/**
 * Downloads a page, passes its HTML to an {@link Analyzer}, and writes the
 * analyzer's output to `../data/course.json` (relative to this file).
 *
 * The whole fetch -> analyze -> write pipeline is started from the constructor.
 */
class Crowller {
  /** Absolute path of the JSON file that receives the analyzed result. */
  private readonly filePath = path.resolve(__dirname, "../data/course.json");

  /**
   * @param url - Address of the page to fetch.
   * @param analyzer - Strategy that converts the fetched HTML into file content.
   */
  constructor(
    private readonly url: string,
    private readonly analyzer: Analyzer
  ) {
    // A constructor cannot await, so the pipeline runs in the background.
    // Attach a rejection handler so a network or file error is reported
    // instead of surfacing as an unhandled promise rejection.
    this.initSpliderProcess().catch((error: unknown) => {
      console.error("Crawl failed:", error);
      // Keep signalling failure to the caller of the script.
      process.exitCode = 1;
    });
  }

  /** Fetches `url` and resolves with the response body as text. */
  private async getRawHtml(): Promise<string> {
    const response = await superagent.get(this.url);
    return response.text;
  }

  /**
   * Synchronously writes `fileContent` to the output file.
   *
   * @param fileContent - Text to store, replacing any previous content.
   */
  private writeFile(fileContent: string): void {
    // On a fresh checkout the output directory may not exist yet, which would
    // make writeFileSync fail with ENOENT.
    fs.mkdirSync(path.dirname(this.filePath), { recursive: true });
    fs.writeFileSync(this.filePath, fileContent);
  }

  /** Runs the pipeline once: fetch the HTML, analyze it, persist the result. */
  private async initSpliderProcess(): Promise<void> {
    const html = await this.getRawHtml();
    const fileContent = this.analyzer.anylaze(html, this.filePath);
    this.writeFile(fileContent);
  }
}
// Secret expected by the demo page, sent as a query-string parameter.
const secret = "secretKey";
const url = `http://www.dell-lee.com/typescript/demo.html?secret=${secret}`;

// DellAnalyzer has a private constructor, so `new DellAnalyzer()` is not
// available; the shared instance is obtained through getInstance().
const analyzer = DellAnalyzer.getInstance();

// Constructing the crawler immediately starts the fetch/analyze/write run.
const crowller = new Crowller(url, analyzer);
② src/analizer.ts(页面分析器:解析 HTML 并生成要写入的 JSON 内容)
/** A single course entry extracted from the page. */
interface Course {
  /** Text of the course's first description element. */
  title: string;
  /** Number parsed from the course's second description element. */
  count: number;
}

/** Result of one analysis run: the extracted courses plus when they were read. */
interface CourseContent {
  /** Timestamp of the run, in milliseconds since the Unix epoch. */
  time: number;
  /** Courses found during the run. */
  data: Course[];
}

/** Shape of the stored JSON: course lists keyed by the timestamp of each run. */
interface CourseList {
  [timestamp: number]: Course[];
}
import fs from "fs";
//获取各区块内容
import cheerio from "cheerio";
import { Analyzer } from "./crowller";
/**
 * Singleton analyzer for the course demo page.
 *
 * It extracts every `.course-item` from the HTML and merges the result, keyed
 * by the current timestamp, into the JSON already stored at the target path.
 */
export default class DellAnalyzer implements Analyzer {
  /** Lazily created shared instance; undefined until getInstance() is first called. */
  private static instance?: DellAnalyzer;

  // Private so that getInstance() is the only way to obtain an analyzer.
  private constructor() {}

  /** Returns the shared analyzer, creating it on first use. */
  static getInstance(): DellAnalyzer {
    if (!DellAnalyzer.instance) {
      DellAnalyzer.instance = new DellAnalyzer();
    }
    return DellAnalyzer.instance;
  }

  /**
   * Reads the number that follows the label separator in a description text.
   *
   * Both the ASCII colon and the full-width colon are accepted as separator.
   *
   * @param label - Description text such as `"label:12"`.
   * @returns The parsed count, or 0 when no number follows a separator. This
   *   keeps NaN (which JSON serializes as `null`) out of the stored data.
   */
  private static parseCount(label: string): number {
    const match = /[:：]\s*(\d+)/.exec(label);
    return match ? parseInt(match[1], 10) : 0;
  }

  /**
   * Extracts all courses from the page.
   *
   * @param html - Raw HTML of the course page.
   * @returns The courses found, stamped with the current time in milliseconds.
   */
  private getCourses(html: string): CourseContent {
    const $ = cheerio.load(html);
    const data: Course[] = [];
    // each() rather than map(): the callback only collects into `data`.
    $(".course-item").each((_index, el) => {
      const descriptions = $(el).find(".course-desc");
      data.push({
        title: descriptions.eq(0).text(),
        count: DellAnalyzer.parseCount(descriptions.eq(1).text()),
      });
    });
    return { time: new Date().getTime(), data };
  }

  /**
   * Merges one run's courses into the content already stored at `filePath`.
   *
   * @param filePath - JSON file holding earlier runs; it may not exist yet.
   * @param courseList - Courses of the current run and their timestamp.
   * @returns All stored runs plus the current one, keyed by timestamp.
   * @throws SyntaxError if the file holds invalid JSON.
   * @throws Error if the file holds valid JSON that is not an object.
   */
  private generateJsonContent(
    filePath: string,
    courseList: CourseContent
  ): CourseList {
    let fileContent: CourseList = {};
    if (fs.existsSync(filePath)) {
      const raw = fs.readFileSync(filePath, "utf-8");
      // An empty file counts as "nothing stored yet" instead of failing in JSON.parse.
      if (raw.trim() !== "") {
        const parsed: unknown = JSON.parse(raw);
        if (
          typeof parsed !== "object" ||
          parsed === null ||
          Array.isArray(parsed)
        ) {
          throw new Error(`${filePath} does not contain a JSON object`);
        }
        // Checked above to be a plain object; the entries themselves are trusted.
        fileContent = parsed as CourseList;
      }
    }
    fileContent[courseList.time] = courseList.data;
    return fileContent;
  }

  /**
   * Analyzes the page and produces the complete JSON text to store.
   *
   * @param html - Raw HTML of the course page.
   * @param filePath - JSON file whose existing content is merged into the result.
   * @returns Serialized JSON containing earlier runs plus the current one.
   */
  anylaze(html: string, filePath: string): string {
    const courses = this.getCourses(html);
    return JSON.stringify(this.generateJsonContent(filePath, courses));
  }
}
通过tsc -w 命令,使得tsc监视文件变化从而实现实时编译
//package.json中
"scripts": {
"dev": "ts-node ./src/crowller.ts",
"build": "tsc -w"
},
nodemon:监听编译后的 js 文件变化,一旦变化就自动重新执行(需先安装:npm i nodemon -D)。
"scripts": {
"dev": "ts-node ./src/crowller.ts",
"build": "tsc -w",
"start": "nodemon node ./build/crowller.js"
},
//设置忽略:爬虫每次运行都会写入 data/course.json,如果不忽略 data 目录,nodemon 会因该文件变化而不断重启
"nodemonConfig": {
"ignore": [
"data/*"
]
},
打开两个命令行,先执行build,另一个命令行start,即可同时监听ts文件变化并执行.
上面的执行方式太过麻烦,我们可以借助 concurrently 在一个命令行里并行运行多个脚本(需先安装:npm i concurrently -D)。
//dev环境下,帮我们同时运行 build,start
"scripts": {
"dev:build": "tsc -w",
"dev:start": "nodemon node ./build/crowller.js",
"dev": "concurrently npm:dev:*"
},