第三方框架
1. jsoup
2. OkHttp
解析要素
1. 翻章:上一章
2. 翻章:下一章
3. 目录
4. 内容
表设计
/**
* Content.
* NOTE(review): presumably the CSS selector used to locate the chapter body —
* the parsing code passes bookSite.getContent() to Document.selectFirst; confirm
* that these fields belong to the site-configuration class.
* Unlike the fields below, this one carries no explicit field-name mapping annotation.
*/
private String content;
// NOTE(review): presumably the selector for the chapter title element — confirm.
@Field("content_title")
private String contentTitle;
// NOTE(review): presumably the selector for the link back to the table of contents — confirm.
@Field("chapter_url")
private String chapterUrl;
// NOTE(review): presumably the selector for the "next chapter" link — confirm.
@Field("next_chapter_url")
private String nextChapterUrl;
// NOTE(review): presumably the selector for the "previous chapter" link
// ("last" appears to mean "previous" here) — confirm.
@Field("last_chapter_url")
private String lastChapterUrl;
解析代码
/**
 * Fetches the page at {@code url} and builds a {@link BookChapter} from it,
 * using the selectors supplied by the {@link BookSite} resolved for that URL.
 *
 * <p>The title, catalogue link, next/previous chapter links and body are each
 * set only when their selector matches an element, so the returned chapter may
 * be partially populated. If an {@link IOException} is thrown along the way it
 * is logged and the chapter is returned as filled in so far.
 *
 * @param url address of the chapter page
 * @return the chapter assembled from the page
 */
public BookChapter content(String url) {
    BookChapter chapter = new BookChapter();
    BookSite site = getSite(url);
    try {
        Document page = download(url);

        // Chapter title.
        Element heading = page.selectFirst(site.getContentTitle());
        if (heading != null) {
            chapter.setName(heading.text());
        }

        // Link stored as the chapter URL.
        Element catalogLink = page.selectFirst(site.getChapterUrl());
        if (catalogLink != null) {
            chapter.setChapterUrl(catalogLink.absUrl("href"));
        }

        // Link to the following chapter.
        Element forwardLink = page.selectFirst(site.getNextChapterUrl());
        if (forwardLink != null) {
            chapter.setNextChapterUrl(forwardLink.absUrl("href"));
        }

        // Link stored as the "last" chapter URL.
        Element backLink = page.selectFirst(site.getLastChapterUrl());
        if (backLink != null) {
            chapter.setLastChapterUrl(backLink.absUrl("href"));
        }

        // Chapter body, with non-text markup stripped.
        Element body = page.selectFirst(site.getContent());
        if (body != null) {
            chapter.setContent(stripNonTextMarkup(body));
        }
    } catch (IOException e) {
        log.error(e.getMessage(), e);
    }
    return chapter;
}

/**
 * Removes every {@code a}, {@code script} and {@code style} element from
 * {@code body} (in that order, modifying it in place) and returns the
 * remaining inner HTML.
 */
private String stripNonTextMarkup(Element body) {
    for (String tag : new String[] {"a", "script", "style"}) {
        body.select(tag).remove();
    }
    return body.html();
}
结果
难点
技术上没有什么难点,难点在于日常的维护。