第一步、在bean包中创建User类。
package com.neusoft.bean;
/**
 * Mutable bean holding one user record.
 *
 * <p>All numeric properties are boxed so that an unset value can be represented as
 * {@code null}. The {@code String} setters store a trimmed copy of their argument
 * (or {@code null}); the constructors store their arguments unchanged.
 */
public class User {

    /** Record identifier; stays {@code null} unless a constructor or setter assigns it. */
    private Integer id;

    private String slug;
    private String nickname;

    /** Time values are kept as plain strings; this class does not parse or validate them. */
    private String updateTime;
    private String latestTime;
    private String joinTime;

    private String headPic;
    private Integer gender;
    private Integer isContract;
    private Integer followingNum;
    private Integer followersNum;
    private Integer articlesNum;
    private Integer wordsNum;
    private Integer beLikedNum;

    /** Creates a user whose properties are all {@code null}. */
    public User() {
    }

    /**
     * Creates a fully populated user, including its identifier.
     *
     * <p>The primitive arguments are boxed into the corresponding properties.
     */
    public User(int id, String slug, String nickname, String updateTime, String latestTime, String joinTime, String headPic, int gender, int isContract, int followingNum, int followersNum, int articlesNum, int wordsNum, int beLikedNum) {
        this(slug, nickname, updateTime, latestTime, joinTime, headPic, gender, isContract,
                followingNum, followersNum, articlesNum, wordsNum, beLikedNum);
        this.id = id;
    }

    /**
     * Creates a user without an identifier; {@code id} is left {@code null}.
     */
    public User(String slug, String nickname, String updateTime, String latestTime, String joinTime, String headPic, Integer gender, Integer isContract, Integer followingNum, Integer followersNum, Integer articlesNum, Integer wordsNum, Integer beLikedNum) {
        this.slug = slug;
        this.nickname = nickname;
        this.updateTime = updateTime;
        this.latestTime = latestTime;
        this.joinTime = joinTime;
        this.headPic = headPic;
        this.gender = gender;
        this.isContract = isContract;
        this.followingNum = followingNum;
        this.followersNum = followersNum;
        this.articlesNum = articlesNum;
        this.wordsNum = wordsNum;
        this.beLikedNum = beLikedNum;
    }

    /** Returns the trimmed form of {@code value}, passing {@code null} through untouched. */
    private static String trimOrNull(String value) {
        if (value == null) {
            return null;
        }
        return value.trim();
    }

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getSlug() {
        return slug;
    }

    /** Stores the trimmed slug, or {@code null} when the argument is {@code null}. */
    public void setSlug(String slug) {
        this.slug = trimOrNull(slug);
    }

    public String getNickname() {
        return nickname;
    }

    /** Stores the trimmed nickname, or {@code null} when the argument is {@code null}. */
    public void setNickname(String nickname) {
        this.nickname = trimOrNull(nickname);
    }

    public String getUpdateTime() {
        return updateTime;
    }

    /** Stores the trimmed value, or {@code null} when the argument is {@code null}. */
    public void setUpdateTime(String updateTime) {
        this.updateTime = trimOrNull(updateTime);
    }

    public String getLatestTime() {
        return latestTime;
    }

    /** Stores the trimmed value, or {@code null} when the argument is {@code null}. */
    public void setLatestTime(String latestTime) {
        this.latestTime = trimOrNull(latestTime);
    }

    public String getJoinTime() {
        return joinTime;
    }

    /** Stores the trimmed value, or {@code null} when the argument is {@code null}. */
    public void setJoinTime(String joinTime) {
        this.joinTime = trimOrNull(joinTime);
    }

    public String getHeadPic() {
        return headPic;
    }

    /** Stores the trimmed value, or {@code null} when the argument is {@code null}. */
    public void setHeadPic(String headPic) {
        this.headPic = trimOrNull(headPic);
    }

    public Integer getGender() {
        return gender;
    }

    public void setGender(Integer gender) {
        this.gender = gender;
    }

    public Integer getIsContract() {
        return isContract;
    }

    public void setIsContract(Integer isContract) {
        this.isContract = isContract;
    }

    public Integer getFollowingNum() {
        return followingNum;
    }

    public void setFollowingNum(Integer followingNum) {
        this.followingNum = followingNum;
    }

    public Integer getFollowersNum() {
        return followersNum;
    }

    public void setFollowersNum(Integer followersNum) {
        this.followersNum = followersNum;
    }

    public Integer getArticlesNum() {
        return articlesNum;
    }

    public void setArticlesNum(Integer articlesNum) {
        this.articlesNum = articlesNum;
    }

    public Integer getWordsNum() {
        return wordsNum;
    }

    public void setWordsNum(Integer wordsNum) {
        this.wordsNum = wordsNum;
    }

    public Integer getBeLikedNum() {
        return beLikedNum;
    }

    public void setBeLikedNum(Integer beLikedNum) {
        this.beLikedNum = beLikedNum;
    }
}
第二步、在mapper包中创建UserMapper接口和UserMapper.xml配置文件。
①、UserMapper接口
import com.neusoft.bean.User;
import java.util.List;
/**
 * MyBatis mapper for the {@code user} table.
 *
 * <p>Each method name matches the {@code id} of a statement in UserMapper.xml
 * (namespace {@code com.neusoft.mapper.UserMapper}). The {@code int} results are
 * whatever MyBatis reports for the executed insert, update or delete statement.
 */
public interface UserMapper {
/**
 * Deletes the row whose {@code _id} column equals {@code id}.
 */
int deleteByPrimaryKey(Integer id);
/**
 * Inserts {@code record} using every column except {@code _id}; properties that are
 * {@code null} are passed to the statement as they are.
 */
int insert(User record);
/**
 * Inserts {@code record}, listing only the columns whose property is not {@code null}
 * ({@code _id} included when {@code id} is set).
 */
int insertSelective(User record);
/**
 * Loads the row whose {@code _id} column equals {@code id}.
 */
User selectByPrimaryKey(Integer id);
/**
 * Loads a row by slug; the mapped statement reads only the {@code slug} property of
 * {@code user}.
 *
 * <p>NOTE(review): nothing visible here makes {@code slug} unique; a single-object
 * mapper method fails in MyBatis when several rows match, so confirm the table
 * constraint.
 */
User selectBySlugName(User user);
/**
 * Loads every row of the {@code user} table.
 */
List<User> selectAll();
/**
 * Updates the row identified by {@code record.id}, setting only the columns whose
 * property is not {@code null}.
 */
int updateByPrimaryKeySelective(User record);
/**
 * Updates the row identified by {@code record.id}, overwriting every non-key column
 * with the corresponding property value, {@code null} included.
 */
int updateByPrimaryKey(User record);
}
②、UserMapper.xml配置文件。
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.neusoft.mapper.UserMapper" >
<!-- Maps each column of the user table to the matching property of the User bean.
     type="User" is a short name rather than a fully qualified class; presumably a
     type alias registered in the MyBatis configuration, which is not shown here. -->
<resultMap id="BaseResultMap" type="User" >
<id column="_id" property="id" jdbcType="INTEGER" />
<result column="slug" property="slug" jdbcType="VARCHAR" />
<result column="nickname" property="nickname" jdbcType="VARCHAR" />
<result column="update_time" property="updateTime" jdbcType="VARCHAR" />
<result column="latest_time" property="latestTime" jdbcType="VARCHAR" />
<result column="join_time" property="joinTime" jdbcType="VARCHAR" />
<result column="head_pic" property="headPic" jdbcType="VARCHAR" />
<result column="gender" property="gender" jdbcType="INTEGER" />
<result column="is_contract" property="isContract" jdbcType="INTEGER" />
<result column="following_num" property="followingNum" jdbcType="INTEGER" />
<result column="followers_num" property="followersNum" jdbcType="INTEGER" />
<result column="articles_num" property="articlesNum" jdbcType="INTEGER" />
<result column="words_num" property="wordsNum" jdbcType="INTEGER" />
<result column="be_liked_num" property="beLikedNum" jdbcType="INTEGER" />
</resultMap>
<!-- Reusable column list, pulled into the select statements of this mapper through
     <include refid="Base_Column_List" />. -->
<sql id="Base_Column_List" >
_id, slug, nickname, update_time, latest_time, join_time, head_pic, gender, is_contract,
following_num, followers_num, articles_num, words_num, be_liked_num
</sql>
<!-- Returns every row of the user table; there is no filter and no ordering clause. -->
<select id="selectAll" resultMap="BaseResultMap">
select
<include refid="Base_Column_List" />
from user
</select>
<!-- Looks a user up by slug. The parameter is a whole User object, but only its slug
     property is read.
     NOTE(review): the statement itself does not limit the result to one row; confirm
     that slug is unique in the table. -->
<select id="selectBySlugName" resultMap="BaseResultMap" parameterType="User" >
select
<include refid="Base_Column_List" />
from user
where slug = #{slug}
</select>
<!-- Loads the row whose _id column equals the Integer parameter. -->
<select id="selectByPrimaryKey" resultMap="BaseResultMap" parameterType="java.lang.Integer" >
select
<include refid="Base_Column_List" />
from user
where _id = #{id,jdbcType=INTEGER}
</select>
<!-- Deletes the row whose _id column equals the Integer parameter. -->
<delete id="deleteByPrimaryKey" parameterType="java.lang.Integer" >
delete from user
where _id = #{id,jdbcType=INTEGER}
</delete>
<!-- Inserts a full row from a User object. The _id column is not part of the column
     list, so its value must come from the database (presumably an auto-generated key;
     confirm against the table definition). Null properties are bound as they are. -->
<insert id="insert" parameterType="User" >
insert into user (slug, nickname,
update_time, latest_time, join_time,
head_pic, gender, is_contract,
following_num, followers_num, articles_num,
words_num, be_liked_num)
values (#{slug,jdbcType=VARCHAR}, #{nickname,jdbcType=VARCHAR},
#{updateTime,jdbcType=VARCHAR}, #{latestTime,jdbcType=VARCHAR}, #{joinTime,jdbcType=VARCHAR},
#{headPic,jdbcType=VARCHAR}, #{gender,jdbcType=INTEGER}, #{isContract,jdbcType=INTEGER},
#{followingNum,jdbcType=INTEGER}, #{followersNum,jdbcType=INTEGER}, #{articlesNum,jdbcType=INTEGER},
#{wordsNum,jdbcType=INTEGER}, #{beLikedNum,jdbcType=INTEGER})
</insert>
<!-- Inserts only the properties that are not null. The first <trim> builds the column
     list and the second builds the matching value list; both use the same tests in the
     same order so columns and values stay aligned, and suffixOverrides="," removes the
     trailing comma left by the last emitted entry. Unlike the plain insert statement,
     _id is written when the id property is set. -->
<insert id="insertSelective" parameterType="User" >
insert into user
<trim prefix="(" suffix=")" suffixOverrides="," >
<if test="id != null" >
_id,
</if>
<if test="slug != null" >
slug,
</if>
<if test="nickname != null" >
nickname,
</if>
<if test="updateTime != null" >
update_time,
</if>
<if test="latestTime != null" >
latest_time,
</if>
<if test="joinTime != null" >
join_time,
</if>
<if test="headPic != null" >
head_pic,
</if>
<if test="gender != null" >
gender,
</if>
<if test="isContract != null" >
is_contract,
</if>
<if test="followingNum != null" >
following_num,
</if>
<if test="followersNum != null" >
followers_num,
</if>
<if test="articlesNum != null" >
articles_num,
</if>
<if test="wordsNum != null" >
words_num,
</if>
<if test="beLikedNum != null" >
be_liked_num,
</if>
</trim>
<trim prefix="values (" suffix=")" suffixOverrides="," >
<if test="id != null" >
#{id,jdbcType=INTEGER},
</if>
<if test="slug != null" >
#{slug,jdbcType=VARCHAR},
</if>
<if test="nickname != null" >
#{nickname,jdbcType=VARCHAR},
</if>
<if test="updateTime != null" >
#{updateTime,jdbcType=VARCHAR},
</if>
<if test="latestTime != null" >
#{latestTime,jdbcType=VARCHAR},
</if>
<if test="joinTime != null" >
#{joinTime,jdbcType=VARCHAR},
</if>
<if test="headPic != null" >
#{headPic,jdbcType=VARCHAR},
</if>
<if test="gender != null" >
#{gender,jdbcType=INTEGER},
</if>
<if test="isContract != null" >
#{isContract,jdbcType=INTEGER},
</if>
<if test="followingNum != null" >
#{followingNum,jdbcType=INTEGER},
</if>
<if test="followersNum != null" >
#{followersNum,jdbcType=INTEGER},
</if>
<if test="articlesNum != null" >
#{articlesNum,jdbcType=INTEGER},
</if>
<if test="wordsNum != null" >
#{wordsNum,jdbcType=INTEGER},
</if>
<if test="beLikedNum != null" >
#{beLikedNum,jdbcType=INTEGER},
</if>
</trim>
</insert>
<!-- Updates the row selected by _id, assigning only the columns whose property is not
     null; the <set> element supplies the SET keyword and drops the trailing comma.
     NOTE(review): when every tested property is null no assignment is emitted, which
     leaves "update user where ..."; callers should set at least one property. -->
<update id="updateByPrimaryKeySelective" parameterType="User" >
update user
<set >
<if test="slug != null" >
slug = #{slug,jdbcType=VARCHAR},
</if>
<if test="nickname != null" >
nickname = #{nickname,jdbcType=VARCHAR},
</if>
<if test="updateTime != null" >
update_time = #{updateTime,jdbcType=VARCHAR},
</if>
<if test="latestTime != null" >
latest_time = #{latestTime,jdbcType=VARCHAR},
</if>
<if test="joinTime != null" >
join_time = #{joinTime,jdbcType=VARCHAR},
</if>
<if test="headPic != null" >
head_pic = #{headPic,jdbcType=VARCHAR},
</if>
<if test="gender != null" >
gender = #{gender,jdbcType=INTEGER},
</if>
<if test="isContract != null" >
is_contract = #{isContract,jdbcType=INTEGER},
</if>
<if test="followingNum != null" >
following_num = #{followingNum,jdbcType=INTEGER},
</if>
<if test="followersNum != null" >
followers_num = #{followersNum,jdbcType=INTEGER},
</if>
<if test="articlesNum != null" >
articles_num = #{articlesNum,jdbcType=INTEGER},
</if>
<if test="wordsNum != null" >
words_num = #{wordsNum,jdbcType=INTEGER},
</if>
<if test="beLikedNum != null" >
be_liked_num = #{beLikedNum,jdbcType=INTEGER},
</if>
</set>
where _id = #{id,jdbcType=INTEGER}
</update>
<!-- Updates the row selected by _id and overwrites every other column with the value of
     the corresponding property, so a null property stores NULL in that column. -->
<update id="updateByPrimaryKey" parameterType="User" >
update user
set slug = #{slug,jdbcType=VARCHAR},
nickname = #{nickname,jdbcType=VARCHAR},
update_time = #{updateTime,jdbcType=VARCHAR},
latest_time = #{latestTime,jdbcType=VARCHAR},
join_time = #{joinTime,jdbcType=VARCHAR},
head_pic = #{headPic,jdbcType=VARCHAR},
gender = #{gender,jdbcType=INTEGER},
is_contract = #{isContract,jdbcType=INTEGER},
following_num = #{followingNum,jdbcType=INTEGER},
followers_num = #{followersNum,jdbcType=INTEGER},
articles_num = #{articlesNum,jdbcType=INTEGER},
words_num = #{wordsNum,jdbcType=INTEGER},
be_liked_num = #{beLikedNum,jdbcType=INTEGER}
where _id = #{id,jdbcType=INTEGER}
</update>
</mapper>
第三步、将jsp目录下index.jsp的内容替换为以下代码。
<%--
Created by IntelliJ IDEA.
User: ttc
Date: 2018/7/6
Time: 14:06
To change this template use File | Settings | File Templates.
--%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>简书爬虫</title>
<style>
/* Page styles: a fixed full-window background layer (#s-skin-container) placed behind a
   centred column (#s-container) that holds the logo and the submit form. */
html {
height: 100%;
overflow-y: scroll;
}
body {
margin: 0px;
padding: 0px;
height: 100%;
}
/* NOTE(review): no element on this page uses the a-none-decoration class. */
.a-none-decoration {
text-decoration: none;
}
#wrapper {
height: 100%;
min-width: 1000px;
}
#s-skin-container {
height: 100%;
width: 100%;
min-width: 1000px;
-webkit-background-size: cover; /* scale the background image with the browser window size */
background-size:cover;/* scale the background image with the browser window size */
background-color: rgb(255,255,255);
background-image: url("https://ss3.bdstatic.com/iPoZeXSm1A5BphGlnYG/skin/486.jpg") ;
background-repeat:no-repeat;
position: fixed;
left: 0;
top: 0;
z-index: -10;
}
#s-container {
width: 641px;
margin: 0 auto;
text-align: center;
padding-top: 80px;
}
/* NOTE(review): the page body contains no element with id s_kw_wrap. */
#s-container #s_kw_wrap {
position: relative;
}
#s-container .s_ipt {
width: 480px;
padding: 10px 50px 10px 7px;
}
#s-container .s_btn {
width: 100px;
height: 38px;
cursor: pointer;
font-size: 16px;
}
/* NOTE(review): the page body contains no element with class soutu-btn. */
#s-container .soutu-btn {
display: inline-block;
height: 16px;
width: 18px;
position: absolute; /* by default absolute positioning is relative to the browser window; only when the parent element is relatively positioned does it apply relative to the parent */
right: 10px;
top: 1px;
background-image: url(https://ss1.bdstatic.com/5eN1bjq8AAUYm2zgoY3K/r/www/cache/static/protocol/https/soutu/img/camera_new_5606e8f.png);
}
#s-container .soutu-btn:hover {
background-position: 0 -20px;
}
</style>
</head>
<body>
<%-- Page layout: a background layer plus a centred block with the logo and one form. --%>
<div id="wrapper">
<div id="s-skin-container"></div>
<div id="s-container">
<%-- The logo is served from the application's own static directory. --%>
<img id="s_lg_img" src="${pageContext.request.contextPath}/static/images/jianshubg.png" width="225" height="168"><br/><br />
<%-- Posts the text field as request parameter "slug" to <context path>/submit. --%>
<form action="${pageContext.request.contextPath}/submit" method="post">
<input type="text" id="url" name="slug" class="s_ipt" maxlength="100" placeholder="将你的简书用户的ID贴在这里,然后提交">
<%-- NOTE(review): this anchor has no content and no script on this page is bound to it. --%>
<a href="javascript:;" id="quickdelete" title="清空" class="quickdelete" style="top: 0px; right: 0px;"></a>
<input type="submit" value="提交" class="s_btn">
</form>
</div>
</div>
</body>
</html>
第四步、在webapp目录下创建static目录,用于存放页面用到的图片和js等静态资源。
static包
提取码:1y4w
如下图:
第五步、在util包中创建一个Spider类:
package com.neusoft.util;
import com.neusoft.bean.User;
import com.neusoft.mapper.UserMapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Fetches the public profile page of one Jianshu user and stores what it finds.
 *
 * <p>The page {@code https://jianshu.com/u/<slug>} is downloaded with jsoup, its
 * {@code .main-top} section is read for nickname, avatar URL, gender and the counter
 * values, and the result is written as a new row through {@link UserMapper#insert(User)}.
 * Every value that was read is also printed to standard output.
 */
@Component
public class Spider {

    /**
     * Format of the stored crawl time. Lowercase {@code yyyy} is the calendar year;
     * uppercase {@code YYYY} is the week year, which differs from the calendar year on
     * some days around New Year.
     */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

    @Autowired
    UserMapper userMapper;

    /**
     * Crawls the profile identified by {@code slug} and inserts one {@link User} row.
     *
     * <p>The counters are only filled in when the page shows five or six
     * {@code div.meta-block p} entries; with any other count they are stored as
     * {@code null}. A sixth entry is printed but not stored.
     *
     * @param slug user identifier appended to the profile URL; it is used as given
     * @throws IOException if jsoup cannot fetch or read the page
     * @throws NumberFormatException if one of the first five counters is not a plain integer
     */
    public void getUserInfo(String slug) throws IOException {
        Document document = Jsoup.connect("https://jianshu.com/u/" + slug).get();
        Elements header = document.select(".main-top");

        String nickname = header.select("a.name").text();
        // "abs:src" asks jsoup for the image address resolved against the page URL.
        String headPic = header.select("a img").attr("abs:src");
        System.out.println("img的地址:" + headPic);
        System.out.println("作者的昵称是:" + nickname);

        Integer gender = parseGender(header.select("div.title i"));

        Integer followingNum = null;
        Integer followersNum = null;
        Integer articlesNum = null;
        Integer wordsNum = null;
        Integer beLikedNum = null;
        Elements counters = header.select("div.meta-block p");
        int counterCount = counters.size();
        if (counterCount == 5 || counterCount == 6) {
            // Parse everything first so that a malformed counter fails before any
            // counter line is printed.
            followingNum = Integer.parseInt(counters.get(0).text());
            followersNum = Integer.parseInt(counters.get(1).text());
            articlesNum = Integer.parseInt(counters.get(2).text());
            wordsNum = Integer.parseInt(counters.get(3).text());
            beLikedNum = Integer.parseInt(counters.get(4).text());
            System.out.println("关注数:" + counters.get(0).text());
            System.out.println("粉丝数:" + counters.get(1).text());
            System.out.println("文章数:" + counters.get(2).text());
            System.out.println("字数:" + counters.get(3).text());
            System.out.println("收获的喜欢:" + counters.get(4).text());
            if (counterCount == 6) {
                System.out.println("简书钻:" + counters.get(5).text());
            }
        }

        // A new formatter per call: SimpleDateFormat instances are not thread-safe.
        String crawledAt = new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date());

        User user = new User();
        user.setSlug(slug);
        user.setNickname(nickname);
        user.setHeadPic(headPic);
        user.setGender(gender);
        user.setFollowingNum(followingNum);
        user.setFollowersNum(followersNum);
        user.setArticlesNum(articlesNum);
        user.setWordsNum(wordsNum);
        user.setBeLikedNum(beLikedNum);
        user.setLatestTime(crawledAt);
        userMapper.insert(user);
    }

    /**
     * Derives the gender code from the icon element next to the user's name.
     *
     * @param genderIcon result of selecting {@code div.title i} in the profile header
     * @return {@code 0} when there is no icon, {@code 1} when the part of the class
     *         attribute after the first hyphen is {@code man}, {@code 2} when it is
     *         {@code woman}, and {@code null} for any other class value (including one
     *         without a hyphen)
     */
    private static Integer parseGender(Elements genderIcon) {
        if (genderIcon.size() == 0) {
            System.out.println("不知道性别!");
            return 0;
        }
        String[] classParts = genderIcon.attr("class").split("-");
        // Guard against a class value with no hyphen instead of indexing blindly.
        String sex = classParts.length > 1 ? classParts[1] : "";
        if ("man".equals(sex)) {
            System.out.println("作者是男的");
            return 1;
        }
        if ("woman".equals(sex)) {
            System.out.println("作者是女的");
            return 2;
        }
        return null;
    }
}
然后启动Tomcat,在页面中输入需要爬取的简书用户ID并提交,爬取到的数据就会保存到数据库中。
如图: