import java.io.IOException;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Document;
5 import org.jsoup.nodes.Element;
6 import org.jsoup.select.Elements;
7
8
9 public class WyCrawler {
10 public static void main(String[] args) {
11 try {
12 Document document = Jsoup.connect("http://某网页").timeout(3000).get();
13 String selector = "li>div[class=titleBar clearfix]>h3>a";
14 Elements elements = document.select(selector);
15 for(Element element:elements){
16 // System.out.println(element.text());
17 String url = element.absUrl("href");
18 Document document2 = Jsoup.connect(url).get();
19 Elements elements2 = document2.select("#endText");
20 for(Element element2:elements2){
21 System.out.println(element2.text());
22 }
23 }
24 } catch (IOException e) {
25 e.printStackTrace();
26 }
27 }
28 }
上面的示例演示了如何先抓取列表页中的超链接,再进入每个链接抓取其中的正文内容;下面的示例比较简单,只抓取单个页面中的标题和内容。
import java.io.IOException;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Document;
5 import org.jsoup.nodes.Element;
6 import org.jsoup.select.Elements;
7
8
9 public class Test {
10 public static void main(String[] args) {
11 try {
12 Document document = Jsoup.connect("http://www.某网页.com/").get();
13 //获取内容
14 // String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>div[class=item-content]";
15 // Elements elements = document.select(selector);
16 // for(Element element:elements){
17 // System.out.println(element.text());
18 // }
19
20 //获取标题
21 // String selector2 = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>h2[class=item-title]";
22 // Elements elements = document.select(selector2);
23 // for(Element element:elements){
24 // System.out.println(element.text());
25 // }
26
27 //综合写法,标题内容一起获取
28 String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]";
29 Elements elements = document.select(selector);
30 for(Element element:elements){
31 Elements titles = element.select("div[class=item-title]");
32 Elements content = element.select("h2[class=item-content]");
33 System.out.println(titles.text()+"\n"+content.text());
34 }
35
36
37
38 } catch (IOException e) {
39 e.printStackTrace();
40 }
41 }
42 }