Java课的小作业:通过 URL 获取百度搜索结果页面的 HTML,并用正则表达式提取其中的结果个数。
package web;
import java.io.*;
import java.net.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.applet.Applet;
import java.net.*;
import java.awt.*;
import java.awt.event.*;
/**
 * Applet that sends a keyword to Baidu, downloads the result page and shows the
 * "about N results" phrase found in the returned HTML.
 *
 * <p>The search runs synchronously inside the button's action handler, so the UI
 * is blocked until the page has been downloaded.
 */
public class GetUrl extends Applet implements ActionListener {
    /**
     * Literal Baidu phrase for "about ... related results", followed by a count made
     * of digits and thousands separators, followed by the counter word.
     *
     * <p>The Chinese characters are written as Unicode escapes so the pattern does not
     * depend on the encoding the source file is compiled with. The phrase is matched
     * as an exact sequence (not as a character class), and the count is restricted to
     * digits/commas so the match cannot run on to a later counter word on the same line.
     */
    private static final Pattern RESULT_COUNT_PATTERN =
            Pattern.compile("\u76f8\u5173\u7ed3\u679c\u7ea6([0-9][0-9,]*)\u4e2a");

    /** Input field for the search keyword. */
    TextField keyword = new TextField(20);
    /** Output field showing the result-count phrase extracted from the page. */
    TextField show = new TextField(20);
    /** Drop-down list of available search engines; populated in {@link #init()}. */
    Choice EngineName;
    /** Button that starts the search. */
    Button go = new Button("开始搜索");

    /** Builds the UI: keyword field, result field, engine selector and search button. */
    @Override
    public void init() {
        // White background so the applet blends in with the hosting web page.
        setBackground(Color.white);

        // Only one engine is offered; goSearch() always queries Baidu.
        EngineName = new Choice();
        EngineName.addItem("百度搜索");
        EngineName.select(0);

        add(keyword);
        add(show);
        add(EngineName);
        add(go);
        go.addActionListener(this);
    }

    /**
     * Runs a search when the search button is pressed and reports any failure in the
     * applet status line instead of propagating it.
     *
     * @param e the action event delivered by AWT
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        if (e.getSource() != go) {
            return;
        }
        try {
            goSearch();
        } catch (Exception e1) {
            showStatus("搜索时发生异常:" + e1.toString());
        }
    }

    /**
     * Reads the keyword, fetches the Baidu result page for it and displays the
     * result-count phrase in the {@code show} field. Progress and the "no result"
     * case are reported through the applet status line.
     *
     * @throws Exception if the URL cannot be built or the page cannot be downloaded
     */
    public void goSearch() throws Exception {
        String str = keyword.getText();
        // Treat a whitespace-only keyword like an empty one rather than querying for it.
        if (str == null || str.trim().length() == 0) {
            showStatus("请填写搜索的关键字!");
            return;
        }

        // The keyword must be URL-encoded before it is appended to the query string.
        String url = "http://www.baidu.com/s?wd=" + URLEncoder.encode(str, "UTF-8");
        showStatus("正在连接搜索引擎" + url);

        String html = GetData(new URL(url));
        String found = extractResultCount(html);
        if (found != null) {
            showStatus("获取完毕");
            show.setText(" " + found);
        } else {
            showStatus("没有结果");
        }
    }

    /**
     * Finds the first result-count phrase in a page.
     *
     * @param html page text to scan; may be {@code null}
     * @return the complete matched phrase (prefix, count and counter word), or
     *         {@code null} when {@code html} is {@code null} or contains no such phrase
     */
    static String extractResultCount(String html) {
        if (html == null) {
            return null;
        }
        Matcher matcher = RESULT_COUNT_PATTERN.matcher(html);
        return matcher.find() ? matcher.group() : null;
    }

    /**
     * Downloads the content behind {@code url} and decodes it as UTF-8.
     *
     * @param url the address to fetch
     * @return the response body decoded as UTF-8 text
     * @throws Exception if the connection cannot be opened or reading fails
     */
    public static String GetData(URL url) throws Exception {
        // readInputStream() takes ownership of the stream and closes it.
        byte[] data = readInputStream(url.openStream());
        // Decode explicitly as UTF-8 instead of relying on the platform charset.
        return new String(data, "utf-8");
    }

    /**
     * Reads {@code in} until end of stream and closes it, also when reading fails.
     *
     * @param in the stream to drain; it is closed before this method returns or throws
     * @return all bytes read from the stream
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readInputStream(InputStream in) throws Exception {
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buffer = new byte[4096];
            int bytes;
            while ((bytes = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytes);
            }
            return out.toByteArray();
        } finally {
            in.close();
        }
    }
}
以上です