设计一个JAVA程序,下载由URL指定的网页的源代码,找出其中所有的超链接...
发布网友
发布时间:2022-04-24 07:07
我来回答
共4个回答
热心网友
时间:2022-05-13 06:59
import java.awt.BorderLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;
/**
 * Swing window that downloads the page at a user-supplied HTTP(S) URL and
 * lists the {@code href} target of every anchor tag found in it.
 *
 * <p>Type a URL into the text field and press Enter. The download runs on a
 * background worker so the window is not blocked while waiting for the
 * network; the outcome (the links, or an error message) is written to the
 * text area when the worker finishes.
 */
public class HttpViewer extends JFrame {
    private static final long serialVersionUID = 1L;

    /** Browser-like User-Agent header sent with each request. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 60000;

    private JTextField urlInput;
    private JTextArea viewArea;

    /**
     * Launches the viewer. The window is created on the event dispatch
     * thread, as Swing requires for component construction.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new HttpViewer();
            }
        });
    }

    /** Builds the window, wires up the URL field and shows the frame. */
    public HttpViewer() {
        this.setTitle("Http Viewer");
        this.setSize(800, 600);
        this.setResizable(false);
        this.setDefaultCloseOperation(EXIT_ON_CLOSE);
        initPanel();
        initAction();
        this.setVisible(true);
    }

    /** Lays out the window: URL field at the top, scrollable result area below. */
    private void initPanel() {
        JPanel northPanel = new JPanel();
        JLabel urlInputLabel = new JLabel("URL:");
        urlInput = new JTextField(60);
        northPanel.add(urlInputLabel);
        northPanel.add(urlInput);
        this.add(northPanel, BorderLayout.NORTH);
        JPanel centerPanel = new JPanel();
        viewArea = new JTextArea(27, 60);
        centerPanel.add(new JScrollPane(viewArea));
        this.add(centerPanel);
    }

    /** Registers the Enter-key handler on the URL field. */
    private void initAction() {
        urlInput.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                String text = urlInput.getText();
                if (text == null || text.trim().length() == 0) {
                    viewArea.setText("您没有输入URL");
                    return;
                }
                URL url;
                try {
                    url = new URL(text.trim());
                } catch (MalformedURLException e1) {
                    viewArea.setText("您输入的URL不合法:" + text);
                    return;
                }
                searchInBackground(url);
            }
        });
    }

    /**
     * Downloads {@code url} and extracts its links on a worker thread, then
     * shows the result in the text area on the event dispatch thread.
     *
     * <p>The URL field is disabled while a request is in flight so that two
     * overlapping requests cannot overwrite each other's output.
     *
     * @param url the page to scan
     */
    private void searchInBackground(final URL url) {
        viewArea.setText("查找URL中:\n");
        urlInput.setEnabled(false);
        new SwingWorker<String, Void>() {
            @Override
            protected String doInBackground() {
                return buildReport(url);
            }

            @Override
            protected void done() {
                urlInput.setEnabled(true);
                urlInput.requestFocusInWindow();
                try {
                    viewArea.setText(get());
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                } catch (ExecutionException ex) {
                    // Only reachable for an unexpected runtime failure in the worker.
                    viewArea.setText("IOException: " + url + "\n" + ex.getCause());
                }
            }
        }.execute();
    }

    /**
     * Produces the text to display for {@code url}: the list of links on
     * success, or an error message on failure. Touches no Swing component,
     * so it is safe to call from a worker thread.
     *
     * @param url the page to scan
     * @return the text for the result area
     */
    private static String buildReport(URL url) {
        String html;
        try {
            html = getContent(url);
        } catch (FetchException ex) {
            return ex.getMessage();
        } catch (IOException ex) {
            return "IOException: " + url + "\n" + ex;
        }
        StringBuilder report = new StringBuilder("查找URL中:\n");
        for (String link : LinkExtractor.extract(html)) {
            report.append(link).append('\n');
        }
        return report.toString();
    }

    /**
     * Downloads the body of {@code url} as text.
     *
     * @param url an {@code http} or {@code https} URL
     * @return the response body, with every line terminated by {@code '\n'}
     * @throws IOException if the URL is not HTTP(S), the server answers with
     *         no valid status or a status of 400 or above, or the transfer fails
     */
    private static String getContent(URL url) throws IOException {
        URLConnection raw = url.openConnection();
        // file:, ftp:, jar: ... yield other connection types; reject them
        // instead of failing with a ClassCastException.
        if (!(raw instanceof HttpURLConnection)) {
            throw new FetchException("不支持的协议:" + url.getProtocol());
        }
        HttpURLConnection con = (HttpURLConnection) raw;
        try {
            con.setRequestProperty("User-Agent", USER_AGENT);
            con.setConnectTimeout(TIMEOUT_MILLIS);
            con.setReadTimeout(TIMEOUT_MILLIS);
            int responseCode = con.getResponseCode();
            if (responseCode == -1) {
                throw new FetchException("连接失败:" + url.toString());
            }
            if (responseCode >= 400) {
                throw new FetchException("请求失败,错误码:" + responseCode);
            }
            StringBuilder builder = new StringBuilder();
            // Decode as UTF-8 rather than the platform default so the result
            // does not depend on the machine the program runs on.
            try (InputStream is = con.getInputStream();
                    BufferedReader br = new BufferedReader(
                            new InputStreamReader(is, StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // Keep a separator between lines; gluing them together
                    // would turn "<a\nhref=..." into "<ahref=...".
                    builder.append(line).append('\n');
                }
            }
            return builder.toString();
        } finally {
            con.disconnect();
        }
    }

    /** Failure whose message is already suitable for showing to the user. */
    private static final class FetchException extends IOException {
        private static final long serialVersionUID = 1L;

        FetchException(String message) {
            super(message);
        }
    }

    /**
     * Pure text helper that pulls {@code href} values out of anchor tags.
     * Kept free of any Swing dependency so it can be used and tested without
     * a display.
     */
    static final class LinkExtractor {
        /**
         * An opening {@code <a ...>} tag; group 1 is the attribute text. The
         * single mandatory whitespace keeps tags such as {@code <abbr>} out
         * and avoids the nested quantifier that made the previous pattern
         * backtrack exponentially.
         */
        private static final Pattern ANCHOR_TAG =
                Pattern.compile("<a\\s([^>]*)>", Pattern.CASE_INSENSITIVE);

        /**
         * One {@code name=value} attribute. Group 1 is the name; the value is
         * in group 2 (double-quoted), 3 (single-quoted) or 4 (unquoted).
         */
        private static final Pattern ATTRIBUTE = Pattern.compile(
                "([^\\s=/\"'<>]+)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))");

        private LinkExtractor() {
        }

        /**
         * Returns the {@code href} value of every anchor tag in {@code html},
         * in document order. Tag and attribute names are matched
         * case-insensitively; quoted and unquoted values are supported.
         * Anchors without an {@code href}, or with a blank one, are skipped.
         * Only the first {@code href} of a tag is reported.
         *
         * @param html page source; {@code null} is treated as empty
         * @return the link targets, possibly empty, never {@code null}
         */
        static List<String> extract(String html) {
            List<String> links = new ArrayList<String>();
            if (html == null || html.isEmpty()) {
                return links;
            }
            Matcher tag = ANCHOR_TAG.matcher(html);
            while (tag.find()) {
                Matcher attr = ATTRIBUTE.matcher(tag.group(1));
                while (attr.find()) {
                    if (!"href".equalsIgnoreCase(attr.group(1))) {
                        continue;
                    }
                    String value = attr.group(2);
                    if (value == null) {
                        value = attr.group(3);
                    }
                    if (value == null) {
                        value = attr.group(4);
                    }
                    value = value.trim();
                    if (!value.isEmpty()) {
                        links.add(value);
                    }
                    break;
                }
            }
            return links;
        }
    }
}
热心网友
时间:2022-05-13 08:17
// 下载JSoup然后几行代码就可以
Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").get();
Elements links = doc.select("a[href]");
for (Element link : links) {
String linkHref =link.attr("href"); // 得到链接
String linkText =link.text(); // 得到显示的文字
}
追问:都说小白了,不知道怎么弄
热心网友
时间:2022-05-13 09:52
你交作业吧?
这应该是基础的东西吧?一个抓取,加正则,一个数据库读取写入。两个都不是太难的东西。
追问:是的,我不会,时间也急