目标:获取博客园首页(https://www.cnblogs.com/)的标题与口号。
HTML 代码:
<title>博客园 - 开发者的网上家园</title>
pom.xml
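<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.2</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>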
代码:
import org.apache.http.HttpEntity;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClients;import org.apache.http.util.EntityUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;public class HttpClientjsouDemo01 { public static void main(String[] args) throws Exception { //创建实例化对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //创建实例化对象httpget HttpGet httpGet = new HttpGet("http://www.cnblogs.com/"); //执行get请求 CloseableHttpResponse response = httpClient.execute(httpGet); //返回实体整个网页内容 HttpEntity entity = response.getEntity(); String toString = EntityUtils.toString(entity,"utf-8"); response.close();//关闭请求流释放系统资源 //解析网页,得到文档对象 Document doc = Jsoup.parse(toString); //获取tag 是title的所有dom元素 Elements elements = doc.getElementsByTag("title"); //获取第一个元素 Element element = elements.get(0); //返回元素的文本 String text = element.text(); System.out.println("网页标题是:"+elements); Element element2=doc.getElementById("site_nav_top"); // 获取id=site_nav_top的DOM元素 String navTop=element2.text(); // 返回元素的文本 System.out.println("口号:"+navTop); }}
运行结果:
网页标题是:博客园 - 开发者的网上家园
口号:代码改变世界

Process finished with exit code 0