跳到主要内容

AcWing 做题数爬虫

依赖:

    <dependencies>
<!-- jsoup: HTML parser; the class below uses it to parse the fetched pages and select elements -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<!-- Apache HttpClient: the class below uses it to issue the HTTP GET requests -->
<!-- NOTE(review): both versions are pinned to old releases; check for newer patch releases before reuse -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.9</version>
</dependency>
</dependencies>

代码:

/**
 * Scraper that totals the solved-problem count shown in a user's AcWing activity feed.
 *
 * <p>The activity pages are downloaded with Apache HttpClient (5 pages by default), their HTML
 * is concatenated, and jsoup is used to pick out the counter spans whose numbers are summed.
 *
 * <p>Date: 2021-12-08 9:22 PM
 */
public class HttpClientDownPage {

    /** Number of activity pages fetched by {@link #sendGet(String)}. */
    private static final int DEFAULT_PAGE_COUNT = 5;

    /**
     * Charset passed to {@code EntityUtils.toString} as the default, i.e. it is only applied
     * when the response entity does not declare a charset itself.
     */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Downloads the first {@value #DEFAULT_PAGE_COUNT} pages below {@code url} and returns
     * their bodies concatenated.
     *
     * @param url base URL; the page number is appended as {@code url + "/" + page}
     * @return the concatenated bodies of all pages that answered with HTTP 200; possibly
     *         empty or partial if a request failed
     */
    public static String sendGet(String url) {
        return sendGet(url, DEFAULT_PAGE_COUNT);
    }

    /**
     * Downloads pages {@code 1..pageCount} below {@code url} and returns their bodies
     * concatenated.
     *
     * <p>Pages that do not answer with HTTP 200 are reported on standard output and left out
     * of the result. An I/O failure stops the download; whatever was collected up to that
     * point is still returned (best effort), and the stack trace is printed.
     *
     * @param url       base URL; the page number is appended as {@code url + "/" + page}
     * @param pageCount number of pages to request; values below 1 request nothing
     * @return the concatenated bodies of the successfully fetched pages
     */
    public static String sendGet(String url, int pageCount) {
        // Socket and connect timeouts are both 2000 ms, as in the original configuration.
        RequestConfig requestConfig = RequestConfig.custom()
                .setCookieSpec(CookieSpecs.STANDARD)
                .setSocketTimeout(2000)
                .setConnectTimeout(2000)
                .build();
        StringBuilder html = new StringBuilder();

        // try-with-resources closes the client even when a request throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            for (int page = 1; page <= pageCount; page++) {
                HttpGet request = new HttpGet(url + "/" + page);
                request.setConfig(requestConfig);

                // Each response is closed before the next request is issued; previously only
                // the response of the last iteration was closed.
                try (CloseableHttpResponse response = httpClient.execute(request)) {
                    HttpEntity httpEntity = response.getEntity();
                    // A response may carry no entity; EntityUtils.toString rejects null.
                    String body = httpEntity == null
                            ? ""
                            : EntityUtils.toString(httpEntity, FALLBACK_CHARSET);

                    if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
                        html.append(body);
                    } else {
                        // Non-200 (e.g. 404): report it and skip this page.
                        System.out.println("返回状态不是200");
                        System.out.println(body);
                    }
                }
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            // Best effort: keep what was downloaded so far.
            e.printStackTrace();
        }
        return html.toString();
    }

    /**
     * Sums the counters found in the given document.
     *
     * <p>Every {@code span} matched by the style selector is split on single spaces and its
     * second token is read as an integer. Spans that have no second token, or whose second
     * token is not an integer, are skipped instead of aborting the whole run.
     *
     * @param document parsed HTML of the concatenated activity pages
     * @return the sum of all counters found; {@code 0} if none matched
     */
    private static Integer paraseList(Document document) {
        Elements elements = document.select("span[style=color: #6a737c;]");
        int total = 0; // primitive accumulator avoids boxing on every addition
        for (Element element : elements) {
            String[] msg = element.text().split(" ");
            if (msg.length < 2) {
                continue;
            }
            try {
                total += Integer.parseInt(msg[1]);
            } catch (NumberFormatException ignored) {
                // The selector matched a span whose second token is not a number; skip it.
            }
        }
        return total;
    }

    /**
     * Returns the total computed from the activity pages of the given user.
     *
     * @param id user id as it appears in the activity URL
     * @return the summed counters from the fetched pages
     */
    public static Integer getAcwing(String id) {
        String html = sendGet("https://www.acwing.com/user/myspace/activity/" + id);
        return paraseList(Jsoup.parse(html));
    }

    /** Prints the total for a sample user id. */
    public static void main(String[] args) {
        System.out.println("做题数:" + getAcwing("129659"));
    }
}