Java 多线程下载小说网站

评价:
0
(0用户)

aaa

package xiaoshuo; import java.text.DecimalFormat; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; import org.apache.xml.utils.URI; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; public class 小说多线程 { static class MyRunnable implements Runnable { private String href; private String title; private String filePath; private String fileName; //原子数 private static AtomicInteger num = new AtomicInteger(); public MyRunnable(String href, String title, String filePath, String fileName) { this.href = href; this.title = title; this.filePath = filePath; this.fileName = fileName; } @Override public void run() { try { //发送请求 String html = 小说Utils.getHttps(href); if (html != null && !"".equals(html)) { //解析内容 Document document = Jsoup.parse(html); Element element = document.getElementById("content"); List<Node> nodes = element.childNodes(); //提取文本 StringBuffer sb = new StringBuffer(); for (Node node : nodes) { String line = ""; if ("#text".equals(node.nodeName())) { line = node.outerHtml(); } else { List<Node> pNode = node.childNodes(); for (Node p : pNode) { line += p.outerHtml(); } } if (!"".equals(line.replace("&nbsp;", " ").replaceAll("\s", ""))) { if (!" 
&nbsp;&nbsp;&nbsp;&nbsp;天才一秒记住本站地址<br><br> ".equals(line)) { sb.append("\t").append(line.replace("&nbsp;", "")).append("\n\n"); } } } String threadName = Thread.currentThread().getName().replace("pool-1-", ""); System.out.println(String.format("%-10s", threadName) + "\t" + num.incrementAndGet() + "\t" + fileName.replace(title, "") + "\t" + title); //写出文件 小说Utils.writeTxt(filePath, fileName, title + "\n\n" + sb.toString()); //延时间隔 Thread.sleep(100); } } catch (Exception e) { e.printStackTrace(); } } } /** 合并文件 */ /*public static void main(String[] args) { String filePath = "C:\\Users\\Administrator\\Desktop\\星空彼岸"; 小说Utils.mergeFile(filePath); }*/ public static void main(String[] args) throws Exception { long startTime = System.currentTimeMillis(); String filePath = "C:\\Users\\Administrator\\Desktop\\星空彼岸"; String url = "http://www.xxx.com/book/25644/"; String html = 小说Utils.getHttps(url); Document document = Jsoup.parse(html); Elements chapter = document.getElementById("list").getElementsByTag("a"); URI uri = new URI(url); String domain = uri.getScheme() + "://" + uri.getHost(); 小说Utils.clearFile(filePath); // 创建线程池 ExecutorService executor = Executors.newFixedThreadPool(100); for (int i = 0; i < chapter.size(); i++) { String href = domain + chapter.get(i).attr("href"); String title = chapter.get(i).html(); //String fileName = arr[0] + "_" + title.replaceAll("[/\\:*?\"<>|]", " "); String fileName = String.format("%04d", i + 1) + "_" + title.replaceAll("[^0-9a-zA-Z\u4e00-\u9fa5]-.()()", " ").trim(); //创建线程 MyRunnable myRunnable = new MyRunnable(href, title, filePath, fileName); //执行线程 executor.execute(myRunnable); Thread.sleep(100); } //执行完毕后停止线程池 executor.shutdown(); //阻塞主线程 判断所有线程是否执行完毕 必须放在shutdown后 while (!executor.isTerminated()) { Thread.sleep(1000); } long endTime = System.currentTimeMillis(); System.out.println("OVER!\t耗时: " + new DecimalFormat(".00").format((endTime - startTime) / 1000d) + " 秒"); } }

 








 

本文为原创文章,转载请注明出处!

注册并通过认证的用户才可以进行评价!

2 thoughts on “Java 多线程下载小说网站”

  1. /** 执行Http请求 */
    /**
     * Executes the GET request and returns the response body as a string.
     *
     * <p>When an attempt throws, the method waits three seconds and tries again for as long
     * as the number of the failed attempt is at most 3; called with {@code num = 1} this
     * gives up to four attempts. The client is closed before the method returns, whether
     * the request succeeded or not, so it cannot be reused by the caller.
     *
     * @param client HTTP client used to send the request; closed by this method
     * @param get    request to execute
     * @param num    number of the first attempt; {@code null} is treated as 1
     * @return the response body, or {@code null} when the response carries no entity, all
     *         attempts failed, or the thread was interrupted while waiting to retry
     */
    public static String execute(CloseableHttpClient client, HttpGet get, Integer num) {
        int attempt = (num == null) ? 1 : num;
        try {
            while (true) {
                CloseableHttpResponse response = null;
                try {
                    response = client.execute(get);
                    HttpEntity entity = response.getEntity();
                    if (entity == null) {
                        return null;
                    }
                    // NOTE(review): UTF-8 is passed as the charset here; earlier drafts of this
                    // method decoded as GBK (optionally after gzip decompression). Confirm which
                    // encoding the target site actually serves.
                    return EntityUtils.toString(entity, StandardCharsets.UTF_8);
                } catch (Exception e) {
                    if (attempt > 3) {
                        System.err.println("请求失败! " + e.getMessage() + "\t" + get.getURI());
                        return null;
                    }
                } finally {
                    // Release each attempt's response before the next attempt starts.
                    if (response != null) {
                        try {
                            response.close();
                        } catch (IOException closeError) {
                            closeError.printStackTrace();
                        }
                    }
                }

                attempt++;
                try {
                    // Back off before retrying.
                    Thread.sleep(3000);
                } catch (InterruptedException interrupted) {
                    // Stop retrying and keep the interrupt visible to the caller.
                    Thread.currentThread().interrupt();
                    System.err.println("请求失败! interrupted while waiting to retry\t" + get.getURI());
                    return null;
                }
            }
        } finally {
            if (client != null) {
                try {
                    client.close();
                } catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

  2. /** 发送请求 */
    public static String getHttps(String url) {
    try {
    SSLContext sslc = SSLContexts.custom().loadTrustMaterial(null, new TrustSelfSignedStrategy()).build();
    SSLConnectionSocketFactory factory = new SSLConnectionSocketFactory(sslc, NoopHostnameVerifier.INSTANCE);
    RequestConfig config = RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).setConnectionRequestTimeout(30000).build();
    HttpClientBuilder custom = HttpClients.custom();
    custom.setSSLSocketFactory(factory);
    custom.setDefaultRequestConfig(config);
    //创建HttpClient对象
    CloseableHttpClient client = custom.build();
    // 创建请求
    HttpGet get = new HttpGet(url);
    // 请求头设置
    get.setHeader(“Accept”, “text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8”);
    get.setHeader(“Accept-Language”, “zh-CN,zh;q=0.9”);
    //get.setHeader(“Accept-Encoding”, “gzip, deflate, br”);
    get.setHeader(“Content-Type”, “text/html; charset=GB2312”);
    get.setHeader(“User-Agent”, “Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36”);
    get.setHeader(“Connection”, “keep-alive”);
    return execute(client, get, 1);
    } catch (Exception e) {
    e.printStackTrace();
    }
    return null;
    }

发表评论