Java 多线程下载小说网站

aaa

package xiaoshuo;

import java.text.DecimalFormat;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

import org.apache.xml.utils.URI;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

/**
 * Downloads every chapter linked from a novel's index page, one task per chapter,
 * using a fixed-size thread pool. Each chapter is written to its own text file.
 *
 * <p>The chapter files can afterwards be merged into a single file by calling
 * {@code 小说Utils.mergeFile(filePath)} with the same output directory.
 *
 * <p>NOTE(review): {@code 小说Utils} is defined elsewhere; its methods are presumed to
 * fetch a page body ({@code getHttps}), empty the output directory ({@code clearFile})
 * and write one text file ({@code writeTxt}) - confirm against that class.
 */
public class 小说多线程 {

    /** Number of worker threads in the download pool. */
    private static final int THREAD_COUNT = 100;

    /** Pause, in milliseconds, after each submitted task and after each finished download. */
    private static final long DELAY_MILLIS = 100L;

    /**
     * Characters that are NOT allowed in a chapter file name: anything other than ASCII
     * letters and digits, common CJK ideographs, dot, hyphen and parentheses.
     * The original pattern had the punctuation outside the character class, so it almost
     * never matched and illegal characters reached the file system.
     * NOTE(review): full-width parentheses are allowed on the assumption that the second
     * "()" pair in the original pattern was full-width - confirm.
     */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS =
            Pattern.compile("[^0-9a-zA-Z\u4e00-\u9fa5.()\uff08\uff09-]");

    /**
     * One download job: fetches a single chapter page, extracts the text found under the
     * element with id {@code content}, and writes it to a text file.
     */
    static class MyRunnable implements Runnable {

        /** Matches any single whitespace character (regex class, not the Java 15 space escape). */
        private static final Pattern WHITESPACE = Pattern.compile("\\s");

        /**
         * Site watermark paragraph that must not end up in the saved chapter. It is compared
         * against the trimmed line so surrounding spaces or line breaks do not matter.
         */
        private static final String WATERMARK_LINE =
                "&nbsp;&nbsp;&nbsp;&nbsp;天才一秒记住本站地址<br><br>";

        /** Counts finished chapters across all tasks; shared by every worker thread. */
        private static final AtomicInteger num = new AtomicInteger();

        private final String href;
        private final String title;
        private final String filePath;
        private final String fileName;

        /**
         * @param href     absolute URL of the chapter page
         * @param title    chapter title, written as the first line of the file
         * @param filePath output directory
         * @param fileName output file name (sequence prefix plus sanitized title)
         */
        public MyRunnable(String href, String title, String filePath, String fileName) {
            this.href = href;
            this.title = title;
            this.filePath = filePath;
            this.fileName = fileName;
        }

        @Override
        public void run() {
            try {
                // Send the request.
                String html = 小说Utils.getHttps(href);
                if (html == null || html.isEmpty()) {
                    System.err.println("Empty response, chapter skipped: " + href + "\t" + title);
                    return;
                }

                // Parse the page and locate the chapter body.
                Document document = Jsoup.parse(html);
                Element content = document.getElementById("content");
                if (content == null) {
                    System.err.println("No #content element, chapter skipped: " + href + "\t" + title);
                    return;
                }

                // Extract the text, one paragraph per child node.
                StringBuilder text = new StringBuilder();
                for (Node node : content.childNodes()) {
                    String line = extractLine(node);
                    if (isBlank(line) || WATERMARK_LINE.equals(line.trim())) {
                        continue;
                    }
                    text.append("\t").append(line.replace("&nbsp;", "")).append("\n\n");
                }

                // Progress line: worker name, running count, file-name prefix, title.
                String threadName = Thread.currentThread().getName().replace("pool-1-", "");
                System.out.println(String.format("%-10s", threadName) + "\t" + num.incrementAndGet()
                        + "\t" + fileName.replace(title, "") + "\t" + title);

                // Write the chapter file.
                小说Utils.writeTxt(filePath, fileName, title + "\n\n" + text.toString());

                // Pause before this worker picks up the next chapter.
                Thread.sleep(DELAY_MILLIS);
            } catch (InterruptedException e) {
                // Preserve the interrupt so the pool can observe it.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                System.err.println("Failed to download chapter: " + href + "\t" + title);
                e.printStackTrace();
            }
        }

        /** Returns the HTML of a text node, or the concatenated HTML of an element's children. */
        private static String extractLine(Node node) {
            if ("#text".equals(node.nodeName())) {
                return node.outerHtml();
            }
            StringBuilder line = new StringBuilder();
            List<Node> children = node.childNodes();
            for (Node child : children) {
                line.append(child.outerHtml());
            }
            return line.toString();
        }

        /** Returns true when the line holds nothing but whitespace and {@code &nbsp;} entities. */
        private static boolean isBlank(String line) {
            return WHITESPACE.matcher(line.replace("&nbsp;", " ")).replaceAll("").isEmpty();
        }
    }

    /**
     * Fetches the chapter index, submits one download task per chapter link and waits
     * until all of them have finished, then prints the elapsed time.
     *
     * @param args unused
     * @throws Exception if the index page cannot be fetched or parsed, or the thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        long startTime = System.currentTimeMillis();
        String filePath = "C:\\Users\\Administrator\\Desktop\\星空彼岸";
        String url = "http://www.xxx.com/book/25644/";

        String html = 小说Utils.getHttps(url);
        if (html == null || html.isEmpty()) {
            System.err.println("Failed to fetch chapter index: " + url);
            return;
        }
        Document document = Jsoup.parse(html);
        Element list = document.getElementById("list");
        if (list == null) {
            System.err.println("No #list element on chapter index: " + url);
            return;
        }
        Elements chapter = list.getElementsByTag("a");

        // Chapter links are treated as site-root-relative, so only scheme and host are kept.
        URI uri = new URI(url);
        String domain = uri.getScheme() + "://" + uri.getHost();

        小说Utils.clearFile(filePath);

        // Create the thread pool.
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        try {
            for (int i = 0; i < chapter.size(); i++) {
                Element link = chapter.get(i);
                String href = domain + link.attr("href");
                String title = link.html();
                // A zero-padded sequence number keeps the files in chapter order.
                String safeTitle = ILLEGAL_FILE_NAME_CHARS.matcher(title).replaceAll(" ").trim();
                String fileName = String.format("%04d", i + 1) + "_" + safeTitle;

                executor.execute(new MyRunnable(href, title, filePath, fileName));
                // Stagger submissions so requests do not all start at once.
                Thread.sleep(DELAY_MILLIS);
            }
        } finally {
            // Always stop accepting work, otherwise idle pool threads keep the JVM alive.
            executor.shutdown();
        }

        // Block the main thread until every submitted task has finished (must follow shutdown).
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);

        long endTime = System.currentTimeMillis();
        System.out.println("OVER!\t耗时: " + new DecimalFormat(".00").format((endTime - startTime) / 1000d) + " 秒");
    }
}

 








 

本文为原创文章，转载请注明出处！