using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Tool;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace Search
{
    /// <summary>
    /// Main window of a small web crawler. Button 1 fetches a single page, lists its
    /// links and downloads the images it references; button 2 starts a background
    /// crawl that follows links from a start URL.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>URL used when the URL text box is empty.</summary>
        private const string DefaultStartUrl = "http://image.baidu.com/";

        /// <summary>Directory passed to <c>zzHttp.downfile</c> as the download target.</summary>
        private const string ImageDirectory = @"d:\pic\";

        /// <summary>The crawl loop keeps running while the visited count is at most this value.</summary>
        private const int MaxVisitedUrls = 1000;

        /// <summary>Extensions of resources the crawl does not follow.</summary>
        private static readonly string[] SkippedExtensions =
            { ".css", ".js", ".gif", ".jpg", ".png", ".jpeg" };

        /// <summary>Matches an img tag and captures its src value in the "imgUrl" group.</summary>
        private static readonly Regex ImageTagRegex = new Regex(
            @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
            RegexOptions.IgnoreCase);

        // Start URL for the crawl. Captured on the UI thread in button2_Click so the
        // worker thread never has to read a control property.
        private string seedUrl = DefaultStartUrl;

        // 1 while a crawl thread is running, 0 otherwise (accessed via Interlocked).
        private int crawlerRunning;

        // Not referenced anywhere in this part of the class; kept because Form1 is partial.
        string[] urlarr = new string[100];

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// First-in/first-out queue of URL strings.
        /// </summary>
        public class Queue
        {
            // Backing store: head is First, tail is Last.
            private LinkedList<string> queue = new LinkedList<string>();

            /// <summary>Appends <paramref name="t"/> to the tail of the queue.</summary>
            public void enQueue(string t)
            {
                queue.AddLast(t);
            }

            /// <summary>
            /// Removes and returns the element at the head of the queue.
            /// </summary>
            /// <returns>The oldest element, or <c>null</c> when the queue is empty.</returns>
            public string deQueue()
            {
                // Callers test the result for null, so an empty queue must not throw.
                if (queue.Count == 0)
                {
                    return null;
                }

                // Take from the head: elements are added with AddLast, so removing
                // from the tail would turn the queue into a stack.
                string head = queue.First.Value;
                queue.RemoveFirst();
                return head;
            }

            /// <summary>Returns <c>true</c> when the queue holds no elements.</summary>
            public bool isQueueEmpty()
            {
                return queue.Count == 0;
            }

            /// <summary>Returns <c>true</c> when the queue contains <paramref name="t"/>.</summary>
            public bool contians(string t)
            {
                return queue.Contains(t);
            }

            /// <summary>Returns the number of queued elements.</summary>
            public int getcount()
            {
                return queue.Count;
            }
        }

        /// <summary>
        /// Process-wide crawl state: the set of URLs already visited and the queue of
        /// URLs still to visit. The collections are not synchronized.
        /// </summary>
        public class LinkQueue
        {
            // URLs that have already been visited.
            private static ISet<string> visitedUrl = new HashSet<string>();

            // URLs waiting to be visited.
            private static Queue unVisitedUrl = new Queue();

            /// <summary>Returns the queue of URLs waiting to be visited.</summary>
            public static Queue getUnVisitedUrl()
            {
                return unVisitedUrl;
            }

            /// <summary>Records <paramref name="url"/> as visited.</summary>
            public static void addVisitedUrl(String url)
            {
                visitedUrl.Add(url);
            }

            /// <summary>Removes <paramref name="url"/> from the visited set.</summary>
            public static void removeVisitedUrl(String url)
            {
                visitedUrl.Remove(url);
            }

            /// <summary>
            /// Removes and returns the next URL to visit.
            /// </summary>
            /// <returns>The next URL, or <c>null</c> when nothing is queued.</returns>
            public static Object unVisitedUrlDeQueue()
            {
                return unVisitedUrl.deQueue();
            }

            /// <summary>
            /// Queues <paramref name="url"/> unless it is null/blank, already visited or
            /// already queued, so that each URL is visited only once.
            /// </summary>
            public static void addUnvisitedUrl(String url)
            {
                if (string.IsNullOrWhiteSpace(url))
                {
                    return;
                }

                if (visitedUrl.Contains(url) || unVisitedUrl.contians(url))
                {
                    return;
                }

                unVisitedUrl.enQueue(url);
            }

            /// <summary>Returns the number of visited URLs.</summary>
            public static int getVisitedUrlNum()
            {
                return visitedUrl.Count;
            }

            /// <summary>Returns <c>true</c> when no URL is waiting to be visited.</summary>
            public static bool unVisitedUrlsEmpty()
            {
                return unVisitedUrl.isQueueEmpty();
            }
        }

        /// <summary>
        /// Fetches the page named in the URL text box (or the default URL), lists the
        /// extracted links and image URLs in the rich text box, and hands every image
        /// URL to <c>zzHttp.downfile</c>. The label shows how many images were handed over.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            zzHttp http = new zzHttp();
            CookieContainer cookie = new CookieContainer();
            string url = textBox1.Text != "" ? textBox1.Text : DefaultStartUrl;
            string content = http.SendDataByGET(url, "", ref cookie);

            // Nothing to parse; Regex.Matches would throw on a null input.
            if (string.IsNullOrEmpty(content))
            {
                label1.Text = 0 + "Zhang";
                return;
            }

            string baseUri = Utility.GetBaseUri(url);
            string[] links = Parser.ExtractLinks(baseUri, content);

            // Build the listing once instead of re-assigning the whole Text per link.
            StringBuilder listing = new StringBuilder();
            foreach (string link in links)
            {
                listing.Append(link).Append("\n");
            }
            richTextBox1.AppendText(listing.ToString());

            Queue que = new Queue();
            foreach (Match match in ImageTagRegex.Matches(content))
            {
                que.enQueue(match.Groups["imgUrl"].Value);
            }

            // Drain until empty. Comparing a growing index with the shrinking count
            // would stop after roughly half of the images.
            int k = 0;
            while (!que.isQueueEmpty())
            {
                string picurl = que.deQueue();
                richTextBox1.AppendText(picurl + "\n");

                string picname = GetFileName(picurl);
                if (picname.Length == 0)
                {
                    // Empty src or a URL ending in '/': there is no file name to save under.
                    continue;
                }

                zzHttp.downfile(picurl, picname, ImageDirectory);
                k++;
            }

            label1.Text = k + "Zhang";
        }

        /// <summary>
        /// Crawl loop: repeatedly takes the next unvisited URL, fetches it, marks it as
        /// visited and queues the links found in it. Stops when no URL is left or the
        /// visited count exceeds <see cref="MaxVisitedUrls"/>. Runs on the worker thread
        /// started by <see cref="button2_Click"/>.
        /// </summary>
        void search()
        {
            // NOTE(review): the original seed literal was lost; the crawl now starts
            // from the URL captured in button2_Click — confirm this is the intended seed.
            LinkQueue.addUnvisitedUrl(seedUrl);

            while (!LinkQueue.unVisitedUrlsEmpty() && LinkQueue.getVisitedUrlNum() <= MaxVisitedUrls)
            {
                // Take the URL at the head of the queue.
                String visitUrl = (String)LinkQueue.unVisitedUrlDeQueue();
                if (visitUrl == null)
                {
                    continue;
                }

                zzHttp downLoader = new zzHttp();
                CookieContainer cookie = new CookieContainer();

                string content;
                try
                {
                    content = downLoader.SendDataByGET(visitUrl, "", ref cookie);
                }
                catch (WebException)
                {
                    // NOTE(review): assumes SendDataByGET may surface WebException;
                    // one unreachable page must not end the whole crawl.
                    content = null;
                }

                // Mark as visited even on failure so the URL is not queued again.
                LinkQueue.addVisitedUrl(visitUrl);
                Add2Message("Has access number:" + LinkQueue.getVisitedUrlNum() + ",count=" + LinkQueue.getUnVisitedUrl().getcount());

                if (string.IsNullOrEmpty(content))
                {
                    continue;
                }

                string baseUri = Utility.GetBaseUri(visitUrl);
                string[] links = Parser.ExtractLinks(baseUri, content);

                foreach (string link in links)
                {
                    if (IsStaticResource(link))
                    {
                        continue;
                    }

                    LinkQueue.addUnvisitedUrl(link);
                    AddMessage(link);
                }
            }
        }

        /// <summary>
        /// Starts the crawl on a background thread. Does nothing when a crawl is
        /// already running, because the shared <see cref="LinkQueue"/> state is not
        /// synchronized.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (Interlocked.CompareExchange(ref crawlerRunning, 1, 0) != 0)
            {
                return;
            }

            // Read the text box here, on the UI thread.
            seedUrl = textBox1.Text != "" ? textBox1.Text : DefaultStartUrl;

            Thread worker = new Thread(RunCrawler);
            // Background thread: must not keep the process alive after the form closes.
            worker.IsBackground = true;
            worker.Start();
        }

        // Runs search() and always clears the running flag afterwards.
        private void RunCrawler()
        {
            try
            {
                search();
            }
            finally
            {
                Interlocked.Exchange(ref crawlerRunning, 0);
            }
        }

        // Returns the part of a URL before any '?' query or '#' fragment.
        private static string StripQueryAndFragment(string url)
        {
            int cut = url.IndexOfAny(new char[] { '?', '#' });
            return cut >= 0 ? url.Substring(0, cut) : url;
        }

        // Returns the text after the last '/' of the URL path ("" when there is none).
        private static string GetFileName(string picurl)
        {
            string path = StripQueryAndFragment(picurl);
            int slash = path.LastIndexOf('/');
            return slash >= 0 ? path.Substring(slash + 1) : path;
        }

        // True when the URL path ends in one of SkippedExtensions. Compares the
        // extension rather than searching the whole URL, so pages such as
        // "index.jsp" or "/js/help.html" are still crawled.
        private static bool IsStaticResource(string link)
        {
            if (string.IsNullOrEmpty(link))
            {
                return false;
            }

            string path = StripQueryAndFragment(link);
            foreach (string extension in SkippedExtensions)
            {
                if (path.EndsWith(extension, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        private delegate void InfoDelegate(string message);

        /// <summary>
        /// Appends <paramref name="message"/> plus a line break to the rich text box and
        /// scrolls to the end. May be called from any thread.
        /// </summary>
        public void AddMessage(string message)
        {
            // Best effort: skip the update once the control is gone.
            if (richTextBox1.IsDisposed)
            {
                return;
            }

            if (richTextBox1.InvokeRequired)
            {
                // Called from another thread: re-run this method on the control's thread.
                InfoDelegate d = new InfoDelegate(AddMessage);
                richTextBox1.Invoke(d, new object[] { message });
            }
            else
            {
                richTextBox1.AppendText(message + Environment.NewLine);
                richTextBox1.ScrollToCaret();
            }
        }

        private delegate void Info2Delegate(string message);

        /// <summary>
        /// Replaces the text of the status label with <paramref name="message"/>.
        /// May be called from any thread.
        /// </summary>
        public void Add2Message(string message)
        {
            // Best effort: skip the update once the control is gone.
            if (label2.IsDisposed)
            {
                return;
            }

            if (label2.InvokeRequired)
            {
                // Called from another thread: re-run this method on the control's thread.
                Info2Delegate d = new Info2Delegate(Add2Message);
                label2.Invoke(d, new object[] { message });
            }
            else
            {
                label2.Text = message;
            }
        }
    }
}
C# kaynak kodları ile projelerinize yardımcı açık source code örnekleri bulun. Programlama ile uğraşan coderlara yardımcı olur.
3 Ekim 2014 Cuma
c# crawler
Kaydol:
Kayıt Yorumları (Atom)
Hiç yorum yok:
Yorum Gönder