C# Profesyonel 2022: c# linkleri alma

17 Aralık 2015 Perşembe

c# linkleri alma

using System.Diagnostics;
using System.Net;

/// <summary>
/// Sample entry point: downloads the Wikipedia main page and writes every
/// link found in it to the debug output.
/// </summary>
class Program
{
    static void Main()
    {
        // Scrape links from wikipedia.org

        // 1.
        // URL: http://en.wikipedia.org/wiki/Main_Page
        // WebClient is IDisposable, so it is wrapped in a using statement to
        // release it as soon as the page has been downloaded.
        string html;
        using (WebClient client = new WebClient())
        {
            html = client.DownloadString("http://en.wikipedia.org/wiki/Main_Page");
        }

        // 2.
        // Each LinkItem is printed through its ToString() override.
        foreach (LinkItem link in LinkFinder.Find(html))
        {
            Debug.WriteLine(link);
        }
    }
}

Example 2. Here is a simple class that receives the HTML string and extracts every link and its text into structs. It is fairly fast, but I offer some optimization tips further down. It would be better to use a class rather than a struct.

C# program that scrapes with Regex

using System.Collections.Generic;
using System.Text.RegularExpressions;

/// <summary>
/// One hyperlink: the target address and the visible link text.
/// </summary>
public struct LinkItem
{
    /// <summary>Value of the link's href attribute.</summary>
    public string Href;

    /// <summary>Text content of the link.</summary>
    public string Text;

    /// <summary>
    /// Formats the link as the href, a newline and a tab, then the text.
    /// </summary>
    /// <returns>The two fields joined by "\n\t"; a null field contributes nothing.</returns>
    public override string ToString()
    {
        return string.Concat(Href, "\n\t", Text);
    }
}

/// <summary>
/// Extracts anchor (&lt;a&gt;) elements from an HTML string using regular expressions.
/// </summary>
static class LinkFinder
{
    // One complete anchor element, from its opening tag to the first "</a>".
    // The lookahead requires whitespace or '>' right after "<a" so that other
    // tags whose names merely start with "a" (<abbr>, <article>, <address>, ...)
    // are not mistaken for anchors. Singleline lets '.' span line breaks;
    // IgnoreCase accepts upper-case markup such as <A HREF="...">.
    private static readonly Regex AnchorPattern = new Regex(
        @"(<a(?=[\s>]).*?>.*?</a>)",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // A double-quoted href attribute; group 1 is the attribute value.
    private static readonly Regex HrefPattern = new Regex(
        @"href=\""(.*?)\""",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Any tag together with the whitespace surrounding it.
    private static readonly Regex TagPattern = new Regex(
        @"\s*<.*?>\s*",
        RegexOptions.Singleline);

    /// <summary>
    /// Finds every anchor element in the given HTML and returns its href and text.
    /// </summary>
    /// <param name="file">The HTML markup to scan.</param>
    /// <returns>
    /// One <see cref="LinkItem"/> per anchor, in document order. <c>Href</c> is
    /// left null when the anchor has no double-quoted href attribute; <c>Text</c>
    /// is the anchor markup with all tags and their surrounding whitespace removed.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="file"/> is null.</exception>
    public static List<LinkItem> Find(string file)
    {
        if (file == null)
        {
            throw new System.ArgumentNullException("file");
        }

        List<LinkItem> links = new List<LinkItem>();

        // 1.
        // Find all anchors in the markup and loop over each one.
        foreach (Match anchor in AnchorPattern.Matches(file))
        {
            string markup = anchor.Groups[1].Value;
            LinkItem item = new LinkItem();

            // 2.
            // Get the href attribute, if there is one.
            Match href = HrefPattern.Match(markup);
            if (href.Success)
            {
                item.Href = href.Groups[1].Value;
            }

            // 3.
            // Remove the anchor tags and any inner tags, leaving only the text.
            item.Text = TagPattern.Replace(markup, "");

            links.Add(item);
        }

        return links;
    }
}