Here are some examples with the general concepts.
Open HTML document from a URL:
HtmlWeb web = new HtmlWeb();
HtmlDocument html = web.Load(url);
Get all rows in the entire document:
var rows = html.DocumentNode.SelectNodes("//table//tr");
Get all cells from each row (notice the dot at the beginning of the expression: it makes the search relative to the current row, whereas a leading `//` without the dot would search the entire document again):
if (rows != null)
{
    // Iterating an empty collection is a no-op, so a null guard is all that is needed.
    foreach (var currentRow in rows)
    {
        // The leading dot restricts the search to the current row's descendants.
        var cells = currentRow.SelectNodes(".//td");
    }
}
Get the inner text from a cell:
var title = cells[0].InnerText;
Get a single node:
var link = cells[1].SelectSingleNode(".//a");
Get an attribute from a node (in this case the link’s destination):
if (link != null)
{
var href = link.GetAttributeValue("href", string.Empty);
}
Get a div by ID:
var div = html.DocumentNode.SelectSingleNode("//div[@id='myid']");
Get all divs whose class attribute is exactly a specific value:
var div = html.DocumentNode.SelectNodes("//div[@class='myclass']");
Get all divs whose class attribute contains a given class name:
// Match 'content' as a whole, space-separated class token. A plain
// contains(@class, 'content') is a substring test and would also match
// classes such as 'content-wrapper' or 'discontent'.
var div = html.DocumentNode.SelectNodes("//div[contains(concat(' ', normalize-space(@class), ' '), ' content ')]");
Get all divs whose class attribute is not exactly 'hidden' (this also matches divs that have no class attribute at all):
var div = html.DocumentNode.SelectNodes("//div[not(@class='hidden')]");
Get all links from a specific div:
var links = html.DocumentNode.SelectNodes("//div[@id='main']//a");
Get the last link in a specific div:
// The parentheses matter: they build the full set of links inside the div first
// and then take its last member. Without them, a[last()] is evaluated per parent
// element and matches every link that is the last <a> child of its own parent,
// so SelectSingleNode would return the first such link rather than the last link.
var lastLink = html.DocumentNode.SelectSingleNode("(//div[@id='main']//a)[last()]");
Get the second table in the document:
// The parentheses matter: (//table)[2] picks the second table in document order.
// Without them, //table[2] matches only tables that are the second <table> child
// of their own parent, and finds nothing when each table sits in its own container.
var table = html.DocumentNode.SelectSingleNode("(//table)[2]");
Get a table with a specific class and a specific text in the header:
var main = html.DocumentNode.SelectSingleNode("//table[contains(@class, 'standings') and .//th//text()[contains(., 'Full Table')]]");