// 162 lines, 6.8 KiB, C#
using Microsoft.AspNetCore.Razor.Language;
|
|
using RazorEngine.Templating;
|
|
using SimpleBrowser;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace HeadlessBrowser
|
|
{
|
|
/// <summary>
/// Console sample that uses a SimpleBrowser <see cref="Browser"/> to load a stock-listing
/// page, log the value of every element matched by a selector, and finally publish the
/// browser's request log as an HTML file.
/// </summary>
class Program
{
    /// <summary>Page that is loaded and scraped.</summary>
    private const string StockListUrl = "https://www.di.se/bors/large-cap/";

    /// <summary>Text that must be present on the page before we try to read anything from it.</summary>
    private const string ExpectedPageText = "Kurser";

    /// <summary>Selector handed to <c>browser.Select</c> to pick the elements whose values are logged.</summary>
    private const string StockRowSelector = ".market__content .i-t__c--m , tr";

    /// <summary>User agent sent with every request.</summary>
    private const string FakeUserAgent =
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

    /// <summary>
    /// Entry point: navigates to <see cref="StockListUrl"/>, logs the matched elements and
    /// always publishes the HTML request log, even when navigation fails or an exception is thrown.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    private static async Task Main(string[] args)
    {
        using Browser browser = new Browser();
        try
        {
            // log the browser request/response data so we can interrogate it in case of an issue with our scraping
            browser.RequestLogged += OnBrowserRequestLogged;
            browser.MessageLogged += OnBrowserMessageLogged;

            // we'll fake the user agent for websites that alter their content for unrecognised browsers
            browser.UserAgent = FakeUserAgent;

            await browser.NavigateAsync(StockListUrl);

            // always check the last request in case the page failed to load
            if (LastRequestFailed(browser))
            {
                return;
            }

            // check that the page contains text that we recognise before scraping it
            if (!browser.ContainsText(ExpectedPageText))
            {
                browser.Log("The text we normally expect isn't present on the page");
                return;
            }

            browser.Log("Your Stock Prices:");

            // we can use simple jquery selectors, though advanced selectors are yet to be implemented
            foreach (HtmlResult item in browser.Select(StockRowSelector))
            {
                browser.Log("* " + item.Value);
            }
        }
        catch (Exception ex)
        {
            browser.Log(ex.Message, LogMessageType.Error);
            browser.Log(ex.StackTrace, LogMessageType.StackTrace);
        }
        finally
        {
            // runs on the early returns above as well, so a log is published for failed runs too
            PublishLog(browser);
        }
    }

    /// <summary>
    /// Renders the browser's log to an HTML file under the "Logs" directory, prints its path
    /// and asks the operating system shell to open it.
    /// </summary>
    /// <param name="browser">Browser whose log is rendered.</param>
    private static void PublishLog(Browser browser)
    {
        RenderService rsvc = new RenderService();

        string path = WriteFile(
            "log-" + DateTime.UtcNow.Ticks + ".html",
            browser.RenderHtmlLogFile(rsvc, "SimpleBrowser Sample - Request Log"));

        Console.WriteLine("Log file published to:");
        Console.WriteLine(path);

        try
        {
            // Disposing the Process object only releases our handle; it does not close the viewer.
            using (Process viewer = new Process())
            {
                viewer.StartInfo.FileName = path;
                viewer.StartInfo.UseShellExecute = true;
                viewer.Start();
            }
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            // The log file is already written and its path printed, so failing to open a
            // viewer (e.g. no application associated with .html) is not fatal.
            Console.Error.WriteLine("Could not open the log file: " + ex.Message);
        }
    }

    /// <summary>
    /// Reports whether the browser recorded a web exception for its last request, logging the
    /// exception message through the browser when it did.
    /// </summary>
    /// <param name="browser">Browser to inspect.</param>
    /// <returns><c>true</c> when <c>browser.LastWebException</c> is set; otherwise <c>false</c>.</returns>
    private static bool LastRequestFailed(Browser browser)
    {
        if (browser.LastWebException == null)
        {
            return false;
        }

        browser.Log("There was an error loading the page: " + browser.LastWebException.Message);
        return true;
    }

    /// <summary>Handler for <c>Browser.MessageLogged</c>: echoes the message to the console.</summary>
    /// <param name="browser">Browser that raised the event; not used.</param>
    /// <param name="log">Message text.</param>
    private static void OnBrowserMessageLogged(Browser browser, string log)
    {
        Console.WriteLine(log);
    }

    /// <summary>
    /// Handler for <c>Browser.RequestLogged</c>: prints the request method and URL followed by
    /// the response status code.
    /// </summary>
    /// <param name="req">Browser that raised the event; not used.</param>
    /// <param name="log">Logged request/response details.</param>
    private static void OnBrowserRequestLogged(Browser req, HttpRequestLog log)
    {
        Console.WriteLine(" -> " + log.Method + " request to " + log.Url);
        Console.WriteLine(" <- Response status code: " + log.ResponseCode);
    }

    /// <summary>
    /// Writes <paramref name="text"/> to <paramref name="filename"/> inside a "Logs" directory
    /// next to the application, creating the directory when it does not exist.
    /// </summary>
    /// <param name="filename">File name (without directory) to create or overwrite.</param>
    /// <param name="text">Content to write.</param>
    /// <returns>Full path of the written file.</returns>
    private static string WriteFile(string filename, string text)
    {
        // CreateDirectory is a no-op when the directory already exists.
        DirectoryInfo logDirectory = Directory.CreateDirectory(
            Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Logs"));

        string path = Path.Combine(logDirectory.FullName, filename);
        File.WriteAllText(path, text);
        return path;
    }
}
|
|
|
|
/// <summary>
/// <see cref="HtmlLogFormatter.IViewRenderService"/> implementation that renders a template
/// by handing it to RazorEngine's <c>RunCompile</c>.
/// </summary>
public class RenderService : HtmlLogFormatter.IViewRenderService
{
    /// <summary>
    /// Compiles and runs <paramref name="template"/> against <paramref name="model"/> and
    /// returns the rendered output.
    /// </summary>
    /// <typeparam name="TModel">Declared model type; used as the model type when <paramref name="model"/> is null.</typeparam>
    /// <param name="template">Template passed as the first argument to <c>RunCompile</c>.</param>
    /// <param name="title">
    /// Passed as the second argument to <c>RunCompile</c>.
    /// NOTE(review): RazorEngine appears to treat this as the template name/cache key — confirm
    /// that callers never reuse one title for different templates.
    /// </param>
    /// <param name="model">Model made available to the template; may be null.</param>
    /// <returns>The string produced by <c>RunCompile</c>.</returns>
    public string RenderToString<TModel>(string template, string title, TModel model)
    {
        // model.GetType() would throw NullReferenceException for a null model, so fall back
        // to the declared generic type in that case.
        Type modelType = model?.GetType() ?? typeof(TModel);

        return RazorEngine.Engine.Razor.RunCompile(template, title, modelType, model);
    }
}
|
|
|
|
}
|
|
|