Add project files.
This commit is contained in:
161
HeadlessBrowser/Program.cs
Normal file
161
HeadlessBrowser/Program.cs
Normal file
@ -0,0 +1,161 @@
|
||||
using Microsoft.AspNetCore.Razor.Language;
|
||||
using RazorEngine.Templating;
|
||||
using SimpleBrowser;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace HeadlessBrowser
|
||||
{
|
||||
class Program
|
||||
{
|
||||
/// <summary>
/// Entry point. Navigates a SimpleBrowser <see cref="Browser"/> to the di.se large-cap
/// page, logs the value of every element matched by the price selector, and finally
/// writes the browser's HTML request log to disk and opens it with the shell.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <returns>A task that completes when the scrape and log publication are done.</returns>
private static async Task Main(string[] args)
{
    using Browser browser = new Browser();
    try
    {
        // log the browser request/response data to files so we can interrogate them in case of an issue with our scraping
        browser.RequestLogged += OnBrowserRequestLogged;
        browser.MessageLogged += OnBrowserMessageLogged;

        // we'll fake the user agent for websites that alter their content for unrecognised browsers
        browser.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

        await browser.NavigateAsync("https://www.di.se/bors/large-cap/");
        if (LastRequestFailed(browser))
        {
            // always check the last request in case the page failed to load;
            // the finally block below still publishes the request log
            return;
        }

        // check that the page contains a marker text we recognise before scraping it
        if (!browser.ContainsText("Kurser"))
        {
            browser.Log("The text we normally expect ('Kurser') isn't present on the page");
        }
        else
        {
            browser.Log("Your Stock Prices:");

            // we can use simple jquery selectors, though advanced selectors are yet to be implemented
            foreach (HtmlResult item in browser.Select(".market__content .i-t__c--m , tr"))
            {
                browser.Log("* " + item.Value);
            }
        }
    }
    catch (Exception ex)
    {
        // record the failure in the browser log so it ends up in the rendered HTML log file
        browser.Log(ex.Message, LogMessageType.Error);
        browser.Log(ex.StackTrace, LogMessageType.StackTrace);
    }
    finally
    {
        RenderService rsvc = new RenderService();

        // the UTC tick count gives each run its own log file name
        string path = WriteFile("log-" + DateTime.UtcNow.Ticks + ".html", browser.RenderHtmlLogFile(rsvc, "SimpleBrowser Sample - Request Log"));

        Console.WriteLine("Log file published to:");
        Console.WriteLine(path);

        // UseShellExecute lets the OS open the .html file with its associated application;
        // disposing the Process object only releases our handle to it
        using var process = new Process();
        process.StartInfo.FileName = path;
        process.StartInfo.UseShellExecute = true;
        process.Start();
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether the browser's most recent request failed, logging the error message when it did.
/// </summary>
/// <param name="browser">Browser whose <c>LastWebException</c> is inspected.</param>
/// <returns><c>true</c> when <c>LastWebException</c> is set; otherwise <c>false</c>.</returns>
private static bool LastRequestFailed(Browser browser)
{
    var failure = browser.LastWebException;
    if (failure == null)
    {
        return false;
    }

    browser.Log($"There was an error loading the page: {failure.Message}");
    return true;
}
|
||||
|
||||
/// <summary>
/// Handler for the browser's <c>MessageLogged</c> event: echoes the message to the console.
/// </summary>
/// <param name="browser">Browser that raised the event; not used.</param>
/// <param name="log">Message text to print.</param>
private static void OnBrowserMessageLogged(Browser browser, string log) => Console.WriteLine(log);
|
||||
|
||||
/// <summary>
/// Handler for the browser's <c>RequestLogged</c> event: prints the request method and URL,
/// followed by the response status code, to the console.
/// </summary>
/// <param name="req">Browser that raised the event; not used.</param>
/// <param name="log">Logged request whose <c>Method</c>, <c>Url</c> and <c>ResponseCode</c> are printed.</param>
private static void OnBrowserRequestLogged(Browser req, HttpRequestLog log)
{
    string requestLine = $" -> {log.Method} request to {log.Url}";
    string responseLine = $" <- Response status code: {log.ResponseCode}";

    Console.WriteLine(requestLine);
    Console.WriteLine(responseLine);
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="text"/> to a file named <paramref name="filename"/> inside the
/// "Logs" folder under the application's base directory, creating the folder when needed.
/// </summary>
/// <param name="filename">Name of the file to write inside the "Logs" folder.</param>
/// <param name="text">Content written to the file.</param>
/// <returns>The full path of the file that was written.</returns>
private static string WriteFile(string filename, string text)
{
    // CreateDirectory does nothing when the folder is already there and hands back its DirectoryInfo either way
    DirectoryInfo logsFolder = Directory.CreateDirectory(
        Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Logs"));

    string target = Path.Combine(logsFolder.FullName, filename);
    File.WriteAllText(target, text);
    return target;
}
|
||||
}
|
||||
|
||||
/// <summary>
/// <see cref="HtmlLogFormatter.IViewRenderService"/> implementation that renders a template
/// by handing it to RazorEngine's <c>RunCompile</c>.
/// </summary>
public class RenderService : HtmlLogFormatter.IViewRenderService
{
    /// <summary>
    /// Compiles and runs <paramref name="template"/> against <paramref name="model"/> and returns the output.
    /// </summary>
    /// <typeparam name="TModel">Declared type of the model.</typeparam>
    /// <param name="template">Template source passed to <c>RunCompile</c>.</param>
    /// <param name="title">
    /// Passed to <c>RunCompile</c> as the template name.
    /// NOTE(review): RazorEngine presumably uses this as its template cache key - confirm.
    /// </param>
    /// <param name="model">Model instance given to the template; may be <c>null</c>.</param>
    /// <returns>The string returned by <c>RunCompile</c>.</returns>
    public string RenderToString<TModel>(string template, string title, TModel model)
    {
        // Prefer the runtime type of the model; fall back to the declared type so a
        // null model no longer fails with a NullReferenceException on GetType().
        Type modelType = model == null ? typeof(TModel) : model.GetType();
        return RazorEngine.Engine.Razor.RunCompile(template, title, modelType, model);
    }
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user