Add project files.

This commit is contained in:
2021-07-05 22:29:31 +02:00
parent ef61217b63
commit 5648effc9a
7 changed files with 314 additions and 0 deletions

View File

@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- SDK-style project file: builds a console executable targeting .NET 5. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- NuGet dependency, pinned to an exact version. -->
<PackageReference Include="HtmlAgilityPack" Version="1.11.34" />
</ItemGroup>
</Project>

17
CsharpCorne/Program.cs Normal file
View File

@ -0,0 +1,17 @@
using HtmlAgilityPack;
using System;
using System.Linq;
namespace CsharpCorner
{
class Program
{
/// <summary>
/// Downloads the Avanza stock list page and collects every anchor element whose
/// <c>class</c> attribute is exactly <c>ellipsis</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var web = new HtmlWeb();
    var doc = web.Load("https://www.avanza.se/aktier/lista.html");

    // SelectNodes returns null (not an empty collection) when the XPath matches
    // nothing, so calling ToList() directly on its result would throw. Fall back
    // to an empty list instead.
    var anchors = doc.DocumentNode.SelectNodes("//a[@class='ellipsis']");
    var headerNames = anchors?.ToList() ?? Enumerable.Empty<HtmlNode>().ToList();
}
}
}

View File

@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- SDK-style project file: builds a console executable targeting .NET 5. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- NuGet dependencies, each pinned to an exact version. -->
<PackageReference Include="RazorEngine.NetCore" Version="3.1.0" />
<PackageReference Include="SimpleHeadlessBrowser" Version="0.7.0" />
</ItemGroup>
</Project>

161
HeadlessBrowser/Program.cs Normal file
View File

@ -0,0 +1,161 @@
using Microsoft.AspNetCore.Razor.Language;
using RazorEngine.Templating;
using SimpleBrowser;
using System;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
namespace HeadlessBrowser
{
class Program
{
/// <summary>
/// Navigates a headless browser to the di.se large-cap page, logs the value of every
/// element matched by the scraping selector, and finally writes the browser's HTML
/// request log to disk and asks the shell to open it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static async Task Main(string[] args)
{
    using Browser browser = new Browser();
    try
    {
        // Log the browser request/response data so a failed scrape can be investigated afterwards.
        browser.RequestLogged += OnBrowserRequestLogged;
        browser.MessageLogged += OnBrowserMessageLogged;

        // Fake the user agent for websites that alter their content for unrecognised browsers.
        browser.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

        await browser.NavigateAsync("https://www.di.se/bors/large-cap/");
        if (LastRequestFailed(browser))
        {
            // Always check the last request in case the page failed to load.
            return;
        }

        // Sanity check: only scrape when the page contains text we recognise.
        if (!browser.ContainsText("Kurser"))
        {
            browser.Log("The text we normally expect (\"Kurser\") isn't present on the page");
        }
        else
        {
            browser.Log("Your Stock Prices:");

            // Simple jQuery-style selectors are supported by the browser.
            foreach (HtmlResult item in browser.Select(".market__content .i-t__c--m , tr"))
            {
                browser.Log("* " + item.Value);
            }
        }
    }
    catch (Exception ex)
    {
        browser.Log(ex.Message, LogMessageType.Error);
        browser.Log(ex.StackTrace, LogMessageType.StackTrace);
    }
    finally
    {
        // The request log is written on every exit path, including the early return above.
        RenderService rsvc = new RenderService();
        string path = WriteFile("log-" + DateTime.UtcNow.Ticks + ".html", browser.RenderHtmlLogFile(rsvc, "SimpleBrowser Sample - Request Log"));
        Console.WriteLine("Log file published to:");
        Console.WriteLine(path);

        try
        {
            // Opening the log is a convenience only; release the process handle right away.
            Process.Start(new ProcessStartInfo(path) { UseShellExecute = true })?.Dispose();
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            // No shell handler available: the log is already on disk, so just report it.
            Console.WriteLine("Could not open the log file automatically: " + ex.Message);
        }
    }
}
/// <summary>
/// Checks whether the browser recorded a web exception for its last request and,
/// if so, writes the exception message to the browser log.
/// </summary>
/// <param name="browser">The browser whose <c>LastWebException</c> is inspected.</param>
/// <returns><c>true</c> when a web exception is present; otherwise <c>false</c>.</returns>
private static bool LastRequestFailed(Browser browser)
{
    var failure = browser.LastWebException;
    if (failure == null)
    {
        return false;
    }

    browser.Log("There was an error loading the page: " + failure.Message);
    return true;
}
/// <summary>Echoes a message logged by the browser to the console.</summary>
/// <param name="browser">The browser that raised the event (not used).</param>
/// <param name="log">The logged message text.</param>
private static void OnBrowserMessageLogged(Browser browser, string log)
    => Console.WriteLine(log);
/// <summary>
/// Prints two console lines for a logged request: its method and URL,
/// then the response code.
/// </summary>
/// <param name="req">The browser that raised the event (not used).</param>
/// <param name="log">The request log entry to print.</param>
private static void OnBrowserRequestLogged(Browser req, HttpRequestLog log)
{
    string requestLine = " -> " + log.Method + " request to " + log.Url;
    Console.WriteLine(requestLine);

    string responseLine = " <- Response status code: " + log.ResponseCode;
    Console.WriteLine(responseLine);
}
/// <summary>
/// Writes <paramref name="text"/> to a file named <paramref name="filename"/> inside a
/// "Logs" folder under the application's base directory, creating the folder if needed.
/// </summary>
/// <param name="filename">Name of the file to create or overwrite.</param>
/// <param name="text">Content to write.</param>
/// <returns>The full path of the written file.</returns>
private static string WriteFile(string filename, string text)
{
    string logsRoot = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Logs");

    // CreateDirectory does nothing when the folder already exists.
    DirectoryInfo logsDir = Directory.CreateDirectory(logsRoot);

    string target = Path.Combine(logsDir.FullName, filename);
    File.WriteAllText(target, text);
    return target;
}
}
/// <summary>
/// <see cref="HtmlLogFormatter.IViewRenderService"/> implementation that renders
/// templates through RazorEngine.
/// </summary>
public class RenderService : HtmlLogFormatter.IViewRenderService
{
    /// <summary>
    /// Compiles and runs <paramref name="template"/> against <paramref name="model"/>,
    /// using the model's runtime type.
    /// </summary>
    /// <typeparam name="TModel">Declared type of the model.</typeparam>
    /// <param name="template">Template source handed to RazorEngine.</param>
    /// <param name="title">Passed to RazorEngine as the template name.</param>
    /// <param name="model">Model instance for the template.</param>
    /// <returns>The string produced by RazorEngine's <c>RunCompile</c>.</returns>
    public string RenderToString<TModel>(string template, string title, TModel model)
    {
        Type runtimeModelType = model.GetType();
        return RazorEngine.Engine.Razor.RunCompile(template, title, runtimeModelType, model);
    }
}
}

62
WebScrape/Program.cs Normal file
View File

@ -0,0 +1,62 @@
using HtmlAgilityPack;
using ScrapySharp.Network;
using ScrapySharp.Extensions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
namespace WebScrape
{
class Program
{
static ScrapingBrowser _scrapingBrowser = new ScrapingBrowser();
/// <summary>
/// Fetches the Avanza stock list page and then waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string listingUrl = "https://www.avanza.se/aktier/lista.html";

    // The parsed page is not used yet; the call only performs the navigation.
    _ = GetHtml(listingUrl);

    Console.ReadKey();
}
/// <summary>
/// Navigates the shared scraping browser to <paramref name="url"/> and returns the
/// resulting page's <c>Html</c> node.
/// </summary>
/// <param name="url">Absolute URL of the page to load.</param>
/// <returns>The <c>Html</c> node of the loaded page.</returns>
static HtmlNode GetHtml(string url)
{
    var target = new Uri(url);
    return _scrapingBrowser.NavigateToPage(target).Html;
}
//private static async void GetHtmlAsync()
//{
// //var url = "https://www.di.se/bors/aktier";
// var url = "https://www.di.se/bors/aktier/?data%5Bcountry%5D=SE&data%5Bmarket%5D=35207&data%5Bmarket%5D=35208&data%5Bmarket%5D=35209&data%5Bsector%5D=1&data%5Bsector%5D=2&data%5Bsector%5D=3&data%5Bsector%5D=4&data%5Bsector%5D=5&data%5Bsector%5D=6&data%5Bsector%5D=7&data%5Bsector%5D=8&data%5Bsector%5D=9&data%5Bsector%5D=10&field=name&tab=0";
// var httpClient = new HttpClient();
// var html = await httpClient.GetStringAsync(url);
// var htmlDocument = new HtmlDocument();
// htmlDocument.LoadHtml(html);
// var shareList = htmlDocument.DocumentNode.Descendants("table")
// .Where(node => node.GetAttributeValue("data-tab", "")
// .Equals("table_0")).ToList();
// var totLst = new List<HtmlNode>();
// foreach(var htmldoc in shareList)
// {
// var trow = htmldoc.Descendants("tr")
// .Where(node => node.GetAttributeValue("id", "")
// .Contains("ins_")).ToList();
// totLst.AddRange(trow);
// }
// Console.WriteLine();
//}
}
}

View File

@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- SDK-style project file: builds a console executable targeting .NET 5. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- NuGet dependencies, each pinned to an exact version. -->
<PackageReference Include="HtmlAgilityPack" Version="1.11.34" />
<PackageReference Include="ScrapySharp" Version="3.0.0" />
</ItemGroup>
</Project>

37
WebScrapeApp.sln Normal file
View File

@ -0,0 +1,37 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.31313.79
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebScrape", "WebScrape\WebScrape.csproj", "{DF64E211-B223-48C5-83F8-60792D8DFB04}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsharpCorner", "CsharpCorne\CsharpCorner.csproj", "{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeadlessBrowser", "HeadlessBrowser\HeadlessBrowser.csproj", "{F0E685F6-9B05-42D1-B2F2-B14C861668A3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Release|Any CPU.Build.0 = Release|Any CPU
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Release|Any CPU.Build.0 = Release|Any CPU
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {998091CC-0A32-4109-B1B5-0AA12A3E5D5C}
EndGlobalSection
EndGlobal