Add project files.
This commit is contained in:
12
CsharpCorne/CsharpCorner.csproj
Normal file
12
CsharpCorne/CsharpCorner.csproj
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net5.0</TargetFramework>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.34" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
17
CsharpCorne/Program.cs
Normal file
17
CsharpCorne/Program.cs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using System;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
|
namespace CsharpCorner
|
||||||
|
{
|
||||||
|
/// <summary>
/// Console sample that loads the Avanza stock list page with HtmlAgilityPack
/// and collects the anchor nodes whose class attribute is "ellipsis".
/// </summary>
class Program
{
    /// <summary>
    /// Downloads the stock list page and selects the matching anchor nodes.
    /// The selected nodes are only materialized into a list; nothing is printed.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        var web = new HtmlWeb();
        var doc = web.Load("https://www.avanza.se/aktier/lista.html");

        // SelectNodes returns null (not an empty collection) when the XPath
        // matches nothing, so fall back to an empty sequence before calling
        // ToList() to avoid an ArgumentNullException.
        var headerNames = (doc.DocumentNode.SelectNodes("//a[@class='ellipsis']")
            ?? Enumerable.Empty<HtmlNode>()).ToList();
    }
}
|
||||||
|
}
|
||||||
12
HeadlessBrowser/HeadlessBrowser.csproj
Normal file
12
HeadlessBrowser/HeadlessBrowser.csproj
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net5.0</TargetFramework>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="RazorEngine.NetCore" Version="3.1.0" />
|
||||||
|
<PackageReference Include="SimpleHeadlessBrowser" Version="0.7.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
161
HeadlessBrowser/Program.cs
Normal file
161
HeadlessBrowser/Program.cs
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
using Microsoft.AspNetCore.Razor.Language;
|
||||||
|
using RazorEngine.Templating;
|
||||||
|
using SimpleBrowser;
|
||||||
|
using System;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.IO;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace HeadlessBrowser
|
||||||
|
{
|
||||||
|
/// <summary>
/// Console sample that drives a SimpleBrowser <see cref="Browser"/> to load a
/// stock listing page, logs the matched elements and publishes an HTML log
/// of the requests that were made.
/// </summary>
class Program
{
    /// <summary>Address of the stock listing page that is loaded.</summary>
    private const string StockListUrl = "https://www.di.se/bors/large-cap/";

    /// <summary>Text that is expected to be present on a correctly loaded listing page.</summary>
    private const string ExpectedPageText = "Kurser";

    /// <summary>
    /// Loads the listing page, logs the scraped values and always writes the
    /// HTML request log afterwards (even when loading fails or an exception is thrown).
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    private static async Task Main(string[] args)
    {
        using Browser browser = new Browser();
        try
        {
            // log the browser request/response data so we can interrogate them in case of an issue with our scraping
            browser.RequestLogged += OnBrowserRequestLogged;
            browser.MessageLogged += OnBrowserMessageLogged;

            // we'll fake the user agent for websites that alter their content for unrecognised browsers
            browser.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

            await browser.NavigateAsync(StockListUrl);
            if (LastRequestFailed(browser))
            {
                // always check the last request in case the page failed to load
                return;
            }

            LogStockPrices(browser);
        }
        catch (Exception ex)
        {
            browser.Log(ex.Message, LogMessageType.Error);
            browser.Log(ex.StackTrace, LogMessageType.StackTrace);
        }
        finally
        {
            PublishRequestLog(browser);
        }
    }

    /// <summary>
    /// Checks that the loaded page contains the expected text and, if so, logs
    /// the value of every element matched by the stock table selector.
    /// </summary>
    /// <param name="browser">Browser whose current page is inspected.</param>
    private static void LogStockPrices(Browser browser)
    {
        if (!browser.ContainsText(ExpectedPageText))
        {
            browser.Log($"The page loaded, but the text we normally expect (\"{ExpectedPageText}\") isn't present on the page");
            return;
        }

        browser.Log("Your Stock Prices:");

        // we can use simple jquery selectors, though advanced selectors are yet to be implemented
        foreach (HtmlResult item in browser.Select(".market__content .i-t__c--m , tr"))
        {
            browser.Log("* " + item.Value);
        }
    }

    /// <summary>
    /// Renders the browser's request log to an HTML file under the "Logs"
    /// directory, prints its path and opens it via the operating system shell.
    /// </summary>
    /// <param name="browser">Browser whose log is rendered.</param>
    private static void PublishRequestLog(Browser browser)
    {
        RenderService rsvc = new RenderService();

        string path = WriteFile(
            $"log-{DateTime.UtcNow.Ticks}.html",
            browser.RenderHtmlLogFile(rsvc, "SimpleBrowser Sample - Request Log"));

        Console.WriteLine("Log file published to:");
        Console.WriteLine(path);

        // UseShellExecute lets the OS pick the application registered for .html files.
        // Process.Start may return null when no new process was started; the handle
        // is disposed right away because the viewer is not tracked any further.
        Process.Start(new ProcessStartInfo(path) { UseShellExecute = true })?.Dispose();
    }

    /// <summary>
    /// Reports whether the most recent request ended with a web exception,
    /// logging the exception message when it did.
    /// </summary>
    /// <param name="browser">Browser whose last request is checked.</param>
    /// <returns><c>true</c> when <c>LastWebException</c> is set; otherwise <c>false</c>.</returns>
    private static bool LastRequestFailed(Browser browser)
    {
        if (browser.LastWebException != null)
        {
            browser.Log("There was an error loading the page: " + browser.LastWebException.Message);
            return true;
        }

        return false;
    }

    /// <summary>Echoes every browser log message to the console.</summary>
    /// <param name="browser">Browser that raised the event.</param>
    /// <param name="log">Message text.</param>
    private static void OnBrowserMessageLogged(Browser browser, string log)
    {
        Console.WriteLine(log);
    }

    /// <summary>Prints the method, URL and response code of each logged request.</summary>
    /// <param name="req">Browser that raised the event.</param>
    /// <param name="log">Details of the logged request.</param>
    private static void OnBrowserRequestLogged(Browser req, HttpRequestLog log)
    {
        Console.WriteLine(" -> " + log.Method + " request to " + log.Url);
        Console.WriteLine(" <- Response status code: " + log.ResponseCode);
    }

    /// <summary>
    /// Writes <paramref name="text"/> to a file named <paramref name="filename"/>
    /// inside the "Logs" directory next to the application, creating the
    /// directory when needed.
    /// </summary>
    /// <param name="filename">File name (without directory) to write.</param>
    /// <param name="text">Content of the file.</param>
    /// <returns>Full path of the written file.</returns>
    private static string WriteFile(string filename, string text)
    {
        // CreateDirectory does nothing when the directory already exists, so no
        // separate existence check is required.
        DirectoryInfo dir = Directory.CreateDirectory(
            Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Logs"));

        string path = Path.Combine(dir.FullName, filename);
        File.WriteAllText(path, text);
        return path;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// View render service for the HTML log formatter that compiles and runs
/// Razor templates through RazorEngine.
/// </summary>
public class RenderService : HtmlLogFormatter.IViewRenderService
{
    /// <summary>
    /// Compiles and runs <paramref name="template"/> against <paramref name="model"/>.
    /// </summary>
    /// <typeparam name="TModel">Declared type of the model.</typeparam>
    /// <param name="template">Razor template source.</param>
    /// <param name="title">
    /// Passed to RazorEngine as the second argument of <c>RunCompile</c>.
    /// NOTE(review): RazorEngine appears to use that argument as the template's
    /// cache name - confirm that different templates never share a title.
    /// </param>
    /// <param name="model">Model instance handed to the template.</param>
    /// <returns>The string produced by <c>RunCompile</c>.</returns>
    public string RenderToString<TModel>(string template, string title, TModel model)
    {
        // Use the runtime type when a model is supplied; fall back to the declared
        // generic type so a null model does not throw a NullReferenceException.
        Type modelType = model?.GetType() ?? typeof(TModel);
        return RazorEngine.Engine.Razor.RunCompile(template, title, modelType, model);
    }
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
62
WebScrape/Program.cs
Normal file
62
WebScrape/Program.cs
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
using ScrapySharp.Network;
|
||||||
|
using ScrapySharp.Extensions;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net.Http;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Xml;
|
||||||
|
|
||||||
|
namespace WebScrape
|
||||||
|
{
|
||||||
|
/// <summary>
/// Console sample that downloads a stock listing page with ScrapySharp.
/// </summary>
class Program
{
    // Single browser instance shared by every request; it is never reassigned.
    private static readonly ScrapingBrowser _scrapingBrowser = new ScrapingBrowser();

    /// <summary>
    /// Downloads the Avanza stock list page and then waits for a key press.
    /// The downloaded HTML is currently discarded.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // An earlier experiment targeted https://www.di.se/bors/aktier instead.
        GetHtml("https://www.avanza.se/aktier/lista.html");

        Console.ReadKey();
    }

    /// <summary>
    /// Navigates the shared browser to <paramref name="url"/> and returns the page's HTML node.
    /// </summary>
    /// <param name="url">Absolute address of the page to load.</param>
    /// <returns>The <c>Html</c> node of the loaded page.</returns>
    static HtmlNode GetHtml(string url)
    {
        WebPage webPage = _scrapingBrowser.NavigateToPage(new Uri(url));
        return webPage.Html;
    }
}
|
||||||
|
}
|
||||||
13
WebScrape/WebScrape.csproj
Normal file
13
WebScrape/WebScrape.csproj
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net5.0</TargetFramework>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.34" />
|
||||||
|
<PackageReference Include="ScrapySharp" Version="3.0.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
37
WebScrapeApp.sln
Normal file
37
WebScrapeApp.sln
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 16
|
||||||
|
VisualStudioVersion = 16.0.31313.79
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebScrape", "WebScrape\WebScrape.csproj", "{DF64E211-B223-48C5-83F8-60792D8DFB04}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CsharpCorner", "CsharpCorne\CsharpCorner.csproj", "{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeadlessBrowser", "HeadlessBrowser\HeadlessBrowser.csproj", "{F0E685F6-9B05-42D1-B2F2-B14C861668A3}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{DF64E211-B223-48C5-83F8-60792D8DFB04}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{BF147D4E-3654-4FF7-9B71-7E1B67CE7B0D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{F0E685F6-9B05-42D1-B2F2-B14C861668A3}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {998091CC-0A32-4109-B1B5-0AA12A3E5D5C}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
Reference in New Issue
Block a user