dags att checka in

This commit is contained in:
2021-08-02 12:41:02 +02:00
parent 5648effc9a
commit 668659bf20
34 changed files with 1092 additions and 5 deletions

73
OceanNetWorks/Form1.Designer.cs generated Normal file
View File

@ -0,0 +1,73 @@

namespace OceanNetWorks
{
/// <summary>
/// Designer-generated half of the <c>Form1</c> partial class: declares the form's
/// controls, builds them in <see cref="InitializeComponent"/> and releases the
/// component container in <see cref="Dispose(bool)"/>.
/// </summary>
partial class Form1
{
/// <summary>
/// Required designer variable. It is initialized to null and never assigned in this
/// file, which is why <see cref="Dispose(bool)"/> null-checks it before disposing.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used: disposes the <see cref="components"/> container
/// (only when <paramref name="disposing"/> is true and a container exists) and then
/// always forwards to the base class implementation.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// Creates the rich text box and the button, sets their position, size and text,
/// hooks the button's Click event and adds both controls to the form.
/// </summary>
private void InitializeComponent()
{
// Instantiate the controls, then suspend layout until every property below is set.
this.scraperBox = new System.Windows.Forms.RichTextBox();
this.btnScrape = new System.Windows.Forms.Button();
this.SuspendLayout();
//
// scraperBox
//
// Output area: starts empty and is first in the tab order.
this.scraperBox.Location = new System.Drawing.Point(12, 12);
this.scraperBox.Name = "scraperBox";
this.scraperBox.Size = new System.Drawing.Size(768, 680);
this.scraperBox.TabIndex = 0;
this.scraperBox.Text = "";
//
// btnScrape
//
// "Start" button placed below the text box.
this.btnScrape.Location = new System.Drawing.Point(364, 711);
this.btnScrape.Name = "btnScrape";
this.btnScrape.Size = new System.Drawing.Size(75, 23);
this.btnScrape.TabIndex = 1;
this.btnScrape.Text = "Start";
this.btnScrape.UseVisualStyleBackColor = true;
// btnScrape_Click is implemented in the hand-written part of this partial class (Form1.cs).
this.btnScrape.Click += new System.EventHandler(this.btnScrape_Click);
//
// Form1
//
// Form-level settings: font-based auto scaling, client area size, child controls and caption.
this.AutoScaleDimensions = new System.Drawing.SizeF(7F, 15F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(792, 758);
this.Controls.Add(this.btnScrape);
this.Controls.Add(this.scraperBox);
this.Name = "Form1";
this.Text = "WebScraper";
// Resume layout; passing false means no immediate layout pass is forced here.
this.ResumeLayout(false);
}
#endregion
// Text area that receives the scrape output.
private System.Windows.Forms.RichTextBox scraperBox;
// Button whose Click event is wired to btnScrape_Click in InitializeComponent.
private System.Windows.Forms.Button btnScrape;
}
}

107
OceanNetWorks/Form1.cs Normal file
View File

@ -0,0 +1,107 @@
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using AngleSharp.Text;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OceanNetWorks
{
    /// <summary>
    /// Main window of the scraper. On button click it downloads the news listing at
    /// <c>siteUrl</c>, selects the listing entries whose surrounding markup mentions any of
    /// <see cref="QueryTerms"/>, and writes one "title - url" line per entry to the text box.
    /// </summary>
    public partial class Form1 : Form
    {
        // One client for the lifetime of the process: creating an HttpClient per request
        // leaves sockets lingering and can exhaust them under repeated use.
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        // CSS class string carried by the listing entries that hold the article link.
        private const string ArticleClassName = "views-field views-field-nothing";

        // Page that is downloaded and searched.
        private readonly string siteUrl = "https://www.oceannetworks.ca/news/stories";

        /// <summary>
        /// Creates the form and builds its controls.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        // Title and Url hold the values parsed from the most recently cleaned-up element;
        // they are overwritten by every call to SplitResults.
        private string Title { get; set; }

        private string Url { get; set; }

        /// <summary>
        /// Search terms; an entry is reported when its parent markup contains at least one of them
        /// (compared case-insensitively).
        /// </summary>
        public string[] QueryTerms { get; } = { "Ocean", "Nature", "Pollution" };

        /// <summary>
        /// Starts a scrape and shows the outcome in the text box. This is a fire-and-forget
        /// entry point (kept as <c>async void</c> for existing callers), so failures are
        /// reported in the UI here instead of escaping as unhandled exceptions.
        /// The start button is disabled while a scrape is running to prevent overlapping runs.
        /// </summary>
        internal async void ScrapeWebsite()
        {
            btnScrape.Enabled = false;
            try
            {
                await ScrapeWebsiteAsync();
            }
            catch (HttpRequestException ex)
            {
                // Network failure or non-success HTTP status.
                scraperBox.Text = $"Could not download {siteUrl}: {ex.Message}{Environment.NewLine}";
            }
            catch (TaskCanceledException)
            {
                // HttpClient signals its timeout by cancelling the request task.
                scraperBox.Text = $"The request to {siteUrl} timed out.{Environment.NewLine}";
            }
            finally
            {
                btnScrape.Enabled = true;
            }
        }

        /// <summary>
        /// Downloads the page, parses it and hands the document to <see cref="GetScrapeResults"/>.
        /// </summary>
        /// <returns>A task that completes when the results have been printed.</returns>
        /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
        private async Task ScrapeWebsiteAsync()
        {
            using (HttpResponseMessage response = await SharedHttpClient.GetAsync(siteUrl))
            {
                // Do not try to parse an error page as if it were the listing.
                response.EnsureSuccessStatusCode();

                using (Stream body = await response.Content.ReadAsStreamAsync())
                {
                    HtmlParser parser = new HtmlParser();
                    IHtmlDocument document = parser.ParseDocument(body);
                    GetScrapeResults(document);
                }
            }
        }

        /// <summary>
        /// Collects every listing entry that matches any query term and prints them.
        /// Nothing is printed when no entry matches.
        /// </summary>
        /// <param name="document">Parsed listing page.</param>
        private void GetScrapeResults(IHtmlDocument document)
        {
            // A single pass over the document that tests all terms per element; the result is
            // materialized so the query is not re-run when it is counted and then printed.
            List<IElement> articleLinks = document.All
                .Where(x => x.ClassName == ArticleClassName)
                .Where(x => MentionsAnyQueryTerm(x.ParentElement?.InnerHtml))
                .ToList();

            if (articleLinks.Count > 0)
            {
                PrintResults(articleLinks);
            }
        }

        /// <summary>
        /// Tells whether <paramref name="html"/> contains at least one of <see cref="QueryTerms"/>,
        /// ignoring case.
        /// </summary>
        /// <param name="html">Markup to search; null or empty never matches.</param>
        /// <returns>true when any term occurs in the markup.</returns>
        private bool MentionsAnyQueryTerm(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return false;
            }

            return QueryTerms.Any(term => html.IndexOf(term, StringComparison.OrdinalIgnoreCase) >= 0);
        }

        /// <summary>
        /// Writes one "title - url" line per element to the text box, replacing its previous content.
        /// The text box is left untouched when the sequence is empty.
        /// </summary>
        /// <param name="articleLink">Listing entries to print.</param>
        /// <exception cref="ArgumentNullException"><paramref name="articleLink"/> is null.</exception>
        public void PrintResults(IEnumerable<IElement> articleLink)
        {
            if (articleLink == null)
            {
                throw new ArgumentNullException(nameof(articleLink));
            }

            // Build the whole text first: assigning Text inside the loop would keep only the last entry.
            StringBuilder output = new StringBuilder();
            foreach (IElement element in articleLink)
            {
                CleanUpResults(element);
                output.Append($"{Title} - {Url}{Environment.NewLine}");
            }

            if (output.Length > 0)
            {
                scraperBox.Text = output.ToString();
            }
        }

        /// <summary>
        /// Reduces the entry's inner HTML to "url*title" by replacing the known surrounding markup,
        /// then stores the two parts in <see cref="Url"/> and <see cref="Title"/>.
        /// </summary>
        /// <param name="result">Listing entry whose inner HTML is parsed.</param>
        private void CleanUpResults(IElement result)
        {
            // Leading markup up to the href value becomes the site prefix of the absolute URL.
            string htmlResult = result.InnerHtml.ReplaceFirst(" <span class=\"field-content\"><div><a href=\"", "https://www.oceannetworks.ca");
            // The first remaining "\">" closes the href attribute; '*' marks the url/title boundary.
            htmlResult = htmlResult.ReplaceFirst("\">", "*");
            htmlResult = htmlResult.ReplaceFirst("</a></div>\n<div class=\"article-title-top\">", "-");
            htmlResult = htmlResult.ReplaceFirst("</div>\n<hr></span> ", "");
            SplitResults(htmlResult);
        }

        /// <summary>
        /// Splits "url*title" at the first '*'. When no separator is present the whole text is
        /// treated as the URL and the title is empty, so a malformed entry neither throws nor
        /// reuses the title of the previous entry.
        /// </summary>
        /// <param name="htmlResult">Cleaned-up text produced by <see cref="CleanUpResults"/>.</param>
        private void SplitResults(string htmlResult)
        {
            // Limit to two parts so a '*' inside the title is preserved.
            string[] splitResults = htmlResult.Split(new[] { '*' }, 2);
            Url = splitResults[0];
            Title = splitResults.Length > 1 ? splitResults[1] : string.Empty;
        }

        /// <summary>
        /// Click handler of the start button (wired in the designer file): starts a scrape.
        /// </summary>
        private void btnScrape_Click(object sender, EventArgs e)
        {
            ScrapeWebsite();
        }
    }
}

60
OceanNetWorks/Form1.resx Normal file
View File

@ -0,0 +1,60 @@
<root>
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>WinExe</OutputType>
<TargetFramework>net5.0-windows</TargetFramework>
<UseWindowsForms>true</UseWindowsForms>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="0.12.1" />
</ItemGroup>
</Project>

23
OceanNetWorks/Program.cs Normal file
View File

@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OceanNetWorks
{
    /// <summary>
    /// Holds the process entry point of the Windows Forms application.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Entry point: applies the application-wide UI settings (system-aware high DPI,
        /// visual styles, default text rendering) and then runs the message loop with
        /// <see cref="Form1"/> as the main window.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            // These settings are applied before the first window is created.
            Application.SetHighDpiMode(HighDpiMode.SystemAware);
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            Form1 mainWindow = new Form1();
            Application.Run(mainWindow);
        }
    }
}