Replace Linqpad-only methods and split into files (works except Hyperlinq)
This commit is contained in:
parent
ccfeedc067
commit
9692dc531f
83
Config.cs
Normal file
83
Config.cs
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
using WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2;
|
||||||
|
|
||||||
|
/// <summary>
/// Compile-time configuration of the scraper: filesystem locations, the catalogue of known
/// books and the switches selecting what a run does.
/// </summary>
public class Config
{
    // Machine-specific absolute paths; how they are used is decided by code outside this class.
    public const string BASE_DIR_STASH = @"/home/mike/stash/eBook_scraper/";
    public const string BASE_DIR_OUT = @"/home/mike/Nextcloud/Dokumente/E-Books/Scraper/";
    public const string COMPARE_PROG = @"/usr/bin/bcompare";

    //----------------------------------------------------------------------------------------------------//

    // Catalogue of known books. Arguments are either
    //   (site, title, author, release "yyyy-MM-dd", language, start URL)   or
    //   (site, series, series index, title, author, release "yyyy-MM-dd", language, start URL).

    public static readonly EpubParameter PH1 = new(Site.WP, "Parahumans", 1, "Worm", "John McCrae", "2011-06-11", "en", @"https://parahumans.wordpress.com/2011/06/11/1-1/");
    public static readonly EpubParameter PH2 = new(Site.WP, "Parahumans", 2, "Ward", "John McCrae", "2017-10-21", "en", @"https://www.parahumans.net/2017/10/21/glow-worm-0-1/");
    public static readonly EpubParameter PACT = new(Site.WP, "Pact", "John McCrae", "2013-12-17", "en", @"https://pactwebserial.wordpress.com/2013/12/17/bonds-1-1/");
    public static readonly EpubParameter TWIG = new(Site.WP, "Twig", "John McCrae", "2014-12-24", "en", @"https://twigserial.wordpress.com/2014/12/24/taking-root-1-1/");
    public static readonly EpubParameter PALE = new(Site.WP, "Pale", "John McCrae", "2020-05-05", "en", @"https://palewebserial.wordpress.com/2020/05/05/blood-run-cold-0-0/");

    public static readonly EpubParameter APGTE1 = new(Site.WP, "A Practical Guide to Evil", 1, "A Practical Guide to Evil I", "David Verburg", "2015-03-24", "en", @"https://practicalguidetoevil.wordpress.com/2015/03/25/prologue/");
    public static readonly EpubParameter APGTE2 = new(Site.WP, "A Practical Guide to Evil", 2, "A Practical Guide to Evil II", "David Verburg", "2015-11-04", "en", @"https://practicalguidetoevil.wordpress.com/2015/11/04/prologue-2/");
    public static readonly EpubParameter APGTE3 = new(Site.WP, "A Practical Guide to Evil", 3, "A Practical Guide to Evil III", "David Verburg", "2017-02-08", "en", @"https://practicalguidetoevil.wordpress.com/2017/02/08/prologue-3/");
    public static readonly EpubParameter APGTE4 = new(Site.WP, "A Practical Guide to Evil", 4, "A Practical Guide to Evil IV", "David Verburg", "2018-04-09", "en", @"https://practicalguidetoevil.wordpress.com/2018/04/09/prologue-4/");
    public static readonly EpubParameter APGTE5 = new(Site.WP, "A Practical Guide to Evil", 5, "A Practical Guide to Evil V", "David Verburg", "2019-01-05", "en", @"https://practicalguidetoevil.wordpress.com/2019/01/14/prologue-5/");
    public static readonly EpubParameter APGTE6 = new(Site.WP, "A Practical Guide to Evil", 6, "A Practical Guide to Evil VI", "David Verburg", "2020-01-06", "en", @"https://practicalguidetoevil.wordpress.com/2020/01/06/prologue-6/");
    public static readonly EpubParameter APGTE7 = new(Site.WP, "A Practical Guide to Evil", 7, "A Practical Guide to Evil VII", "David Verburg", "2021-03-02", "en", @"https://practicalguidetoevil.wordpress.com/2021/03/02/prologue-7/");

    public static readonly EpubParameter TDE1 = new(Site.WW, "The Divine Elements", 1, "The Blood Legacy", "Daman Dasi", "2016-04-06", "en", @"http://www.wuxiaworld.com/tde-index/tde-chapter-0/");
    public static readonly EpubParameter TDE2 = new(Site.WW, "The Divine Elements", 2, "The Desolate Mountains", "Daman Dasi", "2016-07-09", "en", @"http://www.wuxiaworld.com/tde-index/tde-chapter-61/");
    public static readonly EpubParameter TDE3 = new(Site.WW, "The Divine Elements", 3, "Scion of Raizel", "Daman Dasi", "2017-06-15", "en", @"http://www.wuxiaworld.com/tde-index/tde-chapter-115/");
    public static readonly EpubParameter TDE4 = new(Site.WW, "The Divine Elements", 4, "The Seventh Tower", "Daman Dasi", "2017-08-07", "en", @"http://www.wuxiaworld.com/tde-index/tde-chapter-179/");

    public static readonly EpubParameter SOTL = new(Site.WP, "Shadows of the Limelight", "Alexander Wales", "2015-04-18", "en", @"http://alexanderwales.com/shadows-of-the-limelight-ch-1-the-rooftop-races/");

    public static readonly EpubParameter UNSONG = new(Site.WP, "Unsong", "Scott Alexander", "2015-12-08", "en", @"http://unsongbook.com/prologue-2/");

    public static readonly EpubParameter TGAB1_1 = new(Site.WP, "The Gods are Bastards", 1, "What Fresh Hell", "D. D. Webb", "2014-08-20", "en", @"https://tiraas.net/2014/08/20/book-1-prologue/");
    public static readonly EpubParameter TGAB1_2 = new(Site.WP, "The Gods are Bastards", 2, "Spacious Skies, Amber Waves", "D. D. Webb", "2014-10-10", "en", @"https://tiraas.net/2014/10/10/2-1/");
    public static readonly EpubParameter TGAB1_3 = new(Site.WP, "The Gods are Bastards", 3, "A Fistful of Blood", "D. D. Webb", "2014-12-01", "en", @"https://tiraas.net/2014/12/01/3-1/");
    public static readonly EpubParameter TGAB1_4 = new(Site.WP, "The Gods are Bastards", 4, "This Town Ain't Big Enough", "D. D. Webb", "2014-12-24", "en", @"https://tiraas.net/2014/12/24/4-1/");
    public static readonly EpubParameter TGAB2_1 = new(Site.WP, "The Gods are Bastards", 5, "The Streets Where You Live", "D. D. Webb", "2015-02-24", "en", @"https://tiraas.net/2015/02/24/volume-2-prologue/");
    public static readonly EpubParameter TGAB2_2 = new(Site.WP, "The Gods are Bastards", 6, "Crawling Chaos", "D. D. Webb", "2015-05-20", "en", @"https://tiraas.net/2015/05/20/6-1/");
    public static readonly EpubParameter TGAB2_3 = new(Site.WP, "The Gods are Bastards", 7, "Hath No Fury", "D. D. Webb", "2015-08-03", "en", @"https://tiraas.net/2015/08/03/7-1/");
    public static readonly EpubParameter TGAB3_1 = new(Site.WP, "The Gods are Bastards", 8, "The Mind and the Sword", "D. D. Webb", "2015-09-14", "en", @"https://tiraas.net/2015/09/14/prologue-volume-3/");
    public static readonly EpubParameter TGAB3_2 = new(Site.WP, "The Gods are Bastards", 9, "Draw", "D. D. Webb", "2015-11-23", "en", @"https://tiraas.net/2015/11/23/9-1/");
    public static readonly EpubParameter TGAB3_3 = new(Site.WP, "The Gods are Bastards", 10, "And Justice for All", "D. D. Webb", "2016-02-29", "en", @"https://tiraas.net/2016/02/29/10-1/");
    public static readonly EpubParameter TGAB4_1 = new(Site.WP, "The Gods are Bastards", 11, "If You Can Make It Here", "D. D. Webb", "2016-07-29", "en", @"https://tiraas.net/2016/07/29/prologue-volume-4/");
    public static readonly EpubParameter TGAB4_2 = new(Site.WP, "The Gods are Bastards", 12, "Sleeper", "D. D. Webb", "2016-11-18", "en", @"https://tiraas.net/2016/11/18/12-1/");
    public static readonly EpubParameter TGAB4_3 = new(Site.WP, "The Gods are Bastards", 13, "From Sea to Stormy Sea", "D. D. Webb", "2017-08-07", "en", @"https://tiraas.net/2017/08/07/13-1/");
    public static readonly EpubParameter TGAB5_1 = new(Site.WP, "The Gods are Bastards", 14, "Themselves Contend", "D. D. Webb", "2018-04-16", "en", @"https://tiraas.net/2018/04/16/prologue-volume-5/");
    public static readonly EpubParameter TGAB5_2 = new(Site.WP, "The Gods are Bastards", 15, "The Fae, the Fell, and the Holy", "D. D. Webb", "2018-12-14", "en", @"https://tiraas.net/2018/12/14/15-1/");

    public static readonly EpubParameter NSTAR_1 = new(Site.WP, "Netherstar", 1, "Awakening", "D. D. Webb", "2019-01-26", "en", @"https://netherstar.net/2019/01/26/chapter-1-i-meant-to-do-that/");

    public static readonly EpubParameter CHESTS = new(Site.RR, "Everybody Loves Large Chests", "Neven Iliev", "2016-10-27", "en", @"https://www.royalroad.com/fiction/8894/everybody-loves-large-chests/chapter/99919/prologue");

    public static readonly EpubParameter MWC = new(Site.RR, "Metaworld Chronicles", "Wutosama", "2018-09-22", "en", @"https://www.royalroad.com/fiction/14167/metaworld-chronicles/chapter/163574/chapter-1-some-things-begin-something-ends");

    public static readonly EpubParameter WTC = new(Site.RR, "Worth the Candle", "Alexander Wales", "2017-07-14", "en", @"https://www.royalroad.com/fiction/25137/worth-the-candle/chapter/366577/taking-the-fall");

    public static readonly EpubParameter WLD = new(Site.WP, "What Lies Dreaming", "Eneasz Brodski", "2018-11-11", "en", @"http://whatliesdreaming.com/1-joah/");

    public static readonly EpubParameter WI = new(Site.WP, "The Wandering Inn", "pirateaba", "2016-06-27", "en", @"https://wanderinginn.com/2016/07/27/1-00/");

    public static readonly EpubParameter RTW = new(Site.WW, "Release that Witch", "Er Mu", "2019-09-02", "en", @"https://www.wuxiaworld.co/Release-that-Witch/1235444.html");

    public static readonly EpubParameter MOL = new(Site.RR, "Mother of Learning", "Domagoj Kurmaic", "2019-11-03", "en", @"https://www.royalroad.com/fiction/21220/mother-of-learning/chapter/301778/1-good-morning-brother");

    public static readonly EpubParameter TML = new(Site.RR, "The Menocht Loop", "caerulex", "2020-04-10", "en", @"https://www.royalroad.com/fiction/31514/the-menocht-loop/chapter/479082/1-yet-again");

    public static readonly EpubParameter TPR = new(Site.RR, "The Perfect Run", "Maxime J. Durand", "2020-10-14", "en", @"https://www.royalroad.com/fiction/36735/the-perfect-run/chapter/569225/1-quicksave");

    //----------------------------------------------------------------------------------------------------//

    // Must stay below the catalogue: static initializers run in textual order, so the entries
    // referenced here have to be constructed first.
    public static readonly EpubParameter[] BOOKS = { TPR };

    // Feature switches; they are read by code outside this class.
    public static readonly bool USE_WEBCACHE = true;
    public static readonly bool DO_LIVE_RELOAD_OF_LAST = true;
    public static readonly bool CONVERT_MOBI = true;

    /// <summary>Operation selected for the run; dispatched on in the program entry point.</summary>
    public static readonly MainMode MODE = MainMode.Generate;
}
|
1662
Program.cs
1662
Program.cs
File diff suppressed because it is too large
Load Diff
17
Scraper/Chapter.cs
Normal file
17
Scraper/Chapter.cs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// Plain data holder for one chapter. Members are public fields and are kept in their original
/// declaration order.
/// </summary>
public class Chapter
{
    public string url;
    public string title;
    public string next;

    // Larger text payloads, wrapped so they are stored compressed when XML-serialized.
    public GZippedString queryResult;
    public GZippedString sourcecode;
    public GZippedString chapter;

    public bool isPrologue;
    public bool isEpilogue;
    public bool isBonus;

    /// <summary>True when at least one of the prologue / epilogue / bonus flags is set.</summary>
    public bool isSpecial
    {
        get { return isPrologue || isEpilogue || isBonus; }
    }
}
|
47
Scraper/EpubParameter.cs
Normal file
47
Scraper/EpubParameter.cs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
using System.Globalization;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// Immutable description of one book to scrape: bibliographic metadata, the URL to start at,
/// the output folder name and two identifiers derived from title and author.
/// </summary>
public class EpubParameter
{
    /// <summary>Series name, or <c>null</c> for a standalone book.</summary>
    public readonly string Series;
    /// <summary>Position inside <see cref="Series"/>; -1 when created via the standalone constructor.</summary>
    public readonly int SeriesIndex;
    /// <summary>Identifier derived deterministically from <see cref="Title"/> and <see cref="Author"/>.</summary>
    public readonly Guid ID_OPF;
    /// <summary>Second identifier derived deterministically from <see cref="Title"/> and <see cref="Author"/>.</summary>
    public readonly Guid ID_CAL;
    public readonly string Title;
    public readonly string Author;
    public readonly DateTime Release;
    public readonly string Language;
    public readonly string StartURL;
    public readonly string Foldername;
    public readonly Site SiteType;

    /// <summary>
    /// Author in "Last, First Middle" form; the last space-separated word is treated as the
    /// surname. Names consisting of a single word are returned unchanged.
    /// </summary>
    public string AuthorSort
    {
        get
        {
            var parts = Author.Split(' ', StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2) return Author;

            // The previous pairwise fold produced "Webb, D., D." for "D. D. Webb"; only the
            // surname is moved to the front now.
            return parts[parts.Length - 1] + ", " + string.Join(" ", parts, 0, parts.Length - 1);
        }
    }

    /// <summary>Creates a standalone book (no series, series index -1).</summary>
    /// <param name="st">Site type the book is hosted on.</param>
    /// <param name="t">Title.</param>
    /// <param name="a">Author.</param>
    /// <param name="r">Release date, formatted exactly as <c>yyyy-MM-dd</c>.</param>
    /// <param name="l">Language code.</param>
    /// <param name="s">Start URL.</param>
    public EpubParameter(Site st, string t, string a, string r, string l, string s) : this(st, null, -1, t, a, r, l, s) { }

    /// <summary>Creates a book, optionally as part of a series.</summary>
    /// <param name="st">Site type the book is hosted on.</param>
    /// <param name="z">Series name, or <c>null</c> for a standalone book.</param>
    /// <param name="i">Index within the series.</param>
    /// <param name="t">Title.</param>
    /// <param name="a">Author.</param>
    /// <param name="r">Release date, formatted exactly as <c>yyyy-MM-dd</c>.</param>
    /// <param name="l">Language code.</param>
    /// <param name="s">Start URL.</param>
    /// <exception cref="FormatException"><paramref name="r"/> is not a valid <c>yyyy-MM-dd</c> date.</exception>
    public EpubParameter(Site st, string z, int i, string t, string a, string r, string l, string s)
    {
        SiteType = st;
        Series = z;
        SeriesIndex = i;
        Title = t;
        Author = a;
        Release = DateTime.ParseExact(r, "yyyy-MM-dd", CultureInfo.InvariantCulture);
        Language = l;
        StartURL = s;

        Foldername = (z == null)
            ? Helper.Filenamify(t)
            : $"{Helper.Filenamify(z)} {i} - {Helper.Filenamify(t)}";

        (ID_OPF, ID_CAL) = DeriveStableIds(Title, Author);
    }

    /// <summary>
    /// Derives two GUIDs from a SHA-256 digest of title and author.
    /// </summary>
    /// <remarks>
    /// The previous implementation seeded <see cref="Random"/> with <c>string.GetHashCode()</c>.
    /// On .NET Core / .NET 5+ string hash codes are randomized per process, so the identifiers
    /// changed on every run. A cryptographic digest of the UTF-8 text is stable across runs,
    /// machines and runtime versions.
    /// </remarks>
    private static (Guid Opf, Guid Cal) DeriveStableIds(string title, string author)
    {
        byte[] digest;
        using (var sha = System.Security.Cryptography.SHA256.Create())
        {
            // '\n' separates the two parts so ("ab", "c") and ("a", "bc") do not collide.
            digest = sha.ComputeHash(System.Text.Encoding.UTF8.GetBytes(title + "\n" + author));
        }

        // SHA-256 yields 32 bytes: first half -> OPF id, second half -> CAL id.
        var opf = new byte[16];
        var cal = new byte[16];
        Buffer.BlockCopy(digest, 0, opf, 0, 16);
        Buffer.BlockCopy(digest, 16, cal, 0, 16);
        return (new Guid(opf), new Guid(cal));
    }

    /// <summary>Human-readable label: the title alone, or "Series Index - Title" for series books.</summary>
    public String DisplayStr => (Series == null) ? $"{Title}" : $"{Series} {SeriesIndex} - {Title}";
}
|
9
Scraper/Extensions.cs
Normal file
9
Scraper/Extensions.cs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>Console stand-ins for helpers that previously came from the LINQPad environment.</summary>
public static class Extensions
{
    /// <summary>Writes <paramref name="str"/> followed by a line terminator to standard output.</summary>
    public static void Dump(this string str) => Console.Out.WriteLine(str);
}
|
57
Scraper/GZippedString.cs
Normal file
57
Scraper/GZippedString.cs
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
using System.IO.Compression;
|
||||||
|
using System.Text;
|
||||||
|
using System.Xml.Serialization;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// String wrapper whose XML representation is Base64 text containing a 4-byte length prefix
/// (the UTF-8 byte count of the original text, in the platform byte order used by
/// <see cref="BitConverter"/>) followed by the GZip-compressed UTF-8 bytes.
/// </summary>
public class GZippedString : IXmlSerializable
{
    /// <summary>The uncompressed text.</summary>
    public string Value { get; set; }

    /// <summary>No schema is provided.</summary>
    public System.Xml.Schema.XmlSchema GetSchema() { return null; }

    /// <summary>
    /// Reads the element the reader is positioned on and stores its decompressed content in
    /// <see cref="Value"/>. An empty element yields an empty string.
    /// </summary>
    public void ReadXml(System.Xml.XmlReader reader)
    {
        // ReadElementContentAsString consumes start tag, text and end tag in one go and also
        // copes with self-closing elements, where ReadString + ReadEndElement would throw.
        var payload = reader.ReadElementContentAsString();
        Value = payload.Length == 0 ? string.Empty : DecompressString(payload);
    }

    /// <summary>Writes the compressed, Base64-encoded form of <see cref="Value"/>.</summary>
    public void WriteXml(System.Xml.XmlWriter writer)
    {
        // A null Value is written as the empty string instead of crashing the serializer.
        writer.WriteString(CompressString(Value ?? string.Empty));
    }

    /// <summary>Compresses <paramref name="text"/> into the length-prefixed Base64 format.</summary>
    private static string CompressString(string text)
    {
        byte[] raw = Encoding.UTF8.GetBytes(text);

        using (var compressed = new MemoryStream())
        {
            // The GZip stream must be closed before reading the result so its footer is flushed;
            // leaveOpen keeps the MemoryStream usable afterwards.
            using (var gzip = new GZipStream(compressed, CompressionMode.Compress, true))
            {
                gzip.Write(raw, 0, raw.Length);
            }

            byte[] body = compressed.ToArray();
            var framed = new byte[body.Length + 4];
            Buffer.BlockCopy(BitConverter.GetBytes(raw.Length), 0, framed, 0, 4);
            Buffer.BlockCopy(body, 0, framed, 4, body.Length);
            return Convert.ToBase64String(framed);
        }
    }

    /// <summary>Inverse of <see cref="CompressString"/>.</summary>
    /// <exception cref="FormatException">The text is not valid Base64.</exception>
    /// <exception cref="InvalidDataException">The payload is truncated or not valid GZip data.</exception>
    private static string DecompressString(string compressedText)
    {
        byte[] framed = Convert.FromBase64String(compressedText);
        if (framed.Length < 4)
        {
            throw new InvalidDataException("Compressed payload is shorter than its 4-byte length prefix.");
        }

        int dataLength = BitConverter.ToInt32(framed, 0);
        if (dataLength < 0)
        {
            throw new InvalidDataException("Compressed payload declares a negative length.");
        }

        var buffer = new byte[dataLength];

        using (var source = new MemoryStream(framed, 4, framed.Length - 4, false))
        using (var gzip = new GZipStream(source, CompressionMode.Decompress))
        {
            // Stream.Read may return fewer bytes than requested (GZipStream regularly does on
            // .NET 6+), so keep reading until the declared length has been filled.
            int filled = 0;
            while (filled < buffer.Length)
            {
                int read = gzip.Read(buffer, filled, buffer.Length - filled);
                if (read == 0)
                {
                    throw new InvalidDataException("Compressed payload ended before the declared length was reached.");
                }
                filled += read;
            }
        }

        return Encoding.UTF8.GetString(buffer);
    }

    /// <summary>Wraps a string; a null string yields a wrapper whose <see cref="Value"/> is null.</summary>
    public static implicit operator GZippedString(string v) => new GZippedString { Value = v };

    /// <summary>Unwraps the string; a null wrapper converts to a null string instead of throwing.</summary>
    public static implicit operator string(GZippedString v) => v?.Value;
}
|
102
Scraper/Helper.cs
Normal file
102
Scraper/Helper.cs
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>Stateless text, HTML and URL helpers used by the scraper.</summary>
public class Helper
{
    /// <summary>
    /// Reduces <paramref name="v"/> to a whitelist of characters: ASCII letters and digits,
    /// space, '.', '-', '*', '_' and ','. Non-breaking spaces (U+00A0) are turned into normal
    /// spaces first.
    /// </summary>
    /// <param name="v">Text to sanitize.</param>
    /// <param name="repl">When true, remaining spaces are replaced with underscores.</param>
    /// <returns>The sanitized text.</returns>
    public static string Filenamify(string v, bool repl = false)
    {
        var kept = v.Replace((char)160, ' ').ToCharArray().Where(IsFilenameChar).ToArray();
        var s = new String(kept);

        if (repl) s = s.Replace(' ', '_');

        return s;
    }

    // Whitelist used by Filenamify.
    // NOTE(review): '*' is kept exactly as before; it is not a legal filename character on
    // every filesystem - confirm this is intended.
    private static bool IsFilenameChar(char p)
    {
        return (p >= '0' && p <= '9')
            || (p >= 'A' && p <= 'Z')
            || (p >= 'a' && p <= 'z')
            || p == ' '
            || p == '.'
            || p == '-'
            || p == '*'
            || p == '_'
            || p == ',';
    }

    /// <summary>
    /// Normalizes a raw chapter title: decodes HTML entities, replaces en dashes and
    /// non-breaking spaces, strips surrounding whitespace and '-', ':', '_', '#' decoration,
    /// removes a leading "tde" (any casing) and upper-cases the first character.
    /// </summary>
    public static string TitleFmt(string raw)
    {
        raw = HtmlEntity.DeEntitize(raw);

        raw = raw.Replace('–', '-');
        raw = raw.Replace((char)160, ' ');

        raw = TrimDecoration(raw);

        // Ordinal comparison: the prefix check must not depend on the machine's culture.
        if (raw.StartsWith("tde", StringComparison.OrdinalIgnoreCase)) raw = raw.Substring(3);

        raw = TrimDecoration(raw);

        // Previously required Length >= 2, which left one-character titles un-capitalised.
        if (raw.Length >= 1) raw = char.ToUpperInvariant(raw[0]) + raw.Substring(1);

        return raw;
    }

    // Removes outer whitespace, then outer '-', ':', '_', '#', then outer whitespace again.
    private static string TrimDecoration(string text)
    {
        return text.Trim().Trim('-', ':', '_', '#').Trim();
    }

    /// <summary>
    /// Returns the node's inner text reduced to lower-case letters, digits and spaces, after
    /// dropping script and meta elements. The node passed in is not modified; elements are
    /// removed from a copy.
    /// </summary>
    public static string Striptease(HtmlNode raw)
    {
        raw = WithoutElements(raw, "script");
        raw = WithoutElements(raw, "meta");

        return Striptease(HtmlEntity.DeEntitize(raw.InnerText));
    }

    // Returns the node itself when the query finds nothing, otherwise a copy from which all
    // matches have been removed.
    // NOTE(review): the "//" prefix is kept from the original queries; in XPath it is
    // document-absolute rather than relative to the node - confirm whether ".//" was intended.
    private static HtmlNode WithoutElements(HtmlNode node, string tagName)
    {
        var xpath = "//" + tagName;

        var found = node.SelectNodes(xpath);
        if (found == null || !found.Any()) return node;

        var copy = HtmlNode.CreateNode($"<{node.Name}></{node.Name}>");
        copy.CopyFrom(node);

        var doomed = copy.SelectNodes(xpath);
        if (doomed != null) foreach (var e in doomed) e.Remove();

        return copy;
    }

    /// <summary>
    /// Reduces text to lower-case letters, digits and spaces: every whitespace character
    /// becomes a space, all other non-alphanumeric characters are dropped, and the result is
    /// trimmed.
    /// </summary>
    public static string Striptease(string raw)
    {
        var normalized = raw
            .ToCharArray()
            .Select(c => char.IsWhiteSpace(c) ? ' ' : c)
            .Where(c => c == ' ' || char.IsLetterOrDigit(c))
            // Invariant casing keeps the result independent of the machine's culture.
            .Select(c => char.ToLowerInvariant(c));

        return string.Join(string.Empty, normalized).Trim();
    }

    /// <summary>
    /// Joins the scheme-and-authority part of <paramref name="url"/> with
    /// <paramref name="suffix"/> using exactly one '/' between them.
    /// </summary>
    /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
    public static string CombineAuthority(string url, string suffix)
    {
        var left = new Uri(url).GetLeftPart(UriPartial.Authority);
        if (!left.EndsWith("/", StringComparison.Ordinal)) left = left + "/";

        return left + suffix.TrimStart('/');
    }

    /// <summary>
    /// Replaces everything after the last '/' of <paramref name="uri1"/> with
    /// <paramref name="uri2"/>, joining the two parts with a single '/'.
    /// </summary>
    public static string CombineUri(string uri1, string uri2)
    {
        int lastSlash = uri1.LastIndexOf('/');
        if (lastSlash >= 0) uri1 = uri1.Substring(0, lastSlash);

        uri1 = uri1.TrimEnd('/');
        uri2 = uri2.TrimStart('/');

        return uri1 + "/" + uri2;
    }
}
|
18
Scraper/Hyperlinq.cs
Normal file
18
Scraper/Hyperlinq.cs
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// Console replacement for a clickable link: stores an action and a caption, and
/// <see cref="Dump"/> prints only the caption.
/// </summary>
public class Hyperlinq
{
    // NOTE(review): the action is stored but never invoked anywhere in this class.
    private readonly Action _action;
    private readonly string _title;

    /// <summary>Stores the callback and the caption.</summary>
    public Hyperlinq(Action action, string title)
    {
        _title = title;
        _action = action;
    }

    /// <summary>Writes the caption followed by a line terminator to standard output.</summary>
    public void Dump() => Console.Out.WriteLine(_title);
}
|
7
Scraper/MainMode.cs
Normal file
7
Scraper/MainMode.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>Top-level operation the program performs.</summary>
public enum MainMode
{
    Generate = 0,
    Verify = 1,
}
|
8
Scraper/ProcessResult.cs
Normal file
8
Scraper/ProcessResult.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>Outcome values for processing a single item.</summary>
public enum ProcessResult
{
    SuccessNormal = 0,
    ReachedEnd = 1,
    SkipChapter = 2,
}
|
1351
Scraper/Scraper.cs
Normal file
1351
Scraper/Scraper.cs
Normal file
File diff suppressed because it is too large
Load Diff
7
Scraper/SerializableCacheEntry.cs
Normal file
7
Scraper/SerializableCacheEntry.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// One cache record pairing a URL with content held in a <see cref="GZippedString"/>.
/// Members are public fields, kept in their original declaration order.
/// </summary>
public class SerializableCacheEntry
{
    /// <summary>The URL this entry belongs to.</summary>
    public string URL;

    /// <summary>The content stored for <see cref="URL"/>.</summary>
    public GZippedString Content;
}
|
12
Scraper/Site.cs
Normal file
12
Scraper/Site.cs
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>Kind of website a book is hosted on, with two-letter aliases for each value.</summary>
public enum Site
{
    Wordpress = 0,
    WuxiaWorld = 1,
    Royalroad = 2,

    // Short aliases; each has the same underlying value as its long form.
    WP = Wordpress,
    WW = WuxiaWorld,
    RR = Royalroad,
}
|
8
Scraper/Utf8StringWriter.cs
Normal file
8
Scraper/Utf8StringWriter.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
/// <summary>
/// A <see cref="StringWriter"/> that reports UTF-8 as its encoding instead of the base
/// class's default.
/// </summary>
public class Utf8StringWriter : StringWriter
{
    /// <summary>Always <see cref="Encoding.UTF8"/>.</summary>
    public override Encoding Encoding => Encoding.UTF8;
}
|
15
WPEbookScraper2.cs
Normal file
15
WPEbookScraper2.cs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
using WordpressEboobScraper2.Scraper;
|
||||||
|
|
||||||
|
namespace WordpressEboobScraper2;
|
||||||
|
|
||||||
|
/// <summary>Program entry point.</summary>
public class WPEbookScraper2
{
    /// <summary>
    /// Creates the scraper and runs the operation selected by <c>Config.MODE</c>.
    /// </summary>
    public static void Main()
    {
        // Qualified as Scraper.Scraper because the namespace and the class share a name.
        var scraper = new Scraper.Scraper();

        switch (Config.MODE)
        {
            case MainMode.Generate:
                scraper.Generate();
                break;

            case MainMode.Verify:
                scraper.Verify();
                break;
        }
    }
}
|
@ -7,4 +7,11 @@
|
|||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="DotNetZip" Version="1.16.0" />
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.51" />
|
||||||
|
<PackageReference Include="System.Text.Encoding.CodePages" Version="7.0.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
Loading…
Reference in New Issue
Block a user