From d981d092e42c8af1b1857e8212d36382339ae8db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20Schw=C3=B6rer?= <pubgit@mikescher.com>
Date: Tue, 3 Oct 2023 16:13:37 +0200
Subject: [PATCH] fix a few compiler warnings

---
 .../.idea/vcs.xml                             |   1 -
 Proc/HTMLToText.cs                            | 181 ++++++
 Proc/ProcessHelper.cs                         |  59 ++
 Proc/ProcessOutput.cs                         |  21 +
 Scraper/Helper.cs                             |   3 +-
 Scraper/Scraper.cs                            | 582 +++++-------------
 Scraper/Utf8StringWriter.cs                   |   2 +-
 WordpressEboobScraper2.csproj                 |   4 +-
 8 files changed, 424 insertions(+), 429 deletions(-)
 create mode 100644 Proc/HTMLToText.cs
 create mode 100644 Proc/ProcessHelper.cs
 create mode 100644 Proc/ProcessOutput.cs
diff --git a/.idea/.idea.WordpressEboobScraper2/.idea/vcs.xml b/.idea/.idea.WordpressEboobScraper2/.idea/vcs.xml
index 288b36b..94a25f7 100644
--- a/.idea/.idea.WordpressEboobScraper2/.idea/vcs.xml
+++ b/.idea/.idea.WordpressEboobScraper2/.idea/vcs.xml
@@ -1,7 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
     <mapping directory="$PROJECT_DIR$" vcs="Git" />
   </component>
 </project>
\ No newline at end of file
diff --git a/Proc/HTMLToText.cs b/Proc/HTMLToText.cs
new file mode 100644
index 0000000..9d471d9
--- /dev/null
+++ b/Proc/HTMLToText.cs
@@ -0,0 +1,181 @@
+using System.Text.RegularExpressions;
+using HtmlAgilityPack;
+
+namespace WordpressEboobScraper2.Proc;
+
+public static class HTMLToText // Converts HTML (file, string, document or node) to plain text; block elements are delimited with "\r\n".
+	{
+		private static Regex REX_TAG1 = new Regex("<\\s*(link|style|script)[^>]*?/>", RegexOptions.Compiled); // self-closing link/style/script tags
+		private static Regex REX_TAG2 = new Regex("<\\s*(link|style|script)[^>]*?>[^<>]*?<\\/\\s*\\1\\s*>", RegexOptions.Compiled); // open/close pairs of those tags whose content holds no '<' or '>'
+		// State carried from node to node; the children of every non-inline element get a fresh instance.
+		private class PreceedingDomTextInfo
+		{
+			public PreceedingDomTextInfo(BoolWrapper isFirstTextOfDocWritten)
+			{
+				IsFirstTextOfDocWritten = isFirstTextOfDocWritten;
+			}
+			public bool WritePrecedingWhiteSpace { get; set; } // while false, leading whitespace of the next text node is trimmed
+			public bool LastCharWasSpace { get; set; } // true when the last written text node ended in whitespace
+			public readonly BoolWrapper IsFirstTextOfDocWritten; // the same wrapper instance is handed on to child infos
+			public int ListIndex { get; set; } // 1 for children of <ol>, otherwise 0; never read inside this class
+		}
+		// Reference-type bool, so that several PreceedingDomTextInfo instances can share one flag.
+		private class BoolWrapper
+		{
+			public BoolWrapper() { }
+			public bool Value { get; set; }
+			public static implicit operator bool(BoolWrapper boolWrapper)
+			{
+				return boolWrapper.Value;
+			}
+			public static implicit operator BoolWrapper(bool boolWrapper)
+			{
+				return new BoolWrapper { Value = boolWrapper };
+			}
+		}
+		/// <summary>Loads the HTML file at <paramref name="path"/> and converts it to plain text.</summary>
+		public static string Convert(string path)
+		{
+			HtmlDocument doc = new HtmlDocument();
+			doc.Load(path);
+			return ConvertDoc(doc);
+		}
+		/// <summary>Converts an HTML string to plain text; REX_TAG1/REX_TAG2 matches are replaced by a space first.</summary>
+		public static string ConvertHtml(string html)
+		{
+			HtmlDocument doc = new HtmlDocument();
+			html = REX_TAG1.Replace(html, " ");
+			html = REX_TAG2.Replace(html, " ");
+			doc.LoadHtml(html);
+			return ConvertDoc(doc);
+		}
+		/// <summary>Converts a parsed document, starting at its DocumentNode, and returns the collected text.</summary>
+		public static string ConvertDoc(HtmlDocument doc)
+		{
+			using (StringWriter sw = new StringWriter())
+			{
+				ConvertTo(doc.DocumentNode, sw);
+				sw.Flush();
+				return sw.ToString();
+			}
+		}
+		// Converts every child of node in order; all children share the given textInfo.
+		private static void ConvertContentTo(HtmlNode node, TextWriter outText, PreceedingDomTextInfo textInfo)
+		{
+			foreach (HtmlNode subnode in node.ChildNodes)
+			{
+				ConvertTo(subnode, outText, textInfo);
+			}
+		}
+		/// <summary>Writes the text of <paramref name="node"/> and its descendants to <paramref name="outText"/>, starting with fresh state.</summary>
+		public static void ConvertTo(HtmlNode node, TextWriter outText)
+		{
+			ConvertTo(node, outText, new PreceedingDomTextInfo(false));
+		}
+		// Recursive worker: dispatches on the node type, writes to outText and updates textInfo along the way.
+		private static void ConvertTo(HtmlNode node, TextWriter outText, PreceedingDomTextInfo textInfo)
+		{
+			string html;
+			switch (node.NodeType)
+			{
+				case HtmlNodeType.Comment:
+					// don't output comments
+					break;
+				case HtmlNodeType.Document:
+					ConvertContentTo(node, outText, textInfo);
+					break;
+				case HtmlNodeType.Text:
+					// script and style must not be output
+					string parentName = node.ParentNode.Name; // NOTE(review): ParentNode is dereferenced unchecked - presumably never null for text nodes reached here; confirm
+					if ((parentName == "script") || (parentName == "style"))
+					{
+						break;
+					}
+					// get text
+					html = ((HtmlTextNode)node).Text;
+					// is it in fact a special closing node output as text?
+					if (HtmlNode.IsOverlappedClosingElement(html)) break;
+
+					// check the text is meaningful and not a bunch of whitespaces
+					if (html.Length == 0) break;
+
+					if (html.Trim().ToLower().StartsWith("<?xml") && html.Trim().ToLower().EndsWith("?>")) break; // skip an XML declaration that arrived as a text node
+
+					if (!textInfo.WritePrecedingWhiteSpace || textInfo.LastCharWasSpace)
+					{
+						html = html.TrimStart();
+						if (html.Length == 0) { break; }
+						textInfo.IsFirstTextOfDocWritten.Value = textInfo.WritePrecedingWhiteSpace = true; // text is about to be written: set the shared flag and stop trimming leading whitespace
+					}
+					outText.Write(HtmlEntity.DeEntitize(Regex.Replace(html.TrimEnd(), @"\s{2,}", " "))); // trim the end, collapse whitespace runs to one space, then decode entities
+					if (textInfo.LastCharWasSpace = char.IsWhiteSpace(html[html.Length - 1])) // assignment on purpose: remember trailing whitespace and re-add it as a single space
+					{
+						outText.Write(' ');
+					}
+					break;
+				case HtmlNodeType.Element:
+					string endElementString = null;
+					bool isInline;
+					bool skip = false;
+					int listIndex = 0;
+					switch (node.Name)
+					{
+						case "nav": // dropped completely: children are skipped and nothing is written
+							skip = true;
+							isInline = false;
+							break;
+						case "body":
+						case "section":
+						case "article":
+						case "aside":
+						case "h1":
+						case "h2":
+						case "header":
+						case "footer":
+						case "address":
+						case "main":
+						case "div":
+						case "span":
+						case "p": // stylistic - adjust as you tend to use
+							if (textInfo.IsFirstTextOfDocWritten) outText.Write("\r\n"); // leading line break only once some text has been written
+							endElementString = "\r\n";
+							isInline = false;
+							break;
+						case "br":
+							outText.Write("\r\n");
+							skip = true;
+							textInfo.WritePrecedingWhiteSpace = false;
+							isInline = true;
+							break;
+						case "a":
+							isInline = true;
+							break;
+						case "li":
+							isInline = false;
+							break;
+						case "ol":
+							listIndex = 1;
+							goto case "ul";
+						case "ul": //not handling nested lists any differently at this stage - that is getting close to rendering problems
+							endElementString = "\r\n";
+							isInline = false;
+							break;
+						case "img": //inline-block in reality
+							isInline = true;
+							break;
+						default:
+							isInline = true;
+							break;
+					}
+					if (!skip && node.HasChildNodes)
+					{
+						ConvertContentTo(node, outText, isInline ? textInfo : new PreceedingDomTextInfo(textInfo.IsFirstTextOfDocWritten) { ListIndex = listIndex }); // inline elements continue with the current state, all others start a fresh one
+					}
+					if (endElementString != null)
+					{
+						outText.Write(endElementString);
+					}
+					break;
+			}
+		}
+	}
\ No newline at end of file
diff --git a/Proc/ProcessHelper.cs b/Proc/ProcessHelper.cs
new file mode 100644
index 0000000..08730d6
--- /dev/null
+++ b/Proc/ProcessHelper.cs
@@ -0,0 +1,59 @@
+using System.Text;
+
+namespace WordpressEboobScraper2.Proc;
+
+/// <summary>Runs external programs and captures their console output.</summary>
+public static class ProcessHelper
+{
+    /// <summary>Starts <paramref name="command"/> without shell or window, blocks until it exits and returns exit code plus captured output.</summary>
+    /// <param name="workingDirectory">Working directory for the child process; null is passed on as an empty string.</param>
+    public static ProcessOutput ProcExecute(string command, string arguments, string workingDirectory = null)
+    {
+        arguments ??= string.Empty; // a null argument string would otherwise throw in the Replace() calls below
+
+        // Process wraps OS handles: dispose it once the exit code has been read.
+        using var process = new System.Diagnostics.Process
+        {
+            StartInfo =
+            {
+                FileName = command,
+                Arguments = arguments,
+                WorkingDirectory = workingDirectory ?? string.Empty,
+                UseShellExecute = false,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                CreateNoWindow = true,
+                ErrorDialog = false,
+            }
+        };
+
+        var builderOut = new StringBuilder();
+        var builderErr = new StringBuilder();
+        var builderBoth = new StringBuilder();
+        var gate = new object(); // stdout and stderr callbacks may run concurrently and both write to builderBoth
+
+        // Appends one received line to its own buffer and to the combined buffer, separated by '\n'.
+        void Collect(StringBuilder own, string line)
+        {
+            if (line == null) return; // null marks the end of the stream
+
+            lock (gate)
+            {
+                if (own.Length > 0) own.Append('\n');
+                own.Append(line);
+                if (builderBoth.Length > 0) builderBoth.Append('\n');
+                builderBoth.Append(line);
+            }
+        }
+
+        process.OutputDataReceived += (_, args) => Collect(builderOut, args.Data);
+        process.ErrorDataReceived += (_, args) => Collect(builderErr, args.Data);
+
+        process.Start();
+        process.BeginOutputReadLine();
+        process.BeginErrorReadLine();
+        process.WaitForExit(); // the parameterless overload also waits for the asynchronous output handlers
+
+        return new ProcessOutput($"{command} {arguments.Replace("\r", "\\r").Replace("\n", "\\n")}", process.ExitCode, builderOut.ToString(), builderErr.ToString(), builderBoth.ToString());
+    }
+}
\ No newline at end of file
diff --git a/Proc/ProcessOutput.cs b/Proc/ProcessOutput.cs
new file mode 100644
index 0000000..7813e58
--- /dev/null
+++ b/Proc/ProcessOutput.cs
@@ -0,0 +1,21 @@
+namespace WordpressEboobScraper2.Proc;
+
+/// <summary>Immutable result of one external process run: command line, exit code and captured output.</summary>
+/// <remarks>Declared readonly so that calls through readonly fields or in-parameters need no defensive copy.</remarks>
+public readonly struct ProcessOutput
+{
+    public readonly string Command;
+    public readonly int ExitCode;
+    public readonly string StdOut;
+    public readonly string StdErr;
+    public readonly string StdCombined;
+
+    /// <summary>Stores the given values unchanged.</summary>
+    public ProcessOutput(string cmd, int ex, string stdout, string stderr, string stdcom)
+    {
+        (Command, ExitCode, StdOut, StdErr, StdCombined) = (cmd, ex, stdout, stderr, stdcom);
+    }
+
+    /// <summary>Multi-line dump of command, exit code, stdout and stderr; StdCombined is not included.</summary>
+    public override string ToString() => $"{Command}\n=> {ExitCode}\n\n[stdout]\n{StdOut}\n\n[stderr]\n{StdErr}";
+}
\ No newline at end of file
diff --git a/Scraper/Helper.cs b/Scraper/Helper.cs
index 8528275..7c80bc9 100644
--- a/Scraper/Helper.cs
+++ b/Scraper/Helper.cs
@@ -2,9 +2,8 @@ using HtmlAgilityPack;
 
 namespace WordpressEboobScraper2.Scraper;
 
-public class Helper
+public static class Helper
 {
-    
     public static string Filenamify(string v, bool repl = false)
     {
         var s = new String(v.Replace((char)160, ' ').ToCharArray().Where(p =>
diff --git a/Scraper/Scraper.cs b/Scraper/Scraper.cs
index 9ce0fda..4da91d5 100644
--- a/Scraper/Scraper.cs
+++ b/Scraper/Scraper.cs
@@ -6,6 +6,7 @@ using System.Xml.Linq;
 using System.Xml.Serialization;
 using HtmlAgilityPack;
 using Ionic.Zip;
+using WordpressEboobScraper2.Proc;
 
 namespace WordpressEboobScraper2.Scraper;
 
@@ -15,16 +16,16 @@ namespace WordpressEboobScraper2.Scraper;
 /**                                                     **/
 /** *************************************************** **/
 
-class Scraper
+public class Scraper
 {
 		
 	static EpubParameter ACTIVE_BOOK = null;
 
 	const int LIMIT = 1500;
 
-	readonly Regex REX_NUMSTART = new Regex(@"^\s*(?<n>[0-9]+)\s*\-.*$", RegexOptions.Compiled);
+	readonly Regex REX_NUMSTART = new(@"^\s*(?<n>[0-9]+)\s*\-.*$", RegexOptions.Compiled);
 
-	Dictionary<string, string> webCache = new Dictionary<string, string>();
+	Dictionary<string, string> webCache = new();
 
 	string STASH_FOLDER => Config.BASE_DIR_STASH + ACTIVE_BOOK.Foldername + Path.DirectorySeparatorChar;
 
@@ -144,7 +145,7 @@ class Scraper
 	void SaveCache()
 	{
 		var xs = new XmlSerializer(typeof(List<SerializableCacheEntry>));
-		using (var writer = new System.IO.StreamWriter(WCACHE_FILE))
+		using (var writer = new StreamWriter(WCACHE_FILE))
 		{
 			xs.Serialize(writer, webCache.Select(p => new SerializableCacheEntry { URL = p.Key, Content = new GZippedString{ Value = p.Value } }).ToList());
 		}
@@ -155,77 +156,74 @@ class Scraper
 		if (!File.Exists(WCACHE_FILE)) return;
 		
 		XmlSerializer deserializer = new XmlSerializer(typeof(List<SerializableCacheEntry>));
-		using (TextReader reader = new StreamReader(WCACHE_FILE))
-		{
-			var result = new List<SerializableCacheEntry>();
+		
+		using TextReader reader = new StreamReader(WCACHE_FILE);
+
+		var l = (List<SerializableCacheEntry>)deserializer.Deserialize(reader);
 			
-			var l = (List<SerializableCacheEntry>)deserializer.Deserialize(reader);
-			
-			webCache = l.ToDictionary(p => p.URL, p => p.Content.Value);
-		}
+		webCache = l.ToDictionary(p => p.URL, p => p.Content.Value);
 	}
 
 	List<Chapter> FindChapters()
 	{
 		List<Chapter> result = new List<Chapter>();
 
-		using (WebClient client = new WebClient())
+		using WebClient client = new WebClient();
+		
+		client.Encoding = Encoding.UTF8;
+		Stack<string> buffer = new Stack<string>();
+		buffer.Push(ACTIVE_BOOK.StartURL);
+
+		while (buffer.Any() && result.Count < LIMIT)
 		{
-			client.Encoding = Encoding.UTF8;
-			Stack<string> buffer = new Stack<string>();
-			buffer.Push(ACTIVE_BOOK.StartURL);
+			var url = buffer.Pop();
+			Chapter curr = new Chapter() { url = url };
 
-			while (buffer.Any() && result.Count < LIMIT)
+			var buffered = webCache.ContainsKey(url.ToLower());
+			if (buffered)
 			{
-				var url = buffer.Pop();
-				Chapter curr = new Chapter() { url = url };
-
-				var buffered = webCache.ContainsKey(url.ToLower());
-				if (buffered)
-				{
-					curr.queryResult = webCache[url.ToLower()];
-					"*(loaded from webcache)*".Dump();
-				}
-				else
-				{
-					curr.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
-					webCache[url.ToLower()] = curr.queryResult;
-					SaveCache();
-				}
-
-				var r = ProcessChapter(curr, result, s=>s.Dump(), out var next_url);
-				if (next_url != null) buffer.Push(next_url);
-				
-				if (buffered && buffer.Count == 0 && Config.DO_LIVE_RELOAD_OF_LAST)
-				{
-					"".Dump();
-					"//==> *(auto-reload from live)*".Dump();
-					"".Dump();
-					curr.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
-					webCache[url.ToLower()] = curr.queryResult;
-					SaveCache();
-
-					r = ProcessChapter(curr, result, s=>s.Dump(), out var next_url_inner);
-					if (next_url_inner != null) buffer.Push(next_url_inner);
-				}
-				if (r == ProcessResult.SuccessNormal)
-				{
-					"    ==> Chapter processed".Dump();
-					result.Add(curr);
-					OutputChapter(curr, result.Count);
-				}
-				else if (r == ProcessResult.SkipChapter)
-				{
-					"    ==> Skip this chapter".Dump();
-				}
-				else if (r == ProcessResult.ReachedEnd)
-				{
-					"    ==> End reached".Dump();
-				}
-
-
-				"".Dump();
+				curr.queryResult = webCache[url.ToLower()];
+				"*(loaded from webcache)*".Dump();
 			}
+			else
+			{
+				curr.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
+				webCache[url.ToLower()] = curr.queryResult;
+				SaveCache();
+			}
+
+			var r = ProcessChapter(curr, result, s=>s.Dump(), out var next_url);
+			if (next_url != null) buffer.Push(next_url);
+				
+			if (buffered && buffer.Count == 0 && Config.DO_LIVE_RELOAD_OF_LAST)
+			{
+				"".Dump();
+				"//==> *(auto-reload from live)*".Dump();
+				"".Dump();
+				curr.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
+				webCache[url.ToLower()] = curr.queryResult;
+				SaveCache();
+
+				r = ProcessChapter(curr, result, s=>s.Dump(), out var next_url_inner);
+				if (next_url_inner != null) buffer.Push(next_url_inner);
+			}
+			if (r == ProcessResult.SuccessNormal)
+			{
+				"    ==> Chapter processed".Dump();
+				result.Add(curr);
+				OutputChapter(curr, result.Count);
+			}
+			else if (r == ProcessResult.SkipChapter)
+			{
+				"    ==> Skip this chapter".Dump();
+			}
+			else if (r == ProcessResult.ReachedEnd)
+			{
+				"    ==> End reached".Dump();
+			}
+
+
+			"".Dump();
 		}
 
 		return result;
@@ -233,96 +231,93 @@ class Scraper
 
 	void VerifyChapters()
 	{
-		List<Chapter> result = new List<Chapter>();
+		using WebClient client = new WebClient();
+		
+		client.Encoding = Encoding.UTF8;
+		Stack<string> buffer = new Stack<string>();
+		buffer.Push(ACTIVE_BOOK.StartURL);
 
-		using (WebClient client = new WebClient())
+		while (buffer.Any())
 		{
-			client.Encoding = Encoding.UTF8;
-			Stack<string> buffer = new Stack<string>();
-			buffer.Push(ACTIVE_BOOK.StartURL);
+			var url = buffer.Pop();
+			Chapter curr_buffer = new Chapter() { url = url };
+			Chapter curr_live   = new Chapter() { url = url };
 
-			while (buffer.Any() && result.Count < LIMIT)
+			var buffered = webCache.ContainsKey(url.ToLower());
+			if (buffered)
 			{
-				var url = buffer.Pop();
-				Chapter curr_buffer = new Chapter() { url = url };
-				Chapter curr_live   = new Chapter() { url = url };
-
-				var buffered = webCache.ContainsKey(url.ToLower());
-				if (buffered)
+				try
 				{
-					try
-					{
-						curr_buffer.queryResult = webCache[url.ToLower()];
-						curr_live.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
-					}
-					catch (Exception e)
-					{
-						$"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Live reload resulted in exception: {e.Message}".Dump();
-						continue;
-					}
+					curr_buffer.queryResult = webCache[url.ToLower()];
+					curr_live.queryResult = client.DownloadString(Uri.UnescapeDataString(url));
 				}
-				else
+				catch (Exception e)
 				{
+					$"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Live reload resulted in exception: {e.Message}".Dump();
 					continue;
 				}
-
-				var is_diff = false;
-
-				var r_buffer = ProcessChapter(curr_buffer, result, _ => {}, out var next_buffer);
-				var r_live = ProcessChapter(curr_live, result, _ => {}, out var next_live);
-
-				if (next_buffer != null) buffer.Push(next_buffer);
-
-				if (r_buffer != r_live) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different Process result: {r_buffer} <> {r_live}".Dump(); is_diff = true; }
-				if (r_buffer != r_live) {$"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different push URL: {next_buffer} <> {next_live}".Dump(); is_diff = true; }
-
-				if (!Relaxedurleq(curr_buffer.next, curr_live.next)) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different next chapter: {curr_buffer.next} <> {curr_live.next}".Dump(); is_diff = true; }
-				if (curr_buffer.title != curr_live.title) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different title: {curr_buffer.title} <> {curr_live.title}".Dump(); is_diff = true; }
-
-				if (curr_buffer.chapter.Value != curr_live.chapter.Value)
-				{
-					var clean_buffer = GetChapterText(curr_buffer);
-					var clean_live   = GetChapterText(curr_live);
-
-					if (clean_buffer.Trim() != clean_live.Trim())
-					{
-						$"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different content: ".Dump();
-						new Hyperlinq(() =>
-						{
-
-							var fa = Path.Combine(Path.GetTempPath(), "buffer_" + Guid.NewGuid() + ".txt");
-							var fb = Path.Combine(Path.GetTempPath(), "live___" + Guid.NewGuid() + ".txt");
-							File.WriteAllText(fa, curr_buffer.chapter.Value);
-							File.WriteAllText(fb, curr_live.chapter.Value);
-							Process.Start(Config.COMPARE_PROG, $"\"{fa}\" \"{fb}\"");
-
-						}, "[Compare Raw]").Dump();
-						new Hyperlinq(() =>
-						{
-
-							var fa = Path.Combine(Path.GetTempPath(), "buffer_" + Guid.NewGuid() + ".txt");
-							var fb = Path.Combine(Path.GetTempPath(), "live___" + Guid.NewGuid() + ".txt");
-							File.WriteAllText(fa, clean_buffer);
-							File.WriteAllText(fb, clean_live);
-							Process.Start(Config.COMPARE_PROG, $"\"{fa}\" \"{fb}\"");
-
-						}, "[Compare Text]").Dump();
-						new Hyperlinq(() =>
-						{
-
-							webCache[url.ToLower()] = curr_live.queryResult;
-							SaveCache();
-
-						}, "[Save new version to webcache]").Dump();
-
-						is_diff = true;
-					}
-				}
-
-				if (!is_diff) $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] OK - No differences".Dump();
-
-				if (is_diff) "".Dump();
 			}
+			else
+			{
+				continue;
+			}
+
+			var is_diff = false;
+
+			var r_buffer = ProcessChapter(curr_buffer, new List<Chapter>(), _ => {}, out var next_buffer);
+			var r_live = ProcessChapter(curr_live, new List<Chapter>(), _ => {}, out var next_live);
+
+			if (next_buffer != null) buffer.Push(next_buffer);
+
+			if (r_buffer != r_live) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different Process result: {r_buffer} <> {r_live}".Dump(); is_diff = true; }
+			if (!Relaxedurleq(next_buffer, next_live)) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different push URL: {next_buffer} <> {next_live}".Dump(); is_diff = true; } // compare the queued URLs themselves; this condition used to repeat the process-result check
+
+			if (!Relaxedurleq(curr_buffer.next, curr_live.next)) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different next chapter: {curr_buffer.next} <> {curr_live.next}".Dump(); is_diff = true; }
+			if (curr_buffer.title != curr_live.title) { $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different title: {curr_buffer.title} <> {curr_live.title}".Dump(); is_diff = true; }
+
+			if (curr_buffer.chapter.Value != curr_live.chapter.Value)
+			{
+				var clean_buffer = GetChapterText(curr_buffer);
+				var clean_live   = GetChapterText(curr_live);
+
+				if (clean_buffer.Trim() != clean_live.Trim())
+				{
+					$"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] Different content: ".Dump();
+					new Hyperlinq(() =>
+					{
+
+						var fa = Path.Combine(Path.GetTempPath(), "buffer_" + Guid.NewGuid() + ".txt");
+						var fb = Path.Combine(Path.GetTempPath(), "live___" + Guid.NewGuid() + ".txt");
+						File.WriteAllText(fa, curr_buffer.chapter.Value);
+						File.WriteAllText(fb, curr_live.chapter.Value);
+						Process.Start(Config.COMPARE_PROG, $"\"{fa}\" \"{fb}\"");
+
+					}, "[Compare Raw]").Dump();
+					new Hyperlinq(() =>
+					{
+
+						var fa = Path.Combine(Path.GetTempPath(), "buffer_" + Guid.NewGuid() + ".txt");
+						var fb = Path.Combine(Path.GetTempPath(), "live___" + Guid.NewGuid() + ".txt");
+						File.WriteAllText(fa, clean_buffer);
+						File.WriteAllText(fb, clean_live);
+						Process.Start(Config.COMPARE_PROG, $"\"{fa}\" \"{fb}\"");
+
+					}, "[Compare Text]").Dump();
+					new Hyperlinq(() =>
+					{
+
+						webCache[url.ToLower()] = curr_live.queryResult;
+						SaveCache();
+
+					}, "[Save new version to webcache]").Dump();
+
+					is_diff = true;
+				}
+			}
+
+			if (!is_diff) $"[{ACTIVE_BOOK.DisplayStr} | {curr_buffer.title ?? url}] OK - No differences".Dump();
+
+			if (is_diff) "".Dump();
 		}
 	}
 
@@ -350,9 +345,9 @@ class Scraper
 		return clean;
 	}
 
-	ProcessResult ProcessChapter(Chapter curr, IReadOnlyList<Chapter> backBuffer, Action<String> prt, out string forwardQueue_next)
+	ProcessResult ProcessChapter(Chapter curr, IReadOnlyList<Chapter> backBuffer, Action<String> prt, out string forwardQueueNext)
 	{
-		forwardQueue_next = null;
+		forwardQueueNext = null;
 		
 		HtmlDocument doc = new HtmlDocument();
 		doc.LoadHtml(curr.queryResult);
@@ -500,8 +495,6 @@ class Scraper
 
 		#region Next
 
-		string[] title_spec_words = new string[] {"prologue", "epilogue", "bonus" };
-		
 		if (backBuffer.Where(b => !b.isSpecial).Count() > 4 && 
 		    backBuffer.Where(b => !b.isSpecial).Select(bb => { var r = REX_NUMSTART.Match(bb.title); return r.Success ? r.Groups["n"].Value : null; }).Distinct().Count() == 1 && 
 			REX_NUMSTART.Match(backBuffer.Where(b => !b.isSpecial).First().title).Success &&
@@ -517,8 +510,8 @@ class Scraper
 		if (next == null)
 			next = nodeContent.Descendants()
 					  .Where(p => p.Name.ToLower() == "a")
-						.Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next")
-						.Where(p => p.Attributes.Contains("href"))
+					  .Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next")
+					  .Where(p => p.Attributes.Contains("href"))
 					  .FirstOrDefault();
 
 		if (next == null)
@@ -559,7 +552,7 @@ class Scraper
 				curr.next = next_url;
 				if (!backBuffer.Any(p => p.url.ToLower() == next_url.ToLower()))
 				{
-					forwardQueue_next = next_url;
+					forwardQueueNext = next_url;
 				}
 			}
 
@@ -940,14 +933,12 @@ class Scraper
 								new XAttribute("full-path", "OEBPS/content.opf"),
 								new XAttribute("media-type", "application/oebps-package+xml")))));
 
-		StringBuilder builder = new StringBuilder();
-		using (Utf8StringWriter writer = new Utf8StringWriter())
-		{
-			doc.Save(writer);
-			var r = writer.ToString();
-			r = r.Replace("encoding=\"utf-8\"", "encoding=\"UTF-8\"");
-			return r.Trim() + "\r\n";
-		}
+		using Utf8StringWriter writer = new Utf8StringWriter();
+		
+		doc.Save(writer);
+		var r = writer.ToString();
+		r = r.Replace("encoding=\"utf-8\"", "encoding=\"UTF-8\"");
+		return r.Trim() + "\r\n";
 	}
 
 	string GetEpubContentOPF(List<Chapter> chapters)
@@ -1030,17 +1021,14 @@ class Scraper
 
 		package.Add(new XElement(opf + "guide"));
 
-		StringBuilder builder = new StringBuilder();
-		using (Utf8StringWriter writer = new Utf8StringWriter())
-		{
-			doc.Save(writer);
-			return writer.ToString();
-		}
+		using Utf8StringWriter writer = new Utf8StringWriter();
+		
+		doc.Save(writer);
+		return writer.ToString();
 	}
 
 	string GetEpubTOC(List<Chapter> chapters)
 	{
-		XNamespace dc = "http://www.daisy.org/z3986/2005/ncx/";
 		XNamespace ncx = "http://www.idpf.org/2007/opf";
 
 		var doc = new XDocument(
@@ -1082,12 +1070,10 @@ class Scraper
 
 		root.Add(nav);
 
-		StringBuilder builder = new StringBuilder();
-		using (Utf8StringWriter writer = new Utf8StringWriter())
-		{
-			doc.Save(writer);
-			return writer.ToString();
-		}
+		using Utf8StringWriter writer = new Utf8StringWriter();
+		
+		doc.Save(writer);
+		return writer.ToString();
 	}
 
 	string GetEpubChapterFile(Chapter chapter, int idx)
@@ -1108,256 +1094,4 @@ class Scraper
 
 		return xml.ToString();
 	}
-
-	public struct ProcessOutput
-	{
-		public readonly string Command;
-		public readonly int ExitCode;
-		public readonly string StdOut;
-		public readonly string StdErr;
-		public readonly string StdCombined;
-
-		public ProcessOutput(string cmd, int ex, string stdout, string stderr, string stdcom)
-		{
-			Command = cmd;
-			ExitCode = ex;
-			StdOut = stdout;
-			StdErr = stderr;
-			StdCombined = stdcom;
-		}
-
-		public override string ToString() => $"{Command}\n=> {ExitCode}\n\n[stdout]\n{StdOut}\n\n[stderr]\n{StdErr}";
-	}
-
-	public static class ProcessHelper
-	{
-		public static ProcessOutput ProcExecute(string command, string arguments, string workingDirectory = null)
-		{
-			var process = new Process
-			{
-				StartInfo =
-					{
-						FileName = command,
-						Arguments = arguments,
-						WorkingDirectory = workingDirectory ?? string.Empty,
-						UseShellExecute = false,
-						RedirectStandardOutput = true,
-						RedirectStandardError = true,
-						CreateNoWindow = true,
-						ErrorDialog = false,
-					}
-			};
-
-			var builderOut = new StringBuilder();
-			var builderErr = new StringBuilder();
-			var builderBoth = new StringBuilder();
-
-			process.OutputDataReceived += (sender, args) =>
-			{
-				if (args.Data == null) return;
-
-				if (builderOut.Length == 0) builderOut.Append(args.Data);
-				else builderOut.Append("\n" + args.Data);
-
-				if (builderBoth.Length == 0) builderBoth.Append(args.Data);
-				else builderBoth.Append("\n" + args.Data);
-			};
-
-			process.ErrorDataReceived += (sender, args) =>
-			{
-				if (args.Data == null) return;
-
-				if (builderErr.Length == 0) builderErr.Append(args.Data);
-				else builderErr.Append("\n" + args.Data);
-
-				if (builderBoth.Length == 0) builderBoth.Append(args.Data);
-				else builderBoth.Append("\n" + args.Data);
-			};
-
-			process.Start();
-
-			process.BeginOutputReadLine();
-			process.BeginErrorReadLine();
-
-			process.WaitForExit();
-
-			return new ProcessOutput($"{command} {arguments.Replace("\r", "\\r").Replace("\n", "\\n")}", process.ExitCode, builderOut.ToString(), builderErr.ToString(), builderBoth.ToString());
-		}
-	}
-	public static class HTMLToText
-	{
-		private static Regex REX_TAG1 = new Regex("<\\s*(link|style|script)[^>]*?/>", RegexOptions.Compiled);
-		private static Regex REX_TAG2 = new Regex("<\\s*(link|style|script)[^>]*?>[^<>]*?<\\/\\s*\\1\\s*>", RegexOptions.Compiled);
-
-		private class PreceedingDomTextInfo
-		{
-			public PreceedingDomTextInfo(BoolWrapper isFirstTextOfDocWritten)
-			{
-				IsFirstTextOfDocWritten = isFirstTextOfDocWritten;
-			}
-			public bool WritePrecedingWhiteSpace { get; set; }
-			public bool LastCharWasSpace { get; set; }
-			public readonly BoolWrapper IsFirstTextOfDocWritten;
-			public int ListIndex { get; set; }
-		}
-
-		private class BoolWrapper
-		{
-			public BoolWrapper() { }
-			public bool Value { get; set; }
-			public static implicit operator bool(BoolWrapper boolWrapper)
-			{
-				return boolWrapper.Value;
-			}
-			public static implicit operator BoolWrapper(bool boolWrapper)
-			{
-				return new BoolWrapper { Value = boolWrapper };
-			}
-		}
-
-		public static string Convert(string path)
-		{
-			HtmlDocument doc = new HtmlDocument();
-			doc.Load(path);
-			return ConvertDoc(doc);
-		}
-
-		public static string ConvertHtml(string html)
-		{
-			HtmlDocument doc = new HtmlDocument();
-			html = REX_TAG1.Replace(html, " ");
-			html = REX_TAG2.Replace(html, " ");
-			doc.LoadHtml(html);
-			return ConvertDoc(doc);
-		}
-
-		public static string ConvertDoc(HtmlDocument doc)
-		{
-			using (StringWriter sw = new StringWriter())
-			{
-				ConvertTo(doc.DocumentNode, sw);
-				sw.Flush();
-				return sw.ToString();
-			}
-		}
-
-		private static void ConvertContentTo(HtmlNode node, TextWriter outText, PreceedingDomTextInfo textInfo)
-		{
-			foreach (HtmlNode subnode in node.ChildNodes)
-			{
-				ConvertTo(subnode, outText, textInfo);
-			}
-		}
-
-		public static void ConvertTo(HtmlNode node, TextWriter outText)
-		{
-			ConvertTo(node, outText, new PreceedingDomTextInfo(false));
-		}
-
-		private static void ConvertTo(HtmlNode node, TextWriter outText, PreceedingDomTextInfo textInfo)
-		{
-			string html;
-			switch (node.NodeType)
-			{
-				case HtmlNodeType.Comment:
-					// don't output comments
-					break;
-				case HtmlNodeType.Document:
-					ConvertContentTo(node, outText, textInfo);
-					break;
-				case HtmlNodeType.Text:
-					// script and style must not be output
-					string parentName = node.ParentNode.Name;
-					if ((parentName == "script") || (parentName == "style"))
-					{
-						break;
-					}
-					// get text
-					html = ((HtmlTextNode)node).Text;
-					// is it in fact a special closing node output as text?
-					if (HtmlNode.IsOverlappedClosingElement(html)) break;
-
-					// check the text is meaningful and not a bunch of whitespaces
-					if (html.Length == 0) break;
-
-					if (html.Trim().ToLower().StartsWith("<?xml") && html.Trim().ToLower().EndsWith("?>")) break;
-
-					if (!textInfo.WritePrecedingWhiteSpace || textInfo.LastCharWasSpace)
-					{
-						html = html.TrimStart();
-						if (html.Length == 0) { break; }
-						textInfo.IsFirstTextOfDocWritten.Value = textInfo.WritePrecedingWhiteSpace = true;
-					}
-					outText.Write(HtmlEntity.DeEntitize(Regex.Replace(html.TrimEnd(), @"\s{2,}", " ")));
-					if (textInfo.LastCharWasSpace = char.IsWhiteSpace(html[html.Length - 1]))
-					{
-						outText.Write(' ');
-					}
-					break;
-				case HtmlNodeType.Element:
-					string endElementString = null;
-					bool isInline;
-					bool skip = false;
-					int listIndex = 0;
-					switch (node.Name)
-					{
-						case "nav":
-							skip = true;
-							isInline = false;
-							break;
-						case "body":
-						case "section":
-						case "article":
-						case "aside":
-						case "h1":
-						case "h2":
-						case "header":
-						case "footer":
-						case "address":
-						case "main":
-						case "div":
-						case "span":
-						case "p": // stylistic - adjust as you tend to use
-							if (textInfo.IsFirstTextOfDocWritten) outText.Write("\r\n");
-							endElementString = "\r\n";
-							isInline = false;
-							break;
-						case "br":
-							outText.Write("\r\n");
-							skip = true;
-							textInfo.WritePrecedingWhiteSpace = false;
-							isInline = true;
-							break;
-						case "a":
-							isInline = true;
-							break;
-						case "li":
-							isInline = false;
-							break;
-						case "ol":
-							listIndex = 1;
-							goto case "ul";
-						case "ul": //not handling nested lists any differently at this stage - that is getting close to rendering problems
-							endElementString = "\r\n";
-							isInline = false;
-							break;
-						case "img": //inline-block in reality
-							isInline = true;
-							break;
-						default:
-							isInline = true;
-							break;
-					}
-					if (!skip && node.HasChildNodes)
-					{
-						ConvertContentTo(node, outText, isInline ? textInfo : new PreceedingDomTextInfo(textInfo.IsFirstTextOfDocWritten) { ListIndex = listIndex });
-					}
-					if (endElementString != null)
-					{
-						outText.Write(endElementString);
-					}
-					break;
-			}
-		}
-	}
 }
diff --git a/Scraper/Utf8StringWriter.cs b/Scraper/Utf8StringWriter.cs
index 6b493fa..05a1d57 100644
--- a/Scraper/Utf8StringWriter.cs
+++ b/Scraper/Utf8StringWriter.cs
@@ -4,5 +4,5 @@ namespace WordpressEboobScraper2.Scraper;
 
 public class Utf8StringWriter : StringWriter
 {
-    public override Encoding Encoding { get { return Encoding.UTF8; } }
+    public override Encoding Encoding => Encoding.UTF8;
 }
\ No newline at end of file
diff --git a/WordpressEboobScraper2.csproj b/WordpressEboobScraper2.csproj
index a0ba966..7da616f 100644
--- a/WordpressEboobScraper2.csproj
+++ b/WordpressEboobScraper2.csproj
@@ -4,7 +4,7 @@
         <OutputType>Exe</OutputType>
         <TargetFramework>net7.0</TargetFramework>
         <ImplicitUsings>enable</ImplicitUsings>
-        <Nullable>enable</Nullable>
+        <Nullable>disable</Nullable>
     </PropertyGroup>
 
 
@@ -14,4 +14,6 @@
       <PackageReference Include="System.Text.Encoding.CodePages" Version="7.0.0" />
     </ItemGroup>
 
+
+
 </Project>