Fix bug when reading Royal Road chapters that contain extra content
see e.g. https://www.royalroad.com/fiction/36735/the-perfect-run/chapter/761960/127-theomachia
This commit is contained in:
parent
9692dc531f
commit
4cc76a45ef
@ -99,4 +99,12 @@ public class Helper
|
|||||||
return string.Format("{0}/{1}", uri1, uri2);
|
return string.Format("{0}/{1}", uri1, uri2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Lazily enumerates every node below <paramref name="n"/> (children, grandchildren, ...),
/// each node exactly once, in the order produced by <c>HtmlNode.Descendants()</c>.
/// </summary>
/// <param name="n">Root node whose subtree is enumerated; the root itself is not returned.</param>
/// <returns>A deferred sequence of all descendant nodes of <paramref name="n"/>.</returns>
public static IEnumerable<HtmlNode> RecursiveDescendants(HtmlNode n)
{
    // HtmlNode.Descendants() already walks the complete subtree. The previous
    // implementation recursed into every node it returned, so deeper nodes were
    // yielded repeatedly (growing exponentially with depth). A single pass returns
    // the same nodes in the same first-seen order, which is what callers using
    // Where(...).FirstOrDefault() rely on.
    foreach (var descendant in n.Descendants())
    {
        yield return descendant;
    }
}
|
||||||
}
|
}
|
@ -362,7 +362,7 @@ class Scraper
|
|||||||
var nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@class,'post') and contains(@class ,'type-post')]");
|
var nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@class,'post') and contains(@class ,'type-post')]");
|
||||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@id,'post') and contains(@class ,'post')]");
|
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@id,'post') and contains(@class ,'post')]");
|
||||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@id,'post') and contains(@class ,'post')]");
|
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@id,'post') and contains(@class ,'post')]");
|
||||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class ,'chapter')]//div[contains(@class ,'portlet-body')]");
|
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class ,'chapter') and not(contains(@class ,'chapter-page'))]//div[contains(@class ,'portlet-body')]");
|
||||||
if (nodeContent == null && ACTIVE_BOOK.SiteType == Site.WW) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class,'box_con')]");
|
if (nodeContent == null && ACTIVE_BOOK.SiteType == Site.WW) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class,'box_con')]");
|
||||||
|
|
||||||
var nodeNav = doc.DocumentNode.SelectSingleNode(@"//nav[contains(@class,'post-navigation') and @role='navigation']");
|
var nodeNav = doc.DocumentNode.SelectSingleNode(@"//nav[contains(@class,'post-navigation') and @role='navigation']");
|
||||||
@ -513,6 +513,7 @@ class Scraper
|
|||||||
}
|
}
|
||||||
|
|
||||||
var next = nodeContent.SelectSingleNode(@"//div[@class='entry-content']//a[normalize-space(@title)='Next Chapter' or normalize-space(text())='Next Chapter']");
|
var next = nodeContent.SelectSingleNode(@"//div[@class='entry-content']//a[normalize-space(@title)='Next Chapter' or normalize-space(text())='Next Chapter']");
|
||||||
|
|
||||||
if (next == null)
|
if (next == null)
|
||||||
next = nodeContent.Descendants()
|
next = nodeContent.Descendants()
|
||||||
.Where(p => p.Name.ToLower() == "a")
|
.Where(p => p.Name.ToLower() == "a")
|
||||||
@ -520,14 +521,25 @@ class Scraper
|
|||||||
.Where(p => p.Attributes.Contains("href"))
|
.Where(p => p.Attributes.Contains("href"))
|
||||||
.FirstOrDefault();
|
.FirstOrDefault();
|
||||||
|
|
||||||
var x = nodeContent.Descendants().Where(p => p.Name.ToLower() == "a");
|
|
||||||
|
|
||||||
if (next == null)
|
if (next == null)
|
||||||
next = nodeNav.Descendants()
|
next = nodeNav.Descendants()
|
||||||
.Where(p => p.Name.ToLower() == "a")
|
.Where(p => p.Name.ToLower() == "a")
|
||||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||||
.FirstOrDefault();
|
.FirstOrDefault();
|
||||||
|
|
||||||
|
if (next == null)
|
||||||
|
next = Helper.RecursiveDescendants(nodeContent)
|
||||||
|
.Where(p => p.Name.ToLower() == "a")
|
||||||
|
.Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next")
|
||||||
|
.Where(p => p.Attributes.Contains("href"))
|
||||||
|
.FirstOrDefault();
|
||||||
|
|
||||||
|
if (next == null)
|
||||||
|
next = Helper.RecursiveDescendants(nodeContent)
|
||||||
|
.Where(p => p.Name.ToLower() == "a")
|
||||||
|
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||||
|
.FirstOrDefault();
|
||||||
|
|
||||||
if (next != null)
|
if (next != null)
|
||||||
{
|
{
|
||||||
var next_url = next.Attributes["href"].Value.Trim();
|
var next_url = next.Attributes["href"].Value.Trim();
|
||||||
|
Loading…
Reference in New Issue
Block a user