Fix bug when reading royalroad with extra content
see e.g. https://www.royalroad.com/fiction/36735/the-perfect-run/chapter/761960/127-theomachia
This commit is contained in:
parent
9692dc531f
commit
4cc76a45ef
@ -99,4 +99,12 @@ public class Helper
|
||||
return string.Format("{0}/{1}", uri1, uri2);
|
||||
}
|
||||
|
||||
/// <summary>
/// Lazily enumerates every node below <paramref name="n"/> in document
/// (pre-order) order, yielding each node exactly once.
/// </summary>
/// <param name="n">
/// Root of the subtree to walk. The root itself is not part of the result.
/// Must not be null.
/// </param>
/// <returns>All descendants of <paramref name="n"/>, parents before their children.</returns>
public static IEnumerable<HtmlNode> RecursiveDescendants(HtmlNode n)
{
    // Walk the direct children only and let the recursion go deeper.
    // HtmlAgilityPack's Descendants() already returns the whole subtree, so
    // recursing over it re-yielded every nested node once per ancestor level
    // (duplicates, and work exponential in the nesting depth).
    foreach (var child in n.ChildNodes)
    {
        yield return child;
        foreach (var nested in RecursiveDescendants(child)) yield return nested;
    }
}
|
||||
}
|
@ -362,7 +362,7 @@ class Scraper
|
||||
var nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@class,'post') and contains(@class ,'type-post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//article[contains(@id,'post') and contains(@class ,'post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@id,'post') and contains(@class ,'post')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class ,'chapter')]//div[contains(@class ,'portlet-body')]");
|
||||
if (nodeContent == null) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class ,'chapter') and not(contains(@class ,'chapter-page'))]//div[contains(@class ,'portlet-body')]");
|
||||
if (nodeContent == null && ACTIVE_BOOK.SiteType == Site.WW) nodeContent = doc.DocumentNode.SelectSingleNode(@"//div[contains(@class,'box_con')]");
|
||||
|
||||
var nodeNav = doc.DocumentNode.SelectSingleNode(@"//nav[contains(@class,'post-navigation') and @role='navigation']");
|
||||
@ -513,6 +513,7 @@ class Scraper
|
||||
}
|
||||
|
||||
var next = nodeContent.SelectSingleNode(@"//div[@class='entry-content']//a[normalize-space(@title)='Next Chapter' or normalize-space(text())='Next Chapter']");
|
||||
|
||||
if (next == null)
|
||||
next = nodeContent.Descendants()
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
@ -520,14 +521,25 @@ class Scraper
|
||||
.Where(p => p.Attributes.Contains("href"))
|
||||
.FirstOrDefault();
|
||||
|
||||
var x = nodeContent.Descendants().Where(p => p.Name.ToLower() == "a");
|
||||
|
||||
if (next == null)
|
||||
next = nodeNav.Descendants()
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||
.FirstOrDefault();
|
||||
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||
.FirstOrDefault();
|
||||
|
||||
if (next == null)
|
||||
next = Helper.RecursiveDescendants(nodeContent)
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
.Where(p => Helper.Striptease(p) == "next chapter" || Helper.Striptease(p) == "next")
|
||||
.Where(p => p.Attributes.Contains("href"))
|
||||
.FirstOrDefault();
|
||||
|
||||
if (next == null)
|
||||
next = Helper.RecursiveDescendants(nodeContent)
|
||||
.Where(p => p.Name.ToLower() == "a")
|
||||
.Where(p => p.Attributes.Any(q => q.Name == "rel" && q.Value == "next"))
|
||||
.FirstOrDefault();
|
||||
|
||||
if (next != null)
|
||||
{
|
||||
var next_url = next.Attributes["href"].Value.Trim();
|
||||
|
Loading…
x
Reference in New Issue
Block a user