From 96ae786bb00fee77d6d659acef583f50938b7745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Schw=C3=B6rer?= Date: Mon, 18 Sep 2023 21:01:54 +0200 Subject: [PATCH] remove crawler counts from admin/project-lawful --- www/data/css/styles.css | 11 + www/data/css/styles.min.css | 3 + www/data/css/styles_admin.scss | 15 + www/extern/crawler-detect/LICENSE | 22 + www/extern/crawler-detect/README.md | 76 + www/extern/crawler-detect/composer.json | 29 + www/extern/crawler-detect/export.php | 41 + www/extern/crawler-detect/raw/Crawlers.json | 1 + www/extern/crawler-detect/raw/Crawlers.txt | 1402 ++++++++++++++++ www/extern/crawler-detect/raw/Exclusions.json | 1 + www/extern/crawler-detect/raw/Exclusions.txt | 50 + www/extern/crawler-detect/raw/Headers.json | 1 + www/extern/crawler-detect/raw/Headers.txt | 10 + .../crawler-detect/src/CrawlerDetect.php | 196 +++ .../src/Fixtures/AbstractProvider.php | 32 + .../crawler-detect/src/Fixtures/Crawlers.php | 1424 +++++++++++++++++ .../src/Fixtures/Exclusions.php | 74 + .../crawler-detect/src/Fixtures/Headers.php | 37 + www/internals/modules/projectlawful.php | 35 + www/pages/admin.php | 26 +- 20 files changed, 3477 insertions(+), 9 deletions(-) create mode 100644 www/extern/crawler-detect/LICENSE create mode 100644 www/extern/crawler-detect/README.md create mode 100644 www/extern/crawler-detect/composer.json create mode 100644 www/extern/crawler-detect/export.php create mode 100644 www/extern/crawler-detect/raw/Crawlers.json create mode 100644 www/extern/crawler-detect/raw/Crawlers.txt create mode 100644 www/extern/crawler-detect/raw/Exclusions.json create mode 100644 www/extern/crawler-detect/raw/Exclusions.txt create mode 100644 www/extern/crawler-detect/raw/Headers.json create mode 100644 www/extern/crawler-detect/raw/Headers.txt create mode 100644 www/extern/crawler-detect/src/CrawlerDetect.php create mode 100644 www/extern/crawler-detect/src/Fixtures/AbstractProvider.php create mode 100644 www/extern/crawler-detect/src/Fixtures/Crawlers.php create mode 100644 www/extern/crawler-detect/src/Fixtures/Exclusions.php create mode 100644 www/extern/crawler-detect/src/Fixtures/Headers.php diff --git a/www/data/css/styles.css b/www/data/css/styles.css index 4924734..b8fcc8c 100644 --- a/www/data/css/styles.css +++ b/www/data/css/styles.css @@ -848,6 +848,14 @@ html, body { display: flex; flex-direction: column; } +.keyvaluelist.bc_data { + padding-left: 0; + padding-right: 0; +} +.keyvaluelist.bc_data div { + padding-left: 8px; + padding-right: 8px; +} .keyvaluelist div { display: flex; flex-direction: row; @@ -860,6 +868,9 @@ html, body { min-width: 500px; align-self: start; } +.keyvaluelist .row_hover:hover { + background-color: #E0E0FF; +} .kvl_100 div span:first-child { min-width: 100px; diff --git a/www/data/css/styles.min.css b/www/data/css/styles.min.css index 25e1be2..e9760b7 100644 --- a/www/data/css/styles.min.css +++ b/www/data/css/styles.min.css @@ -162,9 +162,12 @@ html,body{margin:0;padding:0;height:100%} .admincontent .boxedcontent{margin-bottom:20px} .egg_ajaxOutput{display:flex;box-sizing:border-box;width:100%;align-self:center;margin-left:auto;margin-right:auto;resize:none;height:300px} .keyvaluelist{display:flex;flex-direction:column} +.keyvaluelist.bc_data{padding-left:0;padding-right:0} +.keyvaluelist.bc_data div{padding-left:8px;padding-right:8px} .keyvaluelist div{display:flex;flex-direction:row} .keyvaluelist div span{align-self:center} .keyvaluelist div span:first-child{font-weight:bold;min-width:500px;align-self:start} +.keyvaluelist .row_hover:hover{background-color:#e0e0ff} .kvl_100 div span:first-child{min-width:100px} .kvl_200 div span:first-child{min-width:200px} .kvl_250 div span:first-child{min-width:250px} diff --git a/www/data/css/styles_admin.scss b/www/data/css/styles_admin.scss index ee33dd3..5f3d1a3 100644 --- a/www/data/css/styles_admin.scss +++ b/www/data/css/styles_admin.scss @@ -25,6 +25,17 @@ display: flex; flex-direction: column; + // transferred padding into div + &.bc_data { + padding-left: 0; + padding-right: 0; + + div { + padding-left: 8px; + padding-right: 8px; + } + } + div { display: flex; flex-direction: row; @@ -35,6 +46,10 @@ align-self: start; } } + + .row_hover { + &:hover { background-color: #E0E0FF } + } } .kvl_100 div span:first-child { min-width: 100px; } diff --git a/www/extern/crawler-detect/LICENSE b/www/extern/crawler-detect/LICENSE new file mode 100644 index 0000000..569c7b4 --- /dev/null +++ b/www/extern/crawler-detect/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015-2020 Mark Beech + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/www/extern/crawler-detect/README.md b/www/extern/crawler-detect/README.md new file mode 100644 index 0000000..57ec8e8 --- /dev/null +++ b/www/extern/crawler-detect/README.md @@ -0,0 +1,76 @@ +



+crawlerdetect.io +

+

+

+GitHub Workflow Status + + + + +

+ +## About CrawlerDetect + +CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the `user agent` and `http_from` header. Currently able to detect 1,000's of bots/spiders/crawlers. + +### Installation +``` +composer require jaybizzle/crawler-detect +``` + +### Usage +```PHP +use Jaybizzle\CrawlerDetect\CrawlerDetect; + +$CrawlerDetect = new CrawlerDetect; + +// Check the user agent of the current 'visitor' +if($CrawlerDetect->isCrawler()) { + // true if crawler user agent detected +} + +// Pass a user agent as a string +if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) { + // true if crawler user agent detected +} + +// Output the name of the bot that matched (if any) +echo $CrawlerDetect->getMatches(); +``` + +### Contributing +If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`. + +Failing that, just create an issue with the user agent you have found, and we'll take it from there :) + +### Laravel Package +If you would like to use this with Laravel, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect) + +### Symfony Bundle +To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle). + +### YII2 Extension +To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect). + +### ES6 Library +To use this library with NodeJS or any ES6 application based, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect). + +### Python Library +To use this library in a Python project, check out [crawlerdetect](https://github.com/moskrc/CrawlerDetect). + +### JVM Library (written in Java) +To use this library in a JVM project (including Java, Scala, Kotlin, etc.), check out [CrawlerDetect](https://github.com/nekosoftllc/crawler-detect). + +### .NET Library +To use this library in a .net standard (including .net core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect). + +### Ruby Gem +To use this library with Ruby on Rails or any Ruby-based application, check out [crawler_detect](https://github.com/loadkpi/crawler_detect) gem. + +### Go Module +To use this library with Go, check out the [crawlerdetect](https://github.com/x-way/crawlerdetect) module. + +_Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)_ + +[![Analytics](https://ga-beacon.appspot.com/UA-72430465-1/Crawler-Detect/readme?pixel)](https://github.com/JayBizzle/Crawler-Detect) diff --git a/www/extern/crawler-detect/composer.json b/www/extern/crawler-detect/composer.json new file mode 100644 index 0000000..4774117 --- /dev/null +++ b/www/extern/crawler-detect/composer.json @@ -0,0 +1,29 @@ +{ + "name": "jaybizzle/crawler-detect", + "type": "library", + "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", + "keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"], + "homepage": "https://github.com/JayBizzle/Crawler-Detect/", + "license": "MIT", + "authors": [ + { + "name": "Mark Beech", + "email": "m@rkbee.ch", + "role": "Developer" + } + ], + "require": { + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8|^5.5|^6.5|^9.4" + }, + "autoload": { + "psr-4": { + "Jaybizzle\\CrawlerDetect\\": "src/" + } + }, + "scripts": { + "test": "vendor/bin/phpunit" + } +} diff --git a/www/extern/crawler-detect/export.php b/www/extern/crawler-detect/export.php new file mode 100644 index 0000000..6c7459c --- /dev/null +++ b/www/extern/crawler-detect/export.php @@ -0,0 +1,41 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +require 'src/Fixtures/AbstractProvider.php'; +require 'src/Fixtures/Crawlers.php'; +require 'src/Fixtures/Exclusions.php'; +require 'src/Fixtures/Headers.php'; + +$src = array( + 'Crawlers', + 'Exclusions', + 'Headers', +); + +foreach ($src as $class) { + $class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class"; + $object = new $class; + + outputJson($object); + outputTxt($object); +} + +function outputJson($object) +{ + $className = (new ReflectionClass($object))->getShortName(); + file_put_contents("raw/$className.json", json_encode($object->getAll())); +} + +function outputTxt($object) +{ + $className = (new ReflectionClass($object))->getShortName(); + file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll())); +} diff --git a/www/extern/crawler-detect/raw/Crawlers.json b/www/extern/crawler-detect/raw/Crawlers.json new file mode 100644 index 0000000..003b87c --- /dev/null +++ b/www/extern/crawler-detect/raw/Crawlers.json @@ -0,0 +1 @@ +[" YLT","^Aether","^Amazon Simple Notification Service Agent$","^Amazon-Route53-Health-Check-Service","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^Corax","^DangDang","^DavClnt","^DHSH","^docker\\\/[0-9]","^Expanse","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^Gradle\\\/","^HTTPClient\\\/","^HTTPing","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^Mikrotik\\\/","^Netlab360","^NG\\\/[0-9\\.]","^NING\\\/","^npm\\\/","^Nuclei","^PHP-AYMAPI\\\/","^PHP\\\/","^pip\\\/","^pnpm\\\/","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^Swurl ","^TLS tester ","^twine\\\/","^ureq","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2GDPR\\\/","2ip\\.ru","404enemy","7Siters","80legs","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","adstxt-worker","Adstxtaggregator","adstxt\\.com","Adyen HttpClient","AffiliateLabz\\\/","affilimate-puppeteer","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Anthill","Anturis Agent","Ant\\.com","AnyEvent-HTTP\\\/","Apache Ant\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","apimon\\.de","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","Asana\\\/","Ask Jeeves","AskQuickly","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","awin\\.com","AWS Security Scanner","axios\\\/","a\\.pr-cy\\.ru","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","Bitrix link preview","biz_Directory","BKCTwitterUnshortener\\\/","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cf-facebook","cg-eye","changedetection","ChangesMeter","Charlotte","chatterino-api-cache","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","CISPA Web Analyser","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","ColdFusion","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","Contextual Code Sites Explorer","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Criteo","Crowsnest","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","DatadogSynthetics","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Dispatch\\\/","Disqus\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Domains Project\\\/","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconarchive","faviconkit","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedshow\\\/","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","Florienzh\\\/","fluffy","Flunky","flynxapp","forensiq","ForusP","FoundSeoTool","fragFINN\\.de","free thumbnails","Freeuploader","FreshRSS","frontman","Funnelback","Fuzz Faster U Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","gdnplus\\.com","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Go http package","Go [\\d\\.]* package http","Go!Zilla","Go-Ahead-Got-It","Go-http-client","go-mtasts\\\/","gobuster","gobyus","Gofeed","gofetch","Goldfire Server","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Ads-Creatives-Assistant","Google-Ads-Overview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-InspectionTool","Google-Podcast","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-SMTP-STS","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Transparency-Report","google-xrawler","Google-Youtube-Links","GoogleDocs","GoogleHC\\\/","GoogleProber","GoogleProducer","GoogleSites","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hardenize","Hatena","Havij","HaxerMen","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http:\\\/\\\/www.neomo.de\\\/","HttpComponents","httphr","HTTPie","HTTPMon","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","http\\.rb\\\/","HTTP_Compression_Test","http_get","http_request2","http_requester","httrack","huaweisymantec","HubSpot ","HubSpot-Link-Resolver","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","Internet Ninja","InternetSeer","internetVista monitor","internetwache","internet_archive","intraVnews","IODC","IOI","Inboxb0t","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JungleKeyThumbnail","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kube-probe","kubectl","kulturarw3","KumKie","Larbin","Lavf\\\/","leakix\\.net","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lighthouse","Lightspeedsystems","Likse","limber\\.io","Link Valet","LinkAlarm\\\/","LinkAnalyser","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreview","LinkScan","LinksManager","LinkTiger","LinkWalker","link_thumbnailer","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","ltx71","lua-resty-http","Lucee \\(CFML Engine\\)","Lush Http Client","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","L\\.webis","mabontland","MacOutlook\\\/","Mag-Net","MagpieRSS","Mail::STS","MailChimp","Mail\\.Ru","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","mattermost","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Data Access","Microsoft Office","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft\\.Data\\.Mashup","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","mio_httpc","Miro-HttpClient","Mister PiX","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Mollie","monitis","Monitority\\\/","Monit\\\/","montastic","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mrcgiguy","Mr\\.4x3 Powered","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","myseosnapshot","nagios","Najdi\\.si","Name Intelligence","NameFo\\.com","Nameprotect","nationalarchives","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetSystemsResearch","nettle","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","nghttp2","Nibbler","NICErsPRO","NihilScio","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-fetch","node-superagent","node-urllib","Nodemeter","NodePing","node\\.io","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Observatory\\\/","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","Open Source RSS","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","Owler","Owlin","ownCloud News","ow\\.ly","OxfordCloudService","page scorer","Page Valet","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","PageThing","page_verifier","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","PDF24 URL To PDF","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","php-requests","phpservermon","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","ping\\.blo\\.gs","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Pleroma ","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","PodcastAddict\\\/","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","postfix-mta-sts-resolver","PostmanRuntime","postplanner\\.com","PostPost","postrank","PowerPoint\\\/","Prebid","Prerender","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project ?25499","Project-Resonance","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pump","Python-httplib2","python-httpx","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","quic-go-HTTP\\\/","QuiteRSS","Quora Link Preview","Qwantify","Radian6","RadioPublicImageResizer","Railgun\\\/","RankActive","RankFlex","RankSonicSiteAuditor","RapidLoad\\\/","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSMix\\\/","RSSOwl","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","SAP NetWeaver Application Server","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","ScanAlert","scan\\.lol","Scoop","scooter","ScopeContentAG-HTTP-Client","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","SearchWP","search\\.thunderstone","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","Sendsay\\.Ru","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","seocompany","SEOkicks","SEOlizer","Seomoz","SEOprofiler","seoscanners","SEOsearch","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","ShortLinkTranslate","shortURL lengthener","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site Sucker","Site-Shot\\\/","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","sli-systems\\.com","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","Spawning-AI","spaziodati","SPDYCheck","Specificfeeds","SpeedKit","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","Statically-","StatusCake","Steeler","Stratagems Kumo","Stripe\\\/","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","Synapse\\\/","Syndirella\\\/","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","tchelebi\\.io","techiaith\\.cymru","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Intraformant","The Knowledge AI","theinternetrules","TheNomad","Thinklab","Thumbor","Thumbshots","ThumbSniper","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","ultimate_sitemap_parser","unchaos","unirest-java","UniversalFeedParser","unshortenit","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","URL Verifier","Urlcheckr","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","URL\\\/Emacs","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","Visual Rights Group","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C-checklink","W3C-mobileOK","W3C_Unicorn","WAC-OFU","WakeletLinkExpander","WallpapersHD","Wallpapers\\\/[0-9]+","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","web-capture\\.net","Web-sniffer","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","webtech\\\/","WebThumbnail","Webthumb\\\/","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinHttp-Autoproxy-Service","WinHTTP\\\/","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","WordPress\\\/","WordupinfoSearch","Word\\\/","worldping-api","wotbox","WP Engine Install Performance API","WP Rocket","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","WWWOFFLE","www\\.monitor\\.us","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","XenForo\\\/","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-[A-Z][A-Z][A-Z]","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yomins\\.com","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend\\\\Http\\\\Client","Zend_Http_Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZoteroTranslationServer","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"] \ No newline at end of file diff --git a/www/extern/crawler-detect/raw/Crawlers.txt b/www/extern/crawler-detect/raw/Crawlers.txt new file mode 100644 index 0000000..a61baee --- /dev/null +++ b/www/extern/crawler-detect/raw/Crawlers.txt @@ -0,0 +1,1402 @@ + YLT +^Aether +^Amazon Simple Notification Service Agent$ +^Amazon-Route53-Health-Check-Service +^Amazon CloudFront +^b0t$ +^bluefish +^Calypso v\/ +^COMODO DCV +^Corax +^DangDang +^DavClnt +^DHSH +^docker\/[0-9] +^Expanse +^FDM +^git\/ +^Goose\/ +^Grabber +^Gradle\/ +^HTTPClient\/ +^HTTPing +^Java\/ +^Jeode\/ +^Jetty\/ +^Mail\/ +^Mget +^Microsoft URL Control +^Mikrotik\/ +^Netlab360 +^NG\/[0-9\.] +^NING\/ +^npm\/ +^Nuclei +^PHP-AYMAPI\/ +^PHP\/ +^pip\/ +^pnpm\/ +^RMA\/ +^Ruby|Ruby\/[0-9] +^Swurl +^TLS tester +^twine\/ +^ureq +^VSE\/[0-9] +^WordPress\.com +^XRL\/[0-9] +^ZmEu +008\/ +13TABS +192\.comAgent +2GDPR\/ +2ip\.ru +404enemy +7Siters +80legs +a3logics\.in +A6-Indexer +Abonti +Aboundex +aboutthedomain +Accoona-AI-Agent +acebookexternalhit\/ +acoon +acrylicapps\.com\/pulp +Acunetix +AdAuth\/ +adbeat +AddThis +ADmantX +AdminLabs +adressendeutschland +adreview\/ +adscanner +adstxt-worker +Adstxtaggregator +adstxt\.com +Adyen HttpClient +AffiliateLabz\/ +affilimate-puppeteer +agentslug +AHC +aihit +aiohttp\/ +Airmail +akka-http\/ +akula\/ +alertra +alexa site audit +Alibaba\.Security\.Heimdall +Alligator +allloadin +AllSubmitter +alyze\.info +amagit +Anarchie +AndroidDownloadManager +Anemone +AngleSharp +annotate_google +Anthill +Anturis Agent +Ant\.com +AnyEvent-HTTP\/ +Apache Ant\/ +Apache Droid +Apache OpenOffice +Apache-HttpAsyncClient +Apache-HttpClient +ApacheBench +Apexoo +apimon\.de +APIs-Google +AportWorm\/ +AppBeat\/ +AppEngine-Google +AppleSyndication +Aprc\/[0-9] +Arachmo +arachnode +Arachnophilia +aria2 +Arukereso +asafaweb +Asana\/ +Ask Jeeves +AskQuickly +ASPSeek +Asterias +Astute +asynchttp +Attach +attohttpc +autocite +AutomaticWPTester +Autonomy +awin\.com +AWS Security Scanner +axios\/ +a\.pr-cy\.ru +B-l-i-t-z-B-O-T +Backlink-Ceck +backlink-check +BacklinkHttpStatus +BackStreet +BackupLand +BackWeb +Bad-Neighborhood +Badass +baidu\.com +Bandit +basicstate +BatchFTP +Battleztar Bazinga +baypup\/ +BazQux +BBBike +BCKLINKS +BDFetch +BegunAdvertising +Bewica-security-scan +Bidtellect +BigBozz +Bigfoot +biglotron +BingLocalSearch +BingPreview +binlar +biNu image cacher +Bitacle +Bitrix link preview +biz_Directory +BKCTwitterUnshortener\/ +Black Hole +Blackboard Safeassign +BlackWidow +BlockNote\.Net +BlogBridge +Bloglines +Bloglovin +BlogPulseLive +BlogSearch +Blogtrottr +BlowFish +boitho\.com-dc +Boost\.Beast +BPImageWalker +Braintree-Webhooks +Branch Metrics API +Branch-Passthrough +Brandprotect +BrandVerity +Brandwatch +Brodie\/ +Browsershots +BUbiNG +Buck\/ +Buddy +BuiltWith +Bullseye +BunnySlippers +Burf Search +Butterfly\/ +BuzzSumo +CAAM\/[0-9] +CakePHP +Calculon +Canary%20Mail +CaretNail +catexplorador +CC Metadata Scaper +Cegbfeieh +censys +centuryb.o.t9[at]gmail.com +Cerberian Drtrs +CERT\.at-Statistics-Survey +cf-facebook +cg-eye +changedetection +ChangesMeter +Charlotte +chatterino-api-cache +CheckHost +checkprivacy +CherryPicker +ChinaClaw +Chirp\/ +chkme\.com +Chlooe +Chromaxa +CirrusExplorer +CISPA Vulnerability Notification +CISPA Web Analyser +Citoid +CJNetworkQuality +Clarsentia +clips\.ua\.ac\.be +Cloud mapping +CloudEndure +CloudFlare-AlwaysOnline +Cloudflare-Healthchecks +Cloudinary +cmcm\.com +coccoc +cognitiveseo +ColdFusion +colly - +CommaFeed +Commons-HttpClient +commonscan +contactbigdatafr +contentkingapp +Contextual Code Sites Explorer +convera +CookieReports +copyright sheriff +CopyRightCheck +Copyscape +cortex\/ +Cosmos4j\.feedback +Covario-IDS +Craw\/ +Crescent +Criteo +Crowsnest +CSHttp +CSSCheck +Cula\/ +curb +Curious George +curl +cuwhois\/ +cybo\.com +DAP\/NetHTTP +DareBoost +DatabaseDriverMysqli +DataCha0s +DatadogSynthetics +Datafeedwatch +Datanyze +DataparkSearch +dataprovider +DataXu +Daum(oa)?[ \/][0-9] +dBpoweramp +ddline +deeris +delve\.ai +Demon +DeuSu +developers\.google\.com\/\+\/web\/snippet\/ +Devil +Digg +Digincore +DigitalPebble +Dirbuster +Discourse Forum Onebox +Dispatch\/ +Disqus\/ +DittoSpyder +dlvr +DMBrowser +DNSPod-reporting +docoloc +Dolphin http client +DomainAppender +DomainLabz +Domains Project\/ +Donuts Content Explorer +dotMailer content retrieval +dotSemantic +downforeveryoneorjustme +Download Wonder +downnotifier +DowntimeDetector +Drip +drupact +Drupal \(\+http:\/\/drupal\.org\/\) +DTS Agent +dubaiindex +DuplexWeb-Google +DynatraceSynthetic +EARTHCOM +Easy-Thumb +EasyDL +Ebingbong +ec2linkfinder +eCairn-Grabber +eCatch +ECCP +eContext\/ +Ecxi +EirGrabber +ElectricMonk +elefent +EMail Exractor +EMail Wolf +EmailWolf +Embarcadero +Embed PHP Library +Embedly +endo\/ +europarchive\.org +evc-batch +EventMachine HttpClient +Everwall Link Expander +Evidon +Evrinid +ExactSearch +ExaleadCloudview +Excel\/ +exif +ExoRank +Exploratodo +Express WebPictures +Extreme Picture Finder +EyeNetIE +ezooms +facebookexternalhit +facebookexternalua +facebookplatform +fairshare +Faraday v +fasthttp +Faveeo +Favicon downloader +faviconarchive +faviconkit +FavOrg +Feed Wrangler +Feedable\/ +Feedbin +FeedBooster +FeedBucket +FeedBunch\/ +FeedBurner +feeder +Feedly +FeedshowOnline +Feedshow\/ +Feedspot +FeedViewer\/ +Feedwind\/ +FeedZcollector +feeltiptop +Fetch API +Fetch\/[0-9] +Fever\/[0-9] +FHscan +Fiery%20Feeds +Filestack +Fimap +findlink +findthatfile +FlashGet +FlipboardBrowserProxy +FlipboardProxy +FlipboardRSS +Flock\/ +Florienzh\/ +fluffy +Flunky +flynxapp +forensiq +ForusP +FoundSeoTool +fragFINN\.de +free thumbnails +Freeuploader +FreshRSS +frontman +Funnelback +Fuzz Faster U Fool +G-i-g-a-b-o-t +g00g1e\.net +ganarvisitas +gdnplus\.com +geek-tools +Genieo +GentleSource +GetCode +Getintent +GetLinkInfo +getprismatic +GetRight +getroot +GetURLInfo\/ +GetWeb +Geziyor +Ghost Inspector +GigablastOpenSource +GIS-LABS +github-camo +GitHub-Hookshot +github\.com +Go http package +Go [\d\.]* package http +Go!Zilla +Go-Ahead-Got-It +Go-http-client +go-mtasts\/ +gobuster +gobyus +Gofeed +gofetch +Goldfire Server +GomezAgent +gooblog +Goodzer\/ +Google AppsViewer +Google Desktop +Google favicon +Google Keyword Suggestion +Google Keyword Tool +Google Page Speed Insights +Google PP Default +Google Search Console +Google Web Preview +Google-Ads-Creatives-Assistant +Google-Ads-Overview +Google-Adwords +Google-Apps-Script +Google-Calendar-Importer +Google-HotelAdsVerifier +Google-HTTP-Java-Client +Google-InspectionTool +Google-Podcast +Google-Publisher-Plugin +Google-Read-Aloud +Google-SearchByImage +Google-Site-Verification +Google-SMTP-STS +Google-speakr +Google-Structured-Data-Testing-Tool +Google-Transparency-Report +google-xrawler +Google-Youtube-Links +GoogleDocs +GoogleHC\/ +GoogleProber +GoogleProducer +GoogleSites +Gookey +GoSpotCheck +gosquared-thumbnailer +Gotit +GoZilla +grabify +GrabNet +Grafula +Grammarly +GrapeFX +GreatNews +Gregarius +GRequests +grokkit +grouphigh +grub-client +gSOAP\/ +GT::WWW +GTmetrix +GuzzleHttp +gvfs\/ +HAA(A)?RTLAND http client +Haansoft +hackney\/ +Hadi Agent +HappyApps-WebCheck +Hardenize +Hatena +Havij +HaxerMen +HeadlessChrome +HEADMasterSEO +HeartRails_Capture +help@dataminr\.com +heritrix +Hexometer +historious +hkedcity +hledejLevne\.cz +Hloader +HMView +Holmes +HonesoSearchEngine +HootSuite Image proxy +Hootsuite-WebFeed +hosterstats +HostTracker +ht:\/\/check +htdig +HTMLparser +htmlyse +HTTP Banner Detection +http-get +HTTP-Header-Abfrage +http-kit +http-request\/ +HTTP-Tiny +HTTP::Lite +http:\/\/www.neomo.de\/ +HttpComponents +httphr +HTTPie +HTTPMon +httpRequest +httpscheck +httpssites_power +httpunit +HttpUrlConnection +http\.rb\/ +HTTP_Compression_Test +http_get +http_request2 +http_requester +httrack +huaweisymantec +HubSpot +HubSpot-Link-Resolver +Humanlinks +i2kconnect\/ +Iblog +ichiro +Id-search +IdeelaborPlagiaat +IDG Twitter Links Resolver +IDwhois\/ +Iframely +igdeSpyder +iGooglePortal +IlTrovatore +Image Fetch +Image Sucker +ImageEngine\/ +ImageVisu\/ +Imagga +imagineeasy +imgsizer +InAGist +inbound\.li parser +InDesign%20CC +Indy Library +InetURL +infegy +infohelfer +InfoTekies +InfoWizards Reciprocal Link +inpwrd\.com +instabid +Instapaper +Integrity +integromedb +Intelliseek +InterGET +Internet Ninja +InternetSeer +internetVista monitor +internetwache +internet_archive +intraVnews +IODC +IOI +Inboxb0t +iplabel +ips-agent +IPS\/[0-9] +IPWorks HTTP\/S Component +iqdb\/ +Iria +Irokez +isitup\.org +iskanie +isUp\.li +iThemes Sync\/ +IZaBEE +iZSearch +JAHHO +janforman +Jaunt\/ +Java.*outbrain +javelin\.io +Jbrofuzz +Jersey\/ +JetCar +Jigsaw +Jobboerse +JobFeed discovery +Jobg8 URL Monitor +jobo +Jobrapido +Jobsearch1\.5 +JoinVision Generic +JolokiaPwn +Joomla +Jorgee +JS-Kit +JungleKeyThumbnail +JustView +Kaspersky Lab CFR link resolver +Kelny\/ +Kerrigan\/ +KeyCDN +Keyword Density +Keywords Research +khttp\/ +KickFire +KimonoLabs\/ +Kml-Google +knows\.is +KOCMOHABT +kouio +kube-probe +kubectl +kulturarw3 +KumKie +Larbin +Lavf\/ +leakix\.net +LeechFTP +LeechGet +letsencrypt +Lftp +LibVLC +LibWeb +Libwhisker +libwww +Licorne +Liferea\/ +Lighthouse +Lightspeedsystems +Likse +limber\.io +Link Valet +LinkAlarm\/ +LinkAnalyser +linkCheck +linkdex +LinkExaminer +linkfluence +linkpeek +LinkPreview +LinkScan +LinksManager +LinkTiger +LinkWalker +link_thumbnailer +Lipperhey +Litemage_walker +livedoor ScreenShot +LoadImpactRload +localsearch-web +LongURL API +longurl-r-package +looid\.com +looksystems\.net +ltx71 +lua-resty-http +Lucee \(CFML Engine\) +Lush Http Client +lwp-request +lwp-trivial +LWP::Simple +lycos +LYT\.SR +L\.webis +mabontland +MacOutlook\/ +Mag-Net +MagpieRSS +Mail::STS +MailChimp +Mail\.Ru +Majestic12 +makecontact\/ +Mandrill +MapperCmd +marketinggrader +MarkMonitor +MarkWatch +Mass Downloader +masscan\/ +Mata Hari +mattermost +Mediametric +Mediapartners-Google +mediawords +MegaIndex\.ru +MeltwaterNews +Melvil Rawi +MemGator +Metaspinner +MetaURI +MFC_Tear_Sample +Microsearch +Microsoft Data Access +Microsoft Office +Microsoft Outlook +Microsoft Windows Network Diagnostics +Microsoft-WebDAV-MiniRedir +Microsoft\.Data\.Mashup +MIDown tool +MIIxpc +Mindjet +Miniature\.io +Miniflux +mio_httpc +Miro-HttpClient +Mister PiX +mixdata dot com +mixed-content-scan +mixnode +Mnogosearch +mogimogi +Mojeek +Mojolicious \(Perl\) +Mollie +monitis +Monitority\/ +Monit\/ +montastic +MonTools +Moreover +Morfeus Fucking Scanner +Morning Paper +MovableType +mowser +Mrcgiguy +Mr\.4x3 Powered +MS Web Services Client Protocol +MSFrontPage +mShots +MuckRack\/ +muhstik-scan +MVAClient +MxToolbox\/ +myseosnapshot +nagios +Najdi\.si +Name Intelligence +NameFo\.com +Nameprotect +nationalarchives +Navroad +NearSite +Needle +Nessus +Net Vampire +NetAnts +NETCRAFT +NetLyzer +NetMechanic +NetNewsWire +Netpursual +netresearch +NetShelter ContentScan +Netsparker +NetSystemsResearch +nettle +NetTrack +Netvibes +NetZIP +Neustar WPM +NeutrinoAPI +NewRelicPinger +NewsBlur .*Finder +NewsGator +newsme +newspaper\/ +Nexgate Ruby Client +NG-Search +nghttp2 +Nibbler +NICErsPRO +NihilScio +Nikto +nineconnections +NLNZ_IAHarvester +Nmap Scripting Engine +node-fetch +node-superagent +node-urllib +Nodemeter +NodePing +node\.io +nominet\.org\.uk +nominet\.uk +Norton-Safeweb +Notifixious +notifyninja +NotionEmbedder +nuhk +nutch +Nuzzel +nWormFeedFinder +nyawc\/ +Nymesis +NYU +Observatory\/ +Ocelli\/ +Octopus +oegp +Offline Explorer +Offline Navigator +OgScrper +okhttp +omgili +OMSC +Online Domain Tools +Open Source RSS +OpenCalaisSemanticProxy +Openfind +OpenLinkProfiler +Openstat\/ +OpenVAS +OPPO A33 +Optimizer +Orbiter +OrgProbe\/ +orion-semantics +Outlook-Express +Outlook-iOS +Owler +Owlin +ownCloud News +ow\.ly +OxfordCloudService +page scorer +Page Valet +page2rss +PageFreezer +PageGrabber +PagePeeker +PageScorer +Pagespeed\/ +PageThing +page_verifier +Panopta +panscient +Papa Foto +parsijoo +Pavuk +PayPal IPN +pcBrowser +Pcore-HTTP +PDF24 URL To PDF +Pearltrees +PECL::HTTP +peerindex +Peew +PeoplePal +Perlu - +PhantomJS Screenshoter +PhantomJS\/ +Photon\/ +php-requests +phpservermon +Pi-Monster +Picscout +Picsearch +PictureFinder +Pimonster +Pingability +PingAdmin\.Ru +Pingdom +Pingoscope +PingSpot +ping\.blo\.gs +pinterest\.com +Pixray +Pizilla +Plagger\/ +Pleroma +Ploetz \+ Zeller +Plukkie +plumanalytics +PocketImageCache +PocketParser +Pockey +PodcastAddict\/ +POE-Component-Client-HTTP +Polymail\/ +Pompos +Porkbun +Port Monitor +postano +postfix-mta-sts-resolver +PostmanRuntime +postplanner\.com +PostPost +postrank +PowerPoint\/ +Prebid +Prerender +Priceonomics Analysis Engine +PrintFriendly +PritTorrent +Prlog +probethenet +Project ?25499 +Project-Resonance +prospectb2b +Protopage +ProWebWalker +proximic +PRTG Network Monitor +pshtt, https scanning +PTST +PTST\/[0-9]+ +Pump +Python-httplib2 +python-httpx +python-requests +Python-urllib +Qirina Hurdler +QQDownload +QrafterPro +Qseero +Qualidator +QueryN Metasearch +queuedriver +quic-go-HTTP\/ +QuiteRSS +Quora Link Preview +Qwantify +Radian6 +RadioPublicImageResizer +Railgun\/ +RankActive +RankFlex +RankSonicSiteAuditor +RapidLoad\/ +Re-re Studio +ReactorNetty +Readability +RealDownload +RealPlayer%20Downloader +RebelMouse +Recorder +RecurPost\/ +redback\/ +ReederForMac +Reeder\/ +ReGet +RepoMonkey +request\.js +reqwest\/ +ResponseCodeTest +RestSharp +Riddler +Rival IQ +Robosourcer +Robozilla +ROI Hunter +RPT-HTTPClient +RSSMix\/ +RSSOwl +RyowlEngine +safe-agent-scanner +SalesIntelligent +Saleslift +SAP NetWeaver Application Server +SauceNAO +SBIder +sc-downloader +scalaj-http +Scamadviser-Frontend +ScanAlert +scan\.lol +Scoop +scooter +ScopeContentAG-HTTP-Client +ScoutJet +ScoutURLMonitor +ScrapeBox Page Scanner +Scrapy +Screaming +ScreenShotService +Scrubby +Scrutiny\/ +Search37 +searchenginepromotionhelp +Searchestate +SearchExpress +SearchSight +SearchWP +search\.thunderstone +Seeker +semanticdiscovery +semanticjuice +Semiocast HTTP client +Semrush +Sendsay\.Ru +sentry\/ +SEO Browser +Seo Servis +seo-nastroj\.cz +seo4ajax +Seobility +SEOCentro +SeoCheck +seocompany +SEOkicks +SEOlizer +Seomoz +SEOprofiler +seoscanners +SEOsearch +seositecheckup +SEOstats +servernfo +sexsearcher +Seznam +Shelob +Shodan +Shoppimon +ShopWiki +ShortLinkTranslate +shortURL lengthener +shrinktheweb +Sideqik +Siege +SimplePie +SimplyFast +Siphon +SISTRIX +Site Sucker +Site-Shot\/ +Site24x7 +SiteBar +Sitebeam +Sitebulb\/ +SiteCondor +SiteExplorer +SiteGuardian +Siteimprove +SiteIndexed +Sitemap(s)? Generator +SitemapGenerator +SiteMonitor +Siteshooter B0t +SiteSnagger +SiteSucker +SiteTruth +Sitevigil +sitexy\.com +SkypeUriPreview +Slack\/ +sli-systems\.com +slider\.com +slurp +SlySearch +SmartDownload +SMRF URL Expander +SMUrlExpander +Snake +Snappy +SnapSearch +Snarfer\/ +SniffRSS +sniptracker +Snoopy +SnowHaze Search +sogou web +SortSite +Sottopop +sovereign\.ai +SpaceBison +SpamExperts +Spammen +Spanner +Spawning-AI +spaziodati +SPDYCheck +Specificfeeds +SpeedKit +speedy +SPEng +Spinn3r +spray-can +Sprinklr +spyonweb +sqlmap +Sqlworm +Sqworm +SSL Labs +ssl-tools +StackRambler +Statastico\/ +Statically- +StatusCake +Steeler +Stratagems Kumo +Stripe\/ +Stroke\.cz +StudioFACA +StumbleUpon +suchen +Sucuri +summify +SuperHTTP +Surphace Scout +Suzuran +swcd +Symfony BrowserKit +Symfony2 BrowserKit +Synapse\/ +Syndirella\/ +SynHttpClient-Built +Sysomos +sysscan +Szukacz +T0PHackTeam +tAkeOut +Tarantula\/ +Taringa UGC +TarmotGezgin +tchelebi\.io +techiaith\.cymru +Teleport +Telesoft +Telesphoreo +Telesphorep +Tenon\.io +teoma +terrainformatica +Test Certificate Info +testuri +Tetrahedron +TextRazor Downloader +The Drop Reaper +The Expert HTML Source Viewer +The Intraformant +The Knowledge AI +theinternetrules +TheNomad +Thinklab +Thumbor +Thumbshots +ThumbSniper +timewe\.net +TinEye +Tiny Tiny RSS +TLSProbe\/ +Toata +topster +touche\.com +Traackr\.com +tracemyfile +Trackuity +TrapitAgent +Trendiction +Trendsmap +trendspottr +truwoGPS +TryJsoup +TulipChain +Turingos +Turnitin +tweetedtimes +Tweetminster +Tweezler\/ +twibble +Twice +Twikle +Twingly +Twisted PageGetter +Typhoeus +ubermetrics-technologies +uclassify +UdmSearch +ultimate_sitemap_parser +unchaos +unirest-java +UniversalFeedParser +unshortenit +Unshorten\.It +Untiny +UnwindFetchor +updated +updown\.io daemon +Upflow +Uptimia +URL Verifier +Urlcheckr +URLitor +urlresolver +Urlstat +URLTester +UrlTrends Ranking Updater +URLy Warning +URLy\.Warning +URL\/Emacs +Vacuum +Vagabondo +VB Project +vBSEO +VCI +via ggpht\.com GoogleImageProxy +Virusdie +visionutils +Visual Rights Group +vkShare +VoidEYE +Voil +voltron +voyager\/ +VSAgent\/ +VSB-TUO\/ +Vulnbusters Meter +VYU2 +w3af\.org +W3C-checklink +W3C-mobileOK +W3C_Unicorn +WAC-OFU +WakeletLinkExpander +WallpapersHD +Wallpapers\/[0-9]+ +wangling +Wappalyzer +WatchMouse +WbSrch\/ +WDT\.io +Web Auto +Web Collage +Web Enhancer +Web Fetch +Web Fuck +Web Pix +Web Sauger +Web spyder +Web Sucker +web-capture\.net +Web-sniffer +Webalta +Webauskunft +WebAuto +WebCapture +WebClient\/ +webcollage +WebCookies +WebCopier +WebCorp +WebDataStats +WebDoc +WebEnhancer +WebFetch +WebFuck +WebGazer +WebGo IS +WebImageCollector +WebImages +WebIndex +webkit2png +WebLeacher +webmastercoffee +webmon +WebPix +WebReaper +WebSauger +webscreenie +Webshag +Webshot +Website Quester +websitepulse agent +WebsiteQuester +Websnapr +WebSniffer +Webster +WebStripper +WebSucker +webtech\/ +WebThumbnail +Webthumb\/ +WebWhacker +WebZIP +WeLikeLinks +WEPA +WeSEE +wf84 +Wfuzz\/ +wget +WhatCMS +WhatsApp +WhatsMyIP +WhatWeb +WhereGoes\? +Whibse +WhoAPI\/ +WhoRunsCoinHive +Whynder Magnet +Windows-RSS-Platform +WinHttp-Autoproxy-Service +WinHTTP\/ +WinPodder +wkhtmlto +wmtips +Woko +Wolfram HTTPClient +woorankreview +WordPress\/ +WordupinfoSearch +Word\/ +worldping-api +wotbox +WP Engine Install Performance API +WP Rocket +wpif +wprecon\.com survey +WPScan +wscheck +Wtrace +WWW-Collector-E +WWW-Mechanize +WWW::Document +WWW::Mechanize +WWWOFFLE +www\.monitor\.us +x09Mozilla +x22Mozilla +XaxisSemanticsClassifier +XenForo\/ +Xenu Link Sleuth +XING-contenttabreceiver +xpymep([0-9]?)\.exe +Y!J-[A-Z][A-Z][A-Z] +Yaanb +yacy +Yahoo Link Preview +YahooCacheSystem +YahooMailProxy +YahooYSMcm +YandeG +Yandex(?!Search) +yanga +yeti +Yo-yo +Yoleo Consumer +yomins\.com +yoogliFetchAgent +YottaaMonitor +Your-Website-Sucks +yourls\.org +YoYs\.net +YP\.PL +Zabbix +Zade +Zao +Zauba +Zemanta Aggregator +Zend\\Http\\Client +Zend_Http_Client +Zermelo +Zeus +zgrab +ZnajdzFoto +ZnHTTP +Zombie\.js +Zoom\.Mac +ZoteroTranslationServer +ZyBorg +[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper) \ No newline at end of file diff --git a/www/extern/crawler-detect/raw/Exclusions.json b/www/extern/crawler-detect/raw/Exclusions.json new file mode 100644 index 0000000..e7e0141 --- /dev/null +++ b/www/extern/crawler-detect/raw/Exclusions.json @@ -0,0 +1 @@ +["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT","OCTOPUS-CORE","htc_botdugls","super\\\/\\d+\\\/Android\\\/\\d+"] \ No newline at end of file diff --git a/www/extern/crawler-detect/raw/Exclusions.txt b/www/extern/crawler-detect/raw/Exclusions.txt new file mode 100644 index 0000000..a44a99c --- /dev/null +++ b/www/extern/crawler-detect/raw/Exclusions.txt @@ -0,0 +1,50 @@ +Safari.[\d\.]* +Firefox.[\d\.]* + Chrome.[\d\.]* +Chromium.[\d\.]* +MSIE.[\d\.] +Opera\/[\d\.]* +Mozilla.[\d\.]* +AppleWebKit.[\d\.]* +Trident.[\d\.]* +Windows NT.[\d\.]* +Android [\d\.]* +Macintosh. +Ubuntu +Linux +[ ]Intel +Mac OS X [\d_]* +(like )?Gecko(.[\d\.]*)? +KHTML, +CriOS.[\d\.]* +CPU iPhone OS ([0-9_])* like Mac OS X +CPU OS ([0-9_])* like Mac OS X +iPod +compatible +x86_.. +i686 +x64 +X11 +rv:[\d\.]* +Version.[\d\.]* +WOW64 +Win64 +Dalvik.[\d\.]* + \.NET CLR [\d\.]* +Presto.[\d\.]* +Media Center PC +BlackBerry +Build +Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\. +Opera + \.NET[\d\.]* +cubot +; M bot +; CRONO +; B bot +; IDbot +; ID bot +; POWER BOT +OCTOPUS-CORE +htc_botdugls +super\/\d+\/Android\/\d+ \ No newline at end of file diff --git a/www/extern/crawler-detect/raw/Headers.json b/www/extern/crawler-detect/raw/Headers.json new file mode 100644 index 0000000..718f7f6 --- /dev/null +++ b/www/extern/crawler-detect/raw/Headers.json @@ -0,0 +1 @@ +["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER"] \ No newline at end of file diff --git a/www/extern/crawler-detect/raw/Headers.txt b/www/extern/crawler-detect/raw/Headers.txt new file mode 100644 index 0000000..5e1ae32 --- /dev/null +++ b/www/extern/crawler-detect/raw/Headers.txt @@ -0,0 +1,10 @@ +HTTP_USER_AGENT +HTTP_X_OPERAMINI_PHONE_UA +HTTP_X_DEVICE_USER_AGENT +HTTP_X_ORIGINAL_USER_AGENT +HTTP_X_SKYFIRE_PHONE +HTTP_X_BOLT_PHONE_UA +HTTP_DEVICE_STOCK_UA +HTTP_X_UCBROWSER_DEVICE_UA +HTTP_FROM +HTTP_X_SCANNER \ No newline at end of file diff --git a/www/extern/crawler-detect/src/CrawlerDetect.php b/www/extern/crawler-detect/src/CrawlerDetect.php new file mode 100644 index 0000000..3ea284a --- /dev/null +++ b/www/extern/crawler-detect/src/CrawlerDetect.php @@ -0,0 +1,196 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect; + +use Jaybizzle\CrawlerDetect\Fixtures\Crawlers; +use Jaybizzle\CrawlerDetect\Fixtures\Exclusions; +use Jaybizzle\CrawlerDetect\Fixtures\Headers; + +class CrawlerDetect +{ + /** + * The user agent. + * + * @var string|null + */ + protected $userAgent; + + /** + * Headers that contain a user agent. + * + * @var array + */ + protected $httpHeaders = array(); + + /** + * Store regex matches. + * + * @var array + */ + protected $matches = array(); + + /** + * Crawlers object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers + */ + protected $crawlers; + + /** + * Exclusions object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions + */ + protected $exclusions; + + /** + * Headers object. + * + * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers + */ + protected $uaHttpHeaders; + + /** + * The compiled regex string. + * + * @var string + */ + protected $compiledRegex; + + /** + * The compiled exclusions regex string. + * + * @var string + */ + protected $compiledExclusions; + + /** + * Class constructor. + */ + public function __construct(array $headers = null, $userAgent = null) + { + $this->crawlers = new Crawlers(); + $this->exclusions = new Exclusions(); + $this->uaHttpHeaders = new Headers(); + + $this->compiledRegex = $this->compileRegex($this->crawlers->getAll()); + $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll()); + + $this->setHttpHeaders($headers); + $this->setUserAgent($userAgent); + } + + /** + * Compile the regex patterns into one regex string. + * + * @param array + * + * @return string + */ + public function compileRegex($patterns) + { + return '('.implode('|', $patterns).')'; + } + + /** + * Set HTTP headers. + * + * @param array|null $httpHeaders + */ + public function setHttpHeaders($httpHeaders) + { + // Use global _SERVER if $httpHeaders aren't defined. + if (! is_array($httpHeaders) || ! count($httpHeaders)) { + $httpHeaders = $_SERVER; + } + + // Clear existing headers. + $this->httpHeaders = array(); + + // Only save HTTP headers. In PHP land, that means + // only _SERVER vars that start with HTTP_. + foreach ($httpHeaders as $key => $value) { + if (strpos($key, 'HTTP_') === 0) { + $this->httpHeaders[$key] = $value; + } + } + } + + /** + * Return user agent headers. + * + * @return array + */ + public function getUaHttpHeaders() + { + return $this->uaHttpHeaders->getAll(); + } + + /** + * Set the user agent. + * + * @param string|null $userAgent + */ + public function setUserAgent($userAgent) + { + if (is_null($userAgent)) { + foreach ($this->getUaHttpHeaders() as $altHeader) { + if (isset($this->httpHeaders[$altHeader])) { + $userAgent .= $this->httpHeaders[$altHeader].' '; + } + } + } + + return $this->userAgent = $userAgent; + } + + /** + * Check user agent string against the regex. + * + * @param string|null $userAgent + * + * @return bool + */ + public function isCrawler($userAgent = null) + { + $agent = trim(preg_replace( + "/{$this->compiledExclusions}/i", + '', + $userAgent ?: $this->userAgent ?: '' + )); + + if ($agent === '') { + return false; + } + + return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches); + } + + /** + * Return the matches. + * + * @return string|null + */ + public function getMatches() + { + return isset($this->matches[0]) ? $this->matches[0] : null; + } + + + /** + * @return string|null + */ + public function getUserAgent() + { + return $this->userAgent; + } +} diff --git a/www/extern/crawler-detect/src/Fixtures/AbstractProvider.php b/www/extern/crawler-detect/src/Fixtures/AbstractProvider.php new file mode 100644 index 0000000..ffe10f5 --- /dev/null +++ b/www/extern/crawler-detect/src/Fixtures/AbstractProvider.php @@ -0,0 +1,32 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +abstract class AbstractProvider +{ + /** + * The data set. + * + * @var array + */ + protected $data; + + /** + * Return the data set. + * + * @return array + */ + public function getAll() + { + return $this->data; + } +} diff --git a/www/extern/crawler-detect/src/Fixtures/Crawlers.php b/www/extern/crawler-detect/src/Fixtures/Crawlers.php new file mode 100644 index 0000000..e3153ad --- /dev/null +++ b/www/extern/crawler-detect/src/Fixtures/Crawlers.php @@ -0,0 +1,1424 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Crawlers extends AbstractProvider +{ + /** + * Array of regular expressions to match against the user agent. + * + * @var array + */ + protected $data = array( + ' YLT', + '^Aether', + '^Amazon Simple Notification Service Agent$', + '^Amazon-Route53-Health-Check-Service', + '^Amazon CloudFront', + '^b0t$', + '^bluefish ', + '^Calypso v\/', + '^COMODO DCV', + '^Corax', + '^DangDang', + '^DavClnt', + '^DHSH', + '^docker\/[0-9]', + '^Expanse', + '^FDM ', + '^git\/', + '^Goose\/', + '^Grabber', + '^Gradle\/', + '^HTTPClient\/', + '^HTTPing', + '^Java\/', + '^Jeode\/', + '^Jetty\/', + '^Mail\/', + '^Mget', + '^Microsoft URL Control', + '^Mikrotik\/', + '^Netlab360', + '^NG\/[0-9\.]', + '^NING\/', + '^npm\/', + '^Nuclei', + '^PHP-AYMAPI\/', + '^PHP\/', + '^pip\/', + '^pnpm\/', + '^RMA\/', + '^Ruby|Ruby\/[0-9]', + '^Swurl ', + '^TLS tester ', + '^twine\/', + '^ureq', + '^VSE\/[0-9]', + '^WordPress\.com', + '^XRL\/[0-9]', + '^ZmEu', + '008\/', + '13TABS', + '192\.comAgent', + '2GDPR\/', + '2ip\.ru', + '404enemy', + '7Siters', + '80legs', + 'a3logics\.in', + 'A6-Indexer', + 'Abonti', + 'Aboundex', + 'aboutthedomain', + 'Accoona-AI-Agent', + 'acebookexternalhit\/', + 'acoon', + 'acrylicapps\.com\/pulp', + 'Acunetix', + 'AdAuth\/', + 'adbeat', + 'AddThis', + 'ADmantX', + 'AdminLabs', + 'adressendeutschland', + 'adreview\/', + 'adscanner', + 'adstxt-worker', + 'Adstxtaggregator', + 'adstxt\.com', + 'Adyen HttpClient', + 'AffiliateLabz\/', + 'affilimate-puppeteer', + 'agentslug', + 'AHC', + 'aihit', + 'aiohttp\/', + 'Airmail', + 'akka-http\/', + 'akula\/', + 'alertra', + 'alexa site audit', + 'Alibaba\.Security\.Heimdall', + 'Alligator', + 'allloadin', + 'AllSubmitter', + 'alyze\.info', + 'amagit', + 'Anarchie', + 'AndroidDownloadManager', + 'Anemone', + 'AngleSharp', + 'annotate_google', + 'Anthill', + 'Anturis Agent', + 'Ant\.com', + 'AnyEvent-HTTP\/', + 'Apache Ant\/', + 'Apache Droid', + 'Apache OpenOffice', + 'Apache-HttpAsyncClient', + 'Apache-HttpClient', + 'ApacheBench', + 'Apexoo', + 'apimon\.de', + 'APIs-Google', + 'AportWorm\/', + 'AppBeat\/', + 'AppEngine-Google', + 'AppleSyndication', + 'Aprc\/[0-9]', + 'Arachmo', + 'arachnode', + 'Arachnophilia', + 'aria2', + 'Arukereso', + 'asafaweb', + 'Asana\/', + 'Ask Jeeves', + 'AskQuickly', + 'ASPSeek', + 'Asterias', + 'Astute', + 'asynchttp', + 'Attach', + 'attohttpc', + 'autocite', + 'AutomaticWPTester', + 'Autonomy', + 'awin\.com', + 'AWS Security Scanner', + 'axios\/', + 'a\.pr-cy\.ru', + 'B-l-i-t-z-B-O-T', + 'Backlink-Ceck', + 'backlink-check', + 'BacklinkHttpStatus', + 'BackStreet', + 'BackupLand', + 'BackWeb', + 'Bad-Neighborhood', + 'Badass', + 'baidu\.com', + 'Bandit', + 'basicstate', + 'BatchFTP', + 'Battleztar Bazinga', + 'baypup\/', + 'BazQux', + 'BBBike', + 'BCKLINKS', + 'BDFetch', + 'BegunAdvertising', + 'Bewica-security-scan', + 'Bidtellect', + 'BigBozz', + 'Bigfoot', + 'biglotron', + 'BingLocalSearch', + 'BingPreview', + 'binlar', + 'biNu image cacher', + 'Bitacle', + 'Bitrix link preview', + 'biz_Directory', + 'BKCTwitterUnshortener\/', + 'Black Hole', + 'Blackboard Safeassign', + 'BlackWidow', + 'BlockNote\.Net', + 'BlogBridge', + 'Bloglines', + 'Bloglovin', + 'BlogPulseLive', + 'BlogSearch', + 'Blogtrottr', + 'BlowFish', + 'boitho\.com-dc', + 'Boost\.Beast', + 'BPImageWalker', + 'Braintree-Webhooks', + 'Branch Metrics API', + 'Branch-Passthrough', + 'Brandprotect', + 'BrandVerity', + 'Brandwatch', + 'Brodie\/', + 'Browsershots', + 'BUbiNG', + 'Buck\/', + 'Buddy', + 'BuiltWith', + 'Bullseye', + 'BunnySlippers', + 'Burf Search', + 'Butterfly\/', + 'BuzzSumo', + 'CAAM\/[0-9]', + 'CakePHP', + 'Calculon', + 'Canary%20Mail', + 'CaretNail', + 'catexplorador', + 'CC Metadata Scaper', + 'Cegbfeieh', + 'censys', + 'centuryb.o.t9[at]gmail.com', + 'Cerberian Drtrs', + 'CERT\.at-Statistics-Survey', + 'cf-facebook', + 'cg-eye', + 'changedetection', + 'ChangesMeter', + 'Charlotte', + 'chatterino-api-cache', + 'CheckHost', + 'checkprivacy', + 'CherryPicker', + 'ChinaClaw', + 'Chirp\/', + 'chkme\.com', + 'Chlooe', + 'Chromaxa', + 'CirrusExplorer', + 'CISPA Vulnerability Notification', + 'CISPA Web Analyser', + 'Citoid', + 'CJNetworkQuality', + 'Clarsentia', + 'clips\.ua\.ac\.be', + 'Cloud mapping', + 'CloudEndure', + 'CloudFlare-AlwaysOnline', + 'Cloudflare-Healthchecks', + 'Cloudinary', + 'cmcm\.com', + 'coccoc', + 'cognitiveseo', + 'ColdFusion', + 'colly -', + 'CommaFeed', + 'Commons-HttpClient', + 'commonscan', + 'contactbigdatafr', + 'contentkingapp', + 'Contextual Code Sites Explorer', + 'convera', + 'CookieReports', + 'copyright sheriff', + 'CopyRightCheck', + 'Copyscape', + 'cortex\/', + 'Cosmos4j\.feedback', + 'Covario-IDS', + 'Craw\/', + 'Crescent', + 'Criteo', + 'Crowsnest', + 'CSHttp', + 'CSSCheck', + 'Cula\/', + 'curb', + 'Curious George', + 'curl', + 'cuwhois\/', + 'cybo\.com', + 'DAP\/NetHTTP', + 'DareBoost', + 'DatabaseDriverMysqli', + 'DataCha0s', + 'DatadogSynthetics', + 'Datafeedwatch', + 'Datanyze', + 'DataparkSearch', + 'dataprovider', + 'DataXu', + 'Daum(oa)?[ \/][0-9]', + 'dBpoweramp', + 'ddline', + 'deeris', + 'delve\.ai', + 'Demon', + 'DeuSu', + 'developers\.google\.com\/\+\/web\/snippet\/', + 'Devil', + 'Digg', + 'Digincore', + 'DigitalPebble', + 'Dirbuster', + 'Discourse Forum Onebox', + 'Dispatch\/', + 'Disqus\/', + 'DittoSpyder', + 'dlvr', + 'DMBrowser', + 'DNSPod-reporting', + 'docoloc', + 'Dolphin http client', + 'DomainAppender', + 'DomainLabz', + 'Domains Project\/', + 'Donuts Content Explorer', + 'dotMailer content retrieval', + 'dotSemantic', + 'downforeveryoneorjustme', + 'Download Wonder', + 'downnotifier', + 'DowntimeDetector', + 'Drip', + 'drupact', + 'Drupal \(\+http:\/\/drupal\.org\/\)', + 'DTS Agent', + 'dubaiindex', + 'DuplexWeb-Google', + 'DynatraceSynthetic', + 'EARTHCOM', + 'Easy-Thumb', + 'EasyDL', + 'Ebingbong', + 'ec2linkfinder', + 'eCairn-Grabber', + 'eCatch', + 'ECCP', + 'eContext\/', + 'Ecxi', + 'EirGrabber', + 'ElectricMonk', + 'elefent', + 'EMail Exractor', + 'EMail Wolf', + 'EmailWolf', + 'Embarcadero', + 'Embed PHP Library', + 'Embedly', + 'endo\/', + 'europarchive\.org', + 'evc-batch', + 'EventMachine HttpClient', + 'Everwall Link Expander', + 'Evidon', + 'Evrinid', + 'ExactSearch', + 'ExaleadCloudview', + 'Excel\/', + 'exif', + 'ExoRank', + 'Exploratodo', + 'Express WebPictures', + 'Extreme Picture Finder', + 'EyeNetIE', + 'ezooms', + 'facebookexternalhit', + 'facebookexternalua', + 'facebookplatform', + 'fairshare', + 'Faraday v', + 'fasthttp', + 'Faveeo', + 'Favicon downloader', + 'faviconarchive', + 'faviconkit', + 'FavOrg', + 'Feed Wrangler', + 'Feedable\/', + 'Feedbin', + 'FeedBooster', + 'FeedBucket', + 'FeedBunch\/', + 'FeedBurner', + 'feeder', + 'Feedly', + 'FeedshowOnline', + 'Feedshow\/', + 'Feedspot', + 'FeedViewer\/', + 'Feedwind\/', + 'FeedZcollector', + 'feeltiptop', + 'Fetch API', + 'Fetch\/[0-9]', + 'Fever\/[0-9]', + 'FHscan', + 'Fiery%20Feeds', + 'Filestack', + 'Fimap', + 'findlink', + 'findthatfile', + 'FlashGet', + 'FlipboardBrowserProxy', + 'FlipboardProxy', + 'FlipboardRSS', + 'Flock\/', + 'Florienzh\/', + 'fluffy', + 'Flunky', + 'flynxapp', + 'forensiq', + 'ForusP', + 'FoundSeoTool', + 'fragFINN\.de', + 'free thumbnails', + 'Freeuploader', + 'FreshRSS', + 'frontman', + 'Funnelback', + 'Fuzz Faster U Fool', + 'G-i-g-a-b-o-t', + 'g00g1e\.net', + 'ganarvisitas', + 'gdnplus\.com', + 'geek-tools', + 'Genieo', + 'GentleSource', + 'GetCode', + 'Getintent', + 'GetLinkInfo', + 'getprismatic', + 'GetRight', + 'getroot', + 'GetURLInfo\/', + 'GetWeb', + 'Geziyor', + 'Ghost Inspector', + 'GigablastOpenSource', + 'GIS-LABS', + 'github-camo', + 'GitHub-Hookshot', + 'github\.com', + 'Go http package', + 'Go [\d\.]* package http', + 'Go!Zilla', + 'Go-Ahead-Got-It', + 'Go-http-client', + 'go-mtasts\/', + 'gobuster', + 'gobyus', + 'Gofeed', + 'gofetch', + 'Goldfire Server', + 'GomezAgent', + 'gooblog', + 'Goodzer\/', + 'Google AppsViewer', + 'Google Desktop', + 'Google favicon', + 'Google Keyword Suggestion', + 'Google Keyword Tool', + 'Google Page Speed Insights', + 'Google PP Default', + 'Google Search Console', + 'Google Web Preview', + 'Google-Ads', + 'Google-Adwords', + 'Google-Apps-Script', + 'Google-Calendar-Importer', + 'Google-HotelAdsVerifier', + 'Google-HTTP-Java-Client', + 'Google-InspectionTool', + 'Google-Podcast', + 'Google-Publisher-Plugin', + 'Google-Read-Aloud', + 'Google-SearchByImage', + 'Google-Site-Verification', + 'Google-SMTP-STS', + 'Google-speakr', + 'Google-Structured-Data-Testing-Tool', + 'Google-Transparency-Report', + 'google-xrawler', + 'Google-Youtube-Links', + 'GoogleDocs', + 'GoogleHC\/', + 'GoogleProber', + 'GoogleProducer', + 'GoogleSites', + 'Gookey', + 'GoSpotCheck', + 'gosquared-thumbnailer', + 'Gotit', + 'GoZilla', + 'grabify', + 'GrabNet', + 'Grafula', + 'Grammarly', + 'GrapeFX', + 'GreatNews', + 'Gregarius', + 'GRequests', + 'grokkit', + 'grouphigh', + 'grub-client', + 'gSOAP\/', + 'GT::WWW', + 'GTmetrix', + 'GuzzleHttp', + 'gvfs\/', + 'HAA(A)?RTLAND http client', + 'Haansoft', + 'hackney\/', + 'Hadi Agent', + 'HappyApps-WebCheck', + 'Hardenize', + 'Hatena', + 'Havij', + 'HaxerMen', + 'HeadlessChrome', + 'HEADMasterSEO', + 'HeartRails_Capture', + 'help@dataminr\.com', + 'heritrix', + 'Hexometer', + 'historious', + 'hkedcity', + 'hledejLevne\.cz', + 'Hloader', + 'HMView', + 'Holmes', + 'HonesoSearchEngine', + 'HootSuite Image proxy', + 'Hootsuite-WebFeed', + 'hosterstats', + 'HostTracker', + 'ht:\/\/check', + 'htdig', + 'HTMLparser', + 'htmlyse', + 'HTTP Banner Detection', + 'http-get', + 'HTTP-Header-Abfrage', + 'http-kit', + 'http-request\/', + 'HTTP-Tiny', + 'HTTP::Lite', + 'http:\/\/www.neomo.de\/', //'Francis [Bot]' + 'HttpComponents', + 'httphr', + 'HTTPie', + 'HTTPMon', + 'httpRequest', + 'httpscheck', + 'httpssites_power', + 'httpunit', + 'HttpUrlConnection', + 'http\.rb\/', + 'HTTP_Compression_Test', + 'http_get', + 'http_request2', + 'http_requester', + 'httrack', + 'huaweisymantec', + 'HubSpot ', + 'HubSpot-Link-Resolver', + 'Humanlinks', + 'i2kconnect\/', + 'Iblog', + 'ichiro', + 'Id-search', + 'IdeelaborPlagiaat', + 'IDG Twitter Links Resolver', + 'IDwhois\/', + 'Iframely', + 'igdeSpyder', + 'iGooglePortal', + 'IlTrovatore', + 'Image Fetch', + 'Image Sucker', + 'ImageEngine\/', + 'ImageVisu\/', + 'Imagga', + 'imagineeasy', + 'imgsizer', + 'InAGist', + 'inbound\.li parser', + 'InDesign%20CC', + 'Indy Library', + 'InetURL', + 'infegy', + 'infohelfer', + 'InfoTekies', + 'InfoWizards Reciprocal Link', + 'inpwrd\.com', + 'instabid', + 'Instapaper', + 'Integrity', + 'integromedb', + 'Intelliseek', + 'InterGET', + 'Internet Ninja', + 'InternetSeer', + 'internetVista monitor', + 'internetwache', + 'internet_archive', + 'intraVnews', + 'IODC', + 'IOI', + 'Inboxb0t', + 'iplabel', + 'ips-agent', + 'IPS\/[0-9]', + 'IPWorks HTTP\/S Component', + 'iqdb\/', + 'Iria', + 'Irokez', + 'isitup\.org', + 'iskanie', + 'isUp\.li', + 'iThemes Sync\/', + 'IZaBEE', + 'iZSearch', + 'JAHHO', + 'janforman', + 'Jaunt\/', + 'Java.*outbrain', + 'javelin\.io', + 'Jbrofuzz', + 'Jersey\/', + 'JetCar', + 'Jigsaw', + 'Jobboerse', + 'JobFeed discovery', + 'Jobg8 URL Monitor', + 'jobo', + 'Jobrapido', + 'Jobsearch1\.5', + 'JoinVision Generic', + 'JolokiaPwn', + 'Joomla', + 'Jorgee', + 'JS-Kit', + 'JungleKeyThumbnail', + 'JustView', + 'Kaspersky Lab CFR link resolver', + 'Kelny\/', + 'Kerrigan\/', + 'KeyCDN', + 'Keyword Density', + 'Keywords Research', + 'khttp\/', + 'KickFire', + 'KimonoLabs\/', + 'Kml-Google', + 'knows\.is', + 'KOCMOHABT', + 'kouio', + 'kube-probe', + 'kubectl', + 'kulturarw3', + 'KumKie', + 'Larbin', + 'Lavf\/', + 'leakix\.net', + 'LeechFTP', + 'LeechGet', + 'letsencrypt', + 'Lftp', + 'LibVLC', + 'LibWeb', + 'Libwhisker', + 'libwww', + 'Licorne', + 'Liferea\/', + 'Lighthouse', + 'Lightspeedsystems', + 'Likse', + 'limber\.io', + 'Link Valet', + 'LinkAlarm\/', + 'LinkAnalyser', + 'linkCheck', + 'linkdex', + 'LinkExaminer', + 'linkfluence', + 'linkpeek', + 'LinkPreview', + 'LinkScan', + 'LinksManager', + 'LinkTiger', + 'LinkWalker', + 'link_thumbnailer', + 'Lipperhey', + 'Litemage_walker', + 'livedoor ScreenShot', + 'LoadImpactRload', + 'localsearch-web', + 'LongURL API', + 'longurl-r-package', + 'looid\.com', + 'looksystems\.net', + 'ltx71', + 'lua-resty-http', + 'Lucee \(CFML Engine\)', + 'Lush Http Client', + 'lwp-request', + 'lwp-trivial', + 'LWP::Simple', + 'lycos', + 'LYT\.SR', + 'L\.webis', + 'mabontland', + 'MacOutlook\/', + 'Mag-Net', + 'MagpieRSS', + 'Mail::STS', + 'MailChimp', + 'Mail\.Ru', + 'Majestic12', + 'makecontact\/', + 'Mandrill', + 'MapperCmd', + 'marketinggrader', + 'MarkMonitor', + 'MarkWatch', + 'Mass Downloader', + 'masscan\/', + 'Mata Hari', + 'mattermost', + 'Mediametric', + 'Mediapartners-Google', + 'mediawords', + 'MegaIndex\.ru', + 'MeltwaterNews', + 'Melvil Rawi', + 'MemGator', + 'Metaspinner', + 'MetaURI', + 'MFC_Tear_Sample', + 'Microsearch', + 'Microsoft Data Access', + 'Microsoft Office', + 'Microsoft Outlook', + 'Microsoft Windows Network Diagnostics', + 'Microsoft-WebDAV-MiniRedir', + 'Microsoft\.Data\.Mashup', + 'MIDown tool', + 'MIIxpc', + 'Mindjet', + 'Miniature\.io', + 'Miniflux', + 'mio_httpc', + 'Miro-HttpClient', + 'Mister PiX', + 'mixdata dot com', + 'mixed-content-scan', + 'mixnode', + 'Mnogosearch', + 'mogimogi', + 'Mojeek', + 'Mojolicious \(Perl\)', + 'Mollie', + 'monitis', + 'Monitority\/', + 'Monit\/', + 'montastic', + 'MonTools', + 'Moreover', + 'Morfeus Fucking Scanner', + 'Morning Paper', + 'MovableType', + 'mowser', + 'Mrcgiguy', + 'Mr\.4x3 Powered', + 'MS Web Services Client Protocol', + 'MSFrontPage', + 'mShots', + 'MuckRack\/', + 'muhstik-scan', + 'MVAClient', + 'MxToolbox\/', + 'myseosnapshot', + 'nagios', + 'Najdi\.si', + 'Name Intelligence', + 'NameFo\.com', + 'Nameprotect', + 'nationalarchives', + 'Navroad', + 'NearSite', + 'Needle', + 'Nessus', + 'Net Vampire', + 'NetAnts', + 'NETCRAFT', + 'NetLyzer', + 'NetMechanic', + 'NetNewsWire', + 'Netpursual', + 'netresearch', + 'NetShelter ContentScan', + 'Netsparker', + 'NetSystemsResearch', + 'nettle', + 'NetTrack', + 'Netvibes', + 'NetZIP', + 'Neustar WPM', + 'NeutrinoAPI', + 'NewRelicPinger', + 'NewsBlur .*Finder', + 'NewsGator', + 'newsme', + 'newspaper\/', + 'Nexgate Ruby Client', + 'NG-Search', + 'nghttp2', + 'Nibbler', + 'NICErsPRO', + 'NihilScio', + 'Nikto', + 'nineconnections', + 'NLNZ_IAHarvester', + 'Nmap Scripting Engine', + 'node-fetch', + 'node-superagent', + 'node-urllib', + 'Nodemeter', + 'NodePing', + 'node\.io', + 'nominet\.org\.uk', + 'nominet\.uk', + 'Norton-Safeweb', + 'Notifixious', + 'notifyninja', + 'NotionEmbedder', + 'nuhk', + 'nutch', + 'Nuzzel', + 'nWormFeedFinder', + 'nyawc\/', + 'Nymesis', + 'NYU', + 'Observatory\/', + 'Ocelli\/', + 'Octopus', + 'oegp', + 'Offline Explorer', + 'Offline Navigator', + 'OgScrper', + 'okhttp', + 'omgili', + 'OMSC', + 'Online Domain Tools', + 'Open Source RSS', + 'OpenCalaisSemanticProxy', + 'Openfind', + 'OpenLinkProfiler', + 'Openstat\/', + 'OpenVAS', + 'OPPO A33', + 'Optimizer', + 'Orbiter', + 'OrgProbe\/', + 'orion-semantics', + 'Outlook-Express', + 'Outlook-iOS', + 'Owler', + 'Owlin', + 'ownCloud News', + 'ow\.ly', + 'OxfordCloudService', + 'page scorer', + 'Page Valet', + 'page2rss', + 'PageFreezer', + 'PageGrabber', + 'PagePeeker', + 'PageScorer', + 'Pagespeed\/', + 'PageThing', + 'page_verifier', + 'Panopta', + 'panscient', + 'Papa Foto', + 'parsijoo', + 'Pavuk', + 'PayPal IPN', + 'pcBrowser', + 'Pcore-HTTP', + 'PDF24 URL To PDF', + 'Pearltrees', + 'PECL::HTTP', + 'peerindex', + 'Peew', + 'PeoplePal', + 'Perlu -', + 'PhantomJS Screenshoter', + 'PhantomJS\/', + 'Photon\/', + 'php-requests', + 'phpservermon', + 'Pi-Monster', + 'Picscout', + 'Picsearch', + 'PictureFinder', + 'Pimonster', + 'Pingability', + 'PingAdmin\.Ru', + 'Pingdom', + 'Pingoscope', + 'PingSpot', + 'ping\.blo\.gs', + 'pinterest\.com', + 'Pixray', + 'Pizilla', + 'Plagger\/', + 'Pleroma ', + 'Ploetz \+ Zeller', + 'Plukkie', + 'plumanalytics', + 'PocketImageCache', + 'PocketParser', + 'Pockey', + 'PodcastAddict\/', + 'POE-Component-Client-HTTP', + 'Polymail\/', + 'Pompos', + 'Porkbun', + 'Port Monitor', + 'postano', + 'postfix-mta-sts-resolver', + 'PostmanRuntime', + 'postplanner\.com', + 'PostPost', + 'postrank', + 'PowerPoint\/', + 'Prebid', + 'Prerender', + 'Priceonomics Analysis Engine', + 'PrintFriendly', + 'PritTorrent', + 'Prlog', + 'probethenet', + 'Project ?25499', + 'Project-Resonance', + 'prospectb2b', + 'Protopage', + 'ProWebWalker', + 'proximic', + 'PRTG Network Monitor', + 'pshtt, https scanning', + 'PTST ', + 'PTST\/[0-9]+', + 'Pump', + 'Python-httplib2', + 'python-httpx', + 'python-requests', + 'Python-urllib', + 'Qirina Hurdler', + 'QQDownload', + 'QrafterPro', + 'Qseero', + 'Qualidator', + 'QueryN Metasearch', + 'queuedriver', + 'quic-go-HTTP\/', + 'QuiteRSS', + 'Quora Link Preview', + 'Qwantify', + 'Radian6', + 'RadioPublicImageResizer', + 'Railgun\/', + 'RankActive', + 'RankFlex', + 'RankSonicSiteAuditor', + 'RapidLoad\/', + 'Re-re Studio', + 'ReactorNetty', + 'Readability', + 'RealDownload', + 'RealPlayer%20Downloader', + 'RebelMouse', + 'Recorder', + 'RecurPost\/', + 'redback\/', + 'ReederForMac', + 'Reeder\/', + 'ReGet', + 'RepoMonkey', + 'request\.js', + 'reqwest\/', + 'ResponseCodeTest', + 'RestSharp', + 'Riddler', + 'Rival IQ', + 'Robosourcer', + 'Robozilla', + 'ROI Hunter', + 'RPT-HTTPClient', + 'RSSMix\/', + 'RSSOwl', + 'RyowlEngine', + 'safe-agent-scanner', + 'SalesIntelligent', + 'Saleslift', + 'SAP NetWeaver Application Server', + 'SauceNAO', + 'SBIder', + 'sc-downloader', + 'scalaj-http', + 'Scamadviser-Frontend', + 'ScanAlert', + 'scan\.lol', + 'Scoop', + 'scooter', + 'ScopeContentAG-HTTP-Client', + 'ScoutJet', + 'ScoutURLMonitor', + 'ScrapeBox Page Scanner', + 'Scrapy', + 'Screaming', + 'ScreenShotService', + 'Scrubby', + 'Scrutiny\/', + 'Search37', + 'searchenginepromotionhelp', + 'Searchestate', + 'SearchExpress', + 'SearchSight', + 'SearchWP', + 'search\.thunderstone', + 'Seeker', + 'semanticdiscovery', + 'semanticjuice', + 'Semiocast HTTP client', + 'Semrush', + 'Sendsay\.Ru', + 'sentry\/', + 'SEO Browser', + 'Seo Servis', + 'seo-nastroj\.cz', + 'seo4ajax', + 'Seobility', + 'SEOCentro', + 'SeoCheck', + 'seocompany', + 'SEOkicks', + 'SEOlizer', + 'Seomoz', + 'SEOprofiler', + 'seoscanners', + 'SEOsearch', + 'seositecheckup', + 'SEOstats', + 'servernfo', + 'sexsearcher', + 'Seznam', + 'Shelob', + 'Shodan', + 'Shoppimon', + 'ShopWiki', + 'ShortLinkTranslate', + 'shortURL lengthener', + 'shrinktheweb', + 'Sideqik', + 'Siege', + 'SimplePie', + 'SimplyFast', + 'Siphon', + 'SISTRIX', + 'Site Sucker', + 'Site-Shot\/', + 'Site24x7', + 'SiteBar', + 'Sitebeam', + 'Sitebulb\/', + 'SiteCondor', + 'SiteExplorer', + 'SiteGuardian', + 'Siteimprove', + 'SiteIndexed', + 'Sitemap(s)? Generator', + 'SitemapGenerator', + 'SiteMonitor', + 'Siteshooter B0t', + 'SiteSnagger', + 'SiteSucker', + 'SiteTruth', + 'Sitevigil', + 'sitexy\.com', + 'SkypeUriPreview', + 'Slack\/', + 'sli-systems\.com', + 'slider\.com', + 'slurp', + 'SlySearch', + 'SmartDownload', + 'SMRF URL Expander', + 'SMUrlExpander', + 'Snake', + 'Snappy', + 'SnapSearch', + 'Snarfer\/', + 'SniffRSS', + 'sniptracker', + 'Snoopy', + 'SnowHaze Search', + 'sogou web', + 'SortSite', + 'Sottopop', + 'sovereign\.ai', + 'SpaceBison', + 'SpamExperts', + 'Spammen', + 'Spanner', + 'Spawning-AI', + 'spaziodati', + 'SPDYCheck', + 'Specificfeeds', + 'SpeedKit', + 'speedy', + 'SPEng', + 'Spinn3r', + 'spray-can', + 'Sprinklr ', + 'spyonweb', + 'sqlmap', + 'Sqlworm', + 'Sqworm', + 'SSL Labs', + 'ssl-tools', + 'StackRambler', + 'Statastico\/', + 'Statically-', + 'StatusCake', + 'Steeler', + 'Stratagems Kumo', + 'Stripe\/', + 'Stroke\.cz', + 'StudioFACA', + 'StumbleUpon', + 'suchen', + 'Sucuri', + 'summify', + 'SuperHTTP', + 'Surphace Scout', + 'Suzuran', + 'swcd ', + 'Symfony BrowserKit', + 'Symfony2 BrowserKit', + 'Synapse\/', + 'Syndirella\/', + 'SynHttpClient-Built', + 'Sysomos', + 'sysscan', + 'Szukacz', + 'T0PHackTeam', + 'tAkeOut', + 'Tarantula\/', + 'Taringa UGC', + 'TarmotGezgin', + 'tchelebi\.io', + 'techiaith\.cymru', + 'Teleport', + 'Telesoft', + 'Telesphoreo', + 'Telesphorep', + 'Tenon\.io', + 'teoma', + 'terrainformatica', + 'Test Certificate Info', + 'testuri', + 'Tetrahedron', + 'TextRazor Downloader', + 'The Drop Reaper', + 'The Expert HTML Source Viewer', + 'The Intraformant', + 'The Knowledge AI', + 'theinternetrules', + 'TheNomad', + 'Thinklab', + 'Thumbor', + 'Thumbshots', + 'ThumbSniper', + 'timewe\.net', + 'TinEye', + 'Tiny Tiny RSS', + 'TLSProbe\/', + 'Toata', + 'topster', + 'touche\.com', + 'Traackr\.com', + 'tracemyfile', + 'Trackuity', + 'TrapitAgent', + 'Trendiction', + 'Trendsmap', + 'trendspottr', + 'truwoGPS', + 'TryJsoup', + 'TulipChain', + 'Turingos', + 'Turnitin', + 'tweetedtimes', + 'Tweetminster', + 'Tweezler\/', + 'twibble', + 'Twice', + 'Twikle', + 'Twingly', + 'Twisted PageGetter', + 'Typhoeus', + 'ubermetrics-technologies', + 'uclassify', + 'UdmSearch', + 'ultimate_sitemap_parser', + 'unchaos', + 'unirest-java', + 'UniversalFeedParser', + 'unshortenit', + 'Unshorten\.It', + 'Untiny', + 'UnwindFetchor', + 'updated', + 'updown\.io daemon', + 'Upflow', + 'Uptimia', + 'URL Verifier', + 'Urlcheckr', + 'URLitor', + 'urlresolver', + 'Urlstat', + 'URLTester', + 'UrlTrends Ranking Updater', + 'URLy Warning', + 'URLy\.Warning', + 'URL\/Emacs', + 'Vacuum', + 'Vagabondo', + 'VB Project', + 'vBSEO', + 'VCI', + 'via ggpht\.com GoogleImageProxy', + 'Virusdie', + 'visionutils', + 'Visual Rights Group', + 'vkShare', + 'VoidEYE', + 'Voil', + 'voltron', + 'voyager\/', + 'VSAgent\/', + 'VSB-TUO\/', + 'Vulnbusters Meter', + 'VYU2', + 'w3af\.org', + 'W3C-checklink', + 'W3C-mobileOK', + 'W3C_Unicorn', + 'WAC-OFU', + 'WakeletLinkExpander', + 'WallpapersHD', + 'Wallpapers\/[0-9]+', + 'wangling', + 'Wappalyzer', + 'WatchMouse', + 'WbSrch\/', + 'WDT\.io', + 'Web Auto', + 'Web Collage', + 'Web Enhancer', + 'Web Fetch', + 'Web Fuck', + 'Web Pix', + 'Web Sauger', + 'Web spyder', + 'Web Sucker', + 'web-capture\.net', + 'Web-sniffer', + 'Webalta', + 'Webauskunft', + 'WebAuto', + 'WebCapture', + 'WebClient\/', + 'webcollage', + 'WebCookies', + 'WebCopier', + 'WebCorp', + 'WebDataStats', + 'WebDoc', + 'WebEnhancer', + 'WebFetch', + 'WebFuck', + 'WebGazer', + 'WebGo IS', + 'WebImageCollector', + 'WebImages', + 'WebIndex', + 'webkit2png', + 'WebLeacher', + 'webmastercoffee', + 'webmon ', + 'WebPix', + 'WebReaper', + 'WebSauger', + 'webscreenie', + 'Webshag', + 'Webshot', + 'Website Quester', + 'websitepulse agent', + 'WebsiteQuester', + 'Websnapr', + 'WebSniffer', + 'Webster', + 'WebStripper', + 'WebSucker', + 'webtech\/', + 'WebThumbnail', + 'Webthumb\/', + 'WebWhacker', + 'WebZIP', + 'WeLikeLinks', + 'WEPA', + 'WeSEE', + 'wf84', + 'Wfuzz\/', + 'wget', + 'WhatCMS', + 'WhatsApp', + 'WhatsMyIP', + 'WhatWeb', + 'WhereGoes\?', + 'Whibse', + 'WhoAPI\/', + 'WhoRunsCoinHive', + 'Whynder Magnet', + 'Windows-RSS-Platform', + 'WinHttp-Autoproxy-Service', + 'WinHTTP\/', + 'WinPodder', + 'wkhtmlto', + 'wmtips', + 'Woko', + 'Wolfram HTTPClient', + 'woorankreview', + 'WordPress\/', + 'WordupinfoSearch', + 'Word\/', + 'worldping-api', + 'wotbox', + 'WP Engine Install Performance API', + 'WP Rocket', + 'wpif', + 'wprecon\.com survey', + 'WPScan', + 'wscheck', + 'Wtrace', + 'WWW-Collector-E', + 'WWW-Mechanize', + 'WWW::Document', + 'WWW::Mechanize', + 'WWWOFFLE', + 'www\.monitor\.us', + 'x09Mozilla', + 'x22Mozilla', + 'XaxisSemanticsClassifier', + 'XenForo\/', + 'Xenu Link Sleuth', + 'XING-contenttabreceiver', + 'xpymep([0-9]?)\.exe', + 'Y!J-[A-Z][A-Z][A-Z]', + 'Yaanb', + 'yacy', + 'Yahoo Link Preview', + 'YahooCacheSystem', + 'YahooMailProxy', + 'YahooYSMcm', + 'YandeG', + 'Yandex(?!Search)', + 'yanga', + 'yeti', + 'Yo-yo', + 'Yoleo Consumer', + 'yomins\.com', + 'yoogliFetchAgent', + 'YottaaMonitor', + 'Your-Website-Sucks', + 'yourls\.org', + 'YoYs\.net', + 'YP\.PL', + 'Zabbix', + 'Zade', + 'Zao', + 'Zauba', + 'Zemanta Aggregator', + 'Zend\\\\Http\\\\Client', + 'Zend_Http_Client', + 'Zermelo', + 'Zeus ', + 'zgrab', + 'ZnajdzFoto', + 'ZnHTTP', + 'Zombie\.js', + 'Zoom\.Mac', + 'ZoteroTranslationServer', + 'ZyBorg', + '[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)', + ); +} diff --git a/www/extern/crawler-detect/src/Fixtures/Exclusions.php b/www/extern/crawler-detect/src/Fixtures/Exclusions.php new file mode 100644 index 0000000..6274557 --- /dev/null +++ b/www/extern/crawler-detect/src/Fixtures/Exclusions.php @@ -0,0 +1,74 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Exclusions extends AbstractProvider +{ + /** + * List of strings to remove from the user agent before running the crawler regex + * Over a large list of user agents, this gives us about a 55% speed increase! + * + * @var array + */ + protected $data = array( + 'Safari.[\d\.]*', + 'Firefox.[\d\.]*', + ' Chrome.[\d\.]*', + 'Chromium.[\d\.]*', + 'MSIE.[\d\.]', + 'Opera\/[\d\.]*', + 'Mozilla.[\d\.]*', + 'AppleWebKit.[\d\.]*', + 'Trident.[\d\.]*', + 'Windows NT.[\d\.]*', + 'Android [\d\.]*', + 'Macintosh.', + 'Ubuntu', + 'Linux', + '[ ]Intel', + 'Mac OS X [\d_]*', + '(like )?Gecko(.[\d\.]*)?', + 'KHTML,', + 'CriOS.[\d\.]*', + 'CPU iPhone OS ([0-9_])* like Mac OS X', + 'CPU OS ([0-9_])* like Mac OS X', + 'iPod', + 'compatible', + 'x86_..', + 'i686', + 'x64', + 'X11', + 'rv:[\d\.]*', + 'Version.[\d\.]*', + 'WOW64', + 'Win64', + 'Dalvik.[\d\.]*', + ' \.NET CLR [\d\.]*', + 'Presto.[\d\.]*', + 'Media Center PC', + 'BlackBerry', + 'Build', + 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.', + 'Opera', + ' \.NET[\d\.]*', + 'cubot', + '; M bot', + '; CRONO', + '; B bot', + '; IDbot', + '; ID bot', + '; POWER BOT', + 'OCTOPUS-CORE', + 'htc_botdugls', + 'super\/\d+\/Android\/\d+', + ); +} diff --git a/www/extern/crawler-detect/src/Fixtures/Headers.php b/www/extern/crawler-detect/src/Fixtures/Headers.php new file mode 100644 index 0000000..35c60c3 --- /dev/null +++ b/www/extern/crawler-detect/src/Fixtures/Headers.php @@ -0,0 +1,37 @@ + + * + * This source file is subject to the MIT license that is bundled + * with this source code in the file LICENSE. + */ + +namespace Jaybizzle\CrawlerDetect\Fixtures; + +class Headers extends AbstractProvider +{ + /** + * All possible HTTP headers that represent the user agent string. + * + * @var array + */ + protected $data = array( + // The default User-Agent string. + 'HTTP_USER_AGENT', + // Header can occur on devices using Opera Mini. + 'HTTP_X_OPERAMINI_PHONE_UA', + // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/ + 'HTTP_X_DEVICE_USER_AGENT', + 'HTTP_X_ORIGINAL_USER_AGENT', + 'HTTP_X_SKYFIRE_PHONE', + 'HTTP_X_BOLT_PHONE_UA', + 'HTTP_DEVICE_STOCK_UA', + 'HTTP_X_UCBROWSER_DEVICE_UA', + // Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address + 'HTTP_FROM', + 'HTTP_X_SCANNER', // Seen in use by Netsparker + ); +} diff --git a/www/internals/modules/projectlawful.php b/www/internals/modules/projectlawful.php index 0235d24..0d00199 100644 --- a/www/internals/modules/projectlawful.php +++ b/www/internals/modules/projectlawful.php @@ -57,6 +57,41 @@ class ProjectLawful implements IWebsiteModule return $this->site->modules->Database()->sql_query_assoc('SELECT variant, COUNT(*) AS `count` FROM projectlawful_downloadcounter GROUP BY variant ORDER BY variant'); } + public function listDownloadCountsExt() + { + // https://github.com/JayBizzle/Crawler-Detect + + require_once __DIR__ . '/../../extern/crawler-detect/src/Fixtures/AbstractProvider.php'; + require_once __DIR__ . '/../../extern/crawler-detect/src/Fixtures/Crawlers.php'; + require_once __DIR__ . '/../../extern/crawler-detect/src/Fixtures/Exclusions.php'; + require_once __DIR__ . '/../../extern/crawler-detect/src/Fixtures/Headers.php'; + require_once __DIR__ . '/../../extern/crawler-detect/src/CrawlerDetect.php'; + + $CrawlerDetect = new \Jaybizzle\CrawlerDetect\CrawlerDetect; + + $r = []; + + foreach ($this->site->modules->Database()->sql_query_assoc('SELECT * FROM projectlawful_downloadcounter ORDER BY timestamp ASC') as $entry) + { + if (!key_exists($entry['variant'], $r)) $r[$entry['variant']] = [0, 0, '']; + + $v = $r[$entry['variant']]; + + if ($CrawlerDetect->isCrawler($entry['useragent'])) + { + $r[$entry['variant']] = [$v[0] + 0, $v[1] + 1, $entry['timestamp']]; + } + else + { + $r[$entry['variant']] = [$v[0] + 1, $v[1] + 1, $v[2]]; + } + } + + ksort($r); + + return $r; + } + public function variantExists(string $variant) { return isset($this->variants[$variant]); diff --git a/www/pages/admin.php b/www/pages/admin.php index e09851f..2d102e3 100644 --- a/www/pages/admin.php +++ b/www/pages/admin.php @@ -97,8 +97,16 @@ $connected = true; try { $SITE->modules->Database(); } catch (Exception $e) { $c
- modules->ProjectLawful()->listDownloadCounts() as $dlc): ?> -
:
+ modules->ProjectLawful()->listDownloadCountsExt() as $variant => [$nonbot, $total, $ts]): ?> +
+ : +   + +   + ( ) +   + +
Database not connected.
@@ -150,11 +158,11 @@ $connected = true; try { $SITE->modules->Database(); } catch (Exception $e) { $c
Statics
-
Blog entries: modules->Blog()->listAll()); ?>
-
Book entries: modules->Books()->listAll()); ?>
-
Euler entries: modules->Euler()->listAll()); ?>
-
Program entries: modules->Programs()->listAll()); ?>
-
Update entries: modules->UpdatesLog()->listUpdateData()); ?>
+
Blog entries: modules->Blog()->listAll()); ?>
+
Book entries: modules->Books()->listAll()); ?>
+
Euler entries: modules->Euler()->listAll()); ?>
+
Program entries: modules->Programs()->listAll()); ?>
+
Update entries: modules->UpdatesLog()->listUpdateData()); ?>
@@ -218,9 +226,9 @@ $connected = true; try { $SITE->modules->Database(); } catch (Exception $e) { $c if ($key === 'extendedgitgraph') continue; if (is_array($value)) - echo '
' . $key . ' ' . json_encode($value, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) . '
' . "\n"; + echo '
' . $key . ' ' . json_encode($value, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) . '
' . "\n"; else - echo '
' . $key . ' ' . nl2br(var_export($value, true)) . '
' . "\n"; + echo '
' . $key . ' ' . nl2br(var_export($value, true)) . '
' . "\n"; } ?>