[ { "pattern": "Googlebot\\/", "url": "http://www.google.com/bot.html", "instances": [ "Googlebot/2.1 (+http://www.google.com/bot.html)", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36" ], "description": "Google's main web crawling bot for search indexing" }, { "pattern": "Googlebot-Mobile", "instances": [ "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", 
"Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)" ], "description": "Google's legacy mobile crawler for Google Search indexing" }, { "pattern": "Googlebot-Image", "instances": [ "Googlebot-Image/1.0" ], "description": "Google's image-specific web crawling bot for image search indexing" }, { "pattern": "Googlebot-News", "instances": [ "Googlebot-News" ], "description": "Google's news-specific web crawling bot for Google News indexing" }, { "pattern": "Googlebot-Video", "instances": [ "Googlebot-Video/1.0" ], "description": "Google's video crawler for video-related Google Search features" }, { "pattern": "AdsBot-Google([^-]|$)", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "AdsBot-Google (+http://www.google.com/adsbot.html)" ], "description": "Google's Ads bot for checking web page ad quality" }, { "pattern": "AdsBot-Google-Mobile", "addition_date": "2017/08/21", "url": "https://support.google.com/adwords/answer/2404197", "instances": [ "AdsBot-Google-Mobile-Apps", "Mozilla/5.0 (Linux; Android 5.0; SM-G920A) AppleWebKit (KHTML, like Gecko) Chrome Mobile Safari (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)" ], "description": "Google's mobile Ads bot for crawling mobile pages to serve targeted ads" }, { "pattern": "Feedfetcher-Google", "addition_date": "2018/06/27", "url": "https://support.google.com/webmasters/answer/178852", "instances": [ "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers; 
feed-id=728742641706423)" ], "description": "Google's feed fetcher bot for fetching RSS and Atom feeds for Google services" }, { "pattern": "Mediapartners-Google", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "Mediapartners-Google", "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 10_0 like Mac OS X; en-us) AppleWebKit/602.1.38 (KHTML, like Gecko) Version/10.0 Mobile/14A5297c Safari/602.1 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)" ], "description": "Google's Mediapartners bot for AdSense and AdMob crawling" }, { "pattern": "Mediapartners \\(Googlebot\\)", "addition_date": "2017/08/08", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [], "description": "Google's Mediapartners bot variant for AdSense and AdMob crawling" }, { "pattern": "APIs-Google", "addition_date": "2017/08/08", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)" ], "description": "Google APIs user agent for delivering push notification messages to web applications" }, { "pattern": "Google-InspectionTool", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Google-InspectionTool/1.0)", "Mozilla/5.0 (compatible; Google-InspectionTool/1.0)" ], "description": "Google's inspection tool bot for testing and debugging search indexing" }, { "pattern": "Storebot-Google", "url": 
"https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "Mozilla/5.0 (X11; Linux x86_64; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36", "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012; Storebot-Google/1.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36" ], "description": "Google's Storebot for crawling product and e-commerce pages" }, { "pattern": "GoogleOther", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "GoogleOther" ], "description": "Google's other bots and services for various Google search features" }, { "pattern": "bingbot", "url": "http://www.bing.com/bingbot.htm", "instances": [ "Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) SitemapProbe", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; 
bingbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (seoanalyzer; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Safari/537.36", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36" ], "description": "Microsoft's web crawling bot for Bing search indexing" }, { "pattern": "Slurp", "url": "http://help.yahoo.com/help/us/ysearch/slurp", "instances": [ "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)", "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)" ], "description": "Yahoo's web crawling bot for Yahoo search indexing" }, { "pattern": "[wW]get", "instances": [ "WGETbot/1.0 (+http://wget.alanreed.org)", "Wget/1.14 (linux-gnu)", "Wget/1.20.3 (linux-gnu)" ], "description": "GNU Wget command-line tool for downloading web content" }, { "pattern": "LinkedInBot", "instances": [ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)" ], "description": "LinkedIn's bot for crawling professional content and profiles" }, { "pattern": "Python-urllib", "instances": [ "Python-urllib/1.17", "Python-urllib/2.5", "Python-urllib/2.6", "Python-urllib/2.7", "Python-urllib/3.1", "Python-urllib/3.2", "Python-urllib/3.3", "Python-urllib/3.4", "Python-urllib/3.5", "Python-urllib/3.6", 
"Python-urllib/3.7" ], "description": "Python's built-in URL library for HTTP requests" }, { "pattern": "python-requests", "addition_date": "2018/05/27", "instances": [ "python-requests/2.9.2", "python-requests/2.11.1", "python-requests/2.18.4", "python-requests/2.19.1", "python-requests/2.20.0", "python-requests/2.21.0", "python-requests/2.22.0" ], "description": "Popular Python HTTP library for making web requests" }, { "pattern": "aiohttp", "addition_date": "2019/12/23", "instances": [ "Python/3.9 aiohttp/3.7.3", "Python/3.8 aiohttp/3.7.2", "Python/3.7 aiohttp/3.6.2a2" ], "url": "https://docs.aiohttp.org/en/stable/", "description": "Asynchronous HTTP client library for Python" }, { "pattern": "httpx", "addition_date": "2019/12/23", "instances": [ "python-httpx/0.16.1", "python-httpx/0.13.0.dev1" ], "url": "https://www.python-httpx.org", "description": "Modern Python HTTP client with async support" }, { "pattern": "libwww-perl", "instances": [ "2Bone_LinkChecker/1.0 libwww-perl/6.03", "2Bone_LinkChkr/1.0 libwww-perl/6.03", "amibot - http://www.amidalla.de - tech@amidalla.com libwww-perl/5.831" ], "description": "Perl library for making HTTP requests and web crawling" }, { "pattern": "httpunit", "instances": [ "httpunit/1.x" ], "description": "Java library for automated web application testing" }, { "pattern": "Nutch", "instances": [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/605.1.16 (KHTML, like Gecko; compatible; Friendly_Crawler/2.0) Chrome/120.0.6099.217 Safari/605.1.15/Nutch-1.20-SNAPSHOT", "NutchCVS/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", "istellabot-nutch/Nutch-1.10" ], "description": "Apache Nutch open-source web crawler framework" }, { "pattern": "Go-http-client", "addition_date": "2016/03/26", "url": "https://golang.org/pkg/net/http/", "instances": [ "Go-http-client/1.1", "Go-http-client/2.0" ], "description": "Go programming language HTTP client library" }, { "pattern": "phpcrawl", 
"addition_date": "2012/09/17", "url": "http://phpcrawl.cuab.de/", "instances": [ "phpcrawl" ], "description": "PHP web crawler library for scraping websites" }, { "pattern": "msnbot", "url": "http://search.msn.com/msnbot.htm", "instances": [ "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)", "adidxbot/2.0 (+http://search.msn.com/msnbot.htm)", "librabot/1.0 (+http://search.msn.com/msnbot.htm)", "librabot/2.0 (+http://search.msn.com/msnbot.htm)", "msnbot-NewsBlogs/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)", "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)", "msnbot-media/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot/1.0 (+http://search.msn.com/msnbot.htm)", "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "msnbot/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot/2.0b (+http://search.msn.com/msnbot.htm).", "msnbot/2.0b (+http://search.msn.com/msnbot.htm)._" ], "description": "Microsoft's search engine bot for web indexing" }, { "pattern": "jyxobot", "instances": [], "description": "Jyxo search engine bot for web crawling" }, { "pattern": "FAST-WebCrawler", "instances": [ "FAST-WebCrawler/3.6/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.8" ], "description": "FAST search engine web crawler for indexing" }, { "pattern": "FAST Enterprise Crawler", "instances": [ "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/", "FAST Enterprise Crawler 6 used by Schibsted (webcrawl@schibstedsok.no)" ], "description": "FAST enterprise-grade web crawler for search" }, { "pattern": "BIGLOTRON", "instances": [ "BIGLOTRON (Beta 2;GNU/Linux)" ], "description": "Biglotron search engine 
web crawler bot" }, { "pattern": "Teoma", "instances": [ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)", "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)" ], "url": "http://about.ask.com/en/docs/about/webmasters.shtml", "description": "Ask Jeeves Teoma search engine web crawler" }, { "pattern": "convera", "instances": [ "ConveraCrawler/0.9e (+http://ews.converasearch.com/crawl.htm)" ], "url": "http://ews.converasearch.com/crawl.htm", "description": "Convera search engine web crawler bot" }, { "pattern": "seekbot", "instances": [ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2" ], "url": "http://www.seekbot.net/bot.html", "description": "Seekbot search engine web crawler for indexing" }, { "pattern": "Gigabot", "instances": [ "Gigabot/1.0", "Gigabot/2.0 (http://www.gigablast.com/spider.html)" ], "url": "http://www.gigablast.com/spider.html", "description": "Gigablast search engine web crawler bot" }, { "pattern": "Gigablast", "instances": [ "GigablastOpenSource/1.0" ], "url": "https://github.com/gigablast/open-source-search-engine", "description": "Gigablast open-source search engine crawler" }, { "pattern": "exabot", "instances": [ "Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)", "Mozilla/5.0 (compatible; Exabot PyExalead/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot-Images/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0 (BiggerBetter); +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0; http://www.exabot.com/go/robot)" ], "description": "Exabot search engine web crawler for indexing" }, { "pattern": "ia_archiver", "instances": [ "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)", "ia_archiver-web.archive.org" ], 
"description": "Internet Archive Wayback Machine web crawler" }, { "pattern": "GingerCrawler", "instances": [ "GingerCrawler/1.0 (Language Assistant for Dyslexics; www.gingersoftware.com/crawler_agent.htm; support at ginger software dot com)" ], "description": "Ginger Software's language assistant web crawler" }, { "pattern": "webmon ", "instances": [], "description": "Webmon website monitoring and crawling bot" }, { "pattern": "HTTrack", "instances": [ "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" ], "description": "HTTrack website copier for offline browsing" }, { "pattern": "grub\\.org", "instances": [ "Mozilla/4.0 (compatible; grub-client-0.3.0; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.4; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.5; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.7; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.1.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.2.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.3.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.3.7; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.4.3; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.5.3; Crawl your own stuff with http://grub.org)" ], "description": "Grub search engine web crawler for indexing" }, { "pattern": "UsineNouvelleCrawler", "instances": [], "description": "Usine Nouvelle news site web crawler" }, { "pattern": "antibot", "instances": [], "description": "Antibot web crawler for content discovery" }, { "pattern": "netresearchserver", "instances": [], "description": "Net Research Server web crawler bot" }, { 
"pattern": "speedy", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider for SpeedyAds (http://www.entireweb.com/about/search_tech/speedy_spider/)", "Mozilla/5.0 (compatible; Speedy Spider; http://www.entireweb.com/about/search_tech/speedy_spider/)", "Speedy Spider (Entireweb; Beta/1.2; http://www.entireweb.com/about/search_tech/speedyspider/)", "Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)" ], "description": "Entireweb Speedy Spider web crawler bot" }, { "pattern": "fluffy", "instances": [], "description": "Fluffy search engine web crawler bot" }, { "pattern": "findlink", "instances": [ "findlinks/1.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.3-beta8 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.3-beta9 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.5-beta7 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/; YaCy 0.1; yacy.net)", "findlinks/1.1.6-beta2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta3 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.1 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.4 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.9 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1 
(+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1.3 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.6 (+http://wortschatz.uni-leipzig.de/findlinks/)" ], "description": "Findlinks web crawler for link discovery" }, { "pattern": "msrbot", "instances": [], "description": "Microsoft Research web crawler bot" }, { "pattern": "panscient", "instances": [ "panscient.com" ], "description": "Panscient web crawler for content analysis" }, { "pattern": "yacybot", "instances": [ "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 
3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 
1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.8.0_111; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 5.2.8-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 5.2.9-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 5.2.11-Jinsol; java 12.0.2; Europe/en) http://yacy.net/bot.html" ], "description": "YaCy decentralized search engine web crawler" }, { "pattern": "AISearchBot", "instances": [], "description": "AI-powered search engine web crawler bot" }, { "pattern": "ips-agent", "instances": [ "BlackBerry9000/4.6.0.167 Profile/MIDP-2.0 Configuration/CLDC-1.1 VendorID/102 ips-agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12; ips-agent) Gecko/20050922 Fedora/1.0.7-1.1.fc4 Firefox/1.0.7", "Mozilla/5.0 (X11; U; Linux i686; 
en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.24; ips-agent) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1" ], "description": "IPS agent web crawler for content indexing" }, { "pattern": "tagoobot", "instances": [], "description": "Tagoo search engine web crawler bot" }, { "pattern": "MJ12bot", "instances": [ "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.5; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.0; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.2; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.0; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.2; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.4 (domain ownership verifier); http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.6; http://mj12bot.com/)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; 
http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)" ], "description": "Majestic-12 search engine web crawler bot" }, { "pattern": "woriobot", "instances": [ "Mozilla/5.0 (compatible; woriobot +http://worio.com)", "Mozilla/5.0 (compatible; woriobot support [at] zite [dot] com +http://zite.com)" ], "description": "Worio search engine web crawler bot" }, { "pattern": "yanga", "instances": [ "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)" ], "description": "Yanga search engine web crawler bot" }, { "pattern": "buzzbot", "instances": [ "Buzzbot/1.0 (Buzzbot; http://www.buzzstream.com; buzzbot@buzzstream.com)" ], "description": "Buzzstream web crawler for link research" }, { "pattern": "mlbot", "instances": [ "MLBot (www.metadatalabs.com/mlbot)" ], "description": "Metadata Labs web crawler for analysis" }, { "pattern": "yandex\\.com\\/bots", "url": "https://yandex.ru/support/webmaster/robot-workings/check-yandex-robots.html#robot-in-logs", "instances": [ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots", "Mozilla/5.0 (compatible; YandexUserproxy; robot; +http://yandex.com/bots", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01)", "Mozilla/5.0 (compatible; YandexMetrika/3.0; 
+http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexAdNet/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexCalendar/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots", "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexForDomain/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexPagechecker/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexSitelinks; Dyatel; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexSpravBot/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexTracker/1.0; 
+http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexVertis/3.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)", "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)" ], "addition_date": "2015/04/14", "description": "Yandex search engine web crawler bots" }, { "pattern": "purebot", "addition_date": "2010/01/19", "instances": [], "description": "Pure web crawler for content discovery" }, { "pattern": "Linguee Bot", "addition_date": "2010/01/26", "url": "http://www.linguee.com/bot", "instances": [ "Linguee Bot (http://www.linguee.com/bot)", "Linguee Bot (http://www.linguee.com/bot; bot@linguee.com)" ], "description": "Linguee translation web crawler bot" }, { "pattern": "CyberPatrol", "addition_date": "2010/02/11", "url": "http://www.cyberpatrol.com/cyberpatrolcrawler.asp", "instances": [ "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)" ], "description": "CyberPatrol web content filtering bot" }, { "pattern": "voilabot", "addition_date": "2010/05/18", "instances": [ "Mozilla/5.0 (Windows NT 5.1; U; Win64; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)" ], "description": "Voila search engine web crawler bot" }, { "pattern": "Baiduspider", "addition_date": "2010/07/15", "url": "http://www.baidu.jp/spider/", "instances": [ "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)", "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)" ], "description": "Baidu search engine web crawler bot" }, { "pattern": "citeseerxbot", "addition_date": "2010/07/17", "instances": [], "description": "CiteSeerX 
academic web crawler bot" }, { "pattern": "spbot", "addition_date": "2010/07/31", "url": "http://www.seoprofiler.com/bot", "instances": [ "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.2; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.3; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.7; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.8; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0.9; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.1.0; 
+http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.2.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.3.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.1; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.2; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.1; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.2; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.3; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0; +http://OpenLinkProfiler.org/bot )" ], "description": "SEO Profiler web crawler for analysis" }, { "pattern": "twengabot", "addition_date": "2010/08/03", "url": "http://www.twenga.com/bot.html", "instances": [], "description": "Twenga shopping web crawler bot" }, { "pattern": "postrank", "addition_date": "2010/08/03", "url": "http://www.postrank.com", "instances": [ "PostRank/2.0 (postrank.com)", "PostRank/2.0 (postrank.com; 1 subscribers)" ], "description": "PostRank web crawler for content ranking" }, { "pattern": "Turnitin", "addition_date": "2010/09/26", "url": "http://www.turnitin.com", "instances": [ "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", "Turnitin (https://bit.ly/2UvnfoQ)" ], "description": "Turnitin plagiarism detection web crawler" }, { "pattern": "scribdbot", "addition_date": "2010/09/28", "url": "http://www.scribd.com", "instances": [], "description": "Scribd document web crawler bot" }, { "pattern": "page2rss", "addition_date": "2010/10/07", "url": "http://www.page2rss.com", "instances": [ "Mozilla/5.0 (compatible; Page2RSS/0.7; +http://page2rss.com/)" ], "description": "Page2RSS web crawler for RSS conversion" }, { "pattern": "sitebot", "addition_date": "2010/12/15", "url": "http://www.sitebot.org", "instances": [ "Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; 
+http://www.whoiswebsite.net)" ], "description": "Sitebot web crawler for site analysis" }, { "pattern": "linkdex", "addition_date": "2011/01/06", "url": "http://www.linkdex.com", "instances": [ "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", "linkdex.com/v2.0", "linkdexbot/Nutch-1.0-dev (http://www.linkdex.com/; crawl at linkdex dot com)" ], "description": "Linkdex SEO tool web crawler for link analysis" }, { "pattern": "Adidxbot", "url": "https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0", "instances": [], "description": "Bing's advertising index web crawler bot" }, { "pattern": "ezooms", "addition_date": "2011/04/27", "url": "http://www.phpbb.com/community/viewtopic.php?f=64&t=935605&start=450#p12948289", "instances": [ "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)" ], "description": "Ezooms search engine web crawler bot" }, { "pattern": "dotbot", "addition_date": "2011/04/27", "instances": [ "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)", "dotbot" ], "description": "Moz DotBot web crawler for SEO analysis" }, { "pattern": "Mail\\.RU_Bot", "addition_date": "2011/04/27", "instances": [ "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)", "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/", "Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/", "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Robots/2.0; +http://go.mail.ru/help/robots)" ], "description": "Mail.RU search engine web crawler bot" }, { "pattern": "discobot", "addition_date": "2011/05/03", "url": 
"http://discoveryengine.com/discobot.html", "instances": [ "Mozilla/5.0 (compatible; discobot/1.0; +http://discoveryengine.com/discobot.html)", "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)", "mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html)" ], "description": "Discovery Engine web crawler for content discovery" }, { "pattern": "heritrix", "addition_date": "2011/06/21", "url": "https://github.com/internetarchive/heritrix3/wiki", "instances": [ "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.12.1b +http://netarkivet.dk/website/info.html)", "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://rjpower.org)", "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://archive.org)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.accelobot.com)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.14.3.r6601 +http://www.buddybuzz.net/yptrino)", "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://parsijoo.ir)", "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://www.exif-search.com)", "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://aihit.com)", "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://seekda.com)", "Mozilla/5.0 (compatible; heritrix/3.0.0-SNAPSHOT-20091120.021634 +http://crawler.archive.org)", "Mozilla/5.0 (compatible; heritrix/3.1.0-RC1 +http://boston.lti.cs.cmu.edu/crawler_12/)", "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://places.tomtom.com/crawlerinfo)", "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://www.mixdata.com)", "Mozilla/5.0 (compatible; heritrix/3.1.1; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.crim.ca)", "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.exif-search.com)", "Mozilla/5.0 (compatible; heritrix/3.2.0 
+http://www.mixdata.com)", "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", "Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)" ], "description": "Internet Archive's Heritrix web crawler framework" }, { "pattern": "findthatfile", "addition_date": "2011/06/21", "url": "http://www.findthatfile.com/", "instances": [], "description": "FindThatFile web crawler for file discovery" }, { "pattern": "europarchive\\.org", "addition_date": "2011/06/21", "url": "", "instances": [ "Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)" ], "description": "European Archive web crawler for preservation" }, { "pattern": "NerdByNature\\.Bot", "addition_date": "2011/07/12", "url": "http://www.nerdbynature.net/bot", "instances": [ "Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)" ], "description": "NerdByNature web crawler for content indexing" }, { "pattern": "(sistrix|SISTRIX) [cC]rawler", "addition_date": "2011/08/02", "url": "https://www.sistrix.com/tutorials/crawling-errors-in-the-optimizer/", "instances": [ "Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)" ], "description": "SISTRIX SEO tool web crawler for analysis" }, { "pattern": "Ahrefs(Bot|SiteAudit)", "addition_date": "2011/08/28", "instances": [ "Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsSiteAudit/6.1; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsBot/5.2; News; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsSiteAudit/5.2; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsBot/6.1; News; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" ], "description": "Ahrefs SEO tool web crawler for link analysis" }, { 
"pattern": "fuelbot", "addition_date": "2018/06/28", "instances": [ "fuelbot" ], "description": "Fuel web crawler for content discovery" }, { "pattern": "CrunchBot", "addition_date": "2018/06/28", "instances": [ "CrunchBot/1.0 (+http://www.leadcrunch.com/crunchbot)" ], "description": "LeadCrunch web crawler for lead generation" }, { "pattern": "IndeedBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0 (IndeedBot 1.1)" ], "description": "Indeed job search web crawler bot" }, { "pattern": "mappydata", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Mappy/1.0; +http://mappydata.net/bot/)" ], "description": "Mappy web crawler for mapping data" }, { "pattern": "woobot", "addition_date": "2018/06/28", "instances": [ "woobot" ], "description": "Woo web crawler for content analysis" }, { "pattern": "ZoominfoBot", "addition_date": "2018/06/28", "instances": [ "ZoominfoBot (zoominfobot at zoominfo dot com)" ], "description": "ZoomInfo web crawler for business intelligence" }, { "pattern": "PrivacyAwareBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; PrivacyAwareBot/1.1; +http://www.privacyaware.org)" ], "description": "PrivacyAware web crawler for privacy analysis" }, { "pattern": "Multiviewbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Multiviewbot" ], "description": "Multiview web crawler for content discovery" }, { "pattern": "SWIMGBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36 SWIMGBot" ], "description": "SWIMG web crawler for image discovery" }, { "pattern": "Grobbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Grobbot/2.2; +https://grob.it)" ], "description": "Grob web crawler for content analysis" }, { "pattern": "eright", "addition_date": 
"2018/06/28", "instances": [ "Mozilla/5.0 (compatible; eright/1.0; +bot@eright.com)" ], "description": "Eright web crawler for content discovery" }, { "pattern": "Apercite", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Apercite; +http://www.apercite.fr/robot/index.html)" ], "description": "Apercite web crawler for content analysis" }, { "pattern": "semanticbot", "addition_date": "2018/06/28", "instances": [ "semanticbot", "semanticbot (info@semanticaudience.com)" ], "description": "Semantic web crawler for content analysis" }, { "pattern": "Aboundex", "addition_date": "2011/09/28", "url": "http://www.aboundex.com/crawler/", "instances": [ "Aboundex/0.2 (http://www.aboundex.com/crawler/)", "Aboundex/0.3 (http://www.aboundex.com/crawler/)" ], "description": "Aboundex web crawler for content discovery" }, { "pattern": "domaincrawler", "addition_date": "2011/10/21", "instances": [ "CipaCrawler/3.0 (info@domaincrawler.com; http://www.domaincrawler.com/www.example.com)" ], "description": "Domain Crawler web crawler for analysis" }, { "pattern": "wbsearchbot", "addition_date": "2011/12/21", "url": "http://www.warebay.com/bot.html", "instances": [], "description": "Warebay search web crawler bot" }, { "pattern": "summify", "addition_date": "2012/01/04", "url": "http://summify.com", "instances": [ "Summify (Summify/1.0.1; +http://summify.com)" ], "description": "Summify web crawler for content summarization" }, { "pattern": "CCBot", "addition_date": "2012/02/05", "url": "http://www.commoncrawl.org/bot.html", "instances": [ "CCBot/2.0 (http://commoncrawl.org/faq/)", "CCBot/2.0 (https://commoncrawl.org/faq/)" ], "description": "Common Crawl web crawler for indexing" }, { "pattern": "edisterbot", "addition_date": "2012/02/25", "instances": [], "description": "Edister web crawler for content discovery" }, { "pattern": "SeznamBot", "addition_date": "2012/03/14", "instances": [ "Mozilla/5.0 (compatible; SeznamBot/3.2-test1-1; 
+http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/4.0; +http://napoveda.seznam.cz/seznambot-intro/)" ], "description": "Seznam search engine web crawler bot" }, { "pattern": "ec2linkfinder", "addition_date": "2012/03/22", "instances": [ "ec2linkfinder" ], "description": "EC2 link finder web crawler bot" }, { "pattern": "gslfbot", "addition_date": "2012/04/03", "instances": [], "description": "GSLFBOT web crawler for content discovery" }, { "pattern": "aiHitBot", "addition_date": "2012/04/16", "instances": [ "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)" ], "description": "AiHit web crawler for data collection" }, { "pattern": "intelium_bot", "addition_date": "2012/05/07", "instances": [], "description": "Intelium web crawler for content discovery" }, { "pattern": "facebookexternalhit", "addition_date": "2012/05/07", "instances": [ "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)", "facebookexternalhit/1.1", "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler/", "description": "Facebook external hit web crawler bot" }, { "pattern": "Yeti", "addition_date": "2012/05/07", "url": "http://naver.me/bot", "instances": [ "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)" ], "description": "Naver Yeti search engine web crawler bot" }, { "pattern": "RetrevoPageAnalyzer", "addition_date": "2012/05/07", "instances": [ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; RetrevoPageAnalyzer; 
+http://www.retrevo.com/content/about-us)" ], "description": "Retrevo page analyzer web crawler bot" }, { "pattern": "lb-spider", "addition_date": "2012/05/07", "instances": [], "description": "LB spider web crawler for content discovery" }, { "pattern": "Sogou", "addition_date": "2012/05/13", "url": "http://www.sogou.com/docs/help/webmasters.htm#07", "instances": [ "Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)" ], "description": "Sogou search engine web crawler bot" }, { "pattern": "lssbot", "addition_date": "2012/05/15", "url": "https://www.lssbot.com/", "instances": [], "description": "LSS web crawler for content discovery" }, { "pattern": "careerbot", "addition_date": "2012/05/23", "url": "http://www.career-x.de/bot.html", "instances": [], "description": "Career-X web crawler for job discovery" }, { "pattern": "wotbox", "addition_date": "2012/06/12", "url": "http://www.wotbox.com", "instances": [ "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)", "Wotbox/2.01 (+http://www.wotbox.com/bot/)" ], "description": "Wotbox web crawler for content discovery" }, { "pattern": "wocbot", "addition_date": "2012/07/25", "url": "http://www.wocodi.com/crawler", "instances": [], "description": "Wocodi web crawler for content discovery" }, { "pattern": "ichiro", "addition_date": "2012/08/28", "url": "http://help.goo.ne.jp/help/article/1142", "instances": [ "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "DoCoMo/2.0 P900i(c100;TB;W24H11)(compatible; ichiro/mobile goo;+http://help.goo.ne.jp/door/crawler.html)", 
"DoCoMo/2.0 P901i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/door/crawler.html)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "ichiro/2.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/2.0 (ichiro@nttr.co.jp)", "ichiro/3.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/3.0 (http://help.goo.ne.jp/help/article/1142)", "ichiro/3.0 (http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/5.0 (http://help.goo.ne.jp/door/crawler.html)" ], "description": "Goo ichiro search engine web crawler bot" }, { "pattern": "DuckDuckBot", "addition_date": "2012/09/19", "url": "http://duckduckgo.com/duckduckbot.html", "instances": [ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)", "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)", "Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)", "'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)'" ], "description": "DuckDuckGo search engine web crawler bot" }, { "pattern": "lssrocketcrawler", "addition_date": "2012/09/24", "instances": [], "description": "LSS Rocket web crawler for content" }, { "pattern": "drupact", "addition_date": "2012/09/27", "url": "http://www.arocom.de/drupact", "instances": [ "drupact/0.7; http://www.arocom.de/drupact" ], "description": "Drupact web crawler for content discovery" }, { "pattern": "webcompanycrawler", "addition_date": "2012/10/03", "instances": [], "description": "Web Company web crawler for analysis" }, { "pattern": "acoonbot", "addition_date": "2012/10/07", "url": 
"http://www.acoon.de/robot.asp", "instances": [], "description": "Acoon web crawler for content discovery" }, { "pattern": "openindexspider", "addition_date": "2012/10/26", "url": "http://www.openindex.io/en/webmasters/spider.html", "instances": [], "description": "OpenIndex web crawler for indexing" }, { "pattern": "gnam gnam spider", "addition_date": "2012/10/31", "instances": [], "description": "Gnam Gnam web crawler for discovery" }, { "pattern": "web-archive-net\\.com\\.bot", "instances": [], "description": "Web Archive web crawler for preservation" }, { "pattern": "backlinkcrawler", "addition_date": "2013/01/04", "url": "http://www.backlinktest.com/crawler.html", "instances": [], "description": "Backlink Crawler web crawler for analysis" }, { "pattern": "coccoc", "addition_date": "2013/01/04", "url": "http://help.coccoc.vn/", "instances": [ "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)", "coccoc", "coccoc/1.0 ()", "coccoc/1.0 (http://help.coccoc.com/)", "coccoc/1.0 (http://help.coccoc.vn/)" ], "description": "Coccoc search engine web crawler bot" }, { "pattern": "integromedb", "addition_date": "2013/01/10", "url": "http://www.integromedb.org/Crawler", "instances": [ "www.integromedb.org/Crawler" ], "description": "IntegromeDB web crawler for research" }, { "pattern": "content crawler spider", "addition_date": "2013/01/11", "instances": [], "description": "Content Crawler web crawler for discovery" }, { "pattern": "toplistbot", "addition_date": "2013/02/05", "instances": [], 
"description": "TopList web crawler for ranking" }, { "pattern": "it2media-domain-crawler", "addition_date": "2013/03/12", "instances": [ "it2media-domain-crawler/1.0 on crawler-prod.it2media.de", "it2media-domain-crawler/2.0" ], "description": "IT2Media domain web crawler bot" }, { "pattern": "ip-web-crawler\\.com", "addition_date": "2013/03/22", "instances": [], "description": "IP Web Crawler web crawler bot" }, { "pattern": "siteexplorer\\.info", "addition_date": "2013/05/01", "instances": [ "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)", "Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)" ], "description": "Site Explorer web crawler for analysis" }, { "pattern": "elisabot", "addition_date": "2013/06/27", "instances": [], "description": "Elisa web crawler for content discovery" }, { "pattern": "proximic", "addition_date": "2013/09/12", "url": "http://www.proximic.com/info/spider.php", "instances": [ "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com)", "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)" ], "description": "Proximic web crawler for content analysis" }, { "pattern": "changedetection", "addition_date": "2013/09/13", "url": "http://www.changedetection.com/bot.html", "instances": [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )" ], "description": "ChangeDetection web crawler for monitoring" }, { "pattern": "arabot", "addition_date": "2013/10/09", "instances": [], "description": "Arabot web crawler for content discovery" }, { "pattern": "WeSEE:Search", "addition_date": "2013/11/18", "instances": [ "WeSEE:Search", "WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/)" ], "description": "WeSEE search engine web crawler bot" }, { "pattern": "niki-bot", "addition_date": "2014/01/01", "instances": [], "description": "Niki web crawler for content discovery" }, { "pattern": 
"CrystalSemanticsBot", "addition_date": "2014/02/17", "url": "http://www.crystalsemantics.com/user-agent/", "instances": [], "description": "Crystal Semantics web crawler bot" }, { "pattern": "rogerbot", "addition_date": "2014/02/28", "url": "http://moz.com/help/pro/what-is-rogerbot-", "instances": [ "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+partager@moz.com)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+shiny@moz.com)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)", "rogerbot/1.0 (http://www.moz.com/dp/rogerbot, rogerbot-crawler@moz.com)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler+shiny@seomoz.org)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler@seomoz.org)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-wherecat@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr2-crawler-05@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-11@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-15@moz.com)", "rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)" ], "description": "Moz RogerBot web crawler for analysis" }, { "pattern": "360Spider", "addition_date": "2014/03/14", "url": "http://needs-be.blogspot.co.uk/2013/02/how-to-block-spider360.html", "instances": [ "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 
360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)", "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 QIHU 360SE; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; ) Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11 360Spider;", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Gecko/20070312 Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36; 360Spider" ], "description": "360 search engine web crawler bot" }, { "pattern": "psbot", "addition_date": "2014/03/31", "url": "http://www.picsearch.com/bot.html", "instances": [ "psbot-image (+http://www.picsearch.com/bot.html)", "psbot-page (+http://www.picsearch.com/bot.html)", "psbot/0.1 (+http://www.picsearch.com/bot.html)" ], "description": "PicSearch web crawler for image discovery" }, { "pattern": "InterfaxScanBot", "addition_date": "2014/03/31", "url": "http://scan-interfax.ru", "instances": [], "description": "Interfax scan web crawler bot" }, { "pattern": "CC Metadata Scaper", "addition_date": "2014/04/01", "url": "http://wiki.creativecommons.org/Metadata_Scraper", "instances": [ "CC Metadata Scaper http://wiki.creativecommons.org/Metadata_Scraper" ], "description": "Creative Commons metadata web crawler" }, { "pattern": "g00g1e\\.net", "addition_date": "2014/04/01", "url": "http://www.g00g1e.net/", "instances": [], "description": "G00g1e web crawler for content discovery" }, { "pattern": "GrapeshotCrawler", "addition_date": "2014/04/01", "url": 
"http://www.grapeshot.co.uk/crawler.php", "instances": [ "Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)" ], "description": "Grapeshot web crawler for content analysis" }, { "pattern": "urlappendbot", "addition_date": "2014/05/10", "url": "http://www.profound.net/urlappendbot.html", "instances": [ "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)" ], "description": "URL Append web crawler for analysis" }, { "pattern": "brainobot", "addition_date": "2014/06/24", "instances": [], "description": "Braino web crawler for content discovery" }, { "pattern": "fr-crawler", "addition_date": "2014/07/31", "instances": [ "Mozilla/5.0 (compatible; fr-crawler/1.1)" ], "description": "FR Crawler web crawler for content discovery" }, { "pattern": "binlar", "addition_date": "2014/09/12", "instances": [ "binlar_2.6.3 binlar2.6.3@unspecified.mail", "binlar_2.6.3 binlar_2.6.3@unspecified.mail", "binlar_2.6.3 larbin2.6.3@unspecified.mail", "binlar_2.6.3 phanendra_kalapala@McAfee.com", "binlar_2.6.3 test@mgmt.mic" ], "description": "Binlar web crawler for content discovery" }, { "pattern": "SimpleCrawler", "addition_date": "2014/09/12", "instances": [ "SimpleCrawler/0.1" ], "description": "Simple Crawler web crawler framework" }, { "pattern": "Twitterbot", "addition_date": "2014/09/12", "url": "https://dev.twitter.com/cards/getting-started", "instances": [ "Twitterbot/0.1", "Twitterbot/1.0" ], "description": "Twitter web crawler for link previews" }, { "pattern": "cXensebot", "addition_date": "2014/10/05", "instances": [ "cXensebot/1.1a" ], "url": "http://www.cxense.com/bot.html", "description": "CXense web crawler for content analysis" }, { "pattern": "smtbot", "addition_date": "2014/10/04", "instances": [ "Mozilla/5.0 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)", "SMTBot (similartech.com/smtbot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) 
Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36 (compatible; SMTBot/1.0; http://www.similartech.com/smtbot)" ], "url": "http://www.similartech.com/smtbot", "description": "SimilarTech web crawler for technology detection" }, { "pattern": "bnf\\.fr_bot", "addition_date": "2014/11/18", "url": "http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html", "instances": [ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)", "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)" ], "description": "BNF French National Library web crawler" }, { "pattern": "A6-Indexer", "addition_date": "2014/12/05", "url": "http://www.a6corp.com/a6-web-scraping-policy/", "instances": [ "A6-Indexer" ], "description": "A6 Corporation web crawler for indexing" }, { "pattern": "ADmantX", "addition_date": "2014/12/05", "url": "http://www.admantx.com", "instances": [ "ADmantX Platform Semantic Analyzer - ADmantX Inc. 
- www.admantx.com - support@admantx.com" ], "description": "ADmantX semantic analyzer web crawler" }, { "pattern": "Facebot", "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl", "addition_date": "2014/12/30", "instances": [ "Facebot/1.0" ], "description": "Facebook's web crawler for social sharing" }, { "pattern": "OrangeBot\\/", "instances": [ "Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com" ], "addition_date": "2015/01/12", "description": "Orange search engine web crawler bot" }, { "pattern": "memorybot", "url": "http://mignify.com/bot.htm", "instances": [ "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)" ], "addition_date": "2015/02/01", "description": "Mignify memory web crawler bot" }, { "pattern": "AdvBot", "url": "http://advbot.net/bot.html", "instances": [ "Mozilla/5.0 (compatible; AdvBot/2.0; +http://advbot.net/bot.html)" ], "addition_date": "2015/02/01", "description": "AdvBot web crawler for advertising analysis" }, { "pattern": "MegaIndex", "url": "https://www.megaindex.ru/?tab=linkAnalyze", "instances": [ "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +https://www.megaindex.ru/?tab=linkAnalyze)", "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)" ], "addition_date": "2015/03/28", "description": "MegaIndex SEO tool web crawler for analysis" }, { "pattern": "SemanticScholarBot", "url": "https://www.semanticscholar.org/crawler", "instances": [ "SemanticScholarBot/1.0 (+http://s2.allenai.org/bot.html)", "Mozilla/5.0 (compatible) SemanticScholarBot (+https://www.semanticscholar.org/crawler)" ], "addition_date": "2015/03/28", "description": "Semantic Scholar web crawler for academic content" }, { "pattern": "ltx71", "url": "http://ltx71.com/", "instances": [ "ltx71 - (http://ltx71.com/)" ], "addition_date": "2015/04/04", "description": "LTX71 web crawler for content discovery" }, { "pattern": "nerdybot", "url": "http://nerdybot.com/", "instances": [ "nerdybot" ], 
"addition_date": "2015/04/05", "description": "NerdyBot web crawler for content analysis" }, { "pattern": "xovibot", "url": "http://www.xovibot.net/", "instances": [ "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)" ], "addition_date": "2015/04/05", "description": "Xovi SEO tool web crawler for analysis" }, { "pattern": "BUbiNG", "url": "http://law.di.unimi.it/BUbiNG.html", "instances": [ "BUbiNG (+http://law.di.unimi.it/BUbiNG.html)" ], "addition_date": "2015/04/06", "description": "BUbiNG web crawler framework for research" }, { "pattern": "Qwantify", "url": "https://www.qwant.com/", "instances": [ "Mozilla/5.0 (compatible; Qwantify/2.0n; +https://www.qwant.com/)/*", "Mozilla/5.0 (compatible; Qwantify/2.4w; +https://www.qwant.com/)/2.4w", "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.1; +https://help.qwant.com/bot)", "Mozilla/5.0 (compatible; Qwantify/Bleriot/1.2.1; +https://help.qwant.com/bot)" ], "addition_date": "2015/04/06", "description": "Qwant search engine web crawler bot" }, { "pattern": "archive\\.org_bot", "url": "http://www.archive.org/details/archive.org_bot", "depends_on": [ "heritrix" ], "instances": [ "Mozilla/5.0 (compatible; heritrix/3.1.1-SNAPSHOT-20120116.200628 +http://www.archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)", "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; special_archiver/3.1.1 +http://www.archive.org/details/archive.org_bot)" ], "addition_date": "2015/04/14", "description": "Archive.org web crawler for preservation" }, { "pattern": "Applebot", "url": "http://www.apple.com/go/applebot", "addition_date": "2015/04/15", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 
10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)" ], "description": "Apple's web crawler for Siri and search" }, { "pattern": "TweetmemeBot", "url": "http://datasift.com/bot.html", "instances": [ "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0" ], "addition_date": "2015/04/15", "description": "TweetMeme web crawler for social content" }, { "pattern": "crawler4j", "url": "https://github.com/yasserg/crawler4j", "instances": [ "crawler4j (http://code.google.com/p/crawler4j/)", "crawler4j (https://github.com/yasserg/crawler4j/)" ], "addition_date": "2015/05/07", "description": "Crawler4j Java web crawler framework" }, { "pattern": "findxbot", "url": "http://www.findxbot.com", "instances": [ "Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)" ], "addition_date": "2015/05/07", "description": "FindX web crawler for content discovery" }, { "pattern": "S[eE][mM]rushBot", "url": "http://www.semrush.com/bot.html", "instances": [ "Mozilla/5.0 (compatible; SemrushBot-SA/0.97; +http://www.semrush.com/bot.html)", "Mozilla/5.0 (compatible; SemrushBot-SI/0.97; +http://www.semrush.com/bot.html)", "Mozilla/5.0 (compatible; SemrushBot/3~bl; +http://www.semrush.com/bot.html)", "Mozilla/5.0 (compatible; SemrushBot/0.98~bl; +http://www.semrush.com/bot.html)", 
"Mozilla/5.0 (compatible; SemrushBot-BA; +http://www.semrush.com/bot.html)", "Mozilla/5.0 (compatible; SemrushBot/6~bl; +http://www.semrush.com/bot.html)", "Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)", "SEMrushBot" ], "addition_date": "2015/05/26", "description": "Semrush SEO tool web crawler for analysis" }, { "pattern": "yoozBot", "url": "http://yooz.ir", "instances": [ "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)" ], "addition_date": "2015/05/26", "description": "Yooz web crawler for content analysis" }, { "pattern": "lipperhey", "url": "http://www.lipperhey.com/", "instances": [ "Mozilla/5.0 (compatible; Lipperhey Link Explorer; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey SEO Service; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey Site Explorer; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey-Kaus-Australis/5.0; +https://www.lipperhey.com/en/about/)" ], "addition_date": "2015/08/26", "description": "Lipperhey SEO tool web crawler for analysis" }, { "pattern": "Y!J", "url": "https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/%E3%82%A6%E3%82%A7%E3%83%96%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AB%E3%82%A2%E3%82%AF%E3%82%BB%E3%82%B9%E3%81%99%E3%82%8B%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%81%AE%E3%83%A6%E3%83%BC%E3%82%B6%E3%83%BC%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6", "instances": [ "Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)", "Y!J-BRJ/YATS crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Y!J-PSC/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Y!J-BRW/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Mozilla/5.0 (iPhone; Y!J-BRY/YATSH crawler; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Mozilla/5.0 (compatible; Y!J SearchMonkey/1.0 
(Y!J-AGENT; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html))" ], "addition_date": "2015/05/26", "description": "Yahoo Japan search engine web crawler bot" }, { "pattern": "Domain Re-Animator Bot", "url": "http://domainreanimator.com", "instances": [ "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com" ], "addition_date": "2015/04/14", "description": "Domain Re-Animator web crawler for domain analysis" }, { "pattern": "AddThis", "url": "https://www.addthis.com", "instances": [ "AddThis.com robot tech.support@clearspring.com" ], "addition_date": "2015/06/02", "description": "AddThis social sharing web crawler bot" }, { "pattern": "Screaming Frog SEO Spider", "url": "http://www.screamingfrog.co.uk/seo-spider", "instances": [ "Screaming Frog SEO Spider/5.1" ], "addition_date": "2016/01/08", "description": "Screaming Frog SEO tool web crawler" }, { "pattern": "MetaURI", "url": "http://www.useragentstring.com/MetaURI_id_17683.php", "instances": [ "MetaURI API/2.0 +metauri.com" ], "addition_date": "2016/01/02", "description": "MetaURI API web crawler for metadata" }, { "pattern": "Scrapy", "url": "http://scrapy.org/", "instances": [ "Scrapy/1.0.3 (+http://scrapy.org)" ], "addition_date": "2016/01/02", "description": "Scrapy Python web crawler framework" }, { "pattern": "Livelap[bB]ot", "url": "http://site.livelap.com/crawler", "instances": [ "LivelapBot/0.2 (http://site.livelap.com/crawler)", "Livelapbot/0.1" ], "addition_date": "2016/01/02", "description": "Livelap web crawler for content discovery" }, { "pattern": "OpenHoseBot", "url": "http://www.openhose.org/bot.html", "instances": [ "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)" ], "addition_date": "2016/01/02", "description": "OpenHose web crawler for content analysis" }, { "pattern": "CapsuleChecker", "url": "http://www.capsulink.com/about", "instances": [ "CapsuleChecker (http://www.capsulink.com/)" ], "addition_date": "2016/01/02", 
"description": "Capsule web crawler for link checking" }, { "pattern": "collection@infegy\\.com", "url": "http://infegy.com/", "instances": [ "Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com" ], "addition_date": "2016/01/03", "description": "Infegy web crawler for social listening" }, { "pattern": "IstellaBot", "url": "http://www.tiscali.it/", "instances": [ "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)" ], "addition_date": "2016/01/09", "description": "Istella web crawler for search indexing" }, { "pattern": "DeuSu\\/", "addition_date": "2016/01/23", "url": "https://deusu.de/robot.html", "instances": [ "Mozilla/5.0 (compatible; DeuSu/0.1.0; +https://deusu.org)", "Mozilla/5.0 (compatible; DeuSu/5.0.2; +https://deusu.de/robot.html)" ], "description": "DeuSu web crawler for search indexing" }, { "pattern": "betaBot", "addition_date": "2016/01/23", "instances": [], "description": "BetaBot web crawler for testing" }, { "pattern": "Cliqzbot\\/", "addition_date": "2016/01/23", "url": "http://cliqz.com/company/cliqzbot", "instances": [ "Mozilla/5.0 (compatible; Cliqzbot/2.0; +http://cliqz.com/company/cliqzbot)", "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)" ], "description": "Cliqz search engine web crawler bot" }, { "pattern": "MojeekBot\\/", "addition_date": "2016/01/23", "url": "https://www.mojeek.com/bot.html", "instances": [ "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.6; 
+https://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)" ], "description": "Mojeek search engine web crawler bot" }, { "pattern": "netEstate NE Crawler", "addition_date": "2016/01/23", "url": "http://www.website-datenbank.de/", "instances": [ "netEstate NE Crawler (+http://www.sengine.info/)", "netEstate NE Crawler (+http://www.website-datenbank.de/)" ], "description": "NetEstate web crawler for domain analysis" }, { "pattern": "SafeSearch microdata crawler", "addition_date": "2016/01/23", "url": "https://safesearch.avira.com", "instances": [ "SafeSearch microdata crawler (https://safesearch.avira.com, safesearch-abuse@avira.com)" ], "description": "Avira SafeSearch web crawler for safety" }, { "pattern": "Gluten Free Crawler\\/", "addition_date": "2016/01/23", "url": "http://glutenfreepleasure.com/", "instances": [ "Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)" ], "description": "Gluten Free Pleasure web crawler bot" }, { "pattern": "Sonic", "addition_date": "2016/02/08", "url": "http://www.yama.info.waseda.ac.jp/~crawler/info.html", "instances": [ "Mozilla/5.0 (compatible; RankSonicSiteAuditor/1.0; +https://ranksonic.com/ranksonic_sab.html)", "Mozilla/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)", "Mozzila/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)" ], "description": "Waseda University Sonic web crawler (pattern also matches RankSonicSiteAuditor)" }, { "pattern": "Sysomos", "addition_date": "2016/02/08", "url": "http://www.sysomos.com", "instances": [ "Mozilla/5.0 (compatible; Sysomos/1.0; +http://www.sysomos.com/; Sysomos)" ], "description": "Sysomos web crawler for social media analysis" }, { "pattern": "Trove", "addition_date": "2016/02/08", "url": "http://www.trove.com", "instances": [], "description": "Trove web crawler for content discovery" }, { "pattern": "deadlinkchecker", "addition_date": "2016/02/08", "url":
"http://www.deadlinkchecker.com", "instances": [ "www.deadlinkchecker.com Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36", "www.deadlinkchecker.com XMLHTTP/1.0", "www.deadlinkchecker.com XMLHTTP/1.0 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36" ], "description": "Dead Link Checker web crawler for link validation" }, { "pattern": "Slack-ImgProxy", "addition_date": "2016/04/25", "url": "https://api.slack.com/robots", "instances": [ "Slack-ImgProxy (+https://api.slack.com/robots)", "Slack-ImgProxy 0.59 (+https://api.slack.com/robots)", "Slack-ImgProxy 0.66 (+https://api.slack.com/robots)", "Slack-ImgProxy 1.106 (+https://api.slack.com/robots)", "Slack-ImgProxy 1.138 (+https://api.slack.com/robots)", "Slack-ImgProxy 149 (+https://api.slack.com/robots)" ], "description": "Slack's image proxy web crawler bot" }, { "pattern": "Embedly", "addition_date": "2016/04/25", "url": "http://support.embed.ly", "instances": [ "Embedly +support@embed.ly", "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)", "Mozilla/5.0 (compatible; Embedly/0.2; snap; +http://support.embed.ly/)" ], "description": "Embedly web crawler for content embedding" }, { "pattern": "RankActiveLinkBot", "addition_date": "2016/06/20", "url": "https://rankactive.com/resources/rankactive-linkbot", "instances": [ "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)" ], "description": "RankActive web crawler for link analysis" }, { "pattern": "iskanie", "addition_date": "2016/09/02", "url": "http://www.iskanie.com", "instances": [ "iskanie (+http://www.iskanie.com)" ], "description": "Iskanie web crawler for content discovery" }, { "pattern": "SafeDNSBot", "addition_date": "2016/09/10", "url": "https://www.safedns.com/searchbot", "instances": [ "SafeDNSBot (https://www.safedns.com/searchbot)" ], "description": "SafeDNS web crawler 
for security analysis" }, { "pattern": "SkypeUriPreview", "addition_date": "2016/10/10", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5" ], "description": "Skype's URI preview web crawler bot" }, { "pattern": "Veoozbot", "addition_date": "2016/11/03", "url": "http://www.veooz.com/veoozbot.html", "instances": [ "Mozilla/5.0 (compatible; Veoozbot/1.0; +http://www.veooz.com/veoozbot.html)" ], "description": "Veooz web crawler for marketing analysis" }, { "pattern": "Slackbot", "addition_date": "2016/11/03", "url": "https://api.slack.com/robots", "instances": [ "Slackbot-LinkExpanding (+https://api.slack.com/robots)", "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", "Slackbot 1.0 (+https://api.slack.com/robots)" ], "description": "Slack's link expansion web crawler bot" }, { "pattern": "redditbot", "addition_date": "2016/11/03", "url": "http://www.reddit.com/feedback", "instances": [ "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)" ], "description": "Reddit's web crawler for content sharing" }, { "pattern": "datagnionbot", "addition_date": "2016/11/03", "url": "http://www.datagnion.com/bot.html", "instances": [ "datagnionbot (+http://www.datagnion.com/bot.html)" ], "description": "Datagnion web crawler for data analysis" }, { "pattern": "Google-Adwords-Instant", "addition_date": "2016/11/03", "url": "http://www.google.com/adsbot.html", "instances": [ "Google-Adwords-Instant (+http://www.google.com/adsbot.html)" ], "description": "Google AdWords instant web crawler bot" }, { "pattern": "adbeat_bot", "addition_date": "2016/11/04", "instances": [ "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", "adbeat_bot" ], "description": "AdBeat web crawler for advertising analysis" }, { "pattern": "WhatsApp", "addition_date": "2016/11/15", "url": "https://www.whatsapp.com/", "instances": [ "WhatsApp", "WhatsApp/0.3.4479 N", "WhatsApp/0.3.4679 N", "WhatsApp/0.3.4941 N", 
"WhatsApp/2.12.15/i", "WhatsApp/2.12.16/i", "WhatsApp/2.12.17/i", "WhatsApp/2.12.449 A", "WhatsApp/2.12.453 A", "WhatsApp/2.12.510 A", "WhatsApp/2.12.540 A", "WhatsApp/2.12.548 A", "WhatsApp/2.12.555 A", "WhatsApp/2.12.556 A", "WhatsApp/2.16.1/i", "WhatsApp/2.16.13 A", "WhatsApp/2.16.2/i", "WhatsApp/2.16.42 A", "WhatsApp/2.16.57 A", "WhatsApp/2.19.92 i", "WhatsApp/2.19.175 A", "WhatsApp/2.19.244 A", "WhatsApp/2.19.258 A", "WhatsApp/2.19.308 A", "WhatsApp/2.19.330 A" ], "description": "WhatsApp's web crawler for link previews" }, { "pattern": "contxbot", "addition_date": "2017/02/25", "instances": [ "Mozilla/5.0 (compatible;contxbot/1.0)" ], "description": "Contx web crawler for content discovery" }, { "pattern": "pinterest\\.com\\/bot", "addition_date": "2017/03/03", "instances": [ "Mozilla/5.0 (compatible; Pinterestbot/1.0; +http://www.pinterest.com/bot.html)", "Pinterest/0.2 (+http://www.pinterest.com/bot.html)" ], "url": "http://www.pinterest.com/bot.html", "description": "Pinterest web crawler for content discovery" }, { "pattern": "electricmonk", "addition_date": "2017/03/04", "instances": [ "Mozilla/5.0 (compatible; electricmonk/3.2.0 +https://www.duedil.com/our-crawler/)" ], "url": "https://www.duedil.com/our-crawler/", "description": "DueDil electricmonk web crawler bot" }, { "pattern": "GarlikCrawler", "addition_date": "2017/03/18", "instances": [ "GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)" ], "url": "http://garlik.com/", "description": "Garlik web crawler for content discovery" }, { "pattern": "BingPreview\\/", "addition_date": "2017/04/23", "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b", "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; BingPreview/1.0b) like Gecko", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; WOW64; Trident/6.0; BingPreview/1.0b)", "Mozilla/5.0 
(compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; WOW64; Trident/5.0; BingPreview/1.0b)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 BingPreview/1.0b" ], "description": "Bing preview web crawler bot" }, { "pattern": "vebidoobot", "addition_date": "2017/05/08", "instances": [ "Mozilla/5.0 (compatible; vebidoobot/1.0; +https://blog.vebidoo.de/vebidoobot/" ], "url": "https://blog.vebidoo.de/vebidoobot/", "description": "Vebidoo web crawler for content discovery" }, { "pattern": "FemtosearchBot", "addition_date": "2017/05/16", "instances": [ "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)" ], "url": "http://femtosearch.com", "description": "Femtosearch web crawler for content discovery" }, { "pattern": "Yahoo Link Preview", "addition_date": "2017/06/28", "instances": [ "Mozilla/5.0 (compatible; Yahoo Link Preview; https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)" ], "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html", "description": "Yahoo link preview web crawler bot" }, { "pattern": "MetaJobBot", "addition_date": "2017/08/16", "instances": [ "Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler)" ], "url": "http://www.metajob.de/the/crawler", "description": "MetaJob web crawler for job discovery" }, { "pattern": "DomainStatsBot", "addition_date": "2017/08/16", "instances": [ "DomainStatsBot/1.0 (http://domainstats.io/our-bot)" ], "url": "http://domainstats.io/our-bot", "description": "DomainStats web crawler for domain analysis" }, { "pattern": "mindUpBot", "addition_date": "2017/08/16", "instances": [ "mindUpBot (datenbutler.de)" ], "url": "http://www.datenbutler.de/", "description": "MindUp web crawler for content discovery" }, { "pattern": "Daum\\/", "addition_date": "2017/08/16", "instances": [ "Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)" ], "url": 
"http://cs.daum.net/faq/15/4118.html?faqId=28966", "description": "Daum search engine web crawler bot" }, { "pattern": "Jugendschutzprogramm-Crawler", "addition_date": "2017/08/16", "instances": [ "Jugendschutzprogramm-Crawler; Info: http://www.jugendschutzprogramm.de" ], "url": "http://www.jugendschutzprogramm.de", "description": "Jugendschutzprogramm web crawler bot" }, { "pattern": "Xenu Link Sleuth", "addition_date": "2017/08/19", "instances": [ "Xenu Link Sleuth/1.3.8" ], "url": "http://home.snafu.de/tilman/xenulink.html", "description": "Xenu link checker web crawler tool" }, { "pattern": "Pcore-HTTP", "addition_date": "2017/08/19", "instances": [ "Pcore-HTTP/v0.40.3", "Pcore-HTTP/v0.44.0" ], "url": "https://bitbucket.org/softvisio/pcore/overview", "description": "Pcore HTTP web crawler library" }, { "pattern": "moatbot", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36 moatbot", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4 moatbot" ], "url": "https://moat.com", "description": "Moat web crawler for advertising analysis" }, { "pattern": "KosmioBot", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36 (compatible; KosmioBot/1.0; +http://kosm.io/bot.html)" ], "url": "http://kosm.io/bot.html", "description": "Kosmio web crawler for content discovery" }, { "pattern": "[pP]ingdom", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)", "Mozilla/5.0 (compatible; pingbot/2.0; +http://www.pingdom.com/)", 
"Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) browser/2020.2.1 Chrome/78.0.3904.130 Electron/7.3.2 Safari/537.36 PingdomTMS/2020.2", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) browser/2020.2.5 Chrome/78.0.3904.130 Electron/7.3.15 Safari/537.36 PingdomTMS/2020.2", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) browser/2020.2.0 Chrome/78.0.3904.130 Electron/7.1.7 Safari/537.36 PingdomTMS/2020.2", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) renderer/2020.2.0 Chrome/78.0.3904.130 Electron/7.1.7 Safari/537.36 PingdomTMS/2020.2", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/61.0.3163.100 Chrome/61.0.3163.100 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; http://www.pingdom.com/)" ], "url": "http://www.pingdom.com", "description": "Pingdom website monitoring web crawler" }, { "pattern": "AppInsights", "addition_date": "2019/03/09", "instances": [ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; AppInsights)" ], "url": "https://docs.microsoft.com/en-us/azure/azure-monitor/app/app-insights-overview", "description": "Microsoft AppInsights web crawler bot" }, { "pattern": "PhantomJS", "addition_date": "2017/09/18", "instances": [ "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1 bl.uk_lddc_renderbot/2.0.0 (+ http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)" ], "url": "http://phantomjs.org/", "description": "PhantomJS headless browser web crawler" }, { "pattern": "Gowikibot", "addition_date": "2017/10/26", "instances": [ "Mozilla/5.0 
(compatible; Gowikibot/1.0; +http://www.gowikibot.com)" ], "url": "http://www.gowikibot.com", "description": "GoWiki web crawler for content discovery" }, { "pattern": "PiplBot", "addition_date": "2017/10/30", "instances": [ "PiplBot (+http://www.pipl.com/bot/)", "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)" ], "url": "http://www.pipl.com/bot/", "description": "Pipl web crawler for people search" }, { "pattern": "Discordbot", "addition_date": "2017/09/22", "url": "https://discordapp.com", "instances": [ "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)" ], "description": "Discord web crawler for link previews" }, { "pattern": "TelegramBot", "addition_date": "2017/10/01", "instances": [ "TelegramBot (like TwitterBot)" ], "description": "Telegram web crawler for link previews" }, { "pattern": "Jetslide", "addition_date": "2017/09/27", "url": "http://jetsli.de/crawler", "instances": [ "Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)" ], "description": "Jetslide web crawler for content discovery" }, { "pattern": "newsharecounts", "addition_date": "2017/09/30", "url": "http://newsharecounts.com/crawler", "instances": [ "Mozilla/5.0 (compatible; NewShareCounts.com/1.0; +http://newsharecounts.com/crawler)" ], "description": "NewShareCounts web crawler for sharing" }, { "pattern": "James BOT", "addition_date": "2017/10/12", "url": "http://cognitiveseo.com/bot.html", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html" ], "description": "CognitiveSEO James web crawler bot" }, { "pattern": "Bark[rR]owler", "addition_date": "2017/10/09", "url": "http://www.exensa.com/crawl", "instances": [ "Barkrowler/0.5.1 (experimenting / debugging - sorry for your logs ) http://www.exensa.com/crawl - admin@exensa.com -- based on BuBiNG", "Barkrowler/0.7 (+http://www.exensa.com/crawl)", "BarkRowler/0.7 
(+http://www.exensa.com/crawling)", "Barkrowler/0.9 (+http://www.exensa.com/crawl)" ], "description": "Barkrowler web crawler for content discovery" }, { "pattern": "TinEye", "addition_date": "2017/10/14", "url": "http://www.tineye.com/crawler.html", "instances": [ "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", "TinEye/1.1 (http://tineye.com/crawler.html)" ], "description": "TinEye reverse image search web crawler" }, { "pattern": "SocialRankIOBot", "addition_date": "2017/10/19", "url": "http://socialrank.io/about", "instances": [ "SocialRankIOBot; http://socialrank.io/about" ], "description": "SocialRank web crawler for social analysis" }, { "pattern": "trendictionbot", "addition_date": "2017/10/30", "url": "http://www.trendiction.de/bot", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20170101 Firefox/67.0" ], "description": "Trendiction web crawler for trend analysis" }, { "pattern": "Ocarinabot", "addition_date": "2017/09/27", "instances": [ "Ocarinabot" ], "description": "Ocarina web crawler for content discovery" }, { "pattern": "epicbot", "addition_date": "2017/10/31", "url": "http://www.epictions.com/epicbot", "instances": [ "Mozilla/5.0 (compatible; epicbot; +http://www.epictions.com/epicbot)" ], "description": "Epic web crawler for content discovery" }, { "pattern": "Primalbot", "addition_date": "2017/09/27", "url": "https://www.primal.com", "instances": [ "Mozilla/5.0 (compatible; Primalbot; +https://www.primal.com;)" ], "description": "Primal web crawler for content discovery" }, { "pattern": "DuckDuckGo-Favicons-Bot", "addition_date": "2017/10/06", 
"url": "http://duckduckgo.com", "instances": [ "Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)" ], "description": "DuckDuckGo favicon web crawler bot" }, { "pattern": "GnowitNewsbot", "addition_date": "2017/10/30", "url": "http://www.gnowit.com", "instances": [ "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0 / GnowitNewsbot / Contact information at http://www.gnowit.com" ], "description": "Gnowit news web crawler bot" }, { "pattern": "Leikibot", "addition_date": "2017/09/24", "url": "http://www.leiki.com", "instances": [ "Mozilla/5.0 (Windows NT 6.3;compatible; Leikibot/1.0; +http://www.leiki.com)" ], "description": "Leiki web crawler for content discovery" }, { "pattern": "LinkArchiver", "addition_date": "2017/09/24", "url": "https://github.com/thisisparker/linkarchiver", "instances": [ "@LinkArchiver twitter bot" ], "description": "LinkArchiver web crawler for archiving" }, { "pattern": "YaK\\/", "addition_date": "2017/09/25", "url": "http://linkfluence.com", "instances": [ "Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)" ], "description": "Linkfluence YaK web crawler bot" }, { "pattern": "PaperLiBot", "addition_date": "2017/09/25", "url": "http://support.paper.li/entries/20023257-what-is-paper-li", "instances": [ "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)", "Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)" ], "description": "Paper.li web crawler for content curation" }, { "pattern": "Digg Deeper", "addition_date": "2017/09/26", "url": "http://digg.com/about", "instances": [ "Digg Deeper/v1 (http://digg.com/about)" ], "description": "Digg web crawler for content discovery" }, { "pattern": "dcrawl", "addition_date": "2017/09/22", "url": "https://github.com/kgretzky/dcrawl", "instances": [ "dcrawl/1.0" ], "description": "dcrawl web crawler framework" }, { 
"pattern": "Snacktory", "addition_date": "2017/09/23", "url": "https://github.com/karussell/snacktory", "instances": [ "Mozilla/5.0 (compatible; Snacktory; +https://github.com/karussell/snacktory)" ], "description": "Snacktory web crawler for content extraction" }, { "pattern": "AndersPinkBot", "addition_date": "2017/09/24", "url": "http://anderspink.com/bot.html", "instances": [ "Mozilla/5.0 (compatible; AndersPinkBot/1.0; +http://anderspink.com/bot.html)" ], "description": "AndersPink web crawler for content discovery" }, { "pattern": "Fyrebot", "addition_date": "2017/09/22", "instances": [ "Fyrebot/1.0" ], "description": "Fyre web crawler for content discovery" }, { "pattern": "EveryoneSocialBot", "addition_date": "2017/09/22", "url": "http://everyonesocial.com", "instances": [ "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)" ], "description": "EveryoneSocial web crawler for sharing" }, { "pattern": "Mediatoolkitbot", "addition_date": "2017/10/06", "url": "http://mediatoolkit.com", "instances": [ "Mediatoolkitbot (complaints@mediatoolkit.com)" ], "description": "Mediatoolkit web crawler for monitoring" }, { "pattern": "Luminator-robots", "addition_date": "2017/09/22", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.13 Luminator-robots/2.0" ], "description": "Luminator web crawler for content discovery" }, { "pattern": "ExtLinksBot", "addition_date": "2017/11/02", "url": "https://extlinks.com/Bot.html", "instances": [ "Mozilla/5.0 (compatible; ExtLinksBot/1.5 +https://extlinks.com/Bot.html)" ], "description": "ExtLinks web crawler for link analysis" }, { "pattern": "SurveyBot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.13) Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools)" ], "description": "DomainTools survey web crawler bot" }, { 
"pattern": "NING\\/", "addition_date": "2017/11/02", "instances": [ "NING/1.0" ], "description": "Ning web crawler for content discovery" }, { "pattern": "okhttp", "addition_date": "2017/11/02", "instances": [ "okhttp/2.5.0", "okhttp/2.7.5", "okhttp/3.2.0", "okhttp/3.5.0", "okhttp/4.1.0" ], "description": "OkHttp Java HTTP client library" }, { "pattern": "Nuzzel", "addition_date": "2017/11/02", "instances": [ "Nuzzel" ], "description": "Nuzzel web crawler for news discovery" }, { "pattern": "omgili", "addition_date": "2017/11/02", "url": "http://omgili.com", "instances": [ "omgili/0.5 +http://omgili.com" ], "description": "Omgili web crawler for content discovery" }, { "pattern": "PocketParser", "addition_date": "2017/11/02", "url": "https://getpocket.com/pocketparser_ua", "instances": [ "PocketParser/2.0 (+https://getpocket.com/pocketparser_ua)" ], "description": "Pocket web crawler for content parsing" }, { "pattern": "YisouSpider", "addition_date": "2017/11/02", "instances": [ "YisouSpider", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36" ], "description": "Yisou search engine web crawler bot" }, { "pattern": "um-LN", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com)" ], "description": "Ubermetrics web crawler for monitoring" }, { "pattern": "ToutiaoSpider", "addition_date": "2017/11/02", "url": "http://web.toutiao.com/media_cooperation/", "instances": [ "Mozilla/5.0 (compatible; ToutiaoSpider/1.0; http://web.toutiao.com/media_cooperation/;)" ], "description": "Toutiao news platform web crawler bot" }, { "pattern": "MuckRack", "addition_date": "2017/11/02", "url": "http://muckrack.com", "instances": [ "Mozilla/5.0 (compatible; MuckRack/1.0; +http://muckrack.com)" ], "description": "MuckRack journalist database web crawler" }, { "pattern": "Jamie's Spider", "addition_date": "2017/11/02", "url": 
"http://jamiembrown.com/", "instances": [ "Jamie's Spider (http://jamiembrown.com/)" ], "description": "Jamie Brown's personal web crawler bot" }, { "pattern": "AHC\\/", "addition_date": "2017/11/02", "url": "https://github.com/AsyncHttpClient/async-http-client", "instances": [ "AHC/2.0" ], "description": "Async HTTP Client Java library for HTTP requests" }, { "pattern": "NetcraftSurveyAgent", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)" ], "description": "Netcraft survey web crawler bot" }, { "pattern": "Laserlikebot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)" ], "description": "Laserlike web crawler for content discovery" }, { "pattern": "^Apache-HttpClient", "addition_date": "2017/11/02", "instances": [ "Apache-HttpClient/4.2.3 (java 1.5)", "Apache-HttpClient/4.2.5 (java 1.5)", "Apache-HttpClient/4.3.1 (java 1.5)", "Apache-HttpClient/4.3.3 (java 1.5)", "Apache-HttpClient/4.3.5 (java 1.5)", "Apache-HttpClient/4.4.1 (Java/1.8.0_65)", "Apache-HttpClient/4.5.2 (Java/1.8.0_65)", "Apache-HttpClient/4.5.2 (Java/1.8.0_151)", "Apache-HttpClient/4.5.2 (Java/1.8.0_161)", "Apache-HttpClient/4.5.2 (Java/1.8.0_181)", "Apache-HttpClient/4.5.3 (Java/1.8.0_121)", "Apache-HttpClient/4.5.3-SNAPSHOT (Java/1.8.0_152)", "Apache-HttpClient/4.5.7 (Java/11.0.3)", "Apache-HttpClient/4.5.10 (Java/1.8.0_201)" ], "description": "Apache HTTP Client Java library for requests" }, { "pattern": "AppEngine-Google", "addition_date": "2017/11/02", "instances": [ "AppEngine-Google; (+http://code.google.com/appengine; appid: example)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36 AppEngine-Google; (+http://code.google.com/appengine; appid: s~feedly-nikon3)" ], "description": "Google 
App Engine web crawler bot" }, { "pattern": "Jetty", "addition_date": "2017/11/02", "instances": [ "Jetty/9.3.z-SNAPSHOT" ], "description": "Jetty web server HTTP client library" }, { "pattern": "Upflow", "addition_date": "2017/11/02", "instances": [ "Upflow/1.0" ], "description": "Upflow web crawler for content analysis" }, { "pattern": "Thinklab", "addition_date": "2017/11/02", "url": "thinklab.com", "instances": [ "Thinklab (thinklab.com)" ], "description": "Thinklab web crawler for research" }, { "pattern": "Traackr\\.com", "addition_date": "2017/11/02", "url": "https://www.traackr.com/", "instances": [ "Traackr.com" ], "description": "Traackr influencer marketing web crawler" }, { "pattern": "Twurly", "addition_date": "2017/11/02", "url": "http://twurly.org", "instances": [ "Ruby, Twurly v1.1 (http://twurly.org)" ], "description": "Twurly Ruby web crawler framework" }, { "pattern": "Mastodon", "addition_date": "2017/11/02", "instances": [ "http.rb/2.2.2 (Mastodon/1.5.1; +https://example-masto-instance.org/)" ], "description": "Mastodon social network web crawler bot" }, { "pattern": "http_get", "addition_date": "2017/11/02", "instances": [ "http_get" ], "description": "HTTP GET command-line tool for requests" }, { "pattern": "DnyzBot", "addition_date": "2017/11/20", "instances": [ "Mozilla/5.0 (compatible; DnyzBot/1.0)" ], "description": "Dnyz web crawler for content discovery" }, { "pattern": "botify", "addition_date": "2018/02/01", "instances": [ "Mozilla/5.0 (compatible; botify; http://botify.com)" ], "description": "Botify SEO tool web crawler for analysis" }, { "pattern": "007ac9 Crawler", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)" ], "description": "007ac9 web crawler for content discovery" }, { "pattern": "BehloolBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; BehloolBot/beta; +http://www.webeaver.com/bot)" ], "description": "Behlool web crawler for 
content analysis" }, { "pattern": "BrandVerity", "addition_date": "2018/02/27", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/55.0 BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Mobile/11A465 Twitter for iPhone BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)" ], "url": "http://www.brandverity.com/why-is-brandverity-visiting-me", "description": "BrandVerity web crawler for brand monitoring" }, { "pattern": "check_http", "addition_date": "2018/02/09", "instances": [ "check_http/v2.2.1 (nagios-plugins 2.2.1)" ], "description": "Nagios check_http monitoring plugin" }, { "pattern": "BDCbot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" ], "description": "BigDataCorp web crawler for data collection" }, { "pattern": "ZumBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)" ], "description": "Zum search engine web crawler bot" }, { "pattern": "EZID", "addition_date": "2018/02/09", "instances": [ "EZID (EZID link checker; https://ezid.cdlib.org/)" ], "description": "EZID link checker web crawler bot" }, { "pattern": "ICC-Crawler", "addition_date": "2018/02/28", "instances": [ "ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)" ], "url": "http://ucri.nict.go.jp/en/icccrawler.html", "description": "ICC web crawler for language research" }, { "pattern": "ArchiveBot", "addition_date": "2018/02/28", "instances": [ "ArchiveTeam 
ArchiveBot/20170106.02 (wpull 2.0.2)" ], "url": "https://github.com/ArchiveTeam/ArchiveBot", "description": "Archive Team web crawler for preservation" }, { "pattern": "^LCC ", "addition_date": "2018/02/28", "instances": [ "LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)" ], "url": "http://corpora.informatik.uni-leipzig.de/crawler_faq.html", "description": "Leipzig Corpora Collection web crawler" }, { "pattern": "filterdb\\.iss\\.net\\/crawler", "addition_date": "2018/03/16", "instances": [ "Mozilla/5.0 (compatible; oBot/2.3.1; +http://filterdb.iss.net/crawler/)" ], "url": "http://filterdb.iss.net/crawler/", "description": "ISS filter database web crawler" }, { "pattern": "BLP_bbot", "addition_date": "2018/03/27", "instances": [ "BLP_bbot/0.1" ], "description": "BLP web crawler for content discovery" }, { "pattern": "BomboraBot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)" ], "url": "http://www.bombora.com/bot", "description": "Bombora web crawler for business intelligence" }, { "pattern": "Buck\\/", "addition_date": "2018/03/27", "instances": [ "Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)" ], "url": "https://app.hypefactors.com/media-monitoring/about.html", "description": "Hypefactors media monitoring web crawler" }, { "pattern": "Companybook-Crawler", "addition_date": "2018/03/27", "instances": [ "Companybook-Crawler (+https://www.companybooknetworking.com/)" ], "url": "https://www.companybooknetworking.com/", "description": "Companybook networking web crawler" }, { "pattern": "Genieo", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)" ], "url": "http://www.genieo.com/webfilter.html", "description": "Genieo web filter web crawler bot" }, { "pattern": "magpie-crawler", "addition_date": "2018/03/27", "instances": [ "magpie-crawler/1.1 (U; Linux amd64; en-GB; 
+http://www.brandwatch.net)" ], "url": "http://www.brandwatch.net", "description": "Brandwatch magpie web crawler bot" }, { "pattern": "MeltwaterNews", "addition_date": "2018/03/27", "instances": [ "MeltwaterNews www.meltwater.com" ], "url": "http://www.meltwater.com", "description": "Meltwater news monitoring web crawler" }, { "pattern": "Moreover", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 Moreover/5.1 (+http://www.moreover.com)" ], "url": "http://www.moreover.com", "description": "Moreover news aggregation web crawler" }, { "pattern": "newspaper\\/", "addition_date": "2018/03/27", "instances": [ "newspaper/0.1.0.7", "newspaper/0.2.5", "newspaper/0.2.6", "newspaper/0.2.8" ], "description": "Newspaper Python web scraping library" }, { "pattern": "ScoutJet", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)" ], "url": "http://www.scoutjet.com/", "description": "ScoutJet web crawler for content discovery" }, { "pattern": "(^| )sentry\\/", "addition_date": "2018/03/27", "instances": [ "sentry/8.22.0 (https://sentry.io)" ], "url": "https://sentry.io", "description": "Sentry error tracking web crawler bot" }, { "pattern": "StorygizeBot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; StorygizeBot; http://www.storygize.com)" ], "url": "http://www.storygize.com", "description": "Storygize web crawler for content analysis" }, { "pattern": "UptimeRobot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)" ], "url": "http://www.uptimerobot.com/", "description": "UptimeRobot website monitoring web crawler" }, { "pattern": "OutclicksBot", "addition_date": "2018/04/21", "instances": [ "OutclicksBot/2 +https://www.outclicks.net/agent/VjzDygCuk4ubNmg40ZMbFqT0sIh7UfOKk8s8ZMiupUR", "OutclicksBot/2 +https://www.outclicks.net/agent/gIYbZ38dfAuhZkrFVl7sJBFOUhOVct6J1SvxgmBZgCe", "OutclicksBot/2 
+https://www.outclicks.net/agent/PryJzTl8POCRHfvEUlRN5FKtZoWDQOBEvFJ2wh6KH5J", "OutclicksBot/2 +https://www.outclicks.net/agent/p2i4sNUh7eylJF1S6SGgRs5mP40ExlYvsr9GBxVQG6h" ], "url": "https://www.outclicks.net", "description": "Outclicks web crawler for link tracking" }, { "pattern": "seoscanners", "addition_date": "2018/05/27", "instances": [ "Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)" ], "url": "https://github.com/monperrus/crawler-user-agents/issues/384#issuecomment-2575367162", "description": "SEO Scanners web crawler for analysis" }, { "pattern": "Hatena", "addition_date": "2018/05/29", "instances": [ "Hatena Antenna/0.3", "Hatena::Russia::Crawler/0.01", "Hatena-Favicon/2 (http://www.hatena.ne.jp/faq/)", "Hatena::Scissors/0.01", "HatenaBookmark/4.0 (Hatena::Bookmark; Analyzer)", "Hatena::Fetcher/0.01 (master) Furl/3.13" ], "description": "Hatena web services web crawler bot" }, { "pattern": "Google Web Preview", "addition_date": "2018/05/31", "instances": [ "Mozilla/5.0 (Linux; U; Android 2.3.4; generic) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Version/4.0 Mobile Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Chrome/27.0.1453 Safari/537.36" ], "description": "Google web preview web crawler bot" }, { "pattern": "MauiBot", "addition_date": "2018/06/06", "instances": [ "MauiBot (crawler.feedback+wc@gmail.com)" ], "description": "Maui web crawler for content discovery" }, { "pattern": "AlphaBot", "addition_date": "2018/05/27", "instances": [ "Mozilla/5.0 (compatible; AlphaBot/3.2; +http://alphaseobot.com/bot.html)" ], "url": "http://alphaseobot.com/bot.html", "description": "AlphaBot SEO web crawler for analysis" }, { "pattern": "SBL-BOT", "addition_date": "2018/06/06", "instances": [ "SBL-BOT (http://sbl.net)" ], "url": "http://sbl.net", "description": "Bot of SoftByte BlackWidow" }, { "pattern": "IAS crawler", "addition_date": "2018/06/06", "instances": [ 
"IAS crawler (ias_crawler; http://integralads.com/site-indexing-policy/)" ], "url": "http://integralads.com/site-indexing-policy/", "description": "Bot of Integral Ad Science, Inc." }, { "pattern": "adscanner", "addition_date": "2018/06/24", "instances": [ "Mozilla/5.0 (compatible; adscanner/)" ], "description": "AdScanner web crawler for ad analysis" }, { "pattern": "Netvibes", "addition_date": "2018/06/24", "instances": [ "Netvibes (crawler/bot; http://www.netvibes.com", "Netvibes (crawler; http://www.netvibes.com)" ], "url": "http://www.netvibes.com", "description": "Netvibes web crawler for content aggregation" }, { "pattern": "acapbot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible;acapbot/0.1;treat like Googlebot)", "Mozilla/5.0 (compatible;acapbot/0.1.;treat like Googlebot)" ], "description": "ACAP web crawler for content analysis" }, { "pattern": "Baidu-YunGuanCe", "addition_date": "2018/06/27", "instances": [ "Baidu-YunGuanCe-Bot(ce.baidu.com)", "Baidu-YunGuanCe-SLABot(ce.baidu.com)", "Baidu-YunGuanCe-ScanBot(ce.baidu.com)", "Baidu-YunGuanCe-PerfBot(ce.baidu.com)", "Baidu-YunGuanCe-VSBot(ce.baidu.com)" ], "url": "https://ce.baidu.com/topic/topic20150908", "description": "Baidu Cloud Watch" }, { "pattern": "bitlybot", "addition_date": "2018/06/27", "instances": [ "bitlybot/3.0 (+http://bit.ly/)", "bitlybot/2.0", "bitlybot" ], "url": "http://bit.ly/", "description": "Bit.ly web crawler for link tracking" }, { "pattern": "blogmuraBot", "addition_date": "2018/06/27", "instances": [ "blogmuraBot (+http://www.blogmura.com)" ], "url": "http://www.blogmura.com", "description": "A blog ranking site which links to blogs on just about every theme possible." 
}, { "pattern": "Bot\\.AraTurka\\.com", "addition_date": "2018/06/27", "instances": [ "Bot.AraTurka.com/0.0.1" ], "url": "http://www.araturka.com", "description": "AraTurka web crawler for content discovery" }, { "pattern": "bot-pge\\.chlooe\\.com", "addition_date": "2018/06/27", "instances": [ "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)" ], "description": "Chlooe web crawler for content analysis" }, { "pattern": "BoxcarBot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)" ], "url": "https://boxcar.io/", "description": "Boxcar web crawler for notifications" }, { "pattern": "BTWebClient", "addition_date": "2018/06/27", "instances": [ "BTWebClient/180B(9704)" ], "url": "http://www.utorrent.com/", "description": "µTorrent BitTorrent Client" }, { "pattern": "ContextAd Bot", "addition_date": "2018/06/27", "instances": [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", "ContextAd Bot 1.0" ], "description": "ContextAd web crawler for advertising" }, { "pattern": "Digincore bot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)" ], "url": "http://www.digincore.com/crawler.html", "description": "Digincore web crawler for content analysis" }, { "pattern": "Disqus", "addition_date": "2018/06/27", "instances": [ "Disqus/1.0" ], "url": "https://disqus.com/", "description": "validate and quality check pages." }, { "pattern": "Feedly", "addition_date": "2018/06/27", "instances": [ "Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)", "FeedlyBot/1.0 (http://feedly.com)" ], "url": "https://www.feedly.com/fetcher.html", "description": "Feedly Fetcher is how Feedly grabs RSS or Atom feeds when users choose to add them to their Feedly or any of the other applications built on top of the feedly cloud." 
}, { "pattern": "Fetch\\/", "addition_date": "2018/06/27", "instances": [ "Fetch/2.0a (CMS Detection/Web/SEO analysis tool, see http://guess.scritch.org)" ], "description": "Fetch web crawler for CMS detection" }, { "pattern": "Fever", "addition_date": "2018/06/27", "instances": [ "Fever/1.38 (Feed Parser; http://feedafever.com; Allow like Gecko)" ], "url": "http://feedafever.com", "description": "Fever feed reader web crawler bot" }, { "pattern": "Flamingo_SearchEngine", "addition_date": "2018/06/27", "instances": [ "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)" ], "description": "Flamingo search engine web crawler bot" }, { "pattern": "FlipboardProxy", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (compatible; FlipboardProxy/1.2; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:28.0) Gecko/20100101 Firefox/28.0 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0 (FlipboardProxy/1.2; +http://flipboard.com/browserproxy)" ], "url": "https://about.flipboard.com/browserproxy/", "description": "a proxy service to fetch, validate, and prepare certain elements of websites for presentation through the Flipboard Application" }, { "pattern": "g2reader-bot", "addition_date": "2018/06/27", "instances": [ "g2reader-bot/1.0 (+http://www.g2reader.com/)" ], "url": "http://www.g2reader.com/", "description": "G2Reader web crawler for content discovery" }, { "pattern": "G2 Web Services", "addition_date": "2019/03/01", "instances": [ "G2 Web Services/1.0 (built with StormCrawler Archetype 1.8; https://www.g2webservices.com/; developers@g2llc.com)" ], "url": "https://www.g2webservices.com/", 
"description": "G2 web services web crawler bot" }, { "pattern": "imrbot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; imrbot/1.10.8 +http://www.mignify.com)" ], "url": "http://www.mignify.com", "description": "Mignify imrbot web crawler bot" }, { "pattern": "K7MLWCBot", "addition_date": "2018/06/27", "instances": [ "K7MLWCBot/1.0 (+http://www.k7computing.com)" ], "url": "http://www.k7computing.com", "description": "Virus scanner" }, { "pattern": "Kemvibot", "addition_date": "2018/06/27", "instances": [ "Kemvibot/1.0 (http://kemvi.com, marco@kemvi.com)" ], "url": "http://kemvi.com", "description": "Kemvi web crawler for content discovery" }, { "pattern": "Landau-Media-Spider", "addition_date": "2018/06/27", "instances": [ "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)" ], "url": "http://bots.landaumedia.de/bot.html", "description": "Landau Media web crawler bot" }, { "pattern": "linkapediabot", "addition_date": "2018/06/27", "instances": [ "linkapediabot (+http://www.linkapedia.com)" ], "url": "http://www.linkapedia.com", "description": "Linkapedia web crawler for link discovery" }, { "pattern": "vkShare", "addition_date": "2018/07/02", "instances": [ "Mozilla/5.0 (compatible; vkShare; +http://vk.com/dev/Share)" ], "url": "http://vk.com/dev/Share", "description": "VK Share web crawler for social sharing" }, { "pattern": "Siteimprove\\.com", "addition_date": "2018/06/22", "instances": [ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) LinkCheck by Siteimprove.com", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) Match by Siteimprove.com", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) SiteCheck-sitecrawl by Siteimprove.com", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) LinkCheck by Siteimprove.com" ], "description": "Siteimprove web crawler for site analysis" }, { "pattern": "BLEXBot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; BLEXBot/1.0; 
+http://webmeup-crawler.com/)" ], "url": "http://webmeup-crawler.com", "description": "WebMeUp BLEX web crawler bot" }, { "pattern": "DareBoost", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36 DareBoost" ], "url": "https://www.dareboost.com/", "description": "Bot to test, Analyze and Optimize website" }, { "pattern": "ZuperlistBot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; ZuperlistBot/1.0)" ], "description": "Zuperlist web crawler for content discovery" }, { "pattern": "Miniflux\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; Miniflux/2.0.x-dev; +https://miniflux.net)", "Mozilla/5.0 (compatible; Miniflux/2.0.3; +https://miniflux.net)", "Mozilla/5.0 (compatible; Miniflux/2.0.7; +https://miniflux.net)", "Mozilla/5.0 (compatible; Miniflux/2.0.10; +https://miniflux.net)", "Mozilla/5.0 (compatible; Miniflux/2.0.x-dev; +https://miniflux.app)", "Mozilla/5.0 (compatible; Miniflux/2.0.11; +https://miniflux.app)", "Mozilla/5.0 (compatible; Miniflux/2.0.12; +https://miniflux.app)", "Mozilla/5.0 (compatible; Miniflux/ae1dc1a; +https://miniflux.app)", "Mozilla/5.0 (compatible; Miniflux/3b6e44c; +https://miniflux.app)" ], "url": "https://miniflux.net", "description": "Miniflux is a minimalist and opinionated feed reader."
}, { "pattern": "Feedspot", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)", "Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher; like FeedFetcher-Google)" ], "url": "http://www.feedspot.com/fs/bot", "description": "Feedspot web crawler for feed discovery" }, { "pattern": "Diffbot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)" ], "url": "http://www.diffbot.com", "description": "Diffbot web crawler for content extraction" }, { "pattern": "SEOkicks", "addition_date": "2018/08/22", "instances": [ "Mozilla/5.0 (compatible; SEOkicks; +https://www.seokicks.de/robot.html)" ], "url": "https://www.seokicks.de/robot.html", "description": "SEOkicks web crawler for SEO analysis" }, { "pattern": "tracemyfile", "addition_date": "2018/08/23", "instances": [ "Mozilla/5.0 (compatible; tracemyfile/1.0; +bot@tracemyfile.com)" ], "description": "TraceMyFile web crawler for file tracking" }, { "pattern": "Nimbostratus-Bot", "addition_date": "2018/08/29", "instances": [ "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)" ], "description": "Nimbostratus web crawler for cloud analysis" }, { "pattern": "zgrab", "addition_date": "2018/08/30", "instances": [ "Mozilla/5.0 zgrab/0.x" ], "url": "https://github.com/zmap/zgrab2", "description": "Zgrab web crawler for security scanning" }, { "pattern": "PR-CY\\.RU", "addition_date": "2018/08/30", "instances": [ "Mozilla/5.0 (compatible; PR-CY.RU; + https://a.pr-cy.ru)" ], "url": "https://a.pr-cy.ru/", "description": "PR-CY web crawler for SEO analysis" }, { "pattern": "AdsTxtCrawler", "addition_date": "2018/08/30", "instances": [ "AdsTxtCrawler/1.0" ], "description": "AdsTxt web crawler for ads.txt validation" }, { "pattern": "Datafeedwatch", "addition_date": 
"2018/09/05", "instances": [ "Datafeedwatch/2.1.x" ], "url": "https://www.datafeedwatch.com/", "description": "Datafeedwatch web crawler for feed analysis" }, { "pattern": "Zabbix", "addition_date": "2018/09/05", "instances": [ "Zabbix" ], "url": "https://www.zabbix.com/documentation/3.4/manual/web_monitoring", "description": "Zabbix web crawler for monitoring" }, { "pattern": "TangibleeBot", "addition_date": "2018/09/05", "instances": [ "TangibleeBot/1.0.0.0 (http://tangiblee.com/bot)" ], "url": "http://tangiblee.com/bot", "description": "Tangiblee web crawler for visual search" }, { "pattern": "google-xrawler", "addition_date": "2018/09/05", "instances": [ "google-xrawler" ], "url": "https://webmasters.stackexchange.com/questions/105560/what-is-the-google-xrawler-user-agent-used-for", "description": "Google xrawler web crawler bot" }, { "pattern": "axios", "addition_date": "2018/09/06", "instances": [ "axios/0.18.0", "axios/0.19.0" ], "url": "https://github.com/axios/axios", "description": "Axios HTTP client library for requests" }, { "pattern": "Amazon CloudFront", "addition_date": "2018/09/07", "instances": [ "Amazon CloudFront" ], "url": "https://aws.amazon.com/cloudfront/", "description": "Amazon CloudFront web crawler bot" }, { "pattern": "Pulsepoint", "addition_date": "2018/09/24", "instances": [ "Pulsepoint XT3 web scraper" ], "description": "Pulsepoint web crawler for content discovery" }, { "pattern": "CloudFlare-AlwaysOnline", "addition_date": "2018/09/27", "instances": [ "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +http://www.cloudflare.com/always-online) AppleWebKit/534.34", "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +https://www.cloudflare.com/always-online) AppleWebKit/534.34" ], "url": "https://www.cloudflare.com/always-online/", "description": "CloudFlare always online web crawler" }, { "pattern": "Cloudflare-Healthchecks", "addition_date": "2024/12/17", "instances": [ "Mozilla/5.0 (compatible; Cloudflare-Healthchecks/1.0; 
+https://www.cloudflare.com/; healthcheck-id: AAAAAAAAAAAAAAAA)" ], "url": "https://developers.cloudflare.com/health-checks/", "description": "CloudFlare health checks web crawler" }, { "pattern": "Cloudflare-Traffic-Manager", "addition_date": "2024/12/17", "instances": [ "Mozilla/5.0 (compatible; Cloudflare-Traffic-Manager/1.0; +https://www.cloudflare.com/traffic-manager/; pool-id: AAAAAAAAAAAAAAAA)" ], "url": "https://developers.cloudflare.com/load-balancing/monitors/", "description": "CloudFlare traffic manager web crawler" }, { "pattern": "CloudFlare-Prefetch", "addition_date": "2024/12/17", "instances": [ "Mozilla/5.0 (compatible; CloudFlare-Prefetch/0.1; +http://www.cloudflare.com/)" ], "url": "https://developers.cloudflare.com/speed/optimization/content/prefetch-urls/", "description": "CloudFlare prefetch web crawler bot" }, { "pattern": "Cloudflare-SSLDetector", "addition_date": "2024/12/17", "instances": [ "Cloudflare-SSLDetector" ], "url": "https://developers.cloudflare.com/ssl/origin-configuration/ssl-tls-recommender/", "description": "CloudFlare SSL detector web crawler" }, { "pattern": "https:\\/\\/developers\\.cloudflare\\.com\\/security-center\\/", "addition_date": "2024/12/17", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 (compatible; +https://developers.cloudflare.com/security-center/)" ], "url": "https://developers.cloudflare.com/security-center/", "description": "CloudFlare security center web crawler" }, { "pattern": "Google-Structured-Data-Testing-Tool", "addition_date": "2018/10/02", "instances": [ "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool)", "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +http://developers.google.com/structured-data/testing-tool/)" ], "url": "https://search.google.com/structured-data/testing-tool", "description":
"Google structured data testing web crawler" }, { "pattern": "WordupInfoSearch", "addition_date": "2018/10/07", "instances": [ "WordupInfoSearch/1.0" ], "description": "Wordup info search web crawler bot" }, { "pattern": "WebDataStats", "addition_date": "2018/10/08", "instances": [ "Mozilla/5.0 (compatible; WebDataStats/1.0 ; +https://webdatastats.com/policy.html)" ], "url": "https://webdatastats.com/", "description": "WebDataStats web crawler for data analysis" }, { "pattern": "HttpUrlConnection", "addition_date": "2018/10/08", "instances": [ "Jersey/2.25.1 (HttpUrlConnection 1.8.0_141)" ], "description": "Java HttpUrlConnection HTTP client" }, { "pattern": "ZoomBot", "addition_date": "2018/10/10", "instances": [ "ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)" ], "url": "http://suite.seozoom.it/bot.html", "description": "SEOZoom web crawler for SEO analysis" }, { "pattern": "VelenPublicWebCrawler", "addition_date": "2018/10/09", "url": "https://velen.io/", "instances": [ "VelenPublicWebCrawler (velen.io)" ], "description": "Velen web crawler for content discovery" }, { "pattern": "MoodleBot", "addition_date": "2018/10/10", "instances": [ "MoodleBot/1.0" ], "description": "Moodle web crawler for learning platforms" }, { "pattern": "jpg-newsbot", "addition_date": "2018/10/10", "instances": [ "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)" ], "url": "https://vipnytt.no/bots/", "description": "JPG news web crawler bot" }, { "pattern": "outbrain", "addition_date": "2018/10/14", "instances": [ "Mozilla/5.0 (Java) outbrain" ], "url": "https://www.outbrain.com/help/advertisers/invalid-url/", "description": "Outbrain web crawler for content discovery" }, { "pattern": "W3C_Validator", "addition_date": "2018/10/14", "instances": [ "W3C_Validator/1.3" ], "url": "https://validator.w3.org/services", "description": "W3C HTML validator web crawler bot" }, { "pattern": "Validator\\.nu", "addition_date": "2018/10/14", "instances": [ "Validator.nu/LV" ], "url": 
"https://validator.w3.org/services", "description": "Validator.nu HTML validator web crawler" }, { "pattern": "W3C-checklink", "addition_date": "2018/10/14", "depends_on": [ "libwww-perl" ], "instances": [ "W3C-checklink/2.90 libwww-perl/5.64", "W3C-checklink/3.6.2.3 libwww-perl/5.64", "W3C-checklink/4.2 [4.20] libwww-perl/5.803", "W3C-checklink/4.2.1 [4.21] libwww-perl/5.803", "W3C-checklink/4.3 [4.42] libwww-perl/5.805", "W3C-checklink/4.3 [4.42] libwww-perl/5.808", "W3C-checklink/4.3 [4.42] libwww-perl/5.820", "W3C-checklink/4.5 [4.154] libwww-perl/5.823", "W3C-checklink/4.5 [4.160] libwww-perl/5.823" ], "url": "https://validator.w3.org/services", "description": "W3C checklink web crawler for link validation" }, { "pattern": "W3C-mobileOK", "addition_date": "2018/10/14", "instances": [ "W3C-mobileOK/DDC-1.0" ], "url": "https://validator.w3.org/services", "description": "W3C mobile OK web crawler bot" }, { "pattern": "W3C_I18n-Checker", "addition_date": "2018/10/14", "instances": [ "W3C_I18n-Checker/1.0" ], "url": "https://validator.w3.org/services", "description": "W3C internationalization web crawler" }, { "pattern": "FeedValidator", "addition_date": "2018/10/14", "instances": [ "FeedValidator/1.3" ], "url": "https://validator.w3.org/services", "description": "Feed validator web crawler for RSS validation" }, { "pattern": "W3C_CSS_Validator", "addition_date": "2018/10/14", "instances": [ "Jigsaw/2.3.0 W3C_CSS_Validator_JFouffa/2.0" ], "url": "https://validator.w3.org/services", "description": "W3C CSS validator web crawler bot" }, { "pattern": "W3C_Unicorn", "addition_date": "2018/10/14", "instances": [ "W3C_Unicorn/1.0" ], "url": "https://validator.w3.org/services", "description": "W3C Unicorn validator web crawler bot" }, { "pattern": "Google-PhysicalWeb", "addition_date": "2018/10/21", "instances": [ "Mozilla/5.0 (Google-PhysicalWeb)" ], "description": "Google physical web web crawler bot" }, { "pattern": "Blackboard", "addition_date": "2018/10/28", 
"instances": [ "Blackboard Safeassign" ], "url": "https://help.blackboard.com/Learn/Administrator/Hosting/Tools_Management/SafeAssign", "description": "Blackboard SafeAssign web crawler bot" }, { "pattern": "ICBot\\/", "addition_date": "2018/10/23", "instances": [ "Mozilla/5.0 (compatible; ICBot/0.1; +https://ideasandcode.xyz" ], "url": "https://ideasandcode.xyz", "description": "Ideas and Code web crawler bot" }, { "pattern": "BazQux", "addition_date": "2018/10/23", "instances": [ "Mozilla/5.0 (compatible; BazQux/2.4; +https://bazqux.com/fetcher; 1 subscribers)" ], "url": "https://bazqux.com/fetcher", "description": "BazQux RSS reader web crawler bot" }, { "pattern": "Twingly", "addition_date": "2018/10/23", "instances": [ "Mozilla/5.0 (compatible; Twingly Recon; twingly.com)" ], "url": "https://twingly.com", "description": "Twingly blog search web crawler bot" }, { "pattern": "Rivva", "addition_date": "2018/10/23", "instances": [ "Mozilla/5.0 (compatible; Rivva; http://rivva.de)" ], "url": "http://rivva.de", "description": "Rivva blog search web crawler bot" }, { "pattern": "Experibot", "addition_date": "2018/11/03", "instances": [ "Experibot-v2 http://goo.gl/ZAr8wX", "Experibot-v3 http://goo.gl/ZAr8wX" ], "url": "https://amirkr.wixsite.com/experibot", "description": "Experibot web crawler for testing" }, { "pattern": "awesomecrawler", "addition_date": "2018/11/24", "instances": [ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.5 Safari/537.22 +awesomecrawler" ], "description": "Awesome web crawler for content discovery" }, { "pattern": "Dataprovider\\.com", "addition_date": "2018/11/24", "instances": [ "Mozilla/5.0 (compatible; Dataprovider.com)" ], "url": "https://www.dataprovider.com/", "description": "Dataprovider web crawler for data collection" }, { "pattern": "GroupHigh\\/", "addition_date": "2018/11/24", "instances": [ "Mozilla/5.0 (compatible; GroupHigh/1.0; +http://www.grouphigh.com/" ], "url": 
"http://www.grouphigh.com/", "description": "GroupHigh web crawler for influencer marketing" }, { "pattern": "theoldreader\\.com", "addition_date": "2018/12/02", "instances": [ "Mozilla/5.0 (compatible; theoldreader.com)" ], "url": "https://www.theoldreader.com/", "description": "The Old Reader web crawler bot" }, { "pattern": "AnyEvent", "addition_date": "2018/12/07", "instances": [ "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/2.24; +http://software.schmorp.de/pkg/AnyEvent)" ], "url": "http://software.schmorp.de/pkg/AnyEvent.html", "description": "AnyEvent Perl HTTP web crawler library" }, { "pattern": "Uptimebot\\.org", "addition_date": "2019/01/17", "instances": [ "Uptimebot.org - Free website monitoring" ], "url": "http://uptimebot.org/", "description": "Uptimebot website monitoring web crawler" }, { "pattern": "Nmap Scripting Engine", "addition_date": "2019/02/04", "instances": [ "Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)" ], "url": "https://nmap.org/book/nse.html", "description": "Nmap NSE web crawler for security scanning" }, { "pattern": "2ip\\.ru", "addition_date": "2019/02/12", "instances": [ "2ip.ru CMS Detector (https://2ip.ru/cms/)" ], "url": "https://2ip.ru/cms/", "description": "2IP CMS detector web crawler bot" }, { "pattern": "Clickagy", "addition_date": "2019/02/19", "instances": [ "Clickagy Intelligence Bot v2" ], "url": "https://www.clickagy.com", "description": "Clickagy intelligence web crawler bot" }, { "pattern": "Caliperbot", "addition_date": "2019/03/02", "instances": [ "Caliperbot/1.0 (+http://www.conductor.com/caliperbot)" ], "url": "http://www.conductor.com/caliperbot", "description": "Conductor Caliperbot web crawler for analysis" }, { "pattern": "MBCrawler", "addition_date": "2019/03/02", "instances": [ "MBCrawler/1.0 (https://monitorbacklinks.com)" ], "url": "https://monitorbacklinks.com", "description": "Monitor Backlinks web crawler bot" }, { "pattern": "online-webceo-bot", "addition_date": 
"2019/03/02", "instances": [ "Mozilla/5.0 (compatible; online-webceo-bot/1.0; +http://online.webceo.com)" ], "url": "http://online.webceo.com", "description": "Online WebCEO web crawler for analysis" }, { "pattern": "B2B Bot", "addition_date": "2019/03/02", "instances": [ "B2B Bot" ], "description": "B2B web crawler for business discovery" }, { "pattern": "AddSearchBot", "addition_date": "2019/03/02", "instances": [ "Mozilla/5.0 (compatible; AddSearchBot/0.9; +http://www.addsearch.com/bot; info@addsearch.com)" ], "url": "http://www.addsearch.com/bot", "description": "AddSearch web crawler for site search" }, { "pattern": "Google Favicon", "addition_date": "2019/03/14", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon" ], "description": "Google favicon web crawler bot" }, { "pattern": "HubSpot", "addition_date": "2019/04/15", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36 HubSpot Webcrawler - web-crawlers@hubspot.com", "Mozilla/5.0 (X11; Linux x86_64; HubSpot Single Page link check; web-crawlers+links@hubspot.com) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36", "Mozilla/5.0 (compatible; HubSpot Crawler; web-crawlers@hubspot.com)", "HubSpot Connect 2.0 (http://dev.hubspot.com/) - BizOpsCompanies-Tq2-BizCoDomainValidationAudit" ], "description": "HubSpot web crawler for marketing automation" }, { "pattern": "Chrome-Lighthouse", "addition_date": "2019/03/15", "instances": [ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Mobile Safari/537.36 Chrome-Lighthouse", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Safari/537.36 Chrome-Lighthouse", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3694.0 Safari/537.36 
Chrome-Lighthouse", "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4695.0 Mobile Safari/537.36 Chrome-Lighthouse", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3694.0 Mobile Safari/537.36 Chrome-Lighthouse" ], "url": "https://developers.google.com/speed/pagespeed/insights", "description": "Chrome Lighthouse web crawler for audits" }, { "pattern": "HeadlessChrome", "url": "https://developers.google.com/web/updates/2017/04/headless-chrome", "addition_date": "2019/06/17", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/74.0.3729.169 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/69.0.3494.0 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/76.0.3803.0 Safari/537.36" ], "description": "Headless Chrome web crawler for testing" }, { "pattern": "CheckMarkNetwork\\/", "addition_date": "2019/06/30", "instances": [ "CheckMarkNetwork/1.0 (+http://www.checkmarknetwork.com/spider.html)" ], "url": "https://www.checkmarknetwork.com/", "description": "CheckMark Network web crawler bot" }, { "pattern": "www\\.uptime\\.com", "addition_date": "2019/07/21", "instances": [ "Mozilla/5.0 (compatible; Uptimebot/1.0; +http://www.uptime.com/uptimebot)" ], "url": "http://www.uptime.com/uptimebot", "description": "Uptime.com website monitoring web crawler" }, { "pattern": "Streamline3Bot\\/", "addition_date": "2019/07/21", "instances": [ "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1) Streamline3Bot/1.0", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +https://www.ubtsupport.com/legal/Streamline3Bot.php) Streamline3Bot/1.0" ], "url": "https://www.ubtsupport.com/legal/Streamline3Bot.php", "description": "Streamline3 web crawler for monitoring" }, { "pattern": "serpstatbot\\/", "addition_date": "2019/07/25", "instances": [ 
"serpstatbot/1.0 (advanced backlink tracking bot; http://serpstatbot.com/; abuse@serpstatbot.com)", "serpstatbot/1.0 (advanced backlink tracking bot; curl/7.58.0; http://serpstatbot.com/; abuse@serpstatbot.com)" ], "url": "http://serpstatbot.com", "description": "Serpstat web crawler for backlink tracking" }, { "pattern": "MixnodeCache\\/", "addition_date": "2019/08/04", "instances": [ "MixnodeCache/1.8(+https://cache.mixnode.com/)" ], "url": "https://cache.mixnode.com/", "description": "Mixnode cache web crawler bot" }, { "pattern": "^curl", "addition_date": "2019/08/15", "instances": [ "curl", "curl/7.29.0", "curl/7.47.0", "curl/7.54.0", "curl/7.55.1", "curl/7.64.0", "curl/7.64.1", "curl/7.65.3" ], "url": "https://curl.haxx.se/", "description": "cURL command-line tool for HTTP requests" }, { "pattern": "SimpleScraper", "addition_date": "2019/08/16", "instances": [ "Mozilla/5.0 (compatible; SimpleScraper)" ], "url": "https://github.com/ramonkcom/simple-scraper/", "description": "Simple Scraper web scraping tool" }, { "pattern": "RSSingBot", "addition_date": "2019/09/15", "instances": [ "RSSingBot (http://www.rssing.com)" ], "url": "http://www.rssing.com", "description": "RSSing RSS reader web crawler bot" }, { "pattern": "Jooblebot", "addition_date": "2019/09/25", "instances": [ "Mozilla/5.0 (compatible; Jooblebot/2.0; Windows NT 6.1; WOW64; +http://jooble.org/jooble-bot) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36" ], "url": "http://jooble.org/jooble-bot", "description": "Jooble job search web crawler bot" }, { "pattern": "fedoraplanet", "addition_date": "2019/09/28", "instances": [ "venus/fedoraplanet" ], "url": "http://fedoraplanet.org/", "description": "Fedora Planet web crawler for aggregation" }, { "pattern": "Friendica", "addition_date": "2019/09/28", "instances": [ "Friendica 'The Tazmans Flax-lily' 2019.01-1293; https://hoyer.xyz" ], "url": "https://hoyer.xyz", "description": "Friendica social network web crawler bot" }, { "pattern": "NextCloud", 
"addition_date": "2019/09/30", "instances": [ "NextCloud-News/1.0" ], "url": "https://nextcloud.com/", "description": "NextCloud news reader web crawler bot" }, { "pattern": "Tiny Tiny RSS", "addition_date": "2019/10/04", "instances": [ "Tiny Tiny RSS/1.15.3 (http://tt-rss.org/)", "Tiny Tiny RSS/17.12 (a2d1fa5) (http://tt-rss.org/)", "Tiny Tiny RSS/19.2 (b68db2d) (http://tt-rss.org/)", "Tiny Tiny RSS/19.8 (http://tt-rss.org/)" ], "url": "http://tt-rss.org/", "description": "Tiny Tiny RSS reader web crawler bot" }, { "pattern": "RegionStuttgartBot", "addition_date": "2019/10/17", "instances": [ "Mozilla/5.0 (compatible; RegionStuttgartBot/1.0; +http://it.region-stuttgart.de/competenzatlas/unternehmen-suchen/)" ], "url": "http://it.region-stuttgart.de/competenzatlas/unternehmen-suchen/", "description": "Region Stuttgart web crawler bot" }, { "pattern": "Bytespider", "addition_date": "2019/11/11", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.3754.1902 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.4454.1745 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.7597.1164 Mobile Safari/537.36; Bytespider;bytespider@bytedance.com", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2988.1545 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4141.1682 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.3478.1649 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.5267.1259 Mobile Safari/537.36; 
Bytespider", "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.7990.1979 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.2268.1523 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2576.1836 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.9681.1227 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.6023.1635 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.4944.1981 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.3613.1739 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.4022.1033 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.3248.1547 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.5527.1507 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.5216.1326 Mobile Safari/537.36; Bytespider", "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.9038.1080 Mobile Safari/537.36; Bytespider" ], "url": "https://stackoverflow.com/questions/57908900/what-is-the-bytespider-user-agent", "description": "ByteDance Bytespider web crawler 
bot" }, { "pattern": "Datanyze", "addition_date": "2019/11/17", "instances": [ "Mozilla/5.0 (X11; Datanyze; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36" ], "url": "https://www.datanyze.com/dnyzbot/", "description": "Datanyze web crawler for technology detection" }, { "pattern": "Google-Site-Verification", "addition_date": "2019/12/11", "instances": [ "Mozilla/5.0 (compatible; Google-Site-Verification/1.0)" ], "url": "https://support.google.com/webmasters/answer/9008080", "description": "Google site verification web crawler bot" }, { "pattern": "TrendsmapResolver", "addition_date": "2020/02/24", "instances": [ "Mozilla/5.0 (compatible; TrendsmapResolver/0.1)" ], "url": "https://www.trendsmap.com/", "description": "Trendsmap web crawler for trend analysis" }, { "pattern": "tweetedtimes", "addition_date": "2020/02/24", "instances": [ "Mozilla/5.0 (compatible; +http://tweetedtimes.com)" ], "url": "https://tweetedtimes.com/", "description": "Tweeted Times web crawler for news" }, { "pattern": "NTENTbot", "addition_date": "2020/02/24", "instances": [ "Mozilla/5.0 (compatible; NTENTbot; +http://www.ntent.com/ntentbot)" ], "url": "https://ntent.com/ntentbot/", "description": "NTENT web crawler for search results" }, { "pattern": "Gwene", "addition_date": "2020/02/24", "instances": [ "Gwene/1.0 (The gwene.org rss-to-news gateway) Googlebot" ], "url": "https://gwene.org", "description": "Gwene RSS to news gateway web crawler" }, { "pattern": "SimplePie", "addition_date": "2020/02/24", "instances": [ "SimplePie/1.3-dev (Feed Parser; http://simplepie.org; Allow like Gecko)" ], "url": "http://simplepie.org", "description": "SimplePie PHP feed parser web crawler" }, { "pattern": "SearchAtlas", "addition_date": "2020/03/02", "instances": [ "SearchAtlas.com SEO Crawler" ], "url": "http://SearchAtlas.com", "description": "SearchAtlas SEO web crawler for analysis" }, { "pattern": "Superfeedr", "addition_date": "2020/03/02", "instances": [ 
"Superfeedr bot/2.0 http://superfeedr.com - Make your feeds realtime: get in touch - feed-id:1162088860" ], "url": "http://superfeedr.com", "description": "Superfeedr feed reader web crawler bot" }, { "pattern": "feedbot", "addition_date": "2020/03/02", "instances": [ "wp.com feedbot/1.0 (+https://wp.com)" ], "url": "http://wp.com", "description": "WordPress.com feed web crawler bot" }, { "pattern": "UT-Dorkbot", "addition_date": "2020/03/02", "instances": [ "UT-Dorkbot/1.0" ], "url": "https://security.utexas.edu/dorkbot", "description": "University of Texas security web crawler" }, { "pattern": "Amazonbot", "addition_date": "2020/03/02", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" ], "url": "https://developer.amazon.com/support/amazonbot", "description": "Amazon web crawler for product discovery" }, { "pattern": "AmazonProductDiscovery", "addition_date": "2025/12/22", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 (compatible; AmazonProductDiscovery/1.0; https://vendorcentral.amazon.com/support/amazonproductbot)", "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 (compatible; AmazonProductDiscovery/1.0; https://vendorcentral.amazon.com/support/amazonproductbot)" ], "url": "https://vendorcentral.amazon.com/support/amazonproductbot", "description": "Amazon product discovery web crawler" }, { "pattern": "AmazonSellerInitiatedListing", "addition_date": "2025/12/22", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 (compatible; AmazonSellerInitiatedListing/1.0; https://vendorcentral.amazon.com/support/amazonproductbot)" ], "url": "https://vendorcentral.amazon.com/support/amazonproductbot", "description": "Amazon seller listing web crawler" }, { "pattern": 
"SerendeputyBot", "addition_date": "2020/03/02", "instances": [ "SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)" ], "url": "http://serendeputy.com/about/serendeputy-bot", "description": "Serendeputy web crawler for content discovery" }, { "pattern": "Eyeotabot", "addition_date": "2020/03/02", "instances": [ "Mozilla/5.0 (compatible; Eyeotabot/1.0; +http://www.eyeota.com)" ], "url": "http://www.eyeota.com", "description": "Eyeota web crawler for audience data" }, { "pattern": "officestorebot", "addition_date": "2020/03/02", "instances": [ "Mozilla/5.0 (compatible; officestorebot/1.0; +https://aka.ms/officestorebot)" ], "url": "https://aka.ms/officestorebot", "description": "Microsoft Office Store web crawler bot" }, { "pattern": "Neticle Crawler", "addition_date": "2020/03/02", "instances": [ "Neticle Crawler v1.0 ( https://neticle.com/bot/en/ )" ], "url": "https://neticle.com/bot/en/", "description": "Neticle web crawler for content analysis" }, { "pattern": "SurdotlyBot", "addition_date": "2020/03/02", "instances": [ "Mozilla/5.0 (compatible; SurdotlyBot/1.0; +http://sur.ly/bot.html; Linux; Android 4; iPhone; CPU iPhone OS 6_0_1 like Mac OS X)" ], "url": "http://sur.ly/bot.html", "description": "Surly web crawler for link shortening" }, { "pattern": "LinkisBot", "addition_date": "2020/03/02", "instances": [ "Mozilla/5.0 (compatible; LinkisBot/1.0; bot@linkis.com) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321" ], "description": "Linkis web crawler for link sharing" }, { "pattern": "AwarioSmartBot", "addition_date": "2020/03/02", "instances": [ "AwarioSmartBot/1.0 (+https://awario.com/bots.html; bots@awario.com)" ], "url": "https://awario.com/bots.html", "description": "Awario smart web crawler bot" }, { "pattern": "AwarioRssBot", "addition_date": "2020/03/02", "instances": [ "AwarioRssBot/1.0 (+https://awario.com/bots.html; bots@awario.com)" ], "url": "https://awario.com/bots.html", "description": "Awario RSS feed web crawler bot" 
}, { "pattern": "RyteBot", "addition_date": "2020/03/02", "instances": [ "RyteBot/1.0.0 (+https://bot.ryte.com/)" ], "url": "https://bot.ryte.com/", "description": "Ryte web crawler for site analysis" }, { "pattern": "FreeWebMonitoring SiteChecker", "addition_date": "2020/03/02", "instances": [ "FreeWebMonitoring SiteChecker/0.2 (+https://www.freewebmonitoring.com/bot.html)" ], "url": "https://www.freewebmonitoring.com/bot.html", "description": "FreeWebMonitoring site checker web crawler" }, { "pattern": "AspiegelBot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; AspiegelBot)" ], "url": "https://aspiegel.com", "description": "Aspiegel web crawler for content discovery" }, { "pattern": "NAVER Blog Rssbot", "addition_date": "2020/03/16", "instances": [ "NAVER Blog Rssbot" ], "url": "http://www.naver.com", "description": "Naver blog RSS web crawler bot" }, { "pattern": "zenback bot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; zenback bot; powered by logly +http://corp.logly.co.jp/)" ], "url": "http://corp.logly.co.jp/", "description": "Zenback web crawler for content discovery" }, { "pattern": "SentiBot", "addition_date": "2020/03/16", "instances": [ "SentiBot www.sentibot.eu (compatible with Googlebot)" ], "url": "https://sites.google.com/senti1.com/sentibot-eu/home", "description": "SentiBot web crawler for sentiment analysis" }, { "pattern": "Domains Project\\/", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; Domains Project/1.0.3; +https://github.com/tb0hdan/domains)" ], "url": "https://github.com/tb0hdan/domains", "description": "Domains Project web crawler bot" }, { "pattern": "Pandalytics", "addition_date": "2020/03/16", "instances": [ "Pandalytics/1.0 (https://domainsbot.com/pandalytics/)" ], "url": "https://domainsbot.com/pandalytics/", "description": "Pandalytics web crawler for domain analysis" }, 
{ "pattern": "VKRobot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; VKRobot/1.0)" ], "description": "VK social network web crawler bot" }, { "pattern": "bidswitchbot", "addition_date": "2020/03/16", "instances": [ "bidswitchbot/1.0" ], "url": "https://www.bidswitch.com/about-us/", "description": "BidSwitch web crawler for advertising" }, { "pattern": "tigerbot", "addition_date": "2020/03/16", "instances": [ "tigerbot" ], "description": "Tiger web crawler for content discovery" }, { "pattern": "NIXStatsbot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; NIXStatsbot/1.1; +http://www.nixstats.com/bot.html)" ], "url": "http://www.nixstats.com/bot.html", "description": "NIXStats web crawler for monitoring" }, { "pattern": "Atom Feed Robot", "addition_date": "2020/03/16", "instances": [ "RSSMicro.com RSS/Atom Feed Robot" ], "url": "https://rssmicro.com", "description": "RSSMicro atom feed web crawler bot" }, { "pattern": "[Cc]urebot", "addition_date": "2020/03/16", "instances": [ "Curebot/1.0", "curebot-feed-fetcher" ], "description": "Cure web crawler for content discovery" }, { "pattern": "PagePeeker\\/", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36 (compatible; PagePeeker/3.0; +https://pagepeeker.com/robots/)" ], "url": "https://pagepeeker.com/robots/", "description": "PagePeeker web crawler for screenshots" }, { "pattern": "Vigil\\/", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; Vigil/1.0; +http://vigil-app.com/bot.html)" ], "url": "http://vigil-app.com/bot.html", "description": "Vigil web crawler for monitoring" }, { "pattern": "rssbot\\/", "addition_date": "2020/03/16", "instances": [ "rssbot/1.4.3 (+https://t.me/RustRssBot)" ], "url": "https://github.com/iovxw/rssbot", "description": "RSS bot web crawler for feeds" }, { "pattern": "startmebot\\/", "addition_date": 
"2020/03/16", "instances": [ "Mozilla/5.0 (compatible; startmebot/1.0; +https://start.me/bot)" ], "url": "https://start.me/bot", "description": "Start.me web crawler for content discovery" }, { "pattern": "JobboerseBot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (X11; U; Linux Core i7-4980HQ; de; rv:32.0; compatible; JobboerseBot; http://www.jobboerse.com/bot.htm) Gecko/20100101 Firefox/38.0" ], "url": "http://www.jobboerse.com/bot.htm", "description": "Jobboerse web crawler for job discovery" }, { "pattern": "seewithkids", "addition_date": "2020/03/16", "instances": [ "http://seewithkids.com/bot" ], "url": "http://seewithkids.com/bot", "description": "SeeWithKids web crawler for content discovery" }, { "pattern": "NINJA bot", "addition_date": "2020/03/16", "instances": [ "NINJA bot" ], "description": "NINJA web crawler for content discovery" }, { "pattern": "Cutbot", "addition_date": "2020/03/16", "instances": [ "Cutbot; 1.5; http://cutbot.net/" ], "url": "http://cutbot.net/", "description": "Cutbot web crawler for content discovery" }, { "pattern": "BublupBot", "addition_date": "2020/03/16", "instances": [ "BublupBot (+https://www.bublup.com/bublup-bot.html)" ], "url": "https://www.bublup.com/bublup-bot.html", "description": "Bublup web crawler for content discovery" }, { "pattern": "BrandONbot", "addition_date": "2020/03/16", "instances": [ "BrandONbot (http://brandonmedia.net)" ], "url": "http://brandonmedia.net", "description": "BrandON web crawler for brand monitoring" }, { "pattern": "RidderBot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; RidderBot/1.0; bot@ridder.co)", "Mozilla/5.0 (compatible; RidderBot/1.0; bot@ridder.co) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321" ], "url": "https://ridder.co/", "description": "Ridder web crawler for content discovery" }, { "pattern": "Taboolabot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; Taboolabot/3.7; +http://www.taboola.com)" 
], "url": "http://www.taboola.com", "description": "Taboola web crawler for content discovery" }, { "pattern": "Dubbotbot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; Dubbotbot/0.2; +http://dubbot.com)" ], "url": "http://dubbot.com", "description": "Dubbot web crawler for content discovery" }, { "pattern": "FindITAnswersbot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible;FindITAnswersbot/1.0;+http://search.it-influentials.com/bot.htm)" ], "url": "http://search.it-influentials.com/bot.htm", "description": "FindITAnswers web crawler for discovery" }, { "pattern": "infoobot", "addition_date": "2020/03/16", "instances": [ "infoobot/0.1 (https://www.infoo.nl/bot.html)" ], "url": "https://www.infoo.nl/bot.html", "description": "Infoo web crawler for content discovery" }, { "pattern": "Refindbot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36 (Refindbot/1.0)" ], "url": "https://refind.com/about", "description": "Refind web crawler for content discovery" }, { "pattern": "BlogTraffic\\/\\d\\.\\d+ Feed-Fetcher", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; BlogTraffic/1.4 Feed-Fetcher; +http://www.blogtraffic.de/rss-bot.html)" ], "url": "http://www.blogtraffic.de/rss-bot.html", "description": "BlogTraffic feed fetcher web crawler" }, { "pattern": "SeobilityBot", "addition_date": "2020/03/16", "instances": [ "SeobilityBot (SEO Tool; https://www.seobility.net/sites/bot.html)" ], "url": "https://www.seobility.net/sites/bot.html", "description": "Seobility web crawler for SEO analysis" }, { "pattern": "Cincraw", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; Cincraw/1.0; +http://cincrawdata.net/bot/)" ], "url": "http://cincrawdata.net/bot/", "description": "Cincraw web crawler for data collection" }, { "pattern": "Dragonbot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 
(Windows NT 6.1; rv:34.0) Gecko/20100101 Firefox/34.0; Dragonbot; http://www.dragonmetrics.com" ], "url": "http://www.dragonmetrics.com", "description": "DragonMetrics web crawler for analysis" }, { "pattern": "VoluumDSP-content-bot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; VoluumDSP-content-bot/2.0; +dsp-dev@codewise.com)" ], "url": "https://codewise.com", "description": "Voluum DSP web crawler for advertising" }, { "pattern": "FreshRSS", "addition_date": "2020/03/16", "instances": [ "FreshRSS/1.11.2 (Linux; https://freshrss.org) like Googlebot" ], "url": "https://freshrss.org", "description": "FreshRSS feed reader web crawler bot" }, { "pattern": "BitBot", "addition_date": "2020/03/16", "instances": [ "Mozilla/5.0 (compatible; BitBot/v1.19.0; +https://bitbot.dev)" ], "url": "https://bitbot.dev", "description": "BitBot web crawler for content discovery" }, { "pattern": "^PHP-Curl-Class", "addition_date": "2020/12/10", "instances": [ "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.2.24 curl/7.61.1", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.3.19 curl/7.66.0", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.3.23 curl/7.66.0", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.4.7 curl/7.69.1", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.4.9 curl/7.69.1", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.4.10 curl/7.69.1", "PHP-Curl-Class/4.13.0 (+https://github.com/php-curl-class/php-curl-class) PHP/7.4.11 curl/7.69.1" ], "url": "https://github.com/php-curl-class/php-curl-class", "description": "PHP Curl Class HTTP client library" }, { "pattern": "Google-Certificates-Bridge", "addition_date": "2020/12/23", "instances": [ "Google-Certificates-Bridge" ], "description": "Google certificates bridge web crawler" }, { "pattern": 
"centurybot", "addition_date": "2022/04/26", "instances": [ "Mozilla/5.0 (compatible; Go-http-client/1.1; +centurybot9@gmail.com)" ], "description": "Century web crawler for content discovery" }, { "pattern": "Viber", "addition_date": "2021/04/27", "instances": [ "Viber" ], "url": "https://www.viber.com/", "description": "Viber messaging web crawler bot" }, { "pattern": "e\\.ventures Investment Crawler", "addition_date": "2021/06/05", "url": "https://www.eventures.vc/", "instances": [ "e.ventures Investment Crawler (eventures.vc)" ], "description": "E.ventures investment web crawler bot" }, { "pattern": "evc-batch", "addition_date": "2021/06/07", "url": "https://www.eventures.vc/", "instances": [ "Mozilla/5.0 (compatible; evc-batch/2.0)" ], "description": "E.ventures batch web crawler bot" }, { "pattern": "PetalBot", "addition_date": "2021/06/07", "instances": [ "Mozilla/5.0 (compatible;PetalBot;+https://webmaster.petalsearch.com/site/petalbot)", "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)" ], "url": "https://webmaster.petalsearch.com/site/petalbot", "description": "Petal search engine web crawler bot" }, { "pattern": "virustotal", "addition_date": "2021/09/22", "instances": [ "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US) AppEngine-Google; (+http://code.google.com/appengine; appid: s~virustotalcloud)", "AppEngine-Google; (+http://code.google.com/appengine; appid: s~virustotalcloud)" ], "url": "https://www.virustotal.com/gui/home/url", "description": "VirusTotal web crawler for security" }, { "pattern": "(^| )PTST\\/", "addition_date": "2021/12/05", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36 PTST/211202.211915", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0 PTST/211202.211915" ], "url": "https://www.webpagetest.org", 
"description": "WebPageTest web crawler for testing" }, { "pattern": "minicrawler", "addition_date": "2022/01/12", "instances": [ "Testomatobot/1.0 (Linux x86_64; +https://www.testomato.com/testomatobot) minicrawler/5.2.2" ], "url": "https://www.testomato.com/bot", "description": "Testomato web crawler for testing" }, { "pattern": "Cookiebot", "addition_date": "2022/01/23", "url": "https://www.cookiebot.com/", "instances": [ "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko; compatible; Cookiebot/1.0; +http://cookiebot.com/) Chrome/97.0.4692.71 Safari/537.36" ], "description": "Cookiebot web crawler for cookie scanning" }, { "pattern": "trovitBot", "addition_date": "2022/06/08", "url": "http://www.trovit.com/bot.html", "instances": [ "Mozilla/5.0 (compatible; trovitBot 1.0; +http://www.trovit.com/bot.html)" ], "description": "Trovit web crawler for content discovery" }, { "pattern": "seostar\\.co", "addition_date": "2022/08/04", "url": "https://seostar.co/robot/", "instances": [ "Mozilla/5.0 (compatible; Adsbot/3.1; +https://seostar.co/robot/)" ], "description": "SEOstar web crawler for SEO analysis" }, { "pattern": "IonCrawl", "addition_date": "2022/08/04", "url": "https://www.ionos.de/terms-gtc/faq-crawler-en", "instances": [ "IonCrawl (https://www.ionos.de/terms-gtc/faq-crawler-en/)" ], "description": "IONOS IonCrawl web crawler bot" }, { "pattern": "Uptime-Kuma", "addition_date": "2022/10/17", "url": "https://uptime.kuma.pet/", "instances": [ "Uptime-Kuma/1.23.16", "Uptime-Kuma/1.23.15", "Uptime-Kuma/1.23.14", "Uptime-Kuma/1.23.13", "Uptime-Kuma/1.23.12", "Uptime-Kuma/1.23.11", "Uptime-Kuma/1.23.10", "Uptime-Kuma/1.18.0" ], "description": "Uptime Kuma web crawler for monitoring" }, { "pattern": "Seekport", "addition_date": "2022/10/17", "url": "https://bot.seekport.com", "instances": [ "Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)", "Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/)" ], "description": 
"Seekport web crawler for content discovery" }, { "pattern": "FreshpingBot", "addition_date": "2022/10/17", "url": "https://www.freshworks.com/website-monitoring/", "instances": [ "FreshpingBot/1.0 (+https://freshping.io/)" ], "description": "Freshping web crawler for monitoring" }, { "pattern": "Feedbin", "addition_date": "2022/11/05", "url": "https://feedbin.com/", "instances": [ "Feedbin feed-id:2005098 - 2 subscribers" ], "description": "Feedbin feed reader web crawler bot" }, { "pattern": "CriteoBot", "addition_date": "2022/11/13", "url": "https://www.criteo.com/", "instances": [ "CriteoBot/0.1 (+https://www.criteo.com/criteo-crawler/)" ], "description": "Criteo web crawler for advertising" }, { "pattern": "Snap URL Preview Service", "addition_date": "2022/11/13", "url": "https://snap.com/", "instances": [ "Snap URL Preview Service; bot; snapchat; https://developers.snap.com/robots" ], "description": "Snapchat URL preview web crawler" }, { "pattern": "Better Uptime Bot", "addition_date": "2022/11/13", "url": "https://betteruptime.com/", "instances": [ "Better Uptime Bot Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" ], "description": "Better Uptime web crawler for monitoring" }, { "pattern": "RuxitSynthetic", "addition_date": "2023/02/16", "url": "https://www.dynatrace.com/support/help/platform-modules/digital-experience/synthetic-monitoring/browser-monitors/configure-browser-monitors#expand--default-user-agent", "instances": [ "RuxitSynthetic/1.0" ], "description": "Dynatrace Ruxit synthetic web crawler" }, { "pattern": "Google-Read-Aloud", "addition_date": "2023/02/16", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36 (compatible; Google-Read-Aloud; 
+https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers)", "Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36 (compatible; Google-Read-Aloud; +https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers)" ], "description": "Google read aloud web crawler bot" }, { "pattern": "Valve\\/Steam", "addition_date": "2023/05/24", "instances": [ "Valve/Steam HTTP Client 1.0 (SteamChatURLLookup)" ], "description": "Steam web crawler for link previews" }, { "pattern": "OdklBot\\/", "addition_date": "2023/05/24", "instances": [ "OdklBot/1.0 (share@odnoklassniki.ru)", "Mozilla/5.0 (compatible; OdklBot/1.0 like Linux; klass@odnoklassniki.ru)" ], "url": "https://odnoklassniki.ru/", "description": "Odnoklassniki web crawler for sharing" }, { "pattern": "GPTBot", "addition_date": "2023/08/09", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)" ], "url": "https://platform.openai.com/docs/gptbot", "description": "OpenAI GPT web crawler bot" }, { "pattern": "ChatGPT-User", "addition_date": "2024/04/19", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot" ], "url": "https://openai.com/bot", "description": "ChatGPT user web crawler bot" }, { "pattern": "OAI-SearchBot", "addition_date": "2024/09/24", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot" ], "url": "https://platform.openai.com/docs/bots", "description": "OpenAI search web crawler bot" }, { "pattern": "YandexRenderResourcesBot\\/", "addition_date": "2023/08/16", "instances": [ "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0" ], "url": "http://yandex.com/bots", "description": "Yandex 
render resources web crawler" }, { "pattern": "LightspeedSystemsCrawler", "addition_date": "2023/08/16", "instances": [ "LightspeedSystemsCrawler Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US" ], "description": "Lightspeed Systems web crawler bot" }, { "pattern": "ev-crawler\\/", "addition_date": "2023/08/16", "instances": [ "Mozilla/5.0 (compatible; ev-crawler/1.0; +https://headline.com/legal/crawler)" ], "url": "https://headline.com/legal/crawler", "description": "Headline web crawler for content discovery" }, { "pattern": "BitSightBot\\/", "addition_date": "2023/08/16", "instances": [ "Mozilla/5.0 (compatible; BitSightBot/1.0)" ], "url": "https://www.bitsight.com", "description": "BitSight web crawler for security" }, { "pattern": "woorankreview\\/", "addition_date": "2023/08/16", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1 (compatible; woorankreview/2.0; +https://www.woorank.com/)", "Mozilla/5.0 (compatible; woorankreview/2.0; +https://www.woorank.com/)" ], "url": "https://www.woorank.com/", "description": "WooRank web crawler for SEO analysis" }, { "pattern": "Google-Safety", "addition_date": "2023/08/17", "instances": [ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.179 Mobile Safari/537.36 (compatible; Google-Safety; +http://www.google.com/bot.html)", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.179 Safari/537.36 (compatible; Google-Safety; +http://www.google.com/bot.html)", "Google-Safety" ], "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "description": "Google safety web crawler bot" }, { "pattern": "AwarioBot", "addition_date": "2023/08/23", "instances": [ "Mozilla/5.0 (compatible; AwarioBot/1.0; +https://awario.com/bots.html)" ], "url": "https://awario.com/bots.html", 
"description": "Awario web crawler for monitoring" }, { "pattern": "DataForSeoBot", "addition_date": "2023/08/23", "instances": [ "Mozilla/5.0 (compatible; DataForSeoBot/1.0; +https://dataforseo.com/dataforseo-bot)" ], "url": "https://dataforseo.com/dataforseo-bot", "description": "DataForSEO web crawler for analysis" }, { "pattern": "Linespider", "addition_date": "2023/08/24", "instances": [ "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Linespider/1.1; +https://lin.ee/4dwXkTH) Chrome/W.X.Y.Z Safari/537.36" ], "url": "https://help2.line.me/linesearchbot/web/?contentId=50006055&lang=en", "description": "LINE Linespider web crawler bot" }, { "pattern": "WellKnownBot", "addition_date": "2023/08/29", "instances": [ "Mozilla/5.0 (compatible; WellKnownBot/0.1; +https://well-known.dev/about/#bot)" ], "url": "https://well-known.dev/about/#bot)", "description": "WellKnown web crawler for discovery" }, { "pattern": "A Patent Crawler", "addition_date": "2023/08/29", "instances": [ "E. Orliac, G. 
Fourestey/2.3 (A Patent Crawler; http://scitas.epfl.ch/; etienne.orliac@epfl.ch, gilles.fourestey@epfl.ch)" ], "url": "http://scitas.epfl.ch/", "description": "EPFL patent web crawler bot" }, { "pattern": "StractBot", "addition_date": "2023/09/06", "instances": [ "Mozilla/5.0 (compatible; StractBot/0.1; open source search engine; +https://trystract.com/webmasters)" ], "url": "https://trystract.com/webmasters", "description": "Stract search engine web crawler bot" }, { "pattern": "search\\.marginalia\\.nu", "addition_date": "2023/09/08", "instances": [ "search.marginalia.nu" ], "url": "https://search.marginalia.nu", "description": "Marginalia search web crawler bot" }, { "pattern": "YouBot", "addition_date": "2023/09/08", "instances": [ "YouBot (+http://www.you.com)" ], "url": "https://you.com/", "description": "You.com search engine web crawler bot" }, { "pattern": "Nicecrawler", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Nicecrawler/1.1; +http://www.nicecrawler.com/) Chrome/90.0.4430.97 Safari/537.36" ], "url": "http://www.nicecrawler.com/", "description": "Nicecrawler web crawler for discovery" }, { "pattern": "Neevabot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; Neevabot/1.0; +https://neeva.com/neevabot)" ], "url": "https://neeva.com/neevabot", "description": "Neeva search engine web crawler bot" }, { "pattern": "BrightEdge Crawler", "addition_date": "2023/09/08", "instances": [ "BrightEdge Crawler/1.0 (crawler@brightedge.com)" ], "url": "https://www.brightedge.com/", "description": "BrightEdge web crawler for SEO" }, { "pattern": "SiteCheckerBotCrawler", "addition_date": "2023/09/08", "instances": [ "SiteCheckerBotCrawler/1.0 (+http://sitechecker.pro)" ], "url": "http://sitechecker.pro", "description": "SiteChecker web crawler for analysis" }, { "pattern": "TombaPublicWebCrawler", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; 
TombaPublicWebCrawler/1.0; +https://tombascraper.com)" ], "url": "https://tombascraper.com", "description": "Tomba web crawler for email discovery" }, { "pattern": "CrawlyProjectCrawler", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 (compatible; CrawlyProjectCrawler/0.1.3; crawlyproject@digitaldragon.dev +https://crawlyproject.digitaldragon.dev/)" ], "url": "https://crawlyproject.digitaldragon.dev/", "description": "Crawly Project web crawler framework" }, { "pattern": "KomodiaBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +http://www.komodia.com/newwiki/index.php/URL_server_crawler) KomodiaBot/1.0" ], "url": "http://www.komodia.com/newwiki/index.php/URL_server_crawler", "description": "Komodia web crawler for classification" }, { "pattern": "KStandBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +http://url-classification.io/wiki/index.php?title=URL_server_crawler) KStandBot/1.0" ], "url": "http://url-classification.io", "description": "KStand web crawler for classification" }, { "pattern": "CISPA Webcrawler", "addition_date": "2023/09/08", "instances": [ "CISPA Webcrawler (https://vuln-notify-checker.cispa.saarland)" ], "url": "https://vuln-notify-checker.cispa.saarland", "description": "CISPA web crawler for vulnerability" }, { "pattern": "MTRobot", "addition_date": "2023/09/08", "instances": [ "MTRobot/0.2 (Metrics Tools Analytics Crawler; https://metrics-tools.de/robot.html; crawler@metrics-tools.de)" ], "url": "https://metrics-tools.de/robot.html", "description": "Metrics Tools web crawler for analysis" }, { "pattern": "hyscore\\.io", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1. 
4 (compatible; HyScore/1.0; +https://hyscore.io/crawler/)" ], "url": "https://hyscore.io/crawler/", "description": "HyScore web crawler for analysis" }, { "pattern": "AlexandriaOrgBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (Linux) (compatible; AlexandriaOrgBot/1.0; +https://www.alexandria.org/bot.html)" ], "url": "https://www.alexandria.org/bot.html", "description": "Alexandria web crawler for discovery" }, { "pattern": "2ip bot", "addition_date": "2023/09/08", "instances": [ "2ip bot/1.1 (+http://2ip.io)" ], "url": "http://2ip.io", "description": "2IP web crawler for analysis" }, { "pattern": "Yellowbrandprotectionbot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; Yellowbrandprotectionbot/1.0; +https://www.yellowbp.com/bot.html)" ], "url": "https://www.yellowbp.com/bot.html", "description": "Yellow brand protection web crawler" }, { "pattern": "SEOlizer", "addition_date": "2023/09/08", "instances": [ "SEOlizer/1.1 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13 (+https://www.seolizer.de/bot.html)" ], "url": "https://www.seolizer.de/bot.html", "description": "SEOlizer web crawler for SEO analysis" }, { "pattern": "vuhuvBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; vuhuvBot/1.0; +http://vuhuv.com/bot.html)" ], "url": "http://vuhuv.com/bot.html", "description": "Vuhuv web crawler for content discovery" }, { "pattern": "INETDEX-BOT", "addition_date": "2023/09/08", "instances": [ "INETDEX-BOT/1.5 (Mozilla/5.0; https://inetdex.com/bot.html)" ], "url": "https://inetdex.com/bot.html", "description": "INETDEX web crawler for indexing" }, { "pattern": "Synapse", "addition_date": "2023/09/08", "instances": [ "Synapse (bot; +https://github.com/matrix-org/synapse)" ], "url": "https://github.com/matrix-org/synapse", "description": "Matrix Synapse web crawler bot" }, { "pattern": "t3versionsBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; 
t3versionsBot/1.0; +https://www.t3versions.com/bot)" ], "url": "https://www.t3versions.com/bot", "description": "T3versions web crawler for discovery" }, { "pattern": "deepnoc", "addition_date": "2023/09/08", "instances": [ "deepnoc - https://deepnoc.com/bot" ], "url": "https://deepnoc.com/bot", "description": "DeepNOC web crawler for monitoring" }, { "pattern": "Cocolyzebot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; Cocolyzebot/1.0; https://cocolyze.com/bot)" ], "url": "https://cocolyze.com/bot", "description": "Cocolyze web crawler for analysis" }, { "pattern": "hypestat", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; hypestat/1.0; +https://hypestat.com/bot)" ], "url": "https://hypestat.com/bot", "description": "Hypestat web crawler for analysis" }, { "pattern": "ReverseEngineeringBot", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; ReverseEngineeringBot/0.1; +https://torus.company/bot.html)" ], "url": "https://torus.company/bot.html", "description": "Torus reverse engineering web crawler" }, { "pattern": "sempi\\.tech", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; Semanticbot/1.0; +http://sempi.tech/bot.html)" ], "url": "http://sempi.tech/bot.html", "description": "Sempi web crawler for analysis" }, { "pattern": "Iframely", "addition_date": "2023/09/08", "instances": [ "Iframely/1.3.1 (+https://iframely.com/docs/about) Atlassian" ], "url": "https://iframely.com/docs/about", "description": "Iframely web crawler for embeds" }, { "pattern": "MetaInspector", "addition_date": "2023/09/08", "instances": [ "MetaInspector/5.6.0 (+https://github.com/jaimeiniesta/metainspector)" ], "url": "https://github.com/jaimeiniesta/metainspector", "description": "MetaInspector web crawler for metadata" }, { "pattern": "node-fetch", "addition_date": "2023/09/08", "instances": [ "node-fetch/1.0 (+https://github.com/bitinn/node-fetch)" ], "url": 
"https://github.com/bitinn/node-fetch", "description": "Node-fetch HTTP client library" }, { "pattern": "l9explore", "addition_date": "2023/09/08", "instances": [ "l9explore/1.2.2", "lkxscan/v0.1.0 (+https://leakix.net) l9explore/v1.0.0 (+https://github.com/LeakIX/l9explore)" ], "url": "https://github.com/LeakIX/l9explore", "description": "L9explore web crawler for discovery" }, { "pattern": "python-opengraph", "addition_date": "2023/09/08", "instances": [ "python-opengraph-jaywink/0.2.0 (+https://github.com/jaywink/python-opengraph)" ], "url": "https://github.com/jaywink/python-opengraph", "description": "Python OpenGraph web crawler bot" }, { "pattern": "OpenGraphCheck", "addition_date": "2023/09/08", "instances": [ "OpenGraphCheck/2.1 (+https://opengraphcheck.com)" ], "url": "https://opengraphcheck.com", "description": "OpenGraphCheck web crawler for metadata" }, { "pattern": "developers\\.google\\.com\\/\\+\\/web\\/snippet", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google-PageRenderer Google (+https://developers.google.com/+/web/snippet/)", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google (+https://developers.google.com/+/web/snippet/" ], "url": "https://developers.google.com/+/web/snippet", "description": "Google snippet web crawler bot" }, { "pattern": "SenutoBot", "addition_date": "2023/09/08", "instances": [ "SenutoBot/1.0 (compatible; SenutoBot/1.0; +https://www.senuto.com/)" ], "url": "https://www.senuto.com", "description": "Senuto web crawler for SEO analysis" }, { "pattern": "MaCoCu", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; MaCoCu; +https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/)" ], "url": "https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data", 
"description": "MaCoCu web crawler for language research" }, { "pattern": "NewsBlur", "addition_date": "2023/09/08", "instances": [ "NewsBlur Feed Fetcher - 1 subscriber - http://www.newsblur.com/site/0000000/webpage (Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.1 Safari/605.1.15)" ], "url": "http://www.newsblur.com", "description": "NewsBlur feed reader web crawler bot" }, { "pattern": "inoreader", "addition_date": "2023/09/08", "instances": [ "Mozilla/5.0 (compatible; inoreader.com; 1 subscribers)" ], "url": "http://inoreader.com", "description": "Inoreader feed reader web crawler bot" }, { "pattern": "NetSystemsResearch", "addition_date": "2023/09/08", "instances": [ "NetSystemsResearch studies the availability of various services across the internet. Our website is netsystemsresearch.com" ], "url": "http://netsystemsresearch.com", "description": "NetSystemsResearch web crawler bot" }, { "pattern": "PageThing", "addition_date": "2023/09/08", "instances": [ "PageThing http://pagething.com curl www" ], "url": "http://pagething.com", "description": "PageThing web crawler for content discovery" }, { "pattern": "WordPress\\/", "addition_date": "2023/10/24", "instances": [ "WordPress/X.X.X; https://example.com" ], "url": "https://wordpress.org", "description": "WordPress web crawler for site discovery" }, { "pattern": "PhxBot", "addition_date": "2024/01/06", "instances": [ "PhxBot/0.1 (phxbot@protonmail.com)" ], "description": "Phoenix web crawler for content discovery" }, { "pattern": "ImagesiftBot", "addition_date": "2024/01/06", "instances": [ "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)" ], "url": "https://imagesift.com/about", "description": "Imagesift bot for image search and indexing" }, { "pattern": "Expanse", "addition_date": "2024/02/01", "instances": [ "Expanse, a Palo Alto Networks company, searches across the global IPv4 space multiple times per day to identify customers' presences on the 
Internet. If you would like to be excluded from our scans, please send IP addresses/domains to: scaninfo@paloaltonetworks.com" ], "url": "https://www.paloaltonetworks.com/cortex/cortex-xpanse", "description": "Palo Alto Networks Expanse bot for internet asset discovery" }, { "pattern": "InternetMeasurement", "addition_date": "2024/02/01", "instances": [ "Mozilla/5.0 (compatible; InternetMeasurement/1.0; +https://internet-measurement.com/)" ], "url": "https://internet-measurement.com", "description": "Internet Measurement bot for network research and analysis" }, { "pattern": "^BW\\/", "addition_date": "2024/02/08", "instances": [ "BW/1.1; bit.ly/3eZNDnO", "BW/1.1; rb.gy/oupwis" ], "url": "https://builtwith.com/biup", "description": "BuiltWith web crawler for technology detection" }, { "pattern": "GeedoBot", "addition_date": "2024/02/11", "instances": [ "Mozilla/5.0 (compatible; GeedoBot; +http://www.geedo.com/bot.html)" ], "url": "http://www.geedo.com", "description": "Geedo web crawler for content discovery" }, { "pattern": "Audisto Crawler", "addition_date": "2024/03/14", "instances": [ "Audisto Crawler (mobile; +https://audisto.com/bot)", "Audisto Crawler (desktop; +https://audisto.com/bot)", "Audisto Crawler (mobile; essential; +https://audisto.com/bot)", "Audisto Crawler (desktop; essential; +https://audisto.com/bot)" ], "url": "https://audisto.com/help/crawler/bot/", "description": "Audisto SEO web crawler for analysis" }, { "pattern": "PerplexityBot\\/", "addition_date": "2024/03/14", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)" ], "url": "https://docs.perplexity.ai/docs/perplexitybot", "description": "Perplexity AI web crawler for search" }, { "pattern": "[cC]laude[bB]ot", "addition_date": "2024/04/19", "instances": [ "claudebot", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)" ], "url": 
"https://www.anthropic.com/", "description": "Anthropic Claude web crawler bot" }, { "pattern": "Monsidobot", "addition_date": "2024/05/14", "instances": [ "Mozilla/5.0 (compatible; Monsidobot/2.2; +http://monsido.com/bot.html; info@monsido.com)" ], "url": "http://monsido.com/bot.html", "description": "Monsido web crawler for website monitoring" }, { "pattern": "GroupMeBot", "addition_date": "2024/05/19", "instances": [ "GroupMeBot/1.0" ], "url": "https://groupme.com/", "description": "GroupMe web crawler for messaging" }, { "pattern": "Vercelbot", "addition_date": "2024/08/30", "instances": [ "Vercelbot (+https://vercel.com)" ], "url": "https://github.com/vercel/vercel/discussions/5095#discussioncomment-58705", "description": "Vercel web crawler for deployment" }, { "pattern": "vercel-screenshot", "addition_date": "2024/08/30", "instances": [], "description": "Vercel screenshot web crawler bot" }, { "pattern": "facebookcatalog\\/", "addition_date": "2024/10/03", "instances": [ "facebookcatalog/1.0" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers", "description": "Facebook catalog web crawler bot" }, { "pattern": "meta-externalads\\/", "addition_date": "2025/08/08", "instances": [ "meta-externalads/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", "meta-externalads/1.1" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers", "description": "Meta external ads web crawler bot" }, { "pattern": "meta-externalagent\\/", "addition_date": "2024/10/03", "instances": [ "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", "meta-externalagent/1.1" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers", "description": "Meta external agent web crawler bot" }, { "pattern": "meta-externalfetcher\\/", "addition_date": "2024/10/03", "instances": [ "meta-externalfetcher/1.1 
(+https://developers.facebook.com/docs/sharing/webmasters/crawler)", "meta-externalfetcher/1.1" ], "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers", "description": "Meta external fetcher web crawler bot" }, { "pattern": "AcademicBotRTU", "addition_date": "2024/10/17", "instances": [ "AcademicBotRTU (https://academicbot.rtu.lv; mailto:caps@rtu.lv)" ], "url": "https://academicbot.rtu.lv", "description": "Academic Bot RTU web crawler bot" }, { "pattern": "KeybaseBot", "addition_date": "2024/10/21", "url": "https://book.keybase.io/docs/chat/link-previews", "instances": [ "Mozilla/5.0 (compatible; KeybaseBot; +https://keybase.io)" ], "description": "Keybase web crawler for link previews" }, { "pattern": "Lemmy", "addition_date": "2025/02/11", "instances": [ "Lemmy/0.19.8; +https://leminal.space" ], "url": "https://leminal.space", "description": "Lemmy social network web crawler bot" }, { "pattern": "CookieHubScan", "addition_date": "2024/11/29", "url": "https://www.cookiehub.com/", "instances": [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 CookieHubScan/3.0" ], "description": "CookieHub web crawler for cookie scanning" }, { "pattern": "Hydrozen\\.io", "addition_date": "2025/02/02", "instances": [ "Hydrozen.io/1.0" ], "url": "https://docs.hydrozen.io/overview/misc/user-agent-and-ip-list", "description": "Hydrozen web crawler for monitoring" }, { "pattern": "HTTP Banner Detection", "addition_date": "2025/02/10", "instances": [ "HTTP Banner Detection (https://security.ipip.net)" ], "url": "https://security.ipip.net", "description": "IPIP HTTP banner detection web crawler" }, { "pattern": "SummalyBot", "addition_date": "2025/02/10", "instances": [ "SummalyBot/5.1.0" ], "url": "https://github.com/misskey-dev/summaly", "description": "Summaly web crawler for content summarization" }, { "pattern": "MicrosoftPreview\\/", "addition_date": "2025/02/11", "url": 
"https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0", "instances": [ "MicrosoftPreview/2.0; +https://aka.ms/MicrosoftPreview" ], "description": "Microsoft preview web crawler bot" }, { "pattern": "GeedoProductSearch", "addition_date": "2025/03/15", "url": "http://www.geedo.com/product-search.html", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GeedoProductSearch; +http://www.geedo.com/product-search.html) Chrome/79.0.3945.88 Safari/537.36" ], "description": "Geedo product search web crawler bot" }, { "pattern": "TikTokSpider", "addition_date": "2025/03/16", "instances": [ "Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider; ttspider-feedback@tiktok.com)" ], "description": "TikTok web crawler for content discovery" }, { "pattern": "OnCrawl\\/", "addition_date": "2025/03/27", "url": "http://www.oncrawl.com", "instances": [ "Mozilla/5.0 (compatible; OnCrawl/1.0; +http://www.oncrawl.com)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; OnCrawl/1.0; +http://www.oncrawl.com)" ], "description": "OnCrawl SEO web crawler for analysis" }, { "pattern": "sindresorhus\\/got", "addition_date": "2025/04/22", "url": "https://github.com/sindresorhus/got", "instances": [ "got (https://github.com/sindresorhus/got)" ], "description": "Got HTTP client library for requests" }, { "pattern": "CensysInspect\\/", "addition_date": "2025/04/22", "url": "https://about.censys.io", "instances": [ "Mozilla/5.0 (compatible; CensysInspect/1.1; +https://about.censys.io/)" ], "description": "Censys web crawler for security scanning" }, { "pattern": "SBIntuitionsBot\\/", "addition_date": "2025/04/23", "url": "https://www.sbintuitions.co.jp/bot/", "instances": [ "Mozilla/5.0 (compatible; SBIntuitionsBot/0.1; +https://www.sbintuitions.co.jp/bot/)" ], "description": "SB Intuitions 
web crawler bot" }, { "pattern": "sitebulb", "addition_date": "2025/04/30", "url": "https://sitebulb.com/", "instances": [ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6439.0 Mobile Safari/537.36 +https://sitebulb.com" ], "description": "Sitebulb SEO web crawler for analysis" }, { "pattern": "YextBot\\/", "addition_date": "2025/08/08", "url": "https://hitchhikers.yext.com/modules/kg140-yext-site-crawler/01-create-a-crawler/", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/87.0.4280.88 YextBot/Java Safari/537.36" ], "description": "Yext web crawler for site crawling" }, { "pattern": "DatadogSynthetics", "addition_date": "2025/08/19", "url": "https://docs.datadoghq.com/synthetics/", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.168 Safari/537.36 DatadogSynthetics" ], "description": "Datadog synthetics web crawler bot" }, { "pattern": "Google-Ads-Conversions", "addition_date": "2025/09/10", "url": "https://developers.google.com/google-ads/api/docs/conversions/upload-online", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 Chrome/139.0.7258.127 Safari/537.36 Google-Ads-Conversions" ], "description": "Google Ads conversions web crawler" }, { "pattern": "ObservePoint", "addition_date": "2025/12/23", "url": "https://help.observepoint.com/en/articles/9101465-allow-exclude-observepoint-traffic#h_2a8176c9b9", "instances": [], "description": "ObservePoint web crawler for monitoring" }, { "pattern": "Checkly", "addition_date": "2026/02/11", "url": "https://www.checklyhq.com/docs/", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.6998.35 Safari/537.36 (Checkly, https://www.checklyhq.com)" ], "description": "Checkly web crawler for monitoring" }, { "pattern": "ALittle Client", "addition_date": "2026/04/07", "url": 
"https://udger.com/resources/ua-list/bot-detail?bot=ALittle+Client", "instances": [ "ALittle Client" ], "description": "ALittle web crawler for content discovery" }, { "pattern": "AliyunSecBot", "addition_date": "2026/04/07", "url": "https://service.alibaba.com", "instances": [ "AliyunSecBot/Aliyun AliyunSecBot@service.alibaba.com" ], "description": "Alibaba Aliyun security web crawler bot" }, { "pattern": "Claude-Web", "addition_date": "2026/04/07", "url": "https://anthropic.com", "instances": [ "Claude-Web/1.0 (web crawler; +https://www.anthropic.com/; bots@anthropic.com)" ], "description": "Anthropic Claude web crawler bot" }, { "pattern": "anthropic-ai", "addition_date": "2026/04/07", "url": "https://anthropic.com", "instances": [ "anthropic-ai" ], "description": "Anthropic AI web crawler bot" }, { "pattern": "Claude-User", "addition_date": "2026/04/07", "url": "https://useragents.io/uas/mozilla-5-0-applewebkit-537-36-khtml-like-gecko-compatible-claudebot-1-0-supportanthropic-com_954fa13a8e1e46d8267fb56e2d48100e", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Claude-User/1.0; +Claude-User@anthropic.com)", "Claude-User (claude-code/2.1.86; +https://support.anthropic.com/)" ], "description": "Anthropic Claude user web crawler bot" }, { "pattern": "Claude-SearchBot", "addition_date": "2026/04/07", "url": "https://useragents.io/uas/mozilla-5-0-applewebkit-537-36-khtml-like-gecko-compatible-claudebot-1-0-supportanthropic-com_954fa13a8e1e46d8267fb56e2d48100e", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Claude-SearchBot/1.0; +https://www.anthropic.com)" ], "description": "Anthropic Claude search web crawler bot" }, { "pattern": "Google-Extended", "addition_date": "2026/04/07", "url": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers", "instances": [ "Mozilla/5.0 (compatible; Google-Extended/1.0; +http://www.google.com/bot.html)" ], "description": "Google extended 
web crawler bot" }, { "pattern": "cohere-ai", "addition_date": "2026/04/07", "url": "https://cohere.com", "instances": [ "cohere-ai" ], "description": "Cohere AI web crawler bot" }, { "pattern": "Timpibot", "addition_date": "2026/04/07", "url": "https://timpi.io", "instances": [ "Timpibot/0.9 (+http://www.timpi.io)", "Mozilla/5.0 (compatible; Timpibot/0.8; +http://www.timpi.io)", "Mozilla/5.0 (compatible; Timpibot/0.9; +http://www.timpi.io)" ], "description": "Timpi web crawler for content discovery" }, { "pattern": "SERankingBacklinksBot", "addition_date": "2026/04/07", "url": "https://seranking.com/backlinks-crawler", "instances": [ "Mozilla/5.0 (compatible; SERankingBacklinksBot/1.0; +https://seranking.com/backlinks-crawler)" ], "description": "SEranking backlinks web crawler bot" }, { "pattern": "CMSChecker", "addition_date": "2026/04/07", "instances": [ "Mozilla/5.0 (compatible; CMSChecker/1.0; +https://cmschecker.net)" ], "description": "CMS Checker web crawler for detection" }, { "pattern": "Wayback", "addition_date": "2026/04/07", "url": "https://archive.org", "instances": [ "Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)" ], "description": "Internet Archive Wayback web crawler bot" }, { "pattern": "Playwright", "addition_date": "2026/04/07", "url": "https://playwright.dev", "instances": [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Playwright/1.40.0" ], "description": "Playwright browser automation web crawler" }, { "pattern": "Puppeteer", "addition_date": "2026/04/07", "url": "https://pptr.dev", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.0.0 Safari/537.36 Puppeteer" ], "description": "Puppeteer browser automation web crawler" }, { "pattern": "Selenium", "addition_date": "2026/04/07", "url": "https://www.selenium.dev", "instances": [ "Mozilla/5.0 (Windows 
NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36; Selenium" ], "description": "Selenium browser automation web crawler" }, { "pattern": "Nikto", "addition_date": "2026/04/07", "url": "https://cirt.net/Nikto2", "instances": [ "Mozilla/5.00 (Nikto/2.1.5) (Evasions:None) (Test:Port Check)", "Mozilla/5.0 (X11; Linux x86_64) Nikto/2.5.0 (Evasions:None) (Test:Port Check)" ], "description": "Nikto web server security scanner bot" }, { "pattern": "sqlmap", "addition_date": "2026/04/07", "url": "https://sqlmap.org", "instances": [ "sqlmap/1.7.8#stable (https://sqlmap.org)" ], "description": "SQLMap SQL injection testing web crawler" }, { "pattern": "ZmEu", "addition_date": "2026/04/07", "url": "https://en.wikipedia.org/wiki/ZmEu_(vulnerability_scanner)", "instances": [ "ZmEu" ], "description": "ZmEu vulnerability scanner web crawler" }, { "pattern": "masscan", "addition_date": "2026/04/07", "url": "https://github.com/robertdavidgraham/masscan", "instances": [ "masscan/1.0 (https://github.com/robertdavidgraham/masscan)" ], "description": "Masscan network scanner web crawler bot" }, { "pattern": "WPScan", "addition_date": "2026/04/07", "url": "https://wpscan.com", "instances": [ "WPScan v3.8.22 (https://wpscan.com/wordpress-security-scanner)", "Mozilla/5.0 (compatible; WPScan; +https://wpscan.com/wordpress-security-scanner)" ], "description": "WPScan WordPress security scanner bot" }, { "pattern": "[aA]cunetix", "addition_date": "2026/04/07", "url": "https://www.acunetix.com", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) acunetix-product/wvs (Acunetix Web Vulnerability Scanner - Free Edition)", "acunetix-product/wvs" ], "description": "Acunetix web vulnerability scanner bot" }, { "pattern": "Nessus", "addition_date": "2026/04/07", "url": "https://www.tenable.com/products/nessus", "instances": [ "Mozilla/5.0 (compatible; Nessus; http://www.nessus.org)" ], "description": "Nessus vulnerability scanner web crawler" }, { "pattern": 
"[dD]ir[Bb]uster", "addition_date": "2026/04/07", "url": "https://github.com/KajanM/DirBuster", "instances": [ "DirBuster-1.0-RC1 (http://www.owasp.org/index.php/Category:OWASP_DirBuster_Project)" ], "description": "DirBuster directory scanner web crawler" }, { "pattern": "StatusCake", "addition_date": "2026/04/07", "url": "https://www.statuscake.com", "instances": [ "StatusCake_Uptime_Checker/1.0" ], "description": "StatusCake uptime monitoring web crawler" }, { "pattern": "colly", "addition_date": "2026/04/07", "url": "https://go-colly.org", "instances": [ "colly - https://github.com/gocolly/colly", "colly/2.1.0" ], "description": "Colly Go web crawler framework" }, { "pattern": "[mM]echanize", "addition_date": "2026/04/07", "url": "https://github.com/sparklemotion/mechanize", "instances": [ "Mechanize/2.9.1 Ruby/3.1.2 (http://github.com/sparklemotion/mechanize/)" ], "description": "Mechanize Ruby web crawler library" }, { "pattern": "air\\.ai\\/scanning", "addition_date": "2026/04/07", "instances": [ "air.ai/scanning Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Chrome/126.0.0.0 Safari/537.36" ], "description": "Air.ai web crawler for scanning" }, { "pattern": "asnriskscorer", "addition_date": "2026/04/07", "instances": [ "asnriskscorer/1.0" ], "description": "ASN Risk Scorer web crawler bot" }, { "pattern": "OICrawler", "addition_date": "2026/04/07", "url": "https://openindex.ai", "instances": [ "OICrawler/Nutch https://openindex.ai" ], "description": "OpenIndex web crawler for indexing" }, { "pattern": "l9scan", "addition_date": "2026/04/07", "url": "https://github.com/LeakIX/l9scan", "instances": [ "Mozilla/5.0 (l9scan/2.0; +https://github.com/LeakIX/l9scan)" ], "description": "L9scan web crawler for scanning" }, { "pattern": "SlaccaleBot", "addition_date": "2026/04/07", "instances": [ "SlaccaleBot" ], "description": "Slaccale web crawler bot" }, { "pattern": "CustomAsyncHttpClient", "addition_date": "2026/04/07", "instances": [ "CustomAsyncHttpClient" ], 
"description": "Custom async HTTP client web crawler" }, { "pattern": "^HTTPie\\/", "addition_date": "2026/04/07", "url": "https://httpie.io", "instances": [ "HTTPie/3.2.2" ], "description": "HTTPie command-line HTTP client tool" }, { "pattern": "Gemini-Deep-Research", "addition_date": "2026/04/07", "url": "https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Gemini-Deep-Research; +https://gemini.google/overview/deep-research/) Chrome/135.0.0.0 Safari/537.36" ], "description": "Google Gemini deep research web crawler" }, { "pattern": "Perplexity-User", "addition_date": "2026/04/07", "url": "https://docs.perplexity.ai/guides/bots", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Perplexity-User/1.0; +https://perplexity.ai/perplexity-user)" ], "description": "Perplexity user web crawler bot" }, { "pattern": "PerplexityUser", "addition_date": "2026/04/07", "url": "https://perplexity.ai", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityUser/1.0; +https://perplexity.ai)" ], "description": "Perplexity AI web crawler bot" }, { "pattern": "meta-webindexer", "addition_date": "2026/04/07", "url": "https://developers.facebook.com/docs/sharing/webmasters/web-crawlers#meta-webindexer", "instances": [ "meta-webindexer/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)" ], "description": "Meta web indexer bot for Facebook and Instagram content crawling" }, { "pattern": "DuckAssistBot", "addition_date": "2026/04/07", "url": "https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot", "instances": [ "DuckAssistBot/1.2; (+http://duckduckgo.com/duckassistbot.html)" ], "description": "DuckDuckGo assistant bot for web crawling and search" }, { "pattern": "MistralAI-User", "addition_date": "2026/04/07", "instances": [ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; 
compatible; MistralAI-User/1.0; +https://docs.mistral.ai/robots)" ], "description": "Mistral AI web crawler bot for content indexing" }, { "pattern": "webzio", "addition_date": "2026/04/07", "url": "https://webz.io/blog/company/from-omgilibot-to-the-webzbot-duo-a-powerful-leap-for-ethical-and-comprehensive-data-collection/#", "instances": [ "webzio (+https://webz.io/bot.html)" ], "description": "Webz.io web crawler bot for ethical data collection" } ]