###****************************************************************************
# robots.txt
#    : Robots, spiders, and search engines use this file to determine which
#      content they should *not* crawl while indexing your website.
#    : This system is called "The Robots Exclusion Standard."
#    : It is strongly encouraged to use a robots.txt validator to check
#      for valid syntax before any robots read it!
#
# Examples:
#
# Instruct all robots to stay out of the admin area.
#    : User-agent: *
#    : Disallow: /admin/
#
# Restrict Google and MSN from indexing your images.
#    : User-agent: Googlebot
#    : Disallow: /images/
#    : User-agent: MSNBot
#    : Disallow: /images/
#****************************************************************************

## robots.txt for Magento Community and Enterprise

## Layout conventions used in this file:
##  - one directive per line;
##  - a blank line ends a group in some legacy parsers, so comment lines (not
##    blank lines) are used as separators inside a group;
##  - a crawler obeys only the group that names it; it falls back to
##    "User-agent: *" only when no group names it.

## Googlebot: crawl everything except the blog year archives.
## Because Googlebot has its own group, none of the "User-agent: *" rules
## further down apply to it.
User-agent: Googlebot
Disallow: /blog/archiveyear
Disallow:

User-agent: Googlebot-image
Disallow:

## GENERAL SETTINGS
## Enable robots.txt rules for all crawlers
User-agent: *
## Crawl-delay parameter: number of seconds to wait between successive requests to the same server.
## Set a custom crawl rate if you're experiencing traffic problems with your server.
## (Crawl-delay is a non-standard extension; crawlers that do not support it ignore it.)
Crawl-delay: 30
## Magento sitemap. A Sitemap line is not tied to the user-agent group it appears in.
Sitemap: https://www.cheapjoes.com/sitemap/sitemap.xml
## DEVELOPMENT RELATED SETTINGS
## Do not crawl development files and folders: CVS, svn directories and dump files
Disallow: /CVS
Disallow: /*.svn$
Disallow: /*.idea$
Disallow: /*.sql$
Disallow: /*.tgz$
## GENERAL MAGENTO SETTINGS
## Do not crawl Magento admin page
Disallow: /admin/
## Do not crawl common Magento technical folders
Disallow: /app/
Disallow: /downloader/
Disallow: /errors/
Disallow: /includes/
Disallow: /lib/
Disallow: /pkginfo/
Disallow: /shell/
Disallow: /var/
## Do not crawl common Magento files
Disallow: /api.php
Disallow: /cron.php
Disallow: /cron.sh
Disallow: /error_log
Disallow: /get.php
Disallow: /install.php
Disallow: /LICENSE.html
Disallow: /LICENSE.txt
Disallow: /LICENSE_AFL.txt
Disallow: /README.txt
Disallow: /RELEASE_NOTES.txt
## MAGENTO SEO IMPROVEMENTS
## Do not crawl sub category pages that are sorted or filtered.
Disallow: /*?dir*
Disallow: /*?dir=desc
Disallow: /*?dir=asc
Disallow: /*?limit=all
Disallow: /*?mode*
## Do not crawl 2-nd home page copy (example.com/index.php/). Uncomment it only if you activated Magento SEO URLs.
## Disallow: /index.php/
## Do not crawl links with session IDs
Disallow: /*?SID=
## Do not crawl checkout and user account pages
Disallow: /checkout/
Disallow: /onestepcheckout/
Disallow: /customer/
Disallow: /customer/account/
Disallow: /customer/account/login/
## Do not crawl search pages and not-SEO optimized catalog links
Disallow: /catalogsearch/
Disallow: /catalog/product_compare/
Disallow: /catalog/category/view/
Disallow: /catalog/product/view/
Disallow: /customer/account/login/referrer/*
Disallow: /customer/account/login/referer/*
## SERVER SETTINGS
## Do not crawl common server technical folders and files
Disallow: /cgi-bin/
Disallow: /cleanup.php
Disallow: /apc.php
Disallow: /memcache.php
Disallow: /phpinfo.php
## IMAGE CRAWLERS SETTINGS
## NOTE(review): this directive is still part of the "User-agent: *" group,
## which already sets "Crawl-delay: 30" near its top. Parsers disagree on
## whether the first or the last value wins; pick one value and delete the other.
Crawl-delay: 10

User-agent: msnbot
Crawl-delay: 1
## Extra: Uncomment if you do not wish Google and Bing to index your images.
## NOTE(review): move these lines out of the msnbot group before uncommenting;
## a new "User-agent:" line at this position would capture all rules below it.
# User-agent: Googlebot-Image
# Disallow: /
# User-agent: msnbot-media
# Disallow: /
## From Inchoo Recommended robots.txt
## http://inchoo.net/ecommerce/ultimate-magento-robots-txt-file-examples/
## NOTE(review): every rule from here to the next "User-agent:" line belongs to
## the msnbot group above, not to "User-agent: *". If these rules were meant for
## all crawlers they have to be moved into the "*" group; msnbot would then need
## its own copy, because a named group does not inherit from "*".
# Directories
Disallow: /404/
Disallow: /app/
Disallow: /cgi-bin/
Disallow: /downloader/
Disallow: /errors/
Disallow: /includes/
#Disallow: /js/
#Disallow: /lib/
Disallow: /magento/
#Disallow: /media/
Disallow: /pkginfo/
Disallow: /report/
Disallow: /scripts/
Disallow: /shell/
#Disallow: /skin/
Disallow: /stats/
Disallow: /var/
# Paths (clean URLs)
Disallow: /index.php/
Disallow: /catalog/product_compare/
Disallow: /catalog/category/view/
Disallow: /catalog/product/view/
Disallow: /catalogsearch/
#Disallow: /checkout/
Disallow: /control/
Disallow: /contacts/
Disallow: /customer/
Disallow: /customize/
Disallow: /newsletter/
Disallow: /poll/
Disallow: /review/
Disallow: /sendfriend/
Disallow: /tag/
Disallow: /wishlist/
Disallow: /catalog/product/gallery/
# Files
Disallow: /cron.php
Disallow: /cron.sh
Disallow: /error_log
Disallow: /install.php
Disallow: /LICENSE.html
Disallow: /LICENSE.txt
Disallow: /LICENSE_AFL.txt
Disallow: /STATUS.txt
# Paths (no clean URLs)
#Disallow: /*.js$
#Disallow: /*.css$
Disallow: /*.php$
Disallow: /*?SID=
Disallow: /catalogsearch/result/?

## BOT BLACKLIST RELATED SETTINGS
## Each crawler below is denied the whole site.
## NOTE(review): a user-agent value is matched as a product token (letters,
## "-" and "_" only). Entries that carry a version number, spaces or
## parentheses (e.g. "BackDoorBot/1.0", "Mozilla/4.0 (compatible; ...)") may be
## truncated or never matched by standards-compliant parsers — verify the
## entries you rely on.

# too many repeated hits, too quick
User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

# too many repeated hits, too quick
User-agent: AhrefsBot
Disallow: /

## From http://www.robotstxt.org/robots.txt
# too many repeated hits, too quick
User-agent: litefinder
Disallow: /

# Yahoo. too many repeated hits, too quick
User-agent: Slurp
Disallow: /

# too many repeated hits, too quick
User-agent: Baidu
Disallow: /

## From http://www.seobook.com/robots.txt
# Begin block Bad-Robots from robots.txt
User-agent: Applebot
Disallow: /

User-agent: archive.org_bot
Disallow: /

User-agent: asterias
Disallow: /

User-agent: BackDoorBot/1.0
Disallow: /

User-agent: Barkrowler/0.7
Disallow: /

User-agent: Black Hole
Disallow: /

User-agent: BlowFish/1.0
Disallow: /

User-agent: BotALot
Disallow: /

User-agent: BUbiNG
Disallow: /

User-agent: BuiltBotTough
Disallow: /

User-agent: Bullseye/1.0
Disallow: /

User-agent: BunnySlippers
Disallow: /

User-agent: Cegbfeieh
Disallow: /

User-agent: CheeseBot
Disallow: /

User-agent: CherryPicker
Disallow: /

User-agent: CherryPickerElite/1.0
Disallow: /

User-agent: CherryPickerSE/1.0
Disallow: /

User-agent: Cliqzbot
Disallow: /

User-agent: coccocbot-image
Disallow: /

User-agent: coccocbot-web
Disallow: /

User-agent: CopyRightCheck
Disallow: /

User-agent: cosmos
Disallow: /

User-agent: Crescent
Disallow: /

User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
Disallow: /

User-agent: Dispatch/0.11.3
Disallow: /

User-agent: DittoSpyder
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: EmailWolf
Disallow: /

User-agent: EroCrawler
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: Facebot
Disallow: /

User-agent: Foobot
Disallow: /

User-agent: G-i-g-a-b-o-t
Disallow: /

User-agent: Harvest/1.5
Disallow: /

User-agent: hloader
Disallow: /

User-agent: httplib
Disallow: /

User-agent: humanlinks
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: InfoNaviRobot
Disallow: /

User-agent: istellabot/t.1.13
Disallow: /

User-agent: JennyBot
Disallow: /

User-agent: Kenjin Spider
Disallow: /

User-agent: Keyword Density/0.9
Disallow: /

User-agent: LexiBot
Disallow: /

User-agent: libWeb/clsHTTP
Disallow: /

User-agent: LinkextractorPro
Disallow: /

User-agent: LinkScan/8.1a Unix
Disallow: /

User-agent: LinkWalker
Disallow: /

User-agent: LNSpiderguy
Disallow: /

User-agent: lwp-trivial
Disallow: /

User-agent: lwp-trivial/1.34
Disallow: /

User-agent: Mail.RU_Bot/2.0
Disallow: /

User-agent: Mata Hari
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: Microsoft URL Control - 5.01.4511
Disallow: /

User-agent: Microsoft URL Control - 6.00.8169
Disallow: /

User-agent: MIIxpc
Disallow: /

User-agent: MIIxpc/4.2
Disallow: /

User-agent: Mister PiX
Disallow: /

User-agent: moget
Disallow: /

User-agent: moget/2.1
Disallow: /

User-agent: mozilla/4
Disallow: /

User-agent: Mozilla/4.0 (compatible; BullsEye; Windows 95)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 95)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 98)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows NT)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows XP)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows 2000)
Disallow: /

User-agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows ME)
Disallow: /

User-agent: mozilla/5
Disallow: /

User-agent: NetAnts
Disallow: /

User-agent: NICErsPRO
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Openfind
Disallow: /

User-agent: Openfind data gathere
Disallow: /

User-agent: Pinterestbot/1.0
Disallow: /

User-agent: ProPowerBot/2.14
Disallow: /

User-agent: ProWebWalker
Disallow: /

User-agent: QueryN Metasearch
Disallow: /

User-agent: RepoMonkey
Disallow: /

User-agent: RepoMonkey Bait & Tackle/v1.01
Disallow: /

User-agent: RMA
Disallow: /

User-agent: SeznamBot/3.2-test1
Disallow: /

User-agent: ScoutJet
Disallow: /

User-agent: SiteExplorer/1.1b
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: Slack-ImgProxy
Disallow: /

User-agent: SpankBot
Disallow: /

User-agent: spanner
Disallow: /

User-agent: suzuran
Disallow: /

User-agent: Szukacz/1.4
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: Telesoft
Disallow: /

User-agent: The Intraformant
Disallow: /

User-agent: TheNomad
Disallow: /

User-agent: TightTwatBot
Disallow: /

User-agent: TinEye-bot/1.31
Disallow: /

User-agent: Titan
Disallow: /

User-agent: toCrawl/UrlDispatcher
Disallow: /

User-agent: True_Robot
Disallow: /

User-agent: True_Robot/1.0
Disallow: /

User-agent: turingos
Disallow: /

User-agent: TweetmemeBot
Disallow: /

User-agent: URLy Warning
Disallow: /

User-agent: VCI
Disallow: /

User-agent: VCI WebViewer VCI WebViewer Win32
Disallow: /

User-agent: YandexBot
Disallow: /

User-agent: YandexImages/3.0
Disallow: /

User-agent: Yeti
Disallow: /

User-agent: Web Image Collector
Disallow: /

User-agent: WebAuto
Disallow: /

User-agent: WebBandit
Disallow: /

User-agent: WebBandit/3.50
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: WebEnhancer
Disallow: /

User-agent: WebmasterWorldForumBot
Disallow: /

User-agent: WebSauger
Disallow: /

User-agent: Website Quester
Disallow: /

User-agent: Webster Pro
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebZip
Disallow: /

User-agent: WebZip/4.0
Disallow: /

User-agent: Wget
Disallow: /

User-agent: Wget/1.5.3
Disallow: /

User-agent: Wget/1.6
Disallow: /

User-agent: WWW-Collector-E
Disallow: /

User-agent: Xenu's
Disallow: /

User-agent: Xenu's Link Sleuth 1.1c
Disallow: /

User-agent: Zeus
Disallow: /

User-agent: Zeus 32297 Webster Pro V2.9 Win32
Disallow: /

User-agent: ZoomBot (Linkbot 1.0)
Disallow: /

# SEO-related bots
User-agent: mj12bot
Disallow: /

User-agent: ahrefsbot
Disallow: /

## Bots
## (Byte-identical repeats of the YandexBot and discobot groups were dropped
## from this list; each is still denied by the single group kept for it.)
User-agent: swebot
Disallow: /

User-agent: aihitbot
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: sitebot
Disallow: /

User-agent: Yandex
Disallow: /

User-agent: TwengaBot-Discover
Disallow: /

User-agent: Ezooms
Disallow: /

User-agent: Bender
Disallow: /

User-agent: discobot
Disallow: /

User-agent: panscient.com
Disallow: /

User-agent: searchwebengine.net
Disallow: /

User-agent: MLBot
Disallow: /

User-agent: NextGenSearchBot
Disallow: /

User-agent: Speedy
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: NerdByNature.Bot
Disallow: /

User-agent: WBSearchBot
Disallow: /

User-agent: sistrix
Disallow: /

User-agent: SindiceBot
Disallow: /

User-agent: plukkie
Disallow: /

User-agent: findfiles.net
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: Goodzer
Disallow: /

User-agent: BacklinkCrawler
Disallow: /

User-agent: lemurwebcrawler
Disallow: /

User-agent: gigabot
Disallow: /

User-agent: FAST Enterprise Crawler 6
Disallow: /

User-agent: Sensis.com.au Web Crawler
Disallow: /

User-agent: worio bot heritrix
Disallow: /

User-agent: trovitBot
Disallow: /

User-agent: Screaming Frog SEO Spider
Disallow: /

## NOTE(review): Crawl-delay has no practical effect in a group that already
## disallows the whole site; confirm whether it was meant for another crawler.
User-agent: ShoppimonAgent
Disallow: /
Crawl-delay: 15

## NOTE(review): an earlier group in this file already sets "Disallow: /" for
## Slurp, so these crawl delays only matter if that block is removed. Decide
## whether Yahoo should be blocked or merely throttled.
User-agent: Slurp
Crawl-delay: 30

User-agent: Yahoo! Slurp
Crawl-delay: 30

Sitemap: https://www.cheapjoes.com/sitemap/sitemap.xml