# Default rules for every crawler that has no dedicated group further down.
# One directive per line: a "#" starts a comment that runs to the end of the line,
# so directives must never share a line with a preceding comment.
# No blank lines inside the group, because some parsers treat them as group separators.
#
# Slow down bots
User-agent: *
# Crawl-delay is not part of RFC 9309; only crawlers that implement it honour it.
Crawl-delay: 10
Disallow: /author/
Disallow: /archives/
Disallow: /category/
Disallow: /date/
Disallow: /comments/feed/
Disallow: /tag/
Disallow: /recommendations/
Disallow: /CVS
Disallow: /*.svn$
Disallow: /*.idea$
Disallow: /*.sql$
Disallow: /*.tgz$
# Matches every URL that contains a query string (for wildcard-aware crawlers).
Disallow: /*?
Disallow: /*.php$
Disallow: /*.CVS
# NOTE(review): path matching is case-sensitive, so this matches ".Zip" but not ".zip" - confirm intent.
Disallow: /*.Zip$
#
## GENERAL MAGENTO SETTINGS
## Do not crawl Magento admin page
Disallow: /admin/
## Do not crawl common Magento technical folders
Disallow: /app/
Disallow: /downloader/
Disallow: /errors/
Disallow: /includes/
Disallow: /lib/
Disallow: /pkginfo/
Disallow: /shell/
Disallow: /var/
Disallow: /bin/
Disallow: /dev/
Disallow: /phpserver/
Disallow: /report/
Disallow: /setup/
Disallow: /update/
Disallow: /vendor/
## Do not crawl common Magento files
Disallow: /api.php
Disallow: /cron.php
Disallow: /cron.sh
Disallow: /error_log
Disallow: /get.php
Disallow: /install.php
Disallow: /LICENSE.html
Disallow: /LICENSE.txt
Disallow: /LICENSE_AFL.txt
Disallow: /README.txt
Disallow: /RELEASE_NOTES.txt
#
## MAGENTO SEO IMPROVEMENTS
## Do not crawl sub category pages that are sorted or filtered.
Disallow: /*?dir*
Disallow: /*?dir=desc
Disallow: /*?dir=asc
Disallow: /*?limit=all
Disallow: /*?mode*
Disallow: /*?*product_list_mode=
Disallow: /*?*product_list_order=
Disallow: /*?*product_list_limit=
Disallow: /*?*product_list_dir=
## Do not crawl the second home page copy (example.com/index.php/).
## Uncomment it only if you activated Magento SEO URLs.
## Disallow: /index.php/
## Do not crawl links with session IDs
Disallow: /*?SID=
## Do not crawl checkout and user account pages
Disallow: /checkout/
Disallow: /checkout/klarna/
Disallow: /checkout/cart/
Disallow: /customer/
Disallow: /customer/account/
Disallow: /customer/account/login/
Disallow: /control/
Disallow: /contacts/
Disallow: /customize/
Disallow: /newsletter/
Disallow: /review/
Disallow: /sendfriend/
Disallow: /wishlist/
Disallow: /prescription/
## Do not crawl search pages and non-SEO-optimized catalog links
Disallow: /search/
Disallow: /catalog/product_compare/
Disallow: /catalog/category/view/
#
## SERVER SETTINGS
## Do not crawl common server technical folders and files
Disallow: /cgi-bin/
Disallow: /cleanup.php
Disallow: /apc.php
Disallow: /memcache.php
Disallow: /phpinfo.php
## Do not crawl root folder files
## (paths already listed under "common Magento files" above are not repeated)
Disallow: /LICENSE_EE.html
Disallow: /LICENSE_EE.txt
Disallow: /README.md
Disallow: /Vagrantfile
Disallow: /amartinez_customimportexport.php
Disallow: /backup.sh
Disallow: /healthcheck
Disallow: /m_test_search_speed.php
Disallow: /composer.json
Disallow: /composer.lock
Disallow: /CONTRIBUTING.md
Disallow: /CONTRIBUTOR_LICENSE_AGREEMENT.html
Disallow: /COPYING.txt
Disallow: /Gruntfile.js
Disallow: /nginx.conf.sample
Disallow: /package.json
Disallow: /php.ini.sample

# www.robotstxt.org/
# www.google.com/support/webmasters/bin/answer.py?hl=en&answer=156449

# Crawlers that are blocked from the whole site: one group per user agent.

# Disallow: Sistrix
User-agent: sistrix
Disallow: /

# Disallow: Sistrix
User-agent: SISTRIX Crawler
Disallow: /

# Disallow: Sistrix
User-agent: SISTRIX
Disallow: /

# Disallow: SEOkicks-Robot
User-agent: SEOkicks-Robot
Disallow: /

# Disallow: jobs.de-Robot
User-agent: jobs.de-Robot
Disallow: /

# Bot of Unister Holding GmbH (Leipzig)
User-agent: UnisterBot
Disallow: /
# http://www.opensiteexplorer.org/dotbot
User-agent: DotBot
Disallow: /

User-agent: dotbot
Disallow: /

# http://www.searchmetrics.com
User-agent: SearchmetricsBot
Disallow: /

# http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
User-agent: MJ12bot
Disallow: /

# http://www.domaintools.com/webmasters/surveybot.php
User-agent: SurveyBot
Disallow: /

# http://www.seodiver.com/bot
User-agent: SEOdiver
Disallow: /

# http://openlinkprofiler.org/bot
User-agent: spbot
Disallow: /

# http://www.wotbox.com/bot/
User-agent: wotbox
Disallow: /

# http://www.meanpath.com/meanpathbot.html
User-agent: meanpathbot
Disallow: /

# http://www.backlinktest.com/crawler.html
User-agent: BacklinkCrawler
Disallow: /

# http://www.brandwatch.com/magpie-crawler/
User-agent: magpie-crawler
Disallow: /

# http://filterdb.iss.net/crawler/
User-agent: oBot
Disallow: /

User-agent: fr-crawler
Disallow: /

# http://webmeup-crawler.com
User-agent: BLEXBot
Disallow: /

# https://megaindex.com/crawler
User-agent: MegaIndex.ru
Disallow: /

User-agent: megaindex.com
Disallow: /

# http://www.cloudservermarket.com
User-agent: CloudServerMarketSpider
Disallow: /

# http://www.trendiction.de/de/publisher/bot
User-agent: trendictionbot
Disallow: /

# http://www.exalead.com
User-agent: Exabot
Disallow: /

# http://www.career-x.de/bot.html
User-agent: careerbot
Disallow: /

# https://www.lipperhey.com/en/about/
User-agent: Lipperhey-Kaus-Australis
Disallow: /

User-agent: seoscanners.net
Disallow: /

User-agent: MetaJobBot
Disallow: /

User-agent: Spiderbot
Disallow: /

User-agent: LinkStats
Disallow: /

User-agent: JobboerseBot
Disallow: /

User-agent: ICCrawler
Disallow: /

User-agent: Plista
Disallow: /

User-agent: Domain Re-Animator Bot
Disallow: /

# https://turnitin.com/robot/crawlerinfo.html
User-agent: turnitinbot
Disallow: /

# http://help.coccoc.com/
User-agent: coccoc
Disallow: /

# ubermetrics-technologies.com
User-agent: um-IC
Disallow: /

# datenbutler.de
User-agent: mindUpBot
Disallow: /

# http://searchgears.de/uber-uns/crawling-faq.html
User-agent: sg-Orbiter
Disallow: /

# http://commoncrawl.org/faq/
User-agent: CCBot
Disallow: /

# https://www.qwant.com/
User-agent: Qwantify
Disallow: /

# http://linkfluence.net/
User-agent: Kraken
Disallow: /

# http://www.botje.com/plukkie.htm
User-agent: plukkie
Disallow: /

# https://www.safedns.com/searchbot
User-agent: SafeDNSBot
Disallow: /

# http://www.haosou.com/help/help_3_2.html
User-agent: 360Spider
Disallow: /

# http://www.haosou.com/help/help_3_2.html
User-agent: HaosouSpider
Disallow: /

# http://www.moz.com/dp/rogerbot
User-agent: rogerbot
Disallow: /

# http://www.openhose.org/bot.html
User-agent: OpenHoseBot
Disallow: /

# http://thumbsniper.com
User-agent: ThumbSniper
Disallow: /

# http://www.radian6.com/crawler
User-agent: R6_CommentReader
Disallow: /

User-agent: ImplisenseBot
Disallow: /

# http://cliqz.com/company/cliqzbot
User-agent: Cliqzbot
Disallow: /

# https://www.aihitdata.com/about
User-agent: aiHitBot
Disallow: /

# http://www.trendiction.com/en/publisher/bot
User-agent: trendictionbot
Disallow: /

# http://seocompany.store
User-agent: adscanner
Disallow: /

# https://github.com/yasserg/crawler4j/
User-agent: crawler4j
Disallow: /

# http://warebay.com/bot.html
User-agent: WBSearchBot
Disallow: /

# NOTE(review): this value contains "/" and a space, which are outside the RFC 9309
# product-token characters, so it may never match a crawler - confirm.
User-agent: Python/3.5 aiohttp
Disallow: /

User-agent: Toweya.com
Disallow: /

# http://www.website-datenbank.de/
User-agent: netEstate
Disallow: /

# http://law.di.unimi.it/BUbiNG.html
User-agent: BUbiNG
Disallow: /

# http://www.linguee.com/bot; bot@linguee.com
User-agent: Linguee
Disallow: /

# https://www.semrush.com/bot/
User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

# www.sentibot.eu
User-agent: sentibot
Disallow: /

User-agent: SentiBot
Disallow: /

# http://velen.io
User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: DomainCrawler
Disallow: /

# https://moz.com/help/guides/moz-procedures/what-is-rogerbot
User-agent: rogerbot
Disallow: /

User-agent: IndeedBot
Disallow: /

# http://www.garlik.com
User-agent: GarlikCrawler
Disallow: /

# https://www.gosign.de/typo3-extension/typo3-sicherheitsmonitor/
User-agent: Gosign-Security-Crawler
Disallow: /

# http://www.siteliner.com/bot
User-agent: Siteliner
Disallow: /

# https://sabsim.com
User-agent: SabsimBot
Disallow: /

# http://ltx71.com/
User-agent: ltx71
Disallow: /

# Website Sitemap
# Sitemap is independent of the user-agent groups and must be on its own line to be recognised.
Sitemap: https://www.tokmanni.fi/sitemap/sitemap.xml