#
# robots.txt for http://www.protamil.com and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#
# Crawlers that are explicitly granted access to the whole site.
# Each record is separated from the next by a truly empty line so that
# parsers following the original robots.txt convention (records are
# delimited by blank lines) do not merge adjacent records.
User-agent: Mediapartners-Google
Allow: /

User-agent: Adsbot-Google
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-Mobile
Allow: /

User-agent: ia_archiver-web.archive.org
Allow: /

User-agent: ia_archiver
Allow: /

User-agent: duggmirror
Allow: /


# Wikipedia work bots: granted access to the whole site.
# The rule is written as an explicit "Allow: /" (an empty Allow value is
# ambiguous and ignored by some parsers), matching the other allow
# records in this file.
User-agent: IsraBot
Allow: /

User-agent: Orthogaffe
Allow: /


# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
# One record per crawler, separated by blank lines so that legacy parsers
# do not merge adjacent records.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /


# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
# Every bot below is denied the whole site. Records are separated by
# blank lines so that legacy parsers do not merge adjacent records.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /


#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
User-agent: wget
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
#
User-agent: NPBot
Disallow: /

#
# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
#
User-agent: WebReaper
Disallow: /

#
# Sitemap locations, one per content section of the site.
# The prefix of each URL below is a PHP short-echo of the variable $WEB,
# so this file has to be rendered by PHP before it is served; delivered
# as static text these URLs would not be valid sitemap locations.
# NOTE(review): $WEB is not defined in this file - presumably it is set
# by the including script to the absolute site origin (scheme and host,
# no trailing slash); confirm, since Sitemap URLs must be absolute.
#
Sitemap: <?=$WEB;?>/sitemap/google/arts.php
Sitemap: <?=$WEB;?>/sitemap/google/astrology.php
Sitemap: <?=$WEB;?>/sitemap/google/education.php
Sitemap: <?=$WEB;?>/sitemap/google/tamil-dictionaries.php
Sitemap: <?=$WEB;?>/sitemap/google/general-knowledge.php
Sitemap: <?=$WEB;?>/sitemap/google/india.php
Sitemap: <?=$WEB;?>/sitemap/google/jokes.php
Sitemap: <?=$WEB;?>/sitemap/google/maps.php
Sitemap: <?=$WEB;?>/sitemap/google/medical.php
Sitemap: <?=$WEB;?>/sitemap/google/womens.php
Sitemap: <?=$WEB;?>/sitemap/google/spirituality.php
Sitemap: <?=$WEB;?>/sitemap/google/fm-radio.php
Sitemap: <?=$WEB;?>/sitemap/google/search.php
Sitemap: <?=$WEB;?>/sitemap/google/world.php