# ==============================================================
# Rent.it
# Proudly on the internet since 1999
# ==============================================================

# Disallow the whole site for the agent named "Google Favicon".
# NOTE(review): this name contains a space, which RFC 9309 does not allow in a product token, so a parser may not match it as written - confirm this rule actually has the intended effect.
User-agent: Google Favicon
Disallow: /

# ==============================================================
# Thanks to Wikipedia for these rules
# ==============================================================

# Rule inherited from Wikipedia's list: there the bot was observed spamming large amounts of https://en.wikipedia.org/?curid=NNNNNN requests and ignoring 429 rate-limit responses. It claims to respect robots.txt: http://mj12bot.com/
User-agent: MJ12bot
Disallow: /

# Crawlers that are kind enough to obey robots.txt, but which we would rather not have unless they are feeding search engines.
# One group, several agents: the single Disallow below applies to every agent named here.
User-agent: UbiCrawler
User-agent: DOC
User-agent: Zao
Disallow: /

# Bots known to be trouble, particularly those designed to copy entire sites. Please obey robots.txt.
# One group, several agents: the single Disallow at the end applies to every agent named here.
User-agent: sitecheck.internetseer.com
User-agent: Zealbot
User-agent: MSIECrawler
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: WebCopier
User-agent: Fetch
User-agent: Offline Explorer
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebZIP
User-agent: linko
User-agent: HTTrack
User-agent: Microsoft.URL.Control
User-agent: Xenu
User-agent: larbin
User-agent: libwww
User-agent: ZyBORG
User-agent: Download Ninja
Disallow: /

# Misbehaving: sends requests much too fast.
User-agent: fast
Disallow: /

# Sorry, wget in its recursive mode is a frequent problem. Please read the man page and use it properly; for instance, the --wait option sets the delay between hits.
User-agent: wget
Disallow: /

# The 'grub' distributed client has been *very* poorly behaved.
User-agent: grub-client
Disallow: /

# Does not follow robots.txt anyway, but listed regardless.
User-agent: k2spider
Disallow: /

# Hits many times per second, which is not acceptable: http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot that downloads gazillions of pages with no public benefit: http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

# ==============================================================
# New rules 2024
# ==============================================================
# Too many requests for very old pages that return 404.
User-agent: Bytespider
Disallow: /

# AI crawlers are not allowed - see also ai.txt.
# NOTE(review): GPTBot is the only AI crawler named in this group; any other AI crawler would need its own entry - confirm whether that is intended.
User-agent: GPTBot
Disallow: /

# ==============================================================
# Final rules for all spiders
# ==============================================================

# Disabled entries, kept for reference only (commented out, so they define no group).
# User-agent: Mediapartners-Google
# User-agent: Googlebot
# User-agent: Adsbot-Google
# User-agent: Googlebot-Mobile
# User-agent: ia_archiver

# Googlebot-Image has a group of its own, so the catch-all group below does not apply to it: it may fetch every path.
User-agent: Googlebot-Image
Allow: /

# Catch-all for every crawler without a more specific group: everything is allowed except paths under /public/newsletter/.
User-agent: *
Disallow: /public/newsletter/