## General
## Modified: 20/05/2025
#
# no forum pic   01/04/2025
# no forum feed  01/04/2025
# new robots     20/05/2025
#
# Layout: one directive per line, one group per robot, groups separated by a
# blank line. User-agent matching is case-insensitive, so each robot is listed
# only once. All rules for "every other robot" live in the single
# `User-agent: *` group at the end of the file.

#########################################
## Allowed robots
#########################################
User-agent: Googlebot
Allow: /

User-agent: AdsBot-Google
Allow: /

User-agent: AdsBot-Google-Mobile
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-Video
Allow: /

# Adsense
User-agent: Mediapartners-Google
Allow: /

User-agent: Googlebot-News
Allow: /

User-agent: Storebot-Google
Allow: /

User-agent: Google-InspectionTool
Allow: /

User-agent: GoogleOther
Allow: /

User-agent: GoogleOther-Image
Allow: /

User-agent: GoogleOther-Video
Allow: /

User-agent: bingbot
Allow: /

# https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0
User-agent: AdIdxBot
Allow: /

# https://www.criteo.com/criteo-crawler/
User-agent: CriteoBot/0.1
Allow: /

## Google and Bing agents that are disallowed
User-agent: Google-CloudVertexBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: BingPreview
Disallow: /

#########################################
## Blocked robots
#########################################
##
User-agent: adbeat_bot
Disallow: /

User-agent: ADmantX
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: barkrowler
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: claritybot
Disallow: /

User-agent: Cliqzbot
Disallow: /

User-agent: coccocbot-web
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: grapeshot
Disallow: /

User-agent: Linespider
Disallow: /

User-agent: linkdexbot
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: Pinterest
Disallow: /

User-agent: proximic
Disallow: /

User-agent: Qwarrybot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: sistrix
Disallow: /

User-agent: ShopWiki
Disallow: /

User-agent: SnapchatAds/1.0
Disallow: /

User-agent: Snap URL Preview Service; bot; snapchat
Disallow: /

User-agent: sogou spider
Disallow: /

User-agent: startmebot
Disallow: /

User-agent: trendictionbot
Disallow: /

User-agent: TTD-Content
Disallow: /

User-agent: turnitinbot
Disallow: /

User-agent: UptimeRobot/2.0
Disallow: /

User-agent: Taboolabot/3.7
Disallow: /

User-agent: SeekportBot
Disallow: /

User-agent: Qwantify-prod/1.0
Disallow: /

User-agent: Brightbot 1.0
Disallow: /

User-agent: Pernod Ricard - ClickToBuy CrawlerBot/1.0
Disallow: /

User-agent: XoviBot
Disallow: /

User-agent: YandexRenderResourcesBot/1.0
Disallow: /

# New
User-agent: Amazonbot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: PetalBot
Disallow: /

#########################################
## AI crawlers - disabled
#########################################
# Amazonbot, Bytespider, Diffbot, FacebookBot, ImagesiftBot and Omgili are
# AI-related crawlers too; they are already blocked in the "New" list above
# and are not repeated here.
User-agent: AI2Bot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: applebot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: bingsapphire
Disallow: /

User-agent: ccbot
Disallow: /

User-agent: chatglm-spider
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: copilotsapphire
Disallow: /

## Anthropic
User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: friendlycrawler
Disallow: /

## New version of the ChatGPT bot since summer 2023
User-agent: GPTBot
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

## Facebook: the Meta-ExternalAgent crawler indexes content directly in order
## to train AI models or improve products.
User-agent: meta-externalagent
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: NovaAct
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: openai
Disallow: /

User-agent: Operator
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Perplexity-User
Disallow: /

User-agent: poesearchbot
Disallow: /

User-agent: spawning-ai
Disallow: /

User-agent: summalybot
Disallow: /

User-agent: the knowledge ai
Disallow: /

User-agent: timpibot
Disallow: /

User-agent: velenpublicwebcrawler
Disallow: /

User-agent: webzio
Disallow: /

User-agent: youbot
Disallow: /

##################################################################################
## Archivers - https://darkvisitors.com/agents
##################################################################################
# ia_archiver is already blocked in the AI section above.
User-agent: archive.org_bot
Disallow: /

User-agent: Arquivo-web-crawler
Disallow: /

User-agent: heritrix
Disallow: /

User-agent: ia_archiver-web.archive.org
Disallow: /

User-agent: Nicecrawler
Disallow: /

##################################################################################
## Developer Helpers - https://darkvisitors.com/agents
##################################################################################
User-agent: 2ipbot
Disallow: /

User-agent: AhrefsSiteAudit
Disallow: /

User-agent: Chrome-Lighthouse
Disallow: /

User-agent: Dark Visitor Server
Disallow: /

User-agent: deadlinkchecker
Disallow: /

User-agent: Eyeotabot
Disallow: /

User-agent: rogerbot
Disallow: /

User-agent: SiteAuditBot
Disallow: /

User-agent: t3versionsBot
Disallow: /

User-agent: trovitBot
Disallow: /

User-agent: W3C_CSS_Validator
Disallow: /

User-agent: W3C_Validator
Disallow: /

User-agent: WellKnownBot
Disallow: /

User-agent: YakazBot
Disallow: /

##################################################################################
## Fetchers - https://darkvisitors.com/agents
##################################################################################
# startmebot is already blocked in the "Blocked robots" section above.
User-agent: BazQux
Disallow: /

User-agent: bitlybot
Disallow: /

User-agent: BublupBot
Disallow: /

User-agent: Discordbot
Disallow: /

User-agent: Embedly
Disallow: /

User-agent: facebookexternalhit
Disallow: /

User-agent: Feedly
Disallow: /

User-agent: FlipboardProxy
Disallow: /

User-agent: FreshRSS
Disallow: /

User-agent: Friendica
Disallow: /

User-agent: Google-Read-Aloud
Disallow: /

User-agent: Hatena
Disallow: /

User-agent: Iframely
Disallow: /

User-agent: inoreader
Disallow: /

User-agent: LinkedInBot
Disallow: /

User-agent: Mail.RU_Bot
Disallow: /

User-agent: Mastodon
Disallow: /

User-agent: Miniflux
Disallow: /

User-agent: NewsBlur
Disallow: /

User-agent: Nextcloud
Disallow: /

User-agent: Pinterestbot
Disallow: /

User-agent: PocketParser
Disallow: /

User-agent: redditbot
Disallow: /

User-agent: SerendeputyBot
Disallow: /

User-agent: SimplePie
Disallow: /

User-agent: SkypeUriPreview
Disallow: /

User-agent: Slackbot-LinkExpanding
Disallow: /

User-agent: Snap URL Preview Service
Disallow: /

User-agent: snapchat
Disallow: /

User-agent: Superfeedr
Disallow: /

User-agent: SurdotlyBot
Disallow: /

User-agent: Synapse
Disallow: /

User-agent: TelegramBot
Disallow: /

User-agent: Twitterbot
Disallow: /

User-agent: Viber
Disallow: /

User-agent: vkShare
Disallow: /

User-agent: WhatsApp
Disallow: /

User-agent: Yahoo Link Preview
Disallow: /

User-agent: TinyTinyRSS
Disallow: /

##################################################################################
# All the following rules apply to every other robot
##################################################################################
# Single catch-all group. These rules used to be spread over several separate
# `User-agent: *` groups; parsers that honour only the first matching group
# would have applied just the /adserver/ rule.
#
# NOTE(review): a crawler obeys only the most specific group that matches it,
# so every robot that has its own group above (Googlebot, bingbot,
# AdsBot-Google, ...) ignores the rules below. If the forum, /cpad/ or
# /adserver/ exclusions are meant to apply to those robots as well, the
# Disallow lines must be repeated inside their groups.
User-agent: *
Crawl-delay: 2

# Ad server
Disallow: /adserver/

# Do not crawl these directories
Disallow: /societe

#########################################
# Do not crawl these forum directories
#########################################
Disallow: /forum_mobilier_et_arts_decoratifs/viewforum.php/memberlist.php
Disallow: /forum_mobilier_et_arts_decoratifs/viewforum.php/viewtopic.php
# Prefer viewtopic.php?t= (the topic rather than each individual post)
Disallow: /forum_mobilier_et_arts_decoratifs/viewtopic.php?p=*
Disallow: /forum_mobilier_et_arts_decoratifs/memberlist.php
Disallow: /forum_mobilier_et_arts_decoratifs/app.php/feed/*
Disallow: /forum_mobilier_et_arts_decoratifs/feed/*
Disallow: /forum_mobilier_et_arts_decoratifs/search.php?author_id=*
# Disallow pictures (for now) since they are not taken into account anyway
# Disallow: /forum_mobilier_et_arts_decoratifs/download/file.php?id=*

#########################################
# Saving resources of search-engine crawlers
#########################################
# Temporary: do not index the search.php pagination (for now)
# Disallow: /forum_mobilier_et_arts_decoratifs/search.php?*
# Temporary: do not crawl URLs carrying a query string (for now)
# Disallow: /*?*

#########################################
# Do not allow access to the cpad advertising loader
#########################################
Disallow: /cpad/