# robots.txt — The Lawyer Mindset
# AI Crawler Governance Matrix (2026)

# ==========================================================================
# REAL-TIME RETRIEVAL & SEARCH BOTS — ALLOW
# These fetch content during live inference to build answers.
# Blocking one of them removes this site from the answers that product generates.
# ==========================================================================

# Each group below names one crawler token and explicitly permits the whole
# site ("Allow: /"). Tokens are matched case-insensitively by compliant parsers.

# OpenAI real-time search (powers ChatGPT search)
User-agent: OAI-SearchBot
Allow: /

# Anthropic real-time search (powers Claude search)
User-agent: Claude-SearchBot
Allow: /

# Perplexity answer engine
User-agent: PerplexityBot
Allow: /

# Brave Search (independent index, powers Brave AI and third-party search APIs)
# NOTE(review): Brave's published crawler guidance says its crawler does not
# advertise a distinct user-agent token, so this group may never match.
# Confirm before relying on it.
User-agent: BraveSearch
Allow: /

# DuckDuckGo crawler (DuckDuckBot powers DDG AI Chat and search)
User-agent: DuckDuckBot
Allow: /

# Parallel AI search (parallel.ai answer engine)
User-agent: ParallelAI
Allow: /

# You.com AI search
User-agent: YouBot
Allow: /

# Cohere AI retrieval (powers enterprise RAG and search)
User-agent: cohere-ai
Allow: /

# Meta AI search retrieval. meta-webindexer is the crawler Meta documents for
# improving Meta AI search results.
User-agent: meta-webindexer
Allow: /

# Meta FacebookBot is NOT a retrieval bot: Meta documents it as crawling to
# improve language models for speech recognition, i.e. model training. It is
# therefore blocked, in line with this file's policy on training scrapers.
# The rule is kept next to the Meta allow rule so both Meta decisions are
# read together and the token never ends up with conflicting groups.
User-agent: FacebookBot
Disallow: /

# Amazon Alexa / AI assistant crawling
User-agent: Amazonbot
Allow: /

# ==========================================================================
# USER-INITIATED PROXIES — ALLOW
# Triggered when a human pastes a URL into a chat interface.
# ==========================================================================

# OpenAI: fetches a page on behalf of a ChatGPT user
User-agent: ChatGPT-User
Allow: /

# Anthropic: fetches a page on behalf of a Claude user
User-agent: Claude-User
Allow: /

# Perplexity: user-initiated fetcher, the counterpart of PerplexityBot.
# Perplexity documents that this agent generally ignores robots.txt because a
# user requested the fetch; the group is listed to make the policy explicit.
User-agent: Perplexity-User
Allow: /

# ==========================================================================
# LEGACY SEARCH INDEXING — ALLOW
# Googlebot and Bingbot remain the foundational data layer that
# feeds AI Overviews, Copilot, and ChatGPT search grounding.
# ==========================================================================

# Google Search crawler; whole site permitted.
User-agent: Googlebot
Allow: /

# Bing crawler; whole site permitted.
User-agent: Bingbot
Allow: /

# Yandex (supports IndexNow, feeds some AI aggregators)
User-agent: YandexBot
Allow: /

# ==========================================================================
# MODEL TRAINING SCRAPERS — DISALLOW
# These harvest data for pre-training future foundation models.
# Blocking them does NOT penalize real-time search visibility.
# ==========================================================================

# Each group below names one crawler or control token and blocks the whole
# site ("Disallow: /").

# OpenAI training crawler
User-agent: GPTBot
Disallow: /

# Anthropic training crawler
User-agent: ClaudeBot
Disallow: /

# Common Crawl (open dataset widely reused for model training)
User-agent: CCBot
Disallow: /

# Apple control token: opts content out of Apple model training
User-agent: Applebot-Extended
Disallow: /

# ByteDance crawler
User-agent: Bytespider
Disallow: /

# Google control token: opts content out of Gemini training/grounding
User-agent: Google-Extended
Disallow: /

# Diffbot structured-data scraper
User-agent: Diffbot
Disallow: /

# Webz.io (Omgili) data-resale crawler
User-agent: Omgilibot
Disallow: /

# Timpi crawler
User-agent: Timpibot
Disallow: /

# Meta AI training crawler (Meta documents it as crawling for use cases such
# as training AI models).
# NOTE(review): Meta also mentions "indexing content directly" for this agent.
# Confirm that blocking it has no unwanted effect on Meta AI visibility.
User-agent: meta-externalagent
Disallow: /

# Absolute URL of the site's XML sitemap.
Sitemap: https://lawyermindset.com/sitemap.xml