# robots.txt for FontHub - Font Showcase Website
#
# Tells search engine crawlers which parts of the site they may fetch.
#
# How crawlers read this file (RFC 9309):
#   - One directive per line. A "#" starts a comment that runs to end of line.
#   - A crawler obeys ONLY the most specific User-agent group that matches it;
#     it does NOT additionally inherit the "*" group. Every named group below
#     therefore repeats the full rule set it is supposed to honor.
#   - Within a group the longest matching Allow/Disallow path wins. Some older
#     parsers use the first matching line instead, so each Allow override is
#     listed BEFORE the broader Disallow it punches a hole in.
#   - Paths must start with "/". "*" matches any characters, "$" anchors the
#     end of the URL.
#   - Some legacy parsers treat a blank line as the end of a group, so groups
#     below use "#" lines instead of blank lines as visual separators.
#   - Crawl-delay is a non-standard extension; crawlers that do not support it
#     (Googlebot among them) ignore the line.
#
# Publicly crawlable sections (allowed by default, no directive required):
#   /  /font/  /category/  /search  /about  /contact  /privacy  /submit
#   /assets/
#
# Googlebot, Bingbot, DuckDuckBot, facebookexternalhit, Twitterbot,
# LinkedInBot and Pinterest intentionally have no group of their own: a group
# containing only "Allow: /" would exempt them from every Disallow below.
# They fall back to the "*" group.

# ---------------------------------------------------------------------------
# Default rules for all crawlers
# ---------------------------------------------------------------------------
User-agent: *
#
# Uploads: images and font previews are crawlable, everything else in the
# directory (raw font files in particular) is not.
Allow: /uploads/previews/
Allow: /uploads/*.jpg
Allow: /uploads/*.jpeg
Allow: /uploads/*.png
Allow: /uploads/*.gif
Allow: /uploads/*.webp
Allow: /uploads/*.svg
Disallow: /uploads/
#
# Search result pages reached through a real query are valuable
Allow: /search?q=*
#
# Query parameters that don't add value (tracking / referral noise)
Disallow: /*?*utm_*
Disallow: /*?*ref=*
Disallow: /*?*source=*
#
# Sensitive application internals
Disallow: /config/
Disallow: /includes/
Disallow: /controllers/
Disallow: /views/
Disallow: /data/
Disallow: /logs/
#
# Download tracking URLs and direct font downloads
# (the canonical font pages should be indexed instead)
Disallow: /download/
Disallow: /downloads/
#
# Admin areas (if they exist in the future)
Disallow: /admin/
Disallow: /backend/
Disallow: /dashboard/
#
# Temporary and backup files
Disallow: /*.tmp$
Disallow: /*.bak$
Disallow: /*.old$
Disallow: /*~$
#
# Respectful crawling
Crawl-delay: 1

# ---------------------------------------------------------------------------
# Yahoo (Slurp) and Yandex: same rules as "*", slower crawl rate.
# Keep the Allow/Disallow lines in sync with the "*" group.
# ---------------------------------------------------------------------------
User-agent: Slurp
User-agent: YandexBot
Allow: /uploads/previews/
Allow: /uploads/*.jpg
Allow: /uploads/*.jpeg
Allow: /uploads/*.png
Allow: /uploads/*.gif
Allow: /uploads/*.webp
Allow: /uploads/*.svg
Disallow: /uploads/
Allow: /search?q=*
Disallow: /*?*utm_*
Disallow: /*?*ref=*
Disallow: /*?*source=*
Disallow: /config/
Disallow: /includes/
Disallow: /controllers/
Disallow: /views/
Disallow: /data/
Disallow: /logs/
Disallow: /download/
Disallow: /downloads/
Disallow: /admin/
Disallow: /backend/
Disallow: /dashboard/
Disallow: /*.tmp$
Disallow: /*.bak$
Disallow: /*.old$
Disallow: /*~$
Crawl-delay: 2

# ---------------------------------------------------------------------------
# Baidu: same rules as "*", slowest crawl rate.
# Keep the Allow/Disallow lines in sync with the "*" group.
# ---------------------------------------------------------------------------
User-agent: Baiduspider
Allow: /uploads/previews/
Allow: /uploads/*.jpg
Allow: /uploads/*.jpeg
Allow: /uploads/*.png
Allow: /uploads/*.gif
Allow: /uploads/*.webp
Allow: /uploads/*.svg
Disallow: /uploads/
Allow: /search?q=*
Disallow: /*?*utm_*
Disallow: /*?*ref=*
Disallow: /*?*source=*
Disallow: /config/
Disallow: /includes/
Disallow: /controllers/
Disallow: /views/
Disallow: /data/
Disallow: /logs/
Disallow: /download/
Disallow: /downloads/
Disallow: /admin/
Disallow: /backend/
Disallow: /dashboard/
Disallow: /*.tmp$
Disallow: /*.bak$
Disallow: /*.old$
Disallow: /*~$
Crawl-delay: 3

# ---------------------------------------------------------------------------
# Aggressive SEO / scraper bots: blocked from the whole site.
# Several User-agent lines may share one rule set.
# ---------------------------------------------------------------------------
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: SemrushBot
User-agent: MegaIndex
Disallow: /

# ---------------------------------------------------------------------------
# Sitemaps (apply to all crawlers, independent of the groups above)
# ---------------------------------------------------------------------------
# TODO: replace the placeholder domain with the production domain.
Sitemap: https://yourdomain.com/sitemap.xml
# Additional sitemaps (if created in the future)
# Sitemap: https://yourdomain.com/sitemap-images.xml
# Sitemap: https://yourdomain.com/sitemap-categories.xml

# Host information (non-standard directive; update with actual domain if used)
# Host: https://yourdomain.com

# Notes for developers:
# - Update the domain in the Sitemap URL above
# - robots.txt is advisory: it does not stop image hotlinking and does not
#   protect the Disallowed directories; enforce both at the web server
# - When changing a rule, change it in every group that repeats it
# - Monitor crawl errors in Google Search Console
# - Consider creating image and video sitemaps if content grows
# - Review and update this file quarterly
# - Test robots.txt using Google Search Console's robots.txt tester