robots.php

This is a standalone inc/seo/robots.php file for your RX theme, with safe defaults, support for admin controls, robots.txt output, robots meta tags, X-Robots-Tag headers, sitemap references, and helper filters.

Below is an advanced robots.php file for your RX theme.

Place it here:

rx-theme/inc/seo/robots.php

Then load it from your functions.php:

require_once get_template_directory() . '/inc/seo/robots.php';

inc/seo/robots.php

<?php
/**
 * RX Theme - Advanced Robots Manager
 *
 * File: inc/seo/robots.php
 *
 * Features:
 * - Advanced virtual robots.txt output
 * - Automatic sitemap discovery
 * - WordPress core robots meta enhancement
 * - Per-page robots meta rules
 * - Noindex for low-value archive/search pages
 * - WooCommerce-friendly rules
 * - AI crawler blocking option
 * - Bad bot blocking option
 * - Crawl-delay support
 * - Custom allow/disallow rules
 * - X-Robots-Tag HTTP header support
 * - Admin setting-ready architecture
 * - Developer filters
 *
 * @package RX_Theme
 */

defined( 'ABSPATH' ) || exit;

// Fallback theme version, defined only when nothing has defined it earlier.
defined( 'RX_THEME_VERSION' ) || define( 'RX_THEME_VERSION', '1.0.0' );

// Option name under which the robots settings array is read and registered.
defined( 'RX_ROBOTS_OPTION_KEY' ) || define( 'RX_ROBOTS_OPTION_KEY', 'rx_theme_robots_settings' );

/**
 * Build the default robots settings.
 *
 * The defaults are declared section by section and merged into one flat
 * array. All keys are unique strings, so the merge keeps declaration order.
 * The values can later be wired to the Customizer, a theme options screen,
 * or an ACF options page.
 *
 * @return array Flat map of setting key => default value.
 */
function rx_robots_default_settings() {
	// Master switches.
	$general = array(
		'enabled'                          => true,
		'disable_search_engine_visibility' => false,
	);

	// robots.txt output and sitemap references.
	$robots_txt = array(
		'robots_txt_enabled'  => true,
		'add_sitemap'         => true,
		'add_wp_sitemap'      => true,
		'add_custom_sitemaps' => true,
		'crawl_delay_enabled' => false,
		'crawl_delay'         => 10,
	);

	// Basic disallow areas.
	$basic_disallow = array(
		'disallow_wp_admin'         => true,
		'allow_admin_ajax'          => true,
		'disallow_wp_includes'      => false,
		'disallow_trackbacks'       => true,
		'disallow_feeds'            => false,
		'disallow_comments_feed'    => true,
		'disallow_search'           => true,
		'disallow_author_archives'  => false,
		'disallow_date_archives'    => false,
		'disallow_tag_archives'     => false,
		'disallow_attachment_pages' => true,
		'disallow_preview_urls'     => true,
		'disallow_query_strings'    => false,
	);

	// Sensitive or low-value URL patterns.
	$sensitive_disallow = array(
		'disallow_login'         => true,
		'disallow_register'      => true,
		'disallow_cart_checkout' => true,
		'disallow_user_account'  => true,
		'disallow_private_paths' => true,
	);

	// AI crawler controls and aggressive bot blocking.
	$bot_blocking = array(
		'block_ai_crawlers' => false,
		'block_bad_bots'    => false,
	);

	// Meta robots settings.
	$meta_robots = array(
		'meta_robots_enabled'      => true,
		'noindex_search_results'   => true,
		'noindex_404'              => true,
		'noindex_paged_archives'   => false,
		'noindex_author_archives'  => false,
		'noindex_date_archives'    => false,
		'noindex_tag_archives'     => false,
		'noindex_attachment_pages' => true,
		'noindex_feed_pages'       => true,
		'nofollow_search_results'  => true,
		'noarchive_search_results' => false,
		'nosnippet_search_results' => false,
		'max_snippet'              => -1,
		'max_image_preview'        => 'large',
		'max_video_preview'        => -1,
	);

	// X-Robots-Tag header.
	$headers = array(
		'x_robots_header_enabled' => false,
		'x_robots_header_media'   => false,
	);

	/*
	 * Free-form lists.
	 *
	 * custom_robots_rules example:
	 *     array(
	 *         'Disallow: /private-folder/',
	 *         'Allow: /private-folder/public-file.pdf',
	 *     )
	 *
	 * custom_sitemaps example:
	 *     array(
	 *         home_url( '/post-sitemap.xml' ),
	 *         home_url( '/page-sitemap.xml' ),
	 *     )
	 *
	 * extra_noindex_* hold post type / taxonomy names that should be noindexed.
	 */
	$lists = array(
		'custom_robots_rules'      => array(),
		'custom_sitemaps'          => array(),
		'extra_noindex_post_types' => array(),
		'extra_noindex_taxonomies' => array(),
	);

	return array_merge(
		$general,
		$robots_txt,
		$basic_disallow,
		$sensitive_disallow,
		$bot_blocking,
		$meta_robots,
		$headers,
		$lists
	);
}

/**
 * Load the effective robots settings.
 *
 * Reads the stored option, discards it when it is not an array, overlays it
 * on the defaults and passes the result through the `rx_robots_settings`
 * filter.
 *
 * @return array
 */
function rx_robots_get_settings() {
	$defaults = rx_robots_default_settings();
	$stored   = get_option( RX_ROBOTS_OPTION_KEY, array() );
	$merged   = wp_parse_args( is_array( $stored ) ? $stored : array(), $defaults );

	/**
	 * Filter RX robots settings.
	 *
	 * @param array $settings Robots settings.
	 */
	return apply_filters( 'rx_robots_settings', $merged );
}

/**
 * Tell whether the robots module is switched on.
 *
 * @return bool True when the `enabled` setting is present and truthy.
 */
function rx_robots_is_enabled() {
	$current = rx_robots_get_settings();

	return isset( $current['enabled'] ) && (bool) $current['enabled'];
}

/**
 * Normalise a URL path.
 *
 * Trims the value, returns an empty string for empty input, makes sure the
 * path starts with a slash and runs it through esc_url_raw().
 *
 * @param string $path URL path.
 * @return string
 */
function rx_robots_clean_path( $path ) {
	$normalized = trim( (string) $path );

	if ( '' === $normalized ) {
		return '';
	}

	// Prefix a slash unless the first character already is one.
	if ( '/' !== substr( $normalized, 0, 1 ) ) {
		$normalized = '/' . $normalized;
	}

	return esc_url_raw( $normalized );
}

/**
 * Append a single rule to a list of robots.txt lines.
 *
 * The rule is trimmed first; an empty rule leaves the list untouched.
 *
 * @param array  $lines Robots lines.
 * @param string $rule Rule text.
 * @return array The list, with the trimmed rule appended when non-empty.
 */
function rx_robots_add_line( $lines, $rule ) {
	$clean = trim( (string) $rule );

	if ( '' !== $clean ) {
		$lines[] = $clean;
	}

	return $lines;
}

/**
 * Build the rule lines for the shared `User-agent: *` group.
 *
 * Every enabled setting contributes its Allow/Disallow lines in a fixed
 * order, followed by an optional Crawl-delay line. All path patterns start
 * with "/" so they are valid for parsers that follow the robots.txt path
 * grammar strictly (a pattern such as `?attachment_id=` or `*&#47;feed/`
 * that does not start with a slash may be ignored).
 *
 * @return array De-duplicated rule lines, passed through the
 *               `rx_robots_common_rules` filter.
 */
function rx_robots_common_rules() {
	$settings = rx_robots_get_settings();

	// Setting key => lines emitted when that setting is truthy (order matters).
	$rule_map = array(
		'disallow_wp_admin'         => array( 'Disallow: /wp-admin/' ),
		'allow_admin_ajax'          => array( 'Allow: /wp-admin/admin-ajax.php' ),
		'disallow_wp_includes'      => array( 'Disallow: /wp-includes/' ),
		'disallow_login'            => array(
			'Disallow: /wp-login.php',
			'Disallow: /login/',
		),
		'disallow_register'         => array(
			'Disallow: /register/',
			'Disallow: /wp-signup.php',
		),
		'disallow_trackbacks'       => array(
			'Disallow: /*/trackback/',
			'Disallow: /trackback/',
		),
		'disallow_comments_feed'    => array( 'Disallow: /comments/feed/' ),
		'disallow_feeds'            => array(
			'Disallow: /*/feed/',
			'Disallow: /feed/',
		),
		'disallow_search'           => array(
			'Disallow: /?s=',
			'Disallow: /search/',
		),
		'disallow_author_archives'  => array( 'Disallow: /author/' ),
		// NOTE(review): prefix match - this also blocks every other URL that
		// starts with "/20" (for example date-based post permalinks). Confirm
		// the permalink structure before enabling this setting.
		'disallow_date_archives'    => array( 'Disallow: /20' ),
		'disallow_tag_archives'     => array( 'Disallow: /tag/' ),
		'disallow_attachment_pages' => array(
			'Disallow: /attachment/',
			'Disallow: /*?attachment_id=',
		),
		'disallow_preview_urls'     => array(
			'Disallow: /*preview=true',
			'Disallow: /*preview_id=',
			'Disallow: /*preview_nonce=',
		),
		'disallow_query_strings'    => array( 'Disallow: /*?' ),
		'disallow_cart_checkout'    => array(
			'Disallow: /cart/',
			'Disallow: /checkout/',
			'Disallow: /basket/',
			'Disallow: /my-cart/',
		),
		'disallow_user_account'     => array(
			'Disallow: /my-account/',
			'Disallow: /account/',
			'Disallow: /dashboard/',
			'Disallow: /profile/',
		),
		'disallow_private_paths'    => array(
			'Disallow: /private/',
			'Disallow: /tmp/',
			'Disallow: /temp/',
			'Disallow: /backup/',
			'Disallow: /backups/',
			'Disallow: /logs/',
			'Disallow: /cgi-bin/',
		),
	);

	$lines = array();

	foreach ( $rule_map as $setting_key => $rules ) {
		if ( ! empty( $settings[ $setting_key ] ) ) {
			$lines = array_merge( $lines, $rules );
		}
	}

	// Crawl-delay is only written for a positive number of seconds.
	if ( ! empty( $settings['crawl_delay_enabled'] ) ) {
		$delay = isset( $settings['crawl_delay'] ) ? absint( $settings['crawl_delay'] ) : 0;

		if ( $delay > 0 ) {
			$lines[] = 'Crawl-delay: ' . $delay;
		}
	}

	/**
	 * Filter common robots.txt rules.
	 *
	 * @param array $lines Rules.
	 */
	return apply_filters( 'rx_robots_common_rules', array_values( array_unique( $lines ) ) );
}

/**
 * List the user agents treated as AI crawlers.
 *
 * The built-in list is handed to the `rx_robots_ai_crawlers` filter so it can
 * be extended or trimmed.
 *
 * @return array
 */
function rx_robots_ai_crawlers() {
	/**
	 * Filter AI crawler list.
	 *
	 * @param array $crawlers AI crawlers.
	 */
	return apply_filters(
		'rx_robots_ai_crawlers',
		array(
			'GPTBot',
			'ChatGPT-User',
			'OAI-SearchBot',
			'Google-Extended',
			'CCBot',
			'ClaudeBot',
			'Claude-Web',
			'Anthropic-ai',
			'PerplexityBot',
			'Perplexity-User',
			'Bytespider',
			'Amazonbot',
			'Applebot-Extended',
			'Meta-ExternalAgent',
			'Meta-ExternalFetcher',
			'FacebookBot',
			'Diffbot',
			'YouBot',
			'ImagesiftBot',
			'cohere-ai',
			'AI2Bot',
			'Kangaroo Bot',
			'PanguBot',
		)
	);
}

/**
 * List the user agents blocked by the "bad bots" option.
 *
 * The built-in list is handed to the `rx_robots_bad_bots` filter so it can be
 * extended or trimmed.
 *
 * @return array
 */
function rx_robots_bad_bots() {
	/**
	 * Filter bad bot list.
	 *
	 * @param array $bots Bad bots.
	 */
	return apply_filters(
		'rx_robots_bad_bots',
		array(
			'AhrefsBot',
			'SemrushBot',
			'DotBot',
			'MJ12bot',
			'BLEXBot',
			'PetalBot',
			'YandexBot',
			'Baiduspider',
			'SeznamBot',
			'MauiBot',
			'MegaIndex',
			'serpstatbot',
			'DataForSeoBot',
			'Barkrowler',
			'SeekportBot',
			'TurnitinBot',
			'ZoominfoBot',
		)
	);
}

/**
 * Turn a list of user agents into "block everything" robots.txt groups.
 *
 * Each non-empty agent yields three lines: a blank separator, its
 * `User-agent:` line and `Disallow: /`.
 *
 * @param array $agents User agents.
 * @return array
 */
function rx_robots_build_block_section( $agents ) {
	$section = array();

	foreach ( $agents as $raw_agent ) {
		$name = trim( (string) $raw_agent );

		// Blank entries produce no group at all.
		if ( '' !== $name ) {
			array_push( $section, '', 'User-agent: ' . $name, 'Disallow: /' );
		}
	}

	return $section;
}

/**
 * Collect the sitemap URLs to advertise in robots.txt.
 *
 * Only sitemaps that are likely to exist are returned, so robots.txt does not
 * point crawlers at URLs that answer 404:
 * - the core sitemap, when the setting is on and core reports sitemaps enabled;
 * - the SEO-plugin index, when a known plugin constant is defined;
 * - well-known sitemap file names that exist as physical files;
 * - the configured custom sitemap URLs.
 *
 * @return array List of sitemap URLs, passed through `rx_robots_sitemaps`.
 */
function rx_robots_get_sitemaps() {
	$settings = rx_robots_get_settings();
	$sitemaps = array();

	// Core sitemap: skip it when core sitemaps are unavailable or switched off.
	if ( ! empty( $settings['add_wp_sitemap'] ) && function_exists( 'wp_sitemaps_get_server' ) ) {
		$server = wp_sitemaps_get_server();

		if ( is_object( $server ) && method_exists( $server, 'sitemaps_enabled' ) && $server->sitemaps_enabled() ) {
			$sitemaps[] = home_url( '/wp-sitemap.xml' );
		}
	}

	$candidates = array();

	// NOTE(review): assumes Yoast SEO (WPSEO_VERSION) and Rank Math
	// (RANK_MATH_VERSION) serve their index at /sitemap_index.xml and that the
	// plugin's sitemap module is on - confirm for the installed versions.
	if ( defined( 'WPSEO_VERSION' ) || defined( 'RANK_MATH_VERSION' ) ) {
		$candidates[] = home_url( '/sitemap_index.xml' );
	}

	// Well-known file names are only listed when the file is really there.
	// NOTE(review): looks in ABSPATH; when WordPress lives in a subdirectory
	// the site root may be a different folder - confirm.
	$known_files = array(
		'sitemap.xml',
		'sitemap_index.xml',
		'post-sitemap.xml',
		'page-sitemap.xml',
		'category-sitemap.xml',
	);

	foreach ( $known_files as $file_name ) {
		if ( file_exists( ABSPATH . $file_name ) ) {
			$candidates[] = home_url( '/' . $file_name );
		}
	}

	/**
	 * Filter the auto-detected plugin/file sitemap URLs.
	 *
	 * Use this to re-add a sitemap that detection cannot see.
	 *
	 * @param array $candidates Detected sitemap URLs.
	 */
	$candidates = apply_filters( 'rx_robots_common_sitemap_candidates', $candidates );

	if ( is_array( $candidates ) ) {
		$sitemaps = array_merge( $sitemaps, $candidates );
	}

	if ( ! empty( $settings['add_custom_sitemaps'] ) && ! empty( $settings['custom_sitemaps'] ) && is_array( $settings['custom_sitemaps'] ) ) {
		foreach ( $settings['custom_sitemaps'] as $custom_sitemap ) {
			// esc_url_raw() expects a string; ignore anything else.
			if ( ! is_string( $custom_sitemap ) ) {
				continue;
			}

			$custom_sitemap = esc_url_raw( trim( $custom_sitemap ) );

			if ( ! empty( $custom_sitemap ) ) {
				$sitemaps[] = $custom_sitemap;
			}
		}
	}

	$sitemaps = array_values( array_unique( array_filter( $sitemaps, 'is_string' ) ) );

	/**
	 * Filter sitemap URLs added to robots.txt.
	 *
	 * @param array $sitemaps Sitemap URLs.
	 */
	return apply_filters( 'rx_robots_sitemaps', $sitemaps );
}

/**
 * Main robots.txt output.
 *
 * Callback for the `robots_txt` filter. When the module and the robots.txt
 * feature are enabled it replaces the incoming output with a file built from
 * the RX settings; otherwise the incoming output is returned untouched.
 *
 * Layout notes:
 * - Custom rules are written directly under the `User-agent: *` group with no
 *   blank line in between, because some parsers treat a blank line as the end
 *   of a group and would then drop the rules that follow it.
 * - Blank separator lines are kept as written (no de-duplication of lines).
 *
 * @param string $output Existing robots.txt output.
 * @param bool   $public Whether site is public.
 * @return string
 */
function rx_robots_txt_output( $output, $public ) {
	if ( ! rx_robots_is_enabled() ) {
		return $output;
	}

	$settings = rx_robots_get_settings();

	if ( empty( $settings['robots_txt_enabled'] ) ) {
		return $output;
	}

	$lines = array(
		'# RX Theme Advanced Robots.txt',
		'# Generated dynamically by WordPress',
		'# Site: ' . home_url( '/' ),
		'',
	);

	// Sitemap lines are shared by the private and the public layout.
	$sitemap_lines = array();

	if ( ! empty( $settings['add_sitemap'] ) ) {
		foreach ( (array) rx_robots_get_sitemaps() as $sitemap ) {
			$sitemap_lines[] = 'Sitemap: ' . esc_url_raw( $sitemap );
		}
	}

	// Respect the WordPress Search Engine Visibility setting and the RX override.
	if ( ! $public || ! empty( $settings['disable_search_engine_visibility'] ) ) {
		$lines[] = 'User-agent: *';
		$lines[] = 'Disallow: /';

		if ( ! empty( $sitemap_lines ) ) {
			$lines[] = '';
			$lines   = array_merge( $lines, $sitemap_lines );
		}

		return implode( "\n", $lines ) . "\n";
	}

	$lines[] = 'User-agent: *';
	$lines   = array_merge( $lines, array_values( (array) rx_robots_common_rules() ) );

	if ( ! empty( $settings['custom_robots_rules'] ) && is_array( $settings['custom_robots_rules'] ) ) {
		$custom_lines = array();

		foreach ( $settings['custom_robots_rules'] as $custom_rule ) {
			if ( ! is_scalar( $custom_rule ) ) {
				continue;
			}

			// One entry may hold several lines; emit each as its own rule.
			$pieces = preg_split( '/\r\n|\r|\n/', wp_strip_all_tags( (string) $custom_rule ) );

			foreach ( (array) $pieces as $piece ) {
				$piece = trim( (string) $piece );

				if ( '' !== $piece ) {
					$custom_lines[] = $piece;
				}
			}
		}

		if ( ! empty( $custom_lines ) ) {
			// Comment line only - no blank line, so the rules stay in the group above.
			$lines[] = '# Custom RX Rules';
			$lines   = array_merge( $lines, $custom_lines );
		}
	}

	if ( ! empty( $settings['block_ai_crawlers'] ) ) {
		$lines[] = '';
		$lines[] = '# Block AI Crawlers';
		$lines   = array_merge( $lines, rx_robots_build_block_section( (array) rx_robots_ai_crawlers() ) );
	}

	if ( ! empty( $settings['block_bad_bots'] ) ) {
		$lines[] = '';
		$lines[] = '# Block Aggressive SEO / Scraper Bots';
		$lines   = array_merge( $lines, rx_robots_build_block_section( (array) rx_robots_bad_bots() ) );
	}

	if ( ! empty( $sitemap_lines ) ) {
		$lines[] = '';
		$lines[] = '# Sitemaps';
		$lines   = array_merge( $lines, $sitemap_lines );
	}

	$lines[] = '';
	$lines[] = '# End RX Theme Robots.txt';

	/**
	 * Filter final robots.txt lines.
	 *
	 * @param array $lines Robots lines.
	 */
	$lines = apply_filters( 'rx_robots_txt_lines', $lines );

	// A filter callback may hand back something that is not a list of strings.
	$lines = array_filter( (array) $lines, 'is_scalar' );
	$lines = array_map( 'trim', array_map( 'strval', $lines ) );

	return implode( "\n", $lines ) . "\n";
}
add_filter( 'robots_txt', 'rx_robots_txt_output', 99, 2 );

/**
 * Work out the robots meta directives for the current request.
 *
 * Starts from `index, follow`, then swaps in `noindex` (and, for search
 * results, optionally `nofollow`, `noarchive`, `nosnippet`) according to the
 * settings and the WordPress conditional tags. Preview limits
 * (`max-snippet`, `max-image-preview`, `max-video-preview`) are appended as
 * `name:value` keys. The result is a map of directive => true.
 *
 * @return array Empty when the module or the meta feature is disabled.
 */
function rx_robots_meta_directives() {
	$settings = rx_robots_get_settings();

	if ( ! rx_robots_is_enabled() || empty( $settings['meta_robots_enabled'] ) ) {
		return array();
	}

	// Replaces `index` with `noindex`, leaving every other key in place.
	$with_noindex = static function ( array $current ) {
		unset( $current['index'] );
		$current['noindex'] = true;

		return $current;
	};

	// Indexable pages start out as "index, follow".
	$directives = array(
		'index'  => true,
		'follow' => true,
	);

	// Search results have the richest set of switches, so they are handled first.
	if ( is_search() && ! empty( $settings['noindex_search_results'] ) ) {
		$directives = $with_noindex( $directives );

		if ( ! empty( $settings['nofollow_search_results'] ) ) {
			unset( $directives['follow'] );
			$directives['nofollow'] = true;
		}

		foreach ( array( 'noarchive', 'nosnippet' ) as $extra ) {
			if ( ! empty( $settings[ $extra . '_search_results' ] ) ) {
				$directives[ $extra ] = true;
			}
		}
	}

	// Setting key => conditional tag that must be true for the setting to apply.
	$noindex_checks = array(
		'noindex_404'              => 'is_404',
		'noindex_author_archives'  => 'is_author',
		'noindex_date_archives'    => 'is_date',
		'noindex_tag_archives'     => 'is_tag',
		'noindex_paged_archives'   => 'is_paged',
		'noindex_attachment_pages' => 'is_attachment',
		'noindex_feed_pages'       => 'is_feed',
	);

	foreach ( $noindex_checks as $setting_key => $conditional_tag ) {
		if ( call_user_func( $conditional_tag ) && ! empty( $settings[ $setting_key ] ) ) {
			$directives = $with_noindex( $directives );
		}
	}

	// Post types explicitly listed for noindex.
	if (
		is_singular()
		&& ! empty( $settings['extra_noindex_post_types'] )
		&& is_array( $settings['extra_noindex_post_types'] )
		&& in_array( get_post_type(), $settings['extra_noindex_post_types'], true )
	) {
		$directives = $with_noindex( $directives );
	}

	// Taxonomies explicitly listed for noindex.
	if ( is_tax() && ! empty( $settings['extra_noindex_taxonomies'] ) && is_array( $settings['extra_noindex_taxonomies'] ) ) {
		$queried = get_queried_object();

		if ( $queried && ! empty( $queried->taxonomy ) && in_array( $queried->taxonomy, $settings['extra_noindex_taxonomies'], true ) ) {
			$directives = $with_noindex( $directives );
		}
	}

	// Snippet length: -1 or any non-negative number is accepted.
	if ( isset( $settings['max_snippet'] ) ) {
		$snippet_limit = intval( $settings['max_snippet'] );

		if ( $snippet_limit >= -1 ) {
			$directives[ 'max-snippet:' . $snippet_limit ] = true;
		}
	}

	// Image preview size: only the three known keywords are accepted.
	if ( ! empty( $settings['max_image_preview'] ) ) {
		$preview_size = sanitize_key( $settings['max_image_preview'] );

		if ( in_array( $preview_size, array( 'none', 'standard', 'large' ), true ) ) {
			$directives[ 'max-image-preview:' . $preview_size ] = true;
		}
	}

	// Video preview length: same rule as the snippet length.
	if ( isset( $settings['max_video_preview'] ) ) {
		$video_limit = intval( $settings['max_video_preview'] );

		if ( $video_limit >= -1 ) {
			$directives[ 'max-video-preview:' . $video_limit ] = true;
		}
	}

	/**
	 * Filter RX robots meta directives.
	 *
	 * Example:
	 * add_filter( 'rx_robots_meta_directives', function( $directives ) {
	 *     if ( is_page( 'thank-you' ) ) {
	 *         unset( $directives['index'] );
	 *         $directives['noindex'] = true;
	 *     }
	 *     return $directives;
	 * });
	 *
	 * @param array $directives Robots directives.
	 */
	return apply_filters( 'rx_robots_meta_directives', $directives );
}

/**
 * Merge the RX directives into the WordPress robots array.
 *
 * Callback for the `wp_robots` filter. Merge rules:
 * - `noindex` / `nofollow` from RX always win and remove the opposite
 *   positive directive.
 * - `index` / `follow` from RX never remove a `noindex` / `nofollow` that is
 *   already present. Those come from earlier callbacks (for example the
 *   "Discourage search engines" site setting), and the RX default of
 *   "index, follow" must not silently lift them.
 * - `name:value` directives (such as `max-snippet:-1`) are stored as
 *   `name => value`, so they replace an existing entry with the same name
 *   instead of being printed next to it.
 *
 * @param array $robots WordPress robots directives.
 * @return array
 */
function rx_robots_wp_meta_filter( $robots ) {
	if ( ! rx_robots_is_enabled() ) {
		return $robots;
	}

	$rx_directives = rx_robots_meta_directives();

	if ( empty( $rx_directives ) || ! is_array( $rx_directives ) ) {
		return $robots;
	}

	if ( ! is_array( $robots ) ) {
		$robots = array();
	}

	// Restrictive directive => the permissive directive it cancels.
	$cancels = array(
		'noindex'  => 'index',
		'nofollow' => 'follow',
	);

	foreach ( $rx_directives as $directive => $enabled ) {
		if ( ! $enabled ) {
			continue;
		}

		$directive = (string) $directive;

		// Restrictive directives always apply.
		if ( isset( $cancels[ $directive ] ) ) {
			unset( $robots[ $cancels[ $directive ] ] );
			$robots[ $directive ] = true;
			continue;
		}

		// Permissive directives apply only when nothing asked for the opposite.
		$restrictive = array_search( $directive, $cancels, true );

		if ( false !== $restrictive ) {
			if ( empty( $robots[ $restrictive ] ) ) {
				$robots[ $directive ] = true;
			}
			continue;
		}

		// "name:value" becomes name => value, the form used for valued directives.
		if ( false !== strpos( $directive, ':' ) ) {
			list( $name, $value ) = explode( ':', $directive, 2 );

			if ( '' !== $name && '' !== $value ) {
				$robots[ $name ] = $value;
				continue;
			}
		}

		$robots[ $directive ] = true;
	}

	return $robots;
}
add_filter( 'wp_robots', 'rx_robots_wp_meta_filter', 99 );

/**
 * Print a robots meta tag by hand when wp_robots() is unavailable.
 *
 * Runs on `wp_head`. It does nothing when the module is disabled, when the
 * wp_robots() function exists, or when there are no enabled directives.
 *
 * @return void
 */
function rx_robots_manual_meta_tag() {
	if ( ! rx_robots_is_enabled() ) {
		return;
	}

	if ( function_exists( 'wp_robots' ) ) {
		return;
	}

	$directives = rx_robots_meta_directives();

	if ( empty( $directives ) ) {
		return;
	}

	// Keep the names of the directives whose flag is truthy, escaped one by one.
	$names = array_map( 'esc_attr', array_keys( array_filter( $directives ) ) );

	if ( empty( $names ) ) {
		return;
	}

	printf(
		"\n" . '<meta name="robots" content="%s">' . "\n",
		esc_attr( implode( ', ', $names ) )
	);
}
add_action( 'wp_head', 'rx_robots_manual_meta_tag', 1 );

/**
 * Send the current directives as an X-Robots-Tag HTTP header.
 *
 * Runs on `template_redirect`. Nothing is sent when headers have already gone
 * out, the module is disabled, the header setting is off, or no directive is
 * enabled. An existing X-Robots-Tag header is replaced.
 *
 * @return void
 */
function rx_robots_x_header() {
	if ( headers_sent() || ! rx_robots_is_enabled() ) {
		return;
	}

	$settings = rx_robots_get_settings();

	if ( empty( $settings['x_robots_header_enabled'] ) ) {
		return;
	}

	$directives = rx_robots_meta_directives();

	if ( empty( $directives ) ) {
		return;
	}

	// Names of the directives whose flag is truthy.
	$active = array_keys( array_filter( $directives ) );

	if ( empty( $active ) ) {
		return;
	}

	$header_value = implode( ', ', array_unique( $active ) );

	header( 'X-Robots-Tag: ' . $header_value, true );
}
add_action( 'template_redirect', 'rx_robots_x_header', 1 );

/**
 * Add an X-Robots-Tag entry for document-type attachments.
 *
 * Callback for the `wp_get_attachment_url_headers` filter. When the module
 * and the media header setting are both on, attachments whose MIME type is in
 * the noindex list get `X-Robots-Tag: noindex, nofollow` added to the headers
 * array.
 *
 * NOTE(review): `wp_get_attachment_url_headers` does not look like a
 * WordPress core hook. Unless something else in the project fires it, this
 * callback never runs - confirm. Files served directly by the web server
 * would need the header configured at server level instead.
 *
 * @param array $headers Headers.
 * @param int   $post_id Attachment ID.
 * @param array $meta Attachment meta (unused).
 * @return array
 */
function rx_robots_media_headers( $headers, $post_id, $meta ) {
	// Honour the master switch like the other header callbacks do.
	if ( ! rx_robots_is_enabled() ) {
		return $headers;
	}

	$settings = rx_robots_get_settings();

	if ( empty( $settings['x_robots_header_media'] ) ) {
		return $headers;
	}

	$mime = get_post_mime_type( $post_id );

	$noindex_mimes = array(
		'application/pdf',
		'application/msword',
		'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		'application/vnd.ms-excel',
		'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
		'application/vnd.ms-powerpoint',
		'application/vnd.openxmlformats-officedocument.presentationml.presentation',
	);

	/**
	 * Filter media MIME types that should receive X-Robots-Tag.
	 *
	 * @param array $noindex_mimes MIME types.
	 */
	$noindex_mimes = apply_filters( 'rx_robots_media_noindex_mimes', $noindex_mimes );

	if ( is_array( $noindex_mimes ) && in_array( $mime, $noindex_mimes, true ) ) {
		// Make sure there is an array to write into.
		if ( ! is_array( $headers ) ) {
			$headers = array();
		}

		$headers['X-Robots-Tag'] = 'noindex, nofollow';
	}

	return $headers;
}
add_filter( 'wp_get_attachment_url_headers', 'rx_robots_media_headers', 10, 3 );

/**
 * Tell whether the current request resolves to `noindex`.
 *
 * Prints nothing itself; it is a condition helper for other SEO files
 * (for example to decide about canonical output).
 *
 * @return bool
 */
function rx_robots_current_page_is_noindex() {
	$current = rx_robots_meta_directives();

	return isset( $current['noindex'] ) && (bool) $current['noindex'];
}

/**
 * Tell whether the current request is indexable.
 *
 * This is the exact opposite of rx_robots_current_page_is_noindex().
 *
 * @return bool
 */
function rx_robots_current_page_is_indexable() {
	$is_noindex = rx_robots_current_page_is_noindex();

	return ! $is_noindex;
}

/**
 * Add a body class that shows the indexability of the current page.
 *
 * The class is only added for users who can `manage_options`.
 *
 * @param array $classes Body classes.
 * @return array
 */
function rx_robots_body_class_debug( $classes ) {
	if ( current_user_can( 'manage_options' ) ) {
		$classes[] = rx_robots_current_page_is_noindex() ? 'rx-robots-noindex' : 'rx-robots-indexable';
	}

	return $classes;
}
add_filter( 'body_class', 'rx_robots_body_class_debug' );

/**
 * Add an indexability indicator to the admin bar on front-end pages.
 *
 * The node links to robots.txt and is shown only to users who can
 * `manage_options`. It is skipped on wp-admin screens, where the front-end
 * conditional tags behind the status do not describe a public page, and when
 * the robots module is disabled.
 *
 * @param WP_Admin_Bar $wp_admin_bar Admin bar object.
 * @return void
 */
function rx_robots_admin_bar_indicator( $wp_admin_bar ) {
	if ( is_admin() || ! rx_robots_is_enabled() ) {
		return;
	}

	if ( ! is_admin_bar_showing() || ! current_user_can( 'manage_options' ) ) {
		return;
	}

	$title = rx_robots_current_page_is_noindex()
		? __( 'RX SEO: Noindex', 'rx-theme' )
		: __( 'RX SEO: Indexable', 'rx-theme' );

	$wp_admin_bar->add_node(
		array(
			'id'    => 'rx-robots-status',
			'title' => esc_html( $title ),
			'href'  => esc_url( home_url( '/robots.txt' ) ),
			'meta'  => array(
				'title' => esc_attr__( 'View robots.txt', 'rx-theme' ),
			),
		)
	);
}
add_action( 'admin_bar_menu', 'rx_robots_admin_bar_indicator', 100 );

/**
 * Register the robots option with the Settings API.
 *
 * Runs on `admin_init` and prepares the option for a future settings page:
 * array type, the RX sanitize callback, and the module defaults.
 *
 * @return void
 */
function rx_robots_register_settings() {
	$registration = array(
		'type'              => 'array',
		'sanitize_callback' => 'rx_robots_sanitize_settings',
		'default'           => rx_robots_default_settings(),
	);

	register_setting( 'rx_theme_robots', RX_ROBOTS_OPTION_KEY, $registration );
}
add_action( 'admin_init', 'rx_robots_register_settings' );

/**
 * Flatten a submitted list value into trimmed, non-empty, single-line strings.
 *
 * Accepts an array of entries or one newline-separated string (as a textarea
 * would submit). Non-scalar entries are dropped, and an entry that contains
 * line breaks is split so that it cannot carry several lines at once.
 *
 * @param mixed $value Raw list value.
 * @return array List of strings.
 */
function rx_robots_normalize_list_input( $value ) {
	if ( is_string( $value ) ) {
		$value = array( $value );
	}

	if ( ! is_array( $value ) ) {
		return array();
	}

	$items = array();

	foreach ( $value as $entry ) {
		if ( ! is_scalar( $entry ) ) {
			continue;
		}

		foreach ( (array) preg_split( '/\r\n|\r|\n/', (string) $entry ) as $piece ) {
			$piece = trim( (string) $piece );

			if ( '' !== $piece ) {
				$items[] = $piece;
			}
		}
	}

	return $items;
}

/**
 * Sanitize robots settings.
 *
 * Sanitize callback for the registered option. Non-array input yields the
 * defaults. Otherwise every key is rebuilt from the input:
 * - keys whose default is a boolean become booleans;
 * - numeric limits are cast, and preview limits below -1 fall back to the
 *   default;
 * - the image preview value must be one of none/standard/large;
 * - list values are flattened, cleaned and validated entry by entry.
 *
 * @param array $input Raw input.
 * @return array
 */
function rx_robots_sanitize_settings( $input ) {
	$defaults = rx_robots_default_settings();

	if ( ! is_array( $input ) ) {
		return $defaults;
	}

	$output = array();

	// Boolean settings are exactly the keys whose default value is a boolean,
	// which keeps this callback in step with the defaults.
	foreach ( $defaults as $key => $default_value ) {
		if ( is_bool( $default_value ) ) {
			$output[ $key ] = ! empty( $input[ $key ] );
		}
	}

	$output['crawl_delay'] = ( isset( $input['crawl_delay'] ) && is_scalar( $input['crawl_delay'] ) )
		? absint( $input['crawl_delay'] )
		: $defaults['crawl_delay'];

	// Preview limits: -1 or any non-negative number; anything else is invalid.
	foreach ( array( 'max_snippet', 'max_video_preview' ) as $limit_key ) {
		$limit = ( isset( $input[ $limit_key ] ) && is_scalar( $input[ $limit_key ] ) )
			? intval( $input[ $limit_key ] )
			: $defaults[ $limit_key ];

		$output[ $limit_key ] = ( $limit >= -1 ) ? $limit : $defaults[ $limit_key ];
	}

	$allowed_image_preview = array( 'none', 'standard', 'large' );
	$image_preview         = isset( $input['max_image_preview'] ) ? sanitize_key( $input['max_image_preview'] ) : $defaults['max_image_preview'];

	$output['max_image_preview'] = in_array( $image_preview, $allowed_image_preview, true ) ? $image_preview : 'large';

	$output['custom_robots_rules'] = array();

	if ( isset( $input['custom_robots_rules'] ) ) {
		foreach ( rx_robots_normalize_list_input( $input['custom_robots_rules'] ) as $rule ) {
			$rule = trim( wp_strip_all_tags( $rule ) );

			if ( '' !== $rule ) {
				$output['custom_robots_rules'][] = $rule;
			}
		}
	}

	$output['custom_sitemaps'] = array();

	if ( isset( $input['custom_sitemaps'] ) ) {
		foreach ( rx_robots_normalize_list_input( $input['custom_sitemaps'] ) as $sitemap ) {
			$sitemap = esc_url_raw( $sitemap );

			if ( ! empty( $sitemap ) ) {
				$output['custom_sitemaps'][] = $sitemap;
			}
		}
	}

	$output['extra_noindex_post_types'] = array();

	if ( isset( $input['extra_noindex_post_types'] ) ) {
		foreach ( rx_robots_normalize_list_input( $input['extra_noindex_post_types'] ) as $post_type ) {
			$post_type = sanitize_key( $post_type );

			if ( post_type_exists( $post_type ) ) {
				$output['extra_noindex_post_types'][] = $post_type;
			}
		}
	}

	$output['extra_noindex_taxonomies'] = array();

	if ( isset( $input['extra_noindex_taxonomies'] ) ) {
		foreach ( rx_robots_normalize_list_input( $input['extra_noindex_taxonomies'] ) as $taxonomy ) {
			$taxonomy = sanitize_key( $taxonomy );

			if ( taxonomy_exists( $taxonomy ) ) {
				$output['extra_noindex_taxonomies'][] = $taxonomy;
			}
		}
	}

	return wp_parse_args( $output, $defaults );
}

/**
 * Flush the rewrite rules after a theme switch.
 *
 * WordPress normally serves the virtual robots.txt on its own; this helper
 * runs on `after_switch_theme` to refresh the rules at that moment.
 *
 * @return void
 */
function rx_robots_theme_activation_flush() {
	// true is the function's default: a hard flush.
	flush_rewrite_rules( true );
}
add_action( 'after_switch_theme', 'rx_robots_theme_activation_flush' );

/**
 * Build the robots.txt text for a preview.
 *
 * Feeds an empty starting output and the site's `blog_public` option into
 * rx_robots_txt_output(). Intended for a future admin preview page.
 *
 * @return string
 */
function rx_robots_get_preview() {
	return rx_robots_txt_output( '', (bool) get_option( 'blog_public', 1 ) );
}

/**
 * Print the robots.txt preview inside a styled <pre> block.
 *
 * Nothing is printed for users who cannot `manage_options`. The preview text
 * is HTML-escaped.
 *
 * @return void
 */
function rx_robots_print_preview() {
	if ( ! current_user_can( 'manage_options' ) ) {
		return;
	}

	printf(
		'<pre style="white-space:pre-wrap;max-height:500px;overflow:auto;background:#fff;border:1px solid #ccd0d4;padding:15px;">%s</pre>',
		esc_html( rx_robots_get_preview() )
	);
}

/**
 * Redirect attachment pages away when they are set to noindex.
 *
 * Runs on `template_redirect`. When the module is enabled and
 * `noindex_attachment_pages` is on, an attachment page is redirected (301):
 * 1. to its parent post, but only when that parent is published, so visitors
 *    are never sent to a draft, private or trashed post;
 * 2. otherwise to the file URL, but only when that URL passes
 *    wp_validate_redirect(). wp_safe_redirect() would otherwise replace a
 *    disallowed host (for example offloaded media) with its fallback URL.
 *
 * When neither target is usable the attachment page is rendered as usual.
 *
 * @return void
 */
function rx_robots_maybe_redirect_attachment_pages() {
	if ( ! is_attachment() || ! rx_robots_is_enabled() ) {
		return;
	}

	$settings = rx_robots_get_settings();

	if ( empty( $settings['noindex_attachment_pages'] ) ) {
		return;
	}

	$attachment_id = get_queried_object_id();
	$parent_id     = wp_get_post_parent_id( $attachment_id );

	if ( $parent_id && 'publish' === get_post_status( $parent_id ) ) {
		$parent_url = get_permalink( $parent_id );

		if ( $parent_url ) {
			wp_safe_redirect( $parent_url, 301 );
			exit;
		}
	}

	$file_url = wp_get_attachment_url( $attachment_id );

	// An empty fallback makes a rejected URL evaluate as false here.
	if ( $file_url && wp_validate_redirect( $file_url, '' ) ) {
		wp_safe_redirect( $file_url, 301 );
		exit;
	}
}
add_action( 'template_redirect', 'rx_robots_maybe_redirect_attachment_pages', 20 );

/**
 * Send a noindex header with REST API responses.
 *
 * Runs on `rest_api_init`. The header is added (without replacing an existing
 * one) only when the module is enabled, headers are still open, and the
 * REST_REQUEST constant is defined and truthy.
 *
 * @return void
 */
function rx_robots_rest_headers() {
	$is_rest_request = defined( 'REST_REQUEST' ) && REST_REQUEST;

	if ( rx_robots_is_enabled() && ! headers_sent() && $is_rest_request ) {
		header( 'X-Robots-Tag: noindex, nofollow', false );
	}
}
add_action( 'rest_api_init', 'rx_robots_rest_headers', 1 );

/**
 * Add a noindex header to embed responses.
 *
 * Runs on `template_redirect`, before any template output, and acts only on
 * embed requests. Hooking it inside the embed template's head would be too
 * late: once output has started, headers_sent() is true and no header can be
 * added. The header is appended (not replaced), and nothing is sent when the
 * robots module is disabled.
 *
 * @return void
 */
function rx_robots_oembed_noindex_header() {
	if ( ! is_embed() || headers_sent() || ! rx_robots_is_enabled() ) {
		return;
	}

	header( 'X-Robots-Tag: noindex, follow', false );
}
add_action( 'template_redirect', 'rx_robots_oembed_noindex_header', 2 );

/**
 * Apply per-post robots overrides stored in post meta.
 *
 * Callback for `rx_robots_meta_directives`. On singular views:
 * - `_rx_robots_noindex` = 1 swaps `index` for `noindex`;
 * - `_rx_robots_nofollow` = 1 swaps `follow` for `nofollow`.
 *
 * @param array $directives Directives.
 * @return array
 */
function rx_robots_post_meta_noindex( $directives ) {
	$post_id = is_singular() ? get_queried_object_id() : 0;

	if ( ! $post_id ) {
		return $directives;
	}

	// Meta key => array( directive to remove, directive to set ).
	$overrides = array(
		'_rx_robots_noindex'  => array( 'index', 'noindex' ),
		'_rx_robots_nofollow' => array( 'follow', 'nofollow' ),
	);

	foreach ( $overrides as $meta_key => $swap ) {
		$flag = get_post_meta( $post_id, $meta_key, true );

		if ( '1' === (string) $flag ) {
			unset( $directives[ $swap[0] ] );
			$directives[ $swap[1] ] = true;
		}
	}

	return $directives;
}
add_filter( 'rx_robots_meta_directives', 'rx_robots_post_meta_noindex', 20 );

/**
 * Register the per-post robots meta box on every public post type.
 *
 * Runs on `add_meta_boxes`. The box sits in the side column with default
 * priority and is rendered by rx_robots_render_meta_box().
 *
 * @return void
 */
function rx_robots_add_meta_box() {
	$public_types = get_post_types( array( 'public' => true ), 'names' );

	// add_meta_box() accepts a list of screens and registers the box on each.
	add_meta_box(
		'rx_robots_meta_box',
		__( 'RX Robots SEO', 'rx-theme' ),
		'rx_robots_render_meta_box',
		array_values( $public_types ),
		'side',
		'default'
	);
}
add_action( 'add_meta_boxes', 'rx_robots_add_meta_box' );

/**
 * Print the per-post robots meta box.
 *
 * Outputs the nonce field, one checkbox for noindex, one for nofollow (each
 * pre-checked from the stored post meta) and a short warning.
 *
 * @param WP_Post $post Current post.
 * @return void
 */
function rx_robots_render_meta_box( $post ) {
	wp_nonce_field( 'rx_robots_save_meta', 'rx_robots_meta_nonce' );

	// Field name => array( stored value, escaped label ).
	$checkboxes = array(
		'rx_robots_noindex'  => array(
			get_post_meta( $post->ID, '_rx_robots_noindex', true ),
			esc_html__( 'Noindex this content', 'rx-theme' ),
		),
		'rx_robots_nofollow' => array(
			get_post_meta( $post->ID, '_rx_robots_nofollow', true ),
			esc_html__( 'Nofollow links on this content', 'rx-theme' ),
		),
	);

	foreach ( $checkboxes as $field_name => $checkbox ) {
		list( $stored, $label ) = $checkbox;

		printf(
			'<p><label><input type="checkbox" name="%1$s" value="1" %2$s> %3$s</label></p>',
			$field_name,
			checked( '1', $stored, false ),
			$label
		);
	}

	printf(
		'<p style="font-size:12px;color:#646970;">%s</p>',
		esc_html__( 'Use carefully. Noindex removes this URL from search results over time.', 'rx-theme' )
	);
}

/**
 * Store the per-post robots checkboxes.
 *
 * Runs on `save_post`. The save is skipped when the nonce is missing or
 * invalid, during autosave, or when the user may not edit the post. Each
 * flag is stored as '1' (box ticked) or '0' (box not submitted).
 *
 * @param int $post_id Post ID.
 * @return void
 */
function rx_robots_save_meta_box( $post_id ) {
	if ( ! isset( $_POST['rx_robots_meta_nonce'] ) ) {
		return;
	}

	$nonce = sanitize_text_field( wp_unslash( $_POST['rx_robots_meta_nonce'] ) );

	if ( ! wp_verify_nonce( $nonce, 'rx_robots_save_meta' ) ) {
		return;
	}

	if ( defined( 'DOING_AUTOSAVE' ) && DOING_AUTOSAVE ) {
		return;
	}

	if ( ! current_user_can( 'edit_post', $post_id ) ) {
		return;
	}

	// Form field name => post meta key.
	$fields = array(
		'rx_robots_noindex'  => '_rx_robots_noindex',
		'rx_robots_nofollow' => '_rx_robots_nofollow',
	);

	foreach ( $fields as $field_name => $meta_key ) {
		update_post_meta( $post_id, $meta_key, isset( $_POST[ $field_name ] ) ? '1' : '0' );
	}
}
add_action( 'save_post', 'rx_robots_save_meta_box' );

Best safe defaults

For your medical/SEO website, keep these false unless you really need them:

'block_ai_crawlers'      => false,
'block_bad_bots'         => false,
'disallow_feeds'         => false,
'disallow_query_strings' => false,

Blocking too much can reduce crawling, indexing, rich results, feed discovery, and SEO visibility.

The most important safe SEO defaults are already enabled:

'disallow_wp_admin'         => true,
'allow_admin_ajax'          => true,
'disallow_search'           => true,
'noindex_search_results'    => true,
'noindex_404'               => true,
'noindex_attachment_pages'  => true,
'add_sitemap'               => true,
'add_wp_sitemap'            => true,
'max_image_preview'         => 'large',

After adding the file, visit:

https://yourdomain.com/robots.txt

For your site, it will be:

https://rxharun.com/robots.txt

Leave a Reply

Your email address will not be published. Required fields are marked *