canonical.php

This is a standalone inc/seo/canonical.php module for your RX theme. It covers canonical URL generation, duplicate URL cleanup, pagination handling, search and archive canonicals, safe skipping of admin, AJAX, REST, feed, and preview requests, and optional redirects.

Below is a full advanced canonical.php file for your RX theme.

Place it here:

rx-theme/inc/seo/canonical.php

Then include it from functions.php:

// Load the RX canonical URL module from the theme's inc/seo directory.
require_once get_template_directory() . '/inc/seo/canonical.php';

inc/seo/canonical.php

<?php
/**
 * RX Theme Canonical URL System
 *
 * File: inc/seo/canonical.php
 *
 * Features:
 * - Advanced canonical tag output
 * - Homepage canonical
 * - Singular post/page/custom post canonical
 * - Category/tag/taxonomy canonical
 * - Author archive canonical
 * - Date archive canonical
 * - Search canonical
 * - Post type archive canonical
 * - Pagination canonical
 * - Multipage post canonical
 * - Attachment canonical handling
 * - Query string cleanup
 * - UTM / tracking parameter removal
 * - Optional canonical redirects
 * - Optional duplicate URL cleanup
 * - REST, feed, admin, preview, and robots-safe behavior
 *
 * @package RX_Theme
 */

defined( 'ABSPATH' ) || exit;

if ( ! class_exists( 'RX_Theme_Canonical' ) ) :

/**
 * Builds, prints, and optionally enforces the canonical URL of the current request.
 *
 * All members are static. Call RX_Theme_Canonical::init() once to register the
 * hooks; RX_Theme_Canonical::get_canonical_url() can then be used anywhere after
 * the main query has been set up.
 *
 * Filters exposed by this class:
 * - rx_theme_should_output_canonical       (bool)   Print the <link> tag or not.
 * - rx_theme_canonical_url                 (string) Final canonical URL.
 * - rx_theme_canonical_blocked_query_args  (array)  Query args that are always removed.
 * - rx_theme_canonical_allowed_query_args  (array)  Query args that may remain.
 * - rx_theme_enable_canonical_redirect     (bool)   Turn on 301 redirects (off by default).
 */
final class RX_Theme_Canonical {

	/**
	 * Memoized canonical URL for the current request (null until computed).
	 *
	 * @var string|null
	 */
	private static $canonical_url = null;

	/**
	 * Query parameters that must never appear in a canonical URL.
	 *
	 * @var array
	 */
	private static $blocked_query_args = array(
		'utm_source',
		'utm_medium',
		'utm_campaign',
		'utm_term',
		'utm_content',
		'utm_id',
		'utm_name',
		'utm_cid',
		'utm_reader',
		'utm_viz_id',
		'utm_pubreferrer',
		'utm_swu',
		'fbclid',
		'gclid',
		'gbraid',
		'wbraid',
		'msclkid',
		'yclid',
		'_ga',
		'_gl',
		'mc_cid',
		'mc_eid',
		'igshid',
		'ref',
		'ref_src',
		'source',
		'campaign',
		'medium',
		'fb_action_ids',
		'fb_action_types',
		'fb_source',
		'action_object_map',
		'action_type_map',
		'action_ref_map',
		'PHPSESSID',
		'sessionid',
		'sid',
		'preview',
		'preview_id',
		'preview_nonce',
		'doing_wp_cron',
	);

	/**
	 * Query parameters that may remain in a canonical URL.
	 *
	 * Keep this short. Canonical URLs should normally be clean.
	 *
	 * @var array
	 */
	private static $allowed_query_args = array(
		's',
	);

	/**
	 * Register all hooks used by the canonical system.
	 *
	 * @return void
	 */
	public static function init() {

		// Replace the core canonical tag with the one printed by this class.
		remove_action( 'wp_head', 'rel_canonical' );
		add_action( 'wp_head', array( __CLASS__, 'output_canonical_tag' ), 1 );

		// The redirect handler is always hooked, but it returns immediately
		// unless the 'rx_theme_enable_canonical_redirect' filter returns true.
		add_action( 'template_redirect', array( __CLASS__, 'maybe_redirect_to_canonical' ), 1 );

		// Keep wp_get_canonical_url() in sync with the URL generated here.
		add_filter( 'get_canonical_url', array( __CLASS__, 'filter_wp_canonical_url' ), 20, 2 );
	}

	/**
	 * Print the <link rel="canonical"> tag inside wp_head.
	 *
	 * Nothing is printed when canonical output is disabled for the request
	 * or when no canonical URL could be determined.
	 *
	 * @return void
	 */
	public static function output_canonical_tag() {

		if ( ! self::should_output_canonical() ) {
			return;
		}

		$canonical = self::get_canonical_url();

		if ( empty( $canonical ) ) {
			return;
		}

		printf(
			'<link rel="canonical" href="%s" />' . "\n",
			esc_url( $canonical )
		);
	}

	/**
	 * Decide whether the canonical tag should be printed for this request.
	 *
	 * Admin, AJAX, REST, feed, trackback, robots, preview, and 404 requests
	 * never get a tag. Everything else is decided by the
	 * 'rx_theme_should_output_canonical' filter (default true).
	 *
	 * @return bool
	 */
	private static function should_output_canonical() {

		if ( is_admin() || wp_doing_ajax() ) {
			return false;
		}

		if ( defined( 'REST_REQUEST' ) && REST_REQUEST ) {
			return false;
		}

		if ( is_feed() || is_trackback() || is_robots() ) {
			return false;
		}

		if ( is_preview() || is_404() ) {
			return false;
		}

		/**
		 * Allow theme/plugin to disable canonical.
		 *
		 * Example:
		 * add_filter( 'rx_theme_should_output_canonical', '__return_false' );
		 */
		return (bool) apply_filters( 'rx_theme_should_output_canonical', true );
	}

	/**
	 * Return the canonical URL of the current request.
	 *
	 * The URL is built from the queried object, normalized, stripped of
	 * unwanted query args, extended with the archive page number, and finally
	 * passed through the 'rx_theme_canonical_url' filter.
	 *
	 * @return string Canonical URL, or an empty string when none applies.
	 */
	public static function get_canonical_url() {

		if ( null !== self::$canonical_url ) {
			return self::$canonical_url;
		}

		$url = self::get_context_canonical();
		$url = self::normalize_url( $url );
		$url = self::remove_unwanted_query_args( $url );
		$url = self::maybe_add_pagination_to_canonical( $url );

		/**
		 * Final canonical URL filter.
		 *
		 * @param string $url Canonical URL.
		 */
		$url = (string) apply_filters( 'rx_theme_canonical_url', $url );

		// Conditional tags are only meaningful once the main query has run.
		// Memoizing a result computed earlier would freeze an empty or wrong
		// URL for the rest of the request, so cache only after 'wp' has fired.
		if ( did_action( 'wp' ) ) {
			self::$canonical_url = $url;
		}

		return $url;
	}

	/**
	 * Pick the raw canonical URL that matches the current query context.
	 *
	 * @return string Un-normalized URL, or an empty string for unknown contexts.
	 */
	private static function get_context_canonical() {

		if ( is_front_page() ) {
			return self::get_front_page_canonical();
		}

		if ( is_home() ) {
			return self::get_blog_home_canonical();
		}

		if ( is_singular() ) {
			return self::get_singular_canonical();
		}

		if ( is_category() || is_tag() || is_tax() ) {
			return self::get_taxonomy_canonical();
		}

		if ( is_post_type_archive() ) {
			return self::get_post_type_archive_canonical();
		}

		if ( is_author() ) {
			return self::get_author_canonical();
		}

		if ( is_date() ) {
			return self::get_date_archive_canonical();
		}

		if ( is_search() ) {
			return self::get_search_canonical();
		}

		if ( is_archive() ) {
			return self::get_generic_archive_canonical();
		}

		return '';
	}

	/**
	 * Homepage canonical.
	 *
	 * @return string
	 */
	private static function get_front_page_canonical() {
		return home_url( '/' );
	}

	/**
	 * Blog index canonical.
	 *
	 * Uses the permalink of the configured "posts page" when one is set,
	 * otherwise the site root.
	 *
	 * @return string
	 */
	private static function get_blog_home_canonical() {

		$page_for_posts = (int) get_option( 'page_for_posts' );

		if ( $page_for_posts > 0 ) {
			return get_permalink( $page_for_posts );
		}

		return home_url( '/' );
	}

	/**
	 * Canonical for a single post, page, attachment, or custom post type entry.
	 *
	 * @return string
	 */
	private static function get_singular_canonical() {

		$post_id = get_queried_object_id();

		if ( ! $post_id ) {
			return '';
		}

		// Attachment pages are often thin/duplicate content, so point them at
		// the parent post whenever a parent exists.
		if ( is_attachment() ) {
			$parent_id = wp_get_post_parent_id( $post_id );

			if ( $parent_id ) {
				return get_permalink( $parent_id );
			}
		}

		$url = get_permalink( $post_id );

		// Multipage post/page support, e.g. /my-post/2/.
		$page = self::get_current_multipage_number();

		if ( $page > 1 ) {
			$url = self::add_page_number_to_url( $url, $page );
		}

		return $url;
	}

	/**
	 * Canonical for category, tag, and custom taxonomy archives.
	 *
	 * @return string
	 */
	private static function get_taxonomy_canonical() {

		$term = get_queried_object();

		if ( empty( $term ) || is_wp_error( $term ) || empty( $term->term_id ) ) {
			return '';
		}

		$url = get_term_link( $term );

		return is_wp_error( $url ) ? '' : $url;
	}

	/**
	 * Canonical for a custom post type archive.
	 *
	 * @return string
	 */
	private static function get_post_type_archive_canonical() {

		$post_type = get_query_var( 'post_type' );

		// The query var may hold several post types; the first one is used.
		if ( is_array( $post_type ) ) {
			$post_type = reset( $post_type );
		}

		if ( empty( $post_type ) ) {
			return '';
		}

		$url = get_post_type_archive_link( $post_type );

		return $url ? $url : '';
	}

	/**
	 * Canonical for an author archive.
	 *
	 * @return string
	 */
	private static function get_author_canonical() {

		$author_id = get_queried_object_id();

		if ( ! $author_id ) {
			return '';
		}

		return get_author_posts_url( $author_id );
	}

	/**
	 * Canonical for year, month, and day archives.
	 *
	 * Reads the 'year' / 'monthnum' / 'day' query vars and falls back to the
	 * compact 'm' query var (YYYY, YYYYMM, or YYYYMMDD) used by plain
	 * permalinks, which the original implementation ignored.
	 *
	 * @return string
	 */
	private static function get_date_archive_canonical() {

		$year  = (int) get_query_var( 'year' );
		$month = (int) get_query_var( 'monthnum' );
		$day   = (int) get_query_var( 'day' );

		if ( ! $year ) {
			$compact = preg_replace( '/[^0-9]/', '', (string) get_query_var( 'm' ) );
			$length  = strlen( $compact );

			if ( $length >= 4 ) {
				$year  = (int) substr( $compact, 0, 4 );
				$month = $length >= 6 ? (int) substr( $compact, 4, 2 ) : 0;
				$day   = $length >= 8 ? (int) substr( $compact, 6, 2 ) : 0;
			}
		}

		if ( $year && $month && $day ) {
			return get_day_link( $year, $month, $day );
		}

		if ( $year && $month ) {
			return get_month_link( $year, $month );
		}

		if ( $year ) {
			return get_year_link( $year );
		}

		return '';
	}

	/**
	 * Canonical for a search results page.
	 *
	 * An empty search falls back to the site root.
	 *
	 * @return string
	 */
	private static function get_search_canonical() {

		$search_query = get_search_query( false );

		if ( '' === $search_query ) {
			return home_url( '/' );
		}

		// Search pages can be noindexed by SEO plugins; a canonical is still
		// useful to drop tracking parameters.
		return add_query_arg(
			array(
				's' => rawurlencode( $search_query ),
			),
			home_url( '/' )
		);
	}

	/**
	 * Fallback canonical for archives not handled by a dedicated method.
	 *
	 * @return string
	 */
	private static function get_generic_archive_canonical() {

		global $wp;

		if ( empty( $wp->request ) ) {
			return home_url( '/' );
		}

		return home_url( trailingslashit( $wp->request ) );
	}

	/**
	 * Append the archive page number to the canonical URL when needed.
	 *
	 * Singular URLs are returned untouched because their multipage number is
	 * already handled in get_singular_canonical().
	 *
	 * @param string $url Canonical URL.
	 * @return string
	 */
	private static function maybe_add_pagination_to_canonical( $url ) {

		if ( empty( $url ) ) {
			return '';
		}

		if ( is_singular() ) {
			return $url;
		}

		$paged = self::get_current_paged_number();

		if ( $paged <= 1 ) {
			return $url;
		}

		return self::add_page_number_to_url( $url, $paged );
	}

	/**
	 * Current archive page number (never lower than 1).
	 *
	 * @return int
	 */
	private static function get_current_paged_number() {
		return max( 1, (int) get_query_var( 'paged' ) );
	}

	/**
	 * Current page number of a multipage singular entry (never lower than 1).
	 *
	 * @return int
	 */
	private static function get_current_multipage_number() {
		return max( 1, (int) get_query_var( 'page' ) );
	}

	/**
	 * Add a page number to a URL.
	 *
	 * Plain permalinks get a 'page' (singular) or 'paged' (archive) query arg.
	 * Pretty permalinks get a '/N/' (singular) or '/<pagination base>/N/'
	 * (archive) path segment. Any query string on the URL is detached first so
	 * the segment lands in the path instead of being glued to the last query
	 * value (e.g. '/?s=term' must become '/page/2/?s=term').
	 *
	 * @param string $url  Base URL.
	 * @param int    $page Page number.
	 * @return string
	 */
	private static function add_page_number_to_url( $url, $page ) {

		global $wp_rewrite;

		$page = absint( $page );

		if ( $page <= 1 || empty( $url ) ) {
			return $url;
		}

		$is_singular = is_singular();

		if ( ! get_option( 'permalink_structure' ) ) {
			return add_query_arg( $is_singular ? 'page' : 'paged', $page, $url );
		}

		$query     = '';
		$query_pos = strpos( $url, '?' );

		if ( false !== $query_pos ) {
			$query = substr( $url, $query_pos );
			$url   = substr( $url, 0, $query_pos );
		}

		$url = trailingslashit( $url );

		if ( $is_singular ) {
			$url = user_trailingslashit( $url . $page, 'single_paged' );
		} else {
			// Respect a customized pagination base instead of assuming 'page'.
			$base = ( is_object( $wp_rewrite ) && ! empty( $wp_rewrite->pagination_base ) )
				? $wp_rewrite->pagination_base
				: 'page';

			$url = user_trailingslashit( $url . $base . '/' . $page, 'paged' );
		}

		return $url . $query;
	}

	/**
	 * Normalize a URL: decode entities, sanitize, force the site scheme,
	 * drop the fragment, and apply the trailing-slash policy to the path.
	 *
	 * @param string $url URL.
	 * @return string
	 */
	private static function normalize_url( $url ) {

		if ( empty( $url ) ) {
			return '';
		}

		$url = html_entity_decode( $url );
		$url = esc_url_raw( $url );

		// Force the site scheme.
		$url = set_url_scheme( $url, is_ssl() ? 'https' : wp_parse_url( home_url(), PHP_URL_SCHEME ) );

		// Remove the fragment. strpos()/substr() is used instead of strtok(),
		// which returns false for some inputs and skips leading delimiters.
		$hash_pos = strpos( $url, '#' );

		if ( false !== $hash_pos ) {
			$url = (string) substr( $url, 0, $hash_pos );
		}

		if ( '' === $url ) {
			return '';
		}

		// Apply the trailing slash to the path only. Slashing the whole URL
		// would turn '/?s=term' into '/?s=term/'.
		if ( self::should_have_trailing_slash( $url ) ) {
			$query     = '';
			$query_pos = strpos( $url, '?' );

			if ( false !== $query_pos ) {
				$query = substr( $url, $query_pos );
				$url   = substr( $url, 0, $query_pos );
			}

			$url = trailingslashit( $url ) . $query;
		}

		return $url;
	}

	/**
	 * Determine whether the path of a URL should end with a slash.
	 *
	 * Plain permalinks never force a slash. With pretty permalinks the site
	 * root always gets one; other paths get one only when the configured
	 * permalink structure itself ends with a slash and the path does not look
	 * like a file name.
	 *
	 * @param string $url URL.
	 * @return bool
	 */
	private static function should_have_trailing_slash( $url ) {

		$permalink_structure = (string) get_option( 'permalink_structure' );

		if ( '' === $permalink_structure ) {
			return false;
		}

		$path = wp_parse_url( $url, PHP_URL_PATH );

		if ( empty( $path ) || '/' === $path ) {
			return true;
		}

		// A structure such as '/%postname%' means the site deliberately uses
		// slash-less URLs; forcing a slash would make the canonical disagree
		// with the real permalink.
		if ( '/' !== substr( $permalink_structure, -1 ) ) {
			return false;
		}

		// Do not force a trailing slash on files.
		if ( preg_match( '/\.[a-zA-Z0-9]{2,8}$/', $path ) ) {
			return false;
		}

		return true;
	}

	/**
	 * Remove blocked and non-allowed query args from a URL.
	 *
	 * A query arg survives only when it is not on the blocked list and is on
	 * the allowed list. When the site uses plain permalinks, the query vars
	 * registered with WordPress (p, page_id, cat, paged, ...) are treated as
	 * allowed as well, because in that mode they are the address of the
	 * content rather than decoration.
	 *
	 * The URL is rebuilt from its parts without the original query string and
	 * the surviving args are appended re-encoded, so removed args really
	 * disappear and values containing '&' or '#' stay intact.
	 *
	 * @param string $url URL.
	 * @return string
	 */
	private static function remove_unwanted_query_args( $url ) {

		global $wp;

		if ( empty( $url ) ) {
			return '';
		}

		$blocked = (array) apply_filters(
			'rx_theme_canonical_blocked_query_args',
			self::$blocked_query_args
		);

		$allowed = (array) apply_filters(
			'rx_theme_canonical_allowed_query_args',
			self::$allowed_query_args
		);

		$parts = wp_parse_url( $url );

		if ( ! is_array( $parts ) || empty( $parts['query'] ) ) {
			return $url;
		}

		if ( ! get_option( 'permalink_structure' )
			&& isset( $wp->public_query_vars )
			&& is_array( $wp->public_query_vars )
		) {
			$allowed = array_merge( $allowed, $wp->public_query_vars );
		}

		parse_str( $parts['query'], $query_args );

		$kept = array();

		foreach ( $query_args as $key => $value ) {
			// parse_str() turns numeric keys into integers; compare as strings.
			$name = (string) $key;

			if ( in_array( $name, $blocked, true ) ) {
				continue;
			}

			if ( ! in_array( $name, $allowed, true ) ) {
				continue;
			}

			$kept[ $name ] = $value;
		}

		unset( $parts['query'] );

		$clean_url = self::build_url_from_parts( $parts );

		if ( ! empty( $kept ) ) {
			$clean_url .= '?' . http_build_query( $kept, '', '&', PHP_QUERY_RFC3986 );
		}

		return esc_url_raw( $clean_url );
	}

	/**
	 * Rebuild a URL string from the array returned by wp_parse_url().
	 *
	 * The fragment is intentionally left out.
	 *
	 * @param array $parts URL parts.
	 * @return string
	 */
	private static function build_url_from_parts( $parts ) {

		$scheme = isset( $parts['scheme'] ) ? $parts['scheme'] . '://' : '';
		$host   = isset( $parts['host'] ) ? $parts['host'] : '';
		$port   = isset( $parts['port'] ) ? ':' . $parts['port'] : '';
		$user   = isset( $parts['user'] ) ? $parts['user'] : '';
		$pass   = isset( $parts['pass'] ) ? ':' . $parts['pass'] : '';
		$pass   = ( $user || $pass ) ? "$pass@" : '';
		$path   = isset( $parts['path'] ) ? $parts['path'] : '';
		$query  = isset( $parts['query'] ) ? '?' . $parts['query'] : '';

		return "{$scheme}{$user}{$pass}{$host}{$port}{$path}{$query}";
	}

	/**
	 * Redirect the visitor to the canonical URL (301) when redirects are enabled.
	 *
	 * Disabled by default. Enable with:
	 * add_filter( 'rx_theme_enable_canonical_redirect', '__return_true' );
	 *
	 * No redirect happens for admin, AJAX, preview, feed, 404, and search
	 * requests, for requests that are not GET/HEAD, or when the requested URL
	 * (after removing unwanted query args and normalizing) already matches the
	 * canonical URL.
	 *
	 * @return void
	 */
	public static function maybe_redirect_to_canonical() {

		if ( ! apply_filters( 'rx_theme_enable_canonical_redirect', false ) ) {
			return;
		}

		if ( is_admin() || wp_doing_ajax() || is_preview() || is_feed() || is_404() ) {
			return;
		}

		// Avoid redirecting search pages too aggressively.
		if ( is_search() ) {
			return;
		}

		if ( ! isset( $_SERVER['REQUEST_URI'] ) ) {
			return;
		}

		// A 301 on a POST (or similar) request would discard the submitted data.
		$method = isset( $_SERVER['REQUEST_METHOD'] )
			? strtoupper( (string) $_SERVER['REQUEST_METHOD'] )
			: 'GET';

		if ( ! in_array( $method, array( 'GET', 'HEAD' ), true ) ) {
			return;
		}

		$canonical = self::get_canonical_url();

		if ( empty( $canonical ) ) {
			return;
		}

		$current_url = self::get_current_url();
		$current_url = self::remove_unwanted_query_args( $current_url );
		$current_url = self::normalize_url( $current_url );

		if ( empty( $current_url ) ) {
			return;
		}

		// Compare decoded forms so '%E0' and '%e0' (same byte, different hex
		// case) are not mistaken for different URLs, and ignore a trailing
		// slash difference as the original comparison did.
		$current_key   = rawurldecode( untrailingslashit( $current_url ) );
		$canonical_key = rawurldecode( untrailingslashit( $canonical ) );

		if ( $current_key === $canonical_key ) {
			return;
		}

		// Only stop the request when the redirect header was actually sent;
		// otherwise let the page render normally instead of showing a blank page.
		if ( wp_safe_redirect( esc_url_raw( $canonical ), 301 ) ) {
			exit;
		}
	}

	/**
	 * Build the full URL of the current request from server variables.
	 *
	 * REQUEST_URI is sanitized with esc_url_raw() only. sanitize_text_field()
	 * strips percent-encoded octets, which would mangle non-ASCII slugs and
	 * make the request look different from its own canonical URL (an endless
	 * redirect once redirects are enabled).
	 *
	 * @return string
	 */
	private static function get_current_url() {

		$scheme = is_ssl() ? 'https' : 'http';

		$host = isset( $_SERVER['HTTP_HOST'] )
			? sanitize_text_field( wp_unslash( $_SERVER['HTTP_HOST'] ) )
			: wp_parse_url( home_url(), PHP_URL_HOST );

		$request_uri = isset( $_SERVER['REQUEST_URI'] )
			? wp_unslash( $_SERVER['REQUEST_URI'] )
			: '/';

		return esc_url_raw( $scheme . '://' . $host . $request_uri );
	}

	/**
	 * Filter callback for the core 'get_canonical_url' hook.
	 *
	 * Replaces the core value with this class's canonical URL, but only when
	 * the post being asked about is the queried singular object. Requests for
	 * any other post keep the value WordPress computed for them.
	 *
	 * @param string  $canonical_url Canonical URL computed by WordPress.
	 * @param WP_Post $post          Post object the URL belongs to.
	 * @return string
	 */
	public static function filter_wp_canonical_url( $canonical_url, $post ) {

		if ( ! is_singular() ) {
			return $canonical_url;
		}

		if ( ! is_object( $post ) || ! isset( $post->ID ) ) {
			return $canonical_url;
		}

		if ( (int) $post->ID !== (int) get_queried_object_id() ) {
			return $canonical_url;
		}

		$rx_url = self::get_canonical_url();

		return empty( $rx_url ) ? $canonical_url : $rx_url;
	}
}

// Register the hooks as soon as this file is loaded.
RX_Theme_Canonical::init();

endif;

if ( ! function_exists( 'rx_theme_get_canonical_url' ) ) {
	/**
	 * Template helper that returns the canonical URL of the current request.
	 *
	 * Delegates to RX_Theme_Canonical::get_canonical_url() and yields an empty
	 * string when that class is not loaded.
	 *
	 * Usage:
	 * echo esc_url( rx_theme_get_canonical_url() );
	 *
	 * @return string
	 */
	function rx_theme_get_canonical_url() {
		return class_exists( 'RX_Theme_Canonical' )
			? RX_Theme_Canonical::get_canonical_url()
			: '';
	}
}

/**
 * Optional helper: add custom blocked query args.
 *
 * Example:
 * add_filter( 'rx_theme_canonical_blocked_query_args', function( $args ) {
 *     $args[] = 'my_tracking_param';
 *     return $args;
 * } );
 */

/**
 * Optional helper: enable canonical redirects.
 *
 * Add this in functions.php only if you want strict SEO redirects:
 *
 * add_filter( 'rx_theme_enable_canonical_redirect', '__return_true' );
 */

Best functions.php loading code

Use this:

/**
 * Load the RX SEO modules.
 *
 * Each entry is a path relative to the theme directory. Entries whose file
 * does not exist are skipped without an error.
 */
$rx_seo_files = array( '/inc/seo/canonical.php' );

foreach ( $rx_seo_files as $rx_file ) {
	$rx_path = get_template_directory() . $rx_file;

	if ( ! file_exists( $rx_path ) ) {
		continue;
	}

	require_once $rx_path;
}

Optional strict canonical redirect

Only add this if your site URL structure is already stable:

// Opt in to canonical redirects; the module leaves them off unless this filter returns true.
add_filter( 'rx_theme_enable_canonical_redirect', '__return_true' );

My recommendation: start with the canonical tag only, and enable the redirect after you have tested your site. This avoids accidental redirect problems while the RX theme is still in development.

Leave a Reply

Your email address will not be published. Required fields are marked *