$buffer->append( $item_array );
// Add as many items to the buffer as possible.
while ( false === $buffer->is_full() ) {
// Retrieve a batch of posts (in order).
$posts = $this->librarian->query_sitemaps_after_id(
// If there were no posts to get, make a note.
if ( null == $posts ) { // phpcs:ignore Universal.Operators.StrictComparisons.LooseEqual -- WPCS: loose comparison ok.
$any_sitemaps_left = false;
// Otherwise, loop through each post in the batch.
foreach ( $posts as $post ) {
// Generate the sitemap XML for the post.
$current_item = $this->sitemap_row_to_index_item( (array) $post );
// Try adding this item to the buffer.
if ( true === $buffer->append( $current_item['xml'] ) ) {
$last_sitemap_id = $post['ID'];
$buffer->view_time( $current_item['last_modified'] );
// Otherwise stop looping through posts.
// If no items were added, return false.
if ( true === $buffer->is_empty() ) {
$this->librarian->store_sitemap_data(
* Now report back with the ID of the last sitemap post ID to
* be successfully added, whether there are any sitemap posts
* left, and the most recent modification time seen.
'last_id' => $last_sitemap_id,
'any_left' => $any_sitemaps_left,
'last_modified' => $buffer->last_modified(),
* Construct the sitemap index url entry for a sitemap row.
* @link https://www.sitemaps.org/protocol.html#sitemapIndex_sitemap
* @param array $row The sitemap data to be processed.
* @return array Array with an 'xml' entry (the sitemap index item) and a 'last_modified' entry (the post date of the row).
private function sitemap_row_to_index_item( $row ) {
// Turns one stored sitemap row into an entry for the sitemap index.
// The sitemap URL is built by the finder from the post_title of the row.
$url = $this->finder->construct_sitemap_url( $row['post_title'] );
// post_date is used twice: formatted by jp_sitemap_datetime() for the
// lastmod element, and unformatted under last_modified.
'lastmod' => jp_sitemap_datetime( $row['post_date'] ),
'last_modified' => $row['post_date'],
* Build and return the empty sitemap xml. This is served instead of a 404 when the master sitemap is requested.
* @return string The empty sitemap xml.
public function empty_sitemap_xml() {
// Instantiate the empty buffer and hand back its contents unchanged.
return ( new Jetpack_Sitemap_Buffer_Empty() )->contents();
* Build and return the news sitemap xml. Note that the result of this
* function is cached in the transient 'jetpack_news_sitemap_xml'.
* @return string The news sitemap xml.
public function news_sitemap_xml() {
// Returns the news sitemap XML. The value is read from the
// jetpack_news_sitemap_xml transient and is only rebuilt when
// get_transient() reports a miss (strict false).
$buffer = Jetpack_Sitemap_Buffer_Factory::create(
$the_stored_news_sitemap = get_transient( 'jetpack_news_sitemap_xml' );
if ( false === $the_stored_news_sitemap ) {
$this->logger->report( 'Beginning news sitemap generation.' );
* Filter limit of entries to include in news sitemap.
* @param int $count Number of entries to include in news sitemap.
// The limit defaults to JP_NEWS_SITEMAP_MAX_ITEMS and can be changed through
// the jetpack_sitemap_news_sitemap_count filter.
$item_limit = apply_filters(
'jetpack_sitemap_news_sitemap_count',
JP_NEWS_SITEMAP_MAX_ITEMS
$posts = $this->librarian->query_most_recent_posts( $item_limit );
// The site home URL is appended to the buffer before any post entry.
$buffer->append( array( 'url' => array( 'loc' => home_url( '/' ) ) ) );
foreach ( $posts as $post ) {
$current_item = $this->post_to_news_sitemap_item( $post );
// Items with a null xml entry are never appended. The branch below is taken
// when append() returns false for a non-null item.
// NOTE(review): the branch body is not visible here; presumably it stops the loop. Confirm.
if ( $current_item['xml'] !== null && false === $buffer->append( $current_item['xml'] ) ) {
$this->logger->time( 'End news sitemap generation.' );
// The rendered buffer contents become the return value.
$the_stored_news_sitemap = $buffer->contents();
// NOTE(review): the next two lines look like arguments of a transient write
// whose call line is not visible here. Confirm.
'jetpack_news_sitemap_xml',
$the_stored_news_sitemap,
return $the_stored_news_sitemap;
* Construct the sitemap url entry for a WP_Post.
* @link https://www.sitemaps.org/protocol.html#urldef
* @param object $post The post to be processed. Similar to WP_Post, but without post_content and post_content_filtered.
* @type array $xml An XML fragment representing the post URL.
* @type string $last_modified Date post was last modified.
private function post_to_sitemap_item( $post ) {
// Overview of what the visible statements do:
// - The jetpack_sitemap_skip_post filter (default false) is consulted first;
//   its branch is taken only when the filter returns exactly true.
//   NOTE(review): the body of that branch is not visible here. Confirm what it returns.
// - The URL is the escaped permalink. When it is longer than 2048 bytes it is
//   replaced by home_url() followed by /?p= and the post ID.
// - $last_modified starts as post_modified_gmt. For posts with a positive
//   comment_count the latest approved comment time is queried as well.
//   NOTE(review): how that time is merged into $last_modified is not visible here. Confirm.
// - lastmod is $last_modified formatted by jp_sitemap_datetime().
// - The item array passes through the jetpack_sitemap_url filter together with
//   the post ID, and the unformatted $last_modified is exposed under last_modified.
* Filter condition to allow skipping specific posts in sitemap.
* @param bool $skip Current boolean. False by default, so no post is skipped.
* @param object $post Current post in the form of a $wpdb result object. Not WP_Post.
* Doesn't have all the properties of a WP_Post.
if ( true === apply_filters( 'jetpack_sitemap_skip_post', false, $post ) ) {
$url = esc_url( get_permalink( $post ) );
* Spec requires the URL to be <=2048 bytes.
* In practice this constraint is unlikely to be violated.
if ( 2048 < strlen( $url ) ) {
$url = home_url() . '/?p=' . $post->ID;
$last_modified = $post->post_modified_gmt;
// Check for more recent comments.
// Note that 'Y-m-d h:i:s' strings sort lexicographically.
if ( 0 < $post->comment_count ) {
$this->librarian->query_latest_approved_comment_time_on_post( $post->ID )
'lastmod' => jp_sitemap_datetime( $last_modified ),
* Filter sitemap URL item before rendering it as XML.
* @param array $tree Associative array representing sitemap URL element.
* @param int $post_id ID of the post being processed.
$item_array = apply_filters( 'jetpack_sitemap_url', $item_array, $post->ID );
'last_modified' => $last_modified,
* Construct the image sitemap url entry for a WP_Post of image type.
* @link https://www.sitemaps.org/protocol.html#urldef
* @param WP_Post $post The image post to be processed.
* @type array $xml An XML fragment representing the post URL.
* @type string $last_modified Date post was last modified.
private function image_post_to_sitemap_item( $post ) {
// Overview of what the visible statements do:
// - The jetpack_sitemap_image_skip_post filter (default false) is consulted
//   first; any truthy result takes the skip branch.
//   NOTE(review): the body of that branch is not visible here. Confirm what it returns.
// - $url is the attachment URL of the image post.
// - Images whose post status is not publish take a second early branch.
//   NOTE(review): the body of that branch is not visible here either. Confirm.
// - The item array passes through the jetpack_sitemap_image_sitemap_item filter.
* Filter condition to allow skipping specific image posts in the sitemap.
* @param bool $skip Current boolean. False by default, so no post is skipped.
* @param WP_POST $post Current post object.
if ( apply_filters( 'jetpack_sitemap_image_skip_post', false, $post ) ) {
$url = wp_get_attachment_url( $post->ID );
// Do not include the image if the attached parent is not published.
// Unattached will be published. Otherwise, will inherit parent status.
if ( 'publish' !== get_post_status( $post ) ) {
// Start from the permalink of the parent post of the attachment.
$parent_url = get_permalink( get_post( $post->post_parent ) );
if ( '' == $parent_url ) { // phpcs:ignore Universal.Operators.StrictComparisons.LooseEqual -- WPCS: loose comparison ok.
// No usable parent permalink: fall back to the permalink of the attachment itself.
$parent_url = get_permalink( $post );
// Both date fields come from post_modified_gmt; only lastmod is formatted
// by jp_sitemap_datetime().
'lastmod' => jp_sitemap_datetime( $post->post_modified_gmt ),
* Filter associative array with data to build <url> node
* and its descendants for current post in image sitemap.
* @param array $item_array Data to build parent and children nodes for current post.
* @param int $post_id Current image post ID.
$item_array = apply_filters(
'jetpack_sitemap_image_sitemap_item',
'last_modified' => $post->post_modified_gmt,
* Construct the video sitemap url entry for a WP_Post of video type.
* @link https://www.sitemaps.org/protocol.html#urldef
* @link https://developers.google.com/webmasters/videosearch/sitemaps
* @param WP_Post $post The video post to be processed.
* @type array $xml An XML fragment representing the post URL.
* @type string $last_modified Date post was last modified.
private function video_post_to_sitemap_item( $post ) {
// Overview of what the visible statements do:
// - The jetpack_sitemap_video_skip_post filter (default false) is consulted
//   first; any truthy result takes the skip branch.
//   NOTE(review): the body of that branch is not visible here. Confirm what it returns.
// - Videos whose post status is not publish take a second early branch.
//   NOTE(review): the body of that branch is not visible here either. Confirm.
// - The description is post_content run through the the_content and
//   the_content_feed filters.
// - The item array passes through the jetpack_sitemap_video_sitemap_item filter.
* Filter condition to allow skipping specific video posts in the sitemap.
* @param bool $skip Current boolean. False by default, so no post is skipped.
* @param WP_POST $post Current post object.
if ( apply_filters( 'jetpack_sitemap_video_skip_post', false, $post ) ) {
// Do not include the video if the attached parent is not published.
// Unattached will be published. Otherwise, will inherit parent status.
if ( 'publish' !== get_post_status( $post ) ) {
// Start from the escaped permalink of the parent post of the attachment.
$parent_url = esc_url( get_permalink( get_post( $post->post_parent ) ) );
if ( '' == $parent_url ) { // phpcs:ignore Universal.Operators.StrictComparisons.LooseEqual -- WPCS: loose comparison ok.
// No usable parent permalink: fall back to the permalink of the attachment itself.
$parent_url = esc_url( get_permalink( $post ) );
// Prepare the content like get_the_content_feed().
$content = $post->post_content;
/** This filter is already documented in core/wp-includes/post-template.php */
$content = apply_filters( 'the_content', $content );
/** This filter is already documented in core/wp-includes/feed.php */
$content = apply_filters( 'the_content_feed', $content, 'rss2' );
// Include thumbnails for VideoPress videos, use blank image for others.
// The post thumbnail is used only when the videopress_status meta equals
// complete and the post has a thumbnail.
if ( 'complete' === get_post_meta( $post->ID, 'videopress_status', true ) && has_post_thumbnail( $post ) ) {
$video_thumbnail_url = get_the_post_thumbnail_url( $post );
* Filter the thumbnail image used in the video sitemap for non-VideoPress videos.
* @param string $str Image URL.
// NOTE(review): presumably the else side of the check above (the else line is
// not visible here). Confirm.
$video_thumbnail_url = apply_filters( 'jetpack_video_sitemap_default_thumbnail', 'https://s0.wp.com/i/blank.jpg' );
// Both date fields come from post_modified_gmt; only lastmod is formatted
// by jp_sitemap_datetime().
'lastmod' => jp_sitemap_datetime( $post->post_modified_gmt ),
/** This filter is already documented in core/wp-includes/feed.php */
'video:title' => apply_filters( 'the_title_rss', $post->post_title ),
'video:thumbnail_loc' => esc_url( $video_thumbnail_url ),
'video:description' => $content,
'video:content_loc' => esc_url( wp_get_attachment_url( $post->ID ) ),
// TODO: Integrate with VideoPress here.
// cf. video:player_loc tag in video sitemap spec.
* Filter associative array with data to build <url> node
* and its descendants for current post in video sitemap.
* @param array $item_array Data to build parent and children nodes for current post.
* @param int $post_id Current video post ID.
$item_array = apply_filters(
'jetpack_sitemap_video_sitemap_item',
'last_modified' => $post->post_modified_gmt,
* Construct the news sitemap url entry for a WP_Post.
* @link https://www.sitemaps.org/protocol.html#urldef
* @param object $post The post to be processed. Similar to WP_Post, but without post_content and post_content_filtered.
* @return array Array whose 'xml' entry describes the post URL; callers skip the item when that entry is null.
private function post_to_news_sitemap_item( $post ) {
// Exclude posts with meta 'jetpack_seo_noindex' set true from the Jetpack news sitemap.
add_filter( 'jetpack_sitemap_news_skip_post', array( 'Jetpack_SEO_Posts', 'exclude_noindex_posts_from_jetpack_sitemap' ), 10, 2 );
* Filter condition to allow skipping specific posts in news sitemap.
* @param bool $skip Current boolean. False by default, so no post is skipped.
* @param object $post Current post in the form of a $wpdb result object. Not WP_Post.
* Doesn't have all the properties of a WP_Post.
if ( apply_filters( 'jetpack_sitemap_news_skip_post', false, $post ) ) {
$url = get_permalink( $post );
* Spec requires the URL to be <=2048 bytes.
* In practice this constraint is unlikely to be violated.
if ( 2048 < strlen( $url ) ) {
$url = home_url() . '/?p=' . $post->ID;
* Trim the locale to an ISO 639 language code as required by Google.
* Special cases are zh-cn (Simplified Chinese) and zh-tw (Traditional Chinese).
* @link https://www.loc.gov/standards/iso639-2/php/code_list.php
$language = strtolower( get_locale() );
if ( in_array( $language, array( 'zh_tw', 'zh_cn' ), true ) ) {
$language = str_replace( '_', '-', $language );
$language = preg_replace( '/(_.*)$/i', '', $language );