-
Notifications
You must be signed in to change notification settings - Fork 138
Implement Compression Streams API for URL Metrics JSON Compression #1905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
4153951
5347657
29fff3c
36bc475
a4632d1
14409a6
2f81ec0
90aca14
eeb066d
8b84ed3
c59c3ed
130277a
cbfba12
0ca808f
e1d4b1e
af1d1dc
97c7836
82fe8e9
1984db0
5f0be29
f9d7fe1
2a11bf5
ba383c6
746a410
d41da3c
ad6d264
c0e3291
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| // noinspection JSUnusedGlobalSymbols | ||
|
|
||
| /** | ||
| * @typedef {import("web-vitals").LCPMetric} LCPMetric | ||
| * @typedef {import("web-vitals").LCPMetricWithAttribution} LCPMetricWithAttribution | ||
|
|
@@ -197,7 +199,7 @@ function getCurrentTime() { | |
| /** | ||
| * Recursively freezes an object to prevent mutation. | ||
| * | ||
| * @param {Object} obj Object to recursively freeze. | ||
| * @param {Object} obj - Object to recursively freeze. | ||
| */ | ||
| function recursiveFreeze( obj ) { | ||
| for ( const prop of Object.getOwnPropertyNames( obj ) ) { | ||
|
|
@@ -276,7 +278,7 @@ const reservedElementPropertyKeys = new Set( [ | |
| /** | ||
| * Gets element data. | ||
| * | ||
| * @param {string} xpath XPath. | ||
| * @param {string} xpath - XPath. | ||
| * @return {ElementData|null} Element data, or null if no element for the XPath exists. | ||
| */ | ||
| function getElementData( xpath ) { | ||
|
|
@@ -292,8 +294,8 @@ function getElementData( xpath ) { | |
| /** | ||
| * Extends element data. | ||
| * | ||
| * @param {string} xpath XPath. | ||
| * @param {ExtendedElementData} properties Properties. | ||
| * @param {string} xpath - XPath. | ||
| * @param {ExtendedElementData} properties - Properties. | ||
| */ | ||
| function extendElementData( xpath, properties ) { | ||
| if ( ! elementsByXPath.has( xpath ) ) { | ||
|
|
@@ -310,6 +312,23 @@ function extendElementData( xpath, properties ) { | |
| Object.assign( elementData, properties ); | ||
| } | ||
|
|
||
| /** | ||
| * Compresses a string using CompressionStream API. | ||
| * | ||
| * @param {string} string - String to compress. | ||
| * @return {Promise<Blob>} Compressed data. | ||
| */ | ||
| async function compress( string ) { | ||
| const encodedData = new TextEncoder().encode( string ); | ||
| const compressedDataStream = new Blob( [ encodedData ] ) | ||
| .stream() | ||
| .pipeThrough( new CompressionStream( 'gzip' ) ); | ||
| const compressedDataBuffer = await new Response( | ||
| compressedDataStream | ||
| ).arrayBuffer(); | ||
| return new Blob( [ compressedDataBuffer ], { type: 'application/gzip' } ); | ||
| } | ||
|
|
||
| /** | ||
| * @typedef {{timestamp: number, creationDate: Date}} UrlMetricDebugData | ||
| * @typedef {{groups: Array<{url_metrics: Array<UrlMetricDebugData>}>}} CollectionDebugData | ||
|
|
@@ -318,23 +337,23 @@ function extendElementData( xpath, properties ) { | |
| /** | ||
| * Detects the LCP element, loaded images, client viewport and store for future optimizations. | ||
| * | ||
| * @param {Object} args Args. | ||
| * @param {string[]} args.extensionModuleUrls URLs for extension script modules to import. | ||
| * @param {number} args.minViewportAspectRatio Minimum aspect ratio allowed for the viewport. | ||
| * @param {number} args.maxViewportAspectRatio Maximum aspect ratio allowed for the viewport. | ||
| * @param {boolean} args.isDebug Whether to show debug messages. | ||
| * @param {string} args.restApiEndpoint URL for where to send the detection data. | ||
| * @param {string} [args.restApiNonce] Nonce for the REST API when the user is logged-in. | ||
| * @param {string} args.currentETag Current ETag. | ||
| * @param {string} args.currentUrl Current URL. | ||
| * @param {string} args.urlMetricSlug Slug for URL Metric. | ||
| * @param {number|null} args.cachePurgePostId Cache purge post ID. | ||
| * @param {string} args.urlMetricHMAC HMAC for URL Metric storage. | ||
| * @param {URLMetricGroupStatus[]} args.urlMetricGroupStatuses URL Metric group statuses. | ||
| * @param {number} args.storageLockTTL The TTL (in seconds) for the URL Metric storage lock. | ||
| * @param {number} args.freshnessTTL The freshness age (TTL) for a given URL Metric. | ||
| * @param {string} args.webVitalsLibrarySrc The URL for the web-vitals library. | ||
| * @param {CollectionDebugData} [args.urlMetricGroupCollection] URL Metric group collection, when in debug mode. | ||
| * @param {Object} args - Args. | ||
| * @param {string[]} args.extensionModuleUrls - URLs for extension script modules to import. | ||
| * @param {number} args.minViewportAspectRatio - Minimum aspect ratio allowed for the viewport. | ||
| * @param {number} args.maxViewportAspectRatio - Maximum aspect ratio allowed for the viewport. | ||
| * @param {boolean} args.isDebug - Whether to show debug messages. | ||
| * @param {string} args.restApiEndpoint - URL for where to send the detection data. | ||
| * @param {string} [args.restApiNonce] - Nonce for the REST API when the user is logged-in. | ||
westonruter marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * @param {string} args.currentETag - Current ETag. | ||
| * @param {string} args.currentUrl - Current URL. | ||
| * @param {string} args.urlMetricSlug - Slug for URL Metric. | ||
| * @param {number|null} args.cachePurgePostId - Cache purge post ID. | ||
| * @param {string} args.urlMetricHMAC - HMAC for URL Metric storage. | ||
| * @param {URLMetricGroupStatus[]} args.urlMetricGroupStatuses - URL Metric group statuses. | ||
| * @param {number} args.storageLockTTL - The TTL (in seconds) for the URL Metric storage lock. | ||
| * @param {number} args.freshnessTTL - The freshness age (TTL) for a given URL Metric. | ||
| * @param {string} args.webVitalsLibrarySrc - The URL for the web-vitals library. | ||
| * @param {CollectionDebugData} [args.urlMetricGroupCollection] - URL Metric group collection, when in debug mode. | ||
| */ | ||
| export default async function detect( { | ||
| minViewportAspectRatio, | ||
westonruter marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
@@ -648,9 +667,7 @@ export default async function detect( { | |
| for ( const elementIntersection of elementIntersections ) { | ||
| const xpath = breadcrumbedElementsMap.get( elementIntersection.target ); | ||
| if ( ! xpath ) { | ||
| if ( isDebug ) { | ||
| error( 'Unable to look up XPath for element' ); | ||
| } | ||
| warn( 'Unable to look up XPath for element' ); | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -773,26 +790,24 @@ export default async function detect( { | |
| const maxBodyLengthKiB = 64; | ||
| const maxBodyLengthBytes = maxBodyLengthKiB * 1024; | ||
|
|
||
| // TODO: Consider adding replacer to reduce precision on numbers in DOMRect to reduce payload size. | ||
| const jsonBody = JSON.stringify( urlMetric ); | ||
| const compressedJsonBody = await compress( jsonBody ); | ||
|
||
| const percentOfBudget = | ||
| ( jsonBody.length / ( maxBodyLengthKiB * 1000 ) ) * 100; | ||
| ( compressedJsonBody.size / ( maxBodyLengthKiB * 1000 ) ) * 100; | ||
|
|
||
| /* | ||
| * According to the fetch() spec: | ||
| * "If the sum of contentLength and inflightKeepaliveBytes is greater than 64 kibibytes, then return a network error." | ||
| * This is what browsers also implement for navigator.sendBeacon(). Therefore, if the size of the JSON is greater | ||
| * than the maximum, we should avoid even trying to send it. | ||
| */ | ||
| if ( jsonBody.length > maxBodyLengthBytes ) { | ||
| if ( isDebug ) { | ||
| error( | ||
| `Unable to send URL Metric because it is ${ jsonBody.length.toLocaleString() } bytes, ${ Math.round( | ||
| percentOfBudget | ||
| ) }% of ${ maxBodyLengthKiB } KiB limit:`, | ||
| urlMetric | ||
| ); | ||
| } | ||
| if ( compressedJsonBody.size > maxBodyLengthBytes ) { | ||
| error( | ||
| `Unable to send URL Metric because it is ${ compressedJsonBody.size.toLocaleString() } bytes, ${ Math.round( | ||
| percentOfBudget | ||
| ) }% of ${ maxBodyLengthKiB } KiB limit:`, | ||
| urlMetric | ||
| ); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -806,7 +821,7 @@ export default async function detect( { | |
| String( getCurrentTime() ) | ||
| ); | ||
|
|
||
| const message = `Sending URL Metric (${ jsonBody.length.toLocaleString() } bytes, ${ Math.round( | ||
| const message = `Sending URL Metric (${ compressedJsonBody.size.toLocaleString() } bytes, ${ Math.round( | ||
| percentOfBudget | ||
| ) }% of ${ maxBodyLengthKiB } KiB limit):`; | ||
|
|
||
|
|
@@ -830,12 +845,7 @@ export default async function detect( { | |
| ); | ||
| } | ||
| url.searchParams.set( 'hmac', urlMetricHMAC ); | ||
| navigator.sendBeacon( | ||
| url, | ||
| new Blob( [ jsonBody ], { | ||
| type: 'application/json', | ||
| } ) | ||
| ); | ||
| navigator.sendBeacon( url, compressedJsonBody ); | ||
|
|
||
| // Clean up. | ||
| breadcrumbedElementsMap.clear(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -218,27 +218,6 @@ function od_handle_rest_request( WP_REST_Request $request ) { | |||||||
| ); | ||||||||
| } | ||||||||
|
|
||||||||
| /* | ||||||||
| * The limit for data sent via navigator.sendBeacon() is 64 KiB. This limit is checked in detect.js so that the | ||||||||
| * request will not even be attempted if the payload is too large. This server-side restriction is added as a | ||||||||
| * safeguard against clients sending possibly malicious payloads much larger than 64 KiB which should never be | ||||||||
| * getting sent. | ||||||||
| */ | ||||||||
| $max_size = 64 * 1024; | ||||||||
| $content_length = strlen( (string) wp_json_encode( $url_metric ) ); | ||||||||
| if ( $content_length > $max_size ) { | ||||||||
| return new WP_Error( | ||||||||
| 'rest_content_too_large', | ||||||||
| sprintf( | ||||||||
| /* translators: 1: the size of the payload, 2: the maximum allowed payload size */ | ||||||||
| __( 'JSON payload size is %1$s bytes which is larger than the maximum allowed size of %2$s bytes.', 'optimization-detective' ), | ||||||||
| number_format_i18n( $content_length ), | ||||||||
| number_format_i18n( $max_size ) | ||||||||
| ), | ||||||||
| array( 'status' => 413 ) | ||||||||
| ); | ||||||||
| } | ||||||||
|
|
||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed this check from Additionally, if a malicious user sends data with
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh yes, good points!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add a limit of 1MB for URL metrics? I am not sure about the what should be the limit.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder as well, when
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I think we were thinking along the same lines here and commented at the same time.) Yes, let's maybe introduce a new function that returns the maximum byte size for a URL Metric serialized to JSON. The function's return value can be filtered, but defaulting to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a thought—should we compress the URL metrics before storing them in the database? Would using
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need to compress in the DB, no. That would be an over-optimization and could negatively impact performance at runtime with having to decompress. Storage is cheap. What is expensive is network bandwidth, especially with the limitations of the beacon size. So that's what we want to optimize for.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried submitting a URL Metric with various numbers of copies of the first element completely populating the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried doing something silly and visit every single tag: add_action( 'od_register_tag_visitors', function ( OD_Tag_Visitor_Registry $registry ) {
$registry->register( 'all', '__return_true' );
} );On a post with 1,000 Image blocks, this resulted in 2,198 elements in the URL Metric. The uncompressed size of the URL Metric is 1,068,353 bytes (about 1 MB), with the compressed size being 33,047 bytes (52% of 64 KiB limit). So indeed, I think 1 MB is probably far larger than any URL Metric will naturally be, so it's a safe upper bound for what should be allowed. |
||||||||
| try { | ||||||||
| $url_metric_group->add_url_metric( $url_metric ); | ||||||||
| } catch ( InvalidArgumentException $e ) { | ||||||||
|
|
@@ -346,3 +325,65 @@ function od_trigger_page_cache_invalidation( int $cache_purge_post_id ): void { | |||||||
| /** This action is documented in wp-includes/post.php. */ | ||||||||
| do_action( 'save_post', $post->ID, $post, /* $update */ true ); | ||||||||
| } | ||||||||
|
|
||||||||
| /** | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just had a thought that perhaps an |
||||||||
| * Decompresses the REST API request body for the URL Metrics endpoint. | ||||||||
| * | ||||||||
| * @since n.e.x.t | ||||||||
| * @access private | ||||||||
| * | ||||||||
| * @phpstan-param WP_REST_Request<array<string, mixed>> $request | ||||||||
| * | ||||||||
| * @param mixed $result Response to replace the requested version with. Can be anything a normal endpoint can return, or null to not hijack the request. | ||||||||
| * @param WP_REST_Server $server Server instance. | ||||||||
| * @param WP_REST_Request $request Request used to generate the response. | ||||||||
| * @return mixed Response to replace the requested version with. | ||||||||
| */ | ||||||||
| function od_decompress_rest_request_body( $result, WP_REST_Server $server, WP_REST_Request $request ) { | ||||||||
| if ( | ||||||||
| $request->get_route() === '/' . OD_REST_API_NAMESPACE . OD_URL_METRICS_ROUTE && | ||||||||
| 'application/gzip' === $request->get_header( 'Content-Type' ) | ||||||||
|
||||||||
| 'application/gzip' === $request->get_header( 'Content-Type' ) | |
| 'application/gzip' === $request->get_header( 'Content-Type' ) && | |
| function_exists( 'gzdecode' ) |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked at this in PhpStorm and I noticed an error:

So then I added it to require in c59c3ed, but then I wondered what core does with this function. I found this:
Note that core checks if the function exists. I think what we'll have to do is do the same, and that od_get_detection_script() we'll need to add a new param which is exported to the client like gzdecodeAvailable which contains the value of function_exists( 'gzdecode' ). This can then be used to conditionally compress the URL Metric data before sending it to the REST API.
We should use the function in the same way it is being used in core, perhaps even with the error suppression.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added logic for handling gzdecode() availability in af1d1dc
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this because in PhpStorm because otherwise there is a warning:
The reason the IDE doesn't know the export is used is because it is used in PHP and via a dynamic export. See also https://stackoverflow.com/questions/54687009/export-default-unused-in-webstorm
This
noinspectionannotation is used in Gutenberg once as well: https://github.com/WordPress/gutenberg/blob/8b88ada2fd675509bf0c39a55c23a75cc67987cb/packages/components/src/mobile/link-settings/test/edit.native.js#L1