diff --git a/CHANGELOG.md b/CHANGELOG.md index 3085a431c..fdb5d4d3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [7.3.1] - 2026-01-06 + +### Added +- OpenTelemetry context-based base attributes to DiagnosticsMetrics + +### Fixed +- Merge request context attributes with custom metrics - custom metrics now automatically include request-scoped base attributes (account, route_id, status_code, etc.) using OpenTelemetry Context API + +## [7.3.0] - 2025-12-16 +### Changed +- Update temporality to delta in metrics client +- New deploy pipeline with npm publish flow + +### Fixed +- Fix account attribute to diagnostics metrics + ## [7.2.9-beta.1] - 2025-12-12 ### Changed - Increase maximum attributes limit for diagnostics metrics diff --git a/package.json b/package.json index 21a6e597f..3cd55f6a6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@vtex/api", - "version": "7.3.0", + "version": "7.3.1", "description": "VTEX I/O API client", "main": "lib/index.js", "typings": "lib/index.d.ts", diff --git a/src/metrics/DiagnosticsMetrics.test.ts b/src/metrics/DiagnosticsMetrics.test.ts index 62b7bfc1e..c06ab9ede 100644 --- a/src/metrics/DiagnosticsMetrics.test.ts +++ b/src/metrics/DiagnosticsMetrics.test.ts @@ -1,4 +1,6 @@ import { Types } from '@vtex/diagnostics-nodejs' +import { context } from '@opentelemetry/api' +import { AsyncHooksContextManager } from '@opentelemetry/context-async-hooks' import { DiagnosticsMetrics } from './DiagnosticsMetrics' // Mock only the external I/O boundary (getMetricClient) @@ -14,6 +16,11 @@ jest.mock('../constants', () => ({ import { getMetricClient } from '../service/metrics/client' +// Set up OpenTelemetry context manager for async context propagation +const contextManager = new AsyncHooksContextManager() +contextManager.enable() +context.setGlobalContextManager(contextManager) + describe('DiagnosticsMetrics', () => { let diagnosticsMetrics: DiagnosticsMetrics let mockMetricsClient: Types.MetricClient @@ -317,7 +324,7 @@ describe('DiagnosticsMetrics', () => { }) }) - it('should allow up to 7 attributes without warning', async () => { + it('should allow up to 7 custom attributes without warning', async () => { const warnSpy = jest.spyOn(console, 'warn').mockImplementation() const attributes = { @@ -338,7 +345,7 @@ describe('DiagnosticsMetrics', () => { warnSpy.mockRestore() }) - it('should limit attributes to 7 and warn when exceeded (recordLatency)', async () => { + it('should limit custom attributes to 7 and warn when exceeded (recordLatency)', async () => { const warnSpy = jest.spyOn(console, 'warn').mockImplementation() const attributes = { @@ -354,7 +361,7 @@ describe('DiagnosticsMetrics', () => { diagnosticsMetrics.recordLatency([0, 1000000], attributes) - // Should only include first 7 attributes + // Should only include first 7 custom attributes const recorded = recordedHistogramCalls[0].attributes expect(Object.keys(recorded)).toHaveLength(7) expect(recorded).toEqual({ @@ -368,13 +375,13 @@ describe('DiagnosticsMetrics', () => { }) expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining('Attribute limit exceeded: 8 attributes provided, using only the first 7') + expect.stringContaining('Custom attribute limit exceeded: 8 custom attributes provided, using only the first 7') ) warnSpy.mockRestore() }) - it('should limit attributes to 7 and warn when exceeded (incrementCounter)', async () => { + it('should limit custom attributes to 7 and warn when exceeded (incrementCounter)', async () => { const warnSpy = jest.spyOn(console, 'warn').mockImplementation() const attributes = { @@ -390,7 +397,7 @@ describe('DiagnosticsMetrics', () => { diagnosticsMetrics.incrementCounter('test_counter', 1, attributes) - // Should only include first 7 attributes + // Should only include first 7 custom attributes const recorded = recordedCounterCalls.get('test_counter')![0].attributes expect(Object.keys(recorded)).toHaveLength(7) expect(recorded).toEqual({ @@ -404,13 +411,13 @@ describe('DiagnosticsMetrics', () => { }) expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining('Attribute limit exceeded: 8 attributes provided, using only the first 7') + expect.stringContaining('Custom attribute limit exceeded: 8 custom attributes provided, using only the first 7') ) warnSpy.mockRestore() }) - it('should limit attributes to 7 and warn when exceeded (setGauge)', async () => { + it('should limit custom attributes to 7 and warn when exceeded (setGauge)', async () => { const warnSpy = jest.spyOn(console, 'warn').mockImplementation() const attributes = { @@ -426,7 +433,7 @@ describe('DiagnosticsMetrics', () => { diagnosticsMetrics.setGauge('test_gauge', 100, attributes) - // Should only include first 7 attributes + // Should only include first 7 custom attributes const recorded = recordedGaugeCalls.get('test_gauge')![0].attributes expect(Object.keys(recorded)).toHaveLength(7) expect(recorded).toEqual({ @@ -440,10 +447,308 @@ describe('DiagnosticsMetrics', () => { }) expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining('Attribute limit exceeded: 8 attributes provided, using only the first 7') + expect.stringContaining('Custom attribute limit exceeded: 8 custom attributes provided, using only the first 7') ) warnSpy.mockRestore() }) }) + + describe('Base Attributes Merging (runWithBaseAttributes)', () => { + beforeEach(async () => { + // Wait for initialization + await new Promise(resolve => setTimeout(resolve, 10)) + }) + + describe('recordLatency', () => { + it('should merge base attributes with custom attributes', () => { + const baseAttributes = { account: 'testaccount', route_id: 'test-route' } + const customAttributes = { operation: 'custom-op', status: 'success' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual({ + account: 'testaccount', + route_id: 'test-route', + operation: 'custom-op', + status: 'success', + }) + }) + + it('should give base attributes precedence over custom attributes on conflicts', () => { + const baseAttributes = { status: 'base-status', account: 'base-account' } + const customAttributes = { status: 'custom-status', operation: 'test-op' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual({ + account: 'base-account', + status: 'base-status', // Base takes precedence, custom 'status' is dropped + operation: 'test-op', // Non-conflicting custom attribute is kept + }) + }) + + it('should silently drop conflicting custom attributes without warnings', () => { + const warnSpy = jest.spyOn(console, 'warn').mockImplementation() + + const baseAttributes = { + account: 'base-account', + route_id: 'base-route', + component: 'base-component', + } + const customAttributes = { + account: 'custom-account', // Conflicts - should be dropped + route_id: 'custom-route', // Conflicts - should be dropped + operation: 'custom-op', // No conflict - should be kept + status: 'success', // No conflict - should be kept + } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + + // Verify no warnings were logged for conflicting attributes + expect(warnSpy).not.toHaveBeenCalled() + + // Verify base attributes are preserved, conflicting custom attributes dropped + expect(recordedHistogramCalls[0].attributes).toEqual({ + account: 'base-account', // Base preserved + route_id: 'base-route', // Base preserved + component: 'base-component', // Base preserved + operation: 'custom-op', // Non-conflicting custom kept + status: 'success', // Non-conflicting custom kept + }) + + warnSpy.mockRestore() + }) + + it('should use only base attributes when no custom attributes provided', () => { + const baseAttributes = { account: 'testaccount', route_id: 'test-route' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.recordLatency(100) + }) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual(baseAttributes) + }) + + it('should use only custom attributes when outside base attributes context', () => { + const customAttributes = { operation: 'custom-op' } + + diagnosticsMetrics.recordLatency(100, customAttributes) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual(customAttributes) + }) + + it('should work with nested runWithBaseAttributes calls (inner takes precedence)', () => { + const outerBase = { account: 'outer-account', level: 'outer' } + const innerBase = { account: 'inner-account', level: 'inner' } + const customAttributes = { operation: 'test' } + + diagnosticsMetrics.runWithBaseAttributes(outerBase, () => { + diagnosticsMetrics.runWithBaseAttributes(innerBase, () => { + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + }) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual({ + account: 'inner-account', + level: 'inner', + operation: 'test', + }) + }) + }) + + describe('incrementCounter', () => { + it('should merge base attributes with custom attributes', () => { + const baseAttributes = { account: 'testaccount' } + const customAttributes = { method: 'GET' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.incrementCounter('http_requests_total', 1, customAttributes) + }) + + const calls = recordedCounterCalls.get('http_requests_total') + expect(calls).toHaveLength(1) + expect(calls![0].attributes).toEqual({ + account: 'testaccount', + method: 'GET', + }) + }) + + it('should give base attributes precedence over custom attributes on conflicts', () => { + const baseAttributes = { status: 'base', account: 'base-account' } + const customAttributes = { status: 'custom', method: 'GET' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.incrementCounter('test_counter', 1, customAttributes) + }) + + const calls = recordedCounterCalls.get('test_counter') + expect(calls![0].attributes).toEqual({ + status: 'base', // Base takes precedence + account: 'base-account', + method: 'GET', // Non-conflicting custom attribute is kept + }) + }) + }) + + describe('setGauge', () => { + it('should merge base attributes with custom attributes', () => { + const baseAttributes = { environment: 'production' } + const customAttributes = { cache: 'pages' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.setGauge('cache_items_current', 1024, customAttributes) + }) + + const calls = recordedGaugeCalls.get('cache_items_current') + expect(calls).toHaveLength(1) + expect(calls![0].attributes).toEqual({ + environment: 'production', + cache: 'pages', + }) + }) + + it('should give base attributes precedence over custom attributes on conflicts', () => { + const baseAttributes = { type: 'base', environment: 'prod' } + const customAttributes = { type: 'custom', cache: 'pages' } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.setGauge('test_gauge', 100, customAttributes) + }) + + const calls = recordedGaugeCalls.get('test_gauge') + expect(calls![0].attributes).toEqual({ + type: 'base', // Base takes precedence + environment: 'prod', + cache: 'pages', // Non-conflicting custom attribute is kept + }) + }) + }) + + describe('async operations', () => { + it('should maintain base attributes context through async operations', async () => { + const baseAttributes = { account: 'async-account' } + const customAttributes = { operation: 'async-op' } + + await diagnosticsMetrics.runWithBaseAttributes(baseAttributes, async () => { + // Simulate async operation + await new Promise(resolve => setTimeout(resolve, 5)) + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + + expect(recordedHistogramCalls).toHaveLength(1) + expect(recordedHistogramCalls[0].attributes).toEqual({ + account: 'async-account', + operation: 'async-op', + }) + }) + + it('should isolate context between concurrent async operations', async () => { + const baseAttrs1 = { account: 'account1' } + const baseAttrs2 = { account: 'account2' } + + await Promise.all([ + diagnosticsMetrics.runWithBaseAttributes(baseAttrs1, async () => { + await new Promise(resolve => setTimeout(resolve, 10)) + diagnosticsMetrics.recordLatency(100, { op: 'op1' }) + }), + diagnosticsMetrics.runWithBaseAttributes(baseAttrs2, async () => { + await new Promise(resolve => setTimeout(resolve, 5)) + diagnosticsMetrics.recordLatency(200, { op: 'op2' }) + }), + ]) + + expect(recordedHistogramCalls).toHaveLength(2) + + // Order might vary due to timing, so check both are present + const attrs = recordedHistogramCalls.map(c => c.attributes) + expect(attrs).toContainEqual({ account: 'account1', op: 'op1' }) + expect(attrs).toContainEqual({ account: 'account2', op: 'op2' }) + }) + }) + + describe('attribute limiting with base attributes', () => { + beforeEach(() => { + // Enable LINKED for these tests so warnings are triggered + const constants = require('../constants') + Object.defineProperty(constants, 'LINKED', { + value: true, + writable: true, + configurable: true, + }) + }) + + afterEach(() => { + // Reset LINKED back to false + const constants = require('../constants') + Object.defineProperty(constants, 'LINKED', { + value: false, + writable: true, + configurable: true, + }) + }) + + it('should limit only custom attributes to 7, not base attributes', () => { + const warnSpy = jest.spyOn(console, 'warn').mockImplementation() + + const baseAttributes = { + base1: 'value1', + base2: 'value2', + base3: 'value3', + base4: 'value4', + } + const customAttributes = { + custom1: 'value1', + custom2: 'value2', + custom3: 'value3', + custom4: 'value4', + custom5: 'value5', + custom6: 'value6', + custom7: 'value7', + custom8: 'value8', // This should be dropped + } + + diagnosticsMetrics.runWithBaseAttributes(baseAttributes, () => { + diagnosticsMetrics.recordLatency(100, customAttributes) + }) + + // 4 base attributes + 7 custom attributes (8th custom dropped) = 11 total + const recorded = recordedHistogramCalls[0].attributes + expect(Object.keys(recorded)).toHaveLength(11) + + // Verify all base attributes are present + expect(recorded.base1).toBe('value1') + expect(recorded.base2).toBe('value2') + expect(recorded.base3).toBe('value3') + expect(recorded.base4).toBe('value4') + + // Verify only first 7 custom attributes are present + expect(recorded.custom1).toBe('value1') + expect(recorded.custom2).toBe('value2') + expect(recorded.custom3).toBe('value3') + expect(recorded.custom4).toBe('value4') + expect(recorded.custom5).toBe('value5') + expect(recorded.custom6).toBe('value6') + expect(recorded.custom7).toBe('value7') + expect(recorded.custom8).toBeUndefined() // 8th custom attribute should be dropped + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('Custom attribute limit exceeded: 8 custom attributes provided, using only the first 7') + ) + + warnSpy.mockRestore() + }) + }) + }) }) diff --git a/src/metrics/DiagnosticsMetrics.ts b/src/metrics/DiagnosticsMetrics.ts index cd1f7b699..33c533313 100644 --- a/src/metrics/DiagnosticsMetrics.ts +++ b/src/metrics/DiagnosticsMetrics.ts @@ -1,13 +1,22 @@ -import { Attributes } from '@opentelemetry/api' +import { Attributes, context, createContextKey } from '@opentelemetry/api' import { Types } from '@vtex/diagnostics-nodejs' import { getMetricClient } from '../service/metrics/client' import { METRIC_CLIENT_INIT_TIMEOUT_MS, LINKED } from '../constants' /** - * Maximum number of attributes allowed per metric to control cardinality. - * This limit applies to external usage (VTEX IO Apps) to prevent unbounded metric dimensions. + * Maximum number of custom attributes allowed per metric call to control cardinality. + * This limit applies only to custom attributes provided by callers (VTEX IO Apps). + * Base attributes (set via runWithBaseAttributes) are not counted toward this limit. + * + * Total attributes sent = base attributes + custom attributes (up to MAX_CUSTOM_ATTRIBUTES) + */ +const MAX_CUSTOM_ATTRIBUTES = 7 + +/** + * Context key for storing base attributes in OpenTelemetry context. + * These attributes are automatically merged with custom attributes in all metric methods. */ -const MAX_ATTRIBUTES = 7 +const BASE_ATTRIBUTES_KEY = createContextKey('vtex.metrics.baseAttributes') /** * Converts an hrtime tuple [seconds, nanoseconds] to milliseconds. @@ -17,30 +26,32 @@ function hrtimeToMillis(hrtime: [number, number]): number { } /** - * Limits the number of attributes to prevent high cardinality. - * Takes the first MAX_ATTRIBUTES entries if the limit is exceeded. + * Limits the number of custom attributes to prevent high cardinality. + * Takes the first MAX_CUSTOM_ATTRIBUTES entries if the limit is exceeded. + * + * Note: This limit applies only to custom attributes. Base attributes are not limited. * - * @param attributes Optional attributes object - * @returns Limited attributes object or undefined + * @param customAttributes Optional custom attributes object + * @returns Limited custom attributes object or undefined */ -function limitAttributes(attributes?: Attributes): Attributes | undefined { - if (!attributes) { +function limitCustomAttributes(customAttributes?: Attributes): Attributes | undefined { + if (!customAttributes) { return undefined } - const entries = Object.entries(attributes) - if (entries.length <= MAX_ATTRIBUTES) { - return attributes + const entries = Object.entries(customAttributes) + if (entries.length <= MAX_CUSTOM_ATTRIBUTES) { + return customAttributes } if (LINKED) { console.warn( - `Attribute limit exceeded: ${entries.length} attributes provided, using only the first ${MAX_ATTRIBUTES}. ` + - `Consider reducing the number of attributes to avoid high cardinality.` + `Custom attribute limit exceeded: ${entries.length} custom attributes provided, using only the first ${MAX_CUSTOM_ATTRIBUTES}. ` + + `Consider reducing the number of custom attributes to avoid high cardinality. ` ) } - return Object.fromEntries(entries.slice(0, MAX_ATTRIBUTES)) + return Object.fromEntries(entries.slice(0, MAX_CUSTOM_ATTRIBUTES)) } /** @@ -51,6 +62,21 @@ function limitAttributes(attributes?: Attributes): Attributes | undefined { * Uses a single histogram for all latency measurements with attributes to differentiate. * This follows OpenTelemetry best practices and reduces metric cardinality. * + * ## Base Attributes (Request Context) + * + * DiagnosticsMetrics supports automatic merging of request-scoped "base attributes" + * with custom attributes provided in each metric call. This is useful for automatically + * including request context (account, status_code, route_id, etc.) in all metrics + * recorded during a request lifecycle. + * + * Use `runWithBaseAttributes()` to set base attributes for a scope. All metric calls + * within that scope will automatically include these base attributes, merged with + * any custom attributes provided. + * + * **Important:** Base attributes take precedence over custom attributes. If a custom + * attribute has the same key as a base attribute, the custom attribute is silently + * dropped and the base attribute value is used. + * * @example * ```typescript * const diagnosticsMetrics = new DiagnosticsMetrics() @@ -69,6 +95,16 @@ function limitAttributes(attributes?: Attributes): Attributes | undefined { * * // Set a gauge value * diagnosticsMetrics.setGauge('cache_items_current', 1024, { cache: 'pages' }) + * + * // Using base attributes for request context + * await diagnosticsMetrics.runWithBaseAttributes( + * { 'vtex.account.name': 'mystore', status_code: 200 }, + * async () => { + * // All metrics recorded here will include the base attributes + * diagnosticsMetrics.recordLatency(100, { operation: 'custom-op' }) + * // Result: { 'vtex.account.name': 'mystore', status_code: 200, operation: 'custom-op' } + * } + * ) * ``` */ export class DiagnosticsMetrics { @@ -134,13 +170,109 @@ export class DiagnosticsMetrics { }) } + /** + * Execute a function with base attributes set in the OpenTelemetry context. + * All metric calls within the function will automatically include these base attributes, + * merged with any custom attributes provided in each call. + * + * Base attributes take precedence over custom attributes when there are key conflicts. + * Conflicting custom attributes are silently dropped. + * + * @param baseAttributes Base attributes to include in all metrics within the scope + * @param fn Function to execute with the base attributes context + * @returns The return value of the function + * + * @example + * ```typescript + * // In a request middleware + * await diagnosticsMetrics.runWithBaseAttributes( + * { + * 'vtex.account.name': ctx.vtex.account, + * status_code: ctx.status, + * route_id: ctx.vtex.route.id, + * }, + * async () => { + * await next() + * } + * ) + * + * // In app code (custom attributes are merged with base) + * diagnosticsMetrics.recordLatency(elapsed, { operation: 'my-operation', status: 'success' }) + * // Result includes both base attributes AND custom attributes + * ``` + */ + public runWithBaseAttributes(baseAttributes: Attributes, fn: () => T): T { + const currentContext = context.active() + const newContext = currentContext.setValue(BASE_ATTRIBUTES_KEY, baseAttributes) + return context.with(newContext, fn) + } + + /** + * Get the base attributes from the current OpenTelemetry context. + * Returns undefined if no base attributes are set. + */ + private getBaseAttributes(): Attributes | undefined { + return context.active().getValue(BASE_ATTRIBUTES_KEY) as Attributes | undefined + } + + /** + * Merge base attributes from context with provided custom attributes. + * Base attributes take precedence over custom attributes when there are key conflicts. + * + * Custom attributes are limited to MAX_CUSTOM_ATTRIBUTES before merging. + * Custom attributes with keys that conflict with base attributes are silently dropped. + * Base attributes are not limited. + * + * @param customAttributes Custom attributes provided by the caller + * @returns Merged attributes (base + non-conflicting limited custom) or undefined if both are empty + */ + private mergeAttributes(customAttributes?: Attributes): Attributes | undefined { + const baseAttributes = this.getBaseAttributes() + + // Limit custom attributes before merging + const limitedCustomAttributes = limitCustomAttributes(customAttributes) + + if (!baseAttributes && !limitedCustomAttributes) { + return undefined + } + + if (!baseAttributes) { + return limitedCustomAttributes + } + + if (!limitedCustomAttributes) { + return baseAttributes + } + + // Filter out custom attributes that conflict with base attributes (base takes precedence) + const baseKeys = new Set(Object.keys(baseAttributes)) + const nonConflictingCustomAttributes: Attributes = {} + + for (const [key, value] of Object.entries(limitedCustomAttributes)) { + if (!baseKeys.has(key)) { + nonConflictingCustomAttributes[key] = value + } + // Silently drop conflicting custom attributes - base attributes take precedence + } + + // Merge: base attributes + non-conflicting custom attributes + return { ...baseAttributes, ...nonConflictingCustomAttributes } + } + /** * Record a latency measurement using the single shared histogram. * Accepts either an hrtime tuple from process.hrtime() or milliseconds as a number. * Use attributes to differentiate between different operations. + * + * Base attributes from the current context (set via `runWithBaseAttributes`) are + * automatically merged with the provided custom attributes. Base attributes take + * precedence - if a custom attribute key conflicts with a base attribute key, + * the custom attribute is silently dropped. + * + * Custom attributes are limited to MAX_CUSTOM_ATTRIBUTES (5). Base attributes are not limited. * * @param value Either [seconds, nanoseconds] from process.hrtime() or milliseconds - * @param attributes Attributes including 'operation' to identify the operation type (max of MAX_ATTRIBUTES attributes) + * @param attributes Custom attributes including 'operation' to identify the operation type (max 5 custom attributes) * * @example * ```typescript @@ -161,20 +293,27 @@ export class DiagnosticsMetrics { // Convert hrtime to milliseconds if needed const milliseconds = Array.isArray(value) ? hrtimeToMillis(value) : value - // Limit attributes to prevent high cardinality - const limitedAttributes = limitAttributes(attributes) + // Merge base attributes from context with custom attributes (custom attrs are limited internally) + const mergedAttributes = this.mergeAttributes(attributes) - // Record to the single shared histogram with attributes - this.latencyHistogram.record(milliseconds, limitedAttributes) + // Record to the single shared histogram with merged attributes + this.latencyHistogram.record(milliseconds, mergedAttributes) } /** * Increment a counter by a specific value. * Multiple counters are stored by name since counters represent different types of events. + * + * Base attributes from the current context (set via `runWithBaseAttributes`) are + * automatically merged with the provided custom attributes. Base attributes take + * precedence - if a custom attribute key conflicts with a base attribute key, + * the custom attribute is silently dropped. + * + * Custom attributes are limited to MAX_CUSTOM_ATTRIBUTES (5). Base attributes are not limited. * * @param name Counter name (e.g., 'http_requests_total', 'cache_hits_total') * @param value Amount to increment by (typically 1) - * @param attributes Optional attributes for the counter (max of MAX_ATTRIBUTES attributes, e.g., { method: 'GET', status: '2xx' }) + * @param attributes Optional custom attributes for the counter (max 5 custom attributes, e.g., { method: 'GET', status: '2xx' }) * * @example * ```typescript @@ -196,20 +335,27 @@ export class DiagnosticsMetrics { this.counters.set(name, counter) } - // Limit attributes to prevent high cardinality - const limitedAttributes = limitAttributes(attributes) + // Merge base attributes from context with custom attributes (custom attrs are limited internally) + const mergedAttributes = this.mergeAttributes(attributes) // Increment the counter - this.counters.get(name)!.add(value, limitedAttributes) + this.counters.get(name)!.add(value, mergedAttributes) } /** * Set a gauge to a specific value (current state). * Multiple gauges are stored by name since gauges represent different types of measurements. + * + * Base attributes from the current context (set via `runWithBaseAttributes`) are + * automatically merged with the provided custom attributes. Base attributes take + * precedence - if a custom attribute key conflicts with a base attribute key, + * the custom attribute is silently dropped. + * + * Custom attributes are limited to MAX_CUSTOM_ATTRIBUTES (5). Base attributes are not limited. * * @param name Gauge name (e.g., 'cache_items_current', 'memory_usage_bytes') * @param value Current value - * @param attributes Optional attributes for the gauge (max of MAX_ATTRIBUTES attributes, e.g., { cache: 'pages' }) + * @param attributes Optional custom attributes for the gauge (max 5 custom attributes, e.g., { cache: 'pages' }) * * @example * ```typescript @@ -231,11 +377,11 @@ export class DiagnosticsMetrics { this.gauges.set(name, gauge) } - // Limit attributes to prevent high cardinality - const limitedAttributes = limitAttributes(attributes) + // Merge base attributes from context with custom attributes (custom attrs are limited internally) + const mergedAttributes = this.mergeAttributes(attributes) // Set the gauge value - this.gauges.get(name)!.set(value, limitedAttributes) + this.gauges.get(name)!.set(value, mergedAttributes) } } diff --git a/src/service/worker/runtime/http/middlewares/timings.test.ts b/src/service/worker/runtime/http/middlewares/timings.test.ts index 1984af4c8..6ebcac054 100644 --- a/src/service/worker/runtime/http/middlewares/timings.test.ts +++ b/src/service/worker/runtime/http/middlewares/timings.test.ts @@ -19,11 +19,12 @@ describe('timings middleware', () => { let consoleLogSpy: jest.SpyInstance beforeEach(() => { - // Mock DiagnosticsMetrics + // Mock DiagnosticsMetrics with runWithBaseAttributes that executes the function mockDiagnosticsMetrics = { recordLatency: jest.fn(), incrementCounter: jest.fn(), setGauge: jest.fn(), + runWithBaseAttributes: jest.fn((baseAttributes, fn) => fn()), } as any // Set up global @@ -71,13 +72,22 @@ describe('timings middleware', () => { it('should record metrics for successful request', async () => { await timings(mockCtx, mockNext) - // Diagnostics metrics - expect(mockDiagnosticsMetrics.recordLatency).toHaveBeenCalledWith( - [1, 500000000], + // Verify runWithBaseAttributes is called with base attributes + expect(mockDiagnosticsMetrics.runWithBaseAttributes).toHaveBeenCalledWith( expect.objectContaining({ + 'vtex.account.name': 'testaccount', component: 'http-handler', route_id: 'test-route', route_type: 'public', + }), + expect.any(Function) + ) + + // Diagnostics metrics - now only receive completion-specific attributes + // Base attributes (account, route_id, etc.) are merged internally by DiagnosticsMetrics + expect(mockDiagnosticsMetrics.recordLatency).toHaveBeenCalledWith( + [1, 500000000], + expect.objectContaining({ status_code: 200, status: 'success', }) @@ -87,9 +97,6 @@ describe('timings middleware', () => { 'http_handler_requests_total', 1, expect.objectContaining({ - component: 'http-handler', - route_id: 'test-route', - route_type: 'public', status_code: 200, status: 'success', }) @@ -138,17 +145,15 @@ describe('timings middleware', () => { const latencyCall = mockDiagnosticsMetrics.recordLatency.mock.calls[0] expect(latencyCall[1]).toMatchObject({ + status_code: 404, status: '4xx', }) - // Counter with status as attribute + // Counter with status as attribute (base attributes merged internally) expect(mockDiagnosticsMetrics.incrementCounter).toHaveBeenCalledWith( 'http_handler_requests_total', 1, expect.objectContaining({ - component: 'http-handler', - route_id: 'test-route', - route_type: 'public', status_code: 404, status: '4xx', }) @@ -163,6 +168,7 @@ describe('timings middleware', () => { expect(mockDiagnosticsMetrics.recordLatency).toHaveBeenCalledWith( expect.any(Array), expect.objectContaining({ + status_code: 500, status: 'error', }) ) diff --git a/src/service/worker/runtime/http/middlewares/timings.ts b/src/service/worker/runtime/http/middlewares/timings.ts index 2d4013dad..cde12a236 100644 --- a/src/service/worker/runtime/http/middlewares/timings.ts +++ b/src/service/worker/runtime/http/middlewares/timings.ts @@ -61,37 +61,60 @@ export async function timings < U extends RecorderState, V extends ParamsContext > (ctx: ServiceContext, next: () => Promise) { - // Errors will be caught by the next middleware so we don't have to catch. - await next() + const { vtex: { route: { id, type } }, vtex } = ctx - const { status: statusCode, vtex: { route: { id, type } }, timings: {total}, vtex } = ctx - const totalMillis = hrToMillis(total) - console.log(log(ctx, totalMillis)) - console.log(logBillingInfo(vtex, totalMillis)) + // Set base attributes for all metrics recorded during this request. + // This includes metrics recorded by VTEX IO apps during handler execution. + // These attributes will be automatically merged with custom attributes. + const baseAttributes: Attributes = { + [AttributeKeys.VTEX_ACCOUNT_NAME]: vtex.account, + component: 'http-handler', + route_id: id, + route_type: type, + } - const status = statusLabel(statusCode) - - // Legacy metrics (backward compatibility) - // Only batch successful responses so metrics don't consider errors - metrics.batch(`http-handler-${id}`, status === 'success' ? total : undefined, { [status]: 1 }) + // Wrap the request handling with base attributes context. + // All metrics recorded during next() will automatically include these attributes. + const executeWithBaseAttributes = async () => { + // Errors will be caught by the next middleware so we don't have to catch. + await next() - // New diagnostics metrics with stable names and attributes - if (global.diagnosticsMetrics) { - const attributes: Attributes = { - [AttributeKeys.VTEX_ACCOUNT_NAME]: vtex.account, - component: 'http-handler', - route_id: id, - route_type: type, - status_code: statusCode, - status, - } + const { status: statusCode, timings: {total} } = ctx + const totalMillis = hrToMillis(total) + console.log(log(ctx, totalMillis)) + console.log(logBillingInfo(vtex, totalMillis)) + + const status = statusLabel(statusCode) + + // Legacy metrics (backward compatibility) + // Only batch successful responses so metrics don't consider errors + metrics.batch(`http-handler-${id}`, status === 'success' ? total : undefined, { [status]: 1 }) + + // New diagnostics metrics with stable names and attributes + // Note: base attributes (account, route_id, route_type) are automatically merged + // We only need to provide the response-specific attributes here + if (global.diagnosticsMetrics) { + const responseAttributes: Attributes = { + status_code: statusCode, + status, + } - // Record latency histogram (record all requests, not just successful ones) - global.diagnosticsMetrics.recordLatency(total, attributes) + // Record latency histogram (record all requests, not just successful ones) + global.diagnosticsMetrics.recordLatency(total, responseAttributes) - // Increment counter (status is an attribute, not in metric name) - global.diagnosticsMetrics.incrementCounter('http_handler_requests_total', 1, attributes) + // Increment counter (status is an attribute, not in metric name) + global.diagnosticsMetrics.incrementCounter('http_handler_requests_total', 1, responseAttributes) + } else { + console.warn('DiagnosticsMetrics not available. HTTP handler metrics not reported.') + } + } + + // If diagnosticsMetrics is available, run with base attributes context + // Otherwise, run without context (fallback for graceful degradation) + if (global.diagnosticsMetrics) { + await global.diagnosticsMetrics.runWithBaseAttributes(baseAttributes, executeWithBaseAttributes) } else { console.warn('DiagnosticsMetrics not available. HTTP handler metrics not reported.') + await executeWithBaseAttributes() } } diff --git a/yarn.lock b/yarn.lock index ccfe59ceb..6330ccc57 100644 --- a/yarn.lock +++ b/yarn.lock @@ -254,6 +254,11 @@ protobufjs "^7.2.5" yargs "^17.7.2" +"@hapi/bourne@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@hapi/bourne/-/bourne-3.0.0.tgz#f11fdf7dda62fe8e336fa7c6642d9041f30356d7" + integrity sha512-Waj1cwPXJDucOib4a3bAISsKJVb15MKi9IvmTI/7ssVEm6sywXGjVJDhl6/umt1pK1ZS7PacXU3A1PmFKHEZ2w== + "@istanbuljs/load-nyc-config@^1.0.0": version "1.0.0" resolved "https://registry.yarnpkg.com/@istanbuljs/load-nyc-config/-/load-nyc-config-1.0.0.tgz#10602de5570baea82f8afbfa2630b24e7a8cfe5b" @@ -1910,10 +1915,11 @@ cliui@^8.0.1: wrap-ansi "^7.0.0" co-body@^6.0.0: - version "6.0.0" - resolved "https://registry.yarnpkg.com/co-body/-/co-body-6.0.0.tgz#965b9337d7f5655480787471f4237664820827e3" - integrity sha512-9ZIcixguuuKIptnY8yemEOuhb71L/lLf+Rl5JfJEUiDNJk0e02MBt7BPxR2GEh5mw8dPthQYR4jPI/BnS1MQgw== + version "6.2.0" + resolved "https://registry.yarnpkg.com/co-body/-/co-body-6.2.0.tgz#afd776d60e5659f4eee862df83499698eb1aea1b" + integrity sha512-Kbpv2Yd1NdL1V/V4cwLVxraHDV6K8ayohr2rmH0J87Er8+zJjcTa6dAn9QMPC9CRgU8+aNajKbSf1TzDB1yKPA== dependencies: + "@hapi/bourne" "^3.0.0" inflation "^2.0.0" qs "^6.5.2" raw-body "^2.3.3"