siimon · ssg2526 · May 20, 2024 · May 23, 2024 · May 24, 2024 · May 31, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,11 @@ project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Changed
 
+- Changes for cluster mode
+- Removed the `byLabels` `Grouper` in `metricAggregators.js` and replaced it with a module-level `Map`, to avoid creating a new map on every metrics request
+- Moved hashing of labels from the master to the workers, to distribute the CPU-bound hashing work among the workers
+- Workers now write their metrics to a temporary file and send only the file name to the master, which reads the metrics from that file; this replaces sending the metrics over IPC and keeps the IPC channel free of congestion (change in `cluster.js`)
+
 ### Added
 
 [unreleased]: https://github.com/siimon/prom-client/compare/v15.1.2...HEAD

diff --git a/lib/cluster.js b/lib/cluster.js
@@ -9,8 +9,11 @@
  */
 
 const Registry = require('./registry');
-const { Grouper } = require('./util');
+const { Grouper, hashObject } = require('./util');
 const { aggregators } = require('./metricAggregators');
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
 // We need to lazy-load the 'cluster' module as some application servers -
 // namely Passenger - crash when it is imported.
 let cluster = () => {
@@ -175,19 +178,28 @@ function addListeners() {
 					request.done(new Error(message.error));
 					return;
 				}
-
-				message.metrics.forEach(registry => request.responses.push(registry));
-				request.pending--;
-
-				if (request.pending === 0) {
-					// finalize
-					requests.delete(message.requestId);
-					clearTimeout(request.errorTimeout);
-
-					const registry = AggregatorRegistry.aggregate(request.responses);
-					const promString = registry.metrics();
-					request.done(null, promString);
-				}
+				fs.readFile(message.filename, 'utf8', (err, data) => {
+					if (err) {
+						request.done(err);
+						return;
+					} else {
+						const metrics = JSON.parse(data);
+						metrics.forEach(registry => request.responses.push(registry));
+						fs.unlink(message.filename, e => {
+							if (e)
+								console.error(`Error deleting file ${message.filename}:`, e);
+						});
+						request.pending--;
+						if (request.pending === 0) {
+							// finalize
+							requests.delete(message.requestId);
+							clearTimeout(request.errorTimeout);
+							const registry = AggregatorRegistry.aggregate(request.responses);
+							const promString = registry.metrics();
+							request.done(null, promString);
+						}
+					}
+				});
 			}
 		});
 	}
@@ -198,10 +210,32 @@ function addListeners() {
 			if (message.type === GET_METRICS_REQ) {
 				Promise.all(registries.map(r => r.getMetricsAsJSON()))
 					.then(metrics => {
-						process.send({
-							type: GET_METRICS_RES,
-							requestId: message.requestId,
-							metrics,
+						metrics.forEach(registry => {
+							registry.forEach(value => {
+								const hash = hashObject(value);
+								const key = `${value.metricName}_${hash}`;
+								value.hash = key;
+							});
+						});
+						// adding request id in file path to handle concurrency
+						const filename = path.join(
+							os.tmpdir(),
+							`metrics-${process.pid}-${message.requestId}.json`,
+						);
+						fs.writeFile(filename, JSON.stringify(metrics), err => {
+							if (err) {
+								process.send({
+									type: GET_METRICS_RES,
+									requestId: message.requestId,
+									error: err.message,
+								});
+							} else {
+								process.send({
+									type: GET_METRICS_RES,
+									requestId: message.requestId,
+									filename,
+								});
+							}
 						});
 					})
 					.catch(error => {

diff --git a/lib/metricAggregators.js b/lib/metricAggregators.js
@@ -1,6 +1,6 @@
 'use strict';
 
-const { Grouper, hashObject } = require('./util');
+const metricMap = new Map();
 
 /**
  * Returns a new function that applies the `aggregatorFn` to the values.
@@ -18,11 +18,18 @@ function AggregatorFactory(aggregatorFn) {
 			aggregator: metrics[0].aggregator,
 		};
 		// Gather metrics by metricName and labels.
-		const byLabels = new Grouper();
+		if (!metricMap.get(metrics[0].name)) {
+			metricMap.set(metrics[0].name, new Map());
+		}
+		const byLabels = metricMap.get(metrics[0].name);
 		metrics.forEach(metric => {
 			metric.values.forEach(value => {
-				const key = hashObject(value.labels);
-				byLabels.add(`${value.metricName}_${key}`, value);
+				const valuesArray = byLabels.get(value.hash);
+				if (!valuesArray) {
+					byLabels.set(value.hash, [value]);
+				} else {
+					valuesArray.push(value);
+				}
 			});
 		});
 		// Apply aggregator function to gathered metrics.
@@ -37,6 +44,7 @@ function AggregatorFactory(aggregatorFn) {
 			}
 			// NB: Timestamps are omitted.
 			result.values.push(valObj);
+			values.length = 0;
 		});
 		return result;
 	};

diff --git a/test/aggregatorsTest.js b/test/aggregatorsTest.js
@@ -8,17 +8,17 @@ describe('aggregators', () => {
 			name: 'metric_name',
 			type: 'does not matter',
 			values: [
-				{ labels: [], value: 1 },
-				{ labels: ['label1'], value: 2 },
+				{ labels: [], value: 1, hash: 'h1' },
+				{ labels: ['label1'], value: 2, hash: 'h2' },
 			],
 		},
 		{
 			help: 'metric_help',
 			name: 'metric_name',
 			type: 'does not matter',
 			values: [
-				{ labels: [], value: 3 },
-				{ labels: ['label1'], value: 4 },
+				{ labels: [], value: 3, hash: 'h1' },
+				{ labels: ['label1'], value: 4, hash: 'h2' },
 			],
 		},
 	];
@@ -102,19 +102,19 @@ describe('aggregators', () => {
 					help: 'metric_help',
 					name: 'metric_name',
 					type: 'does not matter',
-					values: [{ labels: [], value: 1, metricName: 'abc' }],
+					values: [{ labels: [], value: 1, metricName: 'abc', hash: 'h1' }],
 				},
 				{
 					help: 'metric_help',
 					name: 'metric_name',
 					type: 'does not matter',
-					values: [{ labels: [], value: 3, metricName: 'abc' }],
+					values: [{ labels: [], value: 3, metricName: 'abc', hash: 'h1' }],
 				},
 				{
 					help: 'metric_help',
 					name: 'metric_name',
 					type: 'does not matter',
-					values: [{ labels: [], value: 5, metricName: 'def' }],
+					values: [{ labels: [], value: 5, metricName: 'def', hash: 'h2' }],
 				},
 			];
 			const result = aggregators.sum(metrics2);

diff --git a/test/clusterTest.js b/test/clusterTest.js
@@ -3,6 +3,7 @@
 const cluster = require('cluster');
 const process = require('process');
 const Registry = require('../lib/cluster');
+const { hash } = require('crypto');
 
 describe.each([
 	['Prometheus', Registry.PROMETHEUS_CONTENT_TYPE],
@@ -61,11 +62,13 @@ describe.each([
 						labels: { le: 0.1, code: '300' },
 						value: 0,
 						metricName: 'test_histogram_bucket',
+						hash: 'test_histogram_bucket{le="0.1",code="300"}',
 					},
 					{
 						labels: { le: 10, code: '300' },
 						value: 1.6486727018068046,
 						metricName: 'test_histogram_bucket',
+						hash: 'test_histogram_bucket{le="10",code="300"}',
 					},
 				],
 				aggregator: 'sum',
@@ -75,24 +78,40 @@ describe.each([
 				name: 'test_gauge',
 				type: 'gauge',
 				values: [
-					{ value: 0.47, labels: { method: 'get', code: 200 } },
-					{ value: 0.64, labels: {} },
-					{ value: 23, labels: { method: 'post', code: '300' } },
+					{
+						value: 0.47,
+						labels: { method: 'get', code: 200 },
+						hash: 'test_gauge{method="get",code="200"}',
+					},
+					{ value: 0.64, labels: {}, hash: 'test_gauge{}' },
+					{
+						value: 23,
+						labels: { method: 'post', code: '300' },
+						hash: 'test_gauge{method="post",code="300"}',
+					},
 				],
 				aggregator: 'sum',
 			},
 			{
 				help: 'Start time of the process since unix epoch in seconds.',
 				name: 'process_start_time_seconds',
 				type: 'gauge',
-				values: [{ value: 1502075832, labels: {} }],
+				values: [
+					{
+						value: 1502075832,
+						labels: {},
+						hash: 'process_start_time_seconds{}',
+					},
+				],
 				aggregator: 'omit',
 			},
 			{
 				help: 'Lag of event loop in seconds.',
 				name: 'nodejs_eventloop_lag_seconds',
 				type: 'gauge',
-				values: [{ value: 0.009, labels: {} }],
+				values: [
+					{ value: 0.009, labels: {}, hash: 'nodejs_eventloop_lag_seconds{}' },
+				],
 				aggregator: 'average',
 			},
 			{
@@ -103,6 +122,7 @@ describe.each([
 					{
 						value: 1,
 						labels: { version: 'v6.11.1', major: 6, minor: 11, patch: 1 },
+						hash: 'nodejs_version_info{version="v6.11.1",major="6",minor="11",patch="1"}',
 					},
 				],
 				aggregator: 'first',
@@ -118,11 +138,13 @@ describe.each([
 						labels: { le: 0.1, code: '300' },
 						value: 0.235151,
 						metricName: 'test_histogram_bucket',
+						hash: 'test_histogram_bucket{le="0.1",code="300"}',
 					},
 					{
 						labels: { le: 10, code: '300' },
 						value: 1.192591,
 						metricName: 'test_histogram_bucket',
+						hash: 'test_histogram_bucket{le="10",code="300"}',
 					},
 				],
 				aggregator: 'sum',
@@ -132,24 +154,40 @@ describe.each([
 				name: 'test_gauge',
 				type: 'gauge',
 				values: [
-					{ value: 0.02, labels: { method: 'get', code: 200 } },
-					{ value: 0.24, labels: {} },
-					{ value: 51, labels: { method: 'post', code: '300' } },
+					{
+						value: 0.02,
+						labels: { method: 'get', code: 200 },
+						hash: 'test_gauge{method="get",code="200"}',
+					},
+					{ value: 0.24, labels: {}, hash: 'test_gauge{}' },
+					{
+						value: 51,
+						labels: { method: 'post', code: '300' },
+						hash: 'test_gauge{method="post",code="300"}',
+					},
 				],
 				aggregator: 'sum',
 			},
 			{
 				help: 'Start time of the process since unix epoch in seconds.',
 				name: 'process_start_time_seconds',
 				type: 'gauge',
-				values: [{ value: 1502075849, labels: {} }],
+				values: [
+					{
+						value: 1502075849,
+						labels: {},
+						hash: 'process_start_time_seconds{}',
+					},
+				],
 				aggregator: 'omit',
 			},
 			{
 				help: 'Lag of event loop in seconds.',
 				name: 'nodejs_eventloop_lag_seconds',
 				type: 'gauge',
-				values: [{ value: 0.008, labels: {} }],
+				values: [
+					{ value: 0.008, labels: {}, hash: 'nodejs_eventloop_lag_seconds{}' },
+				],
 				aggregator: 'average',
 			},
 			{
@@ -160,6 +198,7 @@ describe.each([
 					{
 						value: 1,
 						labels: { version: 'v6.11.1', major: 6, minor: 11, patch: 1 },
+						hash: 'nodejs_version_info{version="v6.11.1",major="6",minor="11",patch="1"}',
 					},
 				],
 				aggregator: 'first',