From 3c3644bb10d9c71448e9afbb5b9051d645d11431 Mon Sep 17 00:00:00 2001 From: Andrei Mihu Date: Tue, 28 Jan 2020 21:51:30 +0000 Subject: [PATCH] Add metrics for authoritative match count and dropped events. --- CHANGELOG.md | 4 +++ server/match_registry.go | 8 +++-- server/metrics.go | 68 ++++++++++++++++++++++++++-------------- server/runtime_event.go | 1 + 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 198500c38..07e66359b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project are documented below. The format is based on [keep a changelog](http://keepachangelog.com) and this project uses [semantic versioning](http://semver.org). ## [Unreleased] +### Added +- New metric for number of authoritative matches currently running. +- New metric for total number of events dropped by the events processor pool. + ### Changed - Update username on leaderboard and tournament records when processing a score update. - Automatically stop empty authoritative matches after a configurable amount of time. diff --git a/server/match_registry.go b/server/match_registry.go index f34ca186b..89442fba6 100644 --- a/server/match_registry.go +++ b/server/match_registry.go @@ -101,7 +101,7 @@ type LocalMatchRegistry struct { node string matches *sync.Map - matchCount *atomic.Int32 + matchCount *atomic.Int64 index bleve.Index stopped *atomic.Bool @@ -125,7 +125,7 @@ func NewLocalMatchRegistry(logger, startupLogger *zap.Logger, config Config, tra node: node, matches: &sync.Map{}, - matchCount: atomic.NewInt32(0), + matchCount: atomic.NewInt64(0), index: index, stopped: atomic.NewBool(false), @@ -167,7 +167,8 @@ func (r *LocalMatchRegistry) NewMatch(logger *zap.Logger, id uuid.UUID, core Run } r.matches.Store(id, match) - r.matchCount.Inc() + count := r.matchCount.Inc() + MetricsRuntimeMatchCount.M(count) return match, nil } @@ -183,6 +184,7 @@ func (r *LocalMatchRegistry) GetMatch(id uuid.UUID) *MatchHandler { func (r *LocalMatchRegistry) RemoveMatch(id uuid.UUID, stream PresenceStream) { r.matches.Delete(id) matchesRemaining := r.matchCount.Dec() + MetricsRuntimeMatchCount.M(matchesRemaining) r.tracker.UntrackByStream(stream) if err := r.index.Delete(fmt.Sprintf("%v.%v", id.String(), r.node)); err != nil { diff --git a/server/metrics.go b/server/metrics.go index 06b3bb34b..79a419d0d 100644 --- a/server/metrics.go +++ b/server/metrics.go @@ -33,14 +33,16 @@ import ( var ( // Metrics stats measurements. - MetricsRuntimeCount = stats.Int64("nakama/runtime/count", "Number of pooled runtime instances", stats.UnitDimensionless) - MetricsSocketWsTimeSpentMsec = stats.Float64("nakama.socket/ws/server_elapsed_time", "Elapsed time in msecs spent in WebSocket connections", stats.UnitMilliseconds) - MetricsSocketWsOpenCount = stats.Int64("nakama.socket/ws/open_count", "Number of opened WebSocket connections", stats.UnitDimensionless) - MetricsSocketWsCloseCount = stats.Int64("nakama.socket/ws/close_count", "Number of closed WebSocket connections", stats.UnitDimensionless) - MetricsAPITimeSpentMsec = stats.Float64("nakama.api/server/server_elapsed_time", "Elapsed time in msecs spent in API functions", stats.UnitMilliseconds) - MetricsAPICount = stats.Int64("nakama.api/server/request_count", "Number of calls to API functions", stats.UnitDimensionless) - MetricsRtapiTimeSpentMsec = stats.Float64("nakama.rtapi/server/server_elapsed_time", "Elapsed time in msecs spent in realtime socket functions", stats.UnitMilliseconds) - MetricsRtapiCount = stats.Int64("nakama.rtapi/server/request_count", "Number of calls to realtime socket functions", stats.UnitDimensionless) + MetricsRuntimeCount = stats.Int64("nakama/runtime/count", "Number of pooled runtime instances", stats.UnitDimensionless) + MetricsRuntimeMatchCount = stats.Int64("nakama/runtime/match_count", "Number of authoritative matches running", stats.UnitDimensionless) + MetricsRuntimeEventsDroppedCount = stats.Int64("nakama/runtime/events_dropped_count", "Number of events dropped by the events processor pool", stats.UnitDimensionless) + MetricsSocketWsTimeSpentMsec = stats.Float64("nakama.socket/ws/server_elapsed_time", "Elapsed time in msecs spent in WebSocket connections", stats.UnitMilliseconds) + MetricsSocketWsOpenCount = stats.Int64("nakama.socket/ws/open_count", "Number of opened WebSocket connections", stats.UnitDimensionless) + MetricsSocketWsCloseCount = stats.Int64("nakama.socket/ws/close_count", "Number of closed WebSocket connections", stats.UnitDimensionless) + MetricsAPITimeSpentMsec = stats.Float64("nakama.api/server/server_elapsed_time", "Elapsed time in msecs spent in API functions", stats.UnitMilliseconds) + MetricsAPICount = stats.Int64("nakama.api/server/request_count", "Number of calls to API functions", stats.UnitDimensionless) + MetricsRtapiTimeSpentMsec = stats.Float64("nakama.rtapi/server/server_elapsed_time", "Elapsed time in msecs spent in realtime socket functions", stats.UnitMilliseconds) + MetricsRtapiCount = stats.Int64("nakama.rtapi/server/request_count", "Number of calls to realtime socket functions", stats.UnitDimensionless) // Metrics stats tag keys. MetricsFunction, _ = tag.NewKey("function") @@ -60,8 +62,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte } if err := view.Register(&view.View{ - Name: "nakama/runtime/count", - Description: "Number of pooled runtime instances", + Name: MetricsRuntimeCount.Name(), + Description: MetricsRuntimeCount.Description(), TagKeys: []tag.Key{}, Measure: MetricsRuntimeCount, Aggregation: view.Count(), @@ -69,8 +71,26 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing runtime count metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.socket/ws/server_elapsed_time", - Description: "Elapsed time in msecs spent in WebSocket connections", + Name: MetricsRuntimeMatchCount.Name(), + Description: MetricsRuntimeMatchCount.Description(), + TagKeys: []tag.Key{}, + Measure: MetricsRuntimeMatchCount, + Aggregation: view.LastValue(), + }); err != nil { + startupLogger.Fatal("Error subscribing runtime match count metrics view", zap.Error(err)) + } + if err := view.Register(&view.View{ + Name: MetricsRuntimeEventsDroppedCount.Name(), + Description: MetricsRuntimeEventsDroppedCount.Description(), + TagKeys: []tag.Key{}, + Measure: MetricsRuntimeEventsDroppedCount, + Aggregation: view.Count(), + }); err != nil { + startupLogger.Fatal("Error subscribing runtime events dropped count metrics view", zap.Error(err)) + } + if err := view.Register(&view.View{ + Name: MetricsSocketWsTimeSpentMsec.Name(), + Description: MetricsSocketWsTimeSpentMsec.Description(), TagKeys: []tag.Key{}, Measure: MetricsSocketWsTimeSpentMsec, Aggregation: ocgrpc.DefaultMillisecondsDistribution, @@ -78,8 +98,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing socket ws elapsed time metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.socket/ws/open_count", - Description: "Number of opened WebSocket connections", + Name: MetricsSocketWsOpenCount.Name(), + Description: MetricsSocketWsOpenCount.Description(), TagKeys: []tag.Key{}, Measure: MetricsSocketWsOpenCount, Aggregation: view.Count(), @@ -87,8 +107,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing socket ws opened count metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.socket/ws/close_count", - Description: "Number of closed WebSocket connections", + Name: MetricsSocketWsCloseCount.Name(), + Description: MetricsSocketWsCloseCount.Description(), TagKeys: []tag.Key{}, Measure: MetricsSocketWsCloseCount, Aggregation: view.Count(), @@ -96,8 +116,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing socket ws count metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.api/server/server_elapsed_time", - Description: "Elapsed time in msecs spent in API functions", + Name: MetricsAPITimeSpentMsec.Name(), + Description: MetricsAPITimeSpentMsec.Description(), TagKeys: []tag.Key{MetricsFunction}, Measure: MetricsAPITimeSpentMsec, Aggregation: ocgrpc.DefaultMillisecondsDistribution, @@ -105,8 +125,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing api elapsed time metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.api/server/request_count", - Description: "Number of calls to API functions", + Name: MetricsAPICount.Name(), + Description: MetricsAPICount.Description(), TagKeys: []tag.Key{MetricsFunction}, Measure: MetricsAPICount, Aggregation: view.Count(), @@ -114,8 +134,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing api request count metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.rtapi/server/server_elapsed_time", - Description: "Elapsed time in msecs spent in realtime socket functions", + Name: MetricsRtapiTimeSpentMsec.Name(), + Description: MetricsRtapiTimeSpentMsec.Description(), TagKeys: []tag.Key{MetricsFunction}, Measure: MetricsRtapiTimeSpentMsec, Aggregation: ocgrpc.DefaultMillisecondsDistribution, @@ -123,8 +143,8 @@ func NewMetrics(logger, startupLogger *zap.Logger, config Config, metricsExporte startupLogger.Fatal("Error subscribing rtapi elapsed time metrics view", zap.Error(err)) } if err := view.Register(&view.View{ - Name: "nakama.rtapi/server/request_count", - Description: "Number of calls to realtime socket functions", + Name: MetricsRtapiCount.Name(), + Description: MetricsRtapiCount.Description(), TagKeys: []tag.Key{MetricsFunction}, Measure: MetricsRtapiCount, Aggregation: view.Count(), diff --git a/server/runtime_event.go b/server/runtime_event.go index ef02d0a27..fefed755d 100644 --- a/server/runtime_event.go +++ b/server/runtime_event.go @@ -59,6 +59,7 @@ func (b *RuntimeEventQueue) Queue(fn func()) { // Event queued successfully. default: // Event queue is full, drop it to avoid blocking the caller. + MetricsRuntimeEventsDroppedCount.M(1) b.logger.Warn("Runtime event queue full, events may be lost") } } -- GitLab