From a3acdc7041564592cd2891f9992ea2c9db95d6cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferdinand=20M=C3=BCtsch?= Date: Fri, 18 Mar 2022 12:29:43 +0100 Subject: [PATCH] fix: duration aggregation for heartbeats with identical timestamps (resolve #340) --- scripts/aggregate_durations.sql | 9 +++++++++ scripts/count_duplicates_by_user.sql | 13 +++++++++++++ services/duration.go | 6 +++++- services/duration_test.go | 11 +++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 scripts/aggregate_durations.sql create mode 100644 scripts/count_duplicates_by_user.sql diff --git a/scripts/aggregate_durations.sql b/scripts/aggregate_durations.sql new file mode 100644 index 0000000..e1a1b2a --- /dev/null +++ b/scripts/aggregate_durations.sql @@ -0,0 +1,9 @@ +SELECT project, language, editor, operating_system, machine, branch, SUM(GREATEST(1, diff)) as 'sum' +FROM ( + SELECT project, language, editor, operating_system, machine, branch, TIME_TO_SEC(LEAST(TIMEDIFF(time, LAG(time) over w), '00:02:00')) as 'diff' + FROM heartbeats + WHERE user_id = 'n1try' + WINDOW w AS (ORDER BY time) + ) s2 +WHERE diff IS NOT NULL +GROUP BY project, language, editor, operating_system, machine, branch; \ No newline at end of file diff --git a/scripts/count_duplicates_by_user.sql b/scripts/count_duplicates_by_user.sql new file mode 100644 index 0000000..751a9e5 --- /dev/null +++ b/scripts/count_duplicates_by_user.sql @@ -0,0 +1,13 @@ +SELECT s2.user_id, sum(c) as count, total, (sum(c) / total) as ratio +FROM ( + SELECT time, + user_id, + entity, + COUNT(time) as c + FROM heartbeats + GROUP BY time, user_id, entity + HAVING COUNT(time) > 1 + ) s2 + LEFT JOIN (SELECT user_id, count(id) AS total FROM heartbeats GROUP BY user_id) s3 ON s2.user_id = s3.user_id +GROUP BY user_id +ORDER BY count DESC; \ No newline at end of file diff --git a/services/duration.go b/services/duration.go index 42ab705..6dedfbe 100644 --- a/services/duration.go +++ b/services/duration.go @@ -80,8 +80,12 
@@ func (srv *DurationService) Get(from, to time.Time, user *models.User, filters * for _, list := range mapping { for _, d := range list { + // a zero duration only occurs when two heartbeats with different hashes (e.g. different project) share the same timestamp + // that, in turn, will most likely only happen with MySQL, where the `time` column's precision was set to seconds for a while + // assume that two non-identical heartbeats with identical time are less than a second apart from each other, so substitute the expected value of that gap (500 ms) + // also see https://github.com/muety/wakapi/issues/340 if d.Duration == 0 { - d.Duration = HeartbeatDiffThreshold + d.Duration = 500 * time.Millisecond } durations = append(durations, d) } diff --git a/services/duration_test.go b/services/duration_test.go index cb3e6aa..7f793f5 100644 --- a/services/duration_test.go +++ b/services/duration_test.go @@ -65,6 +65,17 @@ func (suite *DurationServiceTestSuite) SetupSuite() { Machine: TestMachine1, Time: models.CustomTime(suite.TestStartTime.Add(30 * time.Second)), // 0:30 }, + // duplicate of previous one + { + ID: rand.Uint64(), + UserID: TestUserId, + Project: TestProject1, + Language: TestLanguageGo, + Editor: TestEditorGoland, + OperatingSystem: TestOsLinux, + Machine: TestMachine1, + Time: models.CustomTime(suite.TestStartTime.Add(30 * time.Second)), // 0:30 + }, { ID: rand.Uint64(), UserID: TestUserId,