From fa25b9eec63c073e16f4595459880e4ee8797913 Mon Sep 17 00:00:00 2001 From: Gusted Date: Fri, 18 Aug 2023 04:39:23 +0200 Subject: [PATCH] [GITEA] Add slow SQL query warning - Backport of https://codeberg.org/forgejo/forgejo/pulls/1284 - Databases are one of the most important parts of Forgejo, every interaction with Forgejo uses the database in one way or another. Therefore, it is important to maintain the database and recognize when Forgejo is not doing well with the database. Forgejo already has the option to log *every* SQL query along with its execution time, but monitoring becomes impractical for larger instances and takes up unnecessary storage in the logs. - Add a QoL enhancement that allows instance administrators to specify a threshold value beyond which query execution time is logged as a warning in the xorm logger. The default value is a conservative five seconds to avoid this becoming a source of spam in the logs. - The use case for this patch is that with an instance the size of Codeberg, monitoring SQL logs is not very fruitful and most of them are uninteresting. Recently, in the context of persistent deadlock issues (https://codeberg.org/forgejo/forgejo/issues/220), I have noticed that certain queries hold locks on tables like comment and issue for several seconds. This patch helps to identify which queries these are and when they happen. - Added unit test. --- models/db/engine.go | 28 +++++++++++++++++++++++++++ models/db/engine_test.go | 38 +++++++++++++++++++++++++++++++++++++ modules/setting/database.go | 2 ++ 3 files changed, 68 insertions(+) diff --git a/models/db/engine.go b/models/db/engine.go index 6d1c10e55b..4b8c2cd573 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -11,10 +11,13 @@ import ( "io" "reflect" "strings" + "time" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "xorm.io/xorm" + "xorm.io/xorm/contexts" "xorm.io/xorm/names" "xorm.io/xorm/schemas" @@ -147,6 +150,13 @@ func InitEngine(ctx context.Context) error { xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime) xormEngine.SetDefaultContext(ctx) + if setting.Database.SlowQueryTreshold > 0 { + xormEngine.AddHook(&SlowQueryHook{ + Treshold: setting.Database.SlowQueryTreshold, + Logger: log.GetLogger("xorm"), + }) + } + SetDefaultEngine(ctx, xormEngine) return nil } @@ -300,3 +310,21 @@ func SetLogSQL(ctx context.Context, on bool) { sess.Engine().ShowSQL(on) } } + +type SlowQueryHook struct { + Treshold time.Duration + Logger log.Logger +} + +var _ contexts.Hook = &SlowQueryHook{} + +func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) { + return c.Ctx, nil +} + +func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error { + if c.ExecuteTime >= h.Treshold { + h.Logger.Log(8, log.WARN, "[Slow SQL Query] %s %v - %v", c.SQL, c.Args, c.ExecuteTime) + } + return nil +} diff --git a/models/db/engine_test.go b/models/db/engine_test.go index fb5c73e44f..6a88402e91 100644 --- a/models/db/engine_test.go +++ b/models/db/engine_test.go @@ -6,15 +6,19 @@ package db_test import ( "path/filepath" "testing" + "time" "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" _ "code.gitea.io/gitea/cmd" // for TestPrimaryKeys "github.com/stretchr/testify/assert" + "xorm.io/xorm" ) func TestDumpDatabase(t *testing.T) { @@ -85,3 +89,37 @@ func TestPrimaryKeys(t *testing.T) { } } } + +func TestSlowQuery(t *testing.T) { + lc, cleanup := test.NewLogChecker("slow-query") + lc.StopMark("[Slow SQL Query]") + defer cleanup() + + e := db.GetEngine(db.DefaultContext) + engine, ok := e.(*xorm.Engine) + assert.True(t, ok) + + // It's not possible to clean this up with XORM, but it's luckily not harmful + // to leave around. + engine.AddHook(&db.SlowQueryHook{ + Treshold: time.Second * 10, + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped := lc.Check(100 * time.Millisecond) + assert.False(t, stopped) + + engine.AddHook(&db.SlowQueryHook{ + Treshold: 0, // Every query should be logged. + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped = lc.Check(100 * time.Millisecond) + assert.True(t, stopped) +} diff --git a/modules/setting/database.go b/modules/setting/database.go index fe5ce93231..388332029e 100644 --- a/modules/setting/database.go +++ b/modules/setting/database.go @@ -46,6 +46,7 @@ var ( ConnMaxLifetime time.Duration IterateBufferSize int AutoMigration bool + SlowQueryTreshold time.Duration }{ Timeout: 500, IterateBufferSize: 50, @@ -92,6 +93,7 @@ func loadDBSetting(rootCfg ConfigProvider) { Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10) Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second) Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true) + Database.SlowQueryTreshold = sec.Key("SLOW_QUERY_TRESHOLD").MustDuration(5 * time.Second) } // DBConnStr returns database connection string