1
0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2024-11-21 08:31:27 -05:00

Merge pull request 'fix: referenced sha256:* container images may be deleted' (#5430) from earl-warren/forgejo:wip-container-cleanup into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5430
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
This commit is contained in:
Earl Warren 2024-09-30 16:45:09 +00:00
commit c4d2635839
3 changed files with 163 additions and 17 deletions

View file

@ -0,0 +1,116 @@
// Copyright 2024 The Forgejo Authors.
// SPDX-License-Identifier: GPL-3.0-or-later
package container
import (
"testing"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
container_module "code.gitea.io/gitea/modules/packages/container"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/timeutil"
container_service "code.gitea.io/gitea/services/packages/container"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestCleanupSHA256(t *testing.T) {
require.NoError(t, unittest.PrepareTestDatabase())
defer test.MockVariableValue(&container_service.SHA256BatchSize, 1)()
ctx := db.DefaultContext
createContainer := func(t *testing.T, name, version, digest string, created timeutil.TimeStamp) {
t.Helper()
ownerID := int64(2001)
p := packages.Package{
OwnerID: ownerID,
LowerName: name,
Type: packages.TypeContainer,
}
_, err := db.GetEngine(ctx).Insert(&p)
// package_version").Where("version = ?", multiTag).Update(&packages_model.PackageVersion{MetadataJSON: `corrupted "manifests":[{ bad`})
require.NoError(t, err)
var metadata string
if digest != "" {
m := container_module.Metadata{
Manifests: []*container_module.Manifest{
{
Digest: digest,
},
},
}
mt, err := json.Marshal(m)
require.NoError(t, err)
metadata = string(mt)
}
v := packages.PackageVersion{
PackageID: p.ID,
LowerVersion: version,
MetadataJSON: metadata,
CreatedUnix: created,
}
_, err = db.GetEngine(ctx).NoAutoTime().Insert(&v)
require.NoError(t, err)
}
cleanupAndCheckLogs := func(t *testing.T, olderThan time.Duration, expected ...string) {
t.Helper()
logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
logChecker.Filter(expected...)
logChecker.StopMark(container_service.SHA256LogFinish)
defer cleanup()
require.NoError(t, CleanupExpiredData(ctx, olderThan))
logFiltered, logStopped := logChecker.Check(5 * time.Second)
assert.True(t, logStopped)
filtered := make([]bool, 0, len(expected))
for range expected {
filtered = append(filtered, true)
}
assert.EqualValues(t, filtered, logFiltered, expected)
}
ancient := 1 * time.Hour
t.Run("no packages, cleanup nothing", func(t *testing.T) {
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
})
orphan := "orphan"
createdLongAgo := timeutil.TimeStamp(time.Now().Add(-(ancient * 2)).Unix())
createdRecently := timeutil.TimeStamp(time.Now().Add(-(ancient / 2)).Unix())
t.Run("an orphaned package created a long time ago is removed", func(t *testing.T) {
createContainer(t, orphan, "sha256:"+orphan, "", createdLongAgo)
cleanupAndCheckLogs(t, ancient, "Removing 1 entries from `package_version`")
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
})
t.Run("a newly created orphaned package is not cleaned up", func(t *testing.T) {
createContainer(t, orphan, "sha256:"+orphan, "", createdRecently)
cleanupAndCheckLogs(t, ancient, "1 out of 1 container image(s) are not deleted because they were created less than")
cleanupAndCheckLogs(t, 0, "Removing 1 entries from `package_version`")
cleanupAndCheckLogs(t, 0, "Nothing to cleanup")
})
t.Run("a referenced package is not removed", func(t *testing.T) {
referenced := "referenced"
digest := "sha256:" + referenced
createContainer(t, referenced, digest, "", createdRecently)
index := "index"
createContainer(t, index, index, digest, createdRecently)
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
})
}

View file

@ -0,0 +1,14 @@
// Copyright 2024 The Forgejo Authors.
// SPDX-License-Identifier: GPL-3.0-or-later
package container
import (
"testing"
"code.gitea.io/gitea/models/unittest"
)
func TestMain(m *testing.M) {
unittest.MainTest(m)
}

View file

@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
container_module "code.gitea.io/gitea/modules/packages/container"
"code.gitea.io/gitea/modules/timeutil"
)
var (
@ -37,18 +38,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
defer committer.Close()
foundAtLeastOneSHA256 := false
shaToVersionID := make(map[string]int64, 100)
type packageVersion struct {
id int64
created timeutil.TimeStamp
}
shaToPackageVersion := make(map[string]packageVersion, 100)
knownSHA := make(map[string]any, 100)
// compute before making the inventory to not race against ongoing
// image creations
old := timeutil.TimeStamp(time.Now().Add(-olderThan).Unix())
log.Debug("Look for all package_version.version that start with sha256:")
old := time.Now().Add(-olderThan).Unix()
// Iterate over all container versions in ascending order and store
// in shaToVersionID all versions with a sha256: prefix. If an index
// in shaToPackageVersion all versions with a sha256: prefix. If an index
// manifest is found, the sha256: digest it references are removed
// from shaToVersionID. If the sha256: digest found in an index
// manifest is not already in shaToVersionID, it is stored in
// from shaToPackageVersion. If the sha256: digest found in an index
// manifest is not already in shaToPackageVersion, it is stored in
// knownSHA to be dealt with later.
//
// Although it is theoretically possible that a sha256: is uploaded
@ -56,16 +63,16 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
// normal order of operations. First the sha256: version is uploaded
// and then the index manifest. When the iteration completes,
// knownSHA will therefore be empty most of the time and
// shaToVersionID will only contain unreferenced sha256: versions.
// shaToPackageVersion will only contain unreferenced sha256: versions.
if err := db.GetEngine(ctx).
Select("`package_version`.`id`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
Select("`package_version`.`id`, `package_version`.`created_unix`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`").
Where("`package`.`type` = ? AND `package_version`.`created_unix` < ?", packages.TypeContainer, old).
Where("`package`.`type` = ?", packages.TypeContainer).
OrderBy("`package_version`.`id` ASC").
Iterate(new(packages.PackageVersion), func(_ int, bean any) error {
v := bean.(*packages.PackageVersion)
if strings.HasPrefix(v.LowerVersion, "sha256:") {
shaToVersionID[v.LowerVersion] = v.ID
shaToPackageVersion[v.LowerVersion] = packageVersion{id: v.ID, created: v.CreatedUnix}
foundAtLeastOneSHA256 = true
} else if strings.Contains(v.MetadataJSON, `"manifests":[{`) {
var metadata container_module.Metadata
@ -74,8 +81,8 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
return nil
}
for _, manifest := range metadata.Manifests {
if _, ok := shaToVersionID[manifest.Digest]; ok {
delete(shaToVersionID, manifest.Digest)
if _, ok := shaToPackageVersion[manifest.Digest]; ok {
delete(shaToPackageVersion, manifest.Digest)
} else {
knownSHA[manifest.Digest] = true
}
@ -87,10 +94,10 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
}
for sha := range knownSHA {
delete(shaToVersionID, sha)
delete(shaToPackageVersion, sha)
}
if len(shaToVersionID) == 0 {
if len(shaToPackageVersion) == 0 {
if foundAtLeastOneSHA256 {
log.Debug("All container images with a version matching sha256:* are referenced by an index manifest")
} else {
@ -100,15 +107,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
return nil
}
found := len(shaToVersionID)
found := len(shaToPackageVersion)
log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found)
log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize)
packageVersionIDs := make([]int64, 0, SHA256BatchSize)
for _, id := range shaToVersionID {
packageVersionIDs = append(packageVersionIDs, id)
tooYoung := 0
for _, p := range shaToPackageVersion {
if p.created < old {
packageVersionIDs = append(packageVersionIDs, p.id)
} else {
tooYoung++
}
}
if tooYoung > 0 {
log.Warn("%d out of %d container image(s) are not deleted because they were created less than %v ago", tooYoung, found, olderThan)
}
for len(packageVersionIDs) > 0 {