From 3973f1022d57a3134e8f775e1c1cc6d398681bb4 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 25 Nov 2024 11:35:49 -0800 Subject: [PATCH] Add github compatible tarball download API endpoints (#32572) Fix #29654 Fix #32481 (cherry picked from commit 703be6bf307ed19ce8dc8cd311d24aeb6e5b9861) Conflicts: routers/api/v1/repo/file.go routers/web/repo/repo.go services/repository/archiver/archiver.go services/repository/archiver/archiver_test.go trivial context conflicts add missing function PathParam skipped in a very large refactor --- routers/api/v1/api.go | 2 + routers/api/v1/repo/download.go | 53 +++++++++++++++++++ routers/api/v1/repo/file.go | 15 ++++-- routers/web/repo/repo.go | 14 ++++- services/repository/archiver/archiver.go | 34 +++++++----- services/repository/archiver/archiver_test.go | 25 ++++----- tests/integration/api_repo_archive_test.go | 40 ++++++++++++++ 7 files changed, 152 insertions(+), 31 deletions(-) create mode 100644 routers/api/v1/repo/download.go diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 4fe10d8a00..c028a5a32d 100644 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -1352,6 +1352,8 @@ func Routes() *web.Route { m.Post("", bind(api.UpdateRepoAvatarOption{}), repo.UpdateAvatar) m.Delete("", repo.DeleteAvatar) }, reqAdmin(), reqToken()) + + m.Get("/{ball_type:tarball|zipball|bundle}/*", reqRepoReader(unit.TypeCode), repo.DownloadArchive) }, repoAssignment(), checkTokenPublicOnly()) }, tokenRequiresScopes(auth_model.AccessTokenScopeCategoryRepository)) diff --git a/routers/api/v1/repo/download.go b/routers/api/v1/repo/download.go new file mode 100644 index 0000000000..3a0401a5b0 --- /dev/null +++ b/routers/api/v1/repo/download.go @@ -0,0 +1,53 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repo + +import ( + "fmt" + "net/http" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/services/context" + archiver_service "code.gitea.io/gitea/services/repository/archiver" +) + +func DownloadArchive(ctx *context.APIContext) { + var tp git.ArchiveType + switch ballType := ctx.Params("ball_type"); ballType { + case "tarball": + tp = git.TARGZ + case "zipball": + tp = git.ZIP + case "bundle": + tp = git.BUNDLE + default: + ctx.Error(http.StatusBadRequest, "", fmt.Sprintf("Unknown archive type: %s", ballType)) + return + } + + if ctx.Repo.GitRepo == nil { + gitRepo, err := gitrepo.OpenRepository(ctx, ctx.Repo.Repository) + if err != nil { + ctx.Error(http.StatusInternalServerError, "OpenRepository", err) + return + } + ctx.Repo.GitRepo = gitRepo + defer gitRepo.Close() + } + + r, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, ctx.Params("*"), tp) + if err != nil { + ctx.ServerError("NewRequest", err) + return + } + + archive, err := r.Await(ctx) + if err != nil { + ctx.ServerError("archive.Await", err) + return + } + + download(ctx, r.GetArchiveName(), archive) +} diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index 50d2786ec8..2cea538b72 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -308,7 +308,13 @@ func GetArchive(ctx *context.APIContext) { func archiveDownload(ctx *context.APIContext) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) + ext, tp, err := archiver_service.ParseFileName(uri) + if err != nil { + ctx.Error(http.StatusBadRequest, "ParseFileName", err) + return + } + + aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp) if err != nil { if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { ctx.Error(http.StatusBadRequest, "unknown archive format", err) @@ -334,9 +340,12 @@ func download(ctx *context.APIContext, archiveName string, archiver *repo_model. // Add nix format link header so tarballs lock correctly: // https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md - ctx.Resp.Header().Add("Link", fmt.Sprintf("<%s/archive/%s.tar.gz?rev=%s>; rel=\"immutable\"", + ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`, ctx.Repo.Repository.APIURL(), - archiver.CommitID, archiver.CommitID)) + archiver.CommitID, + archiver.Type.String(), + archiver.CommitID, + )) rPath := archiver.RelativePath() if setting.RepoArchive.Storage.MinioConfig.ServeDirect { diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index 8036bcae67..1c4fb39546 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -472,7 +472,12 @@ func RedirectDownload(ctx *context.Context) { // Download an archive of a repository func Download(ctx *context.Context) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) + ext, tp, err := archiver_service.ParseFileName(uri) + if err != nil { + ctx.ServerError("ParseFileName", err) + return + } + aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp) if err != nil { if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { ctx.Error(http.StatusBadRequest, err.Error()) @@ -547,7 +552,12 @@ func download(ctx *context.Context, archiveName string, archiver *repo_model.Rep // kind of drop it on the floor if this is the case. func InitiateDownload(ctx *context.Context) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) + ext, tp, err := archiver_service.ParseFileName(uri) + if err != nil { + ctx.ServerError("ParseFileName", err) + return + } + aReq, err := archiver_service.NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, strings.TrimSuffix(uri, ext), tp) if err != nil { ctx.ServerError("archiver_service.NewRequest", err) return diff --git a/services/repository/archiver/archiver.go b/services/repository/archiver/archiver.go index 73dcb0795f..279067c002 100644 --- a/services/repository/archiver/archiver.go +++ b/services/repository/archiver/archiver.go @@ -68,30 +68,36 @@ func (e RepoRefNotFoundError) Is(err error) bool { return ok } -// NewRequest creates an archival request, based on the URI. The -// resulting ArchiveRequest is suitable for being passed to Await() -// if it's determined that the request still needs to be satisfied. -func NewRequest(ctx context.Context, repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) { - r := &ArchiveRequest{ - RepoID: repoID, - } - - var ext string +func ParseFileName(uri string) (ext string, tp git.ArchiveType, err error) { switch { case strings.HasSuffix(uri, ".zip"): ext = ".zip" - r.Type = git.ZIP + tp = git.ZIP case strings.HasSuffix(uri, ".tar.gz"): ext = ".tar.gz" - r.Type = git.TARGZ + tp = git.TARGZ case strings.HasSuffix(uri, ".bundle"): ext = ".bundle" - r.Type = git.BUNDLE + tp = git.BUNDLE default: - return nil, ErrUnknownArchiveFormat{RequestFormat: uri} + return "", 0, ErrUnknownArchiveFormat{RequestFormat: uri} + } + return ext, tp, nil +} + +// NewRequest creates an archival request, based on the URI. The +// resulting ArchiveRequest is suitable for being passed to Await() +// if it's determined that the request still needs to be satisfied. +func NewRequest(ctx context.Context, repoID int64, repo *git.Repository, refName string, fileType git.ArchiveType) (*ArchiveRequest, error) { + if fileType < git.ZIP || fileType > git.BUNDLE { + return nil, ErrUnknownArchiveFormat{RequestFormat: fileType.String()} } - r.refName = strings.TrimSuffix(uri, ext) + r := &ArchiveRequest{ + RepoID: repoID, + refName: refName, + Type: fileType, + } // Get corresponding commit. commitID, err := repo.ConvertToGitID(r.refName) diff --git a/services/repository/archiver/archiver_test.go b/services/repository/archiver/archiver_test.go index 6d9224da9f..e7a2422e55 100644 --- a/services/repository/archiver/archiver_test.go +++ b/services/repository/archiver/archiver_test.go @@ -9,6 +9,7 @@ import ( "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/services/contexttest" _ "code.gitea.io/gitea/models/actions" @@ -32,47 +33,47 @@ func TestArchive_Basic(t *testing.T) { contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() - bogusReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") + bogusReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP) require.NoError(t, err) assert.NotNil(t, bogusReq) assert.EqualValues(t, firstCommit+".zip", bogusReq.GetArchiveName()) // Check a series of bogus requests. // Step 1, valid commit with a bad extension. - bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".dilbert") + bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, 100) require.Error(t, err) assert.Nil(t, bogusReq) // Step 2, missing commit. - bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip") + bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff", git.ZIP) require.Error(t, err) assert.Nil(t, bogusReq) // Step 3, doesn't look like branch/tag/commit. - bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip") + bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db", git.ZIP) require.Error(t, err) assert.Nil(t, bogusReq) - bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master.zip") + bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master", git.ZIP) require.NoError(t, err) assert.NotNil(t, bogusReq) assert.EqualValues(t, "master.zip", bogusReq.GetArchiveName()) - bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive.zip") + bogusReq, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive", git.ZIP) require.NoError(t, err) assert.NotNil(t, bogusReq) assert.EqualValues(t, "test-archive.zip", bogusReq.GetArchiveName()) // Now two valid requests, firstCommit with valid extensions. - zipReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") + zipReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP) require.NoError(t, err) assert.NotNil(t, zipReq) - tgzReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".tar.gz") + tgzReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.TARGZ) require.NoError(t, err) assert.NotNil(t, tgzReq) - secondReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".zip") + secondReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit, git.ZIP) require.NoError(t, err) assert.NotNil(t, secondReq) @@ -92,7 +93,7 @@ func TestArchive_Basic(t *testing.T) { // Sleep two seconds to make sure the queue doesn't change. time.Sleep(2 * time.Second) - zipReq2, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") + zipReq2, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP) require.NoError(t, err) // This zipReq should match what's sitting in the queue, as we haven't // let it release yet. From the consumer's point of view, this looks like @@ -107,12 +108,12 @@ func TestArchive_Basic(t *testing.T) { // Now we'll submit a request and TimedWaitForCompletion twice, before and // after we release it. We should trigger both the timeout and non-timeout // cases. - timedReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".tar.gz") + timedReq, err := NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit, git.TARGZ) require.NoError(t, err) assert.NotNil(t, timedReq) doArchive(db.DefaultContext, timedReq) - zipReq2, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") + zipReq2, err = NewRequest(ctx, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit, git.ZIP) require.NoError(t, err) // Now, we're guaranteed to have released the original zipReq from the queue. // Ensure that we don't get handed back the released entry somehow, but they diff --git a/tests/integration/api_repo_archive_test.go b/tests/integration/api_repo_archive_test.go index a16136a39b..fab3dedfab 100644 --- a/tests/integration/api_repo_archive_test.go +++ b/tests/integration/api_repo_archive_test.go @@ -63,3 +63,43 @@ func TestAPIDownloadArchive(t *testing.T) { link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/archive/master", user2.Name, repo.Name)) MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusBadRequest) } + +func TestAPIDownloadArchive2(t *testing.T) { + defer tests.PrepareTestEnv(t)() + + repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1}) + user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) + session := loginUser(t, user2.LowerName) + token := getTokenForLoggedInUser(t, session, auth_model.AccessTokenScopeReadRepository) + + link, _ := url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/zipball/master", user2.Name, repo.Name)) + resp := MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK) + bs, err := io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Len(t, bs, 320) + + link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/tarball/master", user2.Name, repo.Name)) + resp = MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK) + bs, err = io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Len(t, bs, 266) + + // Must return a link to a commit ID as the "immutable" archive link + linkHeaderRe := regexp.MustCompile(`^<(https?://.*/api/v1/repos/user2/repo1/archive/[a-f0-9]+\.tar\.gz.*)>; rel="immutable"$`) + m := linkHeaderRe.FindStringSubmatch(resp.Header().Get("Link")) + assert.NotEmpty(t, m[1]) + resp = MakeRequest(t, NewRequest(t, "GET", m[1]).AddTokenAuth(token), http.StatusOK) + bs2, err := io.ReadAll(resp.Body) + require.NoError(t, err) + // The locked URL should give the same bytes as the non-locked one + assert.EqualValues(t, bs, bs2) + + link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/bundle/master", user2.Name, repo.Name)) + resp = MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusOK) + bs, err = io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Len(t, bs, 382) + + link, _ = url.Parse(fmt.Sprintf("/api/v1/repos/%s/%s/archive/master", user2.Name, repo.Name)) + MakeRequest(t, NewRequest(t, "GET", link.String()).AddTokenAuth(token), http.StatusBadRequest) +}