1
0
Fork 0
mirror of https://codeberg.org/forgejo/forgejo.git synced 2024-11-23 08:47:42 -05:00
forgejo/modules/git/repo_tree.go

154 lines
3.5 KiB
Go
Raw Normal View History

2016-11-03 18:16:01 -04:00
// Copyright 2015 The Gogs Authors. All rights reserved.
Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
2019-04-19 08:17:27 -04:00
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
2016-11-03 18:16:01 -04:00
package git
Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
2019-04-19 08:17:27 -04:00
import (
"bytes"
"io"
2019-05-11 11:29:17 -04:00
"os"
"strings"
"time"
Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
2019-04-19 08:17:27 -04:00
)
2019-05-11 11:29:17 -04:00
// CommitTreeOpts represents the possible options to CommitTree
type CommitTreeOpts struct {
Sign merges, CRUD, Wiki and Repository initialisation with gpg key (#7631) This PR fixes #7598 by providing a configurable way of signing commits across the Gitea instance. Per repository configurability and import/generation of trusted secure keys is not provided by this PR - from a security PoV that's probably impossible to do properly. Similarly web-signing, that is asking the user to sign something, is not implemented - this could be done at a later stage however. ## Features - [x] If commit.gpgsign is set in .gitconfig sign commits and files created through repofiles. (merges should already have been signed.) - [x] Verify commits signed with the default gpg as valid - [x] Signer, Committer and Author can all be different - [x] Allow signer to be arbitrarily different - We still require the key to have an activated email on Gitea. A more complete implementation would be to use a keyserver and mark external-or-unactivated with an "unknown" trust level icon. - [x] Add a signing-key.gpg endpoint to get the default gpg pub key if available - Rather than add a fake web-flow user I've added this as an endpoint on /api/v1/signing-key.gpg - [x] Try to match the default key with a user on gitea - this is done at verification time - [x] Make things configurable? - app.ini configuration done - [x] when checking commits are signed need to check if they're actually verifiable too - [x] Add documentation I have decided that adjusting the docker to create a default gpg key is not the correct thing to do and therefore have not implemented this.
2019-10-16 09:42:42 -04:00
Parents []string
Message string
KeyID string
NoGPGSign bool
AlwaysSign bool
2019-05-11 11:29:17 -04:00
}
// CommitTree creates a commit from a given tree id for the user with provided message
func (repo *Repository) CommitTree(author, committer *Signature, tree *Tree, opts CommitTreeOpts) (ObjectID, error) {
commitTimeStr := time.Now().Format(time.RFC3339)
2019-05-11 11:29:17 -04:00
// Because this may call hooks we should pass in the environment
env := append(os.Environ(),
Add configurable Trust Models (#11712) * Add configurable Trust Models Gitea's default signature verification model differs from GitHub. GitHub uses signatures to verify that the committer is who they say they are - meaning that when GitHub makes a signed commit it must be the committer. The GitHub model prevents re-publishing of commits after revocation of a key and prevents re-signing of other people's commits to create a completely trusted repository signed by one key or a set of trusted keys. The default behaviour of Gitea in contrast is to always display the avatar and information related to a signature. This allows signatures to be decoupled from the committer. That being said, allowing arbitary users to present other peoples commits as theirs is not necessarily desired therefore we have a trust model whereby signatures from collaborators are marked trusted, signatures matching the commit line are marked untrusted and signatures that match a user in the db but not the committer line are marked unmatched. The problem with this model is that this conflicts with Github therefore we need to provide an option to allow users to choose the Github model should they wish to. Signed-off-by: Andrew Thornton <art27@cantab.net> * Adjust locale strings Signed-off-by: Andrew Thornton <art27@cantab.net> * as per @6543 Co-authored-by: 6543 <6543@obermui.de> * Update models/gpg_key.go * Add migration for repository Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2020-09-19 12:44:55 -04:00
"GIT_AUTHOR_NAME="+author.Name,
"GIT_AUTHOR_EMAIL="+author.Email,
2019-05-11 11:29:17 -04:00
"GIT_AUTHOR_DATE="+commitTimeStr,
Add configurable Trust Models (#11712) * Add configurable Trust Models Gitea's default signature verification model differs from GitHub. GitHub uses signatures to verify that the committer is who they say they are - meaning that when GitHub makes a signed commit it must be the committer. The GitHub model prevents re-publishing of commits after revocation of a key and prevents re-signing of other people's commits to create a completely trusted repository signed by one key or a set of trusted keys. The default behaviour of Gitea in contrast is to always display the avatar and information related to a signature. This allows signatures to be decoupled from the committer. That being said, allowing arbitary users to present other peoples commits as theirs is not necessarily desired therefore we have a trust model whereby signatures from collaborators are marked trusted, signatures matching the commit line are marked untrusted and signatures that match a user in the db but not the committer line are marked unmatched. The problem with this model is that this conflicts with Github therefore we need to provide an option to allow users to choose the Github model should they wish to. Signed-off-by: Andrew Thornton <art27@cantab.net> * Adjust locale strings Signed-off-by: Andrew Thornton <art27@cantab.net> * as per @6543 Co-authored-by: 6543 <6543@obermui.de> * Update models/gpg_key.go * Add migration for repository Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2020-09-19 12:44:55 -04:00
"GIT_COMMITTER_NAME="+committer.Name,
"GIT_COMMITTER_EMAIL="+committer.Email,
2019-05-11 11:29:17 -04:00
"GIT_COMMITTER_DATE="+commitTimeStr,
)
cmd := NewCommand(repo.Ctx, "commit-tree").AddDynamicArguments(tree.ID.String())
2019-05-11 11:29:17 -04:00
for _, parent := range opts.Parents {
cmd.AddArguments("-p").AddDynamicArguments(parent)
2019-05-11 11:29:17 -04:00
}
messageBytes := new(bytes.Buffer)
_, _ = messageBytes.WriteString(opts.Message)
_, _ = messageBytes.WriteString("\n")
2019-05-11 11:29:17 -04:00
if opts.KeyID != "" || opts.AlwaysSign {
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmd.AddOptionFormat("-S%s", opts.KeyID)
2019-05-11 11:29:17 -04:00
}
if opts.NoGPGSign {
2019-05-11 11:29:17 -04:00
cmd.AddArguments("--no-gpg-sign")
}
stdout := new(bytes.Buffer)
stderr := new(bytes.Buffer)
err := cmd.Run(&RunOpts{
Env: env,
Dir: repo.Path,
Stdin: messageBytes,
Stdout: stdout,
Stderr: stderr,
})
2019-05-11 11:29:17 -04:00
if err != nil {
return nil, ConcatenateError(err, stderr.String())
2019-05-11 11:29:17 -04:00
}
return NewIDFromString(strings.TrimSpace(stdout.String()))
2019-05-11 11:29:17 -04:00
}
func (repo *Repository) getTree(id ObjectID) (*Tree, error) {
wr, rd, cancel := repo.CatFileBatch(repo.Ctx)
defer cancel()
_, _ = wr.Write([]byte(id.String() + "\n"))
// ignore the SHA
_, typ, size, err := ReadBatchLine(rd)
if err != nil {
return nil, err
}
switch typ {
case "tag":
resolvedID := id
data, err := io.ReadAll(io.LimitReader(rd, size))
if err != nil {
return nil, err
}
tag, err := parseTagData(id.Type(), data)
if err != nil {
return nil, err
}
commit, err := tag.Commit(repo)
if err != nil {
return nil, err
}
commit.Tree.ResolvedID = resolvedID
return &commit.Tree, nil
case "commit":
commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size))
if err != nil {
return nil, err
}
if _, err := rd.Discard(1); err != nil {
return nil, err
}
commit.Tree.ResolvedID = commit.ID
return &commit.Tree, nil
case "tree":
tree := NewTree(repo, id)
tree.ResolvedID = id
objectFormat, err := repo.GetObjectFormat()
if err != nil {
return nil, err
}
tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, size)
if err != nil {
return nil, err
}
tree.entriesParsed = true
return tree, nil
default:
if err := DiscardFull(rd, size+1); err != nil {
return nil, err
}
return nil, ErrNotExist{
ID: id.String(),
}
}
}
// GetTree find the tree object in the repository.
func (repo *Repository) GetTree(idStr string) (*Tree, error) {
objectFormat, err := repo.GetObjectFormat()
if err != nil {
return nil, err
}
if len(idStr) != objectFormat.FullLength() {
res, err := repo.GetRefCommitID(idStr)
if err != nil {
return nil, err
}
if len(res) > 0 {
idStr = res
}
}
id, err := NewIDFromString(idStr)
if err != nil {
return nil, err
}
return repo.getTree(id)
}