2019-12-13 17:21:06 -05:00
// Copyright 2019 The Gitea Authors.
// All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2019-12-13 17:21:06 -05:00
package pull
import (
"bufio"
"context"
"fmt"
"io"
"os"
2021-12-18 23:19:25 -05:00
"path/filepath"
2019-12-13 17:21:06 -05:00
"strings"
"code.gitea.io/gitea/models"
2023-01-16 03:00:22 -05:00
git_model "code.gitea.io/gitea/models/git"
2022-06-13 05:37:59 -04:00
issues_model "code.gitea.io/gitea/models/issues"
2021-11-09 14:57:58 -05:00
"code.gitea.io/gitea/models/unit"
2022-10-12 01:18:26 -04:00
"code.gitea.io/gitea/modules/container"
2019-12-13 17:21:06 -05:00
"code.gitea.io/gitea/modules/git"
2021-12-18 23:19:25 -05:00
"code.gitea.io/gitea/modules/graceful"
2019-12-13 17:21:06 -05:00
"code.gitea.io/gitea/modules/log"
2021-12-18 23:19:25 -05:00
"code.gitea.io/gitea/modules/process"
2022-05-08 12:46:32 -04:00
repo_module "code.gitea.io/gitea/modules/repository"
2022-12-19 06:37:15 -05:00
"code.gitea.io/gitea/modules/setting"
2020-08-11 16:05:34 -04:00
"code.gitea.io/gitea/modules/util"
2020-10-13 14:50:57 -04:00
"github.com/gobwas/glob"
2019-12-13 17:21:06 -05:00
)
// DownloadDiffOrPatch will write the patch for the pr to the writer
2022-06-13 05:37:59 -04:00
func DownloadDiffOrPatch ( ctx context . Context , pr * issues_model . PullRequest , w io . Writer , patch , binary bool ) error {
2022-11-19 03:12:33 -05:00
if err := pr . LoadBaseRepo ( ctx ) ; err != nil {
2020-04-03 09:21:41 -04:00
log . Error ( "Unable to load base repository ID %d for pr #%d [%d]" , pr . BaseRepoID , pr . Index , pr . ID )
2019-12-13 17:21:06 -05:00
return err
}
2022-01-19 18:26:57 -05:00
gitRepo , closer , err := git . RepositoryFromContextOrOpen ( ctx , pr . BaseRepo . RepoPath ( ) )
2019-12-13 17:21:06 -05:00
if err != nil {
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-13 17:21:06 -05:00
}
2022-01-19 18:26:57 -05:00
defer closer . Close ( )
2021-09-27 17:09:49 -04:00
if err := gitRepo . GetDiffOrPatch ( pr . MergeBase , pr . GetGitRefName ( ) , w , patch , binary ) ; err != nil {
2020-01-12 04:36:21 -05:00
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "Unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-13 17:21:06 -05:00
}
return nil
}
var patchErrorSuffices = [ ] string {
": already exists in index" ,
": patch does not apply" ,
": already exists in working directory" ,
"unrecognized input" ,
2022-12-14 08:45:33 -05:00
": No such file or directory" ,
": does not exist in index" ,
2019-12-13 17:21:06 -05:00
}
// TestPatch will test whether a simple patch will apply
2022-06-13 05:37:59 -04:00
func TestPatch ( pr * issues_model . PullRequest ) error {
2023-02-03 18:11:48 -05:00
ctx , _ , finished := process . GetManager ( ) . AddContext ( graceful . GetManager ( ) . HammerContext ( ) , fmt . Sprintf ( "TestPatch: %s" , pr ) )
2022-01-19 18:26:57 -05:00
defer finished ( )
2019-12-13 17:21:06 -05:00
// Clone base repo.
2022-01-19 18:26:57 -05:00
tmpBasePath , err := createTemporaryRepo ( ctx , pr )
2019-12-13 17:21:06 -05:00
if err != nil {
log . Error ( "CreateTemporaryPath: %v" , err )
return err
}
defer func ( ) {
2022-05-08 12:46:32 -04:00
if err := repo_module . RemoveTemporaryPath ( tmpBasePath ) ; err != nil {
2019-12-13 17:21:06 -05:00
log . Error ( "Merge: RemoveTemporaryPath: %s" , err )
}
} ( )
2022-03-29 15:13:41 -04:00
gitRepo , err := git . OpenRepository ( ctx , tmpBasePath )
2019-12-13 17:21:06 -05:00
if err != nil {
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-13 17:21:06 -05:00
}
defer gitRepo . Close ( )
2020-10-13 14:50:57 -04:00
// 1. update merge base
2022-03-31 22:55:30 -04:00
pr . MergeBase , _ , err = git . NewCommand ( ctx , "merge-base" , "--" , "base" , "tracking" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2019-12-13 17:21:06 -05:00
if err != nil {
var err2 error
pr . MergeBase , err2 = gitRepo . GetRefCommitID ( git . BranchPrefix + "base" )
if err2 != nil {
2022-10-24 15:29:17 -04:00
return fmt . Errorf ( "GetMergeBase: %v and can't find commit ID for base: %w" , err , err2 )
2019-12-13 17:21:06 -05:00
}
}
pr . MergeBase = strings . TrimSpace ( pr . MergeBase )
2022-07-13 04:22:51 -04:00
if pr . HeadCommitID , err = gitRepo . GetRefCommitID ( git . BranchPrefix + "tracking" ) ; err != nil {
return fmt . Errorf ( "GetBranchCommitID: can't find commit ID for head: %w" , err )
}
if pr . HeadCommitID == pr . MergeBase {
pr . Status = issues_model . PullRequestStatusAncestor
return nil
}
2020-10-13 14:50:57 -04:00
// 2. Check for conflicts
2022-06-13 05:37:59 -04:00
if conflicts , err := checkConflicts ( ctx , pr , gitRepo , tmpBasePath ) ; err != nil || conflicts || pr . Status == issues_model . PullRequestStatusEmpty {
2020-10-13 14:50:57 -04:00
return err
}
// 3. Check for protected files changes
2023-01-16 03:00:22 -05:00
if err = checkPullFilesProtection ( ctx , pr , gitRepo ) ; err != nil {
return fmt . Errorf ( "pr.CheckPullFilesProtection(): %v" , err )
2020-10-13 14:50:57 -04:00
}
if len ( pr . ChangedProtectedFiles ) > 0 {
log . Trace ( "Found %d protected files changed" , len ( pr . ChangedProtectedFiles ) )
}
2022-06-13 05:37:59 -04:00
pr . Status = issues_model . PullRequestStatusMergeable
2020-10-13 14:50:57 -04:00
return nil
}
2021-12-18 23:19:25 -05:00
type errMergeConflict struct {
filename string
}
func ( e * errMergeConflict ) Error ( ) string {
return fmt . Sprintf ( "conflict detected at: %s" , e . filename )
}
func attemptMerge ( ctx context . Context , file * unmergedFile , tmpBasePath string , gitRepo * git . Repository ) error {
2022-07-28 19:19:55 -04:00
log . Trace ( "Attempt to merge:\n%v" , file )
2021-12-18 23:19:25 -05:00
switch {
case file . stage1 != nil && ( file . stage2 == nil || file . stage3 == nil ) :
// 1. Deleted in one or both:
//
// Conflict <==> the stage1 !SameAs to the undeleted one
if ( file . stage2 != nil && ! file . stage1 . SameAs ( file . stage2 ) ) || ( file . stage3 != nil && ! file . stage1 . SameAs ( file . stage3 ) ) {
// Conflict!
return & errMergeConflict { file . stage1 . path }
}
// Not a genuine conflict and we can simply remove the file from the index
return gitRepo . RemoveFilesFromIndex ( file . stage1 . path )
case file . stage1 == nil && file . stage2 != nil && ( file . stage3 == nil || file . stage2 . SameAs ( file . stage3 ) ) :
// 2. Added in ours but not in theirs or identical in both
//
// Not a genuine conflict just add to the index
if err := gitRepo . AddObjectToIndex ( file . stage2 . mode , git . MustIDFromString ( file . stage2 . sha ) , file . stage2 . path ) ; err != nil {
return err
}
return nil
case file . stage1 == nil && file . stage2 != nil && file . stage3 != nil && file . stage2 . sha == file . stage3 . sha && file . stage2 . mode != file . stage3 . mode :
// 3. Added in both with the same sha but the modes are different
//
// Conflict! (Not sure that this can actually happen but we should handle)
return & errMergeConflict { file . stage2 . path }
case file . stage1 == nil && file . stage2 == nil && file . stage3 != nil :
// 4. Added in theirs but not ours:
//
// Not a genuine conflict just add to the index
return gitRepo . AddObjectToIndex ( file . stage3 . mode , git . MustIDFromString ( file . stage3 . sha ) , file . stage3 . path )
case file . stage1 == nil :
// 5. Created by new in both
//
// Conflict!
return & errMergeConflict { file . stage2 . path }
case file . stage2 != nil && file . stage3 != nil :
// 5. Modified in both - we should try to merge in the changes but first:
//
if file . stage2 . mode == "120000" || file . stage3 . mode == "120000" {
// 5a. Conflicting symbolic link change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode == "160000" || file . stage3 . mode == "160000" {
// 5b. Conflicting submodule change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode != file . stage3 . mode {
// 5c. Conflicting mode change
return & errMergeConflict { file . stage2 . path }
}
// Need to get the objects from the object db to attempt to merge
2022-10-23 10:44:45 -04:00
root , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage1 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if err != nil {
return fmt . Errorf ( "unable to get root object: %s at path: %s for merging. Error: %w" , file . stage1 . sha , file . stage1 . path , err )
}
root = strings . TrimSpace ( root )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , root ) )
} ( )
2022-10-23 10:44:45 -04:00
base , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage2 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if err != nil {
return fmt . Errorf ( "unable to get base object: %s at path: %s for merging. Error: %w" , file . stage2 . sha , file . stage2 . path , err )
}
base = strings . TrimSpace ( filepath . Join ( tmpBasePath , base ) )
defer func ( ) {
_ = util . Remove ( base )
} ( )
2022-10-23 10:44:45 -04:00
head , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage3 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if err != nil {
return fmt . Errorf ( "unable to get head object:%s at path: %s for merging. Error: %w" , file . stage3 . sha , file . stage3 . path , err )
}
head = strings . TrimSpace ( head )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , head ) )
} ( )
// now git merge-file annoyingly takes a different order to the merge-tree ...
2022-10-23 10:44:45 -04:00
_ , _ , conflictErr := git . NewCommand ( ctx , "merge-file" ) . AddDynamicArguments ( base , root , head ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if conflictErr != nil {
return & errMergeConflict { file . stage2 . path }
}
// base now contains the merged data
2022-10-23 10:44:45 -04:00
hash , _ , err := git . NewCommand ( ctx , "hash-object" , "-w" , "--path" ) . AddDynamicArguments ( file . stage2 . path , base ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if err != nil {
return err
}
hash = strings . TrimSpace ( hash )
return gitRepo . AddObjectToIndex ( file . stage2 . mode , git . MustIDFromString ( hash ) , file . stage2 . path )
default :
if file . stage1 != nil {
return & errMergeConflict { file . stage1 . path }
} else if file . stage2 != nil {
return & errMergeConflict { file . stage2 . path }
} else if file . stage3 != nil {
return & errMergeConflict { file . stage3 . path }
}
}
return nil
}
2022-02-09 15:28:55 -05:00
// AttemptThreeWayMerge will attempt to three way merge using git read-tree and then follow the git merge-one-file algorithm to attempt to resolve basic conflicts
func AttemptThreeWayMerge ( ctx context . Context , gitPath string , gitRepo * git . Repository , base , ours , theirs , description string ) ( bool , [ ] string , error ) {
ctx , cancel := context . WithCancel ( ctx )
defer cancel ( )
2021-12-18 23:19:25 -05:00
// First we use read-tree to do a simple three-way merge
2022-10-23 10:44:45 -04:00
if _ , _ , err := git . NewCommand ( ctx , "read-tree" , "-m" ) . AddDynamicArguments ( base , ours , theirs ) . RunStdString ( & git . RunOpts { Dir : gitPath } ) ; err != nil {
2021-12-18 23:19:25 -05:00
log . Error ( "Unable to run read-tree -m! Error: %v" , err )
2022-10-24 15:29:17 -04:00
return false , nil , fmt . Errorf ( "unable to run read-tree -m! Error: %w" , err )
2021-12-18 23:19:25 -05:00
}
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
unmerged := make ( chan * unmergedFile )
2022-02-09 15:28:55 -05:00
go unmergedFiles ( ctx , gitPath , unmerged )
2021-12-18 23:19:25 -05:00
defer func ( ) {
cancel ( )
for range unmerged {
// empty the unmerged channel
}
} ( )
numberOfConflicts := 0
conflict := false
2022-02-09 15:28:55 -05:00
conflictedFiles := make ( [ ] string , 0 , 5 )
2021-12-18 23:19:25 -05:00
for file := range unmerged {
if file == nil {
break
}
if file . err != nil {
cancel ( )
2022-02-09 15:28:55 -05:00
return false , nil , file . err
2021-12-18 23:19:25 -05:00
}
// OK now we have the unmerged file triplet attempt to merge it
2022-02-09 15:28:55 -05:00
if err := attemptMerge ( ctx , file , gitPath , gitRepo ) ; err != nil {
2021-12-18 23:19:25 -05:00
if conflictErr , ok := err . ( * errMergeConflict ) ; ok {
2022-02-09 15:28:55 -05:00
log . Trace ( "Conflict: %s in %s" , conflictErr . filename , description )
2021-12-18 23:19:25 -05:00
conflict = true
if numberOfConflicts < 10 {
2022-02-09 15:28:55 -05:00
conflictedFiles = append ( conflictedFiles , conflictErr . filename )
2021-12-18 23:19:25 -05:00
}
numberOfConflicts ++
continue
}
2022-02-09 15:28:55 -05:00
return false , nil , err
2021-12-18 23:19:25 -05:00
}
}
2022-02-09 15:28:55 -05:00
return conflict , conflictedFiles , nil
}
2022-06-13 05:37:59 -04:00
func checkConflicts ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository , tmpBasePath string ) ( bool , error ) {
2022-03-29 12:42:34 -04:00
// 1. checkConflicts resets the conflict status - therefore - reset the conflict status
pr . ConflictedFiles = nil
// 2. AttemptThreeWayMerge first - this is much quicker than plain patch to base
2022-02-09 15:28:55 -05:00
description := fmt . Sprintf ( "PR[%d] %s/%s#%d" , pr . ID , pr . BaseRepo . OwnerName , pr . BaseRepo . Name , pr . Index )
2022-12-19 06:37:15 -05:00
conflict , conflictFiles , err := AttemptThreeWayMerge ( ctx ,
2022-02-09 15:28:55 -05:00
tmpBasePath , gitRepo , pr . MergeBase , "base" , "tracking" , description )
if err != nil {
return false , err
}
2021-12-18 23:19:25 -05:00
if ! conflict {
2022-12-19 06:37:15 -05:00
// No conflicts detected so we need to check if the patch is empty...
// a. Write the newly merged tree and check the new tree-hash
2022-03-31 22:55:30 -04:00
var treeHash string
treeHash , _ , err = git . NewCommand ( ctx , "write-tree" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-18 23:19:25 -05:00
if err != nil {
2022-07-28 19:19:55 -04:00
lsfiles , _ , _ := git . NewCommand ( ctx , "ls-files" , "-u" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
return false , fmt . Errorf ( "unable to write unconflicted tree: %w\n`git ls-files -u`:\n%s" , err , lsfiles )
2021-12-18 23:19:25 -05:00
}
treeHash = strings . TrimSpace ( treeHash )
baseTree , err := gitRepo . GetTree ( "base" )
if err != nil {
return false , err
}
2022-12-19 06:37:15 -05:00
// b. compare the new tree-hash with the base tree hash
2021-12-18 23:19:25 -05:00
if treeHash == baseTree . ID . String ( ) {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 05:37:59 -04:00
pr . Status = issues_model . PullRequestStatusEmpty
2021-12-18 23:19:25 -05:00
}
return false , nil
}
2022-12-19 06:37:15 -05:00
// 3. OK the three-way merge method has detected conflicts
// 3a. Are still testing with GitApply? If not set the conflict status and move on
if ! setting . Repository . PullRequest . TestConflictingPatchesWithGitApply {
pr . Status = issues_model . PullRequestStatusConflict
pr . ConflictedFiles = conflictFiles
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
return true , nil
}
2021-12-18 23:19:25 -05:00
2022-12-19 06:37:15 -05:00
// 3b. Create a plain patch from head to base
2021-09-22 01:38:34 -04:00
tmpPatchFile , err := os . CreateTemp ( "" , "patch" )
2019-12-13 17:21:06 -05:00
if err != nil {
log . Error ( "Unable to create temporary patch file! Error: %v" , err )
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "unable to create temporary patch file! Error: %w" , err )
2019-12-13 17:21:06 -05:00
}
defer func ( ) {
2020-08-11 16:05:34 -04:00
_ = util . Remove ( tmpPatchFile . Name ( ) )
2019-12-13 17:21:06 -05:00
} ( )
2021-09-27 17:09:49 -04:00
if err := gitRepo . GetDiffBinary ( pr . MergeBase , "tracking" , tmpPatchFile ) ; err != nil {
2019-12-13 17:21:06 -05:00
tmpPatchFile . Close ( )
2020-01-12 04:36:21 -05:00
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-13 17:21:06 -05:00
}
stat , err := tmpPatchFile . Stat ( )
if err != nil {
tmpPatchFile . Close ( )
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "unable to stat patch file: %w" , err )
2019-12-13 17:21:06 -05:00
}
patchPath := tmpPatchFile . Name ( )
tmpPatchFile . Close ( )
2022-12-19 06:37:15 -05:00
// 3c. if the size of that patch is 0 - there can be no conflicts!
2019-12-13 17:21:06 -05:00
if stat . Size ( ) == 0 {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 05:37:59 -04:00
pr . Status = issues_model . PullRequestStatusEmpty
2020-10-13 14:50:57 -04:00
return false , nil
2019-12-13 17:21:06 -05:00
}
log . Trace ( "PullRequest[%d].testPatch (patchPath): %s" , pr . ID , patchPath )
2022-03-29 12:42:34 -04:00
// 4. Read the base branch in to the index of the temporary repository
2022-03-31 22:55:30 -04:00
_ , _ , err = git . NewCommand ( gitRepo . Ctx , "read-tree" , "base" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2019-12-13 17:21:06 -05:00
if err != nil {
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "git read-tree %s: %w" , pr . BaseBranch , err )
2019-12-13 17:21:06 -05:00
}
2022-03-29 12:42:34 -04:00
// 5. Now get the pull request configuration to check if we need to ignore whitespace
2022-12-09 21:46:31 -05:00
prUnit , err := pr . BaseRepo . GetUnit ( ctx , unit . TypePullRequests )
2019-12-13 17:21:06 -05:00
if err != nil {
2020-10-13 14:50:57 -04:00
return false , err
2019-12-13 17:21:06 -05:00
}
prConfig := prUnit . PullRequestsConfig ( )
2022-03-29 12:42:34 -04:00
// 6. Prepare the arguments to apply the patch against the index
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdApply := git . NewCommand ( gitRepo . Ctx , "apply" , "--check" , "--cached" )
2019-12-13 17:21:06 -05:00
if prConfig . IgnoreWhitespaceConflicts {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdApply . AddArguments ( "--ignore-whitespace" )
2019-12-13 17:21:06 -05:00
}
2022-02-01 23:46:10 -05:00
is3way := false
2021-12-18 23:19:25 -05:00
if git . CheckGitVersionAtLeast ( "2.32.0" ) == nil {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdApply . AddArguments ( "--3way" )
2022-02-01 23:46:10 -05:00
is3way = true
2021-12-18 23:19:25 -05:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
cmdApply . AddDynamicArguments ( patchPath )
2019-12-13 17:21:06 -05:00
2022-03-29 12:42:34 -04:00
// 7. Prep the pipe:
2020-10-13 14:50:57 -04:00
// - Here we could do the equivalent of:
// `git apply --check --cached patch_file > conflicts`
// Then iterate through the conflicts. However, that means storing all the conflicts
// in memory - which is very wasteful.
// - alternatively we can do the equivalent of:
// `git apply --check ... | grep ...`
// meaning we don't store all of the conflicts unnecessarily.
2019-12-13 17:21:06 -05:00
stderrReader , stderrWriter , err := os . Pipe ( )
if err != nil {
log . Error ( "Unable to open stderr pipe: %v" , err )
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "unable to open stderr pipe: %w" , err )
2019-12-13 17:21:06 -05:00
}
defer func ( ) {
_ = stderrReader . Close ( )
_ = stderrWriter . Close ( )
} ( )
2020-10-13 14:50:57 -04:00
2022-03-29 12:42:34 -04:00
// 8. Run the check command
2021-12-18 23:19:25 -05:00
conflict = false
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
err = cmdApply . Run ( & git . RunOpts {
Dir : tmpBasePath ,
Stderr : stderrWriter ,
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
// Close the writer end of the pipe to begin processing
_ = stderrWriter . Close ( )
defer func ( ) {
// Close the reader on return to terminate the git command if necessary
_ = stderrReader . Close ( )
} ( )
const prefix = "error: patch failed:"
const errorPrefix = "error: "
const threewayFailed = "Failed to perform three-way merge..."
const appliedPatchPrefix = "Applied patch to '"
const withConflicts = "' with conflicts."
conflicts := make ( container . Set [ string ] )
// Now scan the output from the command
scanner := bufio . NewScanner ( stderrReader )
for scanner . Scan ( ) {
line := scanner . Text ( )
log . Trace ( "PullRequest[%d].testPatch: stderr: %s" , pr . ID , line )
if strings . HasPrefix ( line , prefix ) {
conflict = true
filepath := strings . TrimSpace ( strings . Split ( line [ len ( prefix ) : ] , ":" ) [ 0 ] )
conflicts . Add ( filepath )
} else if is3way && line == threewayFailed {
conflict = true
} else if strings . HasPrefix ( line , errorPrefix ) {
conflict = true
for _ , suffix := range patchErrorSuffices {
if strings . HasSuffix ( line , suffix ) {
filepath := strings . TrimSpace ( strings . TrimSuffix ( line [ len ( errorPrefix ) : ] , suffix ) )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-13 17:21:06 -05:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
break
2022-02-01 23:46:10 -05:00
}
2019-12-13 17:21:06 -05:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
} else if is3way && strings . HasPrefix ( line , appliedPatchPrefix ) && strings . HasSuffix ( line , withConflicts ) {
conflict = true
filepath := strings . TrimPrefix ( strings . TrimSuffix ( line , withConflicts ) , appliedPatchPrefix )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-13 17:21:06 -05:00
}
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
// only list 10 conflicted files
if len ( conflicts ) >= 10 {
break
}
}
2020-10-13 14:50:57 -04:00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
if len ( conflicts ) > 0 {
pr . ConflictedFiles = make ( [ ] string , 0 , len ( conflicts ) )
for key := range conflicts {
pr . ConflictedFiles = append ( pr . ConflictedFiles , key )
2019-12-13 17:21:06 -05:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
}
2020-10-13 14:50:57 -04:00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 21:30:43 -05:00
return nil
} ,
} )
2019-12-13 17:21:06 -05:00
2022-04-21 17:55:45 -04:00
// 9. Check if the found conflictedfiles is non-zero, "err" could be non-nil, so we should ignore it if we found conflicts.
// Note: `"err" could be non-nil` is due that if enable 3-way merge, it doesn't return any error on found conflicts.
if len ( pr . ConflictedFiles ) > 0 {
2019-12-13 17:21:06 -05:00
if conflict {
2022-06-13 05:37:59 -04:00
pr . Status = issues_model . PullRequestStatusConflict
2019-12-13 17:21:06 -05:00
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
2020-10-13 14:50:57 -04:00
return true , nil
2019-12-13 17:21:06 -05:00
}
2022-04-21 17:55:45 -04:00
} else if err != nil {
2022-10-24 15:29:17 -04:00
return false , fmt . Errorf ( "git apply --check: %w" , err )
2019-12-13 17:21:06 -05:00
}
2020-10-13 14:50:57 -04:00
return false , nil
}
// CheckFileProtection check file Protection
2022-01-19 18:26:57 -05:00
func CheckFileProtection ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , limit int , env [ ] string ) ( [ ] string , error ) {
2020-10-13 14:50:57 -04:00
if len ( patterns ) == 0 {
return nil , nil
}
2022-01-19 18:26:57 -05:00
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2020-10-13 14:50:57 -04:00
if err != nil {
return nil , err
}
changedProtectedFiles := make ( [ ] string , 0 , limit )
2021-09-11 10:21:17 -04:00
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
for _ , pat := range patterns {
if pat . Match ( lpath ) {
changedProtectedFiles = append ( changedProtectedFiles , lpath )
break
}
}
if len ( changedProtectedFiles ) >= limit {
break
}
}
if len ( changedProtectedFiles ) > 0 {
err = models . ErrFilePathProtected {
Path : changedProtectedFiles [ 0 ] ,
}
2020-10-13 14:50:57 -04:00
}
return changedProtectedFiles , err
}
2021-09-11 10:21:17 -04:00
// CheckUnprotectedFiles check if the commit only touches unprotected files
2022-01-19 18:26:57 -05:00
func CheckUnprotectedFiles ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , env [ ] string ) ( bool , error ) {
2021-09-11 10:21:17 -04:00
if len ( patterns ) == 0 {
return false , nil
}
2022-01-19 18:26:57 -05:00
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2021-09-11 10:21:17 -04:00
if err != nil {
return false , err
}
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
unprotected := false
for _ , pat := range patterns {
if pat . Match ( lpath ) {
unprotected = true
break
}
}
if ! unprotected {
return false , nil
}
}
return true , nil
}
2020-10-13 14:50:57 -04:00
// checkPullFilesProtection check if pr changed protected files and save results
2023-01-16 03:00:22 -05:00
func checkPullFilesProtection ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository ) error {
2022-06-13 05:37:59 -04:00
if pr . Status == issues_model . PullRequestStatusEmpty {
2022-03-29 12:42:34 -04:00
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 03:00:22 -05:00
pb , err := git_model . GetFirstMatchProtectedBranchRule ( ctx , pr . BaseRepoID , pr . BaseBranch )
if err != nil {
2020-10-13 14:50:57 -04:00
return err
}
2023-01-16 03:00:22 -05:00
if pb == nil {
2020-10-13 14:50:57 -04:00
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 03:00:22 -05:00
pr . ChangedProtectedFiles , err = CheckFileProtection ( gitRepo , pr . MergeBase , "tracking" , pb . GetProtectedFilePatterns ( ) , 10 , os . Environ ( ) )
2020-10-13 14:50:57 -04:00
if err != nil && ! models . IsErrFilePathProtected ( err ) {
return err
}
2019-12-13 17:21:06 -05:00
return nil
}