mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-29 09:31:11 -05:00
523efa433b
* Extract out the common cat-file batch calls Signed-off-by: Andrew Thornton <art27@cantab.net> * Move bleve and elastic indexers to use a common cat-file --batch when indexing Signed-off-by: Andrew Thornton <art27@cantab.net> * move catfilebatch to batch_reader and rename to batch_reader.go Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: Lauris BH <lauris@nix.lv>
349 lines
9.2 KiB
Go
349 lines
9.2 KiB
Go
// Copyright 2017 The Gitea Authors. All rights reserved.
|
|
// Use of this source code is governed by a MIT-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build !gogit
|
|
|
|
package git
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"path"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// GetCommitsInfo gets information of all commits that are corresponding to these entries
|
|
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache *LastCommitCache) ([]CommitInfo, *Commit, error) {
|
|
entryPaths := make([]string, len(tes)+1)
|
|
// Get the commit for the treePath itself
|
|
entryPaths[0] = ""
|
|
for i, entry := range tes {
|
|
entryPaths[i+1] = entry.Name()
|
|
}
|
|
|
|
var err error
|
|
|
|
var revs map[string]*Commit
|
|
if cache != nil {
|
|
var unHitPaths []string
|
|
revs, unHitPaths, err = getLastCommitForPathsByCache(commit.ID.String(), treePath, entryPaths, cache)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
if len(unHitPaths) > 0 {
|
|
sort.Strings(unHitPaths)
|
|
commits, err := GetLastCommitForPaths(commit, treePath, unHitPaths)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
for i, found := range commits {
|
|
if err := cache.Put(commit.ID.String(), path.Join(treePath, unHitPaths[i]), found.ID.String()); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
revs[unHitPaths[i]] = found
|
|
}
|
|
}
|
|
} else {
|
|
sort.Strings(entryPaths)
|
|
revs = map[string]*Commit{}
|
|
var foundCommits []*Commit
|
|
foundCommits, err = GetLastCommitForPaths(commit, treePath, entryPaths)
|
|
for i, found := range foundCommits {
|
|
revs[entryPaths[i]] = found
|
|
}
|
|
}
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
commitsInfo := make([]CommitInfo, len(tes))
|
|
for i, entry := range tes {
|
|
commitsInfo[i] = CommitInfo{
|
|
Entry: entry,
|
|
}
|
|
if entryCommit, ok := revs[entry.Name()]; ok {
|
|
commitsInfo[i].Commit = entryCommit
|
|
if entry.IsSubModule() {
|
|
subModuleURL := ""
|
|
var fullPath string
|
|
if len(treePath) > 0 {
|
|
fullPath = treePath + "/" + entry.Name()
|
|
} else {
|
|
fullPath = entry.Name()
|
|
}
|
|
if subModule, err := commit.GetSubModule(fullPath); err != nil {
|
|
return nil, nil, err
|
|
} else if subModule != nil {
|
|
subModuleURL = subModule.URL
|
|
}
|
|
subModuleFile := NewSubModuleFile(entryCommit, subModuleURL, entry.ID.String())
|
|
commitsInfo[i].SubModuleFile = subModuleFile
|
|
}
|
|
}
|
|
}
|
|
|
|
// Retrieve the commit for the treePath itself (see above). We basically
|
|
// get it for free during the tree traversal and it's used for listing
|
|
// pages to display information about newest commit for a given path.
|
|
var treeCommit *Commit
|
|
var ok bool
|
|
if treePath == "" {
|
|
treeCommit = commit
|
|
} else if treeCommit, ok = revs[""]; ok {
|
|
treeCommit.repo = commit.repo
|
|
}
|
|
return commitsInfo, treeCommit, nil
|
|
}
|
|
|
|
func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) {
|
|
var unHitEntryPaths []string
|
|
var results = make(map[string]*Commit)
|
|
for _, p := range paths {
|
|
lastCommit, err := cache.Get(commitID, path.Join(treePath, p))
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
if lastCommit != nil {
|
|
results[p] = lastCommit.(*Commit)
|
|
continue
|
|
}
|
|
|
|
unHitEntryPaths = append(unHitEntryPaths, p)
|
|
}
|
|
|
|
return results, unHitEntryPaths, nil
|
|
}
|
|
|
|
// GetLastCommitForPaths returns last commit information
|
|
func GetLastCommitForPaths(commit *Commit, treePath string, paths []string) ([]*Commit, error) {
|
|
// We read backwards from the commit to obtain all of the commits
|
|
|
|
// We'll do this by using rev-list to provide us with parent commits in order
|
|
revListReader, revListWriter := io.Pipe()
|
|
defer func() {
|
|
_ = revListWriter.Close()
|
|
_ = revListReader.Close()
|
|
}()
|
|
|
|
go func() {
|
|
stderr := strings.Builder{}
|
|
err := NewCommand("rev-list", "--format=%T", commit.ID.String()).RunInDirPipeline(commit.repo.Path, revListWriter, &stderr)
|
|
if err != nil {
|
|
_ = revListWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
|
|
} else {
|
|
_ = revListWriter.Close()
|
|
}
|
|
}()
|
|
|
|
batchStdinWriter, batchReader, cancel := CatFileBatch(commit.repo.Path)
|
|
defer cancel()
|
|
|
|
mapsize := 4096
|
|
if len(paths) > mapsize {
|
|
mapsize = len(paths)
|
|
}
|
|
|
|
path2idx := make(map[string]int, mapsize)
|
|
for i, path := range paths {
|
|
path2idx[path] = i
|
|
}
|
|
|
|
fnameBuf := make([]byte, 4096)
|
|
modeBuf := make([]byte, 40)
|
|
|
|
allShaBuf := make([]byte, (len(paths)+1)*20)
|
|
shaBuf := make([]byte, 20)
|
|
tmpTreeID := make([]byte, 40)
|
|
|
|
// commits is the returnable commits matching the paths provided
|
|
commits := make([]string, len(paths))
|
|
// ids are the blob/tree ids for the paths
|
|
ids := make([][]byte, len(paths))
|
|
|
|
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
|
|
scan := bufio.NewScanner(revListReader)
|
|
revListLoop:
|
|
for scan.Scan() {
|
|
// Get the next parent commit ID
|
|
commitID := scan.Text()
|
|
if !scan.Scan() {
|
|
break revListLoop
|
|
}
|
|
commitID = commitID[7:]
|
|
rootTreeID := scan.Text()
|
|
|
|
// push the tree to the cat-file --batch process
|
|
_, err := batchStdinWriter.Write([]byte(rootTreeID + "\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
currentPath := ""
|
|
|
|
// OK if the target tree path is "" and the "" is in the paths just set this now
|
|
if treePath == "" && paths[0] == "" {
|
|
// If this is the first time we see this set the id appropriate for this paths to this tree and set the last commit to curCommit
|
|
if len(ids[0]) == 0 {
|
|
ids[0] = []byte(rootTreeID)
|
|
commits[0] = string(commitID)
|
|
} else if bytes.Equal(ids[0], []byte(rootTreeID)) {
|
|
commits[0] = string(commitID)
|
|
}
|
|
}
|
|
|
|
treeReadingLoop:
|
|
for {
|
|
_, _, size, err := ReadBatchLine(batchReader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Handle trees
|
|
|
|
// n is counter for file position in the tree file
|
|
var n int64
|
|
|
|
// Two options: currentPath is the targetTreepath
|
|
if treePath == currentPath {
|
|
// We are in the right directory
|
|
// Parse each tree line in turn. (don't care about mode here.)
|
|
for n < size {
|
|
fname, sha, count, err := ParseTreeLineSkipMode(batchReader, fnameBuf, shaBuf)
|
|
shaBuf = sha
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
n += int64(count)
|
|
idx, ok := path2idx[string(fname)]
|
|
if ok {
|
|
// Now if this is the first time round set the initial Blob(ish) SHA ID and the commit
|
|
if len(ids[idx]) == 0 {
|
|
copy(allShaBuf[20*(idx+1):20*(idx+2)], shaBuf)
|
|
ids[idx] = allShaBuf[20*(idx+1) : 20*(idx+2)]
|
|
commits[idx] = string(commitID)
|
|
} else if bytes.Equal(ids[idx], shaBuf) {
|
|
commits[idx] = string(commitID)
|
|
}
|
|
}
|
|
// FIXME: is there any order to the way strings are emitted from cat-file?
|
|
// if there is - then we could skip once we've passed all of our data
|
|
}
|
|
break treeReadingLoop
|
|
}
|
|
|
|
var treeID []byte
|
|
|
|
// We're in the wrong directory
|
|
// Find target directory in this directory
|
|
idx := len(currentPath)
|
|
if idx > 0 {
|
|
idx++
|
|
}
|
|
target := strings.SplitN(treePath[idx:], "/", 2)[0]
|
|
|
|
for n < size {
|
|
// Read each tree entry in turn
|
|
mode, fname, sha, count, err := ParseTreeLine(batchReader, modeBuf, fnameBuf, shaBuf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
n += int64(count)
|
|
|
|
// if we have found the target directory
|
|
if bytes.Equal(fname, []byte(target)) && bytes.Equal(mode, []byte("40000")) {
|
|
copy(tmpTreeID, sha)
|
|
treeID = tmpTreeID
|
|
break
|
|
}
|
|
}
|
|
|
|
if n < size {
|
|
// Discard any remaining entries in the current tree
|
|
discard := size - n
|
|
for discard > math.MaxInt32 {
|
|
_, err := batchReader.Discard(math.MaxInt32)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
discard -= math.MaxInt32
|
|
}
|
|
_, err := batchReader.Discard(int(discard))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// if we haven't found a treeID for the target directory our search is over
|
|
if len(treeID) == 0 {
|
|
break treeReadingLoop
|
|
}
|
|
|
|
// add the target to the current path
|
|
if idx > 0 {
|
|
currentPath += "/"
|
|
}
|
|
currentPath += target
|
|
|
|
// if we've now found the current path check its sha id and commit status
|
|
if treePath == currentPath && paths[0] == "" {
|
|
if len(ids[0]) == 0 {
|
|
copy(allShaBuf[0:20], treeID)
|
|
ids[0] = allShaBuf[0:20]
|
|
commits[0] = string(commitID)
|
|
} else if bytes.Equal(ids[0], treeID) {
|
|
commits[0] = string(commitID)
|
|
}
|
|
}
|
|
treeID = to40ByteSHA(treeID)
|
|
_, err = batchStdinWriter.Write(treeID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, err = batchStdinWriter.Write([]byte("\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
|
|
commitsMap := make(map[string]*Commit, len(commits))
|
|
commitsMap[commit.ID.String()] = commit
|
|
|
|
commitCommits := make([]*Commit, len(commits))
|
|
for i, commitID := range commits {
|
|
c, ok := commitsMap[commitID]
|
|
if ok {
|
|
commitCommits[i] = c
|
|
continue
|
|
}
|
|
|
|
if len(commitID) == 0 {
|
|
continue
|
|
}
|
|
|
|
_, err := batchStdinWriter.Write([]byte(commitID + "\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, typ, size, err := ReadBatchLine(batchReader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if typ != "commit" {
|
|
return nil, fmt.Errorf("unexpected type: %s for commit id: %s", typ, commitID)
|
|
}
|
|
c, err = CommitFromReader(commit.repo, MustIDFromString(string(commitID)), io.LimitReader(batchReader, int64(size)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
commitCommits[i] = c
|
|
}
|
|
|
|
return commitCommits, scan.Err()
|
|
}
|