mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-24 08:57:03 -05:00
d60152587c
- The code that gets contributor stats tried to store a
`map[string]*ContributorData` type in the cache; this works for the
memory cache but not for other caches such as Redis.
- The cache implementation for Redis would convert this map via
`fmt.Sprintf` to a string, which would simply print the pointer and not
the value it points to. Storing pointers is a no-go, as the pointed-to
data will eventually get GC-ed within a few minutes. Therefore store
everything as JSON, which properly stores the values behind the pointers.
- Adds unit test that verifies JSON is being used.
- Resolves https://codeberg.org/forgejo/forgejo/issues/3158
(cherry picked from commit 4c8b67c4b2
)
328 lines
9.7 KiB
Go
328 lines
9.7 KiB
Go
// Copyright 2023 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package repository
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"code.gitea.io/gitea/models/avatars"
|
|
repo_model "code.gitea.io/gitea/models/repo"
|
|
user_model "code.gitea.io/gitea/models/user"
|
|
"code.gitea.io/gitea/modules/git"
|
|
"code.gitea.io/gitea/modules/gitrepo"
|
|
"code.gitea.io/gitea/modules/graceful"
|
|
"code.gitea.io/gitea/modules/json"
|
|
"code.gitea.io/gitea/modules/log"
|
|
api "code.gitea.io/gitea/modules/structs"
|
|
|
|
"gitea.com/go-chi/cache"
|
|
)
|
|
|
|
// contributorStatsCacheKey is the fmt pattern of the cache key under which the
// generated statistics are stored; GetContributorStats fills it with the
// repository's full name and the requested revision.
const contributorStatsCacheKey = "GetContributorStats/%s/%s"

// contributorStatsCacheTimeout is the timeout handed to cache.Put for every
// entry written by the contributor stats generation (presumably seconds, i.e.
// ten minutes - confirm against the cache implementation).
const contributorStatsCacheTimeout int64 = 10 * 60
|
|
|
|
var (
	// awaitGenerationTime is how long GetContributorStats waits for a freshly
	// started generation before giving up with ErrAwaitGeneration.
	awaitGenerationTime = time.Second * 5

	// ErrAwaitGeneration is returned by GetContributorStats when the statistics
	// are not available yet: either a generation for the same cache key is
	// already running, or the one just started did not finish within
	// awaitGenerationTime. Compare with errors.Is rather than by message.
	ErrAwaitGeneration = errors.New("generation took longer than " + awaitGenerationTime.String())

	// generateLock records the cache keys for which a generation is currently
	// in flight, so that only one generation per key runs at a time.
	generateLock sync.Map
)
|
|
|
|
// WeekData holds the commit activity that was aggregated for a single week.
type WeekData struct {
	// Week is the starting day of the week as Unix timestamp
	// (generateContributorStats fills it with a millisecond value).
	Week int64 `json:"week"`
	// Additions is the number of additions in that week.
	Additions int `json:"additions"`
	// Deletions is the number of deletions in that week.
	Deletions int `json:"deletions"`
	// Commits is the number of commits in that week.
	Commits int `json:"commits"`
}
|
|
|
|
// ContributorData represents statistical git commit count data
|
|
type ContributorData struct {
|
|
Name string `json:"name"` // Display name of the contributor
|
|
Login string `json:"login"` // Login name of the contributor in case it exists
|
|
AvatarLink string `json:"avatar_link"`
|
|
HomeLink string `json:"home_link"`
|
|
TotalCommits int64 `json:"total_commits"`
|
|
Weeks map[int64]*WeekData `json:"weeks"`
|
|
}
|
|
|
|
// ExtendedCommitStats contains information for commit stats with author data
|
|
type ExtendedCommitStats struct {
|
|
Author *api.CommitUser `json:"author"`
|
|
Stats *api.CommitStats `json:"stats"`
|
|
}
|
|
|
|
// layout is the date format (YYYY-MM-DD) used to parse and print author dates.
const layout = time.DateOnly

// findLastSundayBeforeDate returns, formatted as YYYY-MM-DD, the Sunday that
// starts the week containing dateStr. A date that already is a Sunday is
// returned unchanged. An error is returned when dateStr is not a valid
// YYYY-MM-DD date.
func findLastSundayBeforeDate(dateStr string) (string, error) {
	date, err := time.Parse(layout, dateStr)
	if err != nil {
		return "", err
	}

	// time.Sunday is 0, so the weekday number already is the count of days
	// that passed since the most recent Sunday; it can never be negative.
	lastSunday := date.AddDate(0, 0, -int(date.Weekday()))
	return lastSunday.Format(layout), nil
}
|
|
|
|
// GetContributorStats returns contributors stats for git commits for given revision or default branch
|
|
func GetContributorStats(ctx context.Context, cache cache.Cache, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) {
|
|
// as GetContributorStats is resource intensive we cache the result
|
|
cacheKey := fmt.Sprintf(contributorStatsCacheKey, repo.FullName(), revision)
|
|
if !cache.IsExist(cacheKey) {
|
|
genReady := make(chan struct{})
|
|
|
|
// dont start multible async generations
|
|
_, run := generateLock.Load(cacheKey)
|
|
if run {
|
|
return nil, ErrAwaitGeneration
|
|
}
|
|
|
|
generateLock.Store(cacheKey, struct{}{})
|
|
// run generation async
|
|
go generateContributorStats(genReady, cache, cacheKey, repo, revision)
|
|
|
|
select {
|
|
case <-time.After(awaitGenerationTime):
|
|
return nil, ErrAwaitGeneration
|
|
case <-genReady:
|
|
// we got generation ready before timeout
|
|
break
|
|
}
|
|
}
|
|
// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
|
|
|
|
switch v := cache.Get(cacheKey).(type) {
|
|
case error:
|
|
return nil, v
|
|
case string:
|
|
var cachedStats map[string]*ContributorData
|
|
return cachedStats, json.Unmarshal([]byte(v), &cachedStats)
|
|
default:
|
|
return nil, fmt.Errorf("unexpected type in cache detected")
|
|
}
|
|
}
|
|
|
|
// getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision
|
|
func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int */) ([]*ExtendedCommitStats, error) {
|
|
baseCommit, err := repo.GetCommit(revision)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
stdoutReader, stdoutWriter, err := os.Pipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
_ = stdoutReader.Close()
|
|
_ = stdoutWriter.Close()
|
|
}()
|
|
|
|
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse")
|
|
// AddOptionFormat("--max-count=%d", limit)
|
|
gitCmd.AddDynamicArguments(baseCommit.ID.String())
|
|
|
|
var extendedCommitStats []*ExtendedCommitStats
|
|
stderr := new(strings.Builder)
|
|
err = gitCmd.Run(&git.RunOpts{
|
|
Dir: repo.Path,
|
|
Stdout: stdoutWriter,
|
|
Stderr: stderr,
|
|
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
|
|
_ = stdoutWriter.Close()
|
|
scanner := bufio.NewScanner(stdoutReader)
|
|
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line != "---" {
|
|
continue
|
|
}
|
|
scanner.Scan()
|
|
authorName := strings.TrimSpace(scanner.Text())
|
|
scanner.Scan()
|
|
authorEmail := strings.TrimSpace(scanner.Text())
|
|
scanner.Scan()
|
|
date := strings.TrimSpace(scanner.Text())
|
|
scanner.Scan()
|
|
stats := strings.TrimSpace(scanner.Text())
|
|
if authorName == "" || authorEmail == "" || date == "" || stats == "" {
|
|
// FIXME: find a better way to parse the output so that we will handle this properly
|
|
log.Warn("Something is wrong with git log output, skipping...")
|
|
log.Warn("authorName: %s, authorEmail: %s, date: %s, stats: %s", authorName, authorEmail, date, stats)
|
|
continue
|
|
}
|
|
// 1 file changed, 1 insertion(+), 1 deletion(-)
|
|
fields := strings.Split(stats, ",")
|
|
|
|
commitStats := api.CommitStats{}
|
|
for _, field := range fields[1:] {
|
|
parts := strings.Split(strings.TrimSpace(field), " ")
|
|
value, contributionType := parts[0], parts[1]
|
|
amount, _ := strconv.Atoi(value)
|
|
|
|
if strings.HasPrefix(contributionType, "insertion") {
|
|
commitStats.Additions = amount
|
|
} else {
|
|
commitStats.Deletions = amount
|
|
}
|
|
}
|
|
commitStats.Total = commitStats.Additions + commitStats.Deletions
|
|
scanner.Text() // empty line at the end
|
|
|
|
res := &ExtendedCommitStats{
|
|
Author: &api.CommitUser{
|
|
Identity: api.Identity{
|
|
Name: authorName,
|
|
Email: authorEmail,
|
|
},
|
|
Date: date,
|
|
},
|
|
Stats: &commitStats,
|
|
}
|
|
extendedCommitStats = append(extendedCommitStats, res)
|
|
|
|
}
|
|
_ = stdoutReader.Close()
|
|
return nil
|
|
},
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr)
|
|
}
|
|
|
|
return extendedCommitStats, nil
|
|
}
|
|
|
|
func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey string, repo *repo_model.Repository, revision string) {
|
|
ctx := graceful.GetManager().HammerContext()
|
|
|
|
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
|
|
if err != nil {
|
|
err := fmt.Errorf("OpenRepository: %w", err)
|
|
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
|
|
return
|
|
}
|
|
defer closer.Close()
|
|
|
|
if len(revision) == 0 {
|
|
revision = repo.DefaultBranch
|
|
}
|
|
extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision)
|
|
if err != nil {
|
|
err := fmt.Errorf("ExtendedCommitStats: %w", err)
|
|
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
|
|
return
|
|
}
|
|
if len(extendedCommitStats) == 0 {
|
|
err := fmt.Errorf("no commit stats returned for revision '%s'", revision)
|
|
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
|
|
return
|
|
}
|
|
|
|
layout := time.DateOnly
|
|
|
|
unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0)
|
|
contributorsCommitStats := make(map[string]*ContributorData)
|
|
contributorsCommitStats["total"] = &ContributorData{
|
|
Name: "Total",
|
|
Weeks: make(map[int64]*WeekData),
|
|
}
|
|
total := contributorsCommitStats["total"]
|
|
|
|
for _, v := range extendedCommitStats {
|
|
userEmail := v.Author.Email
|
|
if len(userEmail) == 0 {
|
|
continue
|
|
}
|
|
u, _ := user_model.GetUserByEmail(ctx, userEmail)
|
|
if u != nil {
|
|
// update userEmail with user's primary email address so
|
|
// that different mail addresses will linked to same account
|
|
userEmail = u.GetEmail()
|
|
}
|
|
// duplicated logic
|
|
if _, ok := contributorsCommitStats[userEmail]; !ok {
|
|
if u == nil {
|
|
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
|
|
if avatarLink == "" {
|
|
avatarLink = unknownUserAvatarLink
|
|
}
|
|
contributorsCommitStats[userEmail] = &ContributorData{
|
|
Name: v.Author.Name,
|
|
AvatarLink: avatarLink,
|
|
Weeks: make(map[int64]*WeekData),
|
|
}
|
|
} else {
|
|
contributorsCommitStats[userEmail] = &ContributorData{
|
|
Name: u.DisplayName(),
|
|
Login: u.LowerName,
|
|
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
|
|
HomeLink: u.HomeLink(),
|
|
Weeks: make(map[int64]*WeekData),
|
|
}
|
|
}
|
|
}
|
|
// Update user statistics
|
|
user := contributorsCommitStats[userEmail]
|
|
startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date)
|
|
|
|
val, _ := time.Parse(layout, startingOfWeek)
|
|
week := val.UnixMilli()
|
|
|
|
if user.Weeks[week] == nil {
|
|
user.Weeks[week] = &WeekData{
|
|
Additions: 0,
|
|
Deletions: 0,
|
|
Commits: 0,
|
|
Week: week,
|
|
}
|
|
}
|
|
if total.Weeks[week] == nil {
|
|
total.Weeks[week] = &WeekData{
|
|
Additions: 0,
|
|
Deletions: 0,
|
|
Commits: 0,
|
|
Week: week,
|
|
}
|
|
}
|
|
user.Weeks[week].Additions += v.Stats.Additions
|
|
user.Weeks[week].Deletions += v.Stats.Deletions
|
|
user.Weeks[week].Commits++
|
|
user.TotalCommits++
|
|
|
|
// Update overall statistics
|
|
total.Weeks[week].Additions += v.Stats.Additions
|
|
total.Weeks[week].Deletions += v.Stats.Deletions
|
|
total.Weeks[week].Commits++
|
|
total.TotalCommits++
|
|
}
|
|
|
|
data, err := json.Marshal(contributorsCommitStats)
|
|
if err != nil {
|
|
err := fmt.Errorf("couldn't marshal the data: %w", err)
|
|
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
|
|
return
|
|
}
|
|
|
|
// Store the data as an string, to make it uniform what data type is returned
|
|
// from caches.
|
|
_ = cache.Put(cacheKey, string(data), contributorStatsCacheTimeout)
|
|
generateLock.Delete(cacheKey)
|
|
if genDone != nil {
|
|
genDone <- struct{}{}
|
|
}
|
|
}
|