mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-12-22 12:54:53 -05:00
Support repo code search without setting up an indexer (#29998)
By using git's ability, end users (especially small instance users) do not need to enable the indexer, they could also benefit from the code searching feature. Fix #29996 ![image](https://github.com/go-gitea/gitea/assets/2114189/11b7e458-88a4-480d-b4d7-72ee59406dd1) ![image](https://github.com/go-gitea/gitea/assets/2114189/0fe777d5-c95c-4288-a818-0427680805b6) --------- Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
parent
488a99fb56
commit
1e7a6483b8
12 changed files with 254 additions and 60 deletions
|
@ -17,6 +17,12 @@ menu:
|
|||
|
||||
# Repository indexer
|
||||
|
||||
## Builtin repository code search without indexer
|
||||
|
||||
Users can perform repository-level code searches without setting up a repository indexer.
|
||||
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
|
||||
Better code search support can be achieved by setting up the repository indexer.
|
||||
|
||||
## Setting up the repository indexer
|
||||
|
||||
Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):
|
||||
|
|
|
@ -87,6 +87,9 @@ _Symbols used in table:_
|
|||
| Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
| Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ |
|
||||
|
||||
- Gitea has builtin repository-level code search
|
||||
- Better code search support can be achieved by [using a repository indexer](administration/repo-indexer.md)
|
||||
|
||||
## Issue Tracker
|
||||
|
||||
| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |
|
||||
|
|
|
@ -367,7 +367,6 @@ type RunStdError interface {
|
|||
error
|
||||
Unwrap() error
|
||||
Stderr() string
|
||||
IsExitCode(code int) bool
|
||||
}
|
||||
|
||||
type runStdError struct {
|
||||
|
@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
|
|||
return r.stderr
|
||||
}
|
||||
|
||||
func (r *runStdError) IsExitCode(code int) bool {
|
||||
func IsErrorExitCode(err error, code int) bool {
|
||||
var exitError *exec.ExitError
|
||||
if errors.As(r.err, &exitError) {
|
||||
if errors.As(err, &exitError) {
|
||||
return exitError.ExitCode() == code
|
||||
}
|
||||
return false
|
||||
|
|
|
@ -340,7 +340,7 @@ func CheckGitVersionEqual(equal string) error {
|
|||
|
||||
func configSet(key, value string) error {
|
||||
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
|
||||
if err != nil && !err.IsExitCode(1) {
|
||||
if err != nil && !IsErrorExitCode(err, 1) {
|
||||
return fmt.Errorf("failed to get git config %s, err: %w", key, err)
|
||||
}
|
||||
|
||||
|
@ -363,7 +363,7 @@ func configSetNonExist(key, value string) error {
|
|||
// already exist
|
||||
return nil
|
||||
}
|
||||
if err.IsExitCode(1) {
|
||||
if IsErrorExitCode(err, 1) {
|
||||
// not exist, set new config
|
||||
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
|
||||
if err != nil {
|
||||
|
@ -381,7 +381,7 @@ func configAddNonExist(key, value string) error {
|
|||
// already exist
|
||||
return nil
|
||||
}
|
||||
if err.IsExitCode(1) {
|
||||
if IsErrorExitCode(err, 1) {
|
||||
// not exist, add new config
|
||||
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
|
||||
if err != nil {
|
||||
|
@ -402,7 +402,7 @@ func configUnsetAll(key, value string) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
if err.IsExitCode(1) {
|
||||
if IsErrorExitCode(err, 1) {
|
||||
// not exist
|
||||
return nil
|
||||
}
|
||||
|
|
112
modules/git/grep.go
Normal file
112
modules/git/grep.go
Normal file
|
@ -0,0 +1,112 @@
|
|||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package git
|
||||
|
||||
import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/util"
)
|
||||
|
||||
type GrepResult struct {
|
||||
Filename string
|
||||
LineNumbers []int
|
||||
LineCodes []string
|
||||
}
|
||||
|
||||
type GrepOptions struct {
|
||||
RefName string
|
||||
ContextLineNumber int
|
||||
IsFuzzy bool
|
||||
}
|
||||
|
||||
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
|
||||
stdoutReader, stdoutWriter, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = stdoutReader.Close()
|
||||
_ = stdoutWriter.Close()
|
||||
}()
|
||||
|
||||
/*
|
||||
The output is like this ( "^@" means \x00):
|
||||
|
||||
HEAD:.air.toml
|
||||
6^@bin = "gitea"
|
||||
|
||||
HEAD:.changelog.yml
|
||||
2^@repo: go-gitea/gitea
|
||||
*/
|
||||
var results []*GrepResult
|
||||
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
|
||||
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
|
||||
if opts.IsFuzzy {
|
||||
words := strings.Fields(search)
|
||||
for _, word := range words {
|
||||
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
|
||||
}
|
||||
} else {
|
||||
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
|
||||
}
|
||||
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
|
||||
stderr := bytes.Buffer{}
|
||||
err = cmd.Run(&RunOpts{
|
||||
Dir: repo.Path,
|
||||
Stdout: stdoutWriter,
|
||||
Stderr: &stderr,
|
||||
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
|
||||
_ = stdoutWriter.Close()
|
||||
defer stdoutReader.Close()
|
||||
|
||||
isInBlock := false
|
||||
scanner := bufio.NewScanner(stdoutReader)
|
||||
var res *GrepResult
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if !isInBlock {
|
||||
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
|
||||
isInBlock = true
|
||||
res = &GrepResult{Filename: filename}
|
||||
results = append(results, res)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if line == "" {
|
||||
if len(results) >= 50 {
|
||||
cancel()
|
||||
break
|
||||
}
|
||||
isInBlock = false
|
||||
continue
|
||||
}
|
||||
if line == "--" {
|
||||
continue
|
||||
}
|
||||
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
|
||||
lineNumInt, _ := strconv.Atoi(lineNum)
|
||||
res.LineNumbers = append(res.LineNumbers, lineNumInt)
|
||||
res.LineCodes = append(res.LineCodes, lineCode)
|
||||
}
|
||||
}
|
||||
return scanner.Err()
|
||||
},
|
||||
})
|
||||
// git grep exits with 1 if no results are found
|
||||
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil && !errors.Is(err, context.Canceled) {
|
||||
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
|
||||
}
|
||||
return results, nil
|
||||
}
|
41
modules/git/grep_test.go
Normal file
41
modules/git/grep_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package git
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGrepSearch(t *testing.T) {
|
||||
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
|
||||
assert.NoError(t, err)
|
||||
defer repo.Close()
|
||||
|
||||
res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []*GrepResult{
|
||||
{
|
||||
Filename: "java-hello/main.java",
|
||||
LineNumbers: []int{3},
|
||||
LineCodes: []string{" public static void main(String[] args)"},
|
||||
},
|
||||
{
|
||||
Filename: "main.vendor.java",
|
||||
LineNumbers: []int{3},
|
||||
LineCodes: []string{" public static void main(String[] args)"},
|
||||
},
|
||||
}, res)
|
||||
|
||||
res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, res, 0)
|
||||
|
||||
res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
|
||||
assert.Error(t, err)
|
||||
assert.Len(t, res, 0)
|
||||
}
|
|
@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
|
||||
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
|
||||
hl, _ := highlight.Code(filename, "", code)
|
||||
highlightedLines := strings.Split(string(hl), "\n")
|
||||
|
||||
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
|
||||
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
|
||||
for i := 0; i < len(lines); i++ {
|
||||
lines[i].Num = lineNums[i]
|
||||
lines[i].FormattedContent = template.HTML(highlightedLines[i])
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
|
||||
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
|
||||
|
||||
var formattedLinesBuffer bytes.Buffer
|
||||
|
||||
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
|
||||
lines := make([]ResultLine, 0, len(contentLines))
|
||||
lineNums := make([]int, 0, len(contentLines))
|
||||
index := startIndex
|
||||
for i, line := range contentLines {
|
||||
var err error
|
||||
|
@ -91,29 +105,16 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
|
|||
line[closeActiveIndex:],
|
||||
)
|
||||
} else {
|
||||
err = writeStrings(&formattedLinesBuffer,
|
||||
line,
|
||||
)
|
||||
err = writeStrings(&formattedLinesBuffer, line)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lines = append(lines, ResultLine{Num: startLineNum + i})
|
||||
lineNums = append(lineNums, startLineNum+i)
|
||||
index += len(line)
|
||||
}
|
||||
|
||||
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
|
||||
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
|
||||
highlightedLines := strings.Split(string(hl), "\n")
|
||||
|
||||
// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
|
||||
lines = lines[:min(len(highlightedLines), len(lines))]
|
||||
highlightedLines = highlightedLines[:len(lines)]
|
||||
for i := 0; i < len(lines); i++ {
|
||||
lines[i].FormattedContent = template.HTML(highlightedLines[i])
|
||||
}
|
||||
|
||||
return &Result{
|
||||
RepoID: result.RepoID,
|
||||
Filename: result.Filename,
|
||||
|
@ -121,7 +122,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
|
|||
UpdatedUnix: result.UpdatedUnix,
|
||||
Language: result.Language,
|
||||
Color: result.Color,
|
||||
Lines: lines,
|
||||
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -171,6 +171,7 @@ org_kind = Search orgs...
|
|||
team_kind = Search teams...
|
||||
code_kind = Search code...
|
||||
code_search_unavailable = Code search is currently not available. Please contact the site administrator.
|
||||
code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if the site administrator enables the Repository Indexer.
|
||||
package_kind = Search packages...
|
||||
project_kind = Search projects...
|
||||
branch_kind = Search branches...
|
||||
|
|
|
@ -5,9 +5,11 @@ package repo
|
|||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
|
@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search"
|
|||
|
||||
// Search render repository search page
|
||||
func Search(ctx *context.Context) {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
ctx.Redirect(ctx.Repo.RepoLink)
|
||||
return
|
||||
}
|
||||
|
||||
language := ctx.FormTrim("l")
|
||||
keyword := ctx.FormTrim("q")
|
||||
|
||||
|
@ -41,19 +38,55 @@ func Search(ctx *context.Context) {
|
|||
if page <= 0 {
|
||||
page = 1
|
||||
}
|
||||
|
||||
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
|
||||
language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
|
||||
if err != nil {
|
||||
if code_indexer.IsAvailable(ctx) {
|
||||
ctx.ServerError("SearchResults", err)
|
||||
|
||||
var total int
|
||||
var searchResults []*code_indexer.Result
|
||||
var searchResultLanguages []*code_indexer.SearchResultLanguages
|
||||
if setting.Indexer.RepoIndexerEnabled {
|
||||
var err error
|
||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||
RepoIDs: []int64{ctx.Repo.Repository.ID},
|
||||
Keyword: keyword,
|
||||
IsKeywordFuzzy: isFuzzy,
|
||||
Language: language,
|
||||
Paginator: &db.ListOptions{
|
||||
Page: page,
|
||||
PageSize: setting.UI.RepoSearchPagingNum,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
if code_indexer.IsAvailable(ctx) {
|
||||
ctx.ServerError("SearchResults", err)
|
||||
return
|
||||
}
|
||||
ctx.Data["CodeIndexerUnavailable"] = true
|
||||
} else {
|
||||
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
||||
}
|
||||
} else {
|
||||
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
|
||||
if err != nil {
|
||||
ctx.ServerError("GrepSearch", err)
|
||||
return
|
||||
}
|
||||
ctx.Data["CodeIndexerUnavailable"] = true
|
||||
} else {
|
||||
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
||||
total = len(res)
|
||||
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
|
||||
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
|
||||
res = res[pageStart:pageEnd]
|
||||
for _, r := range res {
|
||||
searchResults = append(searchResults, &code_indexer.Result{
|
||||
RepoID: ctx.Repo.Repository.ID,
|
||||
Filename: r.Filename,
|
||||
CommitID: ctx.Repo.CommitID,
|
||||
// UpdatedUnix: not supported yet
|
||||
// Language: not supported yet
|
||||
// Color: not supported yet
|
||||
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Data["CodeIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||
ctx.Data["Repo"] = ctx.Repo.Repository
|
||||
ctx.Data["SourcePath"] = ctx.Repo.Repository.Link()
|
||||
ctx.Data["SearchResults"] = searchResults
|
||||
|
|
|
@ -5,29 +5,18 @@
|
|||
{{template "base/alert" .}}
|
||||
{{template "repo/code/recently_pushed_new_branches" .}}
|
||||
{{if and (not .HideRepoInfo) (not .IsBlame)}}
|
||||
<div class="ui repo-description gt-word-break">
|
||||
<div id="repo-desc" class="tw-text-16">
|
||||
<div class="repo-description">
|
||||
<div id="repo-desc" class="gt-word-break tw-text-16">
|
||||
{{$description := .Repository.DescriptionHTML $.Context}}
|
||||
{{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}}
|
||||
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
|
||||
</div>
|
||||
{{if .RepoSearchEnabled}}
|
||||
<div class="ui repo-search">
|
||||
<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get">
|
||||
<div class="field">
|
||||
<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "search.code_search_unavailable"}}"{{end}}>
|
||||
<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
|
||||
{{if .CodeIndexerUnavailable}}
|
||||
<i class="icon">{{svg "octicon-alert"}}</i>
|
||||
{{end}}
|
||||
<button class="ui small icon button"{{if .CodeIndexerUnavailable}} disabled{{end}} type="submit">
|
||||
{{svg "octicon-search"}}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
<form class="ignore-dirty" action="{{.RepoLink}}/search" method="get">
|
||||
<div class="ui small action input">
|
||||
<input name="q" value="{{.Keyword}}" placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
|
||||
{{template "shared/search/button"}}
|
||||
</div>
|
||||
{{end}}
|
||||
</form>
|
||||
</div>
|
||||
<div class="tw-flex tw-items-center tw-flex-wrap tw-gap-1" id="repo-topics">
|
||||
{{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}}
|
||||
|
|
|
@ -11,9 +11,16 @@
|
|||
<div class="ui error message">
|
||||
<p>{{ctx.Locale.Tr "search.code_search_unavailable"}}</p>
|
||||
</div>
|
||||
{{else if .SearchResults}}
|
||||
{{template "shared/search/code/results" .}}
|
||||
{{else if .Keyword}}
|
||||
<div>{{ctx.Locale.Tr "search.no_results"}}</div>
|
||||
{{else}}
|
||||
{{if not .CodeIndexerEnabled}}
|
||||
<div class="ui message">
|
||||
<p>{{ctx.Locale.Tr "search.code_search_by_git_grep"}}</p>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if .SearchResults}}
|
||||
{{template "shared/search/code/results" .}}
|
||||
{{else if .Keyword}}
|
||||
<div>{{ctx.Locale.Tr "search.no_results"}}</div>
|
||||
{{end}}
|
||||
{{end}}
|
||||
</div>
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
{{if or .result.Language (not .result.UpdatedUnix.IsZero)}}
|
||||
<div class="ui bottom attached table segment tw-flex tw-items-center tw-justify-between">
|
||||
<div class="tw-flex tw-items-center gt-ml-4">
|
||||
{{if .result.Language}}
|
||||
|
@ -10,3 +11,4 @@
|
|||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
|
|
Loading…
Reference in a new issue