forked from kevadesu/forgejo
Merge branch 'forgejo' into repocard
Commit a82cd18d9a
119 changed files with 6915 additions and 2391 deletions
@@ -4,47 +4,80 @@
package pwn

import (
	"errors"
	"io"
	"net/http"
	"strings"
	"testing"
	"time"

	"github.com/h2non/gock"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

type mockTransport struct{}

func (mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req.URL.Host != "api.pwnedpasswords.com" {
		return nil, errors.New("unexpected host")
	}

	res := &http.Response{
		ProtoMajor: 1,
		ProtoMinor: 1,
		Proto:      "HTTP/1.1",
		Request:    req,
		Header:     make(http.Header),
		StatusCode: 200,
	}

	switch req.URL.Path {
	case "/range/5c1d8":
		res.Body = io.NopCloser(strings.NewReader("EAF2F254732680E8AC339B84F3266ECCBB5:1\r\nFC446EB88938834178CB9322C1EE273C2A7:2"))
		return res, nil
	case "/range/ba189":
		res.Body = io.NopCloser(strings.NewReader("FD4CB34F0378BCB15D23F6FFD28F0775C9E:3\r\nFDF342FCD8C3611DAE4D76E8A992A3E4169:4"))
		return res, nil
	case "/range/a1733":
		res.Body = io.NopCloser(strings.NewReader("C4CE0F1F0062B27B9E2F41AF0C08218017C:1\r\nFC446EB88938834178CB9322C1EE273C2A7:2\r\nFE81480327C992FE62065A827429DD1318B:0"))
		return res, nil
	case "/range/5617b":
		res.Body = io.NopCloser(strings.NewReader("FD4CB34F0378BCB15D23F6FFD28F0775C9E:3\r\nFDF342FCD8C3611DAE4D76E8A992A3E4169:4\r\nFE81480327C992FE62065A827429DD1318B:0"))
		return res, nil
	case "/range/79082":
		res.Body = io.NopCloser(strings.NewReader("FDF342FCD8C3611DAE4D76E8A992A3E4169:4\r\nFE81480327C992FE62065A827429DD1318B:0\r\nAFEF386F56EB0B4BE314E07696E5E6E6536:0"))
		return res, nil
	}

	return nil, errors.New("unexpected path")
}

var client = New(WithHTTP(&http.Client{
-	Timeout: time.Second * 2,
+	Timeout:   time.Second * 2,
+	Transport: mockTransport{},
}))

func TestPassword(t *testing.T) {
	defer gock.Off()

	count, err := client.CheckPassword("", false)
	require.ErrorIs(t, err, ErrEmptyPassword, "blank input should return ErrEmptyPassword")
	assert.Equal(t, -1, count)

	gock.New("https://api.pwnedpasswords.com").Get("/range/5c1d8").Times(1).Reply(200).BodyString("EAF2F254732680E8AC339B84F3266ECCBB5:1\r\nFC446EB88938834178CB9322C1EE273C2A7:2")
	count, err = client.CheckPassword("pwned", false)
	require.NoError(t, err)
	assert.Equal(t, 1, count)

	gock.New("https://api.pwnedpasswords.com").Get("/range/ba189").Times(1).Reply(200).BodyString("FD4CB34F0378BCB15D23F6FFD28F0775C9E:3\r\nFDF342FCD8C3611DAE4D76E8A992A3E4169:4")
	count, err = client.CheckPassword("notpwned", false)
	require.NoError(t, err)
	assert.Equal(t, 0, count)

	gock.New("https://api.pwnedpasswords.com").Get("/range/a1733").Times(1).Reply(200).BodyString("C4CE0F1F0062B27B9E2F41AF0C08218017C:1\r\nFC446EB88938834178CB9322C1EE273C2A7:2\r\nFE81480327C992FE62065A827429DD1318B:0")
	count, err = client.CheckPassword("paddedpwned", true)
	require.NoError(t, err)
	assert.Equal(t, 1, count)

	gock.New("https://api.pwnedpasswords.com").Get("/range/5617b").Times(1).Reply(200).BodyString("FD4CB34F0378BCB15D23F6FFD28F0775C9E:3\r\nFDF342FCD8C3611DAE4D76E8A992A3E4169:4\r\nFE81480327C992FE62065A827429DD1318B:0")
	count, err = client.CheckPassword("paddednotpwned", true)
	require.NoError(t, err)
	assert.Equal(t, 0, count)

	gock.New("https://api.pwnedpasswords.com").Get("/range/79082").Times(1).Reply(200).BodyString("FDF342FCD8C3611DAE4D76E8A992A3E4169:4\r\nFE81480327C992FE62065A827429DD1318B:0\r\nAFEF386F56EB0B4BE314E07696E5E6E6536:0")
	count, err = client.CheckPassword("paddednotpwnedzero", true)
	require.NoError(t, err)
	assert.Equal(t, 0, count)
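The mock transport above only ever sees five-character hash prefixes, which is the point of the Pwned Passwords range API: the full password hash never leaves the client. A minimal sketch of the k-anonymity lookup the test exercises; this is a standalone illustration, not code from the pwn module:

package main

import (
	"crypto/sha1"
	"fmt"
	"strings"
)

func main() {
	// Hash the candidate password and split the hex digest.
	sum := sha1.Sum([]byte("pwned"))
	digest := strings.ToUpper(fmt.Sprintf("%x", sum))
	prefix, suffix := digest[:5], digest[5:]

	// Only the prefix is sent over the network.
	fmt.Println("GET /range/" + prefix)
	// The response is "SUFFIX:count" lines; the match happens locally.
	fmt.Println("local match against suffix", suffix)
}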
@@ -36,13 +36,15 @@ const (
	RegExpGrepMode
)

var GrepSearchOptions = [3]string{"exact", "union", "regexp"}

type GrepOptions struct {
	RefName           string
	MaxResultLimit    int
	MatchesPerFile    int // >= git 2.38
	ContextLineNumber int
	Mode              grepMode
	PathSpec          []setting.Glob
	Filename          string
}

func (opts *GrepOptions) ensureDefaults() {
@@ -112,12 +114,38 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions)
	}

	// pathspec
-	files := make([]string, 0,
-		len(setting.Indexer.IncludePatterns)+
-			len(setting.Indexer.ExcludePatterns)+
-			len(opts.PathSpec))
-	for _, expr := range append(setting.Indexer.IncludePatterns, opts.PathSpec...) {
-		files = append(files, ":"+expr.Pattern())
-	}
+	includeLen := len(setting.Indexer.IncludePatterns)
+	if len(opts.Filename) > 0 {
+		includeLen = 1
+	}
+	files := make([]string, 0, len(setting.Indexer.ExcludePatterns)+includeLen)
+	if len(opts.Filename) > 0 && len(setting.Indexer.IncludePatterns) > 0 {
+		// if both a global include pattern and a per-search filepath are defined,
+		// we only include results where the path matches the globally set pattern
+		// (e.g. global pattern = "src/**" and path = "node_modules/")
+
+		// FIXME: this is a bit too restrictive, and fails to consider cases where the
+		// globally set include pattern refers to a file rather than a directory
+		// (e.g. global pattern = "**.go" and path = "modules/git")
+		exprMatched := false
+		for _, expr := range setting.Indexer.IncludePatterns {
+			if expr.Match(opts.Filename) {
+				files = append(files, ":(literal)"+opts.Filename)
+				exprMatched = true
+				break
+			}
+		}
+		if !exprMatched {
+			log.Warn("git-grep: filepath %s does not match any include pattern", opts.Filename)
+		}
+	} else if len(opts.Filename) > 0 {
+		// if only the filepath is set, just include results that match it
+		files = append(files, ":(literal)"+opts.Filename)
+	} else {
+		// otherwise, if global include patterns are set, include results that strictly match them
+		for _, expr := range setting.Indexer.IncludePatterns {
+			files = append(files, ":"+expr.Pattern())
+		}
+	}
	for _, expr := range setting.Indexer.ExcludePatterns {
		files = append(files, ":^"+expr.Pattern())
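The ":(literal)" and ":^" prefixes built above are standard git pathspec magic: "literal" disables glob expansion for the user-supplied path, and a leading "^" excludes matches. A self-contained sketch of the assembly logic; buildPathspec is a hypothetical helper, not a function in this codebase:

package main

import "fmt"

func buildPathspec(filename string, include, exclude []string) []string {
	var files []string
	if filename != "" {
		// match exactly this path, with no glob expansion
		files = append(files, ":(literal)"+filename)
	} else {
		for _, p := range include {
			files = append(files, ":"+p)
		}
	}
	for _, p := range exclude {
		// a leading ":^" excludes paths matching the pattern
		files = append(files, ":^"+p)
	}
	return files
}

func main() {
	fmt.Println(buildPathspec("modules/git/grep.go", nil, []string{"vendor/**"}))
	// Output: [:(literal)modules/git/grep.go :^vendor/**]
}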
@@ -89,6 +89,20 @@ func TestGrepSearch(t *testing.T) {
		},
	}, res)

+	res, err = GrepSearch(context.Background(), repo, "world", GrepOptions{
+		MatchesPerFile: 1,
+		Filename:       "java-hello/",
+	})
+	require.NoError(t, err)
+	assert.Equal(t, []*GrepResult{
+		{
+			Filename:          "java-hello/main.java",
+			LineNumbers:       []int{1},
+			LineCodes:         []string{"public class HelloWorld"},
+			HighlightedRanges: [][3]int{{0, 18, 23}},
+		},
+	}, res)
+
	res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
	require.NoError(t, err)
	assert.Empty(t, res)
@@ -230,7 +230,7 @@ func (repo *Repository) CommitsByFileAndRange(opts CommitsByFileAndRangeOptions)
	go func() {
		stderr := strings.Builder{}
		gitCmd := NewCommand(repo.Ctx, "rev-list").
-			AddOptionFormat("--max-count=%d", setting.Git.CommitsRangeSize*opts.Page).
+			AddOptionFormat("--max-count=%d", setting.Git.CommitsRangeSize).
			AddOptionFormat("--skip=%d", skip)
		gitCmd.AddDynamicArguments(opts.Revision)
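The one-line fix caps each page at CommitsRangeSize instead of CommitsRangeSize*opts.Page, which had fetched ever-growing result sets as the page number increased. A small sketch of the intended pagination arithmetic; revListArgs is illustrative only:

package main

import "fmt"

// page is 1-based; pageSize mirrors setting.Git.CommitsRangeSize.
func revListArgs(page, pageSize int) []string {
	skip := (page - 1) * pageSize
	return []string{
		"rev-list",
		fmt.Sprintf("--max-count=%d", pageSize), // was pageSize*page: over-fetching
		fmt.Sprintf("--skip=%d", skip),
	}
}

func main() {
	fmt.Println(revListArgs(3, 2)) // [rev-list --max-count=2 --skip=4]
}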
@@ -7,6 +7,9 @@ import (
	"path/filepath"
	"testing"

+	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/test"
+
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

@@ -138,3 +141,61 @@ func TestGetTagCommit(t *testing.T) {
	require.NoError(t, err)
	assert.EqualValues(t, lTagCommitID, lTag.ID.String())
}

func TestCommitsByRange(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	bareRepo1, err := openRepositoryWithDefaultContext(bareRepo1Path)
	require.NoError(t, err)
	defer bareRepo1.Close()

	baseCommit, err := bareRepo1.GetBranchCommit("master")
	require.NoError(t, err)

	testCases := []struct {
		Page                int
		ExpectedCommitCount int
	}{
		{1, 3},
		{2, 3},
		{3, 1},
		{4, 0},
	}
	for _, testCase := range testCases {
		commits, err := baseCommit.CommitsByRange(testCase.Page, 3, "")
		require.NoError(t, err)
		assert.Len(t, commits, testCase.ExpectedCommitCount, "page: %d", testCase.Page)
	}
}

func TestCommitsByFileAndRange(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	bareRepo1, err := openRepositoryWithDefaultContext(bareRepo1Path)
	require.NoError(t, err)
	defer bareRepo1.Close()
	defer test.MockVariableValue(&setting.Git.CommitsRangeSize, 2)()

	testCases := []struct {
		File                string
		Page                int
		ExpectedCommitCount int
	}{
		{"file1.txt", 1, 1},
		{"file2.txt", 1, 1},
		{"file*.txt", 1, 2},
		{"foo", 1, 2},
		{"foo", 2, 1},
		{"foo", 3, 0},
		{"f*", 1, 2},
		{"f*", 2, 2},
		{"f*", 3, 1},
	}
	for _, testCase := range testCases {
		commits, err := bareRepo1.CommitsByFileAndRange(CommitsByFileAndRangeOptions{
			Revision: "master",
			File:     testCase.File,
			Page:     testCase.Page,
		})
		require.NoError(t, err)
		assert.Len(t, commits, testCase.ExpectedCommitCount, "file: '%s', page: %d", testCase.File, testCase.Page)
	}
}
@@ -17,6 +17,7 @@ import (
	"code.gitea.io/gitea/modules/charset"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/gitrepo"
+	tokenizer_hierarchy "code.gitea.io/gitea/modules/indexer/code/bleve/tokenizer/hierarchy"
	"code.gitea.io/gitea/modules/indexer/code/internal"
	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
	inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"

@@ -56,6 +57,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
type RepoIndexerData struct {
	RepoID    int64
	CommitID  string
+	Filename  string
	Content   string
	Language  string
	UpdatedAt time.Time

@@ -69,7 +71,8 @@ func (d *RepoIndexerData) Type() string {
const (
	repoIndexerAnalyzer      = "repoIndexerAnalyzer"
	repoIndexerDocType       = "repoIndexerDocType"
-	repoIndexerLatestVersion = 6
+	pathHierarchyAnalyzer    = "pathHierarchyAnalyzer"
+	repoIndexerLatestVersion = 7
)

// generateBleveIndexMapping generates a bleve index mapping for the repo indexer

@@ -89,6 +92,11 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
	docMapping.AddFieldMappingsAt("Language", termFieldMapping)
	docMapping.AddFieldMappingsAt("CommitID", termFieldMapping)

+	pathFieldMapping := bleve.NewTextFieldMapping()
+	pathFieldMapping.IncludeInAll = false
+	pathFieldMapping.Analyzer = pathHierarchyAnalyzer
+	docMapping.AddFieldMappingsAt("Filename", pathFieldMapping)
+
	timeFieldMapping := bleve.NewDateTimeFieldMapping()
	timeFieldMapping.IncludeInAll = false
	docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)

@@ -103,6 +111,13 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
	}); err != nil {
		return nil, err
+	} else if err := mapping.AddCustomAnalyzer(pathHierarchyAnalyzer, map[string]any{
+		"type":          analyzer_custom.Name,
+		"char_filters":  []string{},
+		"tokenizer":     tokenizer_hierarchy.Name,
+		"token_filters": []string{unicodeNormalizeName},
+	}); err != nil {
+		return nil, err
	}
	mapping.DefaultAnalyzer = repoIndexerAnalyzer
	mapping.AddDocumentMapping(repoIndexerDocType, docMapping)

@@ -178,6 +193,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
	return batch.Index(id, &RepoIndexerData{
		RepoID:    repo.ID,
		CommitID:  commitSha,
+		Filename:  update.Filename,
		Content:   string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
		Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
		UpdatedAt: time.Now().UTC(),

@@ -266,22 +282,30 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
		indexerQuery = keywordQuery
	}

+	opts.Filename = strings.Trim(opts.Filename, "/")
+	if len(opts.Filename) > 0 {
+		// we use a keyword analyzer for the query rather than the path hierarchy analyzer
+		// to match only the exact path
+		// e.g. a query for modules/indexer/code
+		// should not provide results for modules/ nor modules/indexer
+		indexerQuery = bleve.NewConjunctionQuery(
+			indexerQuery,
+			inner_bleve.MatchQuery(opts.Filename, "Filename", analyzer_keyword.Name, 0),
+		)
+	}
+
	// Save for reuse without language filter
	facetQuery := indexerQuery
	if len(opts.Language) > 0 {
-		languageQuery := bleve.NewMatchQuery(opts.Language)
-		languageQuery.FieldVal = "Language"
-		languageQuery.Analyzer = analyzer_keyword.Name
-
		indexerQuery = bleve.NewConjunctionQuery(
			indexerQuery,
-			languageQuery,
+			inner_bleve.MatchQuery(opts.Language, "Language", analyzer_keyword.Name, 0),
		)
	}

	from, pageSize := opts.GetSkipTake()
	searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
-	searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
+	searchRequest.Fields = []string{"Content", "RepoID", "Filename", "Language", "CommitID", "UpdatedAt"}
	searchRequest.IncludeLocations = true

	if len(opts.Language) == 0 {

@@ -320,7 +344,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
			RepoID:     int64(hit.Fields["RepoID"].(float64)),
			StartIndex: startIndex,
			EndIndex:   endIndex,
-			Filename:   internal.FilenameOfIndexerID(hit.ID),
+			Filename:   hit.Fields["Filename"].(string),
			Content:    hit.Fields["Content"].(string),
			CommitID:   hit.Fields["CommitID"].(string),
			UpdatedUnix: updatedUnix,

@@ -333,7 +357,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
	if len(opts.Language) > 0 {
		// Use a separate query to get all language counts
		facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
-		facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
+		facetRequest.Fields = []string{"Content", "RepoID", "Filename", "Language", "CommitID", "UpdatedAt"}
		facetRequest.IncludeLocations = true
		facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
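Indexing Filename with the path-hierarchy analyzer while querying it with the keyword (exact-term) analyzer is what gives directory-scoped results: the index stores every ancestor path as a token, and the query contributes exactly one token. A pure-Go illustration of why an exact-term query matches any ancestor path; this sketch is independent of bleve:

package main

import (
	"fmt"
	"strings"
)

// hierarchyTokens mimics what a path-hierarchy tokenizer emits at index time.
func hierarchyTokens(path string) []string {
	parts := strings.Split(strings.Trim(path, "/"), "/")
	tokens := make([]string, 0, len(parts))
	for i := range parts {
		tokens = append(tokens, strings.Join(parts[:i+1], "/"))
	}
	return tokens
}

func main() {
	indexed := hierarchyTokens("modules/indexer/code/search.go")
	fmt.Println(indexed) // [modules modules/indexer modules/indexer/code modules/indexer/code/search.go]

	// A keyword-analyzed query is a single exact token, so a directory
	// query matches every document indexed somewhere beneath it.
	query := "modules/indexer"
	for _, tok := range indexed {
		if tok == query {
			fmt.Println("document matches directory query", query)
		}
	}
}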
modules/indexer/code/bleve/tokenizer/hierarchy/hierarchy.go (new file, 69 lines)
@@ -0,0 +1,69 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package hierarchy

import (
	"bytes"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

const Name = "path_hierarchy"

type PathHierarchyTokenizer struct{}

// Similar to elastic's path_hierarchy tokenizer
// This tokenizes a given path into all the possible hierarchies
// For example,
// modules/indexer/code/search.go =>
//
//	modules/
//	modules/indexer
//	modules/indexer/code
//	modules/indexer/code/search.go
func (t *PathHierarchyTokenizer) Tokenize(input []byte) analysis.TokenStream {
	// trim any extra slashes
	input = bytes.Trim(input, "/")

	// zero allocations until the nested directories exceed a depth of 8 (which is unlikely)
	rv := make(analysis.TokenStream, 0, 8)
	count, off := 1, 0

	// iterate over all directory separators
	for i := bytes.IndexRune(input[off:], '/'); i != -1; i = bytes.IndexRune(input[off:], '/') {
		// the index is relative to input[offset...];
		// add it to the accumulated offset to get the index of the current separator in input[0...]
		off += i
		rv = append(rv, &analysis.Token{
			Term:     input[:off], // take the slice input[0...index of separator]
			Start:    0,
			End:      off,
			Position: count,
			Type:     analysis.AlphaNumeric,
		})
		// increment the offset past the separator
		off++
		count++
	}

	// the entire file path should always be the last token
	rv = append(rv, &analysis.Token{
		Term:     input,
		Start:    0,
		End:      len(input),
		Position: count,
		Type:     analysis.AlphaNumeric,
	})

	return rv
}

func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysis.Tokenizer, error) {
	return &PathHierarchyTokenizer{}, nil
}

func init() {
	registry.RegisterTokenizer(Name, TokenizerConstructor)
}
@@ -0,0 +1,59 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package hierarchy

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestIndexerBleveHierarchyTokenizer(t *testing.T) {
	tokenizer := &PathHierarchyTokenizer{}
	keywords := []struct {
		Term    string
		Results []string
	}{
		{
			Term: "modules/indexer/code/search.go",
			Results: []string{
				"modules",
				"modules/indexer",
				"modules/indexer/code",
				"modules/indexer/code/search.go",
			},
		},
		{
			Term: "/tmp/forgejo/",
			Results: []string{
				"tmp",
				"tmp/forgejo",
			},
		},
		{
			Term: "a/b/c/d/e/f/g/h/i/j",
			Results: []string{
				"a",
				"a/b",
				"a/b/c",
				"a/b/c/d",
				"a/b/c/d/e",
				"a/b/c/d/e/f",
				"a/b/c/d/e/f/g",
				"a/b/c/d/e/f/g/h",
				"a/b/c/d/e/f/g/h/i",
				"a/b/c/d/e/f/g/h/i/j",
			},
		},
	}

	for _, kw := range keywords {
		tokens := tokenizer.Tokenize([]byte(kw.Term))
		assert.Len(t, tokens, len(kw.Results))
		for i, token := range tokens {
			assert.Equal(t, i+1, token.Position)
			assert.Equal(t, kw.Results[i], string(token.Term))
		}
	}
}
@@ -30,7 +30,7 @@ import (
)

const (
-	esRepoIndexerLatestVersion = 1
+	esRepoIndexerLatestVersion = 2
	// multi-match-types, currently only 2 types are used
	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
	esMultiMatchTypeBestFields = "best_fields"

@@ -57,6 +57,21 @@ func NewIndexer(url, indexerName string) *Indexer {

const (
	defaultMapping = `{
+		"settings": {
+			"analysis": {
+				"analyzer": {
+					"custom_path_tree": {
+						"tokenizer": "custom_hierarchy"
+					}
+				},
+				"tokenizer": {
+					"custom_hierarchy": {
+						"type": "path_hierarchy",
+						"delimiter": "/"
+					}
+				}
+			}
+		},
		"mappings": {
			"properties": {
				"repo_id": {

@@ -72,6 +87,15 @@ const (
					"type": "keyword",
					"index": true
				},
+				"filename": {
+					"type": "text",
+					"fields": {
+						"tree": {
+							"type": "text",
+							"analyzer": "custom_path_tree"
+						}
+					}
+				},
				"language": {
					"type": "keyword",
					"index": true

@@ -138,6 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
			"repo_id":    repo.ID,
			"content":    string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
			"commit_id":  sha,
+			"filename":   update.Filename,
			"language":   analyze.GetCodeLanguage(update.Filename, fileContents),
			"updated_at": timeutil.TimeStampNow(),
		}),

@@ -267,7 +292,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
		panic(fmt.Sprintf("2===%#v", hit.Highlight))
	}

-	repoID, fileName := internal.ParseIndexerID(hit.Id)
	res := make(map[string]any)
	if err := json.Unmarshal(hit.Source, &res); err != nil {
		return 0, nil, nil, err

@@ -276,8 +300,8 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
	language := res["language"].(string)

	hits = append(hits, &internal.SearchResult{
-		RepoID:      repoID,
-		Filename:    fileName,
+		RepoID:      int64(res["repo_id"].(float64)),
+		Filename:    res["filename"].(string),
		CommitID:    res["commit_id"].(string),
		Content:     res["content"].(string),
		UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),

@@ -326,6 +350,9 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
		repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
		query = query.Must(repoQuery)
	}
+	if len(opts.Filename) > 0 {
+		query = query.Filter(elastic.NewTermsQuery("filename.tree", opts.Filename))
+	}

	var (
		start, pageSize = opts.GetSkipTake()
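For Elasticsearch the same trick lives in the mapping: the filename.tree sub-field is analyzed with the custom path_hierarchy tokenizer, so a plain terms filter on an ancestor path matches every file beneath it. A hedged sketch that only renders the query body, using the olivere/elastic client this file already imports:

package main

import (
	"encoding/json"
	"fmt"

	elastic "github.com/olivere/elastic/v7"
)

func main() {
	q := elastic.NewTermsQuery("filename.tree", "modules/indexer")
	src, _ := q.Source()
	b, _ := json.Marshal(src)
	// Prints {"terms":{"filename.tree":["modules/indexer"]}}.
	// This matches any document whose indexed path has modules/indexer as an
	// ancestor, because custom_path_tree indexed every ancestor as a token.
	fmt.Println(string(b))
}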
@@ -34,10 +34,11 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
	err := index(git.DefaultContext, indexer, repoID)
	require.NoError(t, err)
	keywords := []struct {
-		RepoIDs []int64
-		Keyword string
-		IDs     []int64
-		Langs   int
+		RepoIDs  []int64
+		Keyword  string
+		IDs      []int64
+		Langs    int
+		Filename string
	}{
		{
			RepoIDs: nil,

@@ -51,6 +52,20 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
			IDs:     []int64{},
			Langs:   0,
		},
+		{
+			RepoIDs:  nil,
+			Keyword:  "Description",
+			IDs:      []int64{},
+			Langs:    0,
+			Filename: "NOT-README.md",
+		},
+		{
+			RepoIDs:  nil,
+			Keyword:  "Description",
+			IDs:      []int64{repoID},
+			Langs:    1,
+			Filename: "README.md",
+		},
		{
			RepoIDs: nil,
			Keyword: "Description for",

@@ -86,6 +101,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
				Page:     1,
				PageSize: 10,
			},
+			Filename:       kw.Filename,
			IsKeywordFuzzy: true,
		})
		require.NoError(t, err)
@@ -24,6 +24,7 @@ type SearchOptions struct {
	RepoIDs  []int64
	Keyword  string
	Language string
+	Filename string

	IsKeywordFuzzy bool
@@ -3,30 +3,8 @@

package internal

-import (
-	"strings"
-
-	"code.gitea.io/gitea/modules/indexer/internal"
-	"code.gitea.io/gitea/modules/log"
-)
+import "code.gitea.io/gitea/modules/indexer/internal"

func FilenameIndexerID(repoID int64, filename string) string {
	return internal.Base36(repoID) + "_" + filename
}
-
-func ParseIndexerID(indexerID string) (int64, string) {
-	index := strings.IndexByte(indexerID, '_')
-	if index == -1 {
-		log.Error("Unexpected ID in repo indexer: %s", indexerID)
-	}
-	repoID, _ := internal.ParseBase36(indexerID[:index])
-	return repoID, indexerID[index+1:]
-}
-
-func FilenameOfIndexerID(indexerID string) string {
-	index := strings.IndexByte(indexerID, '_')
-	if index == -1 {
-		log.Error("Unexpected ID in repo indexer: %s", indexerID)
-	}
-	return indexerID[index+1:]
-}
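With the filename now stored as its own document field, only FilenameIndexerID survives; the helpers that parsed the repo ID and path back out of the document ID are gone. A small sketch of the ID format for reference; strconv's base-36 formatting stands in for the module's internal.Base36 helper:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	repoID, filename := int64(12345), "modules/git/grep.go"
	// base-36 repo ID, an underscore, then the in-repo path
	id := strconv.FormatInt(repoID, 36) + "_" + filename
	fmt.Println(id) // 9ix_modules/git/grep.go
}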
@@ -35,6 +35,8 @@ type SearchResultLanguages = internal.SearchResultLanguages

type SearchOptions = internal.SearchOptions

+var CodeSearchOptions = [2]string{"exact", "fuzzy"}
+
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
	startIndex := selectionStartIndex
	numLinesBefore := 0
@@ -28,13 +28,16 @@ var (
	// The hash used for HKDF.
	hash = sha256.New
	// The AEAD used for encryption/decryption.
-	aead          = chacha20poly1305.NewX
-	aeadKeySize   = chacha20poly1305.KeySize
-	aeadNonceSize = chacha20poly1305.NonceSizeX
+	aead = chacha20poly1305.NewX
	// The pseudorandom key generated by HKDF-Extract.
	prk []byte
)

+const (
+	aeadKeySize   = chacha20poly1305.KeySize
+	aeadNonceSize = chacha20poly1305.NonceSizeX
+)
+
// Set the main IKM for this module.
func Init(ikm []byte) {
	// Salt is intentionally left empty, it's not useful to Forgejo's use case.

@@ -55,7 +58,7 @@ var (
// Derive *the* key for a given context, this is a deterministic function.
// The same key will be provided for the same context.
func DeriveKey(context Context) *Key {
-	if len(prk) == 0 {
+	if len(prk) != sha256.Size {
		panic("keying: not initialized")
	}

@@ -63,7 +66,7 @@ func DeriveKey(context Context) *Key {
	key := make([]byte, aeadKeySize)
	// This should never return an error, but if it does, panic.
-	if _, err := r.Read(key); err != nil {
+	if n, err := r.Read(key); err != nil || n != aeadKeySize {
		panic(err)
	}

@@ -92,7 +95,7 @@ func (k *Key) Encrypt(plaintext, additionalData []byte) []byte {
	// Generate a random nonce.
	nonce := make([]byte, aeadNonceSize)
-	if _, err := rand.Read(nonce); err != nil {
+	if n, err := rand.Read(nonce); err != nil || n != aeadNonceSize {
		panic(err)
	}
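The module wraps a standard HKDF-Extract/Expand pipeline feeding an XChaCha20-Poly1305 AEAD; the hardened checks above assert that the PRK and every key or nonce read are exactly the expected size. A minimal, self-contained sketch of that pattern; the IKM and context label here are illustrative, and Forgejo's real salt/info conventions live in the module itself:

package main

import (
	"crypto/rand"
	"crypto/sha256"
	"fmt"
	"io"

	"golang.org/x/crypto/chacha20poly1305"
	"golang.org/x/crypto/hkdf"
)

func main() {
	ikm := []byte("input keying material from configuration")
	prk := hkdf.Extract(sha256.New, ikm, nil) // empty salt, as in the module

	// Expand a per-context subkey; the context string domain-separates keys.
	r := hkdf.Expand(sha256.New, prk, []byte("keying: example context"))
	key := make([]byte, chacha20poly1305.KeySize)
	if _, err := io.ReadFull(r, key); err != nil {
		panic(err)
	}

	aead, err := chacha20poly1305.NewX(key)
	if err != nil {
		panic(err)
	}
	// Random nonce, prepended to the ciphertext so decryption can recover it.
	nonce := make([]byte, chacha20poly1305.NonceSizeX)
	if _, err := rand.Read(nonce); err != nil {
		panic(err)
	}
	ct := aead.Seal(nonce, nonce, []byte("secret"), []byte("additional data"))
	fmt.Printf("%x\n", ct)
}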
@@ -7,6 +7,7 @@ import (
	"encoding/xml"
	"io"

	"code.gitea.io/gitea/modules/util"
	"code.gitea.io/gitea/modules/validation"

	"golang.org/x/net/html/charset"

@@ -49,8 +50,16 @@ type pomStruct struct {
		Version    string `xml:"version"`
		Scope      string `xml:"scope"`
	} `xml:"dependencies>dependency"`
+	Parent struct {
+		GroupID      string `xml:"groupId"`
+		ArtifactID   string `xml:"artifactId"`
+		Version      string `xml:"version"`
+		RelativePath string `xml:"relativePath"`
+	} `xml:"parent"`
}

+var ErrNoGroupID = util.NewInvalidArgumentErrorf("group ID is missing")
+
// ParsePackageMetaData parses the metadata of a pom file
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
	var pom pomStruct

@@ -65,6 +74,17 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
		pom.URL = ""
	}

+	groupID := pom.GroupID
+
+	if groupID == "" {
+		// If a project inherits from a parent project, the groupId element is optional.
+		// Refer to: https://maven.apache.org/pom.html#Inheritance
+		if pom.Parent.GroupID == "" {
+			return nil, ErrNoGroupID
+		}
+		groupID = pom.Parent.GroupID
+	}
+
	licenses := make([]string, 0, len(pom.Licenses))
	for _, l := range pom.Licenses {
		if l.Name != "" {

@@ -82,7 +102,7 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
	}

	return &Metadata{
-		GroupID:     pom.GroupID,
+		GroupID:     groupID,
		ArtifactID:  pom.ArtifactID,
		Name:        pom.Name,
		Description: pom.Description,
@@ -14,6 +14,7 @@ import (

const (
	groupID       = "org.gitea"
+	parentGroupID = "org.gitea.parent"
	artifactID    = "my-project"
	version       = "1.0.1"
	name          = "My Gitea Project"

@@ -27,6 +28,11 @@ const (

const pomContent = `<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<parent>
+		<groupId>` + parentGroupID + `</groupId>
+		<artifactId>parent-project</artifactId>
+		<version>1.0.0</version>
+	</parent>
	<groupId>` + groupID + `</groupId>
	<artifactId>` + artifactID + `</artifactId>
	<version>` + version + `</version>

@@ -47,6 +53,24 @@ const pomContent = `<?xml version="1.0"?>
	</dependencies>
</project>`

const pomWithParentGroupID = `<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<parent>
		<groupId>` + parentGroupID + `</groupId>
		<artifactId>parent-project</artifactId>
		<version>1.0.0</version>
	</parent>

	<artifactId>` + artifactID + `</artifactId>
	<version>` + version + `</version>
</project>`

const pomWithMissingGroupID = `<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<artifactId>` + artifactID + `</artifactId>
	<version>` + version + `</version>
</project>`

func TestParsePackageMetaData(t *testing.T) {
	t.Run("InvalidFile", func(t *testing.T) {
		m, err := ParsePackageMetaData(strings.NewReader(""))

@@ -87,4 +111,19 @@ func TestParsePackageMetaData(t *testing.T) {
		require.NoError(t, err)
		assert.NotNil(t, m)
	})

	t.Run("UseParentGroupID", func(t *testing.T) {
		m, err := ParsePackageMetaData(strings.NewReader(pomWithParentGroupID))
		require.NoError(t, err)
		assert.NotNil(t, m)

		assert.Equal(t, parentGroupID, m.GroupID)
	})

	t.Run("MissingGroupIDThrowsError", func(t *testing.T) {
		m, err := ParsePackageMetaData(strings.NewReader(pomWithMissingGroupID))
		assert.Nil(t, m)
		require.Error(t, err)
		assert.Equal(t, ErrNoGroupID, err)
	})
}
@@ -89,8 +89,9 @@ func CreateRepositoryByExample(ctx context.Context, doer, u *user_model.User, re
			Type: tp,
			Config: &repo_model.PullRequestsConfig{
				AllowMerge: true, AllowRebase: true, AllowRebaseMerge: true, AllowSquash: true, AllowFastForwardOnly: true,
-				DefaultMergeStyle: repo_model.MergeStyle(setting.Repository.PullRequest.DefaultMergeStyle),
-				AllowRebaseUpdate: true,
+				DefaultMergeStyle:  repo_model.MergeStyle(setting.Repository.PullRequest.DefaultMergeStyle),
+				DefaultUpdateStyle: repo_model.UpdateStyle(setting.Repository.PullRequest.DefaultUpdateStyle),
+				AllowRebaseUpdate:  true,
			},
		})
	} else {
@@ -46,7 +46,7 @@ func (url defaultActionsURL) URL() string {
}

const (
-	defaultActionsURLForgejo = "https://code.forgejo.org"
+	defaultActionsURLForgejo = "https://data.forgejo.org"
	defaultActionsURLGitHub  = "github" // https://github.com
	defaultActionsURLSelf    = "self"   // the root URL of the self-hosted instance
)

@@ -117,7 +117,7 @@ func Test_getDefaultActionsURLForActions(t *testing.T) {
			iniStr: `
[actions]
`,
-			wantURL: "https://code.forgejo.org",
+			wantURL: "https://data.forgejo.org",
		},
		{
			name: "github",
@@ -87,6 +87,7 @@ var (
		DefaultMergeMessageAllAuthors            bool
		DefaultMergeMessageMaxApprovers          int
		DefaultMergeMessageOfficialApproversOnly bool
+		DefaultUpdateStyle                       string
		PopulateSquashCommentWithCommitMessages  bool
		AddCoCommitterTrailers                   bool
		TestConflictingPatchesWithGitApply       bool

@@ -216,6 +217,7 @@ var (
		DefaultMergeMessageAllAuthors            bool
		DefaultMergeMessageMaxApprovers          int
		DefaultMergeMessageOfficialApproversOnly bool
+		DefaultUpdateStyle                       string
		PopulateSquashCommentWithCommitMessages  bool
		AddCoCommitterTrailers                   bool
		TestConflictingPatchesWithGitApply       bool

@@ -232,6 +234,7 @@ var (
		DefaultMergeMessageAllAuthors:            false,
		DefaultMergeMessageMaxApprovers:          10,
		DefaultMergeMessageOfficialApproversOnly: true,
+		DefaultUpdateStyle:                       "merge",
		PopulateSquashCommentWithCommitMessages:  false,
		AddCoCommitterTrailers:                   true,
		RetargetChildrenOnMerge:                  true,
@@ -105,6 +105,7 @@ type Repository struct {
	DefaultDeleteBranchAfterMerge bool   `json:"default_delete_branch_after_merge"`
	DefaultMergeStyle             string `json:"default_merge_style"`
	DefaultAllowMaintainerEdit    bool   `json:"default_allow_maintainer_edit"`
+	DefaultUpdateStyle            string `json:"default_update_style"`
	AvatarURL                     string `json:"avatar_url"`
	Internal                      bool   `json:"internal"`
	MirrorInterval                string `json:"mirror_interval"`

@@ -225,6 +226,8 @@ type EditRepoOption struct {
	DefaultDeleteBranchAfterMerge *bool `json:"default_delete_branch_after_merge,omitempty"`
	// set to a merge style to be used by this repository: "merge", "rebase", "rebase-merge", "squash", or "fast-forward-only".
	DefaultMergeStyle *string `json:"default_merge_style,omitempty"`
+	// set to an update style to be used by this repository: "rebase" or "merge"
+	DefaultUpdateStyle *string `json:"default_update_style,omitempty"`
	// set to `true` to allow edits from maintainers by default
	DefaultAllowMaintainerEdit *bool `json:"default_allow_maintainer_edit,omitempty"`
	// set to `true` to archive this repository.
Loading…
Add table
Add a link
Reference in a new issue