mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-07-16 08:59:24 +02:00
fix: skip empty tokens in SearchOptions.Tokens() (#8261)
Some checks are pending
/ release (push) Waiting to run
testing-integration / test-unit (push) Waiting to run
testing-integration / test-sqlite (push) Waiting to run
testing / frontend-checks (push) Waiting to run
testing / backend-checks (push) Waiting to run
testing / test-unit (push) Blocked by required conditions
testing / test-e2e (push) Blocked by required conditions
testing / test-remote-cacher (redis) (push) Blocked by required conditions
testing / test-remote-cacher (valkey) (push) Blocked by required conditions
testing / test-remote-cacher (garnet) (push) Blocked by required conditions
testing / test-remote-cacher (redict) (push) Blocked by required conditions
testing / test-mysql (push) Blocked by required conditions
testing / test-pgsql (push) Blocked by required conditions
testing / test-sqlite (push) Blocked by required conditions
testing / security-check (push) Blocked by required conditions
Some checks are pending
/ release (push) Waiting to run
testing-integration / test-unit (push) Waiting to run
testing-integration / test-sqlite (push) Waiting to run
testing / frontend-checks (push) Waiting to run
testing / backend-checks (push) Waiting to run
testing / test-unit (push) Blocked by required conditions
testing / test-e2e (push) Blocked by required conditions
testing / test-remote-cacher (redis) (push) Blocked by required conditions
testing / test-remote-cacher (valkey) (push) Blocked by required conditions
testing / test-remote-cacher (garnet) (push) Blocked by required conditions
testing / test-remote-cacher (redict) (push) Blocked by required conditions
testing / test-mysql (push) Blocked by required conditions
testing / test-pgsql (push) Blocked by required conditions
testing / test-sqlite (push) Blocked by required conditions
testing / security-check (push) Blocked by required conditions
Query string tokenizer could return a list containing empty tokens when the query string was `\` or `"` (probably in other scenarios as well). This seems undesirable and is what triggered #8260, but I'm posting this separately from that fix in case I'm wrong. Feel free to reject if so. The actual change in behavior is that now searching for `\` or `"` behaves the same as if the query were empty (the bleve/elastic code checks that the tokenizer actually returned anything, rather than just the query being non-empty). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. - [ ] I want the title to show in the release notes with a link to this pull request. - [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8261 Reviewed-by: Shiny Nematoda <snematoda@noreply.codeberg.org> Co-authored-by: Danko Aleksejevs <danko@very.lv> Co-committed-by: Danko Aleksejevs <danko@very.lv>
This commit is contained in:
parent
bc2e4942fc
commit
4935e6e1a3
5 changed files with 146 additions and 19 deletions
|
@ -156,11 +156,12 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
|
||||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||||
var queries []query.Query
|
var queries []query.Query
|
||||||
|
|
||||||
if options.Keyword != "" {
|
tokens, err := options.Tokens()
|
||||||
tokens, err := options.Tokens()
|
if err != nil {
|
||||||
if err != nil {
|
return nil, err
|
||||||
return nil, err
|
}
|
||||||
}
|
|
||||||
|
if len(tokens) > 0 {
|
||||||
q := bleve.NewBooleanQuery()
|
q := bleve.NewBooleanQuery()
|
||||||
for _, token := range tokens {
|
for _, token := range tokens {
|
||||||
innerQ := bleve.NewDisjunctionQuery(
|
innerQ := bleve.NewDisjunctionQuery(
|
||||||
|
|
|
@ -149,12 +149,13 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
|
||||||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||||
query := elastic.NewBoolQuery()
|
query := elastic.NewBoolQuery()
|
||||||
|
|
||||||
if options.Keyword != "" {
|
tokens, err := options.Tokens()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(tokens) > 0 {
|
||||||
q := elastic.NewBoolQuery()
|
q := elastic.NewBoolQuery()
|
||||||
tokens, err := options.Tokens()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
for _, token := range tokens {
|
for _, token := range tokens {
|
||||||
innerQ := elastic.NewMultiMatchQuery(token.Term, "content", "comments").FieldWithBoost("title", 2.0).TieBreaker(0.5)
|
innerQ := elastic.NewMultiMatchQuery(token.Term, "content", "comments").FieldWithBoost("title", 2.0).TieBreaker(0.5)
|
||||||
if token.Fuzzy {
|
if token.Fuzzy {
|
||||||
|
|
|
@ -45,12 +45,9 @@ func (t *Tokenizer) next() (tk Token, err error) {
|
||||||
|
|
||||||
// skip all leading white space
|
// skip all leading white space
|
||||||
for {
|
for {
|
||||||
if r, _, err = t.in.ReadRune(); err == nil && r == ' ' {
|
if r, _, err = t.in.ReadRune(); err != nil || r != ' ' {
|
||||||
//nolint:staticcheck,wastedassign // SA4006 the variable is used after the loop
|
break
|
||||||
r, _, err = t.in.ReadRune()
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
break
|
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return tk, err
|
return tk, err
|
||||||
|
@ -107,11 +104,17 @@ nextEnd:
|
||||||
|
|
||||||
// Tokenize the keyword
|
// Tokenize the keyword
|
||||||
func (o *SearchOptions) Tokens() (tokens []Token, err error) {
|
func (o *SearchOptions) Tokens() (tokens []Token, err error) {
|
||||||
|
if o.Keyword == "" {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
in := strings.NewReader(o.Keyword)
|
in := strings.NewReader(o.Keyword)
|
||||||
it := Tokenizer{in: in}
|
it := Tokenizer{in: in}
|
||||||
|
|
||||||
for token, err := it.next(); err == nil; token, err = it.next() {
|
for token, err := it.next(); err == nil; token, err = it.next() {
|
||||||
tokens = append(tokens, token)
|
if token.Term != "" {
|
||||||
|
tokens = append(tokens, token)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
@ -41,6 +41,36 @@ var testOpts = []testIssueQueryStringOpt{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Keyword: "Hello World",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: "Hello",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Term: "World",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: " Hello World ",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: "Hello",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Term: "World",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Keyword: "+Hello +World",
|
Keyword: "+Hello +World",
|
||||||
Results: []Token{
|
Results: []Token{
|
||||||
|
@ -156,6 +186,68 @@ var testOpts = []testIssueQueryStringOpt{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Keyword: "\\",
|
||||||
|
Results: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "\"",
|
||||||
|
Results: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "Hello \\",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: "Hello",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "\"\"",
|
||||||
|
Results: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "\" World \"",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: " World ",
|
||||||
|
Fuzzy: false,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "\"\" World \"\"",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: "World",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Keyword: "Best \"Hello World\" Ever",
|
||||||
|
Results: []Token{
|
||||||
|
{
|
||||||
|
Term: "Best",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Term: "Hello World",
|
||||||
|
Fuzzy: false,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Term: "Ever",
|
||||||
|
Fuzzy: true,
|
||||||
|
Kind: BoolOptShould,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIssueQueryString(t *testing.T) {
|
func TestIssueQueryString(t *testing.T) {
|
||||||
|
|
|
@ -87,14 +87,44 @@ func TestIndexer(t *testing.T, indexer internal.Indexer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func allResults(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
||||||
|
assert.Len(t, result.Hits, len(data))
|
||||||
|
assert.Equal(t, len(data), int(result.Total))
|
||||||
|
}
|
||||||
|
|
||||||
var cases = []*testIndexerCase{
|
var cases = []*testIndexerCase{
|
||||||
{
|
{
|
||||||
Name: "default",
|
Name: "default",
|
||||||
SearchOptions: &internal.SearchOptions{},
|
SearchOptions: &internal.SearchOptions{},
|
||||||
Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
|
Expected: allResults,
|
||||||
assert.Len(t, result.Hits, len(data))
|
},
|
||||||
assert.Equal(t, len(data), int(result.Total))
|
{
|
||||||
|
Name: "empty keyword",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "",
|
||||||
},
|
},
|
||||||
|
Expected: allResults,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "whitespace keyword",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: " ",
|
||||||
|
},
|
||||||
|
Expected: allResults,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "dangling slash in keyword",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "\\",
|
||||||
|
},
|
||||||
|
Expected: allResults,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "dangling quote in keyword",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "\"",
|
||||||
|
},
|
||||||
|
Expected: allResults,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "empty",
|
Name: "empty",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue