Upgrade bleve from v2.0.6 to v2.3.0 (#18132)

2022-01-01 16:26:27 +08:00 · 2022-01-01 16:26:27 +08:00 · 25a290e320
commit 25a290e320
parent 1a4e2bfcd1
70 changed files with 1283 additions and 660 deletions
--- a/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go
@ -83,7 +83,7 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
 	return newTopNCollector(size, skip, sort)
 }

-// NewTopNCollector builds a collector to find the top 'size' hits
+// NewTopNCollectorAfter builds a collector to find the top 'size' hits
 // that come after the position given by the 'after' sort values
 // ordering hits by the provided sort order
 func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
@ -235,9 +235,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,

 	// compute search duration
 	hc.took = time.Since(startTime)
-	if err != nil {
-		return err
-	}
+
 	// finalize actual results
 	err = hc.finalizeResults(reader)
 	if err != nil {
--- a/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_datetime.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_datetime.go
@ -87,23 +87,21 @@ func (fb *DateTimeFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
-	if field == fb.field {
-		fb.sawValue = true
-		// only consider the values which are shifted 0
-		prefixCoded := numeric.PrefixCoded(term)
-		shift, err := prefixCoded.Shift()
-		if err == nil && shift == 0 {
-			i64, err := prefixCoded.Int64()
-			if err == nil {
-				t := time.Unix(0, i64)
+func (fb *DateTimeFacetBuilder) UpdateVisitor(term []byte) {
+	fb.sawValue = true
+	// only consider the values which are shifted 0
+	prefixCoded := numeric.PrefixCoded(term)
+	shift, err := prefixCoded.Shift()
+	if err == nil && shift == 0 {
+		i64, err := prefixCoded.Int64()
+		if err == nil {
+			t := time.Unix(0, i64)

-				// look at each of the ranges for a match
-				for rangeName, r := range fb.ranges {
-					if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
-						fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
-						fb.total++
-					}
+			// look at each of the ranges for a match
+			for rangeName, r := range fb.ranges {
+				if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
+					fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
+					fb.total++
 				}
 			}
 		}
--- a/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_numeric.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_numeric.go
@ -86,23 +86,21 @@ func (fb *NumericFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
-	if field == fb.field {
-		fb.sawValue = true
-		// only consider the values which are shifted 0
-		prefixCoded := numeric.PrefixCoded(term)
-		shift, err := prefixCoded.Shift()
-		if err == nil && shift == 0 {
-			i64, err := prefixCoded.Int64()
-			if err == nil {
-				f64 := numeric.Int64ToFloat64(i64)
+func (fb *NumericFacetBuilder) UpdateVisitor(term []byte) {
+	fb.sawValue = true
+	// only consider the values which are shifted 0
+	prefixCoded := numeric.PrefixCoded(term)
+	shift, err := prefixCoded.Shift()
+	if err == nil && shift == 0 {
+		i64, err := prefixCoded.Int64()
+		if err == nil {
+			f64 := numeric.Int64ToFloat64(i64)

-				// look at each of the ranges for a match
-				for rangeName, r := range fb.ranges {
-					if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
-						fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
-						fb.total++
-					}
+			// look at each of the ranges for a match
+			for rangeName, r := range fb.ranges {
+				if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
+					fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
+					fb.total++
 				}
 			}
 		}
--- a/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_terms.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/facet/facet_builder_terms.go
@ -62,12 +62,10 @@ func (fb *TermsFacetBuilder) Field() string {
 	return fb.field
 }

-func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
-	if field == fb.field {
-		fb.sawValue = true
-		fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
-		fb.total++
-	}
+func (fb *TermsFacetBuilder) UpdateVisitor(term []byte) {
+	fb.sawValue = true
+	fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
+	fb.total++
 }

 func (fb *TermsFacetBuilder) StartDoc() {
@ -87,7 +85,7 @@ func (fb *TermsFacetBuilder) Result() *search.FacetResult {
 		Missing: fb.missing,
 	}

-	rv.Terms = make([]*search.TermFacet, 0, len(fb.termsCount))
+	rv.Terms = &search.TermFacets{}

 	for term, count := range fb.termsCount {
 		tf := &search.TermFacet{
@ -95,20 +93,20 @@ func (fb *TermsFacetBuilder) Result() *search.FacetResult {
 			Count: count,
 		}

-		rv.Terms = append(rv.Terms, tf)
+		rv.Terms.Add(tf)
 	}

 	sort.Sort(rv.Terms)

 	// we now have the list of the top N facets
 	trimTopN := fb.size
-	if trimTopN > len(rv.Terms) {
-		trimTopN = len(rv.Terms)
+	if trimTopN > rv.Terms.Len() {
+		trimTopN = rv.Terms.Len()
 	}
-	rv.Terms = rv.Terms[:trimTopN]
+	rv.Terms.TrimToTopN(trimTopN)

 	notOther := 0
-	for _, tf := range rv.Terms {
+	for _, tf := range rv.Terms.Terms() {
 		notOther += tf.Count
 	}
 	rv.Other = fb.total - notOther
--- a/vendor/github.com/blevesearch/bleve/v2/search/facets_builder.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/facets_builder.go
@ -15,6 +15,7 @@
 package search

 import (
+	"encoding/json"
 	"reflect"
 	"sort"

@ -43,7 +44,7 @@ func init() {

 type FacetBuilder interface {
 	StartDoc()
-	UpdateVisitor(field string, term []byte)
+	UpdateVisitor(term []byte)
 	EndDoc()

 	Result() *FacetResult
@ -53,10 +54,11 @@ type FacetBuilder interface {
 }

 type FacetsBuilder struct {
-	indexReader index.IndexReader
-	facetNames  []string
-	facets      []FacetBuilder
-	fields      []string
+	indexReader   index.IndexReader
+	facetNames    []string
+	facets        []FacetBuilder
+	facetsByField map[string][]FacetBuilder
+	fields        []string
 }

 func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
@ -80,8 +82,13 @@ func (fb *FacetsBuilder) Size() int {
 }

 func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
+	if fb.facetsByField == nil {
+		fb.facetsByField = map[string][]FacetBuilder{}
+	}
+
 	fb.facetNames = append(fb.facetNames, name)
 	fb.facets = append(fb.facets, facetBuilder)
+	fb.facetsByField[facetBuilder.Field()] = append(fb.facetsByField[facetBuilder.Field()], facetBuilder)
 	fb.fields = append(fb.fields, facetBuilder.Field())
 }

@ -102,8 +109,10 @@ func (fb *FacetsBuilder) EndDoc() {
 }

 func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
-	for _, facetBuilder := range fb.facets {
-		facetBuilder.UpdateVisitor(field, term)
+	if facetBuilders, ok := fb.facetsByField[field]; ok {
+		for _, facetBuilder := range facetBuilders {
+			facetBuilder.UpdateVisitor(term)
+		}
 	}
 }

@ -112,27 +121,73 @@ type TermFacet struct {
 	Count int    `json:"count"`
 }

-type TermFacets []*TermFacet
-
-func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
-	for _, existingTerm := range tf {
-		if termFacet.Term == existingTerm.Term {
-			existingTerm.Count += termFacet.Count
-			return tf
-		}
-	}
-	// if we got here it wasn't already in the existing terms
-	tf = append(tf, termFacet)
-	return tf
+type TermFacets struct {
+	termFacets []*TermFacet
+	termLookup map[string]*TermFacet
 }

-func (tf TermFacets) Len() int      { return len(tf) }
-func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
-func (tf TermFacets) Less(i, j int) bool {
-	if tf[i].Count == tf[j].Count {
-		return tf[i].Term < tf[j].Term
+func (tf *TermFacets) Terms() []*TermFacet {
+	return tf.termFacets
+}
+
+// TrimToTopN keeps only the first n term facets and discards the rest.
+// n must not exceed the current number of facets.
+func (tf *TermFacets) TrimToTopN(n int) {
+	// forget the discarded terms in the lookup as well; otherwise a later Add
+	// of one of them would bump a facet that is no longer part of the list
+	// instead of appending it again (deleting from a nil map is a no-op)
+	for _, dropped := range tf.termFacets[n:] {
+		delete(tf.termLookup, dropped.Term)
+	}
+	tf.termFacets = tf.termFacets[:n]
+}
+
+// Add merges each given term facet into the set: a term that is already
+// present has its count increased, any other term is appended as a new entry.
+func (tf *TermFacets) Add(termFacets ...*TermFacet) {
+	for _, termFacet := range termFacets {
+		if tf.termLookup == nil {
+			tf.termLookup = map[string]*TermFacet{}
+		}
+
+		if term, ok := tf.termLookup[termFacet.Term]; ok {
+			term.Count += termFacet.Count
+			// move on to the next argument; returning here would silently
+			// drop every facet that follows a duplicate in the same call
+			continue
+		}
+
+		// if we got here it wasn't already in the existing terms
+		tf.termFacets = append(tf.termFacets, termFacet)
+		tf.termLookup[termFacet.Term] = termFacet
 	}
-	return tf[i].Count > tf[j].Count
+}
+
+func (tf *TermFacets) Len() int {
+	// Handle case where *TermFacets is not fully initialized in index_impl.go.init()
+	if tf == nil {
+		return 0
+	}
+
+	return len(tf.termFacets)
+}
+func (tf *TermFacets) Swap(i, j int) {
+	tf.termFacets[i], tf.termFacets[j] = tf.termFacets[j], tf.termFacets[i]
+}
+func (tf *TermFacets) Less(i, j int) bool {
+	if tf.termFacets[i].Count == tf.termFacets[j].Count {
+		return tf.termFacets[i].Term < tf.termFacets[j].Term
+	}
+	return tf.termFacets[i].Count > tf.termFacets[j].Count
+}
+
+// TermFacets used to be a type alias for []*TermFacet.
+// To maintain backwards compatibility, we have to implement custom
+// JSON marshalling.
+func (tf *TermFacets) MarshalJSON() ([]byte, error) {
+	return json.Marshal(tf.termFacets)
+}
+
+func (tf *TermFacets) UnmarshalJSON(b []byte) error {
+	termFacets := []*TermFacet{}
+	err := json.Unmarshal(b, &termFacets)
+	if err != nil {
+		return err
+	}
+
+	for _, termFacet := range termFacets {
+		tf.Add(termFacet)
+	}
+
+	return nil
 }

 type NumericRangeFacet struct {
@ -246,7 +301,7 @@ type FacetResult struct {
 	Total         int                `json:"total"`
 	Missing       int                `json:"missing"`
 	Other         int                `json:"other"`
-	Terms         TermFacets         `json:"terms,omitempty"`
+	Terms         *TermFacets        `json:"terms,omitempty"`
 	NumericRanges NumericRangeFacets `json:"numeric_ranges,omitempty"`
 	DateRanges    DateRangeFacets    `json:"date_ranges,omitempty"`
 }
@ -254,7 +309,7 @@ type FacetResult struct {
 func (fr *FacetResult) Size() int {
 	return reflectStaticSizeFacetResult + size.SizeOfPtr +
 		len(fr.Field) +
-		len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
+		fr.Terms.Len()*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
 		len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
 		len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
 }
@ -264,8 +319,8 @@ func (fr *FacetResult) Merge(other *FacetResult) {
 	fr.Missing += other.Missing
 	fr.Other += other.Other
 	if fr.Terms != nil && other.Terms != nil {
-		for _, term := range other.Terms {
-			fr.Terms = fr.Terms.Add(term)
+		for _, term := range other.Terms.termFacets {
+			fr.Terms.Add(term)
 		}
 	}
 	if fr.NumericRanges != nil && other.NumericRanges != nil {
@ -283,12 +338,12 @@ func (fr *FacetResult) Merge(other *FacetResult) {
 func (fr *FacetResult) Fixup(size int) {
 	if fr.Terms != nil {
 		sort.Sort(fr.Terms)
-		if len(fr.Terms) > size {
-			moveToOther := fr.Terms[size:]
+		if fr.Terms.Len() > size {
+			moveToOther := fr.Terms.termFacets[size:]
 			for _, mto := range moveToOther {
 				fr.Other += mto.Count
 			}
-			fr.Terms = fr.Terms[0:size]
+			fr.Terms.termFacets = fr.Terms.termFacets[0:size]
 		}
 	} else if fr.NumericRanges != nil {
 		sort.Sort(fr.NumericRanges)
--- a/vendor/github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple/simple.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple/simple.go
@ -123,9 +123,15 @@ OUTER:
 		// if there were no terms to highlight
 		// produce a single fragment from the beginning
 		start := 0
-		end := start + s.fragmentSize
-		if end > len(orig) {
-			end = len(orig)
+		end := start
+		used := 0
+		for end < len(orig) && used < s.fragmentSize {
+			r, size := utf8.DecodeRune(orig[end:])
+			if r == utf8.RuneError {
+				break
+			}
+			end += size
+			used++
 		}
 		rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
 	}
--- a/vendor/github.com/blevesearch/bleve/v2/search/query/ip_range.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/query/ip_range.go
@ -0,0 +1,84 @@
+//  Copyright (c) 2021 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package query
+
+import (
+	"fmt"
+	"net"
+
+	"github.com/blevesearch/bleve/v2/mapping"
+	"github.com/blevesearch/bleve/v2/search"
+	"github.com/blevesearch/bleve/v2/search/searcher"
+	index "github.com/blevesearch/bleve_index_api"
+)
+
+// IPRangeQuery searches a field for IP addresses inside the network given by
+// CIDR; CIDR may also hold a single IP address instead of a network.
+type IPRangeQuery struct {
+	// no space is allowed after the comma: `json:"cidr, omitempty"` makes
+	// encoding/json ignore the omitempty option
+	CIDR     string `json:"cidr,omitempty"`
+	FieldVal string `json:"field,omitempty"`
+	BoostVal *Boost `json:"boost,omitempty"`
+}
+
+func NewIPRangeQuery(cidr string) *IPRangeQuery {
+	return &IPRangeQuery{
+		CIDR: cidr,
+	}
+}
+
+func (q *IPRangeQuery) SetBoost(b float64) {
+	boost := Boost(b)
+	q.BoostVal = &boost
+}
+
+func (q *IPRangeQuery) Boost() float64 {
+	return q.BoostVal.Value()
+}
+
+func (q *IPRangeQuery) SetField(f string) {
+	q.FieldVal = f
+}
+
+func (q *IPRangeQuery) Field() string {
+	return q.FieldVal
+}
+
+func (q *IPRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+	field := q.FieldVal
+	if q.FieldVal == "" {
+		field = m.DefaultSearchField()
+	}
+	_, ipNet, err := net.ParseCIDR(q.CIDR)
+	if err != nil {
+		ip := net.ParseIP(q.CIDR)
+		if ip == nil {
+			return nil, err
+		}
+		// If we are searching for a specific ip rather than members of a network, just use a term search.
+		return searcher.NewTermSearcherBytes(i, ip.To16(), field, q.BoostVal.Value(), options)
+	}
+	return searcher.NewIPRangeSearcher(i, ipNet, field, q.BoostVal.Value(), options)
+}
+
+func (q *IPRangeQuery) Validate() error {
+	_, _, err := net.ParseCIDR(q.CIDR)
+	if err == nil {
+		return nil
+	}
+	// We also allow search for a specific IP.
+	ip := net.ParseIP(q.CIDR)
+	if ip != nil {
+		return nil // we have a valid ip
+	}
+	return fmt.Errorf("IPRangeQuery must be for an network or ip address, %q", q.CIDR)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/search/query/query_string_lex.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/query/query_string_lex.go
@ -248,8 +248,8 @@ func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
 }

 func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
-	// only a non-escaped space ends the tilde (or eof)
-	if eof || (!l.inEscape && next == ' ') {
+	// end on non-escaped space, colon, tilde, boost (or eof)
+	if eof || (!l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')) {
 		// end number
 		l.nextTokenType = tNUMBER
 		l.nextToken = &yySymType{
@ -257,7 +257,13 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
 		}
 		logDebugTokens("NUMBER - '%s'", l.nextToken.s)
 		l.reset()
-		return startState, true
+
+		consumed := true
+		if !eof && (next == ':' || next == '^' || next == '~') {
+			consumed = false
+		}
+
+		return startState, consumed
 	} else if !l.inEscape && next == '\\' {
 		l.inEscape = true
 		return inNumOrStrState, true
@ -287,7 +293,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
 }

 func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
-	// end on non-escped space, colon, tilde, boost (or eof)
+	// end on non-escaped space, colon, tilde, boost (or eof)
 	if eof || (!l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')) {
 		// end string
 		l.nextTokenType = tSTRING
--- a/vendor/github.com/blevesearch/bleve/v2/search/search.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/search.go
@ -270,7 +270,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
 		var needsDedupe bool

 		for i, ftl := range dm.FieldTermLocations {
-			if lastField != ftl.Field {
+			if i == 0 || lastField != ftl.Field {
 				lastField = ftl.Field

 				if dm.Locations == nil {
--- a/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_ip_range.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/searcher/search_ip_range.go
@ -0,0 +1,67 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package searcher
+
+import (
+	"net"
+
+	"github.com/blevesearch/bleve/v2/search"
+	index "github.com/blevesearch/bleve_index_api"
+)
+
+// netLimits returns the lo and hi bounds inside the network.
+func netLimits(n *net.IPNet) (lo net.IP, hi net.IP) {
+	ones, bits := n.Mask.Size()
+	netNum := n.IP
+	if bits == net.IPv4len*8 {
+		netNum = netNum.To16()
+		ones += 8 * (net.IPv6len - net.IPv4len)
+	}
+	mask := net.CIDRMask(ones, 8*net.IPv6len)
+	lo = make(net.IP, net.IPv6len)
+	hi = make(net.IP, net.IPv6len)
+	for i := 0; i < net.IPv6len; i++ {
+		lo[i] = netNum[i] & mask[i]
+		hi[i] = lo[i] | ^mask[i]
+	}
+	return lo, hi
+}
+
+func NewIPRangeSearcher(indexReader index.IndexReader, ipNet *net.IPNet,
+	field string, boost float64, options search.SearcherOptions) (
+	search.Searcher, error) {
+
+	lo, hi := netLimits(ipNet)
+	fieldDict, err := indexReader.FieldDictRange(field, lo, hi)
+	if err != nil {
+		return nil, err
+	}
+	defer fieldDict.Close()
+
+	var terms []string
+	tfd, err := fieldDict.Next()
+	for err == nil && tfd != nil {
+		terms = append(terms, tfd.Term)
+		if tooManyClauses(len(terms)) {
+			return nil, tooManyClausesErr(field, len(terms))
+		}
+		tfd, err = fieldDict.Next()
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/search/sort.go
+++ b/vendor/github.com/blevesearch/bleve/v2/search/sort.go
@ -21,12 +21,13 @@ import (
 	"math"
 	"sort"
 	"strings"
+	"unicode/utf8"

 	"github.com/blevesearch/bleve/v2/geo"
 	"github.com/blevesearch/bleve/v2/numeric"
 )

-var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
+var HighTerm = strings.Repeat(string(utf8.MaxRune), 3)
 var LowTerm = string([]byte{0x00})

 type SearchSort interface {