Upgrade bleve from v2.0.6 to v2.3.0 (#18132)

This commit is contained in:
Lunny Xiao 2022-01-01 16:26:27 +08:00 committed by GitHub
parent 1a4e2bfcd1
commit 25a290e320
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
70 changed files with 1283 additions and 660 deletions

View file

@ -83,7 +83,7 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return newTopNCollector(size, skip, sort)
}
// NewTopNCollector builds a collector to find the top 'size' hits
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
@ -235,9 +235,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
// compute search duration
hc.took = time.Since(startTime)
if err != nil {
return err
}
// finalize actual results
err = hc.finalizeResults(reader)
if err != nil {

View file

@ -87,23 +87,21 @@ func (fb *DateTimeFacetBuilder) Field() string {
return fb.field
}
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
func (fb *DateTimeFacetBuilder) UpdateVisitor(term []byte) {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
t := time.Unix(0, i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}

View file

@ -86,23 +86,21 @@ func (fb *NumericFacetBuilder) Field() string {
return fb.field
}
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
func (fb *NumericFacetBuilder) UpdateVisitor(term []byte) {
fb.sawValue = true
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
f64 := numeric.Int64ToFloat64(i64)
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
// look at each of the ranges for a match
for rangeName, r := range fb.ranges {
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
fb.total++
}
}
}

View file

@ -62,12 +62,10 @@ func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
if field == fb.field {
fb.sawValue = true
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
fb.total++
}
func (fb *TermsFacetBuilder) UpdateVisitor(term []byte) {
fb.sawValue = true
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
fb.total++
}
func (fb *TermsFacetBuilder) StartDoc() {
@ -87,7 +85,7 @@ func (fb *TermsFacetBuilder) Result() *search.FacetResult {
Missing: fb.missing,
}
rv.Terms = make([]*search.TermFacet, 0, len(fb.termsCount))
rv.Terms = &search.TermFacets{}
for term, count := range fb.termsCount {
tf := &search.TermFacet{
@ -95,20 +93,20 @@ func (fb *TermsFacetBuilder) Result() *search.FacetResult {
Count: count,
}
rv.Terms = append(rv.Terms, tf)
rv.Terms.Add(tf)
}
sort.Sort(rv.Terms)
// we now have the list of the top N facets
trimTopN := fb.size
if trimTopN > len(rv.Terms) {
trimTopN = len(rv.Terms)
if trimTopN > rv.Terms.Len() {
trimTopN = rv.Terms.Len()
}
rv.Terms = rv.Terms[:trimTopN]
rv.Terms.TrimToTopN(trimTopN)
notOther := 0
for _, tf := range rv.Terms {
for _, tf := range rv.Terms.Terms() {
notOther += tf.Count
}
rv.Other = fb.total - notOther

View file

@ -15,6 +15,7 @@
package search
import (
"encoding/json"
"reflect"
"sort"
@ -43,7 +44,7 @@ func init() {
type FacetBuilder interface {
StartDoc()
UpdateVisitor(field string, term []byte)
UpdateVisitor(term []byte)
EndDoc()
Result() *FacetResult
@ -53,10 +54,11 @@ type FacetBuilder interface {
}
type FacetsBuilder struct {
indexReader index.IndexReader
facetNames []string
facets []FacetBuilder
fields []string
indexReader index.IndexReader
facetNames []string
facets []FacetBuilder
facetsByField map[string][]FacetBuilder
fields []string
}
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
@ -80,8 +82,13 @@ func (fb *FacetsBuilder) Size() int {
}
// Add registers facetBuilder under the given facet name.
//
// The builder is recorded in the flat facets/facetNames lists and is also
// grouped by the field it reports, so that builders can later be looked up
// by field name through facetsByField.
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
	if fb.facetsByField == nil {
		fb.facetsByField = map[string][]FacetBuilder{}
	}

	// Ask the builder for its field once; the same value feeds both the
	// per-field grouping and the flat fields list.
	field := facetBuilder.Field()

	fb.facetNames = append(fb.facetNames, name)
	fb.facets = append(fb.facets, facetBuilder)
	fb.facetsByField[field] = append(fb.facetsByField[field], facetBuilder)
	fb.fields = append(fb.fields, field)
}
@ -102,8 +109,10 @@ func (fb *FacetsBuilder) EndDoc() {
}
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
for _, facetBuilder := range fb.facets {
facetBuilder.UpdateVisitor(field, term)
if facetBuilders, ok := fb.facetsByField[field]; ok {
for _, facetBuilder := range facetBuilders {
facetBuilder.UpdateVisitor(term)
}
}
}
@ -112,27 +121,73 @@ type TermFacet struct {
Count int `json:"count"`
}
type TermFacets []*TermFacet
func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
for _, existingTerm := range tf {
if termFacet.Term == existingTerm.Term {
existingTerm.Count += termFacet.Count
return tf
}
}
// if we got here it wasn't already in the existing terms
tf = append(tf, termFacet)
return tf
// TermFacets is a collection of term facets that can be extended through Add
// and ordered through the Len/Swap/Less methods.
type TermFacets struct {
// termFacets is the list of facets, as returned by Terms.
termFacets []*TermFacet
// termLookup indexes the facets by their Term; Add consults it to detect
// a term that is already present.
termLookup map[string]*TermFacet
}
func (tf TermFacets) Len() int { return len(tf) }
func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf TermFacets) Less(i, j int) bool {
if tf[i].Count == tf[j].Count {
return tf[i].Term < tf[j].Term
// Terms returns the term facets currently held by tf.
//
// A nil receiver yields a nil slice, mirroring Len, so callers can range
// over the result of an absent *TermFacets without a nil check.
func (tf *TermFacets) Terms() []*TermFacet {
	if tf == nil {
		return nil
	}
	return tf.termFacets
}
// TrimToTopN keeps only the first n term facets and discards the rest.
//
// Discarded terms are also removed from the lookup map; otherwise a later
// Add of a trimmed term would update the discarded entry and the term would
// never reappear in the list. Asking for at least as many entries as are
// present is a no-op.
func (tf *TermFacets) TrimToTopN(n int) {
	if n >= len(tf.termFacets) {
		return
	}
	for _, dropped := range tf.termFacets[n:] {
		delete(tf.termLookup, dropped.Term)
	}
	tf.termFacets = tf.termFacets[:n]
}
// Add merges the given term facets into tf.
//
// A facet whose Term is already present has its Count added to the existing
// entry; a facet with a new Term is appended and indexed. The pointers passed
// in are stored as-is, so a later merge mutates the first facet seen for a
// term.
func (tf *TermFacets) Add(termFacets ...*TermFacet) {
	if tf.termLookup == nil {
		tf.termLookup = make(map[string]*TermFacet, len(termFacets))
	}
	for _, termFacet := range termFacets {
		if existing, ok := tf.termLookup[termFacet.Term]; ok {
			existing.Count += termFacet.Count
			// Move on to the next argument: returning here would silently
			// drop every facet that follows a duplicate in the same call.
			continue
		}
		// if we got here it wasn't already in the existing terms
		tf.termFacets = append(tf.termFacets, termFacet)
		tf.termLookup[termFacet.Term] = termFacet
	}
}
// Len reports how many term facets are held. It is safe to call on a nil
// receiver, which covers the case where *TermFacets is not fully initialized
// in index_impl.go.init().
func (tf *TermFacets) Len() int {
	if tf != nil {
		return len(tf.termFacets)
	}
	return 0
}
// Swap exchanges the term facets at positions i and j.
func (tf *TermFacets) Swap(i, j int) {
	list := tf.termFacets
	list[i], list[j] = list[j], list[i]
}
// Less orders term facets by descending Count; facets with equal counts are
// ordered by ascending Term.
func (tf *TermFacets) Less(i, j int) bool {
	left, right := tf.termFacets[i], tf.termFacets[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Term < right.Term
}
// MarshalJSON encodes the facets as a plain JSON array of term facets.
// TermFacets used to be the slice type []*TermFacet, so emitting only the
// list keeps the serialized form backwards compatible.
func (tf *TermFacets) MarshalJSON() ([]byte, error) {
	list := tf.termFacets
	return json.Marshal(list)
}
// UnmarshalJSON decodes a JSON array of term facets and feeds each decoded
// entry through Add, one at a time. A decoding error is returned unchanged
// and leaves tf untouched.
func (tf *TermFacets) UnmarshalJSON(b []byte) error {
	var decoded []*TermFacet
	if err := json.Unmarshal(b, &decoded); err != nil {
		return err
	}
	for i := range decoded {
		tf.Add(decoded[i])
	}
	return nil
}
type NumericRangeFacet struct {
@ -246,7 +301,7 @@ type FacetResult struct {
Total int `json:"total"`
Missing int `json:"missing"`
Other int `json:"other"`
Terms TermFacets `json:"terms,omitempty"`
Terms *TermFacets `json:"terms,omitempty"`
NumericRanges NumericRangeFacets `json:"numeric_ranges,omitempty"`
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
@ -254,7 +309,7 @@ type FacetResult struct {
func (fr *FacetResult) Size() int {
return reflectStaticSizeFacetResult + size.SizeOfPtr +
len(fr.Field) +
len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
fr.Terms.Len()*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
}
@ -264,8 +319,8 @@ func (fr *FacetResult) Merge(other *FacetResult) {
fr.Missing += other.Missing
fr.Other += other.Other
if fr.Terms != nil && other.Terms != nil {
for _, term := range other.Terms {
fr.Terms = fr.Terms.Add(term)
for _, term := range other.Terms.termFacets {
fr.Terms.Add(term)
}
}
if fr.NumericRanges != nil && other.NumericRanges != nil {
@ -283,12 +338,12 @@ func (fr *FacetResult) Merge(other *FacetResult) {
func (fr *FacetResult) Fixup(size int) {
if fr.Terms != nil {
sort.Sort(fr.Terms)
if len(fr.Terms) > size {
moveToOther := fr.Terms[size:]
if fr.Terms.Len() > size {
moveToOther := fr.Terms.termFacets[size:]
for _, mto := range moveToOther {
fr.Other += mto.Count
}
fr.Terms = fr.Terms[0:size]
fr.Terms.termFacets = fr.Terms.termFacets[0:size]
}
} else if fr.NumericRanges != nil {
sort.Sort(fr.NumericRanges)

View file

@ -123,9 +123,15 @@ OUTER:
// if there were no terms to highlight
// produce a single fragment from the beginning
start := 0
end := start + s.fragmentSize
if end > len(orig) {
end = len(orig)
end := start
used := 0
for end < len(orig) && used < s.fragmentSize {
r, size := utf8.DecodeRune(orig[end:])
if r == utf8.RuneError {
break
}
end += size
used++
}
rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
}

View file

@ -0,0 +1,84 @@
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"net"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// IPRangeQuery describes a search for IP addresses. CIDR holds either a
// network in CIDR notation or a single IP address; FieldVal optionally names
// the field to search and BoostVal optionally carries a boost.
type IPRangeQuery struct {
	// CIDR is always serialized. The tag previously read "cidr, omitempty";
	// the embedded space meant the option was never recognized, so the
	// well-formed tag below keeps the same wire format.
	CIDR     string `json:"cidr"`
	FieldVal string `json:"field,omitempty"`
	BoostVal *Boost `json:"boost,omitempty"`
}
// NewIPRangeQuery returns a query whose CIDR is set to cidr. The field and
// boost are left unset.
func NewIPRangeQuery(cidr string) *IPRangeQuery {
	q := new(IPRangeQuery)
	q.CIDR = cidr
	return q
}
// SetBoost stores b as the query's boost. A fresh Boost value is allocated on
// every call, so the stored pointer is never shared with the caller.
func (q *IPRangeQuery) SetBoost(b float64) {
	converted := Boost(b)
	q.BoostVal = &converted
}
// Boost returns the boost reported by q.BoostVal.Value().
// NOTE(review): the result for a query whose boost was never set depends on
// how Boost.Value treats a nil receiver, which is not visible here — confirm.
func (q *IPRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field the query searches. An empty name makes
// Searcher fall back to the mapping's default search field.
func (q *IPRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored on the query, which is empty when no
// field was set.
func (q *IPRangeQuery) Field() string {
return q.FieldVal
}
// Searcher builds the searcher for this query.
//
// The query's field is used when set, otherwise the mapping's default search
// field. When CIDR parses as a network an IP range searcher over that network
// is returned. When it is instead a single IP address, a term searcher for
// the address in its 16-byte form is returned. If CIDR is neither, the error
// from parsing it as a network is returned.
func (q *IPRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	_, network, cidrErr := net.ParseCIDR(q.CIDR)
	if cidrErr == nil {
		return searcher.NewIPRangeSearcher(i, network, field, q.BoostVal.Value(), options)
	}

	// Not a network: a specific IP is searched for with a plain term search
	// rather than a range over the members of a network.
	addr := net.ParseIP(q.CIDR)
	if addr == nil {
		return nil, cidrErr
	}
	return searcher.NewTermSearcherBytes(i, addr.To16(), field, q.BoostVal.Value(), options)
}
// Validate reports whether CIDR is usable: it must parse either as a network
// in CIDR notation or as a single IP address. Any other value yields an error
// that quotes the offending input.
func (q *IPRangeQuery) Validate() error {
	if _, _, err := net.ParseCIDR(q.CIDR); err == nil {
		return nil
	}
	// Not a network; searching for one specific IP is allowed as well.
	if net.ParseIP(q.CIDR) != nil {
		return nil
	}
	return fmt.Errorf("IPRangeQuery must be for an network or ip address, %q", q.CIDR)
}

View file

@ -248,8 +248,8 @@ func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
}
func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
// only a non-escaped space ends the tilde (or eof)
if eof || (!l.inEscape && next == ' ') {
// end on non-escaped space, colon, tilde, boost (or eof)
if eof || (!l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')) {
// end number
l.nextTokenType = tNUMBER
l.nextToken = &yySymType{
@ -257,7 +257,13 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
}
logDebugTokens("NUMBER - '%s'", l.nextToken.s)
l.reset()
return startState, true
consumed := true
if !eof && (next == ':' || next == '^' || next == '~') {
consumed = false
}
return startState, consumed
} else if !l.inEscape && next == '\\' {
l.inEscape = true
return inNumOrStrState, true
@ -287,7 +293,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
}
func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
// end on non-escped space, colon, tilde, boost (or eof)
// end on non-escaped space, colon, tilde, boost (or eof)
if eof || (!l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')) {
// end string
l.nextTokenType = tSTRING

View file

@ -270,7 +270,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field {
if i == 0 || lastField != ftl.Field {
lastField = ftl.Field
if dm.Locations == nil {

View file

@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"net"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// netLimits returns the lowest (lo) and highest (hi) address inside network
// n, both as 16-byte IPs. A network with a 32-bit mask is first converted to
// its 16-byte form and its prefix length is shifted by the 96 leading bits
// that conversion adds.
func netLimits(n *net.IPNet) (lo net.IP, hi net.IP) {
	const (
		v4Bits = 8 * net.IPv4len
		v6Bits = 8 * net.IPv6len
	)

	prefixLen, maskBits := n.Mask.Size()
	addr := n.IP
	if maskBits == v4Bits {
		addr = addr.To16()
		prefixLen += v6Bits - v4Bits
	}
	mask := net.CIDRMask(prefixLen, v6Bits)

	lo = make(net.IP, net.IPv6len)
	hi = make(net.IP, net.IPv6len)
	for i := range lo {
		// Network bits come from the address; host bits are all zero for
		// the lower bound and all one for the upper bound.
		lo[i] = addr[i] & mask[i]
		hi[i] = lo[i] | ^mask[i]
	}
	return lo, hi
}
// NewIPRangeSearcher builds a searcher matching every term of field that lies
// between the lowest and highest address of ipNet.
//
// The terms are read from the field dictionary range [lo, hi] given by
// netLimits and handed to NewMultiTermSearcher. An error from opening or
// iterating the dictionary is returned as-is, and tooManyClausesErr is
// returned as soon as tooManyClauses rejects the number of collected terms.
func NewIPRangeSearcher(indexReader index.IndexReader, ipNet *net.IPNet,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {

	lo, hi := netLimits(ipNet)
	fieldDict, err := indexReader.FieldDictRange(field, lo, hi)
	if err != nil {
		return nil, err
	}
	defer fieldDict.Close()

	var terms []string
	for {
		entry, nextErr := fieldDict.Next()
		if nextErr != nil {
			return nil, nextErr
		}
		if entry == nil {
			// dictionary exhausted
			break
		}
		terms = append(terms, entry.Term)
		if tooManyClauses(len(terms)) {
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View file

@ -21,12 +21,13 @@ import (
"math"
"sort"
"strings"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
)
var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var HighTerm = strings.Repeat(string(utf8.MaxRune), 3)
var LowTerm = string([]byte{0x00})
type SearchSort interface {