Fix several render issues (#14986)

* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
zeripath 2021-03-15 23:20:05 +00:00 committed by GitHub
parent 044cd4d016
commit ed31ddc29a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 211 additions and 61 deletions

View file

@ -30,6 +30,9 @@ var (
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int
// emptyReplacer is the string replacer that maps every emoji to itself; it is used to locate emoji without altering the text.
emptyReplacer *strings.Replacer
// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer
@ -49,6 +52,7 @@ func loadMap() {
// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)
// sort from largest to small so we match combined emoji first
@ -64,6 +68,7 @@ func loadMap() {
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
// setup aliases
for _, a := range e.Aliases {
@ -77,6 +82,7 @@ func loadMap() {
}
// create replacers
emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
return aliasReplacer.Replace(s)
}
// rememberSecondWriteWriter is a writer that discards everything written to it
// and only remembers the span, within the overall output, covered by the
// second write it receives.
type rememberSecondWriteWriter struct {
	pos        int // number of bytes accepted so far
	idx        int // offset at which the second write started
	end        int // offset just past the second write
	writecount int // number of Write/WriteString calls received
}

// record accounts for a write of size bytes and captures the span of the
// second write.
func (n *rememberSecondWriteWriter) record(size int) {
	n.writecount++
	if n.writecount == 2 {
		n.idx = n.pos
		n.end = n.pos + size
	}
	n.pos += size
}

// Write discards p while recording its size. It always reports len(p) bytes
// written and a nil error.
func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
	n.record(len(p))
	return len(p), nil
}

// WriteString discards s while recording its size. It always reports len(s)
// bytes written and a nil error.
func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
	n.record(len(s))
	return len(s), nil
}
// FindEmojiSubmatchIndex returns the index pair (start and end byte offsets) of the
// first emoji found in s, or nil if s contains no emoji.
// NOTE(review): when several emoji start at that offset the longest one is presumably
// preferred, because loadMap orders the replacer pairs from largest to smallest - confirm.
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
found := make(map[int]int)
keys := make([]int, 0)
secondWriteWriter := rememberSecondWriteWriter{}
// see if there are any emoji in string before looking for position of specific ones
// no performance difference when there is a match but 10x faster when there are not
if s == ReplaceCodes(s) {
// A faster and cleaner implementation would copy the trie construction from
// strings.NewReplacer, but we can be lazy here.
//
// The implementation of strings.Replacer.WriteString is such that the first emoji
// submatch is simply the second thing that is passed to WriteString on the writer.
//
// Therefore we can simply take the span of the second write as our first emoji.
//
// FIXME: just copy the trie implementation from strings.NewReplacer
// The writer's Write and WriteString always return a nil error, so the result is ignored.
_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
// if we wrote less than twice then we never "replaced"
if secondWriteWriter.writecount < 2 {
return nil
}
// get index of first emoji occurrence while also checking for longest combination
for j := range GemojiData {
i := strings.Index(s, GemojiData[j].Emoji)
if i != -1 {
if _, ok := found[i]; !ok {
if len(keys) == 0 || i < keys[0] {
found[i] = j
keys = []int{i}
}
if i == 0 {
break
}
}
}
}
if len(keys) > 0 {
index := keys[0]
return []int{index, index + len(GemojiData[found[index]].Emoji)}
}
return nil
// idx and end are the start and end offsets of the second write, i.e. the first emoji.
return []int{secondWriteWriter.idx, secondWriteWriter.end}
}

View file

@ -8,6 +8,8 @@ package emoji
import (
"reflect"
"testing"
"github.com/stretchr/testify/assert"
)
func TestDumpInfo(t *testing.T) {
@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
}
}
}
// TestFindEmojiSubmatchIndex checks that FindEmojiSubmatchIndex reports the byte
// span of the first emoji in a string and nil when the string has no emoji.
func TestFindEmojiSubmatchIndex(t *testing.T) {
	// thumbsUp is the emoji every positive case expects to be found first.
	const thumbsUp = "\U0001f44d"

	type testcase struct {
		teststring string
		expected   []int
	}

	testcases := []testcase{
		{
			thumbsUp,
			[]int{0, len(thumbsUp)},
		},
		{
			// only the first of several emoji is reported
			thumbsUp + " +1 " + thumbsUp + " \U0001f37a",
			[]int{0, len(thumbsUp)},
		},
		{
			" " + thumbsUp,
			[]int{1, 1 + len(thumbsUp)},
		},
		{
			// a leading control byte must not confuse the offsets
			"\x01" + thumbsUp,
			[]int{1, 1 + len(thumbsUp)},
		},
		{
			// no emoji at all yields nil
			"no emoji here",
			nil,
		},
		{
			"",
			nil,
		},
	}

	for _, kase := range testcases {
		actual := FindEmojiSubmatchIndex(kase.teststring)
		// include the input so a failure identifies the offending case
		assert.Equal(t, kase.expected, actual, "input: %q", kase.teststring)
	}
}