git/blob: GetContentBase64 with fewer allocations and no goroutine (#8297)

See #8222 for context.

`GetBlobContentBase64` was using a pipe and a goroutine to read the blob content as base64. This can be replaced by a pre-allocated buffer and a direct copy.

Note that, although similar to `GetBlobContent`, the new `GetContentBase64` does not truncate the content when the blob size is over the limit; it returns an error instead. I think that `GetBlobContent` should adopt the same behavior at some point (error instead of truncating).

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [x] I do not want this change to show in the release notes.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8297
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: oliverpool <git@olivier.pfad.fr>
Co-committed-by: oliverpool <git@olivier.pfad.fr>
This commit is contained in:
oliverpool 2025-06-27 11:22:10 +02:00 committed by Earl Warren
parent 184e068f37
commit 7ad20a2730
4 changed files with 57 additions and 31 deletions

View file

@ -8,6 +8,7 @@ import (
"bufio" "bufio"
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"fmt"
"io" "io"
"forgejo.org/modules/log" "forgejo.org/modules/log"
@ -172,33 +173,43 @@ func (b *Blob) GetBlobContent(limit int64) (string, error) {
return string(buf), err return string(buf), err
} }
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string type BlobTooLargeError struct {
func (b *Blob) GetBlobContentBase64() (string, error) { Size, Limit int64
dataRc, err := b.DataAsync() }
func (b BlobTooLargeError) Error() string {
return fmt.Sprintf("blob: content larger than limit (%d > %d)", b.Size, b.Limit)
}
// GetContentBase64 Reads the content of the blob and returns it as base64 encoded string.
// Returns [BlobTooLargeError] if the (unencoded) content is larger than the limit.
func (b *Blob) GetContentBase64(limit int64) (string, error) {
if b.Size() > limit {
return "", BlobTooLargeError{
Size: b.Size(),
Limit: limit,
}
}
rc, size, err := b.NewTruncatedReader(limit)
if err != nil { if err != nil {
return "", err return "", err
} }
defer dataRc.Close() defer rc.Close()
pr, pw := io.Pipe() encoding := base64.StdEncoding
encoder := base64.NewEncoder(base64.StdEncoding, pw) buf := bytes.NewBuffer(make([]byte, 0, encoding.EncodedLen(int(size))))
go func() { encoder := base64.NewEncoder(encoding, buf)
_, err := io.Copy(encoder, dataRc)
_ = encoder.Close()
if err != nil { if _, err := io.Copy(encoder, rc); err != nil {
_ = pw.CloseWithError(err)
} else {
_ = pw.Close()
}
}()
out, err := io.ReadAll(pr)
if err != nil {
return "", err return "", err
} }
return string(out), nil if err := encoder.Close(); err != nil {
return "", err
}
return buf.String(), nil
} }
// GuessContentType guesses the content type of the blob. // GuessContentType guesses the content type of the blob.

View file

@ -63,6 +63,24 @@ func TestBlob(t *testing.T) {
require.Equal(t, "file2\n", r) require.Equal(t, "file2\n", r)
}) })
t.Run("GetContentBase64", func(t *testing.T) {
r, err := testBlob.GetContentBase64(100)
require.NoError(t, err)
require.Equal(t, "ZmlsZTIK", r)
r, err = testBlob.GetContentBase64(-1)
require.ErrorAs(t, err, &BlobTooLargeError{})
require.Empty(t, r)
r, err = testBlob.GetContentBase64(4)
require.ErrorAs(t, err, &BlobTooLargeError{})
require.Empty(t, r)
r, err = testBlob.GetContentBase64(6)
require.NoError(t, err)
require.Equal(t, "ZmlsZTIK", r)
})
t.Run("NewTruncatedReader", func(t *testing.T) { t.Run("NewTruncatedReader", func(t *testing.T) {
// read fewer than available // read fewer than available
rc, size, err := testBlob.NewTruncatedReader(100) rc, size, err := testBlob.NewTruncatedReader(100)

View file

@ -5,6 +5,7 @@ package repo
import ( import (
"encoding/base64" "encoding/base64"
"errors"
"fmt" "fmt"
"net/http" "net/http"
"net/url" "net/url"
@ -506,11 +507,8 @@ func findWikiRepoCommit(ctx *context.APIContext) (*git.Repository, *git.Commit)
// given tree entry, encoded with base64. Writes to ctx if an error occurs. // given tree entry, encoded with base64. Writes to ctx if an error occurs.
func wikiContentsByEntry(ctx *context.APIContext, entry *git.TreeEntry) string { func wikiContentsByEntry(ctx *context.APIContext, entry *git.TreeEntry) string {
blob := entry.Blob() blob := entry.Blob()
if blob.Size() > setting.API.DefaultMaxBlobSize { content, err := blob.GetContentBase64(setting.API.DefaultMaxBlobSize)
return "" if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
}
content, err := blob.GetBlobContentBase64()
if err != nil {
ctx.Error(http.StatusInternalServerError, "GetBlobContentBase64", err) ctx.Error(http.StatusInternalServerError, "GetBlobContentBase64", err)
return "" return ""
} }

View file

@ -5,6 +5,7 @@ package files
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"net/url" "net/url"
"path" "path"
@ -273,13 +274,11 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
if err != nil { if err != nil {
return nil, err return nil, err
} }
content := "" content, err := gitBlob.GetContentBase64(setting.API.DefaultMaxBlobSize)
if gitBlob.Size() <= setting.API.DefaultMaxBlobSize { if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
content, err = gitBlob.GetBlobContentBase64()
if err != nil {
return nil, err return nil, err
} }
}
return &api.GitBlob{ return &api.GitBlob{
SHA: gitBlob.ID.String(), SHA: gitBlob.ID.String(),
URL: repo.APIURL() + "/git/blobs/" + url.PathEscape(gitBlob.ID.String()), URL: repo.APIURL() + "/git/blobs/" + url.PathEscape(gitBlob.ID.String()),