git/blob: GetContentBase64 with fewer allocations and no goroutine (#8297)

See #8222 for context.

`GetBlobContentBase64` was using a pipe and a goroutine to read the blob content as base64. This can be replaced by a pre-allocated buffer and a direct copy.

Note that although similar to `GetBlobContent`, it does not truncate the content if the blob size is over the limit (but returns an error). I think that `GetBlobContent` should adopt the same behavior at some point (error instead of truncating).

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [x] I do not want this change to show in the release notes.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8297
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: oliverpool <git@olivier.pfad.fr>
Co-committed-by: oliverpool <git@olivier.pfad.fr>
This commit is contained in:
oliverpool 2025-06-27 11:22:10 +02:00 committed by Earl Warren
parent 184e068f37
commit 7ad20a2730
4 changed files with 57 additions and 31 deletions

View file

@ -8,6 +8,7 @@ import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"forgejo.org/modules/log"
@ -172,33 +173,43 @@ func (b *Blob) GetBlobContent(limit int64) (string, error) {
return string(buf), err
}
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string
func (b *Blob) GetBlobContentBase64() (string, error) {
dataRc, err := b.DataAsync()
// BlobTooLargeError reports that a blob's content exceeds a caller-supplied
// size limit. GetContentBase64 returns it (by value) instead of truncating
// the content.
type BlobTooLargeError struct {
// Size is the blob size that was checked and Limit is the maximum that
// was allowed; both are formatted into the message built by Error.
Size, Limit int64
}
// Error implements the error interface. The message includes both the blob
// size and the limit it exceeded, formatted as "(size > limit)".
func (b BlobTooLargeError) Error() string {
return fmt.Sprintf("blob: content larger than limit (%d > %d)", b.Size, b.Limit)
}
// GetContentBase64 Reads the content of the blob and returns it as base64 encoded string.
// Returns [BlobTooLargeError] if the (unencoded) content is larger than the limit.
func (b *Blob) GetContentBase64(limit int64) (string, error) {
if b.Size() > limit {
return "", BlobTooLargeError{
Size: b.Size(),
Limit: limit,
}
}
rc, size, err := b.NewTruncatedReader(limit)
if err != nil {
return "", err
}
defer dataRc.Close()
defer rc.Close()
pr, pw := io.Pipe()
encoder := base64.NewEncoder(base64.StdEncoding, pw)
encoding := base64.StdEncoding
buf := bytes.NewBuffer(make([]byte, 0, encoding.EncodedLen(int(size))))
go func() {
_, err := io.Copy(encoder, dataRc)
_ = encoder.Close()
encoder := base64.NewEncoder(encoding, buf)
if err != nil {
_ = pw.CloseWithError(err)
} else {
_ = pw.Close()
}
}()
out, err := io.ReadAll(pr)
if err != nil {
if _, err := io.Copy(encoder, rc); err != nil {
return "", err
}
return string(out), nil
if err := encoder.Close(); err != nil {
return "", err
}
return buf.String(), nil
}
// GuessContentType guesses the content type of the blob.

View file

@ -63,6 +63,24 @@ func TestBlob(t *testing.T) {
require.Equal(t, "file2\n", r)
})
t.Run("GetContentBase64", func(t *testing.T) {
r, err := testBlob.GetContentBase64(100)
require.NoError(t, err)
require.Equal(t, "ZmlsZTIK", r)
r, err = testBlob.GetContentBase64(-1)
require.ErrorAs(t, err, &BlobTooLargeError{})
require.Empty(t, r)
r, err = testBlob.GetContentBase64(4)
require.ErrorAs(t, err, &BlobTooLargeError{})
require.Empty(t, r)
r, err = testBlob.GetContentBase64(6)
require.NoError(t, err)
require.Equal(t, "ZmlsZTIK", r)
})
t.Run("NewTruncatedReader", func(t *testing.T) {
// read fewer than available
rc, size, err := testBlob.NewTruncatedReader(100)

View file

@ -5,6 +5,7 @@ package repo
import (
"encoding/base64"
"errors"
"fmt"
"net/http"
"net/url"
@ -506,11 +507,8 @@ func findWikiRepoCommit(ctx *context.APIContext) (*git.Repository, *git.Commit)
// given tree entry, encoded with base64. Writes to ctx if an error occurs.
func wikiContentsByEntry(ctx *context.APIContext, entry *git.TreeEntry) string {
blob := entry.Blob()
if blob.Size() > setting.API.DefaultMaxBlobSize {
return ""
}
content, err := blob.GetBlobContentBase64()
if err != nil {
content, err := blob.GetContentBase64(setting.API.DefaultMaxBlobSize)
if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
ctx.Error(http.StatusInternalServerError, "GetBlobContentBase64", err)
return ""
}

View file

@ -5,6 +5,7 @@ package files
import (
"context"
"errors"
"fmt"
"net/url"
"path"
@ -273,13 +274,11 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
if err != nil {
return nil, err
}
content := ""
if gitBlob.Size() <= setting.API.DefaultMaxBlobSize {
content, err = gitBlob.GetBlobContentBase64()
if err != nil {
content, err := gitBlob.GetContentBase64(setting.API.DefaultMaxBlobSize)
if err != nil && !errors.As(err, &git.BlobTooLargeError{}) {
return nil, err
}
}
return &api.GitBlob{
SHA: gitBlob.ID.String(),
URL: repo.APIURL() + "/git/blobs/" + url.PathEscape(gitBlob.ID.String()),