Add Image Diff for SVG files (#14867)

* Added type sniffer.

* Switched content detection from base to typesniffer.

* Added GuessContentType to Blob.

* Moved image info logic to client.
Added support for SVG images in diff.

* Restore old blocked svg behaviour.

* Added missing image formats.

* Execute image diff only when container is visible.

* add margin to spinner

* improve BIN tag on image diffs

* Default to render view.

* Show image diff on incomplete diff.

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
KN4CK3R 2021-06-05 14:32:19 +02:00 committed by GitHub
parent 7979c3654e
commit 8e262104c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 449 additions and 441 deletions

View file

@ -0,0 +1,96 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package typesniffer
import (
"fmt"
"io"
"net/http"
"regexp"
"strings"
)
// Use at most this many bytes to determine Content Type.
const sniffLen = 1024
// SvgMimeType MIME type of SVG images.
const SvgMimeType = "image/svg+xml"
var svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
var svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
// SniffedType contains informations about a blobs type.
type SniffedType struct {
contentType string
}
// IsText etects if content format is plain text.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
}
// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
}
// IsSvgImage detects if data is an SVG image format
func (ct SniffedType) IsSvgImage() bool {
return strings.Contains(ct.contentType, SvgMimeType)
}
// IsPDF detects if data is a PDF format
func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
}
// IsVideo detects if data is an video format
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
}
// IsAudio detects if data is an video format
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
}
// IsRepresentableAsText returns true if file content can be represented as
// plain text or is empty.
func (ct SniffedType) IsRepresentableAsText() bool {
return ct.IsText() || ct.IsSvgImage()
}
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
return SniffedType{"text/unknown"}
}
ct := http.DetectContentType(data)
if len(data) > sniffLen {
data = data[:sniffLen]
}
if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
// SVG is unsupported. https://github.com/golang/go/issues/15888
ct = SvgMimeType
}
return SniffedType{ct}
}
// DetectContentTypeFromReader guesses the content type contained in the reader.
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
buf := make([]byte, sniffLen)
n, err := r.Read(buf)
if err != nil && err != io.EOF {
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
}
buf = buf[:n]
return DetectContentType(buf), nil
}

View file

@ -0,0 +1,97 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package typesniffer
import (
"bytes"
"encoding/base64"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
// Pre-condition: Shorter than sniffLen detects SVG.
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
// Longer than sniffLen detects something else.
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
}
func TestIsTextFile(t *testing.T) {
assert.True(t, DetectContentType([]byte{}).IsText())
assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
}
func TestIsSvgImage(t *testing.T) {
assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte("<svg/>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Multiline
Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte{}).IsSvgImage())
assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
}
func TestIsPDF(t *testing.T) {
pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
assert.True(t, DetectContentType(pdf).IsPDF())
assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
}
func TestIsVideo(t *testing.T) {
mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
assert.True(t, DetectContentType(mp4).IsVideo())
assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
}
func TestIsAudio(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
assert.True(t, DetectContentType(mp3).IsAudio())
assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
}
func TestDetectContentTypeFromReader(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
assert.NoError(t, err)
assert.True(t, st.IsAudio())
}