forgejo/modules/git/blob.go
Bojidar Marinov 1ed750a33a feat: detect Interlisp sources as text (#8377)
This PR detects Interlisp files (files that include "(DEFINE-FILE-INFO" somewhere near the start, and do not have an .LCOM extension) as text files and displays them as such in the web UI.
To check for extensions, I had to extend the `typesniffer.DetectContentType` function to accept an extra filename parameter, which could be useful for future filetype detection features. Note that a few of the call sites I modified may pass a full file path instead of just a file name.
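
For illustration, here is a minimal sketch of the heuristic described above, written as a standalone helper. `looksLikeInterlisp` is a hypothetical name for this sketch and not the actual typesniffer code; it only captures the two conditions (marker near the start, no `.LCOM` extension).

```go
package sniff

import (
	"bytes"
	"strings"
)

// looksLikeInterlisp is a hypothetical helper (not the real typesniffer
// implementation) that reports whether a blob should be rendered as Interlisp
// source text: the marker "(DEFINE-FILE-INFO" appears in the first bytes of
// the content and the file does not carry the compiled .LCOM extension.
func looksLikeInterlisp(head []byte, filename string) bool {
	if strings.HasSuffix(strings.ToUpper(filename), ".LCOM") {
		return false // compiled Interlisp object files stay binary
	}
	return bytes.Contains(head, []byte("(DEFINE-FILE-INFO"))
}
```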

Implements #8184

## Checklist

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
  - [ ] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes... - NA
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [ ] I do not want this change to show in the release notes.
- [x] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title.

<!--start release-notes-assistant-->

## Release notes
<!--URL:https://codeberg.org/forgejo/forgejo-->
- Features
  - [PR](https://codeberg.org/forgejo/forgejo/pulls/8377): <!--number 8377 --><!--line 0 --><!--description ZGV0ZWN0IEludGVybGlzcCBzb3VyY2VzIGFzIHRleHQ=-->detect Interlisp sources as text<!--description-->
<!--end release-notes-assistant-->

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8377
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Bojidar Marinov <bojidar.marinov.bg@gmail.com>
Co-committed-by: Bojidar Marinov <bojidar.marinov.bg@gmail.com>
2025-07-02 07:38:46 +02:00


// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"fmt"
	"io"

	"forgejo.org/modules/log"
	"forgejo.org/modules/typesniffer"
)

// Blob represents a Git object.
type Blob struct {
	ID ObjectID

	gotSize bool
	size    int64
	name    string
	repo    *Repository
}

func (b *Blob) newReader() (*bufio.Reader, int64, func(), error) {
	wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
	if err != nil {
		return nil, 0, nil, err
	}

	_, err = wr.Write([]byte(b.ID.String() + "\n"))
	if err != nil {
		cancel()
		return nil, 0, nil, err
	}
	_, _, size, err := ReadBatchLine(rd)
	if err != nil {
		cancel()
		return nil, 0, nil, err
	}
	b.gotSize = true
	b.size = size
	return rd, size, cancel, err
}

// Size returns the uncompressed size of the blob
func (b *Blob) Size() int64 {
	if b.gotSize {
		return b.size
	}

	wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
	if err != nil {
		log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
		return 0
	}
	defer cancel()
	_, err = wr.Write([]byte(b.ID.String() + "\n"))
	if err != nil {
		log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
		return 0
	}
	_, _, b.size, err = ReadBatchLine(rd)
	if err != nil {
		log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
		return 0
	}

	b.gotSize = true

	return b.size
}

// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
	rd, size, cancel, err := b.newReader()
	if err != nil {
		return nil, err
	}

	if size < 4096 {
		bs, err := io.ReadAll(io.LimitReader(rd, size))
		defer cancel()
		if err != nil {
			return nil, err
		}
		_, err = rd.Discard(1)
		return io.NopCloser(bytes.NewReader(bs)), err
	}

	return &blobReader{
		rd:     rd,
		n:      size,
		cancel: cancel,
	}, nil
}

type blobReader struct {
	rd                *bufio.Reader
	n                 int64 // number of bytes to read
	additionalDiscard int64 // additional number of bytes to discard
	cancel            func()
}

func (b *blobReader) Read(p []byte) (n int, err error) {
	if b.n <= 0 {
		return 0, io.EOF
	}
	if int64(len(p)) > b.n {
		p = p[0:b.n]
	}
	n, err = b.rd.Read(p)
	b.n -= int64(n)
	return n, err
}

// Close implements io.Closer
func (b *blobReader) Close() error {
	if b.rd == nil {
		return nil
	}

	defer b.cancel()

	// discard the unread bytes, the truncated bytes and the trailing newline
	if err := DiscardFull(b.rd, b.n+b.additionalDiscard+1); err != nil {
		return err
	}

	b.rd = nil

	return nil
}

// Name returns name of the tree entry this blob object was created from (or empty string)
func (b *Blob) Name() string {
	return b.name
}

// NewTruncatedReader return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned)
func (b *Blob) NewTruncatedReader(limit int64) (rc io.ReadCloser, fullSize int64, err error) {
	r, fullSize, cancel, err := b.newReader()
	if err != nil {
		return nil, fullSize, err
	}

	limit = min(limit, fullSize)
	return &blobReader{
		rd:                r,
		n:                 limit,
		additionalDiscard: fullSize - limit,
		cancel:            cancel,
	}, fullSize, nil
}

// GetBlobContent Gets the truncated content of the blob as raw text
func (b *Blob) GetBlobContent(limit int64) (string, error) {
	if limit <= 0 {
		return "", nil
	}

	rc, fullSize, err := b.NewTruncatedReader(limit)
	if err != nil {
		return "", err
	}
	defer rc.Close()

	buf := make([]byte, min(fullSize, limit))
	_, err = io.ReadFull(rc, buf)
	return string(buf), err
}

type BlobTooLargeError struct {
	Size, Limit int64
}

func (b BlobTooLargeError) Error() string {
	return fmt.Sprintf("blob: content larger than limit (%d > %d)", b.Size, b.Limit)
}

// GetContentBase64 Reads the content of the blob and returns it as base64 encoded string.
// Returns [BlobTooLargeError] if the (unencoded) content is larger than the limit.
func (b *Blob) GetContentBase64(limit int64) (string, error) {
	if b.Size() > limit {
		return "", BlobTooLargeError{
			Size:  b.Size(),
			Limit: limit,
		}
	}

	rc, size, err := b.NewTruncatedReader(limit)
	if err != nil {
		return "", err
	}
	defer rc.Close()

	encoding := base64.StdEncoding
	buf := bytes.NewBuffer(make([]byte, 0, encoding.EncodedLen(int(size))))

	encoder := base64.NewEncoder(encoding, buf)

	if _, err := io.Copy(encoder, rc); err != nil {
		return "", err
	}
	if err := encoder.Close(); err != nil {
		return "", err
	}

	return buf.String(), nil
}

// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
	r, err := b.DataAsync()
	if err != nil {
		return typesniffer.SniffedType{}, err
	}
	defer r.Close()

	return typesniffer.DetectContentTypeFromReader(r, b.Name())
}

// GetBlob finds the blob object in the repository.
func (repo *Repository) GetBlob(idStr string) (*Blob, error) {
	id, err := NewIDFromString(idStr)
	if err != nil {
		return nil, err
	}
	if id.IsZero() {
		return nil, ErrNotExist{id.String(), ""}
	}
	return &Blob{
		ID:   id,
		repo: repo,
	}, nil
}