gitea/modules/git/gitcmd/command.go

// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2016 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package gitcmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"code.gitea.io/gitea/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions
	"code.gitea.io/gitea/modules/gtprof"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/process"
	"code.gitea.io/gitea/modules/util"
)

// TrustedCmdArgs is a list of arguments that are trusted to be passed to a git command as-is.
// It's mainly for passing user-provided and trusted arguments to git command.
// In most cases, it shouldn't be used. Use AddXxx function instead.
// Its element type lives in the "internal" package, so other packages cannot construct elements directly;
// ToTrustedCmdArgs is the explicit conversion from plain strings.
type TrustedCmdArgs []internal.CmdArg

// DefaultLocale is the default LC_ALL to run git commands in.
// CommonGitCmdEnvs emits it as the "LC_ALL=..." environment entry.
const DefaultLocale = "C"

// Command represents a command with its subcommands or arguments.
//
// Building state (filled by NewCommand and the AddXxx/WithXxx methods):
//   - callerInfo: caller description used in the log/trace text built by Start, see WithParentCallerInfo
//   - prog: the executable to run (NewCommand sets it to GitExecutable)
//   - args: the arguments, passed after configArgs
//   - preErrors: errors recorded while building the command; Start refuses to run if any exist
//   - configArgs: the "-c key=value" pairs added by AddConfig, passed before args
//   - opts: the run options (dir, env, timeout, deprecated stdin/stdout, pipeline func)
//
// Running state (filled by Start):
//   - cmd: the underlying exec.Cmd, a non-nil value means the command has been started
//   - cmdCtx, cmdCancel, cmdFinished: the context and callbacks returned by the process manager
//   - cmdStartTime: the start time, used by Wait to log slow commands
//
// Stdio:
//   - cmdStdinWriter, cmdStdoutReader, cmdStderrReader: caller-provided pointers set by the WithXxx functions,
//     they are handed to safeAssignPipe in Start and closed by closeStdioPipes
//   - cmdManagedStderr: the buffer receiving stderr when the command is started by StartWithStderr
type Command struct {
	callerInfo string
	prog       string
	args       []string
	preErrors  []error
	configArgs []string
	opts       runOpts

	cmd *exec.Cmd

	cmdCtx       context.Context
	cmdCancel    process.CancelCauseFunc
	cmdFinished  process.FinishedFunc
	cmdStartTime time.Time

	cmdStdinWriter   *io.WriteCloser
	cmdStdoutReader  *io.ReadCloser
	cmdStderrReader  *io.ReadCloser
	cmdManagedStderr *bytes.Buffer
}

// logArgSanitize prepares a single argument for logging:
//   - an argument containing both "://" and "@" is passed through util.SanitizeCredentialURLs
//   - an absolute path is shortened to ".../<parent dir>/<base name>"
//   - anything else is returned unchanged
func logArgSanitize(arg string) string {
	switch {
	case strings.Contains(arg, "://") && strings.Contains(arg, "@"):
		return util.SanitizeCredentialURLs(arg)
	case filepath.IsAbs(arg):
		parent := filepath.Base(filepath.Dir(arg))
		return ".../" + filepath.Join(parent, filepath.Base(arg))
	default:
		return arg
	}
}

// LogString returns the program and its (sanitized) arguments joined by spaces, for logging.
// The config arguments added by AddConfig are not included.
func (c *Command) LogString() string {
	// WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space),
	// It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here.
	quoteIfNeeded := func(s string) string {
		if !strings.ContainsAny(s, " `'\"\t\r\n") {
			return s
		}
		return fmt.Sprintf("%q", s)
	}
	parts := make([]string, 0, len(c.args)+1)
	parts = append(parts, quoteIfNeeded(c.prog))
	for _, arg := range c.args {
		parts = append(parts, quoteIfNeeded(logArgSanitize(arg)))
	}
	return strings.Join(parts, " ")
}

// ProcessState returns the string form of the underlying process state.
// It returns an empty string if the command has not been started yet.
func (c *Command) ProcessState() string {
	if c.cmd != nil {
		return c.cmd.ProcessState.String()
	}
	return ""
}

// NewCommand creates and returns a new Git Command based on given command and arguments.
// Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead.
func NewCommand(args ...internal.CmdArg) *Command {
	cmd := &Command{
		prog: GitExecutable,
		args: make([]string, len(args)),
	}
	for i := range args {
		cmd.args[i] = string(args[i])
	}
	return cmd
}

// handlePreErrorBrokenCommand records that the given argument is not acceptable.
// The recorded error wraps ErrBrokenCommand and is reported when the command is started.
func (c *Command) handlePreErrorBrokenCommand(arg string) {
	brokenErr := util.ErrorWrap(ErrBrokenCommand, `broken git command argument %q`, arg)
	c.preErrors = append(c.preErrors, brokenErr)
}

// isSafeArgumentValue reports whether s can be used as a value (not an option):
// it must not start with '-'. The empty string is considered safe.
func isSafeArgumentValue(s string) bool {
	if len(s) == 0 {
		return true
	}
	return s[0] != '-'
}

// isValidArgumentOption reports whether s looks like an option, i.e. it is non-empty and starts with '-'.
// It doesn't check whether the option is supported or not.
func isValidArgumentOption(s string) bool {
	if len(s) == 0 {
		return false
	}
	return s[0] == '-'
}

// AddArguments appends trusted git arguments (options or values) to the command and returns the command for chaining.
// It only accepts string literals, or trusted CmdArg.
// Type CmdArg is in the internal package, so it can not be used outside of this package directly,
// it makes sure that user-provided arguments won't cause RCE risks.
// User-provided arguments should be passed by other AddXxx functions
func (c *Command) AddArguments(args ...internal.CmdArg) *Command {
	for i := range args {
		c.args = append(c.args, string(args[i]))
	}
	return c
}

// AddOptionValues adds a new option followed by a list of non-option values.
// For example: AddOptionValues("--opt", val) means 2 arguments: {"--opt", val}.
// The values are treated as dynamic argument values. It equals to: AddArguments("--opt") then AddDynamicArguments(val).
// If opt doesn't start with '-', nothing is added and a "broken command" error is recorded.
func (c *Command) AddOptionValues(opt internal.CmdArg, args ...string) *Command {
	optStr := string(opt)
	if isValidArgumentOption(optStr) {
		c.args = append(c.args, optStr)
		return c.AddDynamicArguments(args...)
	}
	c.handlePreErrorBrokenCommand(optStr)
	return c
}

// AddOptionFormat adds a single option argument built from a format string and its arguments.
// For example: AddOptionFormat("--opt=%s %s", val1, val2) means 1 argument: {"--opt=val1 val2"}.
// A "broken command" error is recorded (and nothing is added) if opt doesn't start with '-',
// or if the number of '%' verbs (ignoring "%%") doesn't match the number of arguments.
func (c *Command) AddOptionFormat(opt string, args ...any) *Command {
	// a quick check to make sure the format string matches the number of arguments, to find low-level mistakes ASAP
	verbCount := strings.Count(strings.ReplaceAll(opt, "%%", ""), "%")
	if !isValidArgumentOption(opt) || verbCount != len(args) {
		c.handlePreErrorBrokenCommand(opt)
		return c
	}
	c.args = append(c.args, fmt.Sprintf(opt, args...))
	return c
}

// AddDynamicArguments adds new dynamic argument values to the command.
// The arguments may come from user input and can not be trusted, so no leading '-' is allowed to avoid passing options.
// Every unsafe argument is recorded as a "broken command" error.
// The arguments are only appended when the command has no recorded error at all (including earlier ones).
// TODO: in the future, this function can be renamed to AddArgumentValues
func (c *Command) AddDynamicArguments(args ...string) *Command {
	for i := range args {
		if isSafeArgumentValue(args[i]) {
			continue
		}
		c.handlePreErrorBrokenCommand(args[i])
	}
	if len(c.preErrors) == 0 {
		c.args = append(c.args, args...)
	}
	return c
}

// AddDashesAndList adds the "--" separator and then the list as arguments, it's usually for adding file list.
// The list items are appended as-is, without any check.
// At the moment, this function can be only called once, maybe in future it can be refactored to support multiple calls (if necessary)
func (c *Command) AddDashesAndList(list ...string) *Command {
	// Some old code also checks `arg != ""`, IMO it's not necessary.
	// If the check is needed, the list should be prepared before the call to this function
	c.args = append(append(c.args, "--"), list...)
	return c
}

// AddConfig adds a "-c key=value" config pair, which Start passes before the other arguments.
// If the "key=value" text starts with '-', nothing is added and a "broken command" error is recorded.
func (c *Command) AddConfig(key, value string) *Command {
	pair := key + "=" + value
	if isSafeArgumentValue(pair) {
		c.configArgs = append(c.configArgs, "-c", pair)
		return c
	}
	c.handlePreErrorBrokenCommand(pair)
	return c
}

// ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs, no check is performed.
// In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead
func ToTrustedCmdArgs(args []string) TrustedCmdArgs {
	trusted := make(TrustedCmdArgs, 0, len(args))
	for _, s := range args {
		trusted = append(trusted, internal.CmdArg(s))
	}
	return trusted
}

// runOpts holds the options which are set by the WithXxx functions and applied to the exec.Cmd by Start
type runOpts struct {
	// Env is the base environment, nil means the environment of the current process.
	// Timeout is only applied if it is positive.
	Env     []string
	Timeout time.Duration

	// Dir is the working dir for the git command, however:
	// FIXME: this could be incorrect in many cases, for example:
	// * /some/path/.git
	// * /some/path/.git/gitea-data/data/repositories/user/repo.git
	// If "user/repo.git" is invalid/broken, then running git command in it will use "/some/path/.git", and produce unexpected results
	// The correct approach is to use the `--git-dir` global argument
	Dir string

	// Stdout is the writer which receives the command's stdout (set by the deprecated WithStdout)
	Stdout io.Writer

	// Stdin is used for passing input to the command
	// The caller must make sure the Stdin writer is closed properly to finish the Run function.
	// Otherwise, the Run function may hang for long time or forever, especially when the Git's context deadline is not the same as the caller's.
	// Some common mistakes:
	// * `defer stdinWriter.Close()` then call `cmd.Run()`: the Run() would never return if the command is killed by timeout
	// * `go { case <- parentContext.Done(): stdinWriter.Close() }` with `cmd.Run(DefaultTimeout)`: the command would have been killed by timeout but the Run doesn't return until stdinWriter.Close()
	// * `go { if stdoutReader.Read() err != nil: stdinWriter.Close() }` with `cmd.Run()`: the stdoutReader may never return error if the command is killed by timeout
	// In the future, ideally the git module itself should have full control of the stdin, to avoid such problems and make it easier to refactor to a better architecture.
	// Use new functions like WithStdinWriter to avoid such problems.
	Stdin io.Reader

	// PipelineFunc, if set, is called by Wait after the command has been started.
	// When it returns, Wait cancels the command context (with the returned error as the cause) and closes the stdio pipes.
	PipelineFunc func(Context) error
}

// commonBaseEnvs returns the environment entries shared by all git-related commands:
// the fixed entries below, plus the pass-through variables which are set in the current process.
func commonBaseEnvs() []string {
	// some environment variables should be passed to git command
	passThroughEnvKeys := []string{
		"GNUPGHOME", // git may call gnupg to do commit signing
	}

	envs := make([]string, 0, 3+len(passThroughEnvKeys))
	// Make Gitea use internal git config only, to prevent conflicts with user's git config
	// It's better to use GIT_CONFIG_GLOBAL, but it requires git >= 2.32, so we still use HOME at the moment.
	envs = append(envs, "HOME="+HomeDir())
	// Avoid using system git config, it would cause problems (eg: use macOS osxkeychain to show a modal dialog, auto installing lfs hooks)
	// This might be a breaking change in 1.24, because some users said that they have put some configs like "receive.certNonceSeed" in "/etc/gitconfig"
	// For these users, they need to migrate the necessary configs to Gitea's git config file manually.
	envs = append(envs, "GIT_CONFIG_NOSYSTEM=1")
	// Ignore replace references (https://git-scm.com/docs/git-replace)
	envs = append(envs, "GIT_NO_REPLACE_OBJECTS=1")

	for _, key := range passThroughEnvKeys {
		val, ok := os.LookupEnv(key)
		if !ok {
			continue
		}
		envs = append(envs, key+"="+val)
	}
	return envs
}

// CommonGitCmdEnvs returns the common environment variables for a "git" command:
// the base entries plus the locale and the "no interactive prompt" switch.
func CommonGitCmdEnvs() []string {
	envs := commonBaseEnvs()
	envs = append(envs,
		"LC_ALL="+DefaultLocale,
		"GIT_TERMINAL_PROMPT=0", // avoid prompting for credentials interactively, supported since git v2.3
	)
	return envs
}

// CommonCmdServEnvs is like CommonGitCmdEnvs, but it only returns minimal required environment variables for the "gitea serv" command
// That is: only the base entries, without the LC_ALL and GIT_TERMINAL_PROMPT entries added by CommonGitCmdEnvs.
func CommonCmdServEnvs() []string {
	return commonBaseEnvs()
}

// ErrBrokenCommand is the error wrapped by every "pre-error" recorded when an AddXxx function rejects an argument.
// Start returns the joined pre-errors instead of running the command, so callers can detect it by errors.Is.
var ErrBrokenCommand = errors.New("git command is broken")

// WithDir sets the working directory of the command (see the notes on runOpts.Dir) and returns the command for chaining.
func (c *Command) WithDir(dir string) *Command {
	c.opts.Dir = dir
	return c
}

// WithEnv sets the base environment of the command and returns the command for chaining.
// If it is nil, Start uses the environment of the current process.
// In both cases, Start appends the entries from CommonGitCmdEnvs.
func (c *Command) WithEnv(env []string) *Command {
	c.opts.Env = env
	return c
}

// WithTimeout sets the timeout of the command and returns the command for chaining.
// A zero or negative value means that Start registers the command without a timeout.
func (c *Command) WithTimeout(timeout time.Duration) *Command {
	c.opts.Timeout = timeout
	return c
}

// WithStdoutReader registers r as the destination for the command's stdout pipe and returns the command for chaining.
// Start hands r to safeAssignPipe together with the command's StdoutPipe,
// presumably *r holds the pipe reader afterwards (safeAssignPipe is defined elsewhere, to be confirmed).
// The registered closer is closed by closeStdioPipes.
func (c *Command) WithStdoutReader(r *io.ReadCloser) *Command {
	c.cmdStdoutReader = r
	return c
}

// WithStdout is deprecated, use WithStdoutReader instead
// It sets the writer which Start assigns to the Stdout field of the underlying exec.Cmd.
func (c *Command) WithStdout(stdout io.Writer) *Command {
	c.opts.Stdout = stdout
	return c
}

// WithStderrReader registers r as the destination for the command's stderr pipe and returns the command for chaining.
// Start hands r to safeAssignPipe together with the command's StderrPipe,
// presumably *r holds the pipe reader afterwards (safeAssignPipe is defined elsewhere, to be confirmed).
// It can't be combined with the "WithStderr" family functions (StartWithStderr, RunWithStderr, RunStdBytes, ...):
// RunStdBytes panics explicitly, and Start panics if a stderr writer is already set on the exec.Cmd.
func (c *Command) WithStderrReader(r *io.ReadCloser) *Command {
	c.cmdStderrReader = r
	return c
}

// WithStdinWriter registers w as the destination for the command's stdin pipe and returns the command for chaining.
// Start hands w to safeAssignPipe together with the command's StdinPipe,
// presumably *w holds the pipe writer afterwards (safeAssignPipe is defined elsewhere, to be confirmed).
// The registered closer is closed by closeStdioPipes.
func (c *Command) WithStdinWriter(w *io.WriteCloser) *Command {
	c.cmdStdinWriter = w
	return c
}

// WithStdin is deprecated, use WithStdinWriter instead
// It sets the reader which Start assigns to the Stdin field of the underlying exec.Cmd,
// see the notes on runOpts.Stdin for the common mistakes.
func (c *Command) WithStdin(stdin io.Reader) *Command {
	c.opts.Stdin = stdin
	return c
}

// WithPipelineFunc sets the function which is called by Wait while the command is running, and returns the command for chaining.
// After f returns, Wait cancels the command context with f's error as the cause, closes the stdio pipes,
// and then waits for the process to exit.
func (c *Command) WithPipelineFunc(f func(Context) error) *Command {
	c.opts.PipelineFunc = f
	return c
}

// WithParentCallerInfo can be used to set the caller info (usually function name) of the parent function of the caller.
// For most cases, "Run" family functions can get its caller info automatically
// But if you need to call "Run" family functions in a wrapper function: "FeatureFunc -> GeneralWrapperFunc -> RunXxx",
// then you can call this function in GeneralWrapperFunc to set the caller info of FeatureFunc.
// If optInfo is provided, its first item is used as the caller info,
// otherwise the name is detected from the call stack and everything up to the last "/" is removed.
// The caller info can only be set once.
func (c *Command) WithParentCallerInfo(optInfo ...string) *Command {
	switch {
	case c.callerInfo != "":
		// already set, keep the first one
	case len(optInfo) > 0:
		c.callerInfo = optInfo[0]
	default:
		const skipFrames = 1 /*parent "wrap/run" functions*/ + 1 /*this function*/
		name := util.CallerFuncName(skipFrames)
		if idx := strings.LastIndex(name, "/"); idx >= 0 {
			name = name[idx+1:]
		}
		c.callerInfo = name
	}
	return c
}

// Start starts the git command without waiting for it to finish, the caller should call Wait afterwards.
//
// It registers the command to the process manager (with the timeout from WithTimeout if it is positive),
// prepares the environment, working directory and stdio, then starts the process.
// The process is bound to the managed command context, so it gets killed when that context is done
// (timeout, cancellation by the process manager, or cancellation of the parent ctx).
//
// It returns the joined "pre-errors" if the command was built with broken arguments, or the error from preparing the pipes / starting the process.
// If an error is returned, the stdio pipes are closed and the command is marked as finished, Wait must not be called.
// It panics if the command has already been started, or if a managed stderr buffer conflicts with a caller-provided stderr pipe.
func (c *Command) Start(ctx context.Context) (retErr error) {
	if c.cmd != nil {
		// this is a programming error, it will cause serious deadlock problems, so it must be fixed.
		panic("git command has already been started")
	}

	defer func() {
		if retErr != nil {
			// release the pipes to avoid resource leak
			c.closeStdioPipes()
			// if error occurs, we must also finish the task, otherwise, cmdFinished will be called in "Wait" function
			if c.cmdFinished != nil {
				c.cmdFinished()
			}
		}
	}()

	if len(c.preErrors) != 0 {
		// In most cases, such error shouldn't happen. If it happens, it must be a programming error, so we log it as error level with more details
		err := errors.Join(c.preErrors...)
		log.Error("git command: %s, error: %s", c.LogString(), err)
		return err
	}

	cmdLogString := c.LogString()
	if c.callerInfo == "" {
		c.WithParentCallerInfo()
	}
	// these logs are for debugging purposes only, so no guarantee of correctness or stability
	desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", c.callerInfo, logArgSanitize(c.opts.Dir), cmdLogString)
	log.Debug("git.Command: %s", desc)

	_, span := gtprof.GetTracer().Start(ctx, gtprof.TraceSpanGitRun)
	defer span.End()
	span.SetAttributeString(gtprof.TraceAttrFuncCaller, c.callerInfo)
	span.SetAttributeString(gtprof.TraceAttrGitCommand, cmdLogString)

	if c.opts.Timeout <= 0 {
		c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContext(ctx, desc)
	} else {
		c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContextTimeout(ctx, c.opts.Timeout, desc)
	}

	c.cmdStartTime = time.Now()

	// The process must be bound to the managed context (not the parent one),
	// otherwise neither the timeout nor "cmdCancel" could stop the process, and Wait could block forever.
	c.cmd = exec.CommandContext(c.cmdCtx, c.prog, append(c.configArgs, c.args...)...)
	if c.opts.Env == nil {
		c.cmd.Env = os.Environ()
	} else {
		// cap the slice, to make sure the "append" below copies it instead of writing into the caller's backing array
		c.cmd.Env = c.opts.Env[:len(c.opts.Env):len(c.opts.Env)]
	}

	process.SetSysProcAttribute(c.cmd)
	c.cmd.Env = append(c.cmd.Env, CommonGitCmdEnvs()...)
	c.cmd.Dir = c.opts.Dir
	c.cmd.Stdout = c.opts.Stdout
	c.cmd.Stdin = c.opts.Stdin

	if _, err := safeAssignPipe(c.cmdStdinWriter, c.cmd.StdinPipe); err != nil {
		return err
	}
	if _, err := safeAssignPipe(c.cmdStdoutReader, c.cmd.StdoutPipe); err != nil {
		return err
	}
	if _, err := safeAssignPipe(c.cmdStderrReader, c.cmd.StderrPipe); err != nil {
		return err
	}

	if c.cmdManagedStderr != nil {
		if c.cmd.Stderr != nil {
			panic("CombineStderr needs managed (but not caller-provided) stderr pipe")
		}
		c.cmd.Stderr = c.cmdManagedStderr
	}
	return c.cmd.Start()
}

// closeStdioPipes closes the caller-registered stdout, stderr and stdin pipes (in that order) by safeClosePtrCloser.
// It is called more than once on some paths (e.g. twice by Wait when a pipeline function is used),
// so safeClosePtrCloser (defined elsewhere) is presumably safe for nil pointers and repeated calls, to be confirmed.
func (c *Command) closeStdioPipes() {
	safeClosePtrCloser(c.cmdStdoutReader)
	safeClosePtrCloser(c.cmdStderrReader)
	safeClosePtrCloser(c.cmdStdinWriter)
}

// Wait waits for the started command to finish, it must only be called after a successful Start.
// When it returns, the stdio pipes are closed and the command is marked as finished in the process manager.
//
// With a pipeline function (WithPipelineFunc): the function is called first, then the command context is cancelled
// with the function's error as the cause, the pipes are closed and the process is awaited.
// The result is nil if the function succeeded and the context has no cause other than a plain cancellation,
// otherwise it is the joined error of the function error, the context cause and the process error.
//
// Without a pipeline function: the result is the joined error of the context cause and the process error (nil if both are nil).
func (c *Command) Wait() error {
	defer func() {
		c.closeStdioPipes()
		c.cmdFinished()
	}()

	if c.opts.PipelineFunc != nil {
		errCallback := c.opts.PipelineFunc(&cmdContext{Context: c.cmdCtx, cmd: c})
		// after the pipeline function returns, we can safely cancel the command context and close the stdio pipes
		c.cmdCancel(errCallback)
		c.closeStdioPipes()
		errWait := c.cmd.Wait()
		errCause := context.Cause(c.cmdCtx)
		// the pipeline function should be able to know whether it succeeds or fails
		if errCallback == nil && (errCause == nil || errors.Is(errCause, context.Canceled)) {
			return nil
		}
		return errors.Join(errCallback, errCause, errWait)
	}

	// there might be other goroutines using the context or pipes, so we just wait for the command to finish
	errWait := c.cmd.Wait()
	elapsed := time.Since(c.cmdStartTime)
	if elapsed > time.Second {
		// use LogString (but not the command struct itself) to log the sanitized command line like other logs
		log.Debug("slow git.Command.Run: %s (%s)", c.LogString(), elapsed) // TODO: no need to log this for long-running commands
	}

	// Here the logic is different from "PipelineFunc" case,
	// because PipelineFunc can return error if it fails, it knows whether it succeeds or fails.
	// But in normal case, the caller just runs the git command, the command's exit code is the source of truth.
	// If the caller need to know whether the command error is caused by cancellation, it should check the "err" by itself.
	errCause := context.Cause(c.cmdCtx)
	return errors.Join(errCause, errWait)
}

// StartWithStderr is like Start, but the command's stderr is collected into a buffer managed by the command.
// The caller should call WaitWithStderr afterwards.
// It returns nil on success, otherwise the start error wrapped as RunStdError (without stderr content).
func (c *Command) StartWithStderr(ctx context.Context) RunStdError {
	c.cmdManagedStderr = new(bytes.Buffer)
	if startErr := c.Start(ctx); startErr != nil {
		return &runStdError{err: startErr}
	}
	return nil
}

// WaitWithStderr is like Wait, but a failure is returned as RunStdError together with the collected stderr content.
// It panics if the command wasn't started with a managed stderr buffer (see StartWithStderr).
func (c *Command) WaitWithStderr() RunStdError {
	if c.cmdManagedStderr == nil {
		panic("CombineStderr needs managed (but not caller-provided) stderr pipe")
	}
	if errWait := c.Wait(); errWait != nil {
		stderr := util.UnsafeBytesToString(c.cmdManagedStderr.Bytes())
		return &runStdError{err: errWait, stderr: stderr}
	}
	// if no exec error but only stderr output, the stderr output is still saved in "c.cmdManagedStderr" and can be read later
	return nil
}

// RunWithStderr starts the command with a managed stderr buffer and waits for it to finish.
// A start failure is wrapped as RunStdError, otherwise the result of WaitWithStderr is returned.
func (c *Command) RunWithStderr(ctx context.Context) RunStdError {
	startErr := c.StartWithStderr(ctx)
	if startErr == nil {
		return c.WaitWithStderr()
	}
	return &runStdError{err: startErr}
}

// Run starts the command and waits for it to finish.
// It returns the error from Start if the command can't be started, otherwise the result of Wait.
func (c *Command) Run(ctx context.Context) (err error) {
	err = c.Start(ctx)
	if err == nil {
		err = c.Wait()
	}
	return err
}

// RunStdString runs the command and returns stdout/stderr as string, and stores stderr in the returned error (err combined with stderr).
// The returned strings are converted from the collected bytes by util.UnsafeBytesToString.
func (c *Command) RunStdString(ctx context.Context) (stdout, stderr string, runErr RunStdError) {
	// the caller info must be collected in this function, to report the caller of RunStdString
	c.WithParentCallerInfo()
	outBytes, errBytes, runErr := c.runStdBytes(ctx)
	stdout = util.UnsafeBytesToString(outBytes)
	stderr = util.UnsafeBytesToString(errBytes)
	return stdout, stderr, runErr
}

// RunStdBytes runs the command and returns stdout/stderr as bytes, and stores stderr in the returned error (err combined with stderr).
func (c *Command) RunStdBytes(ctx context.Context) (stdout, stderr []byte, runErr RunStdError) {
	// the caller info must be collected in this function, to report the caller of RunStdBytes
	c.WithParentCallerInfo()
	return c.runStdBytes(ctx)
}

// runStdBytes runs the command, collects its stdout into a local buffer and its stderr into the managed stderr buffer.
// It returns the collected stdout bytes, stderr bytes, and the error from RunWithStderr.
// It panics if a stdout writer, a stdout reader or a stderr reader has been set by the caller.
func (c *Command) runStdBytes(ctx context.Context) ([]byte, []byte, RunStdError) {
	hasCallerOutput := c.opts.Stdout != nil || c.cmdStdoutReader != nil || c.cmdStderrReader != nil
	if hasCallerOutput {
		// we must panic here, otherwise there would be bugs if developers set Stdout/Stderr by mistake, and it would be very difficult to debug
		panic("stdout and stderr field must be nil when using RunStdBytes")
	}
	var stdoutBuf bytes.Buffer
	c.WithParentCallerInfo()
	c.WithStdout(&stdoutBuf)
	err := c.RunWithStderr(ctx)
	return stdoutBuf.Bytes(), c.cmdManagedStderr.Bytes(), err
}

// DebugKill kills the process of the command, it is for debugging purposes only.
// It does nothing if the command has no running process yet (not started, or failed to start).
// The error from killing the process is ignored deliberately: the process may have already exited.
func (c *Command) DebugKill() {
	if c.cmd == nil || c.cmd.Process == nil {
		return
	}
	_ = c.cmd.Process.Kill()
}