garm/vendor/github.com/lxc/lxd/shared/util_linux.go
Gabriel Adrian Samfira c61b7fd268
Update go modules
Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2023-03-12 16:22:37 +02:00

607 lines
18 KiB
Go

//go:build linux
package shared
import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"reflect"
	"strings"
	"sync/atomic"
	"syscall"
	"unsafe"

	"github.com/pkg/xattr"
	"golang.org/x/sys/unix"

	"github.com/lxc/lxd/lxd/revert"
	"github.com/lxc/lxd/shared/logger"
	"github.com/lxc/lxd/shared/units"
)
// --- pure Go functions ---
// GetFileStat returns ownership, device and link information for the path p.
//
// The path is examined with unix.Lstat, so a symbolic link is reported itself
// rather than the file it points to. On success it returns the owner uid and
// gid, the inode number and the hard link count. major and minor are only
// filled in (from the Rdev field) when p is a block or character device; for
// every other file type they are left at zero. If Lstat fails, err is set and
// all other results are zero.
func GetFileStat(p string) (uid int, gid int, major uint32, minor uint32, inode uint64, nlink int, err error) {
	var stat unix.Stat_t

	err = unix.Lstat(p, &stat)
	if err != nil {
		return
	}

	uid = int(stat.Uid)
	gid = int(stat.Gid)
	inode = uint64(stat.Ino)
	nlink = int(stat.Nlink)

	// The file type is an enumerated value stored in the S_IFMT bits, not a
	// set of independent flags, so it must be masked and compared for
	// equality. A plain bit test against S_IFBLK/S_IFCHR would also match
	// directories, symlinks and sockets.
	switch stat.Mode & unix.S_IFMT {
	case unix.S_IFBLK, unix.S_IFCHR:
		major = unix.Major(uint64(stat.Rdev))
		minor = unix.Minor(uint64(stat.Rdev))
	}

	return
}
// GetPathMode stats path (following symlinks, via os.Stat) and returns the
// mode that GetOwnerMode reports for it. If the stat call fails, a zero mode
// and the stat error are returned.
func GetPathMode(path string) (os.FileMode, error) {
	info, statErr := os.Stat(path)
	if statErr != nil {
		return os.FileMode(0000), statErr
	}

	// Only the mode is of interest here; the other two results are dropped.
	pathMode, _, _ := GetOwnerMode(info)

	return pathMode, nil
}
// SetSize issues a TIOCSWINSZ ioctl on fd to apply the given width and height.
//
// width and height are converted to uint16 before being handed to the kernel.
// The returned error is the errno of the ioctl, or nil on success.
func SetSize(fd int, width int, height int) (err error) {
	// Four uint16 slots are passed to the ioctl: slot 0 carries the height,
	// slot 1 the width, and the remaining two are left at zero.
	winsize := [4]uint16{uint16(height), uint16(width)}

	_, _, errno := unix.Syscall6(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TIOCSWINSZ), uintptr(unsafe.Pointer(&winsize)), 0, 0, 0)
	if errno == 0 {
		return nil
	}

	return errno
}
// GetAllXattr returns every extended attribute set on path as a name -> value
// map. The "L" variants of the xattr calls are used, so a symbolic link is
// inspected itself rather than its target.
//
// A filesystem that answers the listing with EOPNOTSUPP is treated as having
// no extended attributes: (nil, nil) is returned in that case. Any other
// failure, while listing or while reading a single attribute, is returned as a
// wrapped error.
func GetAllXattr(path string) (map[string]string, error) {
	names, listErr := xattr.LList(path)

	switch {
	case listErr == nil:
		// Listing worked, carry on below.
	case errors.Is(listErr, unix.EOPNOTSUPP):
		// Some filesystems don't support llistxattr() for various reasons.
		// Report that as "no xattrs" instead of failing.
		return nil, nil
	default:
		return nil, fmt.Errorf("Failed getting extended attributes from %q: %w", path, listErr)
	}

	result := make(map[string]string, len(names))
	for i := range names {
		name := names[i]

		raw, getErr := xattr.LGet(path, name)
		if getErr != nil {
			return nil, fmt.Errorf("Failed getting %q extended attribute from %q: %w", name, path, getErr)
		}

		result[name] = string(raw)
	}

	return result, nil
}
// ObjectFound is a sentinel value, not a failure: the directory walk callback
// in LookupUUIDByBlockDevPath returns it to stop walking as soon as the
// requested entry has been located.
var ObjectFound = errors.New("Found requested object")
// LookupUUIDByBlockDevPath returns the name of the entry in /dev/disk/by-uuid
// whose symlink points at diskDevice.
//
// Every symlink found while walking /dev/disk/by-uuid is read and its target
// is joined onto that directory (which also cleans "../" components). The
// base name of the first link whose resulting path equals diskDevice is
// returned. diskDevice is compared as a plain string, so it has to be given in
// the same cleaned, absolute form.
//
// An error is returned when the walk fails (the cause is wrapped) or when no
// matching link exists.
func LookupUUIDByBlockDevPath(diskDevice string) (string, error) {
	const byUUIDDir = "/dev/disk/by-uuid"

	uuid := ""
	readUUID := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Only symlinks can point at a device node.
		if info.Mode()&os.ModeSymlink == 0 {
			return nil
		}

		link, err := os.Readlink(path)
		if err != nil {
			return err
		}

		// filepath.Join() will call Clean() on the result and thus resolve
		// those ugly "../../" parts that make it hard to compare the strings.
		if filepath.Join(byUUIDDir, link) != diskDevice {
			return nil
		}

		uuid = filepath.Base(path)

		// Returning the sentinel lets us avoid needlessly traversing the
		// rest of the directory.
		return ObjectFound
	}

	err := filepath.Walk(byUUIDDir, readUUID)
	if err != nil && !errors.Is(err, ObjectFound) {
		return "", fmt.Errorf("Failed to detect UUID: %w", err)
	}

	if uuid == "" {
		return "", fmt.Errorf("Failed to detect UUID")
	}

	return uuid, nil
}
// GetErrno inspects the concrete type of err and reports whether it carries an
// errno.
//
// For *os.SyscallError and *os.PathError the wrapped Err field is returned; a
// unix.Errno is returned as is. In those three cases iserrno is true. For any
// other error (including nil) the result is (nil, false). Only the outermost
// error is examined; wrapped chains are not unwrapped.
func GetErrno(err error) (errno error, iserrno bool) {
	switch typed := err.(type) {
	case *os.SyscallError:
		return typed.Err, true
	case *os.PathError:
		return typed.Err, true
	case unix.Errno:
		return typed, true
	default:
		return nil, false
	}
}
// Utsname holds the same information as unix.Utsname, with every field
// converted to a Go string. Values of this type are produced by Uname.
type Utsname struct {
	// Sysname is the string form of unix.Utsname.Sysname.
	Sysname string
	// Nodename is the string form of unix.Utsname.Nodename.
	Nodename string
	// Release is the string form of unix.Utsname.Release.
	Release string
	// Version is the string form of unix.Utsname.Version.
	Version string
	// Machine is the string form of unix.Utsname.Machine.
	Machine string
	// Domainname is the string form of unix.Utsname.Domainname.
	Domainname string
}
// Uname calls unix.Uname and returns its result with every field converted to
// a string. If the underlying call fails, nil and its error are returned.
//
// The per-field conversion through intArrayToString is based on
// https://groups.google.com/forum/#!topic/golang-nuts/Jel8Bb-YwX8 - there is
// really no better way to do this, which is unfortunate. The more accepted
// CharsToString version from that thread is deliberately not used since it
// doesn't seem as portable, viz. github issue #206.
func Uname() (*Utsname, error) {
	var raw unix.Utsname

	if err := unix.Uname(&raw); err != nil {
		return nil, err
	}

	result := &Utsname{}
	result.Sysname = intArrayToString(raw.Sysname)
	result.Nodename = intArrayToString(raw.Nodename)
	result.Release = intArrayToString(raw.Release)
	result.Version = intArrayToString(raw.Version)
	result.Machine = intArrayToString(raw.Machine)
	result.Domainname = intArrayToString(raw.Domainname)

	return result, nil
}
// intArrayToString converts a NUL-terminated array or slice of small integers
// (as found in the fields of unix.Utsname) into a string.
//
// arr may be an array or slice whose elements are of kind int, int8, uint or
// uint8. Elements are consumed up to, but not including, the first zero; each
// one contributes exactly one byte (its low 8 bits), so multi-byte UTF-8
// sequences stored in the array are preserved as they are. Elements of any
// other kind are skipped. If arr is not an array or slice (including nil), the
// empty string is returned.
func intArrayToString(arr any) string {
	slice := reflect.ValueOf(arr)
	if kind := slice.Kind(); kind != reflect.Array && kind != reflect.Slice {
		return ""
	}

	var sb strings.Builder
	sb.Grow(slice.Len())

	for i := 0; i < slice.Len(); i++ {
		val := slice.Index(i)

		var valInt int64

		// Go's switch has no implicit fallthrough, so kinds sharing a
		// conversion must be listed in the same case.
		switch val.Kind() {
		case reflect.Int, reflect.Int8:
			valInt = val.Int()
		case reflect.Uint, reflect.Uint8:
			valInt = int64(val.Uint())
		default:
			continue
		}

		// A zero element terminates the string.
		if valInt == 0 {
			break
		}

		// WriteByte keeps the raw byte; string(byte) would re-encode values
		// >= 0x80 as two-byte UTF-8 sequences.
		sb.WriteByte(byte(valInt))
	}

	return sb.String()
}
// DeviceTotalMemory returns the value of the "MemTotal" field of /proc/meminfo
// as reported by GetMeminfo, together with any error GetMeminfo returns.
func DeviceTotalMemory() (int64, error) {
	return GetMeminfo("MemTotal")
}
// GetMeminfo returns the value of the named field from /proc/meminfo.
//
// field is the field name without the trailing colon (for example
// "MemTotal"). The first line starting with "<field>:" is used: its value and,
// when present, its unit suffix are concatenated and handed to
// units.ParseByteSizeString, whose result is returned unchanged (presumably a
// size in bytes - see that function).
//
// On any failure -1 is returned together with the error: the file cannot be
// opened or read, the field is missing or has no value, or the value cannot be
// parsed.
func GetMeminfo(field string) (int64, error) {
	// Open /proc/meminfo
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return -1, err
	}

	defer func() { _ = f.Close() }()

	prefix := field + ":"

	// Read it line by line
	scan := bufio.NewScanner(f)
	for scan.Scan() {
		line := scan.Text()

		// We only care about the requested field
		if !strings.HasPrefix(line, prefix) {
			continue
		}

		// Split what follows the colon on runs of whitespace, so the
		// amount of column padding doesn't matter.
		parts := strings.Fields(strings.TrimPrefix(line, prefix))
		if len(parts) == 0 {
			return -1, fmt.Errorf("No value found for %s", field)
		}

		// Extract the before last (value) and last (unit) fields. Some
		// entries carry no unit, in which case the value stands alone.
		value := parts[len(parts)-1]
		if len(parts) > 1 {
			value = parts[len(parts)-2] + value
		}

		// Feed the result to units.ParseByteSizeString to get an int value
		valueBytes, err := units.ParseByteSizeString(value)
		if err != nil {
			return -1, err
		}

		return valueBytes, nil
	}

	// Distinguish a read failure from a field that genuinely isn't there.
	err = scan.Err()
	if err != nil {
		return -1, err
	}

	return -1, fmt.Errorf("Couldn't find %s", field)
}
// OpenPtyInDevpts creates a new PTS pair, configures them and returns them.
//
// When devpts_fd is >= 0, "ptmx" is opened relative to that descriptor and the
// peer side must be obtainable through the TIOCGPTPEER ioctl; if that ioctl
// fails an error is returned. When devpts_fd is negative, /dev/ptmx is opened
// instead and, should TIOCGPTPEER fail, the peer is opened by path as
// /dev/pts/<n>.
//
// Both ends get the same termios flags, an 80x25 window size and the
// close-on-exec flag; the pty end is then chowned to uid:gid.
//
// It returns the ptx end, the pty end and a nil error on success. On failure
// both files are nil; descriptors opened so far are registered with the
// reverter (presumably closed by revert.Fail - see the revert package).
func OpenPtyInDevpts(devpts_fd int, uid, gid int64) (*os.File, *os.File, error) {
	revert := revert.New()
	defer revert.Fail()

	var fd int
	var ptx *os.File
	var err error

	// Create a PTS pair.
	if devpts_fd >= 0 {
		fd, err = unix.Openat(devpts_fd, "ptmx", unix.O_RDWR|unix.O_CLOEXEC|unix.O_NOCTTY, 0)
	} else {
		fd, err = unix.Openat(-1, "/dev/ptmx", unix.O_RDWR|unix.O_CLOEXEC|unix.O_NOCTTY, 0)
	}

	if err != nil {
		return nil, nil, err
	}

	ptx = os.NewFile(uintptr(fd), "/dev/pts/ptmx")
	revert.Add(func() { _ = ptx.Close() })

	// Unlock the ptx and pty.
	val := 0
	_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&val)))
	if errno != 0 {
		return nil, nil, unix.Errno(errno)
	}

	var pty *os.File
	ptyFd, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCGPTPEER, uintptr(unix.O_NOCTTY|unix.O_CLOEXEC|os.O_RDWR))
	// We can only fallback to looking up the fd in /dev/pts when we aren't dealing with the container's devpts instance.
	if errno == 0 {
		// Get the pty side.
		id := 0
		_, _, errno = unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCGPTN, uintptr(unsafe.Pointer(&id)))
		if errno != 0 {
			// ptyFd is still a raw descriptor that the reverter doesn't
			// know about yet, so it has to be closed here to not leak it.
			_ = unix.Close(int(ptyFd))

			return nil, nil, unix.Errno(errno)
		}

		pty = os.NewFile(ptyFd, fmt.Sprintf("/dev/pts/%d", id))
	} else {
		if devpts_fd >= 0 {
			return nil, nil, fmt.Errorf("TIOCGPTPEER required but not available")
		}

		// Get the pty side.
		id := 0
		_, _, errno = unix.Syscall(unix.SYS_IOCTL, uintptr(ptx.Fd()), unix.TIOCGPTN, uintptr(unsafe.Pointer(&id)))
		if errno != 0 {
			return nil, nil, unix.Errno(errno)
		}

		// Open the pty.
		pty, err = os.OpenFile(fmt.Sprintf("/dev/pts/%d", id), unix.O_NOCTTY|unix.O_CLOEXEC|os.O_RDWR, 0)
		if err != nil {
			return nil, nil, err
		}
	}

	revert.Add(func() { _ = pty.Close() })

	// Configure both sides
	for _, entry := range []*os.File{ptx, pty} {
		// Get termios.
		t, err := unix.IoctlGetTermios(int(entry.Fd()), unix.TCGETS)
		if err != nil {
			return nil, nil, err
		}

		// Set flags.
		// NOTE(review): termios(3) lists IMAXBEL, IUTF8, BRKINT and IXANY as
		// input-mode (c_iflag) constants, yet they are OR'ed into Cflag
		// here. Left as is to keep the existing behaviour - confirm whether
		// Iflag was intended.
		t.Cflag |= unix.IMAXBEL
		t.Cflag |= unix.IUTF8
		t.Cflag |= unix.BRKINT
		t.Cflag |= unix.IXANY
		t.Cflag |= unix.HUPCL

		// Set termios.
		err = unix.IoctlSetTermios(int(entry.Fd()), unix.TCSETS, t)
		if err != nil {
			return nil, nil, err
		}

		// Set the default window size.
		sz := &unix.Winsize{
			Col: 80,
			Row: 25,
		}

		err = unix.IoctlSetWinsize(int(entry.Fd()), unix.TIOCSWINSZ, sz)
		if err != nil {
			return nil, nil, err
		}

		// Set CLOEXEC.
		_, _, errno = unix.Syscall(unix.SYS_FCNTL, uintptr(entry.Fd()), unix.F_SETFD, unix.FD_CLOEXEC)
		if errno != 0 {
			return nil, nil, unix.Errno(errno)
		}
	}

	// Fix the ownership of the pty side.
	err = unix.Fchown(int(pty.Fd()), int(uid), int(gid))
	if err != nil {
		return nil, nil, err
	}

	revert.Success()
	return ptx, pty, nil
}
// OpenPty creates a new PTS pair, configures them and returns them.
// It is OpenPtyInDevpts called with a devpts descriptor of -1, i.e. the pair
// is allocated through /dev/ptmx; uid and gid are passed through unchanged.
func OpenPty(uid, gid int64) (*os.File, *os.File, error) {
	return OpenPtyInDevpts(-1, uid, gid)
}
// ExecReaderToChannel reads from r, driven by poll() events on fd, and
// delivers the data as byte slices on the returned channel.
//
// bufferSize values of 128 KiB or less are raised to 128 KiB. exited is
// expected to become readable (e.g. be closed) once the attached child has
// terminated. r and fd presumably refer to the same underlying file - the
// code polls fd and reads from r. The returned channel is unbuffered and is
// closed when the reading goroutine exits.
//
// Extensively commented directly in the code. Please leave the comments!
// Looking at this in a couple of months no one will know why and how this works
// anymore.
func ExecReaderToChannel(r io.Reader, bufferSize int, exited <-chan struct{}, fd int) <-chan []byte {
	// Enforce a minimum buffer size of 128 KiB.
	if bufferSize <= (128 * 1024) {
		bufferSize = (128 * 1024)
	}

	ch := make(chan ([]byte))

	// channelCtx is cancelled by whichever goroutine finishes first; the
	// reader loop checks it before sending on ch.
	channelCtx, channelCancel := context.WithCancel(context.Background())

	// [1]: This function has just one job: Dealing with the case where we
	// are running an interactive shell session where we put a process in
	// the background that does hold stdin/stdout open, but does not
	// generate any output at all. This case cannot be dealt with in the
	// following function call. Here's why: Assume the above case, now the
	// attached child (the shell in this example) exits. This will not
	// generate any poll() event: We won't get POLLHUP because the
	// background process is holding stdin/stdout open and no one is writing
	// to it. So we effectively block on GetPollRevents() in the function
	// below. Hence, we use another goroutine here whose only job is to
	// handle that case: When we detect that the child has exited we check
	// whether a POLLIN or POLLHUP event has been generated. If not, we know
	// that there's nothing buffered on stdout and exit.
	var attachedChildIsDead int32 = 0
	go func() {
		<-exited

		atomic.StoreInt32(&attachedChildIsDead, 1)

		// Whatever the poll below reports, the context is cancelled when
		// this goroutine returns.
		defer channelCancel()

		ret, revents, err := GetPollRevents(fd, 0, (unix.POLLIN | unix.POLLPRI | unix.POLLERR | unix.POLLHUP | unix.POLLRDHUP | unix.POLLNVAL))
		if ret < 0 {
			logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) on file descriptor: %s", err)
			// Something went wrong so let's exit, otherwise we
			// end up in an endless loop.
		} else if ret > 0 {
			if (revents & unix.POLLERR) > 0 {
				logger.Warnf("Detected poll(POLLERR) event")
				// Read end has likely been closed so again,
				// avoid an endless loop.
			} else if (revents & unix.POLLNVAL) > 0 {
				logger.Debugf("Detected poll(POLLNVAL) event")
				// Well, someone closed the fd haven't they? So
				// let's go home.
			}
		} else if ret == 0 {
			logger.Debugf("No data in stdout: exiting")
		}
	}()

	go func() {
		// Each read is handed a window of readSize bytes starting at offset;
		// data accumulates in buf until it is sent on ch.
		readSize := (128 * 1024)
		offset := 0
		buf := make([]byte, bufferSize)
		// Once the child has been seen dead, the atomic load is skipped on
		// later iterations.
		avoidAtomicLoad := false

		defer close(ch)
		defer channelCancel()
		for {
			nr := 0
			var err error

			// Block until there is an event on fd.
			ret, revents, err := GetPollRevents(fd, -1, (unix.POLLIN | unix.POLLPRI | unix.POLLERR | unix.POLLHUP | unix.POLLRDHUP | unix.POLLNVAL))
			if ret < 0 {
				// This condition is only reached when something is badly wrong, since EAGAIN and
				// EINTR are already retried by GetPollRevents(). So let's exit here.
				logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting", err)
				return
			}

			// [2]: If the process exits before all its data has been read by us and no other process holds stdin or
			// stdout open, then we will observe a (POLLHUP | POLLRDHUP | POLLIN) event. This means, we need to
			// keep on reading from the pty file descriptor until we get a simple POLLHUP back.
			both := ((revents & (unix.POLLIN | unix.POLLPRI)) > 0) && ((revents & (unix.POLLHUP | unix.POLLRDHUP)) > 0)
			if both {
				logger.Debugf("Detected poll(POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event")
				read := buf[offset : offset+readSize]
				nr, err = r.Read(read)
			}

			if (revents & unix.POLLERR) > 0 {
				logger.Warnf("Detected poll(POLLERR) event: exiting")
				return
			} else if (revents & unix.POLLNVAL) > 0 {
				logger.Warnf("Detected poll(POLLNVAL) event: exiting")
				return
			}

			if ((revents & (unix.POLLIN | unix.POLLPRI)) > 0) && !both {
				// This might appear unintuitive at first but is actually a nice trick: Assume we are running
				// a shell session in a container and put a process in the background that is writing to
				// stdout. Now assume the attached process (aka the shell in this example) exits because we
				// used Ctrl+D to send EOF or something. If no other process would be holding stdout open we
				// would expect to observe either a (POLLHUP | POLLRDHUP | POLLIN | POLLPRI) event if there
				// is still data buffered from the previous process or a simple (POLLHUP | POLLRDHUP) if
				// no data is buffered. The fact that we only observe a (POLLIN | POLLPRI) event means that
				// another process is holding stdout open and is writing to it.
				// One counter argument that can be leveraged is (brauner looks at tycho :))
				// "Hey, you need to write at least one additional tty buffer to make sure that
				// everything that the attached child has written is actually shown."
				// The answer to that is:
				// "This case can only happen if the process has exited and has left data in stdout which
				// would generate a (POLLIN | POLLPRI | POLLHUP | POLLRDHUP) event and this case is already
				// handled and triggers another codepath. (See [2].)"
				if avoidAtomicLoad || atomic.LoadInt32(&attachedChildIsDead) == 1 {
					avoidAtomicLoad = true
					// Handle race between atomic.StoreInt32() in the goroutine
					// explained in [1] and atomic.LoadInt32() in the goroutine
					// here:
					// We need to check for (POLLHUP | POLLRDHUP) here again since we might
					// still be handling a pure POLLIN event from a write prior to the child's
					// exit. But the child might have exited right before and performed
					// atomic.StoreInt32() to update attachedChildIsDead before we
					// performed our atomic.LoadInt32(). This means we accidentally hit this
					// codepath and are misinformed about the available poll() events. So we
					// need to perform a non-blocking poll() again to exclude that case:
					//
					// - If we detect no (POLLHUP | POLLRDHUP) event we know the child
					//   has already exited but someone else is holding stdin/stdout open and
					//   writing to it.
					//   Note that this case should only ever be triggered in situations like
					//   running a shell and doing stuff like:
					//    > ./lxc exec xen1 -- bash
					//   root@xen1:~# yes &
					//   .
					//   .
					//   .
					//   now send Ctrl+D or type "exit". By the time the Ctrl+D/exit event is
					//   triggered, we will have read all of the child's data it has written to
					//   stdout and so we can assume that anything that comes now belongs to
					//   the process that is holding stdin/stdout open.
					//
					// - If we detect a (POLLHUP | POLLRDHUP) event we know that we've
					//   hit this codepath by accident, caused by the race between
					//   atomic.StoreInt32() in the goroutine explained in [1] and
					//   atomic.LoadInt32() in this goroutine. So the next call to
					//   GetPollRevents() will either return
					//   (POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP)
					//   or (POLLHUP | POLLRDHUP). Both will trigger another codepath (See [2].)
					//   that takes care that all data of the child that is buffered in
					//   stdout is written out.
					ret, revents, err := GetPollRevents(fd, 0, (unix.POLLIN | unix.POLLPRI | unix.POLLERR | unix.POLLHUP | unix.POLLRDHUP | unix.POLLNVAL))
					if ret < 0 {
						logger.Errorf("Failed to poll(POLLIN | POLLPRI | POLLERR | POLLHUP | POLLRDHUP) on file descriptor: %s. Exiting", err)
						return
					} else if (revents & (unix.POLLHUP | unix.POLLRDHUP | unix.POLLERR | unix.POLLNVAL)) == 0 {
						logger.Debugf("Exiting but background processes are still running")
						return
					}
				}

				read := buf[offset : offset+readSize]
				nr, err = r.Read(read)
			}

			// The attached process has exited and we have read all data that may have
			// been buffered.
			if ((revents & (unix.POLLHUP | unix.POLLRDHUP)) > 0) && !both {
				logger.Debugf("Detected poll(POLLHUP) event: exiting")
				return
			}

			// Check if channel is closed before potentially writing to it below.
			if channelCtx.Err() != nil {
				logger.Debug("Detected closed channel: exiting")
				return
			}

			// Flush what has been collected once another full-size read would
			// no longer fit into buf, or as soon as a read reported an error.
			// A fresh buffer is allocated afterwards since the receiver now
			// holds a slice of the old one.
			offset += nr
			if offset > 0 && (offset+readSize >= bufferSize || err != nil) {
				ch <- buf[0:offset]
				offset = 0
				buf = make([]byte, bufferSize)
			}
		}
	}()

	return ch
}
// GetPollRevents polls the single descriptor fd for the events in flags.
//
// timeout is handed to unix.Poll as is. A poll that fails with EAGAIN or EINTR
// is transparently retried. On success the number returned by unix.Poll and
// the reported revents are returned with a nil error; on any other failure the
// result is (-1, -1, err).
func GetPollRevents(fd int, timeout int, flags int) (int, int, error) {
	fds := []unix.PollFd{
		{
			Fd:      int32(fd),
			Events:  int16(flags),
			Revents: 0,
		},
	}

	for {
		ready, pollErr := unix.Poll(fds, timeout)
		if pollErr == nil {
			return ready, int(fds[0].Revents), nil
		}

		// Anything but a transient interruption is reported to the caller.
		if pollErr != unix.EAGAIN && pollErr != unix.EINTR {
			return -1, -1, pollErr
		}
	}
}
// ExitStatus extracts the exit status from the error returned by exec.Cmd.
// If a nil err is provided then an exit status of 0 is returned along with the nil error.
// If a valid exit status can be extracted from err then it is returned along with a nil error;
// for a process that was terminated by signal n this is 128 + n.
// If no valid exit status can be extracted then a -1 exit status is returned along with the err provided.
func ExitStatus(err error) (int, error) {
	if err == nil {
		return 0, nil // No error exit status.
	}

	// Detect and extract ExitError to check the embedded exit status. An
	// ExitError without process state carries no status to report.
	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) || exitErr == nil || exitErr.ProcessState == nil {
		return -1, err // Not able to extract an exit status.
	}

	// If the process was signaled, extract the signal. os/exec hands out the
	// wait status as syscall.WaitStatus; the WaitStatus type declared by
	// golang.org/x/sys/unix is a different type and would never match here.
	status, isWaitStatus := exitErr.Sys().(syscall.WaitStatus)
	if isWaitStatus && status.Signaled() {
		return 128 + int(status.Signal()), nil // 128 + n == Fatal error signal "n"
	}

	// Otherwise capture the exit status from the command.
	return exitErr.ExitCode(), nil
}