diff --git a/pkg/debugcmd/checks/check.go b/pkg/debugcmd/checks/check.go new file mode 100644 index 00000000..3ce8974b --- /dev/null +++ b/pkg/debugcmd/checks/check.go @@ -0,0 +1,62 @@ +package checks + +import ( + "context" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +type Checker interface { + Name() string + Run(ctx context.Context, data *CheckData) []HealthCheck +} + +type HealthCheck struct { + Name string `json:"name"` + OK bool `json:"ok"` + Message string `json:"message,omitempty"` + Evidence map[string]interface{} `json:"evidence,omitempty"` +} + +type CheckData struct { + Twin uint32 + Contract uint64 + Workload gridtypes.Workload + VM func(ctx context.Context, id string) bool + Network func(ctx context.Context, id zos.NetID) string +} + +func success(name, message string, evidence map[string]interface{}) HealthCheck { + if evidence == nil { + evidence = make(map[string]interface{}) + } + return HealthCheck{Name: name, OK: true, Message: message, Evidence: evidence} +} + +func failure(name, message string, evidence map[string]interface{}) HealthCheck { + if evidence == nil { + evidence = make(map[string]interface{}) + } + return HealthCheck{Name: name, OK: false, Message: message, Evidence: evidence} +} + +func IsHealthy(checks []HealthCheck) bool { + for _, check := range checks { + if !check.OK { + return false + } + } + return true +} + +func Run(ctx context.Context, workloadType gridtypes.WorkloadType, data *CheckData) []HealthCheck { + switch workloadType { + case zos.NetworkType, zos.NetworkLightType: + return NetworkCheckerInstance.Run(ctx, data) + case zos.ZMachineType, zos.ZMachineLightType: + return VMCheckerInstance.Run(ctx, data) + default: + return nil + } +} diff --git a/pkg/debugcmd/checks/network.go b/pkg/debugcmd/checks/network.go new file mode 100644 index 00000000..eeb55309 --- /dev/null +++ b/pkg/debugcmd/checks/network.go @@ -0,0 +1,140 @@ +package checks + +import ( 
+ "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + cnins "github.com/containernetworking/plugins/pkg/ns" + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" + "github.com/threefoldtech/zosbase/pkg/network/namespace" + "github.com/threefoldtech/zosbase/pkg/network/nr" + "github.com/threefoldtech/zosbase/pkg/versioned" + "github.com/threefoldtech/zosbase/pkg/zinit" + "github.com/vishvananda/netlink" +) + +const ( + networkdVolatileDir = "/var/run/cache/networkd" + networksDir = "networks" + myceliumKeyDir = "mycelium-key" +) + +type NetworkChecker struct { + netID zos.NetID + nsName string + netCfgPath string + nrr *nr.NetResource +} + +func (nc *NetworkChecker) Name() string { return "network" } + +func (nc *NetworkChecker) Run(ctx context.Context, data *CheckData) []HealthCheck { + netID := zos.NetworkID(data.Twin, data.Workload.Name) + nc.netID = netID + nc.nsName = data.Network(ctx, netID) + nc.netCfgPath = filepath.Join(networkdVolatileDir, networksDir, netID.String()) + nc.nrr = nr.New(pkg.Network{NetID: netID}, filepath.Join(networkdVolatileDir, myceliumKeyDir)) + + return []HealthCheck{ + nc.checkConfig(), + nc.checkNamespace(), + nc.checkInterfaces(), + nc.checkBridge(), + nc.checkMycelium(), + } +} + +func (nc *NetworkChecker) checkConfig() HealthCheck { + _, raw, err := versioned.ReadFile(nc.netCfgPath) + if err != nil { + return failure("network.config", fmt.Sprintf("config file not found: %v", err), map[string]interface{}{"path": nc.netCfgPath, "netid": nc.netID.String()}) + } + + var netCfg pkg.Network + if err := json.Unmarshal(raw, &netCfg); err != nil { + return failure("network.config", fmt.Sprintf("config file invalid: %v", err), map[string]interface{}{"path": nc.netCfgPath, "netid": nc.netID.String()}) + } + + if netCfg.NetID != nc.netID { + return failure("network.config", fmt.Sprintf("netid mismatch: expected %s, got %s", nc.netID.String(), netCfg.NetID.String()), 
map[string]interface{}{"expected": nc.netID.String(), "got": netCfg.NetID.String()}) + } + + return success("network.config", "config valid", map[string]interface{}{"path": nc.netCfgPath, "netid": nc.netID.String()}) +} + +func (nc *NetworkChecker) checkNamespace() HealthCheck { + if !namespace.Exists(nc.nsName) { + return failure("network.namespace", "namespace not found", map[string]interface{}{"namespace": nc.nsName}) + } + return success("network.namespace", "namespace exists", map[string]interface{}{"namespace": nc.nsName}) +} + +func (nc *NetworkChecker) checkInterfaces() HealthCheck { + wgIface, _ := nc.nrr.WGName() + nrIface, _ := nc.nrr.NRIface() + pubIface := "public" + + netnsLinks := map[string]struct{}{} + if netNS, err := namespace.GetByName(nc.nsName); err == nil { + _ = netNS.Do(func(_ cnins.NetNS) error { + links, err := netlink.LinkList() + if err == nil { + for _, l := range links { + netnsLinks[l.Attrs().Name] = struct{}{} + } + } + return nil + }) + netNS.Close() + } + + missing := []string{} + for _, iface := range []string{wgIface, nrIface, pubIface} { + if _, ok := netnsLinks[iface]; !ok { + missing = append(missing, iface) + } + } + + if len(missing) > 0 { + return failure("network.interfaces", fmt.Sprintf("missing interfaces: %v", missing), map[string]interface{}{"namespace": nc.nsName, "missing": missing}) + } + + return success("network.interfaces", "all required interfaces present", map[string]interface{}{"namespace": nc.nsName}) +} + +func (nc *NetworkChecker) checkBridge() HealthCheck { + brName, _ := nc.nrr.BridgeName() + brPath := filepath.Join("/sys/class/net", brName) + + if _, err := os.Stat(brPath); err != nil { + return failure("network.bridge", fmt.Sprintf("bridge not found: %v", err), map[string]interface{}{"bridge": brName}) + } + + brifDir := filepath.Join(brPath, "brif") + ents, err := os.ReadDir(brifDir) + if err != nil || len(ents) == 0 { + return failure("network.bridge", fmt.Sprintf("bridge has no members: %v", err), 
map[string]interface{}{"bridge": brName}) + } + + return success("network.bridge", "bridge has members", map[string]interface{}{"bridge": brName}) +} + +func (nc *NetworkChecker) checkMycelium() HealthCheck { + service := nc.nrr.MyceliumServiceName() + st, err := zinit.Default().Status(service) + if err != nil { + return failure("network.mycelium", fmt.Sprintf("cannot get service status: %v", err), map[string]interface{}{"service": service}) + } + + if !st.State.Is(zinit.ServiceStateRunning) { + return failure("network.mycelium", fmt.Sprintf("service not running: %s", st.State.String()), map[string]interface{}{"service": service, "state": st.State.String()}) + } + + return success("network.mycelium", "service running", map[string]interface{}{"service": service, "pid": st.Pid}) +} + +var NetworkCheckerInstance = &NetworkChecker{} diff --git a/pkg/debugcmd/checks/system.go b/pkg/debugcmd/checks/system.go new file mode 100644 index 00000000..3020fa6b --- /dev/null +++ b/pkg/debugcmd/checks/system.go @@ -0,0 +1,46 @@ +package checks + +import ( + "context" + "fmt" + "os/exec" + "strings" + "time" +) + +const systemProbeTimeout = 60 * time.Second + +type SystemChecker struct { + command string +} + +func (sc *SystemChecker) Name() string { return "system" } + +func (sc *SystemChecker) Run(ctx context.Context, data *CheckData) []HealthCheck { + if sc.command == "" { + return nil + } + + parts := strings.Fields(sc.command) + if len(parts) == 0 { + return []HealthCheck{failure("system.probe", "empty probe command", nil)} + } + + probeCtx, cancel := context.WithTimeout(ctx, systemProbeTimeout) + defer cancel() + + cmd := exec.CommandContext(probeCtx, parts[0], parts[1:]...) 
+ output, err := cmd.CombinedOutput() + if err != nil { + return []HealthCheck{failure("system.probe", fmt.Sprintf("probe failed: %v", err), map[string]interface{}{"error": err.Error()})} + } + + return []HealthCheck{success("system.probe", "probe executed successfully", map[string]interface{}{ + "output": string(output), + "exit_code": cmd.ProcessState.ExitCode(), + })} +} + +func NewSystemChecker(command string) *SystemChecker { + return &SystemChecker{command: command} +} diff --git a/pkg/debugcmd/checks/vm.go b/pkg/debugcmd/checks/vm.go new file mode 100644 index 00000000..1ea94500 --- /dev/null +++ b/pkg/debugcmd/checks/vm.go @@ -0,0 +1,120 @@ +package checks + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/vm" +) + +const vmdVolatileDir = "/var/run/cache/vmd" + +type VMChecker struct { + workloadID gridtypes.WorkloadID + vmID string + cfgPath string + machine *vm.Machine + vmExists func(ctx context.Context, id string) bool +} + +func (vc *VMChecker) Name() string { return "vm" } + +func (vc *VMChecker) Run(ctx context.Context, data *CheckData) []HealthCheck { + workloadID, err := gridtypes.NewWorkloadID(data.Twin, data.Contract, data.Workload.Name) + if err != nil { + return []HealthCheck{failure("vm.init", fmt.Sprintf("invalid workload ID: %v", err), nil)} + } + + vc.workloadID = workloadID + vc.vmID = workloadID.String() + vc.cfgPath = filepath.Join(vmdVolatileDir, workloadID.String()) + vc.vmExists = data.VM + + return []HealthCheck{ + vc.checkConfig(), + vc.checkVMD(ctx), + vc.checkProcess(), + vc.checkDisks(), + vc.checkVirtioFS(), + } +} + +func (vc *VMChecker) loadMachine() (*vm.Machine, error) { + if vc.machine != nil { + return vc.machine, nil + } + machine, err := vm.MachineFromFile(vc.cfgPath) + if err != nil { + return nil, err + } + vc.machine = machine + return machine, nil +} + +func (vc *VMChecker) checkConfig() HealthCheck { + if _, err := 
os.Stat(vc.cfgPath); err != nil { + return failure("vm.config", fmt.Sprintf("config file not found: %v", err), map[string]interface{}{"path": vc.cfgPath}) + } + if _, err := vm.MachineFromFile(vc.cfgPath); err != nil { + return failure("vm.config", fmt.Sprintf("config file invalid: %v", err), map[string]interface{}{"path": vc.cfgPath}) + } + return success("vm.config", "config valid", map[string]interface{}{"path": vc.cfgPath, "vm_id": vc.vmID}) +} + +func (vc *VMChecker) checkVMD(ctx context.Context) HealthCheck { + if !vc.vmExists(ctx, vc.vmID) { + return failure("vm.vmd", "vmd reports VM does not exist", map[string]interface{}{"vm_id": vc.vmID}) + } + return success("vm.vmd", "vmd reports VM exists", map[string]interface{}{"vm_id": vc.vmID}) +} + +func (vc *VMChecker) checkProcess() HealthCheck { + ps, err := vm.Find(vc.vmID) + if err != nil { + return failure("vm.process", fmt.Sprintf("process not found: %v", err), map[string]interface{}{"vm_id": vc.vmID}) + } + return success("vm.process", "process running", map[string]interface{}{"vm_id": vc.vmID, "pid": ps.Pid}) +} + +func (vc *VMChecker) checkDisks() HealthCheck { + machine, err := vc.loadMachine() + if err != nil { + return failure("vm.disks", "config not available", map[string]interface{}{"vm_id": vc.vmID}) + } + + for _, disk := range machine.Disks { + if disk.Path == "" { + continue + } + if _, err := os.Stat(disk.Path); err != nil { + return failure("vm.disks", fmt.Sprintf("disk missing: %s", disk.Path), map[string]interface{}{"path": disk.Path, "vm_id": vc.vmID}) + } + } + + // TODO: check for files on disks? 
+ + return success("vm.disks", "all disks valid", map[string]interface{}{"vm_id": vc.vmID}) +} + +func (vc *VMChecker) checkVirtioFS() HealthCheck { + machine, err := vc.loadMachine() + if err != nil { + return failure("vm.virtiofs", fmt.Sprintf("config unavailable: %v", err), map[string]interface{}{"vm_id": vc.vmID}) + } + + for i := range machine.FS { + sock := filepath.Join("/var/run", fmt.Sprintf("virtio-%s-%d.socket", vc.vmID, i)) + if _, err := os.Stat(sock); err != nil { + return failure("vm.virtiofs", fmt.Sprintf("socket missing: %s", sock), map[string]interface{}{"socket": sock, "vm_id": vc.vmID}) + } + } + + return success("vm.virtiofs", "all virtiofs sockets present", map[string]interface{}{"vm_id": vc.vmID}) +} + +// TODO: add cloud-console check + +var VMCheckerInstance = &VMChecker{} diff --git a/pkg/debugcmd/deps.go b/pkg/debugcmd/deps.go new file mode 100644 index 00000000..0d62a252 --- /dev/null +++ b/pkg/debugcmd/deps.go @@ -0,0 +1,64 @@ +package debugcmd + +import ( + "context" + "fmt" + "strconv" + "strings" + + "github.com/threefoldtech/zosbase/pkg" + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +// Provision is the subset of the provision zbus interface used by debug commands. +type Provision interface { + ListTwins(ctx context.Context) ([]uint32, error) + List(ctx context.Context, twin uint32) ([]gridtypes.Deployment, error) + Get(ctx context.Context, twin uint32, contract uint64) (gridtypes.Deployment, error) + Changes(ctx context.Context, twin uint32, contract uint64) ([]gridtypes.Workload, error) +} + +// VM is the subset of the vmd zbus interface used by debug commands. 
// ParseDeploymentID splits a deployment identifier of the form
// "twin-id:contract-id" into its numeric parts. The twin ID must fit in 32
// bits and the contract ID in 64 bits; both are parsed as base-10 unsigned
// integers. An empty string, a string without exactly one ':' separator, or a
// non-numeric component results in an error.
func ParseDeploymentID(deploymentStr string) (uint32, uint64, error) {
	if len(deploymentStr) == 0 {
		return 0, 0, fmt.Errorf("deployment identifier is required")
	}

	// Exactly one separator means exactly two components.
	if strings.Count(deploymentStr, ":") != 1 {
		return 0, 0, fmt.Errorf("invalid deployment format: expected 'twin-id:contract-id', got '%s'", deploymentStr)
	}
	sep := strings.IndexByte(deploymentStr, ':')
	rawTwin := deploymentStr[:sep]
	rawContract := deploymentStr[sep+1:]

	twin, err := strconv.ParseUint(rawTwin, 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid twin ID: %s: %w", rawTwin, err)
	}

	contract, err := strconv.ParseUint(rawContract, 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid contract ID: %s: %w", rawContract, err)
	}

	return uint32(twin), contract, nil
}
Deps, req GetRequest) (GetResponse, error) { + twinID, contractID, err := ParseDeploymentID(req.Deployment) + if err != nil { + return GetResponse{}, err + } + + // TODO: only return active deployment. should return all + deployment, err := deps.Provision.Get(ctx, twinID, contractID) + if err != nil { + return GetResponse{}, err + } + + return GetResponse{Deployment: deployment}, nil +} diff --git a/pkg/debugcmd/health.go b/pkg/debugcmd/health.go new file mode 100644 index 00000000..8dc337ba --- /dev/null +++ b/pkg/debugcmd/health.go @@ -0,0 +1,118 @@ +package debugcmd + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/threefoldtech/zosbase/pkg/debugcmd/checks" + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +type HealthRequest struct { + Deployment string `json:"deployment"` + Options map[string]interface{} `json:"options,omitempty"` +} + +type HealthStatus string + +const ( + HealthHealthy HealthStatus = "healthy" + HealthUnhealthy HealthStatus = "unhealthy" +) + +type WorkloadHealth struct { + WorkloadID string `json:"workload_id"` + Type string `json:"type"` + Name string `json:"name"` + Status HealthStatus `json:"status"` + Checks []checks.HealthCheck `json:"checks"` +} + +type HealthResponse struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` + Workloads []WorkloadHealth `json:"workloads"` +} + +func ParseHealthRequest(payload []byte) (HealthRequest, error) { + var req HealthRequest + return req, json.Unmarshal(payload, &req) +} + +func Health(ctx context.Context, deps Deps, req HealthRequest) (HealthResponse, error) { + var twinID uint32 + var contractID uint64 + var err error + + hasSystemProbe := req.Options != nil && req.Options["system_probe"] != nil + + if req.Deployment != "" { + twinID, contractID, err = ParseDeploymentID(req.Deployment) + if err != nil { + return HealthResponse{}, err + } + } else if !hasSystemProbe { + return HealthResponse{}, fmt.Errorf("deployment is required when system_probe 
is not specified") + } + + out := HealthResponse{TwinID: twinID, ContractID: contractID} + + if hasSystemProbe { + if probeCmd, ok := req.Options["system_probe"].(string); ok && probeCmd != "" { + checkData := &checks.CheckData{Twin: twinID, Contract: contractID} + allChecks := checks.NewSystemChecker(probeCmd).Run(ctx, checkData) + if len(allChecks) > 0 { + out.Workloads = append(out.Workloads, newWorkloadHealth("system", "diagnostic", "system.probe", allChecks)) + } + } + } + + if req.Deployment != "" { + deployment, err := deps.Provision.Get(ctx, twinID, contractID) + if err != nil { + return HealthResponse{}, fmt.Errorf("failed to get deployment: %w", err) + } + + for _, wl := range deployment.Workloads { + workloadID, err := gridtypes.NewWorkloadID(twinID, contractID, wl.Name) + if err != nil { + continue + } + + checkData := &checks.CheckData{ + Network: deps.Network.Namespace, + VM: deps.VM.Exists, + Twin: twinID, + Contract: contractID, + Workload: wl, + } + + allChecks := checks.Run(ctx, wl.Type, checkData) + if len(allChecks) > 0 { + out.Workloads = append(out.Workloads, newWorkloadHealth( + workloadID.String(), + string(wl.Type), + string(wl.Name), + allChecks, + )) + } + } + } + + return out, nil +} + +func newWorkloadHealth(workloadID, workloadType, name string, allChecks []checks.HealthCheck) WorkloadHealth { + status := HealthUnhealthy + if checks.IsHealthy(allChecks) { + status = HealthHealthy + } + return WorkloadHealth{ + WorkloadID: workloadID, + Type: workloadType, + Name: name, + Status: status, + Checks: allChecks, + } +} diff --git a/pkg/debugcmd/history.go b/pkg/debugcmd/history.go new file mode 100644 index 00000000..090d8850 --- /dev/null +++ b/pkg/debugcmd/history.go @@ -0,0 +1,64 @@ +package debugcmd + +import ( + "context" + "encoding/json" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" +) + +type HistoryRequest struct { + Deployment string `json:"deployment"` // Format: "twin-id:contract-id" +} + +type WorkloadTransaction struct 
{ + Seq int `json:"seq"` + Type string `json:"type"` + Name string `json:"name"` + Created gridtypes.Timestamp `json:"created"` + State gridtypes.ResultState `json:"state"` + Message string `json:"message"` +} + +type HistoryResponse struct { + Deployment string `json:"deployment"` + History []WorkloadTransaction `json:"history"` +} + +func ParseHistoryRequest(payload []byte) (HistoryRequest, error) { + var req HistoryRequest + if err := json.Unmarshal(payload, &req); err != nil { + return req, err + } + return req, nil +} + +func History(ctx context.Context, deps Deps, req HistoryRequest) (HistoryResponse, error) { + twinID, contractID, err := ParseDeploymentID(req.Deployment) + if err != nil { + return HistoryResponse{}, err + } + + // TODO: only return history for active deployment. + history, err := deps.Provision.Changes(ctx, twinID, contractID) + if err != nil { + return HistoryResponse{}, err + } + + transactions := make([]WorkloadTransaction, 0, len(history)) + for idx, wl := range history { + transactions = append(transactions, WorkloadTransaction{ + Seq: idx + 1, + Type: string(wl.Type), + Name: string(wl.Name), + Created: wl.Result.Created, + State: wl.Result.State, + Message: wl.Result.Error, + }) + } + + return HistoryResponse{ + Deployment: req.Deployment, + History: transactions, + }, nil +} diff --git a/pkg/debugcmd/info.go b/pkg/debugcmd/info.go new file mode 100644 index 00000000..06c0fcc9 --- /dev/null +++ b/pkg/debugcmd/info.go @@ -0,0 +1,114 @@ +package debugcmd + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/threefoldtech/zosbase/pkg/gridtypes" + "github.com/threefoldtech/zosbase/pkg/gridtypes/zos" +) + +type InfoRequest struct { + Deployment string `json:"deployment"` // Format: "twin-id:contract-id" + Workload string `json:"workload"` // Workload name + Verbose bool `json:"verbose"` // If true, return full logs +} + +type InfoResponse struct { + WorkloadID string `json:"workload_id"` + Type string `json:"type"` + Name 
string `json:"name"` + Info interface{} `json:"info,omitempty"` + Logs string `json:"logs,omitempty"` +} + +func ParseInfoRequest(payload []byte) (InfoRequest, error) { + var req InfoRequest + if err := json.Unmarshal(payload, &req); err != nil { + return req, err + } + return req, nil +} + +func Info(ctx context.Context, deps Deps, req InfoRequest) (InfoResponse, error) { + if req.Workload == "" { + return InfoResponse{}, fmt.Errorf("workload name is required") + } + + twinID, contractID, err := ParseDeploymentID(req.Deployment) + if err != nil { + return InfoResponse{}, err + } + + deployment, err := deps.Provision.Get(ctx, twinID, contractID) + if err != nil { + return InfoResponse{}, fmt.Errorf("failed to get deployment: %w", err) + } + + var workload *gridtypes.Workload + for i := range deployment.Workloads { + if string(deployment.Workloads[i].Name) == req.Workload { + workload = &deployment.Workloads[i] + break + } + } + + if workload == nil { + return InfoResponse{}, fmt.Errorf("workload '%s' not found in deployment", req.Workload) + } + + workloadID, _ := gridtypes.NewWorkloadID(twinID, contractID, workload.Name) + resp := InfoResponse{ + WorkloadID: workloadID.String(), + Type: string(workload.Type), + Name: string(workload.Name), + } + + // TODO: Handle different workload types + switch workload.Type { + case zos.ZMachineType, zos.ZMachineLightType: + return handleZMachineInfo(ctx, deps, workloadID.String(), req.Verbose, resp) + case zos.NetworkType, zos.NetworkLightType: + return handleNetworkInfo(ctx, deps, twinID, workload, resp) + default: + return InfoResponse{}, fmt.Errorf("workload type '%s' not supported for info command", workload.Type) + } +} + +func handleZMachineInfo(ctx context.Context, deps Deps, vmID string, verbose bool, resp InfoResponse) (InfoResponse, error) { + // TODO: extend inspect to view more info of the vm + info, err := deps.VM.Inspect(ctx, vmID) + if err != nil { + return InfoResponse{}, fmt.Errorf("failed to inspect vm: %w", 
err) + } + resp.Info = info + + var raw string + if verbose { + raw, err = deps.VM.LogsFull(ctx, vmID) + } else { + raw, err = deps.VM.Logs(ctx, vmID) + } + if err != nil { + return InfoResponse{}, fmt.Errorf("failed to get vm logs: %w", err) + } + + resp.Logs = raw + return resp, nil +} + +func handleNetworkInfo(ctx context.Context, deps Deps, twinID uint32, workload *gridtypes.Workload, resp InfoResponse) (InfoResponse, error) { + netID := zos.NetworkID(twinID, workload.Name) + nsName := deps.Network.Namespace(ctx, netID) + + networkInfo := map[string]interface{}{ + "net_id": netID.String(), + "namespace": nsName, + "state": string(workload.Result.State), + } + + resp.Info = networkInfo + resp.Logs = "Network workloads don't support logs" + return resp, nil +} diff --git a/pkg/debugcmd/list.go b/pkg/debugcmd/list.go new file mode 100644 index 00000000..42f28ba8 --- /dev/null +++ b/pkg/debugcmd/list.go @@ -0,0 +1,78 @@ +package debugcmd + +import ( + "context" + "encoding/json" +) + +type ListRequest struct { + TwinID uint32 `json:"twin_id"` // optional, if not provided lists for all twins +} + +type ListWorkload struct { + Type string `json:"type"` + Name string `json:"name"` + State string `json:"state"` +} + +type ListDeployment struct { + TwinID uint32 `json:"twin_id"` + ContractID uint64 `json:"contract_id"` + Workloads []ListWorkload `json:"workloads"` +} + +type ListResponse struct { + Deployments []ListDeployment `json:"deployments"` +} + +func ParseListRequest(payload []byte) (ListRequest, error) { + if len(payload) == 0 { + return ListRequest{}, nil + } + + var req ListRequest + if err := json.Unmarshal(payload, &req); err != nil { + return ListRequest{}, err + } + return req, nil +} + +func List(ctx context.Context, deps Deps, req ListRequest) (ListResponse, error) { + twins := []uint32{req.TwinID} + if req.TwinID == 0 { + allTwins, err := deps.Provision.ListTwins(ctx) + if err != nil { + return ListResponse{}, err + } + + twins = allTwins + } + + 
deployments := make([]ListDeployment, 0) + for _, twin := range twins { + // TODO: this is only returning active deployments, + // cause when deprovision the workload is removed from the key list. + deploymentList, err := deps.Provision.List(ctx, twin) + if err != nil { + return ListResponse{}, err + } + + for _, d := range deploymentList { + workloads := make([]ListWorkload, 0, len(d.Workloads)) + for _, wl := range d.Workloads { + workloads = append(workloads, ListWorkload{ + Type: string(wl.Type), + Name: string(wl.Name), + State: string(wl.Result.State), + }) + } + deployments = append(deployments, ListDeployment{ + TwinID: d.TwinID, + ContractID: d.ContractID, + Workloads: workloads, + }) + } + } + + return ListResponse{Deployments: deployments}, nil +} diff --git a/pkg/environment/config.go b/pkg/environment/config.go index 9029b958..46bffab4 100644 --- a/pkg/environment/config.go +++ b/pkg/environment/config.go @@ -42,6 +42,7 @@ type Config struct { Users struct { Authorized []string `json:"authorized"` } `json:"users"` + AdminTwins []uint32 `json:"admin_twins"` // list of twin IDs allowed to access debug endpoints. 
// MyceliumServiceName returns the name under which this network resource's
// mycelium service is registered with zinit: "mycelium-" followed by nr.ID().
// SetMycelium and Delete use the same name to look the service up.
func (nr *NetResource) MyceliumServiceName() string {
	return fmt.Sprintf("mycelium-%s", nr.ID())
}
// ListTwins invokes the remote "ListTwins" method on the stub's module/object
// through the stub's client and decodes the reply into ret0. The error
// reported by the callee is returned as ret1.
//
// It panics when the request itself fails or when the reply cannot be
// unmarshalled; result.PanicOnError() presumably panics on transport-level
// errors as well — confirm against the zbus documentation.
//
// NOTE(review): this follows the same shape as the other stub methods and
// looks generated; regenerate rather than hand-edit if the interface changes.
func (s *ProvisionStub) ListTwins(ctx context.Context) (ret0 []uint32, ret1 error) {
	args := []interface{}{}
	result, err := s.client.RequestContext(ctx, s.module, s.object, "ListTwins", args...)
	if err != nil {
		panic(err)
	}
	result.PanicOnError()
	// CallError carries the error returned by the remote method, if any.
	ret1 = result.CallError()
	loader := zbus.Loader{
		&ret0,
	}
	if err := result.Unmarshal(&loader); err != nil {
		panic(err)
	}
	return
}
diff --git a/pkg/vm.go b/pkg/vm.go index ee8021dc..551d4010 100644 --- a/pkg/vm.go +++ b/pkg/vm.go @@ -276,6 +276,7 @@ type VMModule interface { Delete(name string) error Exists(name string) bool Logs(name string) (string, error) + LogsFull(name string) (string, error) List() ([]string, error) Metrics() (MachineMetrics, error) // Lock set lock on VM (pause,resume) diff --git a/pkg/vm/client.go b/pkg/vm/client.go index 6e76d790..0d494dbe 100644 --- a/pkg/vm/client.go +++ b/pkg/vm/client.go @@ -116,6 +116,7 @@ func (c *Client) Inspect(ctx context.Context) (VMData, error) { return VMData{}, fmt.Errorf("got unexpected http code '%s' on machine info, Response: %s", response.Status, string(body)) } + // TODO: use more info like running state, etc. var data struct { Config struct { CPU struct { diff --git a/pkg/vm/manager.go b/pkg/vm/manager.go index f460b502..671e07e7 100644 --- a/pkg/vm/manager.go +++ b/pkg/vm/manager.go @@ -583,6 +583,16 @@ func (m *Module) Logs(name string) (string, error) { return m.tail(path) } +// LogsFull returns full machine logs for given machine name. 
+func (m *Module) LogsFull(name string) (string, error) { + path := m.logsPath(name) + b, err := os.ReadFile(path) + if err != nil { + return "", err + } + return string(b), nil +} + // Inspect a machine by name func (m *Module) Inspect(name string) (pkg.VMInfo, error) { if !m.Exists(name) { diff --git a/pkg/zos_api/debug.go b/pkg/zos_api/debug.go new file mode 100644 index 00000000..cea2097a --- /dev/null +++ b/pkg/zos_api/debug.go @@ -0,0 +1,55 @@ +package zosapi + +import ( + "context" + + "github.com/threefoldtech/zosbase/pkg/debugcmd" +) + +func (g *ZosAPI) debugDeploymentListHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseListRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.List(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeploymentGetHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseGetRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.Get(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeploymentHistoryHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseHistoryRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.History(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeploymentInfoHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseInfoRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.Info(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeploymentHealthHandler(ctx context.Context, payload []byte) (interface{}, error) { + req, err := debugcmd.ParseHealthRequest(payload) + if err != nil { + return nil, err + } + return debugcmd.Health(ctx, g.debugDeps(), req) +} + +func (g *ZosAPI) debugDeps() debugcmd.Deps { + return debugcmd.Deps{ + Provision: g.provisionStub, + VM: g.vmStub, + Network: g.networkerStub, + } +} diff --git a/pkg/zos_api/middlewares.go 
b/pkg/zos_api/middlewares.go index ebf95f6c..ef167923 100644 --- a/pkg/zos_api/middlewares.go +++ b/pkg/zos_api/middlewares.go @@ -6,6 +6,8 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" + + "github.com/threefoldtech/zosbase/pkg/environment" ) func (g *ZosAPI) authorized(ctx context.Context, _ []byte) (context.Context, error) { @@ -17,6 +19,22 @@ func (g *ZosAPI) authorized(ctx context.Context, _ []byte) (context.Context, err return ctx, nil } +func (g *ZosAPI) adminAuthorized(ctx context.Context, _ []byte) (context.Context, error) { + user := peer.GetTwinID(ctx) + cfg, err := environment.GetConfig() + if err != nil { + return nil, fmt.Errorf("failed to get environment config: %w", err) + } + + for _, id := range cfg.AdminTwins { + if id == user { + return ctx, nil + } + } + + return nil, fmt.Errorf("unauthorized") +} + func (g *ZosAPI) log(ctx context.Context, _ []byte) (context.Context, error) { env := peer.GetEnvelope(ctx) request := env.GetRequest() diff --git a/pkg/zos_api/routes.go b/pkg/zos_api/routes.go index 22976feb..abd6772d 100644 --- a/pkg/zos_api/routes.go +++ b/pkg/zos_api/routes.go @@ -14,6 +14,15 @@ func (g *ZosAPI) SetupRoutes(router *peer.Router) { system.WithHandler("diagnostics", g.systemDiagnosticsHandler) system.WithHandler("node_features_get", g.systemNodeFeaturesHandler) + debug := root.SubRoute("debug") + debug.Use(g.adminAuthorized) + debugDeployment := debug.SubRoute("deployment") + debugDeployment.WithHandler("list", g.debugDeploymentListHandler) + debugDeployment.WithHandler("get", g.debugDeploymentGetHandler) + debugDeployment.WithHandler("history", g.debugDeploymentHistoryHandler) + debugDeployment.WithHandler("info", g.debugDeploymentInfoHandler) + debugDeployment.WithHandler("health", g.debugDeploymentHealthHandler) + perf := root.SubRoute("perf") perf.WithHandler("get", g.perfGetHandler) perf.WithHandler("get_all", g.perfGetAllHandler) diff --git a/pkg/zos_api/zos_api.go 
b/pkg/zos_api/zos_api.go index 789d69f1..d9287ce1 100644 --- a/pkg/zos_api/zos_api.go +++ b/pkg/zos_api/zos_api.go @@ -26,6 +26,7 @@ type ZosAPI struct { systemMonitorStub *stubs.SystemMonitorStub provisionStub *stubs.ProvisionStub networkerStub *stubs.NetworkerStub + vmStub *stubs.VMModuleStub statisticsStub *stubs.StatisticsStub storageStub *stubs.StorageModuleStub performanceMonitorStub *stubs.PerformanceMonitorStub @@ -51,6 +52,7 @@ func NewZosAPI(manager substrate.Manager, client zbus.Client, msgBrokerCon strin systemMonitorStub: stubs.NewSystemMonitorStub(client), provisionStub: stubs.NewProvisionStub(client), networkerStub: stubs.NewNetworkerStub(client), + vmStub: stubs.NewVMModuleStub(client), statisticsStub: stubs.NewStatisticsStub(client), storageStub: storageModuleStub, performanceMonitorStub: stubs.NewPerformanceMonitorStub(client),