Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .air.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ tmp_dir = "tmp"
[build]
args_bin = []
bin = "./tmp/main"
cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && sudo setcap cap_net_admin,cap_net_bind_service=+eip ./tmp/main"
cmd = "go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api"
delay = 1000
exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"]
exclude_file = []
exclude_regex = ["_test.go"]
exclude_unchanged = false
follow_symlink = false
full_bin = ""
full_bin = "sudo ./tmp/main"
include_dir = []
include_ext = ["go", "tpl", "tmpl", "html", "yaml"]
include_file = []
Expand Down
21 changes: 21 additions & 0 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
hvType = hypervisor.Type(*request.Body.Hypervisor)
}

// Parse GPU configuration (vGPU mode)
var gpuConfig *instances.GPUConfig
if request.Body.Gpu != nil && request.Body.Gpu.Profile != nil && *request.Body.Gpu.Profile != "" {
gpuConfig = &instances.GPUConfig{
Profile: *request.Body.Gpu.Profile,
}
}

// Calculate default resource limits when not specified (0 = auto)
// Uses proportional allocation based on CPU: (vcpus / cpuCapacity) * resourceCapacity
if diskIOBps == 0 {
Expand Down Expand Up @@ -220,6 +228,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
Devices: deviceRefs,
Volumes: volumes,
Hypervisor: hvType,
GPU: gpuConfig,
}

inst, err := s.InstanceManager.CreateInstance(ctx, domainReq)
Expand Down Expand Up @@ -685,5 +694,17 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
oapiInst.Volumes = &oapiVolumes
}

// Convert GPU info
if inst.GPUProfile != "" {
gpu := &oapi.InstanceGPU{
Profile: lo.ToPtr(inst.GPUProfile),
}
// Only set MdevUuid when non-empty to avoid "mdev_uuid": "" in output
if inst.GPUMdevUUID != "" {
gpu.MdevUuid = lo.ToPtr(inst.GPUMdevUUID)
}
oapiInst.Gpu = gpu
}

return oapiInst
}
41 changes: 41 additions & 0 deletions cmd/api/api/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ func (s *ApiService) GetResources(ctx context.Context, _ oapi.GetResourcesReques
})
}

// Add GPU status if available
if status.GPU != nil {
gpuStatus := convertGPUResourceStatus(status.GPU)
resp.Gpu = &gpuStatus
}

return oapi.GetResources200JSONResponse(resp), nil
}

Expand All @@ -75,3 +81,38 @@ func convertResourceStatus(rs resources.ResourceStatus) oapi.ResourceStatus {
Source: source,
}
}

// convertGPUResourceStatus maps the internal GPU resource status onto its
// OpenAPI representation.
//
// Mode, TotalSlots and UsedSlots are always copied. Profiles and Devices are
// only assigned when the corresponding source slice is non-empty; otherwise
// those pointer fields keep their zero value.
//
// gs is dereferenced unconditionally, so callers must pass a non-nil pointer.
func convertGPUResourceStatus(gs *resources.GPUResourceStatus) oapi.GPUResourceStatus {
	var out oapi.GPUResourceStatus
	out.Mode = oapi.GPUResourceStatusMode(gs.Mode)
	out.TotalSlots = gs.TotalSlots
	out.UsedSlots = gs.UsedSlots

	// vGPU mode: translate each profile entry.
	if n := len(gs.Profiles); n > 0 {
		converted := make([]oapi.GPUProfile, 0, n)
		for _, src := range gs.Profiles {
			converted = append(converted, oapi.GPUProfile{
				Name:          src.Name,
				FramebufferMb: src.FramebufferMB,
				Available:     src.Available,
			})
		}
		out.Profiles = &converted
	}

	// Passthrough mode: translate each device entry.
	if n := len(gs.Devices); n > 0 {
		converted := make([]oapi.PassthroughDevice, 0, n)
		for _, src := range gs.Devices {
			converted = append(converted, oapi.PassthroughDevice{
				Name:      src.Name,
				Available: src.Available,
			})
		}
		out.Devices = &converted
	}

	return out
}
21 changes: 21 additions & 0 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/kernel/hypeman"
"github.com/kernel/hypeman/cmd/api/api"
"github.com/kernel/hypeman/cmd/api/config"
"github.com/kernel/hypeman/lib/devices"
"github.com/kernel/hypeman/lib/guest"
"github.com/kernel/hypeman/lib/hypervisor/qemu"
"github.com/kernel/hypeman/lib/instances"
Expand Down Expand Up @@ -200,6 +201,26 @@ func run() error {
return fmt.Errorf("reconcile device state: %w", err)
}

// Reconcile mdev devices (clears orphaned vGPUs from crashed VMs)
// Build mdev info from instances - only destroys mdevs tracked by hypeman
logger.Info("Reconciling mdev devices...")
var mdevInfos []devices.MdevReconcileInfo
if allInstances != nil {
for _, inst := range allInstances {
if inst.GPUMdevUUID != "" {
mdevInfos = append(mdevInfos, devices.MdevReconcileInfo{
InstanceID: inst.Id,
MdevUUID: inst.GPUMdevUUID,
IsRunning: inst.State == instances.StateRunning || inst.State == instances.StateUnknown,
})
}
}
}
if err := devices.ReconcileMdevs(app.Ctx, mdevInfos); err != nil {
// Log but don't fail - mdev cleanup is best-effort
logger.Warn("failed to reconcile mdev devices", "error", err)
}

// Initialize ingress manager (starts Caddy daemon and DNS server for dynamic upstreams)
logger.Info("Initializing ingress manager...")
if err := app.IngressManager.Initialize(app.Ctx); err != nil {
Expand Down
Loading