From 4f26dfc483d89087d3dd0840da515d24864c5320 Mon Sep 17 00:00:00 2001 From: Brian Perkins Date: Tue, 27 Jan 2026 23:03:01 +0000 Subject: [PATCH 1/3] WIP: Support for updating the TSC provided by MSHV at runtime. --- arch/x86/include/asm/mshyperv.h | 2 + arch/x86/kernel/cpu/mshyperv.c | 4 +- drivers/hv/mshv_vtl_main.c | 62 +++++++++++++++++++++++++++++++ include/asm-generic/hyperv-tlfs.h | 9 +++++ include/uapi/hyperv/hvgdk_mini.h | 12 ++++++ include/uapi/linux/mshv.h | 8 ++++ 6 files changed, 95 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1addcdbe7c6f6..d5ab4fb40645a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -241,6 +241,8 @@ void hyperv_setup_mmu_ops(void); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); +void hv_save_sched_clock_state(void); +void hv_restore_sched_clock_state(void); int hyperv_flush_guest_mapping(u64 as); int hyperv_flush_guest_mapping_range(u64 as, hyperv_fill_flush_list_func fill_func, void *data); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 55c69b2a43507..cf30ca96bfee1 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -266,13 +266,13 @@ static void restore_hv_clock_tsc_state(void) * suspend-resume and the offset used to measure time needs to be * corrected, post resume. 
*/ -static void hv_save_sched_clock_state(void) +void hv_save_sched_clock_state(void) { old_save_sched_clock_state(); save_hv_clock_tsc_state(); } -static void hv_restore_sched_clock_state(void) +void hv_restore_sched_clock_state(void) { restore_hv_clock_tsc_state(); old_restore_sched_clock_state(); diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index 92c5d1c37fdd7..1f88e7dea114e 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -32,6 +32,7 @@ #ifdef CONFIG_X86_64 #include +#include #include #include @@ -42,6 +43,7 @@ #include #include "../../kernel/fpu/legacy.h" +#include "../../kernel/time/timekeeping.h" #endif @@ -865,6 +867,59 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg) return 0; } +#ifdef CONFIG_X86_64 +// void hv_save_sched_clock_state(void); +// void hv_restore_sched_clock_state(void); + +static int restore_partition_time_with_cpus_stopped(void *data) +{ + struct mshv_partition_time *partition_time = data; + struct hv_input_restore_partition_time *input; + unsigned long irq_flags; + int result = 0; + u64 status; + + local_irq_save(irq_flags); + + lock_map_acquire_try(&tick_freeze_map); + sched_clock_suspend(); + timekeeping_suspend(); + + hv_save_sched_clock_state(); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + input->partition_id = HV_PARTITION_ID_SELF; + input->tsc_sequence = partition_time->tsc_sequence; + input->reserved = 0; + input->reference_time_in_100_ns = partition_time->reference_time_in_100_ns; + input->tsc = partition_time->tsc; + status = hv_do_hypercall(HVCALL_RESTORE_PARTITION_TIME, input, NULL); + if (hv_result_success(status)) { + hv_restore_sched_clock_state(); + } else { + pr_err("HVCALL_RESTORE_PARTITION_TIME failed ! 
[Err: %#llx\n]", status); + result = -EINVAL; + } + + timekeeping_resume(); + sched_clock_resume(); + lock_map_release(&tick_freeze_map); + + local_irq_restore(irq_flags); + + return result; +} + +static int mshv_restore_partition_time(void __user *arg) +{ + struct mshv_partition_time partition_time; + + if (copy_from_user(&partition_time, arg, sizeof(partition_time))) + return -EFAULT; + + return stop_machine(restore_partition_time_with_cpus_stopped, &partition_time, cpu_online_mask); +} +#endif + static void mshv_vtl_cancel(int cpu) { int here = get_cpu(); @@ -2596,6 +2651,13 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_VTL_ADD_VTL0_MEMORY: ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg); break; + +#if defined(CONFIG_X86_64) + case MSHV_RESTORE_PARTITION_TIME: + ret = mshv_restore_partition_time((void __user *)arg); + break; +#endif + #if defined(CONFIG_X86_64) && defined(CONFIG_INTEL_TDX_GUEST) case MSHV_VTL_TDCALL: ret = mshv_vtl_ioctl_tdcall((void __user *)arg); diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index ddeef1ebbad8c..e04523711294f 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -197,6 +197,7 @@ union hv_reference_tsc_msr { #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db +#define HVCALL_RESTORE_PARTITION_TIME 0x0103 #define HVCALL_MMIO_READ 0x0106 #define HVCALL_MMIO_WRITE 0x0107 @@ -1002,4 +1003,12 @@ struct hv_enable_vp_vtl { u16 mbz1; struct hv_init_vp_context vp_context; } __packed; + +struct hv_input_restore_partition_time { + u64 partition_id; + u32 tsc_sequence; + u32 reserved; + u64 reference_time_in_100_ns; + u64 tsc; +} __packed; #endif diff --git a/include/uapi/hyperv/hvgdk_mini.h b/include/uapi/hyperv/hvgdk_mini.h index 12b069822c4d1..a977c733818d0 100644 --- 
a/include/uapi/hyperv/hvgdk_mini.h +++ b/include/uapi/hyperv/hvgdk_mini.h @@ -261,6 +261,8 @@ union hv_hypervisor_version_info { #define HVCALL_GET_VP_CPUID_VALUES 0x00f4 #define HVCALL_START_VP 0x0099 #define HVCALL_GET_VP_INDEX_FROM_APIC_ID 0x009a +#define HVCALL_RESTORE_PARTITION_TIME 0x0103 + /* * Some macros - i.e. GENMASK_ULL and BIT_ULL - are not currently supported by @@ -1310,4 +1312,14 @@ struct hv_input_install_intercept { union hv_intercept_parameters intercept_parameter; } __packed; +#if defined(__x86_64__) +struct hv_input_restore_partition_time { + __u64 partition_id; + __u32 tsc_sequence; + __u32 reserved; + __u64 reference_time_in_100_ns; + __u64 tsc; +} __packed; +#endif + #endif /* _UAPI_HV_HVGDK_MINI_H */ diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index bf9cc25f7bdac..0037758ebce5e 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -306,6 +306,13 @@ struct mshv_map_device_intr { __u8 padding[7]; } __packed; +struct mshv_partition_time { + __u32 tsc_sequence; + __u32 reserved; + __u64 reference_time_in_100_ns; + __u64 tsc; +} __packed; + #define MSHV_KICK_CPUS_FLAG_WAIT_FOR_CPUS (1 << 0) #define MSHV_KICK_CPUS_FLAG_CANCEL_CPU_RUN (1 << 1) @@ -371,6 +378,7 @@ struct mshv_map_device_intr { struct mshv_map_device_intr) /* For x86-64 only */ +#define MSHV_RESTORE_PARTITION_TIME _IOW(MSHV_IOCTL, 0x13, struct mshv_partition_time) #define MSHV_VTL_GUEST_VSM_VMSA_PFN _IOWR(MSHV_IOCTL, 0x34, __u64) /* For x86-64 SEV-SNP only */ From 081d3c756bcb6b688c73708e1ae9ee51929f4c14 Mon Sep 17 00:00:00 2001 From: Brian Perkins Date: Fri, 30 Jan 2026 18:54:12 +0000 Subject: [PATCH 2/3] run restore partition time logic on a single CPU --- drivers/hv/mshv_vtl_main.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index 1f88e7dea114e..c65de943f8c21 100644 --- a/drivers/hv/mshv_vtl_main.c +++ 
b/drivers/hv/mshv_vtl_main.c @@ -868,24 +868,19 @@ static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg) } #ifdef CONFIG_X86_64 -// void hv_save_sched_clock_state(void); -// void hv_restore_sched_clock_state(void); - static int restore_partition_time_with_cpus_stopped(void *data) { struct mshv_partition_time *partition_time = data; struct hv_input_restore_partition_time *input; - unsigned long irq_flags; int result = 0; u64 status; - local_irq_save(irq_flags); - - lock_map_acquire_try(&tick_freeze_map); + // Save current clock state. No other CPUs are running so no locks are taken. sched_clock_suspend(); timekeeping_suspend(); - hv_save_sched_clock_state(); + + // Interrupts are disabled, make the hypercall to update the TSC. input = *this_cpu_ptr(hyperv_pcpu_input_arg); input->partition_id = HV_PARTITION_ID_SELF; input->tsc_sequence = partition_time->tsc_sequence; @@ -893,30 +888,28 @@ static int restore_partition_time_with_cpus_stopped(void *data) input->reference_time_in_100_ns = partition_time->reference_time_in_100_ns; input->tsc = partition_time->tsc; status = hv_do_hypercall(HVCALL_RESTORE_PARTITION_TIME, input, NULL); - if (hv_result_success(status)) { - hv_restore_sched_clock_state(); - } else { + if (!hv_result_success(status)) { pr_err("HVCALL_RESTORE_PARTITION_TIME failed ! [Err: %#llx\n]", status); result = -EINVAL; } + // Restore clock state using current TSC value. + hv_restore_sched_clock_state(); timekeeping_resume(); sched_clock_resume(); - lock_map_release(&tick_freeze_map); - - local_irq_restore(irq_flags); return result; } static int mshv_restore_partition_time(void __user *arg) { struct mshv_partition_time partition_time; if (copy_from_user(&partition_time, arg, sizeof(partition_time))) return -EFAULT; - return stop_machine(restore_partition_time_with_cpus_stopped, &partition_time, cpu_online_mask); + // stop_machine() may sleep, so IRQs stay on; the callback runs on one CPU with IRQs off. 
+ return stop_machine(restore_partition_time_with_cpus_stopped, &partition_time, NULL); } #endif From 9a25f51e1940b8e2305ff129ad1b3324ee663fd9 Mon Sep 17 00:00:00 2001 From: Brian Perkins Date: Fri, 30 Jan 2026 19:45:09 +0000 Subject: [PATCH 3/3] minor PR feedback --- drivers/hv/mshv_vtl_main.c | 10 +++++----- include/uapi/linux/mshv.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index c65de943f8c21..75a5683b3506f 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -875,12 +875,12 @@ static int restore_partition_time_with_cpus_stopped(void *data) int result = 0; u64 status; - // Save current clock state. No other CPUs are running so no locks are taken. + /* Save current clock state. No other CPUs are running so no locks are taken. */ sched_clock_suspend(); timekeeping_suspend(); hv_save_sched_clock_state(); - // Interrupts are disabled, make the hypercall to update the TSC. + /* Interrupts are disabled, make the hypercall to update the TSC. */ input = *this_cpu_ptr(hyperv_pcpu_input_arg); input->partition_id = HV_PARTITION_ID_SELF; input->tsc_sequence = partition_time->tsc_sequence; @@ -889,11 +889,11 @@ static int restore_partition_time_with_cpus_stopped(void *data) input->tsc = partition_time->tsc; status = hv_do_hypercall(HVCALL_RESTORE_PARTITION_TIME, input, NULL); if (!hv_result_success(status)) { - pr_err("HVCALL_RESTORE_PARTITION_TIME failed ! [Err: %#llx\n]", status); + pr_err("HVCALL_RESTORE_PARTITION_TIME failed with %#llx\n", status); result = -EINVAL; } - // Restore clock state using current TSC value. + /* Restore clock state using current TSC value. 
*/ hv_restore_sched_clock_state(); timekeeping_resume(); sched_clock_resume(); @@ -908,7 +908,7 @@ static int mshv_restore_partition_time(void __user *arg) if (copy_from_user(&partition_time, arg, sizeof(partition_time))) return -EFAULT; - // stop_machine() may sleep, so IRQs stay on; the callback runs on one CPU with IRQs off. + /* stop_machine() may sleep, so IRQs stay on; the callback runs on one CPU with IRQs off. */ return stop_machine(restore_partition_time_with_cpus_stopped, &partition_time, NULL); } #endif diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 0037758ebce5e..26ca0f99779fc 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -309,8 +309,8 @@ struct mshv_map_device_intr { struct mshv_partition_time { __u32 tsc_sequence; __u32 reserved; - __u64 reference_time_in_100_ns; - __u64 tsc; + __u64 reference_time_in_100_ns; + __u64 tsc; } __packed; #define MSHV_KICK_CPUS_FLAG_WAIT_FOR_CPUS (1 << 0)