Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
77b0db4
hypervisor: Add get_msr_based_features method
olivereanderson Jan 25, 2026
825cb6c
hypervisor: Add get_msr_index_list method to the hypervisor trait
olivereanderson Feb 18, 2026
4876e24
misc: Don't expose MSR buffer directly through the hypervisor
olivereanderson Feb 20, 2026
b1b9c1c
hypervisor: msr_filter method on Vm trait
olivereanderson Feb 20, 2026
344a35b
arch: Disable UINTR state components
olivereanderson Feb 25, 2026
e664287
arch: Disable CET in CPUID for CPU profiles
olivereanderson Feb 26, 2026
94bb60c
arch: Change CPU profile policies for MCA and MCE
olivereanderson Mar 3, 2026
7ffb7fb
arch: Change CPUID profile policy for WAITPKG
olivereanderson Feb 25, 2026
d62b30d
arch: Change CPU profile policy for KVM_CLOCKSOURCE
olivereanderson Mar 5, 2026
1f10b61
arch: Change CPU profile generation tool
olivereanderson Jan 26, 2026
89c4fca
arch: Lookup methods on CPUID definitions
olivereanderson Feb 17, 2026
dc2d401
arch: Add data structures for MSR definitions
olivereanderson Jan 7, 2026
1f757a6
arch: INTEL MSR-based feature definitions
olivereanderson Jan 7, 2026
df44c43
arch: MSR compatibility checks
olivereanderson Jan 16, 2026
12a5bd6
arch: Add a list of all Intel architectural MSRS
olivereanderson Feb 12, 2026
a43e3cd
arch: Include a list of non-architectural MSRs
olivereanderson Feb 13, 2026
501aa9b
arch: Add lists of KVM MSRS
olivereanderson Feb 13, 2026
e3e8d32
arch: Add a list of HyperV MSRs
olivereanderson Mar 5, 2026
6e098a1
arch: Add required MSR update functionality
olivereanderson Jan 21, 2026
ef4210e
arch: Deny MSR functionality
olivereanderson Feb 24, 2026
fdbaeb1
vmm: Apply MSR adjustments according to CPU profile (if any)
olivereanderson Jan 26, 2026
8d869a1
arch: Make the CPU profile generation tool MSR aware
olivereanderson Jan 26, 2026
7331ff7
arch: Use MSR aware profiles
olivereanderson Jan 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions arch/src/bin/generate-cpu-profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
feature = "cpu_profile_generation",
feature = "kvm"
))]
use std::io::BufWriter;

use anyhow::Context;
use clap::{Arg, Command};
Expand All @@ -27,12 +26,5 @@ fn main() -> anyhow::Result<()> {
let profile_name = cmd_arg.get_one::<String>("name").unwrap();

let hypervisor = hypervisor::new().context("Could not obtain hypervisor")?;
// TODO: Consider letting the user provide a file path as a target instead of writing to stdout.
// The way it is now should be sufficient for a PoC however.
let writer = BufWriter::new(std::io::stdout().lock());
arch::x86_64::cpu_profile_generation::generate_profile_data(
writer,
hypervisor.as_ref(),
profile_name,
)
arch::x86_64::cpu_profile_generation::generate_profile_data(hypervisor.as_ref(), profile_name)
}
183 changes: 161 additions & 22 deletions arch/src/x86_64/cpu_profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use std::io::Write;

use hypervisor::arch::x86::CpuIdEntry;
use hypervisor::arch::x86::{CpuIdEntry, MsrEntry};
use hypervisor::{CpuVendor, HypervisorType};
use log::error;
use serde::ser::SerializeStruct;
Expand All @@ -15,6 +15,7 @@ use thiserror::Error;
use crate::deserialize_u32_hex;
use crate::x86_64::CpuidReg;
use crate::x86_64::cpuid_definitions::Parameters;
use crate::x86_64::msr_definitions::RegisterAddress;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
Expand All @@ -30,7 +31,7 @@ pub enum CpuProfile {
}

impl CpuProfile {
/// Loads pre-generated data associated with a CPU profile.
/// Loads pre-generated CPUID data associated with a CPU profile.
///
/// If the `amx` flag is false then the AMX tile state components will be
/// zeroed out from the associated profile data. This is necessary because
Expand All @@ -39,18 +40,18 @@ impl CpuProfile {
//
// We can only generate CPU profiles for the KVM hypervisor for the time being.
#[cfg(feature = "kvm")]
pub(in crate::x86_64) fn data(&self, amx: bool) -> Option<CpuProfileData> {
let mut data: CpuProfileData = match self {
pub(in crate::x86_64) fn cpuid_data(&self, amx: bool) -> Option<CpuIdProfileData> {
let mut data: CpuIdProfileData = match self {
Self::Host => None,
Self::Skylake => Some(
serde_json::from_slice(include_bytes!("cpu_profiles/skylake.json"))
serde_json::from_slice(include_bytes!("cpu_profiles/skylake.cpuid.json"))
.inspect_err(|e| {
error!("BUG: could not deserialize CPU profile. Got error: {e:?}");
})
.expect("should be able to deserialize pre-generated data"),
),
Self::SapphireRapids => Some(
serde_json::from_slice(include_bytes!("cpu_profiles/sapphire-rapids.json"))
serde_json::from_slice(include_bytes!("cpu_profiles/sapphire-rapids.cpuid.json"))
.inspect_err(|e| {
error!("BUG: could not deserialize CPU profile. Got error: {e:?}");
})
Expand Down Expand Up @@ -84,25 +85,56 @@ impl CpuProfile {
}

#[cfg(not(feature = "kvm"))]
pub(in crate::x86_64) fn data(&self, _amx: bool) -> Option<CpuProfileData> {
pub(in crate::x86_64) fn cpuid_data(&self, _amx: bool) -> Option<CpuIdProfileData> {
if matches!(*self, Self::Host) {
return None;
}
// This will need to be addressed before upstreaming.
// We will probably need one profile per hypervisor.
unreachable!()
}

/// Returns the pre-generated MSR data embedded for this CPU profile.
///
/// The `Host` profile has no associated data, hence `None` is returned for it.
///
/// # Panics
///
/// Panics (after logging an error) if the embedded JSON cannot be
/// deserialized, since that indicates a bug in the pre-generated data.
#[cfg(feature = "kvm")]
pub(in crate::x86_64) fn msr_data(&self) -> Option<MsrProfileData> {
    // Select the embedded JSON first so that deserialization is written once.
    let raw_json: &[u8] = match self {
        Self::Host => return None,
        Self::Skylake => include_bytes!("cpu_profiles/skylake.msr.json"),
        Self::SapphireRapids => include_bytes!("cpu_profiles/sapphire-rapids.msr.json"),
    };
    let profile_data = serde_json::from_slice(raw_json)
        .inspect_err(|e| {
            error!("BUG: could not deserialize CPU profile. Got error: {e:?}");
        })
        .expect("should be able to deserialize pre-generated data");
    Some(profile_data)
}

/// Fallback used when the crate is built without KVM support.
///
/// Only the `Host` profile (which has no associated data) is expected here;
/// any other profile hits `unreachable!`.
#[cfg(not(feature = "kvm"))]
pub(in crate::x86_64) fn msr_data(&self) -> Option<MsrProfileData> {
    match self {
        Self::Host => None,
        // CPU profiles are currently only available when using KVM as the hypervisor.
        _ => unreachable!(),
    }
}
}

/// Every [`CpuProfile`] different from `Host` has associated [`CpuProfileData`].
/// Every [`CpuProfile`] different from `Host` has associated [`CpuIdProfileData`].
///
/// New constructors of this struct may only be generated through the CHV CLI (when built from source with
/// the `cpu-profile-generation` feature) which other hosts may then attempt to load in order to
/// increase the likelihood of successful live migrations among all hosts that opted in to the given
/// CPU profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(dead_code)]
pub struct CpuProfileData {
pub struct CpuIdProfileData {
/// The hypervisor used when generating this CPU profile.
pub(in crate::x86_64) hypervisor: HypervisorType,
/// The vendor of the CPU belonging to the host that generated this CPU profile.
Expand All @@ -111,19 +143,6 @@ pub struct CpuProfileData {
pub(in crate::x86_64) adjustments: Vec<(Parameters, CpuidOutputRegisterAdjustments)>,
}

/* TODO: The [`CpuProfile`] struct will likely need a few more iterations. The following
section should explain why:

# MSR restrictions

CPU profiles also need to restrict which MSRs may be manipulated by the guest as various physical CPUs
can have differing supported MSRs.

The CPU profile will thus necessarily need to contain some data related to MSR restrictions. That will
be taken care of in a follow up MR.

*/

/// Used for adjusting an entire cpuid output register (EAX, EBX, ECX or EDX)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
pub(super) struct CpuidOutputRegisterAdjustments {
Expand Down Expand Up @@ -276,10 +295,130 @@ impl CpuidOutputRegisterAdjustments {
}
}

/// Adjustment applied to the value of a single MSR-based feature register.
///
/// When applied, the resulting register value is
/// `(mask & original_value) | replacements`.
#[derive(Debug, Clone)]
pub(in crate::x86_64) struct FeatureMsrAdjustment {
/// Bits that are ANDed with the original register value.
pub(in crate::x86_64) mask: u64,
/// Bits that are ORed into the masked register value.
pub(in crate::x86_64) replacements: u64,
}

impl Serialize for FeatureMsrAdjustment {
    /// Serializes `mask` and `replacements` (in that order) as fixed-width,
    /// zero-padded, `0x`-prefixed hexadecimal strings.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // two bytes for "0x" prefix and 16 for the hex encoded number
        const ENCODED_LEN: usize = 18;

        let mut state = serializer.serialize_struct("FeatureMsrAdjustment", 2)?;
        for (name, raw) in [("mask", self.mask), ("replacements", self.replacements)] {
            let mut encoded = [0_u8; ENCODED_LEN];
            // The formatted value is exactly `ENCODED_LEN` bytes long, so it always fits.
            let _ = write!(&mut encoded[..], "{raw:#018x}");
            let text = core::str::from_utf8(&encoded[..])
                .expect("the buffer should be filled with valid UTF-8 bytes");
            state.serialize_field(name, text)?;
        }
        state.end()
    }
}

impl<'de> Deserialize<'de> for FeatureMsrAdjustment {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(Deserialize)]
struct ProvisionalFeatureMsrAdjustment<'a> {
#[serde(borrow)]
mask: &'a str,
#[serde(borrow)]
replacements: &'a str,
}

let ProvisionalFeatureMsrAdjustment { mask, replacements } =
ProvisionalFeatureMsrAdjustment::deserialize(deserializer)?;
let parse_u64 = |hex: &str, field_name: &str| {
u64::from_str_radix(hex.strip_prefix("0x").unwrap_or(""), 16).map_err(|_| {
<D::Error as serde::de::Error>::custom(format!("Unable to deserialize FeatureMsrAdjustment: could not deserialize {field_name} the value {hex} is not a hex encoded 64 bit integer"))
})
};
let mask = parse_u64(mask, "mask")?;
let replacements = parse_u64(replacements, "replacements")?;
Ok(FeatureMsrAdjustment { mask, replacements })
}
}

impl FeatureMsrAdjustment {
    /// Applies `adjustments` to the matching entries of `feature_msrs` and
    /// returns the adjusted entries, in the order given by `adjustments`.
    ///
    /// For every `(address, adjustment)` pair the first entry of `feature_msrs`
    /// whose index equals the address is copied, and its data is replaced by
    /// `(adjustment.mask & data) | adjustment.replacements`. Entries of
    /// `feature_msrs` that have no corresponding adjustment are not part of
    /// the output.
    ///
    /// # Errors
    ///
    /// The only way for this to error is if there exists one or more entries in
    /// `adjustments` that do not have a corresponding entry in `feature_msrs`.
    /// In this case the first missing MSR is logged and the unit type is
    /// returned as the error variant.
    pub(in crate::x86_64) fn adjust_to(
        adjustments: &[(RegisterAddress, FeatureMsrAdjustment)],
        feature_msrs: &[MsrEntry],
    ) -> Result<Vec<MsrEntry>, ()> {
        // Exactly one output entry is produced per adjustment, hence the
        // capacity is derived from `adjustments` rather than `feature_msrs`.
        let mut output_feature_msrs = Vec::with_capacity(adjustments.len());
        for (reg_address, adjustment) in adjustments {
            let Some(mut entry) = feature_msrs
                .iter()
                .find(|entry| entry.index == reg_address.0)
                .copied()
            else {
                error!(
                    "Did not find feature based MSR entry for MSR:={:#x}",
                    reg_address.0
                );
                return Err(());
            };
            // Keep the bits selected by the mask, then force the replacement bits.
            entry.data = (adjustment.mask & entry.data) | adjustment.replacements;
            // TODO: Perhaps trace! would be a better log level?
            log::debug!(
                "adjusted MSR-based feature: register address:={:#x} value:={:#x}",
                entry.index,
                entry.data
            );
            output_feature_msrs.push(entry);
        }
        Ok(output_feature_msrs)
    }
}

/// Groups a list of MSR entries with a list of MSR register addresses.
///
/// NOTE(review): the code producing and consuming this struct is not visible
/// here; the field descriptions below are inferred from the field names and
/// types and should be confirmed against the callers.
pub struct RequiredMsrUpdates {
/// MSR-based feature entries (presumably the adjusted values that must be
/// set; confirm against the producer).
pub msr_based_features: Vec<MsrEntry>,
/// Register addresses of MSRs that are presumably to be denied; confirm
/// against the consumer.
pub denied_msrs: Vec<RegisterAddress>,
}

/// Every [`CpuProfile`] different from `Host` has associated [`MsrProfileData`].
///
/// New instances of this data may only be generated through the CHV CLI (when built from source
/// with the `cpu_profile_generation` feature). Other hosts may then attempt to load the generated
/// data in order to increase the likelihood of successful live migrations among all hosts that
/// opted in to the given CPU profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(in crate::x86_64) struct MsrProfileData {
/// The vendor of the CPU this profile data is associated with (presumably
/// that of the host that generated the profile; confirm against the
/// generation tool).
pub(in crate::x86_64) cpu_vendor: CpuVendor,
/// The hypervisor type this profile data is associated with (presumably the
/// one used when generating the profile; confirm against the generation tool).
pub(in crate::x86_64) hypervisor_type: HypervisorType,
/// Per-register adjustments for MSR-based features, keyed by register address.
pub(in crate::x86_64) adjustments: Vec<(RegisterAddress, FeatureMsrAdjustment)>,
/// Register addresses of the MSRs permitted by this profile.
/// NOTE(review): how this list is enforced is not visible here; confirm.
pub(in crate::x86_64) permitted_msrs: Vec<RegisterAddress>,
}

/// Error signalling that required CPUID entries could not be found.
#[derive(Debug, Error)]
#[error("Required CPUID entries not found")]
pub struct MissingCpuidEntriesError;

/// Error signalling that required MSR entries could not be found.
#[derive(Debug, Error)]
#[error("Required MSR entries not found")]
pub struct MissingMsrEntriesError;

#[cfg(test)]
mod tests {
use proptest::prelude::*;
Expand Down
Loading
Loading