kernel版本:5.10.129
进入内核的时候,arm64处理器的异常级别可能是1或者2,函数el2_setup的主要工作如下:
1、如果异常级别是1,那么在异常级别1执行内核
2、如果异常级别是2, 那么根据处理器是否支持虚拟化宿主扩展(Virtualization Host Extensions,VHE),决定是否需要降级到异常级别1
具体实现代码如下:
/*
 * If we're fortunate enough to boot at EL2, ensure that the world is
 * sane before dropping to EL1.
 *
 * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
 * booted in EL1 or EL2 respectively.
 *
 * Register usage inside this function:
 *   x0, x1, x4 - scratch
 *   x2         - non-zero when VHE is present (kernel stays at EL2),
 *                zero otherwise (forced to zero without CONFIG_ARM64_VHE)
 *   x3         - value being assembled for mdcr_el2
 *   lr         - return address; copied to elr_el2 on the non-VHE path,
 *                which leaves via eret instead of ret
 */
SYM_FUNC_START(el2_setup)
	msr	SPsel, #1			// We want to use SP_EL{1,2}
	mrs	x0, CurrentEL
	cmp	x0, #CurrentEL_EL2
	b.eq	1f				// Booted at EL2: go do the EL2 setup
	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
	msr	sctlr_el1, x0
	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
	isb
	ret

1:	mov_q	x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
	msr	sctlr_el2, x0

#ifdef CONFIG_ARM64_VHE
	/*
	 * Check for VHE being present. For the rest of the EL2 setup,
	 * x2 being non-zero indicates that we do have VHE, and that the
	 * kernel is intended to run at EL2.
	 */
	mrs	x2, id_aa64mmfr1_el1
	ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
	mov	x2, xzr
#endif

	/* Hyp configuration. */
	mov_q	x0, HCR_HOST_NVHE_FLAGS
	cbz	x2, set_hcr
	mov_q	x0, HCR_HOST_VHE_FLAGS
set_hcr:
	msr	hcr_el2, x0
	isb

	/*
	 * Allow Non-secure EL1 and EL0 to access physical timer and counter.
	 * This is not necessary for VHE, since the host kernel runs in EL2,
	 * and EL0 accesses are configured in the later stage of boot process.
	 * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
	 * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
	 * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
	 * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
	 * EL2.
	 */
	cbnz	x2, 1f
	mrs	x0, cnthctl_el2
	orr	x0, x0, #3			// Enable EL1 physical timers
	msr	cnthctl_el2, x0
1:
	msr	cntvoff_el2, xzr		// Clear virtual offset

#ifdef CONFIG_ARM_GIC_V3
	/* GICv3 system register access */
	mrs	x0, id_aa64pfr0_el1
	ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
	cbz	x0, 3f

	mrs_s	x0, SYS_ICC_SRE_EL2
	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
	msr_s	SYS_ICC_SRE_EL2, x0
	isb					// Make sure SRE is now set
	mrs_s	x0, SYS_ICC_SRE_EL2		// Read SRE back,
	tbz	x0, #0, 3f			// and check that it sticks
	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults

3:
#endif

	/* Populate ID registers. */
	mrs	x0, midr_el1
	mrs	x1, mpidr_el1
	msr	vpidr_el2, x0
	msr	vmpidr_el2, x1

#ifdef CONFIG_COMPAT
	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
#endif

	/* EL2 debug */
	mrs	x1, id_aa64dfr0_el1
	sbfx	x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	x0, #1
	b.lt	4f				// Skip if no PMU present
	mrs	x0, pmcr_el0			// Disable debug access traps
	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
4:
	csel	x3, xzr, x0, lt			// all PMU counters from EL1

	/* Statistical profiling */
	ubfx	x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
	cbz	x0, 7f				// Skip if SPE not present
	cbnz	x2, 6f				// VHE?
	mrs_s	x4, SYS_PMBIDR_EL1		// If SPE available at EL2,
	and	x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
	cbnz	x4, 5f				// then permit sampling of physical
	mov	x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
		      1 << SYS_PMSCR_EL2_PA_SHIFT)
	msr_s	SYS_PMSCR_EL2, x4		// addresses and physical counter
5:
	mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
	orr	x3, x3, x1			// If we don't have VHE, then
	b	7f				// use EL1&0 translation.
6:						// For VHE, use EL2 translation
	orr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
7:
	msr	mdcr_el2, x3			// Configure debug traps

	/* LORegions */
	mrs	x1, id_aa64mmfr1_el1
	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
	cbz	x0, 1f
	msr_s	SYS_LORC_EL1, xzr
1:

	/* Stage-2 translation */
	msr	vttbr_el2, xzr

	cbz	x2, install_el2_stub		// No VHE: finish setup and drop to EL1

	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
	isb
	ret

SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
	/*
	 * When VHE is not in use, early init of EL2 and EL1 needs to be
	 * done here.
	 * When VHE _is_ in use, EL1 will not be used in the host and
	 * requires no configuration, and all non-hyp-specific EL2 setup
	 * will be done via the _EL1 system register aliases in __cpu_setup.
	 */
	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
	msr	sctlr_el1, x0

	/* Coprocessor traps. */
	mov	x0, #0x33ff
	msr	cptr_el2, x0			// Disable copro. traps to EL2

	/* SVE register access */
	mrs	x1, id_aa64pfr0_el1
	ubfx	x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
	cbz	x1, 7f

	bic	x0, x0, #CPTR_EL2_TZ		// Also disable SVE traps
	msr	cptr_el2, x0			// Disable copro. traps to EL2
	isb
	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
	msr_s	SYS_ZCR_EL2, x1			// length for EL1.

	/* Hypervisor stub */
7:	adr_l	x0, __hyp_stub_vectors
	msr	vbar_el2, x0

	/* spsr */
	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
		      PSR_MODE_EL1h)
	msr	spsr_el2, x0
	msr	elr_el2, lr			// eret resumes at the caller's return address
	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
	eret
SYM_FUNC_END(el2_setup)
该汇编函数,用w0寄存器保存返回值,返回值有两种:
BOOT_CPU_MODE_EL1:表示当前CPU跳入内核时处于权限级EL1
BOOT_CPU_MODE_EL2:表示当前CPU跳入内核时处于权限级EL2
该汇编函数中的状态寄存器访问指令:
MRS: 系统寄存器到通用寄存器的传送指令。
MRS:(Xt ⬅ 系统寄存器,例如 mrs x0, CurrentEL)
MSR: 通用寄存器(或立即数)到系统寄存器的传送指令。
MSR:(系统寄存器 ⬅ Xt 或立即数,例如 msr sctlr_el1, x0、msr SPsel, #1)
选择栈
msr SPsel, #1
由于当前异常级别可能是EL1也可能是EL2,因此用msr SPsel, #1选择当前异常级别自己的栈指针(而不是SP_EL0):如果当前为EL1,则使用SP_EL1;如果当前为EL2,则使用SP_EL2
判断当前的异常级别
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq 1f
CurrentEL就是获取PSTATE中current exception level域的特殊寄存器
#define CurrentEL_EL2 (2 << 2)
判断当前是否处于EL2:当CPU处于EL2时跳转到标号1,继续进行EL2相关的设置;否则(处于EL1)顺序执行下面的EL1设置并返回
设置EL2端模式
1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)msr sctlr_el2, x0
设置EL2下的端模式:ENDIAN_SET_EL2由内核配置决定,配置了CONFIG_CPU_BIG_ENDIAN时为大端,否则为小端
设置EL0/EL1的端模式
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)msr sctlr_el1, x0mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1isbret
如果处于EL1级别,按内核配置设置EL0和EL1下的端模式(配置了CONFIG_CPU_BIG_ENDIAN时为大端,否则为小端),将启动时的异常级别EL1保存到w0并返回
VHE支持
#ifdef CONFIG_ARM64_VHE/** Check for VHE being present. For the rest of the EL2 setup,* x2 being non-zero indicates that we do have VHE, and that the* kernel is intended to run at EL2.*/mrs x2, id_aa64mmfr1_el1ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#elsemov x2, xzr
#endif
通过读取id_aa64mmfr1_el1并用ubfx提取其中的VHE字段存入x2,判断CPU是支持虚拟化宿主扩展(VHE)模式,还是传统的分离Hyp模式;如果没有配置CONFIG_ARM64_VHE,则x2直接置0。
vhe的全称是Virtualization Host Extension support,是armv8.1的新特性,其最重要的作用就是支持type-2的hypervisor。这种扩展让kernel直接跑在el2上,这样可以减少host和guest之间share的寄存器,并减少overhead of virtualization。具体实现见下面的patch:https://lwn.net/Articles/650524/
Hyp configuration
/* Hyp configuration. */mov_q x0, HCR_HOST_NVHE_FLAGScbz x2, set_hcrmov_q x0, HCR_HOST_VHE_FLAGS
set_hcr:msr hcr_el2, x0isb
寄存器x2为0表示Hyp(非VHE)模式,此时hcr_el2写入HCR_HOST_NVHE_FLAGS;x2非0表示VHE模式,此时hcr_el2写入HCR_HOST_VHE_FLAGS
HCR_HOST_NVHE_FLAGS宏定义如下:
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
HCR_HOST_VHE_FLAGS宏定义如下:
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
access physical timer and counter
/** Allow Non-secure EL1 and EL0 to access physical timer and counter.* This is not necessary for VHE, since the host kernel runs in EL2,* and EL0 accesses are configured in the later stage of boot process.* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in* EL2.*/cbnz x2, 1fmrs x0, cnthctl_el2orr x0, x0, #3 // Enable EL1 physical timersmsr cnthctl_el2, x0
1:msr cntvoff_el2, xzr // Clear virtual offset
Hyp(非VHE)模式下,将cnthctl_el2的低两位置1,允许Non-secure EL1和EL0访问物理定时器和计数器,然后将虚拟偏移量cntvoff_el2清零;VHE模式只需要清零偏移量
GIC设置
/* GICv3 system register access */mrs x0, id_aa64pfr0_el1ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4cbz x0, 3f
ID_AA64PFR0_EL1的bit24~27(GIC字段)如果为0,表示不支持通过系统寄存器访问GIC CPU接口,直接跳转到3f;非0表示支持GICv3及以上的系统寄存器接口
mrs_s x0, SYS_ICC_SRE_EL2orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1msr_s SYS_ICC_SRE_EL2, x0isb // Make sure SRE is now setmrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,tbz x0, #0, 3f // and check that it sticksmsr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to
ICC_SRE_EL2_ENABLE表示Non-secure EL1 accesses to ICC_SRE_EL1 are permitted if EL3 is not present or ICC_SRE_EL3.Enable is 1, otherwise Non-secure EL1 accesses to ICC_SRE_EL1 trap to EL3
如果是GICv3,设置SYS_ICC_SRE_EL2、SYS_ICH_HCR_EL2寄存器,这两个寄存器是GICv3的CPU接口寄存器。设置完毕后需要重新读取来确认,如果设置不成功则跳转到3f
Populate ID registers
mrs x0, midr_el1mrs x1, mpidr_el1msr vpidr_el2, x0msr vmpidr_el2, x1
根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
hstr_el2清零
msr hstr_el2, xzr
在配置了CONFIG_COMPAT(支持AArch32用户态)时,将Hypervisor系统陷入寄存器HSTR_EL2清零,这样低异常级别在AArch32状态下访问CP15协处理器寄存器时,不会陷入到EL2中
EL2 debug
mrs x1, id_aa64dfr0_el1sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4cmp x0, #1b.lt 4f // Skip if no PMU presentmrs x0, pmcr_el0 // Disable debug access trapsubfx x0, x0, #11, #5 // to EL2 and allow access to
4:csel x3, xzr, x0, lt // all PMU counters from EL1
通过id_aa64dfr0_el1寄存器的pmu version来判断是否支持pmu
如果pmu version小于1则不支持PMU,直接将x3清零;
否则表示支持pmu,则从pmcr_el0的bit11~15提取一个5位字段(即事件计数器的数量)赋值给x3;x3是稍后要写入mdcr_el2的值,这样设置后EL1可以访问所有PMU计数器
Statistical profiling
/* Statistical profiling */ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4cbz x0, 7f // Skip if SPE not presentcbnz x2, 6f // VHE?mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)cbnz x4, 5f // then permit sampling of physicalmov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \1 << SYS_PMSCR_EL2_PA_SHIFT)msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
5:mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)orr x3, x3, x1 // If we don't have VHE, thenb 7f // use EL1&0 translation.
6: // For VHE, use EL2 translationorr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
7:msr mdcr_el2, x3 // Configure debug traps
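通过id_aa64dfr0_el1的PMSVer字段判断是否支持统计性能分析扩展(SPE),不支持则直接跳转到标号7。支持SPE时:非VHE模式下,如果SYS_PMBIDR_EL1的P位为0(SPE在EL2可用),则设置SYS_PMSCR_EL2允许采样物理地址和物理计数器,然后在x3中设置MDCR_EL2的E2PB字段,使用EL1&0的地址转换;VHE模式下,在x3中设置MDCR_EL2_TPMS,使用EL2的地址转换并禁止EL1访问。最后把x3写入mdcr_el2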
LORegions
mrs x1, id_aa64mmfr1_el1ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4cbz x0, 1fmsr_s SYS_LORC_EL1, xzr
Stage-2 translation
msr vttbr_el2, xzrcbz x2, install_el2_stubmov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2isbret
如上VHE和HYP模式将分别走不同的分支
如果是VHE模式,直接返回w0为BOOT_CPU_MODE_EL2。由于进入内核时CPU处于EL2,所以直接调用ret指令返回后,CPU仍然处于EL2。install_el2_stub是Hyp(非VHE)模式相关的设置和返回代码,最后通过eret降级到EL1
install_el2_stub
SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) /** When VHE is not in use, early init of EL2 and EL1 needs to be * done here.* When VHE _is_ in use, EL1 will not be used in the host and* requires no configuration, and all non-hyp-specific EL2 setup* will be done via the _EL1 system register aliases in __cpu_setup. */mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ mov x0, #0x33ffmsr cptr_el2, x0 // Disable copro. traps to EL2 /* SVE register access */mrs x1, id_aa64pfr0_el1ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 cbz x1, 7f bic x0, x0, #CPTR_EL2_TZ // Also disable SVE trapsmsr cptr_el2, x0 // Disable copro. traps to EL2 isb mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector msr_s SYS_ZCR_EL2, x1 // length for EL1. /* Hypervisor stub */
7: adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 /* spsr */mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lrmov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret
当非VHE模式下,用于EL1和EL2的早期初始化
总结
el2_setup根据当前CPU处于EL1还是EL2主要做了如下的工作:
1、设置当前异常级别的栈
2、判断当前的异常级别,并据此设置端模式;
3、如果支持VHE,设置VHE,否则设置传统HYP模式
4、允许EL1/EL0访问物理定时器和计数器(非VHE模式),并清零虚拟计数器偏移量
5、GIC设置
6、根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
7、hstr_el2清零
8、EL2 debug相关寄存器设置