kernel版本:5.10.129
进入内核的时候,arm64处理器的异常级别可能是1或者2,函数el2_setup的主要工作如下:
1、如果异常级别是1,那么在异常级别1执行内核
2、如果异常级别是2, 那么根据处理器是否支持虚拟化宿主扩展(Virtualization Host Extensions,VHE),决定是否需要降级到异常级别1:支持VHE时内核继续在异常级别2执行,否则降级到异常级别1执行

具体实现代码如下:

/*
 * el2_setup - bring EL2 into a known state when the CPU was entered at EL2,
 * then continue at EL2 (VHE) or drop to EL1 (non-VHE).
 *
 * Out:   w0 = BOOT_CPU_MODE_EL1 if the CPU was entered at EL1,
 *        w0 = BOOT_CPU_MODE_EL2 if the CPU was entered at EL2.
 * Uses:  x0-x4 as scratch; x2 is the "VHE present" flag for the whole EL2
 *        path (non-zero => VHE); x3 accumulates the value written to
 *        MDCR_EL2.
 * Exit:  "ret" on the EL1 path and on the VHE path; "eret" to the caller's
 *        return address (lr) at EL1h on the non-VHE path.
 */
SYM_FUNC_START(el2_setup)
	msr	SPsel, #1			// Select SP_ELx of the current EL
	mrs	x0, CurrentEL
	cmp	x0, #CurrentEL_EL2
	b.eq	1f				// Entered at EL2: go configure EL2

	/* Entered at EL1: only SCTLR_EL1 needs setting up. */
	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
	msr	sctlr_el1, x0
	mov	w0, #BOOT_CPU_MODE_EL1		// Report: booted in EL1
	isb
	ret

	/* Entered at EL2 from here on. */
1:	mov_q	x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
	msr	sctlr_el2, x0

#ifdef CONFIG_ARM64_VHE
	/*
	 * Probe for VHE. From this point to the end of the EL2 setup,
	 * a non-zero x2 means VHE is available and the kernel is meant
	 * to keep running at EL2.
	 */
	mrs	x2, id_aa64mmfr1_el1
	ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
	mov	x2, xzr				// VHE support not built in
#endif

	/* Hypervisor configuration: pick the HCR_EL2 value by mode. */
	mov_q	x0, HCR_HOST_NVHE_FLAGS
	cbz	x2, set_hcr			// No VHE: keep the nVHE flags
	mov_q	x0, HCR_HOST_VHE_FLAGS

set_hcr:
	msr	hcr_el2, x0
	isb

	/*
	 * Let Non-secure EL1 and EL0 use the physical timer and counter.
	 * Skipped for VHE: the host kernel itself runs at EL2 and the EL0
	 * accesses are configured later during boot. With HCR_EL2.E2H == 1,
	 * CNTHCTL_EL2 takes the CNTKCTL_EL1 bit layout and CNTKCTL_EL1
	 * accesses are redirected to CNTHCTL_EL2, so a kernel written for
	 * EL1 can manage the EL0 bits through CNTKCTL_EL1 while at EL2.
	 */
	cbnz	x2, 1f
	mrs	x0, cnthctl_el2
	orr	x0, x0, #3			// Enable EL1 physical timers
	msr	cnthctl_el2, x0
1:
	msr	cntvoff_el2, xzr		// Clear virtual offset

#ifdef CONFIG_ARM_GIC_V3
	/* GICv3 system register access */
	mrs	x0, id_aa64pfr0_el1
	ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
	cbz	x0, 3f				// Field is zero: skip GIC setup

	mrs_s	x0, SYS_ICC_SRE_EL2
	orr	x0, x0, #ICC_SRE_EL2_SRE	// ICC_SRE_EL2.SRE = 1
	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// ICC_SRE_EL2.Enable = 1
	msr_s	SYS_ICC_SRE_EL2, x0
	isb					// Make sure SRE is now set
	mrs_s	x0, SYS_ICC_SRE_EL2		// Read it back ...
	tbz	x0, #0, 3f			// ... bit 0 did not stick: skip
	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICH_HCR_EL2 to defaults

3:
#endif

	/* Populate the virtualised ID registers from the physical ones. */
	mrs	x0, midr_el1
	mrs	x1, mpidr_el1
	msr	vpidr_el2, x0
	msr	vmpidr_el2, x1

#ifdef CONFIG_COMPAT
	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
#endif

	/*
	 * EL2 debug: start building the MDCR_EL2 value in x3.
	 * sbfx is a signed extract, so the b.lt below is taken for any
	 * PMUVer field value that is below 1 as a signed 4-bit number.
	 */
	mrs	x1, id_aa64dfr0_el1
	sbfx	x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	x0, #1
	b.lt	4f				// Skip if no PMU present
	mrs	x0, pmcr_el0			// Disable debug access traps
	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
4:
	/* Flags still come from the cmp above: lt => x3 = 0, else x3 = x0. */
	csel	x3, xzr, x0, lt			// all PMU counters from EL1

	/* Statistical profiling */
	ubfx	x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
	cbz	x0, 7f				// Skip if SPE not present
	cbnz	x2, 6f				// VHE?
	mrs_s	x4, SYS_PMBIDR_EL1		// If SPE available at EL2,
	and	x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
	cbnz	x4, 5f				// then permit sampling of physical
	mov	x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
		      1 << SYS_PMSCR_EL2_PA_SHIFT)
	msr_s	SYS_PMSCR_EL2, x4		// addresses and physical counter
5:
	mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
	orr	x3, x3, x1			// If we don't have VHE, then
	b	7f				// use EL1&0 translation.
6:						// For VHE, use EL2 translation
	orr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
7:
	msr	mdcr_el2, x3			// Configure debug traps

	/* LORegions: clear LORC_EL1 when the LOR field is non-zero. */
	mrs	x1, id_aa64mmfr1_el1
	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
	cbz	x0, 1f
	msr_s	SYS_LORC_EL1, xzr
1:

	/* Stage-2 translation */
	msr	vttbr_el2, xzr

	cbz	x2, install_el2_stub		// No VHE: finish setup, drop to EL1

	/* VHE: stay at EL2 and return normally. */
	mov	w0, #BOOT_CPU_MODE_EL2		// Report: booted in EL2
	isb
	ret

SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
	/*
	 * Without VHE, the early initialisation of both EL2 and EL1 has
	 * to happen here.
	 * With VHE, the host does not use EL1, so it needs no
	 * configuration, and all non-hyp-specific EL2 setup is done
	 * through the _EL1 system register aliases in __cpu_setup.
	 */
	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
	msr	sctlr_el1, x0

	/* Coprocessor traps. */
	mov	x0, #0x33ff
	msr	cptr_el2, x0			// Disable copro. traps to EL2

	/* SVE register access */
	mrs	x1, id_aa64pfr0_el1
	ubfx	x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
	cbz	x1, 7f				// SVE field is zero: skip

	bic	x0, x0, #CPTR_EL2_TZ		// Also disable SVE traps
	msr	cptr_el2, x0			// Disable copro. traps to EL2
	isb
	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
	msr_s	SYS_ZCR_EL2, x1			// length for EL1.

	/* Hypervisor stub */
7:	adr_l	x0, __hyp_stub_vectors
	msr	vbar_el2, x0

	/* spsr: eret lands in EL1h with D, A, I and F masked. */
	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
		      PSR_MODE_EL1h)
	msr	spsr_el2, x0
	msr	elr_el2, lr			// eret resumes at the caller's return address
	mov	w0, #BOOT_CPU_MODE_EL2		// Report: booted in EL2
	eret
SYM_FUNC_END(el2_setup)

该汇编函数,用w0寄存器保存返回值,返回值有两种:

BOOT_CPU_MODE_EL1:表示当前CPU跳入内核时处于权限级EL1

BOOT_CPU_MODE_EL2:表示当前CPU跳入内核时处于权限级EL2

该汇编函数中的系统寄存器访问指令(AArch64):
MRS: 系统寄存器到通用寄存器的传送指令。
MRS:(Xt⬅系统寄存器),例如 mrs x0, CurrentEL
MSR: 通用寄存器(或立即数)到系统寄存器的传送指令。
MSR:(系统寄存器⬅Xt),例如 msr sctlr_el1, x0;msr SPsel, #1 为立即数形式

选择栈

	msr	SPsel, #1

由于当前异常级别可能是EL1也可能是EL2,msr SPsel, #1 表示选择使用当前异常级别自己的栈指针SP_ELx(而不是SP_EL0):如果当前为EL1,则使用SP_EL1;如果当前为EL2,则使用SP_EL2

判断当前的异常级别

	mrs	x0, CurrentEL
	cmp	x0, #CurrentEL_EL2
	b.eq	1f

CurrentEL是用于读取PSTATE中当前异常级别(current exception level)字段的特殊寄存器
#define CurrentEL_EL2 (2 << 2)
判断当前是否处于EL2:当CPU处于EL2时跳转到标号1,否则(处于EL1)继续顺序执行后面的EL1设置代码

设置EL2端模式

	1:      mov_q   x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)msr     sctlr_el2, x0

根据内核配置设置EL2的端模式:配置了CONFIG_CPU_BIG_ENDIAN时ENDIAN_SET_EL2会置位SCTLR_EL2.EE(大端),否则ENDIAN_SET_EL2为0(小端)

设置EL0/EL1的端模式

	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)msr	sctlr_el1, x0mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1isbret

如果处于EL1级别,根据内核配置设置EL0和EL1的端模式(配置了CONFIG_CPU_BIG_ENDIAN时为大端,否则为小端),将启动时的异常级别BOOT_CPU_MODE_EL1保存到w0后返回

VHE支持

#ifdef CONFIG_ARM64_VHE/** Check for VHE being present. For the rest of the EL2 setup,* x2 being non-zero indicates that we do have VHE, and that the* kernel is intended to run at EL2.*/mrs	x2, id_aa64mmfr1_el1ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#elsemov	x2, xzr
#endif

通过读取id_aa64mmfr1_el1,存入X2, 判断CPU是支持虚拟主机扩展VHE模式,还是传统的分离Hyp模式。

VHE的全称是Virtualization Host Extensions,是ARMv8.1的新特性,其最主要的作用就是支持type-2的hypervisor。这种扩展让kernel直接跑在EL2上,这样可以减少host和guest之间共享的寄存器,并降低虚拟化的开销(overhead of virtualization)。具体实现见下面的patch:https://lwn.net/Articles/650524/

Hyp configuration

	/* Hyp configuration. */mov_q	x0, HCR_HOST_NVHE_FLAGScbz	x2, set_hcrmov_q	x0, HCR_HOST_VHE_FLAGS
set_hcr:msr	hcr_el2, x0isb

寄存器x2为0表示传统的Hyp(非VHE)模式,此时向hcr_el2写入HCR_HOST_NVHE_FLAGS;x2非0表示VHE模式,此时向hcr_el2写入HCR_HOST_VHE_FLAGS
HCR_HOST_NVHE_FLAGS宏定义如下:

#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)

HCR_HOST_VHE_FLAGS宏定义如下:

#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)

access physical timer and counter

	/** Allow Non-secure EL1 and EL0 to access physical timer and counter.* This is not necessary for VHE, since the host kernel runs in EL2,* and EL0 accesses are configured in the later stage of boot process.* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in* EL2.*/cbnz	x2, 1fmrs	x0, cnthctl_el2orr	x0, x0, #3			// Enable EL1 physical timersmsr	cnthctl_el2, x0
1:msr	cntvoff_el2, xzr		// Clear virtual offset

Hyp(非VHE)模式下,将CNTHCTL_EL2的低两位置1,允许Non-secure EL1和EL0访问物理定时器和计数器,然后将虚拟偏移量CNTVOFF_EL2清零;VHE模式下跳过前一步,只需要清零偏移量

GIC设置

	/* GICv3 system register access */mrs	x0, id_aa64pfr0_el1ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4cbz	x0, 3f

ID_AA64PFR0_EL1的bit24~27(GIC字段)如果为0,表示未实现通过系统寄存器访问GIC CPU接口(即不支持GICv3的系统寄存器接口),直接跳转到3f;非0(例如1)表示支持

	mrs_s	x0, SYS_ICC_SRE_EL2
	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
	msr_s	SYS_ICC_SRE_EL2, x0
	isb					// Make sure SRE is now set
	mrs_s	x0, SYS_ICC_SRE_EL2		// Read SRE back,
	tbz	x0, #0, 3f			// and check that it sticks
	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
3:

ICC_SRE_EL2_ENABLE表示Non-secure EL1 accesses to ICC_SRE_EL1 are permitted if EL3 is not present or ICC_SRE_EL3.Enable is 1, otherwise Non-secure EL1 accesses to ICC_SRE_EL1 trap to EL3

如果是GICv3,设置SYS_ICC_SRE_EL2、SYS_ICH_HCR_EL2寄存器,这两个寄存器是GICv3的CPU接口寄存器。设置完毕后需要重新读取来确认,如果设置不成功则跳转到3f

Populate ID registers

	mrs	x0, midr_el1mrs	x1, mpidr_el1msr	vpidr_el2, x0msr	vmpidr_el2, x1

根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器

hstr_el2清零

	msr	hstr_el2, xzr

将Hypervisor系统陷入寄存器HSTR_EL2清零(仅在配置了CONFIG_COMPAT时执行)。该寄存器控制AArch32状态下对CP15协处理器寄存器的访问是否陷入EL2;清零后,这些访问不会陷入到EL2中

EL2 debug

	mrs	x1, id_aa64dfr0_el1sbfx	x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4cmp	x0, #1b.lt	4f				// Skip if no PMU presentmrs	x0, pmcr_el0			// Disable debug access trapsubfx	x0, x0, #11, #5			// to EL2 and allow access to
4:csel	x3, xzr, x0, lt			// all PMU counters from EL1

通过id_aa64dfr0_el1寄存器的PMUVer字段来判断是否支持PMU
如果PMUVer小于1则不支持PMU,直接将x3清零(sbfx按有符号数提取,因此字段值0和0xF都会被视为不支持);
否则表示支持PMU,从pmcr_el0的bit11~15(PMCR_EL0.N)取出事件计数器的数量赋值给x3;x3随后会被写入mdcr_el2,使EL1可以访问全部PMU计数器

Statistical profiling

	/* Statistical profiling */ubfx	x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4cbz	x0, 7f				// Skip if SPE not presentcbnz	x2, 6f				// VHE?mrs_s	x4, SYS_PMBIDR_EL1		// If SPE available at EL2,and	x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)cbnz	x4, 5f				// then permit sampling of physicalmov	x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \1 << SYS_PMSCR_EL2_PA_SHIFT)msr_s	SYS_PMSCR_EL2, x4		// addresses and physical counter
5:mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)orr	x3, x3, x1			// If we don't have VHE, thenb	7f				// use EL1&0 translation.
6:						// For VHE, use EL2 translationorr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
7:msr	mdcr_el2, x3			// Configure debug traps

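通过id_aa64dfr0_el1的PMSVer字段判断是否支持SPE(Statistical Profiling Extension):不支持则直接跳到标号7;非VHE模式下在x3中设置MDCR_EL2的E2PB字段,使用EL1&0地址转换;VHE模式下在x3中设置MDCR_EL2_TPMS,使用EL2地址转换并禁止EL1访问;最后将x3写入mdcr_el2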

LORegions

	mrs	x1, id_aa64mmfr1_el1ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4cbz	x0, 1fmsr_s	SYS_LORC_EL1, xzr

Stage-2 translation

	msr	vttbr_el2, xzrcbz	x2, install_el2_stubmov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2isbret

如上VHE和HYP模式将分别走不同的分支

如果是VHE模式,直接返回,w0为BOOT_CPU_MODE_EL2。由于进入内核时CPU处于EL2,所以直接调用ret指令返回后,CPU仍然运行在EL2。install_el2_stub是Hyp(非VHE)模式相关的设置和返回代码,它最后通过eret返回并降级到EL1

install_el2_stub


SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)                                                                                                   /** When VHE is not in use, early init of EL2 and EL1 needs to be                                                                         * done here.* When VHE _is_ in use, EL1 will not be used in the host and* requires no configuration, and all non-hyp-specific EL2 setup* will be done via the _EL1 system register aliases in __cpu_setup.                                                                     */mov_q   x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)                                                                                            msr     sctlr_el1, x0                                                                                                                    /* Coprocessor traps. */                                                                                                                 mov     x0, #0x33ffmsr     cptr_el2, x0                    // Disable copro. traps to EL2                                                                   /* SVE register access */mrs     x1, id_aa64pfr0_el1ubfx    x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4                                                                                               cbz     x1, 7f                                                                                                                           bic     x0, x0, #CPTR_EL2_TZ            // Also disable SVE trapsmsr     cptr_el2, x0                    // Disable copro. traps to EL2                                                                   isb     mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector                                                                       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.                                                                               /* Hypervisor stub */
7:      adr_l   x0, __hyp_stub_vectors                                                                                                           msr     vbar_el2, x0                                                                                                                     /* spsr */mov     x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\                                                                           PSR_MODE_EL1h)                                                                                                             msr     spsr_el2, x0                                                                                                                     msr     elr_el2, lrmov     w0, #BOOT_CPU_MODE_EL2          // This CPU booted in EL2                                                                        eret

当非VHE模式下,用于EL1和EL2的早期初始化

总结
el2_setup根据当前CPU处于EL1还是EL2主要做了如下的工作:

1、设置当前异常级别的栈
2、判断当前的异常级别,并据此设置端模式;
3、如果支持VHE,设置VHE,否则设置传统HYP模式
4、非VHE模式下允许EL1/EL0访问物理定时器和计数器,并将虚拟偏移量清零
5、GIC设置
6、根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
7、hstr_el2清零
8、EL2 debug相关寄存器设置