arch/x86/cpu/sipi_vector.S - u-boot-mtk - Git at Google

 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2015 Google, Inc
  *
  * Taken from coreboot file of the same name
  */

 /*
  * The SIPI vector is responsible for initializing the APs in the system. It
  * loads microcode, sets up MSRs, and enables caching before calling into
  * C code
  */

 #include <asm/global_data.h>
 #include <asm/msr-index.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
 #include <asm/sipi.h>

 #define CODE_SEG	(X86_GDT_ENTRY_32BIT_CS * X86_GDT_ENTRY_SIZE)
 #define DATA_SEG	(X86_GDT_ENTRY_32BIT_DS * X86_GDT_ENTRY_SIZE)

 /*
  * First we have the 16-bit section. Every AP process starts here.
  * The simple task is to load U-Boot's Global Descriptor Table (GDT) to allow
  * U-Boot's 32-bit code to become visible, then jump to ap_start.
  *
  * Note that this code is copied to RAM below 1MB in mp_init.c, and runs from
  * there, but the 32-bit code (ap_start and onwards) is part of U-Boot and
  * is therefore relocated to the top of RAM with other U-Boot code. This
  * means that for the 16-bit code we must write relocatable code, but for the
  * rest, we can do what we like.
  */
 /*
  * ap_start16: 16-bit entry point executed by every AP.
  *
  * Disables interrupts, loads the GDT described by gdtaddr, switches CR0 to
  * protected mode with caching disabled, then far-jumps through the 48-bit
  * pointer at ap_start_jmp. Every data reference is computed as an offset
  * from ap_start16 and accessed through ds/cs (both equal to the segment
  * the code is running from), so the code works wherever it is copied.
  * NOTE(review): this assumes the copy starts at offset 0 of that segment -
  * confirm against the copy done in mp_init.c.
  */
 .text
 .code16
 .globl ap_start16
 ap_start16:
 	cli
 	xorl	%eax, %eax
 	movl	%eax, %cr3		/* Invalidate TLB */

 	/* setup the data segment: ds = cs so data in this blob is reachable */
 	movw	%cs, %ax
 	movw	%ax, %ds

 	/* Use an address relative to the data segment for the GDT */
 	movl	$gdtaddr, %ebx
 	subl	$ap_start16, %ebx	/* ebx = gdtaddr - ap_start16 */

 	/* data32: 32-bit operand size so the full 32-bit table base is loaded */
 	data32 lgdt (%ebx)

 	/*
 	 * Clear PG, AM, WP, NE, TS, EM and MP, and set PE to enter protected
 	 * mode. NW and CD are set as well, so caching stays disabled until
 	 * ap_start clears them again.
 	 */
 	movl	%cr0, %eax
 	andl	$(~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE | \
 		    X86_CR0_TS | X86_CR0_EM | X86_CR0_MP)), %eax
 	orl	$(X86_CR0_NW | X86_CR0_CD | X86_CR0_PE), %eax
 	movl	%eax, %cr0

 	/* bp = offset of the far pointer ap_start_jmp from ap_start16 */
 	movl	$ap_start_jmp, %eax
 	subl	$ap_start16, %eax
 	movw	%ax, %bp

 	/*
 	 * Jump to ap_start within U-Boot. A (%bp) operand defaults to the
 	 * stack segment, hence the explicit cs override; data32 selects the
 	 * 16:32 far-pointer form stored at ap_start_jmp.
 	 */
 data32 cs	ljmp	*(%bp)

 	/*
 	 * Parameters consumed by ap_start16. They sit between ap_start16 and
 	 * ap_start16_code_end, i.e. inside the 16-bit region. Only the segment
 	 * selector has a build-time value; the zero fields are presumably
 	 * filled in by C code before the APs are started (TODO confirm in
 	 * mp_init.c).
 	 */
 	.align	4
 .globl sipi_params_16bit
 sipi_params_16bit:
 	/* 48-bit far pointer used by the ljmp at the end of ap_start16 */
 ap_start_jmp:
 	.long	0		/* offset set to ap_start by U-Boot */
 	.word	CODE_SEG	/* segment */

 	.word	0		/* padding */
 	/* Operand of the lgdt in ap_start16: 16-bit limit, then 32-bit base */
 gdtaddr:
 	.word	0 /* limit */
 	.long	0 /* table */
 	.word	0 /* unused */

 	/* Marks the end of the 16-bit code and its parameters */
 .globl ap_start16_code_end
 ap_start16_code_end:

 /*
  * ap_start: 32-bit entry point for each AP, reached through the far
  * pointer at ap_start_jmp.
  *
  * Sequence:
  *   1. Load DATA_SEG into ds/es/ss/gs and the special 'fs' selector used
  *      for global_data.
  *   2. Load the IDT from the descriptor that idt_ptr points to.
  *   3. Atomically increment ap_count; the new value is this CPU's number.
  *   4. Set esp = stack_top - cpu_num * stack_size.
  *   5. If microcode_ptr is non-zero and no microcode revision is reported,
  *      load the microcode (serialised by microcode_lock unless that holds
  *      0xffffffff).
  *   6. Write the msr_count MSRs listed in the table at msr_table_ptr.
  *   7. Enable caching and call c_handler with the CPU number in eax.
  *
  * Register roles: ecx = CPU number until it is saved in esi; edi =
  * microcode pointer, later the MSR table cursor; ebx = MSRs remaining.
  *
  * c_handler is not expected to return; if it does, the AP is parked in a
  * hlt loop instead of running into the sipi_params data that follows.
  */
 .globl ap_start
 ap_start:
 	.code32
 	movw	$DATA_SEG, %ax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %ss
 	movw	%ax, %gs

 	movw	$(X86_GDT_ENTRY_32BIT_FS * X86_GDT_ENTRY_SIZE), %ax
 	movw	%ax, %fs

 	/* Load the Interrupt descriptor table */
 	mov	idt_ptr, %ebx
 	lidt	(%ebx)

 	/*
 	 * Obtain cpu number: compare-and-swap loop on ap_count. On failure
 	 * cmpxchg reloads eax with the current ap_count and clears ZF, so the
 	 * increment is retried with the fresh value.
 	 */
 	movl	ap_count, %eax
 1:
 	movl	%eax, %ecx
 	inc	%ecx
 	lock cmpxchg %ecx, ap_count
 	jnz	1b

 	/* Setup stacks for each CPU: esp = stack_top - cpu_num * stack_size */
 	movl	stack_size, %eax
 	mul	%ecx			/* edx:eax = stack_size * cpu_num */
 	movl	stack_top, %edx
 	subl	%eax, %edx
 	mov	%edx, %esp
 	/* Save cpu number (esi is not modified by cpuid/rdmsr/wrmsr below) */
 	mov	%ecx, %esi

 	/* Determine if one should check microcode versions */
 	mov	microcode_ptr, %edi
 	test	%edi, %edi
 	jz	microcode_done /* Bypass if no microcode exists */

 	/*
 	 * Get the Microcode version.
 	 * NOTE(review): Intel's documented sequence also writes 0 to this MSR
 	 * before executing cpuid - confirm whether that is needed here.
 	 */
 	mov	$1, %eax
 	cpuid
 	mov	$MSR_IA32_UCODE_REV, %ecx
 	rdmsr
 	/* If something already loaded (non-zero edx) skip loading again */
 	test	%edx, %edx
 	jnz	microcode_done

 	/* Parallel microcode loading is allowed when the lock is 0xffffffff */
 	cmp	$0xffffffff, microcode_lock
 	je	load_microcode

 	/* Protect microcode loading: spin until bit 0 was previously clear */
 lock_microcode:
 	lock bts $0, microcode_lock
 	jc	lock_microcode

 load_microcode:
 	/* Load new microcode */
 	mov	$MSR_IA32_UCODE_WRITE, %ecx
 	xor	%edx, %edx
 	mov	%edi, %eax
 	/*
 	 * The microcode pointer is passed in pointing to the header. Adjust
 	 * pointer to reflect the payload (header size is 48 bytes)
 	 */
 	add	$UCODE_HEADER_LEN, %eax
 	/* Preserve all general registers across the update */
 	pusha
 	wrmsr
 	popa

 	/* Release the lock, unless parallel loading means it was never taken */
 	cmp	$0xffffffff, microcode_lock
 	je	microcode_done

 	xor	%eax, %eax
 	mov	%eax, microcode_lock

 microcode_done:
 	/*
 	 * Load MSRs. Each entry in the table consists of:
 	 * 0: index,
 	 * 4: value[31:0]
 	 * 8: value[63:32]
 	 * See struct saved_msr in mp_init.c.
 	 */
 	mov	msr_table_ptr, %edi
 	mov	msr_count, %ebx
 	test	%ebx, %ebx
 	jz	1f			/* empty table: nothing to write */
 load_msr:
 	mov	(%edi), %ecx
 	mov	4(%edi), %eax
 	mov	8(%edi), %edx
 	wrmsr
 	add	$12, %edi		/* advance to the next 12-byte entry */
 	dec	%ebx
 	jnz	load_msr

 1:
 	/* Enable caching */
 	mov	%cr0, %eax
 	andl	$(~(X86_CR0_CD | X86_CR0_NW)), %eax
 	mov	%eax, %cr0

 	/* c_handler(cpu_num) */
 	movl	%esi, %eax	/* cpu_num */
 	mov	c_handler, %esi
 	call	*%esi

 	/*
 	 * c_handler is not expected to return. If it does, park this AP
 	 * rather than fall through and execute the parameter data below.
 	 */
 2:
 	hlt
 	jmp	2b

 	/*
 	 * This matches struct sipi_param.
 	 *
 	 * Parameters read by ap_start. All fields are zero here and are
 	 * presumably filled in by C code before the APs are started (TODO
 	 * confirm in mp_init.c). The field order and 32-bit sizes must stay
 	 * in step with the C structure.
 	 */
 	.align	4
 .globl	sipi_params
 sipi_params:
 	/* Address of the descriptor passed to lidt */
 idt_ptr:
 	.long 0
 	/* Each AP sets esp = stack_top - cpu_num * stack_size */
 stack_top:
 	.long 0
 stack_size:
 	.long 0
 	/* 0xffffffff: parallel loading allowed; otherwise bit 0 is a spinlock */
 microcode_lock:
 	.long 0
 	/* Address of the microcode header; 0 skips microcode loading */
 microcode_ptr:
 	.long 0
 	/* Table of 12-byte (index, low, high) entries written with wrmsr */
 msr_table_ptr:
 	.long 0
 	/* Number of entries in the MSR table; 0 skips the MSR loop */
 msr_count:
 	.long 0
 	/* Function called at the end of ap_start with the CPU number in eax */
 c_handler:
 	.long 0
 	/* Atomically incremented by each AP to obtain its CPU number */
 ap_count:
 	.long 0
	/* SPDX-License-Identifier: GPL-2.0 */
	/*
	* Copyright (c) 2015 Google, Inc
	*
	* Taken from coreboot file of the same name
	*/

	/*
	* The SIPI vector is responsible for initializing the APs in the system. It
	* loads microcode, sets up MSRs, and enables caching before calling into
	* C code
	*/

	#include <asm/global_data.h>
	#include <asm/msr-index.h>
	#include <asm/processor.h>
	#include <asm/processor-flags.h>
	#include <asm/sipi.h>

	#define CODE_SEG (X86_GDT_ENTRY_32BIT_CS * X86_GDT_ENTRY_SIZE)
	#define DATA_SEG (X86_GDT_ENTRY_32BIT_DS * X86_GDT_ENTRY_SIZE)

	/*
	* First we have the 16-bit section. Every AP process starts here.
	* The simple task is to load U-Boot's Global Descriptor Table (GDT) to allow
	* U-Boot's 32-bit code to become visible, then jump to ap_start.
	*
	* Note that this code is copied to RAM below 1MB in mp_init.c, and runs from
	* there, but the 32-bit code (ap_start and onwards) is part of U-Boot and
	* is therefore relocated to the top of RAM with other U-Boot code. This
	* means that for the 16-bit code we must write relocatable code, but for the
	* rest, we can do what we like.
	*/
	/*
	 * ap_start16: 16-bit entry point executed by every AP.
	 *
	 * Disables interrupts, loads the GDT described by gdtaddr, switches CR0
	 * to protected mode with caching disabled, then far-jumps through the
	 * 48-bit pointer at ap_start_jmp. Every data reference is computed as
	 * an offset from ap_start16 and accessed through ds/cs (both equal to
	 * the segment the code is running from), so the code works wherever it
	 * is copied.
	 * NOTE(review): this assumes the copy starts at offset 0 of that
	 * segment - confirm against the copy done in mp_init.c.
	 */
	.text
	.code16
	.globl ap_start16
	ap_start16:
		cli
		xorl	%eax, %eax
		movl	%eax, %cr3		/* Invalidate TLB */

		/* setup the data segment: ds = cs so this blob's data is reachable */
		movw	%cs, %ax
		movw	%ax, %ds

		/* Use an address relative to the data segment for the GDT */
		movl	$gdtaddr, %ebx
		subl	$ap_start16, %ebx	/* ebx = gdtaddr - ap_start16 */

		/* data32: 32-bit operand size so the full table base is loaded */
		data32 lgdt (%ebx)

		/*
		 * Clear PG, AM, WP, NE, TS, EM and MP, and set PE to enter
		 * protected mode. NW and CD are set as well, so caching stays
		 * disabled until ap_start clears them again.
		 */
		movl	%cr0, %eax
		andl	$(~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE | \
			    X86_CR0_TS | X86_CR0_EM | X86_CR0_MP)), %eax
		orl	$(X86_CR0_NW | X86_CR0_CD | X86_CR0_PE), %eax
		movl	%eax, %cr0

		/* bp = offset of the far pointer ap_start_jmp from ap_start16 */
		movl	$ap_start_jmp, %eax
		subl	$ap_start16, %eax
		movw	%ax, %bp

		/*
		 * Jump to ap_start within U-Boot. A (%bp) operand defaults to
		 * the stack segment, hence the explicit cs override; data32
		 * selects the 16:32 far-pointer form stored at ap_start_jmp.
		 */
		data32 cs ljmp *(%bp)

	/*
	* Parameters consumed by ap_start16. They sit between ap_start16 and
	* ap_start16_code_end, i.e. inside the 16-bit region. Only the segment
	* selector has a build-time value; the zero fields are presumably
	* filled in by C code before the APs are started (TODO confirm in
	* mp_init.c).
	*/
	.align 4
	.globl sipi_params_16bit
	sipi_params_16bit:
	/* 48-bit far pointer used by the ljmp at the end of ap_start16 */
	ap_start_jmp:
	.long 0 /* offset set to ap_start by U-Boot */
	.word CODE_SEG /* segment */

	.word 0 /* padding */
	/* Operand of the lgdt in ap_start16: 16-bit limit, then 32-bit base */
	gdtaddr:
	.word 0 /* limit */
	.long 0 /* table */
	.word 0 /* unused */

	/* Marks the end of the 16-bit code and its parameters */
	.globl ap_start16_code_end
	ap_start16_code_end:

	/*
	 * ap_start: 32-bit entry point for each AP, reached through the far
	 * pointer at ap_start_jmp.
	 *
	 * Sequence:
	 *   1. Load DATA_SEG into ds/es/ss/gs and the special 'fs' selector
	 *      used for global_data.
	 *   2. Load the IDT from the descriptor that idt_ptr points to.
	 *   3. Atomically increment ap_count; the new value is this CPU's
	 *      number.
	 *   4. Set esp = stack_top - cpu_num * stack_size.
	 *   5. If microcode_ptr is non-zero and no microcode revision is
	 *      reported, load the microcode (serialised by microcode_lock
	 *      unless that holds 0xffffffff).
	 *   6. Write the msr_count MSRs listed in the table at msr_table_ptr.
	 *   7. Enable caching and call c_handler with the CPU number in eax.
	 *
	 * Register roles: ecx = CPU number until it is saved in esi; edi =
	 * microcode pointer, later the MSR table cursor; ebx = MSRs remaining.
	 *
	 * c_handler is not expected to return; if it does, the AP is parked in
	 * a hlt loop instead of running into the sipi_params data that follows.
	 */
	.globl ap_start
	ap_start:
		.code32
		movw	$DATA_SEG, %ax
		movw	%ax, %ds
		movw	%ax, %es
		movw	%ax, %ss
		movw	%ax, %gs

		movw	$(X86_GDT_ENTRY_32BIT_FS * X86_GDT_ENTRY_SIZE), %ax
		movw	%ax, %fs

		/* Load the Interrupt descriptor table */
		mov	idt_ptr, %ebx
		lidt	(%ebx)

		/*
		 * Obtain cpu number: compare-and-swap loop on ap_count. On
		 * failure cmpxchg reloads eax with the current ap_count and
		 * clears ZF, so the increment is retried with the fresh value.
		 */
		movl	ap_count, %eax
	1:
		movl	%eax, %ecx
		inc	%ecx
		lock cmpxchg %ecx, ap_count
		jnz	1b

		/* Setup stacks: esp = stack_top - cpu_num * stack_size */
		movl	stack_size, %eax
		mul	%ecx			/* edx:eax = stack_size * cpu_num */
		movl	stack_top, %edx
		subl	%eax, %edx
		mov	%edx, %esp
		/* Save cpu number (esi is not modified by cpuid/rdmsr/wrmsr) */
		mov	%ecx, %esi

		/* Determine if one should check microcode versions */
		mov	microcode_ptr, %edi
		test	%edi, %edi
		jz	microcode_done /* Bypass if no microcode exists */

		/*
		 * Get the Microcode version.
		 * NOTE(review): Intel's documented sequence also writes 0 to
		 * this MSR before executing cpuid - confirm whether that is
		 * needed here.
		 */
		mov	$1, %eax
		cpuid
		mov	$MSR_IA32_UCODE_REV, %ecx
		rdmsr
		/* If something already loaded (non-zero edx) skip loading again */
		test	%edx, %edx
		jnz	microcode_done

		/* Parallel loading is allowed when the lock is 0xffffffff */
		cmp	$0xffffffff, microcode_lock
		je	load_microcode

		/* Protect microcode loading: spin until bit 0 was clear */
	lock_microcode:
		lock bts $0, microcode_lock
		jc	lock_microcode

	load_microcode:
		/* Load new microcode */
		mov	$MSR_IA32_UCODE_WRITE, %ecx
		xor	%edx, %edx
		mov	%edi, %eax
		/*
		 * The microcode pointer is passed in pointing to the header.
		 * Adjust pointer to reflect the payload (header size is 48
		 * bytes)
		 */
		add	$UCODE_HEADER_LEN, %eax
		/* Preserve all general registers across the update */
		pusha
		wrmsr
		popa

		/* Release the lock, unless parallel loading never took it */
		cmp	$0xffffffff, microcode_lock
		je	microcode_done

		xor	%eax, %eax
		mov	%eax, microcode_lock

	microcode_done:
		/*
		 * Load MSRs. Each entry in the table consists of:
		 * 0: index,
		 * 4: value[31:0]
		 * 8: value[63:32]
		 * See struct saved_msr in mp_init.c.
		 */
		mov	msr_table_ptr, %edi
		mov	msr_count, %ebx
		test	%ebx, %ebx
		jz	1f			/* empty table: nothing to write */
	load_msr:
		mov	(%edi), %ecx
		mov	4(%edi), %eax
		mov	8(%edi), %edx
		wrmsr
		add	$12, %edi		/* advance to the next 12-byte entry */
		dec	%ebx
		jnz	load_msr

	1:
		/* Enable caching */
		mov	%cr0, %eax
		andl	$(~(X86_CR0_CD | X86_CR0_NW)), %eax
		mov	%eax, %cr0

		/* c_handler(cpu_num) */
		movl	%esi, %eax	/* cpu_num */
		mov	c_handler, %esi
		call	*%esi

		/*
		 * c_handler is not expected to return. If it does, park this
		 * AP rather than fall through and execute the parameter data
		 * below.
		 */
	2:
		hlt
		jmp	2b

	/*
	* This matches struct sipi_param.
	*
	* Parameters read by ap_start. All fields are zero here and are
	* presumably filled in by C code before the APs are started (TODO
	* confirm in mp_init.c). The field order and 32-bit sizes must stay
	* in step with the C structure.
	*/
	.align 4
	.globl sipi_params
	sipi_params:
	/* Address of the descriptor passed to lidt */
	idt_ptr:
	.long 0
	/* Each AP sets esp = stack_top - cpu_num * stack_size */
	stack_top:
	.long 0
	stack_size:
	.long 0
	/* 0xffffffff: parallel loading allowed; otherwise bit 0 is a spinlock */
	microcode_lock:
	.long 0
	/* Address of the microcode header; 0 skips microcode loading */
	microcode_ptr:
	.long 0
	/* Table of 12-byte (index, low, high) entries written with wrmsr */
	msr_table_ptr:
	.long 0
	/* Number of entries in the MSR table; 0 skips the MSR loop */
	msr_count:
	.long 0
	/* Function called at the end of ap_start with the CPU number in eax */
	c_handler:
	.long 0
	/* Atomically incremented by each AP to obtain its CPU number */
	ap_count:
	.long 0