|
楼主 |
发表于 2003-11-20 21:42:42
|
显示全部楼层
head.S源码分析:
首先,head.S是与内核保护模式其它代码一起链接的,代码都会被定位到虚拟地址
0xC0000000 + 0x100000,从而里边的符号的地址在链接时会被定位到0xC0100000以上
的地址(就是它们的值是大于等于0xC0100000的数),比如在head.S中的以下标号:
mmu_cr4_features:在我的内核里是c0379dc4(各人编译后的结果可能不同,但都在
0xC0100000以上);
pg0:c0102000,这个值在同个版本下就相同了,下面会解释。
为什么会这样呢:
看arch/i386/Makefile(它被主Makefile通过include指令包含进去了,看前面解释)有:
LINKFLAGS =-T $(TOPDIR)/arch/i386/vmlinux.lds $(LDFLAGS)
主Makefile:
vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs
$(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \
...........
所以vmlinux链接时要用到这个LINKFLAGS作为链接选项。
-T选项告诉ld要用arch/i386/vmlinux.lds(lds大概就是ld script的简写吧)做为链接的依据。
-T commandfile
--script=commandfile
Read link commands from the file commandfile. These commands replace ld's default
link script (rather than adding to it)/*从ld-2.9.1manual引用的*/
所以vmlinux.lds是个很重要的东东,很多令人迷惑的地方都可以在里边找到解释。
vmlinux.lds里比较前的地方有这样一句:. = 0xC0000000 + 0x100000;
这就告诉ld把组成vmlinux的.o文件都重定位到0xC0100000开始处,所以里边用到的符号值也会
从原值(在.o里的值)加上0xC0100000变成大于等于0xC0100000的值。
你可以看看System.map里边的内容(是个纯文本文件),里边记录了保护模式内核符号的值及
其对应的虚拟地址值,搜索一下就能看到mmu_cr4_features和pg0的值。
记住,进入head.S时cpu已经工作在保护模式了(由setup.s通过movw $1, %ax; lmsw %ax启动),
但保护模式也可以不分页,这时也的确还没分页,通过段基址+偏移所得到的线性地址马上作为
物理地址用。跳到head.S的第一条指令时cs=0x10,通过索引gdt得到基址是0,这时eip=0x100000,
加起来线性地址=物理地址=0x100000。
有了这些基础,我们可以开始一行行分析head.S了(有汉字就是我的注释):
/*
* linux/arch/i386/kernel/head.S -- the 32-bit startup code.
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Enhanced CPU detection and feature setting code by Mike Jagdis
* and Martin Mares, November 1997.
*/
.text
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>/*在原文件解压后在include目录下并没有asm这个目录,由config时
建一个link到include/asm-i386.*/
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
/*
 * Old command-line protocol. startup_32 compares the 16-bit word at
 * OLD_CL_MAGIC_ADDR with OLD_CL_MAGIC; on a match it builds the command
 * line address as OLD_CL_BASE_ADDR plus the 16-bit offset read from
 * OLD_CL_OFFSET.
 */
#define OLD_CL_MAGIC_ADDR 0x90020
#define OLD_CL_MAGIC 0xA33F
#define OLD_CL_BASE_ADDR 0x90000
#define OLD_CL_OFFSET 0x90022
/*
 * New command-line protocol. startup_32 reads a 32-bit pointer at this
 * offset inside the copied boot parameters; a non-zero value is used as
 * the command line address.
 */
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
/*
* References to members of the boot_cpu_data structure.
*
* Each macro is the symbol boot_cpu_data plus a byte offset. The fields
* are written by checkCPUtype and check_x87 in this file.
* NOTE(review): the offsets must match the C layout of boot_cpu_data,
* which is not visible here - confirm against its declaration.
*/
#define CPU_PARAMS SYMBOL_NAME(boot_cpu_data)
#define X86 CPU_PARAMS+0
#define X86_VENDOR CPU_PARAMS+1
#define X86_MODEL CPU_PARAMS+2
#define X86_MASK CPU_PARAMS+3
#define X86_HARD_MATH CPU_PARAMS+6
#define X86_CPUID CPU_PARAMS+8
#define X86_CAPABILITY CPU_PARAMS+12
#define X86_VENDOR_ID CPU_PARAMS+28
/*
* swapper_pg_dir is the main page directory, address 0x00101000
*
* On entry, %esi points to the real-mode code as a 32-bit pointer.
*
* Annotator notes: the CPU is already in protected mode here but paging
* is still off, so every symbol used before paging is enabled has
* __PAGE_OFFSET subtracted from its link-time address. On an SMP build
* %bx tells the boot CPU (zero) apart from a secondary CPU (non-zero).
*/
startup_32:
/*
* Set segments to known values
*/
cld
movl $(__KERNEL_DS),%eax
/* __KERNEL_DS is 0x18; it still indexes the temporary GDT left by
   setup.S, whose descriptor also has base 0. */
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
#ifdef CONFIG_SMP
/*
 * CONFIG_SMP is defined when the kernel is built with symmetric
 * multiprocessing support. On a first reading it is easiest to skip
 * everything wrapped in this macro and follow the single-CPU path only.
 *
 * The boot CPU arrives here from setup.S with %bx == 0. Otherwise a
 * secondary CPU is being started later by other code (trampoline.S).
 * Paging structures already exist in that case, so the secondary CPU
 * only turns on its cr4 paging options and then loads the page
 * directory base into cr3 (see the code before local label 1: and
 * after local label 3:).
 */
orw %bx,%bx
jz 1f
/*
* New page tables may be in 4Mbyte page mode and may
* be using the global pages.
*
* NOTE! If we are on a 486 we may have no cr4 at all!
* So we do not try to touch it unless we really have
* some bits in it to set. This won't work if the BSP
* implements cr4 but this AP does not -- very unlikely
* but be warned! The same applies to the pse feature
* if not equally supported. --macro
*
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
*/
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
cmpl $0,cr4_bits
je 3f
movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
orl cr4_bits,%eax
movl %eax,%cr4
jmp 3f
1:
#endif
/*
* Initialize page tables
*/
movl $pg0-__PAGE_OFFSET,%edi /* initialize page tables.
pg0 links at 0xc0000000+0x100000+0x2000 and __PAGE_OFFSET is 0xC0000000,
so %edi = 0x102000, which is both the linear and the physical address of
pg0 while paging is still off. */
movl $007,%eax /* "007" doesn't mean with right to kill, but
PRESENT+RW+USER */
/*
A page-table entry is a 32-bit value:
  bits 31..12  physical page base address
  bits 11..3   other attribute bits (all zero here)
  bit  2       U/S
  bit  1       R/W
  bit  0       P
so 007 means page base 0 with the low three bits set.

The four-instruction loop below stores one entry per iteration and adds
0x1000 to the value each time, filling the two page tables pg0 and pg1
that together map the first 8 MB:

  entry address   entry value   physical range mapped
  0x102000        0x00000007    0 ~ 4K
  0x102004        0x00001007    4K ~ 8K
  ...
  0x103ffc        0x007ff007    8M-4K ~ 8M

The loop stops when %edi reaches 0x104000, the address of
empty_zero_page, which sits right after the two page tables.
*/
2: stosl
add $0x1000,%eax
cmp $empty_zero_page-__PAGE_OFFSET,%edi /* empty_zero_page-__PAGE_OFFSET = 0x104000 */
jne 2b
/*
* Enable paging
*
* Loads the physical address of swapper_pg_dir into cr3, sets CR0.PG and
* then jumps through %eax so that eip continues at the link-time
* (PAGE_OFFSET based) address of the next instruction.
*/
3:
movl $swapper_pg_dir-__PAGE_OFFSET,%eax /* %eax = 0x101000, the physical address of swapper_pg_dir */
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $0x80000000,%eax /* set the PG bit */
movl %eax,%cr0 /* ..and set paging (PG) bit.
Paging is on from here. eip is still a low 0x1000xx address, which keeps
working because swapper_pg_dir also identity-maps 0-8M. After the
indirect jump below the code runs at virtual addresses from 0xc0100000
up, so labels are used directly, without subtracting __PAGE_OFFSET. */
jmp 1f /* flush the prefetch-queue */
1:
movl $1f,%eax /* eip was 0x1000xx; load the link-time address 0xc01000xx */
jmp *%eax /* make sure eip is relocated; the '*' makes this an absolute indirect jump */
1:
/* Set up the stack pointer */
/* lss loads %esp and %ss together from the two longs at stack_start. */
lss stack_start,%esp
#ifdef CONFIG_SMP
orw %bx,%bx /* a secondary CPU has %bx != 0: it does not clear BSS and
               goes straight to checkCPUtype after zeroing eflags */
jz 1f /* Initial CPU cleans BSS */
pushl $0
popfl
jmp checkCPUtype
1:
#endif /* CONFIG_SMP */
/*
* Clear BSS first so that there are no surprises...
* No need to cld as DF is already clear from cld above...
*/
/*
 * Zero every byte from __bss_start up to _end. Both symbols come from
 * vmlinux.lds; their values are comparatively large (so the matching
 * virtual and physical addresses are high) because .bss is placed after
 * all of the text and data, as System.map also shows.
 */
xorl %eax,%eax
movl $ SYMBOL_NAME(__bss_start),%edi
movl $ SYMBOL_NAME(_end),%ecx
subl %edi,%ecx /* %ecx = byte count = _end - __bss_start */
rep
stosb
/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
call setup_idt
/*
* Initialize eflags. Some BIOS's leave bits like NT set. This would
* confuse the debugger if this code is traced.
* XXX - best to initialize before switching to protected mode.
*/
pushl $0 /* popfl below loads eflags from this zero */
popfl
/*
* Copy bootup parameters out of the way. First 2kB of
* _empty_zero_page is for boot parameters, second 2kB
* is for the command line.
*
* Note: %esi still has the pointer to the real-mode data.
*/
/*
 * The first 2 KB of the real-mode area written by bootsect and setup
 * hold parameters that are still needed later, so they are copied into
 * empty_zero_page here.
 */
movl $ SYMBOL_NAME(empty_zero_page),%edi
movl $512,%ecx
cld
rep
movsl /* copy 512*4 = 2 KB */
xorl %eax,%eax
movl $512,%ecx
rep
stosl /* zero the following 2 KB */
movl SYMBOL_NAME(empty_zero_page)+NEW_CL_POINTER,%esi
andl %esi,%esi /* non-zero means the new protocol: %esi is the command line address */
jnz 2f # New command line protocol
cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR /* old boot protocol: locate the command line through the magic word and offset */
jne 1f
movzwl OLD_CL_OFFSET,%esi
addl $(OLD_CL_BASE_ADDR),%esi
2:
movl $ SYMBOL_NAME(empty_zero_page)+2048,%edi /* copy 2 KB of command line to empty_zero_page+2048 */
movl $512,%ecx
rep
movsl
1:
checkCPUtype:
/*
 * Identify the CPU family and record it in boot_cpu_data, then build the
 * CR0 value for it. The code probes whether the AC flag (bit 18) and the
 * ID flag (bit 21) of EFLAGS can be toggled, and uses CPUID when the ID
 * flag can be changed.
 *
 * Section 18.10.1 of the Intel Architecture Software Developer's Manual,
 * Volume 3: System Programming, explains how a few EFLAGS bits tell the
 * 386, the 486 and the Pentium apart. The passage is quoted here:
 *
 * 18.10.1. Using EFLAGS Flags to Distinguish Between 32-Bit Intel
 * Architecture Processors
 * The following bits in the EFLAGS register that can be used to
 * differentiate between the 32-bit Intel Architecture processors:
 * - Bit 18 (the AC flag) can be used to distinguish an Intel386 processor
 *   from the P6 family, Pentium and Intel486 processors. Since it is not
 *   implemented on the Intel386 processor, it will always be clear.
 * - Bit 21 (the ID flag) indicates whether an application can execute the
 *   CPUID instruction. The ability to set and clear this bit indicates
 *   that the processor is a P6 family or Pentium processor. The CPUID
 *   instruction can then be used to determine which processor.
 * - Bits 19 (the VIF flag) and 20 (the VIP flag) will always be zero on
 *   processors that do not support virtual mode extensions, which
 *   includes all 32-bit processors prior to the Pentium processor.
 * Refer to Chapter 10, Processor Identification and Feature
 * Determination, in the Intel Architecture Software Developer's Manual,
 * Volume 1, for more information on identifying processors.
 */
movl $-1,X86_CPUID # -1 for no CPUID initially
/* check if it is 486 or 386. */
/*
* XXX - this does a lot of unnecessary setup. Alignment checks don't
* apply at our cpl of 0 and the stack ought to be aligned already, and
* we don't need to preserve eflags.
*/
movb $3,X86 # at least 386
pushfl # push EFLAGS
popl %eax # get EFLAGS
movl %eax,%ecx # save original EFLAGS
xorl $0x40000,%eax # flip AC bit in EFLAGS
pushl %eax # copy to EFLAGS
popfl # set EFLAGS
pushfl # get new EFLAGS
popl %eax # put it in eax
xorl %ecx,%eax # change in flags
andl $0x40000,%eax # check if AC bit changed; non-zero means it toggled, so the CPU is newer than a 386
je is386
movb $4,X86 # at least 486
movl %ecx,%eax
xorl $0x200000,%eax # check ID flag
pushl %eax
popfl # if we are on a straight 486DX, SX, or
pushfl # 487SX we can't change it
popl %eax
xorl %ecx,%eax
pushl %ecx # restore original EFLAGS
popfl
andl $0x200000,%eax /* non-zero means the ID flag toggled, so CPUID can be used */
je is486
/* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
cpuid
movl %eax,X86_CPUID # save CPUID level
movl %ebx,X86_VENDOR_ID # lo 4 chars
movl %edx,X86_VENDOR_ID+4 # next 4 chars
movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well? non-zero means further CPUID function numbers exist
je is486
/* Use CPUID function %eax=1 to fetch and store the CPU version and
   feature information. */
movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid
movb %al,%cl # save reg for future use
andb $0x0f,%ah # mask processor family
movb %ah,X86 #save processor family
andb $0xf0,%al # mask model
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
is486:
movl %cr0,%eax # 486 or better
andl $0x80000011,%eax # Save PG,PE,ET
orl $0x50022,%eax # set AM, WP, NE and MP
jmp 2f
is386: pushl %ecx # restore original EFLAGS
popfl
movl %cr0,%eax # 386
andl $0x80000011,%eax # Save PG,PE,ET
orl $2,%eax # set MP
2: movl %eax,%cr0
/*
 * Final part of the 32-bit startup: probe the FPU, count this CPU in
 * "ready", switch to the kernel's own GDT and IDT, reload every segment
 * register and hand control to C code.
 */
call check_x87
incb ready
lgdt gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f /* until now the segment descriptors came from setup.S; from here the table named by gdt_descr is used */
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ds # after changing gdt.
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
#ifdef CONFIG_SMP
movl $(__KERNEL_DS), %eax
movl %eax,%ss # Reload the stack pointer (segment only)
#else
lss stack_start,%esp # Load processor stack
#endif
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
#ifdef CONFIG_SMP
movb ready, %cl /* the boot CPU has raised ready to 1, so it calls
start_kernel; every later CPU runs incb ready again, which makes ready
greater than 1, so it calls initialize_secondary */
cmpb $1,%cl
je 1f # the first CPU calls start_kernel
# all other CPUs call initialize_secondary
call SYMBOL_NAME(initialize_secondary)
jmp L6
1:
#endif
call SYMBOL_NAME(start_kernel) /* finally we reach init/main.c */
L6:
jmp L6 # main should never return here, but
# just in case, we know what happens.
/* One-byte counter incremented by each CPU that passes the incb above. */
ready: .byte 0
/*
* We depend on ET to be correct. This checks for 287/387.
*
* Records the result in X86_HARD_MATH: 0 when no coprocessor answers,
* 1 when one does. Without a coprocessor the EM bit of CR0 is turned on.
* Uses %eax as scratch.
*/
check_x87:
movb $0,X86_HARD_MATH
clts
fninit /* initialize the FPU */
fstsw %ax /* store the FPU status word in %ax */
cmpb $0,%al /* a low status byte of zero is taken as "FPU present" */
je 1f
movl %cr0,%eax /* no coprocessor: have to set bits */
xorl $4,%eax /* set EM (the caller wrote CR0 with this bit clear, so the toggle sets it) */
movl %eax,%cr0
ret
ALIGN
1: movb $1,X86_HARD_MATH
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
ret
/*
* setup_idt
*
* Fills all 256 slots of idt_table with the same interrupt gate, whose
* handler is ignore_int. The table is only filled here, not loaded:
* lidt has to wait until paging is on and the kernel runs at
* PAGE_OFFSET. Interrupts are enabled elsewhere, once things are known
* to be sane.
*
* Scratch registers: %eax, %ecx, %edx, %edi.
*
* idt_table is defined in arch/i386/kernel/traps.c as
*   struct desc_struct idt_table[256]
*   __attribute__((__section__(".data.idt"))) = { {0, 0}, };
* Searching vmlinux.lds for .data.idt shows that it sits just before
* .bss, i.e. fairly high in memory.
*/
setup_idt:
movl $ SYMBOL_NAME(idt_table),%edi /* %edi walks the table */
movl $256,%ecx /* %ecx = gates left to write */
movl $ignore_int,%edx /* %edx = handler address */
movl $(__KERNEL_CS << 16),%eax
movw %dx,%ax /* low dword: selector 0x0010 (cs) : handler offset 15..0 */
movw $0x8E00,%dx /* high dword: handler offset 31..16 : interrupt gate, dpl=0, present */
rp_sidt: /* every entry gets the same gate, so any interrupt just prints a message */
movl %eax,0(%edi)
movl %edx,4(%edi)
addl $8,%edi
decl %ecx
jnz rp_sidt
ret
/*
 * Initial stack pointer, loaded as a pair by "lss stack_start,%esp":
 * the first long becomes %esp, the second becomes %ss.
 *
 * init_task_union is defined in init_task.c as
 *   union task_union init_task_union
 *   __attribute__((__section__(".data.init_task")))
 *   = { INIT_TASK(init_task_union.task) };
 * Its placement can likewise be checked in vmlinux.lds and System.map.
 * %esp starts 8192 bytes above the start of that union.
 */
ENTRY(stack_start)
.long SYMBOL_NAME(init_task_union)+8192
.long __KERNEL_DS
/* This is the default interrupt "handler" :-) */
/*
 * Installed in every IDT slot by setup_idt. It saves %eax, %ecx, %edx,
 * %es and %ds, switches %ds and %es to __KERNEL_DS, prints int_msg
 * through printk, restores the saved registers in reverse order and
 * returns with iret.
 */
int_msg:
.asciz "Unknown interrupt\n"
ALIGN
ignore_int:
cld
pushl %eax
pushl %ecx
pushl %edx
pushl %es
pushl %ds
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
pushl $int_msg /* the single argument of printk */
call SYMBOL_NAME(printk)
popl %eax /* drop the printk argument; %eax is restored below */
popl %ds
popl %es
popl %edx
popl %ecx
popl %eax
iret
/*
* The interrupt descriptor table has room for 256 idt's,
* the global descriptor table is dependent on the number
* of tasks we can have..
*
* idt_descr and gdt_descr are the operands of the lidt and lgdt
* instructions in startup_32: a 16-bit limit (table size in bytes minus
* one) followed by the 32-bit address of the table. The labels idt and
* gdt name the address field of each descriptor.
* NOTE(review): the ".word 0" in front of each descriptor looks like
* padding that keeps the 32-bit address field 4-byte aligned - confirm
* against the definition of ALIGN.
*/
#define IDT_ENTRIES 256
/* NOTE(review): __TSS() is defined outside this file - confirm its value. */
#define GDT_ENTRIES (__TSS(NR_CPUS))
.globl SYMBOL_NAME(idt)
.globl SYMBOL_NAME(gdt)
ALIGN
.word 0
idt_descr:
.word IDT_ENTRIES*8-1 # idt contains 256 entries
SYMBOL_NAME(idt):
.long SYMBOL_NAME(idt_table)
.word 0
gdt_descr:
.word GDT_ENTRIES*8-1
SYMBOL_NAME(gdt):
.long SYMBOL_NAME(gdt_table)
/*
* This is initialized to create an identity-mapping at 0-8M (for bootup
* purposes) and another mapping of the 0-8M area at virtual address
* PAGE_OFFSET.
*
* Both groups of entries point at the same two page tables, at physical
* addresses 0x102000 and 0x103000 (pg0 and pg1), with attribute bits 007.
*/
.org 0x1000
ENTRY(swapper_pg_dir)
.long 0x00102007
.long 0x00103007
.fill BOOT_USER_PGD_PTRS-2,4,0
/* default: 766 entries */
.long 0x00102007
.long 0x00103007
/* default: 254 entries */
.fill BOOT_KERNEL_PGD_PTRS-2,4,0
/*
* The page tables are initialized to only 8MB here - the final page
* tables are set up later depending on memory size.
*
* pg0 and pg1 reserve one 0x1000-byte page each. They are the two page
* tables that swapper_pg_dir points at and that the loop in startup_32
* fills.
*/
.org 0x2000
ENTRY(pg0)
.org 0x3000
ENTRY(pg1)
/*
* empty_zero_page must immediately follow the page tables ! (The
* initialization loop counts until empty_zero_page)
*/
.org 0x4000
ENTRY(empty_zero_page)
.org 0x5000
/*
* Real beginning of normal "text" segment
*/
ENTRY(stext)
ENTRY(_stext)
/*
* This starts the data section. Note that the above is all
* in the text section because it has alignment requirements
* that we cannot fulfill any other way.
*/
.data
ALIGN
/*
* This contains typically 140 quadwords, depending on NR_CPUS.
*
* NOTE! Make sure the gdt descriptor in head.S matches this if you
* change anything.
*
* Layout: twelve fixed 8-byte descriptors, then NR_CPUS*4 zero-filled
* 8-byte slots reserved for TSS and LDT descriptors. The hexadecimal
* number in each comment is the selector used for that entry.
*/
ENTRY(gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* not used */
.quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
.quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
.quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* not used */
.quad 0x0000000000000000 /* not used */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
.quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
.fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */
/*------老江于华南理工食品楼518一台破电脑前*/
/*等找到工作后再继续搜索内核的可爱秘密,有可能把我看完的一点grub代码也注释一下*/ |
|