本文共 11699 字,大约阅读时间需要 38 分钟。
在 x86 32位系统下,进程的虚拟地址空间为 2^32 字节(4GB)大小,其中在Windows系统下4GB地址空间中 0x00000000-0x7FFFFFFF 是用户地址空间,0x80000000-0xFFFFFFFF 是内核空间。在Linux系统下,0xC0000000-0xFFFFFFFF 为系统空间(内核空间),为所有进程所共享,0x00000000-0xBFFFFFFF 为用户空间。本文主要研究在Linux系统下的虚拟地址空间。
我们所写的程序都在磁盘上存放,而在运行时向内存中加载的只有指令和数据。而这些指令和数据都不可能直接加载到真实的内存中,而是加载到虚拟地址空间中。每个进程都有自己的虚拟地址空间,并且结构相同,都被划分出若干段。其中,用户空间是每个进程私有的,而内核空间是所有进程共享的。
Linux用户进程分段存储内容
Section | 属性 | 存储内容 |
---|---|---|
栈 | 可读;可写 | 局部变量、const局部常量、函数参数、返回地址等 |
堆 | 可读;可写 | 动态分配的内存 |
BSS段 | 可读;可写 | 未初始化/初始化为0的静态变量/全局变量 |
数据段 | 可读;可写 | 初始化为非0的静态变量/全局变量 |
代码段 | 只读;可执行 | 可执行代码、常量(字符串常量;const全局常量;enum常量;#define常量等) |
下面通过一段代码来初步了解虚拟地址空间。
// Sample program: declares global, file-scope static, local and
// function-scope static variables in each initialization state
// (non-zero, zero, uninitialized) so their placement can be inspected
// with objdump/readelf.
#include <iostream>

int gdata1 = 10;         // global, initialized
int gdata2 = 0;          // global, initialized to 0
int gdata3;              // global, uninitialized

static int gdata4 = 11;  // static, initialized
static int gdata5 = 0;   // static, initialized to 0
static int gdata6;       // static, uninitialized

int main()
{
    int a = 12;          // local, initialized
    int b = 0;           // local, initialized to 0
    int c;               // local, uninitialized

    static int e = 13;   // local static, initialized
    static int f = 0;    // local static, initialized to 0
    static int g;        // local static, uninitialized

    return 0;
}
在上述代码中:
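gdata1——gdata6 为全局变量,最终是以数据的形式被加载到内存中,并添加到符号表中。其中:gdata1、gdata4 初始化为非0值,存放于 .data 段;gdata2、gdata3、gdata5、gdata6 未初始化或初始化为0,存放于 .bss 段。a、b、c 为局部变量,不会作为数据添加到符号表中,而是被编译为指令,例如 int a = 12; 对应 mov dword ptr[a], 0ch
指令,存放于 .text段,而在该指令运行时会在栈上开辟出一块空间用于存放a的值,因此我们也说局部变量在栈上开辟空间。e、f、g 静态的局部变量存放于数据段,在程序运行至该行代码时进行初始化。因此,e存放于 .data 段,f、g存放于 .bss 段。在Linux系统下可执行文件为ELF格式,可以通过 readelf -S a.out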
或 objdump -h a.out
查看我们之前写的程序的ELF文件的段表
$ objdump -h a.out
a.out: file format elf64-x86-64Sections:Idx Name Size VMA LMA File off Algn 0 .interp 0000001c 0000000000400238 0000000000400238 00000238 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 1 .note.ABI-tag 00000020 0000000000400254 0000000000400254 00000254 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 2 .note.gnu.build-id 00000024 0000000000400274 0000000000400274 00000274 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 3 .gnu.hash 00000024 0000000000400298 0000000000400298 00000298 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 4 .dynsym 00000090 00000000004002c0 00000000004002c0 000002c0 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 5 .dynstr 00000090 0000000000400350 0000000000400350 00000350 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 6 .gnu.version 0000000c 00000000004003e0 00000000004003e0 000003e0 2**1 CONTENTS, ALLOC, LOAD, READONLY, DATA 7 .gnu.version_r 00000040 00000000004003f0 00000000004003f0 000003f0 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 8 .rela.dyn 00000018 0000000000400430 0000000000400430 00000430 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 9 .rela.plt 00000078 0000000000400448 0000000000400448 00000448 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 10 .init 0000001a 00000000004004c0 00000000004004c0 000004c0 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 11 .plt 00000060 00000000004004e0 00000000004004e0 000004e0 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE 12 .text 000001d2 0000000000400540 0000000000400540 00000540 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE 13 .fini 00000009 0000000000400714 0000000000400714 00000714 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 14 .rodata 00000010 0000000000400720 0000000000400720 00000720 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 15 .eh_frame_hdr 00000044 0000000000400730 0000000000400730 00000730 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 16 .eh_frame 00000134 0000000000400778 0000000000400778 00000778 2**3 CONTENTS, ALLOC, LOAD, READONLY, DATA 17 .init_array 00000010 0000000000600df8 0000000000600df8 00000df8 2**3 CONTENTS, ALLOC, LOAD, 
DATA 18 .fini_array 00000008 0000000000600e08 0000000000600e08 00000e08 2**3 CONTENTS, ALLOC, LOAD, DATA 19 .jcr 00000008 0000000000600e10 0000000000600e10 00000e10 2**3 CONTENTS, ALLOC, LOAD, DATA 20 .dynamic 000001e0 0000000000600e18 0000000000600e18 00000e18 2**3 CONTENTS, ALLOC, LOAD, DATA 21 .got 00000008 0000000000600ff8 0000000000600ff8 00000ff8 2**3 CONTENTS, ALLOC, LOAD, DATA 22 .got.plt 00000040 0000000000601000 0000000000601000 00001000 2**3 CONTENTS, ALLOC, LOAD, DATA 23 .data 00000010 0000000000601040 0000000000601040 00001040 2**2 CONTENTS, ALLOC, LOAD, DATA 24 .bss 00000020 0000000000601050 0000000000601050 00001050 2**2 ALLOC 25 .comment 0000002d 0000000000000000 0000000000000000 00001050 2**0 CONTENTS, READONLY
表中的每一列分别对应 section的大小、虚拟地址(Virtual Memory Address)、装载地址(Load Memory Address),文件偏移。
通过readelf -s a.out
或objdump -t a.out
查看该应用程序ELF文件的符号表
a.out: file format elf64-x86-64SYMBOL TABLE:0000000000400238 l d .interp 0000000000000000 .interp0000000000400254 l d .note.ABI-tag 0000000000000000 .note.ABI-tag0000000000400274 l d .note.gnu.build-id 0000000000000000 .note.gnu.build-id0000000000400298 l d .gnu.hash 0000000000000000 .gnu.hash00000000004002c0 l d .dynsym 0000000000000000 .dynsym0000000000400350 l d .dynstr 0000000000000000 .dynstr00000000004003e0 l d .gnu.version 0000000000000000 .gnu.version00000000004003f0 l d .gnu.version_r 0000000000000000 .gnu.version_r0000000000400430 l d .rela.dyn 0000000000000000 .rela.dyn0000000000400448 l d .rela.plt 0000000000000000 .rela.plt00000000004004c0 l d .init 0000000000000000 .init00000000004004e0 l d .plt 0000000000000000 .plt0000000000400540 l d .text 0000000000000000 .text0000000000400714 l d .fini 0000000000000000 .fini0000000000400720 l d .rodata 0000000000000000 .rodata0000000000400730 l d .eh_frame_hdr 0000000000000000 .eh_frame_hdr0000000000400778 l d .eh_frame 0000000000000000 .eh_frame0000000000600df8 l d .init_array 0000000000000000 .init_array0000000000600e08 l d .fini_array 0000000000000000 .fini_array0000000000600e10 l d .jcr 0000000000000000 .jcr0000000000600e18 l d .dynamic 0000000000000000 .dynamic0000000000600ff8 l d .got 0000000000000000 .got0000000000601000 l d .got.plt 0000000000000000 .got.plt0000000000601040 l d .data 0000000000000000 .data0000000000601050 l d .bss 0000000000000000 .bss0000000000000000 l d .comment 0000000000000000 .comment0000000000000000 l df *ABS* 0000000000000000 crtstuff.c0000000000600e10 l O .jcr 0000000000000000 __JCR_LIST__0000000000400570 l F .text 0000000000000000 deregister_tm_clones00000000004005a0 l F .text 0000000000000000 register_tm_clones00000000004005e0 l F .text 0000000000000000 __do_global_dtors_aux0000000000601050 l O .bss 0000000000000001 completed.63550000000000600e08 l O .fini_array 0000000000000000 __do_global_dtors_aux_fini_array_entry0000000000400600 l F .text 0000000000000000 
frame_dummy0000000000600df8 l O .init_array 0000000000000000 __frame_dummy_init_array_entry0000000000000000 l df *ABS* 0000000000000000 a.cpp000000000060105c l O .bss 0000000000000001 _ZStL8__ioinit0000000000601048 l O .data 0000000000000004 _ZL6gdata40000000000601060 l O .bss 0000000000000004 _ZL6gdata50000000000601064 l O .bss 0000000000000004 _ZL6gdata60000000000400641 l F .text 000000000000003d _Z41__static_initialization_and_destruction_0ii000000000040067e l F .text 0000000000000015 _GLOBAL__sub_I_gdata10000000000601068 l O .bss 0000000000000004 _ZZ4mainE1g000000000060106c l O .bss 0000000000000004 _ZZ4mainE1f000000000060104c l O .data 0000000000000004 _ZZ4mainE1e0000000000000000 l df *ABS* 0000000000000000 crtstuff.c00000000004008a8 l O .eh_frame 0000000000000000 __FRAME_END__0000000000600e10 l O .jcr 0000000000000000 __JCR_END__0000000000000000 l df *ABS* 00000000000000000000000000400730 l .eh_frame_hdr 0000000000000000 __GNU_EH_FRAME_HDR0000000000601000 l O .got.plt 0000000000000000 _GLOBAL_OFFSET_TABLE_0000000000600e08 l .init_array 0000000000000000 __init_array_end0000000000600df8 l .init_array 0000000000000000 __init_array_start0000000000600e18 l O .dynamic 0000000000000000 _DYNAMIC0000000000601040 w .data 0000000000000000 data_start0000000000400710 g F .text 0000000000000002 __libc_csu_fini0000000000400540 g F .text 0000000000000000 _start0000000000000000 w *UND* 0000000000000000 __gmon_start__0000000000601054 g O .bss 0000000000000004 gdata20000000000400714 g F .fini 0000000000000000 _fini0000000000000000 F *UND* 0000000000000000 _ZNSt8ios_base4InitC1Ev@@GLIBCXX_3.40000000000000000 F *UND* 0000000000000000 __libc_start_main@@GLIBC_2.2.50000000000000000 F *UND* 0000000000000000 __cxa_atexit@@GLIBC_2.2.50000000000400530 F *UND* 0000000000000000 _ZNSt8ios_base4InitD1Ev@@GLIBCXX_3.40000000000400720 g O .rodata 0000000000000004 _IO_stdin_used0000000000601040 g .data 0000000000000000 __data_start0000000000601044 g O .data 0000000000000004 
gdata10000000000601050 g O .data 0000000000000000 .hidden __TMC_END__0000000000400728 g O .rodata 0000000000000000 .hidden __dso_handle00000000004006a0 g F .text 0000000000000065 __libc_csu_init0000000000601050 g .bss 0000000000000000 __bss_start0000000000601070 g .bss 0000000000000000 _end0000000000601050 g .data 0000000000000000 _edata0000000000601058 g O .bss 0000000000000004 gdata3000000000040062d g F .text 0000000000000014 main00000000004004c0 g F .init 0000000000000000 _init
其中,我们可以看到 gdata1-6 以及 e、f、g 所属的分段:
0000000000601044 g O .data 0000000000000004 gdata1
0000000000601054 g O .bss 0000000000000004 gdata2
0000000000601058 g O .bss 0000000000000004 gdata3
0000000000601048 l O .data 0000000000000004 _ZL6gdata4
0000000000601060 l O .bss 0000000000000004 _ZL6gdata5
0000000000601064 l O .bss 0000000000000004 _ZL6gdata6
0000000000601068 l O .bss 0000000000000004 _ZZ4mainE1g
000000000060106c l O .bss 0000000000000004 _ZZ4mainE1f
000000000060104c l O .data 0000000000000004 _ZZ4mainE1e
我们所写的程序,不管是C语言还是C++语言,最终编译器都会先将其转化为汇编指令,而后再转化成机器指令存储在磁盘上。在我们运行程序时,把程序的指令和数据加载到虚拟内存空间中,然后根据内存中分段的偏移映射到物理内存上执行。
转载地址:http://vqio.baihongyu.com/