Where is My Variable?

By this point in the computer science course, you have probably already created one or many variables in your programs. Nevertheless, many computer science students still haven't realized what a variable truly is; they just know that variables are there. The goal of this exercise is to cast some light on the subject by snooping into the life-cycle of the "Where is My Variable?" program.

The source code

Let's begin with Where is My Variable's source code (where.cc):

1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
// Global (namespace-scope) declarations: one of each form the exercise examines.
// Uninitialized global int.
int a;
// Global int with an explicit initializer.
int b = 10;
// Uninitialized global pointer to int.
int * c;
// Uninitialized global array of 10 ints.
int d[10];
// Global array of 10 ints with an explicit initializer list.
int e[10] = {0,1,2,3,4,5,6,7,8,9};
// Left commented out: presumably because C++ rejects a const object that is
// declared without an initializer.
//const int f;
// Global constant with an initializer.
const int g = 10;

// Entry point of the exercise: declares the same variable forms as locals,
// plus three heap allocations and two static locals. Nothing is read or
// printed; the program exists only so its variables can be located later.
// There is no return statement (the assembly shown later in this document
// loads 0 into %eax before returning).
int main() {
    // Uninitialized local int.
    int h;
    // Local int with an initializer.
    int i = 10;
    // Uninitialized local pointer to int.
    int * j;
    // Uninitialized local array of 10 ints.
    int k[10];
    // Local array of 10 ints with an initializer list.
    int l[10] = {0,1,2,3,4,5,6,7,8,9};
    // Left commented out: presumably because C++ rejects a const object that
    // is declared without an initializer.
    //const int m;
    // Local constant with an initializer.
    const int n = 10;
    // Another uninitialized local pointer to int.
    int * o;
    // Pointer to a single int obtained with new; no initial value is given.
    int * p = new int;
    // Pointer to a single int obtained with new and initialized to 10.
    int * q = new int(10);
    // Pointer to an array of 10 ints obtained with new[].
    int * r = new int[10];
    // Note: none of the three allocations above is released with
    // delete/delete[] in this function.
    // Static local int without an initializer.
    static int s;
    // Static local int with an initializer.
    static int t = 10;
}

This program declares variables and constants in various forms.

Simple, very simple!

Compilation

Now let's take a look at the compilation process for "Where is My Variable". For the upcoming discussion, we'll take the widely-used GNU compiler (g++) and its associated tools (binutils). We can compile the program as follows:

# g++ -O0 -c where.cc

This produces the object file where.o. More specifically,

# file where.o
where.o: ELF 32-bit LSB relocatable, Intel 80386, version 1 (SYSV), not stripped

tells us where.o is a relocatable object file, compiled for the IA-32 architecture (I used a standard PC for this study), that contains a symbol table (not stripped).

By the way,

# objdump -hrt where.o

where.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
  0 .text         000000a8  00000000  00000000  00000034  2**0
                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
  1 .data         0000004c  00000000  00000000  000000e0  2**5
                  CONTENTS, ALLOC, LOAD, DATA
  2 .bss          0000004c  00000000  00000000  00000140  2**5
                  ALLOC
  3 .rodata       00000004  00000000  00000000  00000140  2**2
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  4 .comment      0000002d  00000000  00000000  00000144  2**0
                  CONTENTS, READONLY
SYMBOL TABLE:
00000000 l    df *ABS*  00000000 where.cc
00000000 l    d  .text  00000000 .text
00000000 l    d  .data  00000000 .data
00000000 l    d  .bss 00000000 .bss
00000000 l    d  .rodata  00000000 .rodata
00000000 l     O .rodata  00000004 _ZL1g
00000048 l     O .bss 00000004 _ZZ4mainE1s
00000048 l     O .data  00000004 _ZZ4mainE1t
00000000 l    d  .comment 00000000 .comment
00000000 g     O .bss 00000004 a
00000000 g     O .data  00000004 b
00000004 g     O .bss 00000004 c
00000020 g     O .bss 00000028 d
00000020 g     O .data  00000028 e
00000000 g     F .text  000000a8 main
00000000         *UND*  00000000 _Znwj
00000000         *UND*  00000000 _Znaj


RELOCATION RECORDS FOR [.text]:
OFFSET   TYPE              VALUE 
0000006b R_386_PC32        _Znwj
0000007b R_386_PC32        _Znwj
00000091 R_386_PC32        _Znaj


RELOCATION RECORDS FOR [.eh_frame]:
OFFSET   TYPE              VALUE 
00000020 R_386_PC32        .text

tells us where.o has 5 sections:

  1. .text: that's "Where is My Variable" compiled program, i.e. IA-32 opcodes corresponding to the program. This will be used by the program loader to initialize the process' code segment.
  2. .data: "Where is My Variable" has initialized global variables and initialized static local variables, so this section is not empty. In this case, the .data section contains the variables' initial values to be loaded into the data segment.
  3. .bss: "Where is My Variable" also has non-initialized variables, both global and static local, so this section is not empty either. It indicates how many bytes must be allocated and zeroed in the data segment in addition to section .data.
  4. .rodata: this section contains the constant values, which are tagged read-only. Most operating systems do not support a read-only data segment for processes (running programs), so the contents of .rodata go either to the process' code segment (because it's read-only), or to the data segment (because it's data). Since the compiler doesn't know the policy adopted by your OS, it creates this extra ELF section.
  5. .comment: this section contains 45 bytes (size 0x2d in the listing above) of comments which cannot be traced back to our program, since we didn't write anything that would end up there. We'll soon see where it comes from.

But, where actually is my variable?

Let's take a look at the assembly code:

1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
24.
25.
26.
27.
28.
29.
30.
31.
32.
33.
34.
35.
36.
37.
38.
39.
40.
41.
42.
43.
44.
45.
46.
47.
48.
49.
50.
51.
52.
53.
54.
55.
56.
57.
58.
59.
60.
61.
62.
63.
64.
65.
66.
67.
68.
69.
70.
71.
72.
73.
74.
75.
76.
77.
78.
79.
80.
81.
82.
83.
84.
85.
86.
87.
88.
89.
90.
91.
92.
93.
94.
95.
96.
97.
98.
99.
100.
101.
102.
103.
104.
105.
106.
107.
108.
109.
110.
111.
112.
113.
114.
115.
116.
117.
# g++ -O0 -S where.cc -o -
  # Object definitions emitted by g++ for where.cc (GNU as, AT&T syntax, IA-32).
  # Each symbol below is preceded by the directive that selects its section.
  .file "where.cc"
  .text
  # a: exported symbol placed in .bss; 4 bytes, zero-filled.
  .globl  a
  .bss
  .align 4
  .type a, @object
  .size a, 4
a:
  .zero 4
  # b: exported symbol placed in .data; 4 bytes holding the value 10.
  .globl  b
  .data
  .align 4
  .type b, @object
  .size b, 4
b:
  .long 10
  # c: exported symbol placed in .bss; 4 zero-filled bytes.
  .globl  c
  .bss
  .align 4
  .type c, @object
  .size c, 4
c:
  .zero 4
  # d: still in .bss (no section change since c); 40 zero-filled bytes,
  # aligned to 32.
  .globl  d
  .align 32
  .type d, @object
  .size d, 40
d:
  .zero 40
  # e: exported symbol placed in .data; ten 4-byte values 0..9, aligned to 32.
  .globl  e
  .data
  .align 32
  .type e, @object
  .size e, 40
e:
  .long 0
  .long 1
  .long 2
  .long 3
  .long 4
  .long 5
  .long 6
  .long 7
  .long 8
  .long 9
  # _ZL1g (the mangled name of the global constant g): placed in .rodata with
  # the value 10. There is no .globl directive, so the symbol is not exported.
  .section  .rodata
  .align 4
  .type _ZL1g, @object
  .size _ZL1g, 4
_ZL1g:
  .long 10
  # _ZZ4mainE1s (main's static local s): declared local and reserved with
  # .comm (4 bytes, alignment 4); no initial value is stored in the file.
  .local  _ZZ4mainE1s
  .comm _ZZ4mainE1s,4,4
  # _ZZ4mainE1t (main's static local t): placed in .data with the value 10.
  .data
  .align 4
  .type _ZZ4mainE1t, @object
  .size _ZZ4mainE1t, 4
_ZZ4mainE1t:
  .long 10
  # main: code emitted by g++ for where.cc's main() (GNU as, AT&T syntax, IA-32).
  # The .cfi_* directives describe the call frame for unwinders and debuggers;
  # they are not instructions.
  .text
  .globl  main
  .type main, @function
main:
.LFB0:
  .cfi_startproc
  # Prologue: keep the incoming stack position in %ecx, round %esp down to a
  # multiple of 16, re-push the return address, then set up the %ebp frame.
  leal  4(%esp), %ecx
  .cfi_def_cfa 1, 0
  andl  $-16, %esp
  pushl -4(%ecx)
  pushl %ebp
  .cfi_escape 0x10,0x5,0x2,0x75,0
  movl  %esp, %ebp
  pushl %ecx
  .cfi_escape 0xf,0x3,0x75,0x7c,0x6
  # Reserve 116 bytes of stack for the local variables.
  subl  $116, %esp
  # Store 10 into a 4-byte stack slot (presumably local i, the first local
  # initialized to 10 in source order).
  movl  $10, -12(%ebp)
  # Ten consecutive 4-byte stores of 0..9 at -108(%ebp)..-72(%ebp): the
  # initialized local array (l in the source).
  movl  $0, -108(%ebp)
  movl  $1, -104(%ebp)
  movl  $2, -100(%ebp)
  movl  $3, -96(%ebp)
  movl  $4, -92(%ebp)
  movl  $5, -88(%ebp)
  movl  $6, -84(%ebp)
  movl  $7, -80(%ebp)
  movl  $8, -76(%ebp)
  movl  $9, -72(%ebp)
  # Store 10 into another stack slot (presumably local constant n).
  movl  $10, -16(%ebp)
  # No instruction in this listing writes a stack slot for the uninitialized
  # locals (h, j, k, o in the source).
  # Call _Znwj (operator new) with the argument 4; 12 bytes of padding plus the
  # 4-byte argument are removed again by the addl $16 after the call.
  subl  $12, %esp
  pushl $4
  call  _Znwj
  addl  $16, %esp
  # Save the returned pointer in a stack slot (presumably p).
  movl  %eax, -20(%ebp)
  # Second call to _Znwj with the argument 4.
  subl  $12, %esp
  pushl $4
  call  _Znwj
  addl  $16, %esp
  # Write 10 into the newly allocated int, then save the pointer (presumably q).
  movl  $10, (%eax)
  movl  %eax, -24(%ebp)
  # Call _Znaj (operator new[]) with the argument 40 (10 ints of 4 bytes).
  subl  $12, %esp
  pushl $40
  call  _Znaj
  addl  $16, %esp
  # Save the returned pointer in a stack slot (presumably r).
  movl  %eax, -28(%ebp)
  # Return value 0.
  movl  $0, %eax
  # Epilogue: reload the saved %ecx, tear down the frame, and restore the
  # stack pointer that was in effect on entry.
  movl  -4(%ebp), %ecx
  .cfi_def_cfa 1, 0
  leave
  .cfi_restore 5
  leal  -4(%ecx), %esp
  .cfi_def_cfa 4, 4
  ret
  .cfi_endproc
.LFE0:
  .size main, .-main
  # Compiler identification string.
  # NOTE(review): presumably the origin of the .comment section contents - confirm.
  .ident  "GCC: (GNU) 7.3.1 20180303 (Red Hat 7.3.1-5)"
  .section  .note.GNU-stack,"",@progbits

Tells us: