function call optimization

9
Function Call Optimization This note describes the observations on the following Function Calls: 1. Constructor call 2. Constructor call of Base Class 3. Get / Set Methods. To Test for the above a sample Base Class has been developed. class Base { public: size_t nameLen; char *name; protected: Base(char *nameStr): nameLen((nameStr)? strlen(nameStr): 0), name(strcpy(new char[nameLen+1], nameStr)) {} }; This generates the following assembly in debug build using g++. The function calls have been marked in red. _ZN4BaseC2EPc: => Base::Base() .LFB1443: .loc 3 17 0 pushq %rbp # .LCFI22: movq %rsp, %rbp #, .LCFI23: pushq %rbx # .LCFI24: subq $40, %rsp #, .LCFI25: movq %rdi, -16(%rbp) # this, this movq %rsi, -24(%rbp) # nameStr, nameStr .LBB13: .loc 3 20 0 movq -16(%rbp), %rax # this, movq %rax, -32(%rbp) #, cmpq $0, -24(%rbp) #, nameStr je .L39 #, movq -24(%rbp), %rdi # nameStr, nameStr call strlen # => strlen()

Upload: ppd1961

Post on 19-Jun-2015

1.146 views

Category:

Documents


2 download

DESCRIPTION

Optimization is illustrated in x86 Assembly

TRANSCRIPT

Function Call Optimization

This note describes the observations on the following Function Calls:

1. Constructor call
2. Constructor call of Base Class
3. Get / Set Methods.

To test the above, a sample Base class has been developed.

class Base{

public:size_t nameLen;char *name;

protected:Base(char *nameStr):

nameLen((nameStr)? strlen(nameStr): 0),name(strcpy(new char[nameLen+1], nameStr))

{}…

};

This generates the following assembly in debug build using g++. The function calls have been marked in red.

_ZN4BaseC2EPc: => Base::Base().LFB1443:

.loc 3 17 0pushq %rbp #

.LCFI22:movq %rsp, %rbp #,

.LCFI23:pushq %rbx #

.LCFI24:subq $40, %rsp #,

.LCFI25:movq %rdi, -16(%rbp) # this, thismovq %rsi, -24(%rbp) # nameStr, nameStr

.LBB13:.loc 3 20 0movq -16(%rbp), %rax # this,movq %rax, -32(%rbp) #,cmpq $0, -24(%rbp) #, nameStrje .L39 #,movq -24(%rbp), %rdi # nameStr, nameStrcall strlen # => strlen()movq %rax, -40(%rbp) # tmp61,jmp .L40 #

.L39:movq $0, -40(%rbp) #,

.L40:movq -40(%rbp), %rax #,movq -32(%rbp), %rdx #,movq %rax, (%rdx) #, <variable>.nameLenmovq -16(%rbp), %rbx # this, thismovq -16(%rbp), %rax # this, thismovq (%rax), %rdi # <variable>.nameLen, tmp63incq %rdi # tmp63call _Znam # => operator new()movq %rax, %rdi #, tmp65movq -24(%rbp), %rsi # nameStr, nameStrcall strcpy # => strcpy()movq %rax, 8(%rbx) #, <variable>.name

.LBE13:addq $40, %rsp #,popq %rbx #leaveret

The Derived Class,

class Derived: public Base{

Base *myBase;

public:Derived(char *name):

Base(name),myBase((Base*)this)

{}…

};

generates the following assembly:

_ZN7DerivedC1EPc: => Derived::Derived().LFB1452:

.loc 3 53 0pushq %rbp #

.LCFI19:movq %rsp, %rbp #,

.LCFI20:subq $16, %rsp #,

.LCFI21:movq %rdi, -8(%rbp) # this, thismovq %rsi, -16(%rbp) # name, name

.LBB12:.loc 3 56 0movq -16(%rbp), %rsi # name, namemovq -8(%rbp), %rdi # this, thiscall _ZN4BaseC2EPc # => Base::Base()movq -8(%rbp), %rdx # this, thismovq -8(%rbp), %rax # this, thismovq %rax, 16(%rdx) # this, <variable>.myBase

.LBE12:leaveret

Finally, the instantiation of a derived Class Object as in

char *s; Derived d(s);

generates:

.loc 2 8 0movq -24(%rbp), %rsi # s, sleaq -64(%rbp), %rdi #, tmp59

.LEHB0:call _ZN7DerivedC1EPc # => Derived::Derived()

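This means that, in the debug build, each function is actually called, in the order we expected: the instantiation calls Derived::Derived(), which calls Base::Base(), which in turn calls strlen(), operator new() and strcpy().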

Building the instantiation in release mode, we see the following:

.LCFI2:testq %rdi, %rdi # smovq %rsp, %rbp #, tmp114je .L3 #,call strlen # => strlen()

.L3:

.L5:leaq 1(%rax), %rdi #, tmp67movq %rax, (%rbp) # tmp63, <variable>.nameLen

.LEHB0:call _Znam # => operator new()

.LEHE0:movq %rbx, %rsi # s, nameStrmovq %rax, %rdi #, <anonymous>call strcpy # => strcpy()movq %rax, 8(%rbp) # tmp70, <variable>.namemovq 8(%rsp), %rdi # <variable>.name,

<variable>.namemovq %rbp, 16(%rbp) # tmp114, <variable>.myBasetestq %rdi, %rdi # <variable>.namejne .L44 #,

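Thus we see that the Derived (and Base) constructor calls have been completely optimized away: both constructors are inlined at the point of instantiation, and only the calls to strlen(), operator new() and strcpy() remain.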

The above is illustrated in terms of constructors. Similar optimizations do (mostly) take place for (non-virtual) destructors, copy constructors, copy assignment operators, all non-virtual inline member functions and all inline global functions.

Only virtual member functions are not inlined, because the function that a virtual call actually invokes is determined at run time.

The same behavior is observed in the case of Get / Set Methods.

class Base{

…char *GetName() const { return name; }void SetName(char *nameStr){

if (nameStr){

if (name){

delete [] name;}

name = strcpy(new char[strlen(nameStr)+1], nameStr);

}}…

};

The assemblies for the Get / Set Methods are shown below:

_ZNK4Base7GetNameEv: => Base::GetName().LFB1448:

.loc 3 29 0pushq %rbp #

.LCFI17:movq %rsp, %rbp #,

.LCFI18:movq %rdi, -8(%rbp) # this, this

.LBB11:.loc 3 29 0movq -8(%rbp), %rax # this, thismovq 8(%rax), %rax # <variable>.name,

<variable>.name.LBE11:

leaveret

_ZN4Base7SetNameEPc: => Base::SetName().LFB1449:

.loc 3 31 0pushq %rbp #

.LCFI13:movq %rsp, %rbp #,

.LCFI14:pushq %rbx #

.LCFI15:subq $24, %rsp #,

.LCFI16:movq %rdi, -16(%rbp) # this, thismovq %rsi, -24(%rbp) # nameStr, nameStr

.LBB10:.loc 3 32 0cmpq $0, -24(%rbp) #, nameStrje .L29 #,.loc 3 34 0movq -16(%rbp), %rax # this, thiscmpq $0, 8(%rax) #, <variable>.nameje .L31 #,.loc 3 36 0movq -16(%rbp), %rax # this, thiscmpq $0, 8(%rax) #, <variable>.nameje .L31 #,movq -16(%rbp), %rax # this, thismovq 8(%rax), %rdi # <variable>.name,

<variable>.namecall _ZdaPv # => operator delete()

.L31:.loc 3 39 0movq -16(%rbp), %rbx # this, thismovq -24(%rbp), %rdi # nameStr, nameStrcall strlen # => strlen()movq %rax, %rdi # tmp65, tmp63incq %rdi # tmp63call _Znam # => operator new()movq %rax, %rdi #, tmp66movq -24(%rbp), %rsi # nameStr, nameStrcall strcpy # => strcpy()movq %rax, 8(%rbx) #, <variable>.name

.L29:

.LBE10:.loc 3 41 0addq $24, %rsp #,popq %rbx #leaveret

The calls,

char *oldName = d.GetName();char *newName = "My Gang";d.SetName(newName);

generate the following assembly in debug build:

.LEHE0:.loc 2 11 0leaq -64(%rbp), %rdi #, tmp60call _ZNK4Base7GetNameEv # => Base::GetName()movq %rax, -72(%rbp) # tmp61, oldName.loc 2 12 0movq $.LC0, -80(%rbp) #, newName.loc 2 13 0movq -80(%rbp), %rsi # newName, newNameleaq -64(%rbp), %rdi #, tmp63

.LEHB1:call _ZN4Base7SetNameEPc # => Base::SetName().loc 2 16 0movq $.LC1, -80(%rbp) #, newName

The same in release build is:

.L10:movl $.LC0, %edi #, nameStrcall strlen # => strlen()leaq 1(%rax), %rdi #, tmp80

.LEHB1:call _Znam # => operator new()movq %rax, %rdi #, tmp85movl $.LC0, %esi #, nameStrcall strcpy # => strcpy()testq %rax, %rax # tmp86movq %rax, 8(%rsp) # tmp86, <variable>.nameje .L14 #,movq %rax, %rdi # tmp86, <variable>.namecall _ZdaPv # => operator delete()movl $.LC1, %edi #, newNamecall strlen #leaq 1(%rax), %rdi #, tmp90call _Znam # => operator new()movq %rax, %rdi #, tmp95movl $.LC1, %esi #, newNamecall strcpy # => strcpy()movq %rax, 8(%rsp) # tmp96, <variable>.name