C++构造/析构函数中的多态(二)

本来是几年以前写的一篇博客：C++陷阱：构造函数中的多态。然后有同学在评论中讨论了起来，为了记录我就在这里单独写一篇，基本上就是用编译器的实现去证明了早就被大家熟知的一些结论。

默认构造函数/析构函数不是在所有情况下都会被生成出来的。为此我还特地翻出《Inside C++ object model》：

2.1 Default Constructor Construction

The C++ Annotated Reference Manual (ARM) [ELLIS90] (Section 12.1) tells us that “default constructors…are generated (by the compiler) where needed….”

后面别人还罗列了好些例子告诉你哪些情况才算needed。本文我就解释构造函数中的多态评论中的问题。实验代码如下：

#include <stdio.h>

class Base {
public:
    Base() {
        Init();
    }

    //virtual
     ~Base() {
        printf("Base dtor\n");
        Release();
    }

    virtual void Init() {
        printf("Base::Init\n");
    }

    virtual void Release() {
        printf("Base::Release\n");
    }
};

class Derived : public Base {
public:
    /*
    ~Derived() {
        printf("Derived dtor\n");
    } // */

    virtual void Init() {
        printf("Derived::Init\n");
    }

    virtual void Release() {
        printf("Derived:Release\n");
    }
};

int main()
{
    Base *obj = new Derived();
    delete obj;
    return 0;
}

去掉Derived的析构函数，去掉Base析构函数的virtual：

# g++ -Wa,-adhln -g test.cpp > test.s

test.cpp      ****     delete obj;
                  .loc 1 44 0
0028 488B45F0      movq    -16(%rbp), %rax
002c 488945E0      movq    %rax, -32(%rbp)
0030 48837DE0      cmpq    $0, -32(%rbp)       
    00
0035 7520          jne .L17                   
0037 EB30          jmp .L18
...
              .L17:
                  .loc 1 44 0
0057 488B7DE0      movq    -32(%rbp), %rdi
005b E8000000      call    _ZN4BaseD1Ev             # 直接call

从这里甚至可以看到delete对空指针的判定。Base的析构函数不是virtual，所以这里编译器生成的析构函数调用代码根本不需要用到vptr，直接call就可以。而具体call谁则是根据obj指向的类型Base确定。即使Derived用户定义了析构函数也不会调用，无论是否virtual。

事实上编译器甚至不需要生成Derived的析构函数，多傻的编译器才会生成这些什么事都不做的代码而光进出函数就得好几条指令？

查看程序中的符号，没有生成Derived析构函数：

# nm test
...
0000000000400816 W _ZN4Base4InitEv
00000000004007fe W _ZN4Base7ReleaseEv
000000000040082e W _ZN4BaseC2Ev
0000000000400876 W _ZN4BaseD1Ev             
00000000004007e6 W _ZN7Derived4InitEv
00000000004007ce W _ZN7Derived7ReleaseEv
0000000000400852 W _ZN7DerivedC1Ev
...

现在把Base析构函数变为virtual的：

test.cpp      ****     delete obj;
                  .loc 1 44 0
0028 48837DF0      cmpq    $0, -16(%rbp)
    00
002d 7520          jne .L12
002f EB32          jmp .L13
...
              .L12:
                  .loc 1 44 0
004f 488B45F0      movq    -16(%rbp), %rax     # this -> rax
0053 488B00        movq    (%rax), %rax        # *rax -> vptr
0056 4883C008      addq    $8, %rax            # vptr += 8
005a 488B00        movq    (%rax), %rax        # *vptr -> Base::~Base
005d 488B7DF0      movq    -16(%rbp), %rdi     # this as first argument (passed by rdi)
0061 FFD0          call    *%rax               # call

析构函数动态调用这也是预期的。至于为什么是偏移vptr+8，是因为第一个指针指向的是type_info，具体可看浅议 Dynamic_cast 和 RTTI。vptr和virtual function table还需要详述吗？

此时就会生成Derived的析构函数：

...
000000000040084c W _ZN4Base4InitEv
00000000004008ac W _ZN4Base7ReleaseEv
0000000000400864 W _ZN4BaseC2Ev
0000000000400970 W _ZN4BaseD0Ev
00000000004009b0 W _ZN4BaseD1Ev
00000000004008c4 W _ZN4BaseD2Ev
0000000000400834 W _ZN7Derived4InitEv
000000000040081c W _ZN7Derived7ReleaseEv
0000000000400888 W _ZN7DerivedC1Ev
0000000000400904 W _ZN7DerivedD0Ev          
000000000040093a W _ZN7DerivedD1Ev
...

细心的人就会发现无论是Base还是Derived都会生成多个析构函数，这个深入下去还有很多内容，具体可以参看：GNU GCC (g++): Why does it generate multiple dtors?。

甚至可以运行这个例子看到调用到了Derived的析构函数：

(gdb) ni
0x000000000040080d      45          return 0;
1: x/3i $pc
0x40080d <main()+117>:  callq  *%rax                  # 调用
0x40080f <main()+119>:  mov    $0x0,%eax
0x400814 <main()+124>:  add    $0x28,%rsp
(gdb) si
Derived::~Derived (this=0x7ffff7ffd000, __in_chrg=<value optimized out>) at test.cpp:24
24      class Derived : public Base {
1: x/3i $pc
0x400904 <Derived::~Derived()>: push   %rbp
0x400905 <Derived::~Derived()+1>:       mov    %rsp,%rbp
0x400908 <Derived::~Derived()+4>:       sub    $0x10,%rsp

其实看Derived的析构函数实现会发现很多有趣的东西：

(gdb) disassemble 'Derived::~Derived'
Dump of assembler code for function Derived::~Derived():
0x0000000000400904 <Derived::~Derived()+0>:     push   %rbp
0x0000000000400905 <Derived::~Derived()+1>:     mov    %rsp,%rbp
0x0000000000400908 <Derived::~Derived()+4>:     sub    $0x10,%rsp
0x000000000040090c <Derived::~Derived()+8>:     mov    %rdi,-0x8(%rbp)
0x0000000000400910 <Derived::~Derived()+12>:    mov    $0x400b50,%edx
0x0000000000400915 <Derived::~Derived()+17>:    mov    -0x8(%rbp),%rax
0x0000000000400919 <Derived::~Derived()+21>:    mov    %rdx,(%rax)
0x000000000040091c <Derived::~Derived()+24>:    mov    -0x8(%rbp),%rdi
0x0000000000400920 <Derived::~Derived()+28>:    callq  0x4008c4 <Base::~Base()>
0x0000000000400925 <Derived::~Derived()+33>:    mov    $0x1,%eax
0x000000000040092a <Derived::~Derived()+38>:    test   %al,%al
0x000000000040092c <Derived::~Derived()+40>:    je     0x400937 <Derived::~Derived()+51>
0x000000000040092e <Derived::~Derived()+42>:    mov    -0x8(%rbp),%rdi
0x0000000000400932 <Derived::~Derived()+46>:    callq  0x400670 <_ZdlPv@plt>               
0x0000000000400937 <Derived::~Derived()+51>:    leaveq
0x0000000000400938 <Derived::~Derived()+52>:    retq

实际上这个析构函数就是上面的D0版本，它做了一件重要的事就是delete this。具体的可以google gcc对析构函数的实现。

构造函数和析构函数中根本就不会启用多态，这个是结论或者说是标准，但不是原因(真怕又有人告诉我c++ standard某section这样写的所以这就是理由)。既然反正已经看实现了，就索性看一眼编译器怎么处理这个问题：

// Base::~Base
0000 55            pushq   %rbp
              .LCFI22:
0001 4889E5        movq    %rsp, %rbp
              .LCFI23:
0004 4883EC10      subq    $16, %rsp
              .LCFI24:
0008 48897DF8      movq    %rdi, -8(%rbp)
                  .loc 1 10 0
000c BA000000      movl    $_ZTV4Base+16, %edx
    00
0011 488B45F8      movq    -8(%rbp), %rax
0015 488910        movq    %rdx, (%rax)
                  .loc 1 11 0
0018 BF000000      movl    $.LC4, %edi
    00
001d E8000000      call    puts
    00
                  .loc 1 12 0
0022 488B7DF8      movq    -8(%rbp), %rdi
0026 E8000000      call    _ZN4Base7ReleaseEv  # 直接call绝对地址

构造函数一样：

0000 55            pushq   %rbp
              .LCFI9:
0001 4889E5        movq    %rsp, %rbp
              .LCFI10:
0004 4883EC10      subq    $16, %rsp
              .LCFI11:
0008 48897DF8      movq    %rdi, -8(%rbp)
              .LBB2:
                  .loc 1 5 0
000c B8000000      movl    $_ZTV4Base+16, %eax
    00
0011 488B55F8      movq    -8(%rbp), %rdx
0015 488902        movq    %rax, (%rdx)
                  .loc 1 6 0
0018 488B7DF8      movq    -8(%rbp), %rdi
001c E8000000      call    _ZN4Base4InitEv  # 直接call地址

END

C++构造/析构函数中的多态(二)

相关文章

C++陷阱：virtual析构函数

C++陷阱：构造函数中的多态

多重继承和void*的糗事