33.4 std::vector

我将std::vector称作c数组的安全封装。在内部实现上,它和std::string相似,包含指向缓冲区的指针,指向数组尾部的指针,以及指向缓冲区尾部的指针。 std::vector中的元素在内存中连续存放,和通常中的数组一样。在C++11包含一个新方法.data(),它返回指向缓冲区的指针,这和std::string中的.c_str()相同。 在堆中分配的缓冲区大小将超过数组自身大小。 MSVC和GCC的实现相似,只是结构体中元素名稍有不同,因此下面的代码在两种编译器上都能工作。下面是用于转储std::vector结构的类C代码。

  1. #include <stdio.h>
  2. #include <vector>
  3. #include <algorithm>
  4. #include <functional>
  5. struct vector_of_ints
  6. {
  7. // MSVC names:
  8. int *Myfirst;
  9. int *Mylast;
  10. int *Myend;
  11. // GCC structure is the same, names are: _M_start, _M_finish, _M_end_of_storage
  12. };
  13. void dump(struct vector_of_ints *in)
  14. {
  15. printf ("_Myfirst=%p, _Mylast=%p, _Myend=%p\n", in->Myfirst, in->Mylast, in->Myend);
  16. size_t size=(in->Mylast-in->Myfirst);
  17. size_t capacity=(in->Myend-in->Myfirst);
  18. printf ("size=%d, capacity=%d\n", size, capacity);
  19. for (size_t i=0; i<size; i++)
  20. printf ("element %d: %d\n", i, in->Myfirst[i]);
  21. };
  22. int main()
  23. {
  24. std::vector<int> c;
  25. dump ((struct vector_of_ints*)(void*)&c);
  26. c.push_back(1);
  27. dump ((struct vector_of_ints*)(void*)&c);
  28. c.push_back(2);
  29. dump ((struct vector_of_ints*)(void*)&c);
  30. c.push_back(3);
  31. dump ((struct vector_of_ints*)(void*)&c);
  32. c.push_back(4);
  33. dump ((struct vector_of_ints*)(void*)&c);
  34. c.reserve (6);
  35. dump ((struct vector_of_ints*)(void*)&c);
  36. c.push_back(5);
  37. dump ((struct vector_of_ints*)(void*)&c);
  38. c.push_back(6);
  39. dump ((struct vector_of_ints*)(void*)&c);
  40. printf ("%d\n", c.at(5)); // bounds checking
  41. printf ("%d\n", c[8]); // operator[], no bounds checking
  42. };

如果编译器是MSVC,下面是输出样例。

  1. _Myfirst=00000000, _Mylast=00000000, _Myend=00000000
  2. size=0, capacity=0
  3. _Myfirst=0051CF48, _Mylast=0051CF4C, _Myend=0051CF4C
  4. size=1, capacity=1
  5. element 0: 1
  6. _Myfirst=0051CF58, _Mylast=0051CF60, _Myend=0051CF60
  7. size=2, capacity=2
  8. element 0: 1
  9. element 1: 2
  10. _Myfirst=0051C278, _Mylast=0051C284, _Myend=0051C284
  11. size=3, capacity=3
  12. element 0: 1
  13. element 1: 2
  14. element 2: 3
  15. _Myfirst=0051C290, _Mylast=0051C2A0, _Myend=0051C2A0
  16. size=4, capacity=4
  17. element 0: 1
  18. element 1: 2
  19. element 2: 3
  20. element 3: 4
  21. _Myfirst=0051B180, _Mylast=0051B190, _Myend=0051B198
  22. size=4, capacity=6
  23. element 0: 1
  24. element 1: 2
  25. element 2: 3
  26. element 3: 4
  27. _Myfirst=0051B180, _Mylast=0051B194, _Myend=0051B198
  28. size=5, capacity=6
  29. element 0: 1
  30. element 1: 2
  31. element 2: 3
  32. element 3: 4
  33. element 4: 5
  34. _Myfirst=0051B180, _Mylast=0051B198, _Myend=0051B198
  35. size=6, capacity=6
  36. element 0: 1
  37. element 1: 2
  38. element 2: 3
  39. element 3: 4
  40. element 4: 5
  41. element 5: 6
  42. 6
  43. 6619158

可以看到,在main函数的头部也没有分配空间。当第一次push_back()调用结束后,缓冲区被分配。同时在每一次调用push_back()后,数组的大小和缓冲区容量都增大了。缓冲区地址也变化了,因为push_back()函数每次都会在堆中重新分配缓冲区。这是个耗时的操作,这就是为什么提前预测数组大小,同时调用.reserve()方法预留空间比较重要了。最后数据是垃圾数据,没有数组元素位于这个位置,因此打印出了随机的数据。这也说明了std::vector的[]操作并不校验数组下标是否越界。然而.at()方法会做相应检查,当出现越界时会抛出std::out_of_range。 让我们来看代码:

Listing 34.11: MSVC 2012 /GS- /Ob1

  1. $SG52650 DB ’%d’, 0aH, 00H
  2. $SG52651 DB ’%d’, 0aH, 00H
  3. _this$ = -4 ; size = 4
  4. __Pos$ = 8 ; size = 4
  5. ?at@?$vector@HV?$allocator@H@std@@@std@@QAEAAHI@Z PROC ; std::vector<int,std::allocator<int> >::
  6. at, COMDAT
  7. ; _this$ = ecx
  8. push ebp
  9. mov ebp, esp
  10. push ecx
  11. mov DWORD PTR _this$[ebp], ecx
  12. mov eax, DWORD PTR _this$[ebp]
  13. mov ecx, DWORD PTR _this$[ebp]
  14. mov edx, DWORD PTR [eax+4]
  15. sub edx, DWORD PTR [ecx]
  16. sar edx, 2
  17. cmp edx, DWORD PTR __Pos$[ebp]
  18. ja SHORT $LN1@at
  19. push OFFSET ??_C@_0BM@NMJKDPPO@invalid?5vector?$DMT?$DO?5subscript?$AA@
  20. call DWORD PTR __imp_?_Xout_of_range@std@@YAXPBD@Z
  21. $LN1@at:
  22. mov eax, DWORD PTR _this$[ebp]
  23. mov ecx, DWORD PTR [eax]
  24. mov edx, DWORD PTR __Pos$[ebp]
  25. lea eax, DWORD PTR [ecx+edx*4]
  26. $LN3@at:
  27. mov esp, ebp
  28. pop ebp
  29. ret 4
  30. ?at@?$vector@HV?$allocator@H@std@@@std@@QAEAAHI@Z ENDP ; std::vector<int,std::allocator<int> >::
  31. at
  32. _c$ = -36 ; size = 12
  33. $T1 = -24 ; size = 4
  34. $T2 = -20 ; size = 4
  35. $T3 = -16 ; size = 4
  36. $T4 = -12 ; size = 4
  37. $T5 = -8 ; size = 4
  38. $T6 = -4 ; size = 4
  39. _main PROC
  40. push ebp
  41. mov ebp, esp
  42. sub esp, 36 ; 00000024H
  43. mov DWORD PTR _c$[ebp], 0 ; Myfirst
  44. mov DWORD PTR _c$[ebp+4], 0 ; Mylast
  45. mov DWORD PTR _c$[ebp+8], 0 ; Myend
  46. lea eax, DWORD PTR _c$[ebp]
  47. push eax
  48. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  49. add esp, 4
  50. mov DWORD PTR $T6[ebp], 1
  51. lea ecx, DWORD PTR $T6[ebp]
  52. push ecx
  53. lea ecx, DWORD PTR _c$[ebp]
  54. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  55. ::allocator<int> >::push_back
  56. lea edx, DWORD PTR _c$[ebp]
  57. push edx
  58. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  59. add esp, 4
  60. mov DWORD PTR $T5[ebp], 2
  61. lea eax, DWORD PTR $T5[ebp]
  62. push eax
  63. lea ecx, DWORD PTR _c$[ebp]
  64. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  65. ::allocator<int> >::push_back
  66. lea ecx, DWORD PTR _c$[ebp]
  67. push ecx
  68. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  69. add esp, 4
  70. mov DWORD PTR $T4[ebp], 3
  71. lea edx, DWORD PTR $T4[ebp]
  72. push edx
  73. lea ecx, DWORD PTR _c$[ebp]
  74. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  75. ::allocator<int> >::push_back
  76. lea eax, DWORD PTR _c$[ebp]
  77. push eax
  78. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  79. add esp, 4
  80. mov DWORD PTR $T3[ebp], 4
  81. lea ecx, DWORD PTR $T3[ebp]
  82. push ecx
  83. lea ecx, DWORD PTR _c$[ebp]
  84. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  85. ::allocator<int> >::push_back
  86. lea edx, DWORD PTR _c$[ebp]
  87. push edx
  88. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  89. add esp, 4
  90. push 6
  91. lea ecx, DWORD PTR _c$[ebp]
  92. call ?reserve@?$vector@HV?$allocator@H@std@@@std@@QAEXI@Z ; std::vector<int,std::
  93. allocator<int> >::reserve
  94. lea eax, DWORD PTR _c$[ebp]
  95. push eax
  96. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  97. add esp, 4
  98. mov DWORD PTR $T2[ebp], 5
  99. lea ecx, DWORD PTR $T2[ebp]
  100. push ecx
  101. lea ecx, DWORD PTR _c$[ebp]
  102. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  103. ::allocator<int> >::push_back
  104. lea edx, DWORD PTR _c$[ebp]
  105. push edx
  106. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  107. add esp, 4
  108. mov DWORD PTR $T1[ebp], 6
  109. lea eax, DWORD PTR $T1[ebp]
  110. push eax
  111. lea ecx, DWORD PTR _c$[ebp]
  112. call ?push_back@?$vector@HV?$allocator@H@std@@@std@@QAEX$$QAH@Z ; std::vector<int,std
  113. ::allocator<int> >::push_back
  114. lea ecx, DWORD PTR _c$[ebp]
  115. push ecx
  116. call ?dump@@YAXPAUvector_of_ints@@@Z ; dump
  117. add esp, 4
  118. push 5
  119. lea ecx, DWORD PTR _c$[ebp]
  120. call ?at@?$vector@HV?$allocator@H@std@@@std@@QAEAAHI@Z ; std::vector<int,std::
  121. allocator<int> >::at
  122. mov edx, DWORD PTR [eax]
  123. push edx
  124. push OFFSET $SG52650 ; ’%d
  125. call DWORD PTR __imp__printf
  126. add esp, 8
  127. mov eax, 8
  128. shl eax, 2
  129. mov ecx, DWORD PTR _c$[ebp]
  130. mov edx, DWORD PTR [ecx+eax]
  131. push edx
  132. push OFFSET $SG52651 ; ’%d
  133. call DWORD PTR __imp__printf
  134. add esp, 8
  135. lea ecx, DWORD PTR _c$[ebp]
  136. call ?_Tidy@?$vector@HV?$allocator@H@std@@@std@@IAEXXZ ; std::vector<int,std::
  137. allocator<int> >::_Tidy
  138. xor eax, eax
  139. mov esp, ebp
  140. pop ebp
  141. ret 0
  142. _main ENDP

我们看到.at()方法如何进行辩解检查,当发生错误时将抛出异常。最后一个printf()调用只是从内存中读取数据,并没有做任何检查。 有人可能会问,为什么没有用类似std::string中size和capacity的变量,我猜想可能是为了让边界检查更快,但是我不确定。 GCC生成的代码基本上相同,但是.at()方法被内联了。

Listing34.12: GCC 4.8.1 -fno-inline-small-functions –O1

  1. main proc near
  2. push ebp
  3. mov ebp, esp
  4. push edi
  5. push esi
  6. push ebx
  7. and esp, 0FFFFFFF0h
  8. sub esp, 20h
  9. mov dword ptr [esp+14h], 0
  10. mov dword ptr [esp+18h], 0
  11. mov dword ptr [esp+1Ch], 0
  12. lea eax, [esp+14h]
  13. mov [esp], eax
  14. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  15. mov dword ptr [esp+10h], 1
  16. lea eax, [esp+10h]
  17. mov [esp+4], eax
  18. lea eax, [esp+14h]
  19. mov [esp], eax
  20. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  21. >>::push_back(int const&)
  22. lea eax, [esp+14h]
  23. mov [esp], eax
  24. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  25. mov dword ptr [esp+10h], 2
  26. lea eax, [esp+10h]
  27. mov [esp+4], eax
  28. lea eax, [esp+14h]
  29. mov [esp], eax
  30. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  31. >>::push_back(int const&)
  32. lea eax, [esp+14h]
  33. mov [esp], eax
  34. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  35. mov dword ptr [esp+10h], 3
  36. lea eax, [esp+10h]
  37. mov [esp+4], eax
  38. lea eax, [esp+14h]
  39. mov [esp], eax
  40. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  41. >>::push_back(int const&)
  42. lea eax, [esp+14h]
  43. mov [esp], eax
  44. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  45. mov dword ptr [esp+10h], 4
  46. lea eax, [esp+10h]
  47. mov [esp+4], eax
  48. lea eax, [esp+14h]
  49. mov [esp], eax
  50. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  51. >>::push_back(int const&)
  52. lea eax, [esp+14h]
  53. mov [esp], eax
  54. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  55. mov ebx, [esp+14h]
  56. mov eax, [esp+1Ch]
  57. sub eax, ebx
  58. cmp eax, 17h
  59. ja short loc_80001CF
  60. mov edi, [esp+18h]
  61. sub edi, ebx
  62. sar edi, 2
  63. mov dword ptr [esp], 18h
  64. call _Znwj ; operator new(uint)
  65. mov esi, eax
  66. test edi, edi
  67. jz short loc_80001AD
  68. lea eax, ds:0[edi*4]
  69. mov [esp+8], eax ; n
  70. mov [esp+4], ebx ; src
  71. mov [esp], esi ; dest
  72. call memmove
  73. loc_80001AD: ; CODE XREF: main+F8
  74. mov eax, [esp+14h]
  75. test eax, eax
  76. jz short loc_80001BD
  77. mov [esp], eax ; void *
  78. call _ZdlPv ; operator delete(void *)
  79. loc_80001BD: ; CODE XREF: main+117
  80. mov [esp+14h], esi
  81. lea eax, [esi+edi*4]
  82. mov [esp+18h], eax
  83. add esi, 18h
  84. mov [esp+1Ch], esi
  85. loc_80001CF: ; CODE XREF: main+DD
  86. lea eax, [esp+14h]
  87. mov [esp], eax
  88. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  89. mov dword ptr [esp+10h], 5
  90. lea eax, [esp+10h]
  91. mov [esp+4], eax
  92. lea eax, [esp+14h]
  93. mov [esp], eax
  94. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  95. >>::push_back(int const&)
  96. lea eax, [esp+14h]
  97. mov [esp], eax
  98. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  99. mov dword ptr [esp+10h], 6
  100. lea eax, [esp+10h]
  101. mov [esp+4], eax
  102. lea eax, [esp+14h]
  103. mov [esp], eax
  104. call _ZNSt6vectorIiSaIiEE9push_backERKi ; std::vector<int,std::allocator<int
  105. >>::push_back(int const&)
  106. lea eax, [esp+14h]
  107. mov [esp], eax
  108. call _Z4dumpP14vector_of_ints ; dump(vector_of_ints *)
  109. mov eax, [esp+14h]
  110. mov edx, [esp+18h]
  111. sub edx, eax
  112. cmp edx, 17h
  113. ja short loc_8000246
  114. mov dword ptr [esp], offset aVector_m_range ; "vector::_M_range_check"
  115. call _ZSt20__throw_out_of_rangePKc ; std::__throw_out_of_range(char const*)
  116. loc_8000246: ; CODE XREF: main+19C
  117. mov eax, [eax+14h]
  118. mov [esp+8], eax
  119. mov dword ptr [esp+4], offset aD ; "%d\n"
  120. mov dword ptr [esp], 1
  121. call __printf_chk
  122. mov eax, [esp+14h]
  123. mov eax, [eax+20h]
  124. mov [esp+8], eax
  125. mov dword ptr [esp+4], offset aD ; "%d\n"
  126. mov dword ptr [esp], 1
  127. call __printf_chk
  128. mov eax, [esp+14h]
  129. test eax, eax
  130. jz short loc_80002AC
  131. mov [esp], eax ; void *
  132. call _ZdlPv ; operator delete(void *)
  133. jmp short loc_80002AC
  134. ; ---------------------------------------------------------------------------
  135. mov ebx, eax
  136. mov edx, [esp+14h]
  137. test edx, edx
  138. jz short loc_80002A4
  139. mov [esp], edx ; void *
  140. call _ZdlPv ; operator delete(void *)
  141. loc_80002A4: ; CODE XREF: main+1FE
  142. mov [esp], ebx
  143. call _Unwind_Resume
  144. ; ---------------------------------------------------------------------------
  145. loc_80002AC: ; CODE XREF: main+1EA
  146. ; main+1F4
  147. mov eax, 0
  148. lea esp, [ebp-0Ch]
  149. pop ebx
  150. pop esi
  151. pop edi
  152. pop ebp
  153. locret_80002B8: ; DATA XREF: .eh_frame:08000510
  154. ; .eh_frame:080005BC
  155. retn
  156. main endp

.reserve()方法也被内联了。如果缓冲区大小小于新的size,则调用new()申请新缓冲区,调用memmove()拷贝缓冲区内容,然后调用delete()释放旧的缓冲区。 让你给我们看看通过GCC编译后程序的输出

  1. _Myfirst=0x(nil), _Mylast=0x(nil), _Myend=0x(nil)
  2. size=0, capacity=0
  3. _Myfirst=0x8257008, _Mylast=0x825700c, _Myend=0x825700c
  4. size=1, capacity=1
  5. element 0: 1
  6. _Myfirst=0x8257018, _Mylast=0x8257020, _Myend=0x8257020
  7. size=2, capacity=2
  8. element 0: 1
  9. element 1: 2
  10. _Myfirst=0x8257028, _Mylast=0x8257034, _Myend=0x8257038
  11. size=3, capacity=4
  12. element 0: 1
  13. element 1: 2
  14. element 2: 3
  15. _Myfirst=0x8257028, _Mylast=0x8257038, _Myend=0x8257038
  16. size=4, capacity=4
  17. element 0: 1
  18. element 1: 2
  19. element 2: 3
  20. element 3: 4
  21. _Myfirst=0x8257040, _Mylast=0x8257050, _Myend=0x8257058
  22. size=4, capacity=6
  23. element 0: 1
  24. element 1: 2
  25. element 2: 3
  26. element 3: 4
  27. _Myfirst=0x8257040, _Mylast=0x8257054, _Myend=0x8257058
  28. size=5, capacity=6
  29. element 0: 1
  30. element 1: 2
  31. element 2: 3
  32. element 3: 4
  33. element 4: 5
  34. _Myfirst=0x8257040, _Mylast=0x8257058, _Myend=0x8257058
  35. size=6, capacity=6
  36. element 0: 1
  37. element 1: 2
  38. element 2: 3
  39. element 3: 4
  40. element 4: 5
  41. element 5: 6
  42. 6
  43. 0

我们可以看到缓冲区大小的增长不同于MSVC中。 简单的实验说明MSVC中缓冲区每次扩大当前大小的50%,而GCC中则每次扩大100%。