(mis)Optimization

Reads/writes to registers are quite special. I may even dare to say that they are the embodiment of side effects. In the previous example we wrote four different values to the same register. If you didn’t know that address was a register, you may have simplified the logic to just write the final value 1 << (11 + 16) into the register.

Actually, LLVM, the compiler’s backend / optimizer, does not know we are dealing with a register and will merge the writes, thus changing the behavior of our program. Let’s check that really quick.

  1. $ cargo run --release
  2. (..)
  3. Breakpoint 1, main () at src/07-registers/src/main.rs:9
  4. 9 aux7::init();
  5. (gdb) next
  6. 25 *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
  7. (gdb) disassemble /m
  8. Dump of assembler code for function main:
  9. 7 #[entry]
  10. 8 fn main() -> ! {
  11. 9 aux7::init();
  12. 0x08000188 <+0>: bl 0x800019c <aux7::init>
  13. 0x0800018c <+4>: movw r0, #4120 ; 0x1018
  14. 0x08000190 <+8>: mov.w r1, #134217728 ; 0x8000000
  15. 0x08000194 <+12>: movt r0, #18432 ; 0x4800
  16. 10
  17. 11 unsafe {
  18. 12 // A magic address!
  19. 13 const GPIOE_BSRR: u32 = 0x48001018;
  20. 14
  21. 15 // Turn on the "North" LED (red)
  22. 16 *(GPIOE_BSRR as *mut u32) = 1 << 9;
  23. 17
  24. 18 // Turn on the "East" LED (green)
  25. 19 *(GPIOE_BSRR as *mut u32) = 1 << 11;
  26. 20
  27. 21 // Turn off the "North" LED
  28. 22 *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
  29. 23
  30. 24 // Turn off the "East" LED
  31. 25 *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
  32. => 0x08000198 <+16>: str r1, [r0, #0]
  33. 26 }
  34. 27
  35. 28 loop {}
  36. 0x0800019a <+18>: b.n 0x800019a <main+18>
  37. End of assembler dump.

The state of the LEDs didn’t change this time! The str instruction is the one that writes a value to the register. Our debug (unoptimized) program had four of them, one for each write to the register, but the release (optimized) program only has one.

We can check that the debug (unoptimized) program really contains four str instructions using objdump:

  1. $ # same as cargo objdump -- -d -no-show-raw-insn -print-imm-hex -source target/thumbv7em-none-eabihf/debug/registers
  2. $ cargo objdump --bin registers -- -d -no-show-raw-insn -print-imm-hex -source
  3. registers: file format ELF32-arm-little
  4. Disassembly of section .text:
  5. main:
  6. ; #[entry]
  7. 8000188: sub sp, #0x18
  8. ; aux7::init();
  9. 800018a: bl #0xbc
  10. 800018e: str r0, [sp, #0x14]
  11. 8000190: b #-0x2 <main+0xa>
  12. ; *(GPIOE_BSRR as *mut u32) = 1 << 9;
  13. 8000192: b #-0x2 <main+0xc>
  14. 8000194: movw r0, #0x1018
  15. 8000198: movt r0, #0x4800
  16. 800019c: mov.w r1, #0x200
  17. 80001a0: str r1, [r0]
  18. ; *(GPIOE_BSRR as *mut u32) = 1 << 11;
  19. 80001a2: b #-0x2 <main+0x1c>
  20. 80001a4: movw r0, #0x1018
  21. 80001a8: movt r0, #0x4800
  22. 80001ac: mov.w r1, #0x800
  23. 80001b0: str r1, [r0]
  24. 80001b2: movs r0, #0x19
  25. ; *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
  26. 80001b4: mov r1, r0
  27. 80001b6: cmp r0, #0x9
  28. 80001b8: str r1, [sp, #0x10]
  29. 80001ba: bvs #0x54 <main+0x8a>
  30. 80001bc: b #-0x2 <main+0x36>
  31. 80001be: ldr r0, [sp, #0x10]
  32. 80001c0: and r1, r0, #0x1f
  33. 80001c4: movs r2, #0x1
  34. 80001c6: lsl.w r1, r2, r1
  35. 80001ca: lsrs r2, r0, #0x5
  36. 80001cc: cmp r2, #0x0
  37. 80001ce: str r1, [sp, #0xc]
  38. 80001d0: bne #0x4c <main+0x98>
  39. 80001d2: b #-0x2 <main+0x4c>
  40. 80001d4: movw r0, #0x1018
  41. 80001d8: movt r0, #0x4800
  42. 80001dc: ldr r1, [sp, #0xc]
  43. 80001de: str r1, [r0]
  44. 80001e0: movs r0, #0x1b
  45. ; *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
  46. 80001e2: mov r2, r0
  47. 80001e4: cmp r0, #0xb
  48. 80001e6: str r2, [sp, #0x8]
  49. 80001e8: bvs #0x42 <main+0xa6>
  50. 80001ea: b #-0x2 <main+0x64>
  51. 80001ec: ldr r0, [sp, #0x8]
  52. 80001ee: and r1, r0, #0x1f
  53. 80001f2: movs r2, #0x1
  54. 80001f4: lsl.w r1, r2, r1
  55. 80001f8: lsrs r2, r0, #0x5
  56. 80001fa: cmp r2, #0x0
  57. 80001fc: str r1, [sp, #0x4]
  58. 80001fe: bne #0x3a <main+0xb4>
  59. 8000200: b #-0x2 <main+0x7a>
  60. 8000202: movw r0, #0x1018
  61. 8000206: movt r0, #0x4800
  62. 800020a: ldr r1, [sp, #0x4]
  63. 800020c: str r1, [r0]
  64. ; loop {}
  65. 800020e: b #-0x2 <main+0x88>
  66. 8000210: b #-0x4 <main+0x88>
  67. ; *(GPIOE_BSRR as *mut u32) = 1 << (9 + 16);
  68. 8000212: movw r0, #0x41bc
  69. 8000216: movt r0, #0x800
  70. 800021a: bl #0x3b28
  71. 800021e: trap
  72. 8000220: movw r0, #0x4204
  73. 8000224: movt r0, #0x800
  74. 8000228: bl #0x3b1a
  75. 800022c: trap
  76. ; *(GPIOE_BSRR as *mut u32) = 1 << (11 + 16);
  77. 800022e: movw r0, #0x421c
  78. 8000232: movt r0, #0x800
  79. 8000236: bl #0x3b0c
  80. 800023a: trap
  81. 800023c: movw r0, #0x4234
  82. 8000240: movt r0, #0x800
  83. 8000244: bl #0x3afe
  84. 8000248: trap

How do we prevent LLVM from misoptimizing our program? We use volatile operations instead of plain reads/writes:

  1. #![no_main]
  2. #![no_std]
  3. use core::ptr;
  4. #[allow(unused_imports)]
  5. use aux7::{entry, iprint, iprintln};
  6. #[entry]
  7. fn main() -> ! {
  8. aux7::init();
  9. unsafe {
  10. // A magic address!
  11. const GPIOE_BSRR: u32 = 0x48001018;
  12. // Turn on the "North" LED (red)
  13. ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << 9);
  14. // Turn on the "East" LED (green)
  15. ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << 11);
  16. // Turn off the "North" LED
  17. ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << (9 + 16));
  18. // Turn off the "East" LED
  19. ptr::write_volatile(GPIOE_BSRR as *mut u32, 1 << (11 + 16));
  20. }
  21. loop {}
  22. }

If we look at the disassembly of this new program compiled in release mode:

  1. $ cargo objdump --bin registers --release -- -d -no-show-raw-insn -print-imm-hex -source
  2. registers: file format ELF32-arm-little
  3. Disassembly of section .text:
  4. main:
  5. ; #[entry]
  6. 8000188: bl #0x22
  7. ; aux7::init();
  8. 800018c: movw r0, #0x1018
  9. 8000190: mov.w r1, #0x200
  10. 8000194: movt r0, #0x4800
  11. 8000198: str r1, [r0]
  12. 800019a: mov.w r1, #0x800
  13. 800019e: str r1, [r0]
  14. 80001a0: mov.w r1, #0x2000000
  15. 80001a4: str r1, [r0]
  16. 80001a6: mov.w r1, #0x8000000
  17. 80001aa: str r1, [r0]
  18. ; loop {}
  19. 80001ac: b #-0x4 <main+0x24>

We see that the four writes (str instructions) are preserved. If you run it (use stepi), you’ll also see that the behavior of the program is preserved.