Disassembly window shows seriously messed up mapping of source code onto machine code - by Dmitry Me

Status : 

  Fixed

		This item has been fixed in the current or upcoming version of this product.

		A more detailed explanation for the resolution of this particular item may have been provided in the comments section.


1
0
Sign in
to vote
ID 690107 Comments
Status Closed Workarounds
Type Bug Repros 0
Opened 9/22/2011 12:02:49 AM
Access Restriction Public
Moderator Decision Sent to Engineering Team for consideration

Description

This behavior is observed in version 10.0.40219.1 SP1Rel

I have this code:

#include "stdafx.h"
#include <Windows.h>

int myStrlenImpl( const char * ptr, int len ) // Recursive helper: returns len plus the number of chars before the first zero byte at ptr.
{
    if( *ptr == 0 ) { // Terminating zero byte reached: len is the final count.
        return len;
    }
    return myStrlenImpl( ptr + 1, 1 + len); // Recurse on the next character with the count incremented.
}

int myStrlen( const char* ptr ) // Returns the count produced by myStrlenImpl for ptr, starting from 0.
{
	return myStrlenImpl( ptr, 0 );
}

int _tmain(int /*argc*/, _TCHAR* /*argv*/[]) // Entry point: measures a fixed string and calls Sleep(0) if it is non-empty.
{
    char buffer[100];
	strcpy( buffer, "Hello" ); // "Hello" plus its terminator is 6 bytes, well within the 100-byte buffer.
	if( myStrlen( buffer ) ) { // Non-zero length is treated as true.
		Sleep( 0 );
	}
    return 0;
}

I compile it with /O2, run and open the disassembly. Here's what I see:

--- c:\pathtofile\source.cpp ----
     8:     }
     9:     return myStrlenImpl( ptr + 1, 1 + len);
00401000  jmp         myStrlenImpl+7 (401007h)  
00401002  inc         dword ptr [esp+4]  
00401006  inc         eax  
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3: 
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007  cmp         byte ptr [eax],0  
0040100A  jne         myStrlenImpl+2 (401002h)  
     7:         return len;
0040100C  mov         eax,dword ptr [esp+4]  
    10: }
00401010  ret  
    11: 
    12: int myStrlen( const char* ptr )
    13: {
    14: 	return myStrlenImpl( ptr, 0 );
    15: }
    16: 
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011  push        ebp  
00401012  mov         ebp,esp  
00401014  sub         esp,68h  
00401017  mov         eax,dword ptr [___security_cookie (403000h)]  
0040101C  xor         eax,ebp  
0040101E  mov         dword ptr [ebp-4],eax  
00401021  push        esi  
00401022  push        edi  
    19:     char buffer[100];
    20: 	strcpy( buffer, "Hello" );
00401023  mov         esi,offset string "Hello" (4020E4h)  
00401028  lea         edi,[ebp-68h]  
0040102B  movs        dword ptr es:[edi],dword ptr [esi]  
0040102C  movs        word ptr es:[edi],word ptr [esi]  
    21: 	if( myStrlen( buffer ) ) {
0040102E  cmp         byte ptr [ebp-68h],0  
00401032  pop         edi  
00401033  pop         esi  
00401034  je          wmain+3Eh (40104Fh)  
00401036  push        1  
00401038  lea         eax,[ebp-67h]  
0040103B  call        myStrlenImpl (401000h)  
00401040  add         esp,4  
00401043  test        eax,eax  
00401045  je          wmain+3Eh (40104Fh)  
    22: 		Sleep( 0 );
00401047  push        0  
00401049  call        dword ptr [__imp__Sleep@4 (402000h)]  
    23: 	}
    24:     return 0;
    25: }
0040104F  mov         ecx,dword ptr [ebp-4]  
00401052  xor         ecx,ebp  
00401054  xor         eax,eax  
00401056  call        __security_check_cookie (40105Dh)  
0040105B  leave  
0040105C  ret  
--- f:\dd\vctools\crt_bld\self_x86\crt\src\intel\secchk.c ----------------------

Note that the first lines shown for the file are the end of the function, followed by the #includes and then the start of the function - in effect the function is split into two parts with the #includes interleaved between them. Such a weird mapping seriously impedes usability.
Sign in to post a comment.
Posted by Microsoft on 10/4/2011 at 1:05 PM
Dmitry,

The compiler did some interesting rearrangement of the code in myStrlenImpl and wound up associating the wrong line # with one of the instructions in that code. In pseudo-code the rewrite looks like this (with associated line #s):

{                            ; line #5
    goto L1                 ; line #9 <-- this is incorrect - it should be line 5
L2: ptr = ptr + 1            ; line #9
    len = len + 1            ; line #9
L1: if (*ptr != 0) goto L2   ; line #6
    return len             ; line #7
}                            ; line #10

The incorrect line generated for the first instruction has been fixed in our compiler and you should not see that in the next major release.

You may still see the strange disassembly at the start of the source file. I suspect that it is an artifact of how the debugger decodes the instructions before the current instruction pointer - it's clearly not decoding all the way to the start of myStrlenImpl at offset 00401000, resulting in an odd decoding. If you enter myStrlenImpl into the Disassembly window's Address bar, you will see the correct decoding.

Mark Levine
Visual C++


00031000 EB 05                jmp         myStrlenImpl+7 (31007h)
00031002 FF 44 24 04         inc         dword ptr [esp+4]
00031006 40                 inc         eax
#include "stdafx.h"
#include <Windows.h>
#if 0
int myStrlenImpl( const char * ptr, int len )
{
    if( *ptr == 0 ) {
        return len;
    }
    return myStrlenImpl( ptr + 1, 1 + len);
}
#else
int myStrlenImpl(const char * ptr, int len);
#endif

int myStrlen( const char* ptr )
{
    return myStrlenImpl( ptr, 0 );
}

int myStrlenImpl( const char * ptr, int len )
{
    if( *ptr == 0 ) {
00031007 80 38 00             cmp         byte ptr [eax],0
0003100A 75 F6                jne         myStrlenImpl+2 (31002h)
        return len;
0003100C 8B 44 24 04         mov         eax,dword ptr [esp+4]
}
00031010 C3                 ret
Posted by Microsoft on 10/4/2011 at 11:07 AM
Dmitry,

Thank you for the attached project. With your project, I have been able to reproduce what you are seeing. It appears that the project needs to be built to "Favor small code" (/Os) in order to demonstrate the strange disassembly.

Mark
Posted by Dmitry Me on 10/3/2011 at 10:50 PM
Attached a project privately. In the IDE I have the "Win32 Release" configuration selected; I put a breakpoint on the opening brace of _tmain() and hit F5. Once the program starts I go to the Disassembly window. Here's what it looks like when the program has just started (the "current instruction" is at address 00401011):

00401001 add         eax,42444FFh
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret

then I start stepping over the machine code and once I execute the call into myStrlenImpl() the disassembly changes to this:

8:     }
     9:     return myStrlenImpl( ptr + 1, 1 + len);
00401000 jmp         myStrlenImpl+7 (401007h)
00401002 inc         dword ptr [esp+4]
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret

Here the code of myStrlenImpl() is interleaved with the #includes. So there are two issues: the displayed disassembly changes as I step through the code, and the function's code is interleaved with the #includes.
Posted by Microsoft on 10/3/2011 at 6:33 PM
Dmitry, I have not been able to reproduce this problem locally. You have indicated that this behavior is not reliably reproducible. I'd like to try to gather some more information.

What OS are you running on?
Do you build from the command line or from the IDE?
If building from the IDE, are you using the standard Release/Win32 configuration or have you made modifications?
Also, if you are building from the IDE, could you perhaps share your solution (to try to remove any unnecessary variations I might be introducing when I try to reproduce it).

Mark Levine
Visual C++
Posted by MS-Moderator10 [Feedback Moderator] on 9/22/2011 at 7:54 PM
Thank you for submitting feedback on Visual Studio 2010 and .NET Framework. Your issue has been routed to the appropriate VS development team for investigation. We will contact you if we require any additional information.
Posted by MS-Moderator01 on 9/22/2011 at 12:43 AM
Thank you for your feedback, we are currently reviewing the issue you have submitted. If this issue is urgent, please contact support directly (http://support.microsoft.com).
Posted by Dmitry Me on 9/22/2011 at 12:29 AM
Great, this thing does not seem to be reliably reproducible. Here's an alternative disassembly:

--- c:\pathtofile\source.cpp ----
00401001 add         eax,42444FFh
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret
--- f:\dd\vctools\crt_bld\self_x86\crt\src\intel\secchk.c ----------------------

Note there is

0040100A jne         myStrlenImpl+2 (401002h)

but the listing shows no instruction at address 401002h.