Home Dashboard Directory Help
Search

Disassembly window shows seriously messed up mapping of source code onto machine code by Dmitry Me


Status: 

Closed
 as Fixed Help for as Fixed


1
0
Sign in
to vote
Type: Bug
ID: 690107
Opened: 9/22/2011 12:02:49 AM
Access Restriction: Public
Moderator Decision: Sent to Engineering Team for consideration
0
Workaround(s)
view
0
User(s) can reproduce this bug

Description

This behavior is observed in version 10.0.40219.1 SP1Rel

I have this code:

#include "stdafx.h"
#include <Windows.h>

int myStrlenImpl( const char * ptr, int len )
{
    if( *ptr == 0 ) {
        return len;
    }
    return myStrlenImpl( ptr + 1, 1 + len);
}

int myStrlen( const char* ptr )
{
    return myStrlenImpl( ptr, 0 );
}

int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
{
    char buffer[100];
    strcpy( buffer, "Hello" );
    if( myStrlen( buffer ) ) {
        Sleep( 0 );
    }
    return 0;
}

I compile it with /O2, run and open the disassembly. Here's what I see:

--- c:\pathtofile\source.cpp ----
     8:     }
     9:     return myStrlenImpl( ptr + 1, 1 + len);
00401000 jmp         myStrlenImpl+7 (401007h)
00401002 inc         dword ptr [esp+4]
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret
--- f:\dd\vctools\crt_bld\self_x86\crt\src\intel\secchk.c ----------------------

Note that the first lines shown are the end of the function, followed by the #include directives, and then the start of the function - in effect the function is split into two parts and interleaved with the #includes. Such weird mapping seriously impedes usability.
Details
Sign in to post a comment.
Posted by Microsoft on 10/4/2011 at 1:05 PM
Dmitry,

The compiler did some interesting rearrangement of the code in myStrlenImpl and wound up associating the wrong line # with one of the instructions in that code. In pseudo-code the rewrite looks like this (with associated line #s):

{                            ; line #5
    goto L1                 ; line #9 <-- this is incorrect - it should be line 5
L2: ptr = ptr + 1            ; line #9
    len = len + 1            ; line #9
L1: if (*ptr != 0) goto L2   ; line #6
    return len             ; line #7
}                            ; line #10

The incorrect line generated for the first instruction has been fixed in our compiler and you should not see that in the next major release.

You may still see the strange disassembly at the start of the source file. I suspect that it is an artifact of how the debugger decodes the instructions before the current instruction pointer - it's clearly not decoding all the way to the start of myStrlenImpl at offset 00401000, resulting in an odd decoding. If you enter myStrlenImpl into the Disassembly window's Address bar, you will see the correct decoding.

Mark Levine
Visual C++


00031000 EB 05                jmp         myStrlenImpl+7 (31007h)
00031002 FF 44 24 04         inc         dword ptr [esp+4]
00031006 40                 inc         eax
#include "stdafx.h"
#include <Windows.h>
#if 0
int myStrlenImpl( const char * ptr, int len )
{
    if( *ptr == 0 ) {
        return len;
    }
    return myStrlenImpl( ptr + 1, 1 + len);
}
#else
int myStrlenImpl(const char * ptr, int len);
#endif

int myStrlen( const char* ptr )
{
    return myStrlenImpl( ptr, 0 );
}

int myStrlenImpl( const char * ptr, int len )
{
    if( *ptr == 0 ) {
00031007 80 38 00             cmp         byte ptr [eax],0
0003100A 75 F6                jne         myStrlenImpl+2 (31002h)
        return len;
0003100C 8B 44 24 04         mov         eax,dword ptr [esp+4]
}
00031010 C3                 ret
Posted by Microsoft on 10/4/2011 at 11:07 AM
Dmitry,

Thank you for the attached project. With your project, I have been able to reproduce what you are seeing. It appears that the project needs to be built to "Favor small code" (/Os) in order to demonstrate the strange disassembly.

Mark
Posted by Dmitry Me on 10/3/2011 at 10:50 PM
I attached a project privately. In the IDE I have the "Win32 Release" configuration selected; I put a breakpoint on the opening brace of _tmain() and hit F5. Once the program starts I go to the Disassembly window. Here's what it looks like when the program has just started (the "current instruction" is at address 00401011):

00401001 add         eax,42444FFh
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret

Then I start stepping over the machine code, and once I execute the call into myStrlenImpl() the disassembly changes to this:

8:     }
     9:     return myStrlenImpl( ptr + 1, 1 + len);
00401000 jmp         myStrlenImpl+7 (401007h)
00401002 inc         dword ptr [esp+4]
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret

Here the code of myStrlenImpl() is interleaved with the #includes. So there are two issues: the displayed disassembly changes as I step through the code, and the function's code is interleaved with the #includes.
Posted by Microsoft on 10/3/2011 at 6:33 PM
Dmitry, I have not been able to reproduce this problem locally. You have indicated that this behavior is not reliably reproducible. I'd like to try to gather some more information.

What OS are you running on?
Do you build from the command line or from the IDE?
If building from the IDE, are you using the standard Release/Win32 configuration or have you made modifications?
Also, if you are building from the IDE, could you perhaps share your solution (to try to remove any unnecessary variations I might be introducing when I try to reproduce it).

Mark Levine
Visual C++
Posted by MS-Moderator10 [Feedback Moderator] on 9/22/2011 at 7:54 PM
Thank you for submitting feedback on Visual Studio 2010 and .NET Framework. Your issue has been routed to the appropriate VS development team for investigation. We will contact you if we require any additional information.
Posted by MS-Moderator01 on 9/22/2011 at 12:43 AM
Thank you for your feedback. We are currently reviewing the issue you have submitted. If this issue is urgent, please contact support directly (http://support.microsoft.com).
Posted by Dmitry Me on 9/22/2011 at 12:29 AM
Great, this thing does not seem to be reliably reproducible. Here's an alternative disassembly:

--- c:\pathtofile\source.cpp ----
00401001 add         eax,42444FFh
00401006 inc         eax
     1: #include "stdafx.h"
     2: #include <Windows.h>
     3:
     4: int myStrlenImpl( const char * ptr, int len )
     5: {
     6:     if( *ptr == 0 ) {
00401007 cmp         byte ptr [eax],0
0040100A jne         myStrlenImpl+2 (401002h)
     7:         return len;
0040100C mov         eax,dword ptr [esp+4]
    10: }
00401010 ret
    11:
    12: int myStrlen( const char* ptr )
    13: {
    14:     return myStrlenImpl( ptr, 0 );
    15: }
    16:
    17: int _tmain(int /*argc*/, _TCHAR* /*argv*/[])
    18: {
00401011 push        ebp
00401012 mov         ebp,esp
00401014 sub         esp,68h
00401017 mov         eax,dword ptr [___security_cookie (403000h)]
0040101C xor         eax,ebp
0040101E mov         dword ptr [ebp-4],eax
00401021 push        esi
00401022 push        edi
    19:     char buffer[100];
    20:     strcpy( buffer, "Hello" );
00401023 mov         esi,offset string "Hello" (4020E4h)
00401028 lea         edi,[ebp-68h]
0040102B movs        dword ptr es:[edi],dword ptr [esi]
0040102C movs        word ptr es:[edi],word ptr [esi]
    21:     if( myStrlen( buffer ) ) {
0040102E cmp         byte ptr [ebp-68h],0
00401032 pop         edi
00401033 pop         esi
00401034 je         wmain+3Eh (40104Fh)
00401036 push        1
00401038 lea         eax,[ebp-67h]
0040103B call        myStrlenImpl (401000h)
00401040 add         esp,4
00401043 test        eax,eax
00401045 je         wmain+3Eh (40104Fh)
    22:         Sleep( 0 );
00401047 push        0
00401049 call        dword ptr [__imp__Sleep@4 (402000h)]
    23:     }
    24:     return 0;
    25: }
0040104F mov         ecx,dword ptr [ebp-4]
00401052 xor         ecx,ebp
00401054 xor         eax,eax
00401056 call        __security_check_cookie (40105Dh)
0040105B leave
0040105C ret
--- f:\dd\vctools\crt_bld\self_x86\crt\src\intel\secchk.c ----------------------

Note there is

0040100A jne         myStrlenImpl+2 (401002h)

but there's no code with address 401002h.

Sign in to post a workaround.
File Name Submitted By Submitted On File Size  
AnalyzeTest-690107.zip (restricted) 10/3/2011 -