#1
  1. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Nov 2006
    Posts
    8
    Rep Power
    0

    Program ending prematurely [Inline x86 ASM in CPP]


    First, I'd like to say that this IS a school project. I'm not going to try to hide that.

    Anyway, I was provided with this c++ code:
    Code:
      #include <iostream>
      using namespace std;
      
      
      //----Prototypes------------------------------------------------------
      void CmpTest( char *buffer1, char *buffer2, int size );
      int  MemCmp( char* mem1, char* mem2, int size );
      
      
      //////////////////////////////////////////////////////////////////////
      //  main() : int
      //////////////////////////////////////////////////////////////////////
      int main()
      {
        char buffer1[1024], buffer2[1024];
      
        for (int i=0; i<1024; i+=2 )
        {
          buffer1[i] = 0xCD;
          buffer2[i] = 0xCD;
          buffer1[i+1] = 0xAB;
          buffer2[i+1] = 0xAB;
        }
      
        CmpTest( buffer1, buffer2, 1024 );
        buffer1[251] = 0xAC;
        CmpTest( buffer1, buffer2, 1024 );
      
        return 0;
      }
      
      
      //////////////////////////////////////////////////////////////////////
      //  CmpTest( buffer1, buffer2 : char*, size : int )
      //    Accepts two memory locations, a number of bytes to compare, and
      //    prints the results of the comparsion.
      //////////////////////////////////////////////////////////////////////
      void CmpTest( char *buffer1, char *buffer2, int size )
      {
        int index_of_diff = MemCmp( buffer1, buffer2, size );
        if (index_of_diff == -1)
        {
          cout << "Memory regions match" << endl;
        }
        else
        {
          cout << "Memory regions differ at index " << index_of_diff << endl;
        }
      }
      
      
      //////////////////////////////////////////////////////////////////////
      //  MemCmp( mem1, mem2 : char*, size : int )
      //    Accepts two memory locations, a number of bytes to compare, and
      //    returns the index of the first difference or -1 if there are
      //    no differences.
      //////////////////////////////////////////////////////////////////////
      int MemCmp( char* mem1, char* mem2, int size )
      {
        int result = -1;
      
        // BEGIN inline assembly replacement
        for (int i=0; i<size; ++i)
        {
          if (mem1[i] != mem2[i]) 
          {
            result = i;
            break;
          }
        }
        // END inline assembly replacement
      
        return result;
      }
    I'm supposed to change the MemCmp function into inline x86 assembly, as stated in the code. Here's what I've done so far:
    Code:
      int MemCmp( char* mem1, char* mem2, int size )
      {
    	  int result;
    	  asm(
    		".intel_syntax\n"
    		"push ebp\n"
    		"mov ebp,esp\n"
    		"sub esp,4\n"
    		"mov DWORD PTR [ebp-4],0\n"
    		"jmp while1_cond\n"
    		"while1_body:\n"
    		"mov %0,[ebp-4]\n"
    		"add %2,%0\n"
    		"add %3,%0\n"
    		"push %0\n"
    		"push %1\n"
    		"movzx %0, BYTE PTR [%2]\n"
    		"movzx %1, BYTE PTR [%3]\n"
    		"cmp %0,%1\n"
    		"pop %1\n"
    		"pop %0\n"
    		"je if_end\n"
    		"jmp memcmpend\n"
    		"if_end:\n"
    		"inc %0\n"
    		"mov [ebp-4],%0\n"
    		"while1_cond:\n"
    		"mov %0,[ebp-4]\n"
    		"cmp %0,%1\n"
    		"jl while1_body\n"
    		"memcmpend:\n"
    		"leave\n"
    		".att_syntax\n"
    		: "=a"(result)
    		:"b"(size),"c"(mem1),"d"(mem2)
    		:"ebp", "esp"
    	  );
    	  cout << result << endl;
        return result;
      }
    And here's the assembly g++ generates for the inline code in this method
    Code:
    	.intel_syntax
    push ebp
    mov ebp,esp
    sub esp,4
    mov DWORD PTR [ebp-4],0
    jmp while1_cond
    while1_body:
    mov %eax,[ebp-4]
    add %ecx,%eax
    add %edx,%eax
    push %eax
    push %ebx
    movzx %eax, BYTE PTR [%ecx]
    movzx %ebx, BYTE PTR [%edx]
    cmp %eax,%ebx
    pop %ebx
    pop %eax
    je if_end
    jmp memcmpend
    if_end:
    inc %eax
    mov [ebp-4],%eax
    while1_cond:
    mov %eax,[ebp-4]
    cmp %eax,%ebx
    jl while1_body
    memcmpend:
    leave
    .att_syntax
    As you can see in the full cpp code, the arrays should only be different at memory location 251, and the first time MemCmp is called, there should be NO difference in the arrays.

    My MemCmp function exits when the loop is at its 45th iteration (ebp-4 will be 45).

    I have no idea why this is happening, and I was hoping someone could spot something that might be causing this.

    Thanks for the help.
  2. #2
  3. Commie Mutant Traitor
    Devshed Intermediate (1500 - 1999 posts)

    Join Date
    Jun 2004
    Location
    Alpharetta, GA
    Posts
    1,806
    Rep Power
    1570
    As it happens, this can be done with no looping at all using CMPSB and a REPE prefix:

    Code:
      int MemCmp( char* mem1, char* mem2, int size )
      {
        asm(
    		".intel_syntax\n\t"
    
                    // At this point, ECX = size, ESI = mem1, and EDI = mem2,
                   // so all we need to do is...
                    "mov eax, ecx\n\t"    // save the size in eax
                    "cld\n\t"       // make sure the direction flag is cleared,
                                     // so that REP increments rather than decrements
    
                    "repe cmpsb\n\t" // compare [BYTE PTR ESI] to [BYTE PTR EDI],
                                     // inc the pointers,
                                     // and dec ECX,
                                     // until ECX == 0 or a mismatch is found
    
                    "jnz mismatch\n\t"
                    "mov eax, -1\n\t"
                    "jmp finish\n"
            "mismatch:\n\t"
                    "sub eax, ecx\n\t"
                    "dec eax\n"
            "finish:\n\t"
    		".att_syntax\n"
    		: "=a"(result)
    		:"c"(size),"S"(mem1),"D"(mem2)
    		:
    	  );
    
        return result;
      }
    IIRC, with modern processors, this is not necessarily the fastest code for this, because it stalls the pipeline in some models of the CPU. It is, however, very compact and should still be faster than the pure C++ version.
    Last edited by Schol-R-LEA; April 26th, 2010 at 11:45 PM.
    Rev First Speaker Schol-R-LEA;2 JAM LCF ELF KoR KCO BiWM TGIF
    #define KINSEY (rand() % 7) λ Scheme is the Red Pill
    Scheme in Short Understanding the C/C++ Preprocessor
    Taming Python A Highly Opinionated Review of Programming Languages for the Novice, v1.1

    FOR SALE: One ShapeSystem 2300 CMD, extensively modified for human use. Includes s/w for anthro, transgender, sex-appeal enhance, & Gillian Anderson and Jason D. Poit clone forms. Some wear. $4500 obo. tverres@et.ins.gov

IMN logo majestic logo threadwatch logo seochat tools logo