;-----------------------------------------------------------------------------
;
; 32-bit X86 MD5 hash generator 1.0 -- Paul Houle (paulhoule.com) 4/12/2010
;
; Time critical assembly logic -- called only from phmd5.c
;
;-----------------------------------------------------------------------------

	.386
	.model	flat,c
	.code

;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
; The below RND? macros generate most of the rfc1321 hash code.  Note: the
; rfc1321 A,B,C,D variables are kept registered in eax, ebx, ecx & edx.
;
; RND1_2 is a "subroutine-macro" used by both RND1 and RND2.
;
; You have to read rfc1321 to understand this.  r?... args are the registers
; holding the dwords to operate on.  srcIX is the dword index into the source
; block pointed to by esi.  rotc is the rotate amount for the stage.  TsubI
; is the sine function table constant for the stage.
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

;	Shared step for rounds one and two.  Computes
;	  rOut = rBase + rol(rOut + F(rX,rY,rZ) + dword [esi + srcIX*4] + TsubI, rotc)
;	with F(X,Y,Z) = Z xor (X and (Y xor Z)), which equals the rfc1321
;	selector (X and Y) or ((not X) and Z).
;	Clobbers edi, ebp and the flags.  rY is changed temporarily and put
;	back before rBase is read, so rY and rBase may be the same register
;	(RND2 relies on this).
RND1_2	macro	rOut,rBase,rX,rY,rZ,srcIX,rotc,TsubI
	mov	edi,[esi + srcIX*4]	;edi = source dword for this step
	xor	rY,rZ			;rY = Y xor Z (temporary)
	mov	ebp,rX
	and	ebp,rY			;ebp = X and (Y xor Z)
	xor	rY,rZ			;rY holds its original value again
	add	edi,rOut		;edi = source dword + rOut
	xor	ebp,rZ			;ebp = F(X,Y,Z)
	lea	ebp,[ebp + edi + TsubI]	;sum everything without touching flags
	rol	ebp,rotc
	lea	rOut,[rBase + ebp]	;new value replaces rOut
	endm

;	Rounds one and two are the same with function args permuted.
;	Round one step: expands RND1_2 with X,Y,Z = rB,rC,rD and rB as the
;	register added after the rotate.
RND1	macro	rOut,rB,rC,rD,srcIX,rotc,TsubI
	RND1_2	rOut,rB, rB,rC,rD, srcIX,rotc,TsubI
	endm

;	Round two step: expands RND1_2 with X,Y,Z = rD,rB,rC (the round one
;	function with its arguments permuted) and rB as the register added
;	after the rotate.
RND2	macro	rOut,rB,rC,rD,srcIX,rotc,TsubI
	RND1_2	rOut,rB, rD,rB,rC, srcIX,rotc,TsubI
	endm

;	Round three step.  Computes
;	  rOut = rB + rol(rOut + (rB xor rC xor rD) + dword [esi + srcIX*4] + TsubI, rotc)
;	Clobbers edi and the flags.  rC is used as the xor accumulator and is
;	restored to its original value before the macro ends.
RND3	macro	rOut,rB,rC,rD,srcIX,rotc,TsubI
	mov	edi,[esi + srcIX*4]	;edi = source dword for this step
	xor	rC,rD
	add	edi,rOut		;edi = source dword + rOut
	xor	rC,rB			;rC = rB xor rC xor rD (temporary)
	lea	edi,[edi + rC + TsubI]
	xor	rC,rD			;begin undoing the two xors
	rol	edi,rotc
	xor	rC,rB			;rC holds its original value again
	lea	rOut,[rB + edi]		;new value replaces rOut
	endm

;	Round four step.  Computes
;	  rOut = rB + rol(rOut + (rC xor (rB or (not rD))) + dword [esi + srcIX*4] + TsubI, rotc)
;	Clobbers edi, ebp and the flags.  rD is complemented in place and
;	complemented back, so it leaves the macro unchanged.
RND4	macro	rOut,rB,rC,rD,srcIX,rotc,TsubI
	mov	edi,[esi + srcIX*4]	;edi = source dword for this step
	not	rD			;rD = not rD (temporary)
	mov	ebp,rB
	or	ebp,rD			;ebp = rB or (not rD)
	not	rD			;rD holds its original value again
	add	edi,rOut		;edi = source dword + rOut
	xor	ebp,rC			;ebp = rC xor (rB or (not rD))
	lea	edi,[edi + ebp + TsubI]
	rol	edi,rotc
	lea	rOut,[rB + edi]		;new value replaces rOut

	endm

;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
; Phmd5DoBlocks -- apply the rfc1321 block transform to whole 64-byte blocks.
;
; Called with the C convention selected by ".model flat,c" (stack arguments).
; ebx, esi, edi and ebp are preserved; eax, ecx, edx and the flags are
; clobbered.  No return value is set.
;
; phash points to the current 16-byte working hash value (dwords A,B,C,D).
; It is read once on entry and rewritten after every block.  The C code is
; expected to keep it dword aligned.
;
; pdata addresses the input data.  The C code ensures this is dword aligned.
;
; dbytes is the input byte count.  The C code is expected to pass a NON-ZERO
; multiple of 64.  As a safety net the count is rounded down to a multiple
; of 64 here, and the routine returns without touching the hash when no
; whole block remains; otherwise the loop's exact-match exit test below
; could never be met and the routine would run off the end of the input.
;
; Inside bloop:  eax,ebx,ecx,edx = A,B,C,D   esi = current input block
;                edi,ebp = scratch for the RND? macros
;                [esp] = input endpoint      [esp+4] = phash
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Phmd5DoBlocks	proc	uses ebx esi edi, phash:ptr, pdata:ptr, dbytes:dword
	push	ebp			;ebp becomes scratch; keep frame pointer

	mov	edi,phash		;load arguments
	mov	esi,pdata
	mov	ebp,dbytes		;last ebp-relative access: frame ptr is gone

	and	ebp,0ffffffc0h		;whole blocks only: round down to 64*n
	jnz	havework
	pop	ebp			;no whole block: restore frame ptr and
	ret				;leave with the hash untouched

havework:
	mov	eax,[edi+ 0]		;load working hash value
	mov	ebx,[edi+ 4]
	mov	ecx,[edi+ 8]
	mov	edx,[edi+12]

	add	ebp,esi			;calc input endpoint
	push	edi			;build bloop stack frame
	push	ebp
bloop:
	RND1	eax, ebx, ecx, edx,  0,  7, 0d76aa478h
	RND1	edx, eax, ebx, ecx,  1, 12, 0e8c7b756h
	RND1	ecx, edx, eax, ebx,  2, 17, 0242070dbh
	RND1	ebx, ecx, edx, eax,  3, 22, 0c1bdceeeh
	RND1	eax, ebx, ecx, edx,  4,  7, 0f57c0fafh
	RND1	edx, eax, ebx, ecx,  5, 12, 04787c62ah
	RND1	ecx, edx, eax, ebx,  6, 17, 0a8304613h
	RND1	ebx, ecx, edx, eax,  7, 22, 0fd469501h
	RND1	eax, ebx, ecx, edx,  8,  7, 0698098d8h
	RND1	edx, eax, ebx, ecx,  9, 12, 08b44f7afh
	RND1	ecx, edx, eax, ebx, 10, 17, 0ffff5bb1h
	RND1	ebx, ecx, edx, eax, 11, 22, 0895cd7beh
	RND1	eax, ebx, ecx, edx, 12,  7, 06b901122h
	RND1	edx, eax, ebx, ecx, 13, 12, 0fd987193h
	RND1	ecx, edx, eax, ebx, 14, 17, 0a679438eh
	RND1	ebx, ecx, edx, eax, 15, 22, 049b40821h

	RND2	eax, ebx, ecx, edx,  1,  5, 0f61e2562h
	RND2	edx, eax, ebx, ecx,  6,  9, 0c040b340h
	RND2	ecx, edx, eax, ebx, 11, 14, 0265e5a51h
	RND2	ebx, ecx, edx, eax,  0, 20, 0e9b6c7aah
	RND2	eax, ebx, ecx, edx,  5,  5, 0d62f105dh
	RND2	edx, eax, ebx, ecx, 10,  9, 002441453h
	RND2	ecx, edx, eax, ebx, 15, 14, 0d8a1e681h
	RND2	ebx, ecx, edx, eax,  4, 20, 0e7d3fbc8h
	RND2	eax, ebx, ecx, edx,  9,  5, 021e1cde6h
	RND2	edx, eax, ebx, ecx, 14,  9, 0c33707d6h
	RND2	ecx, edx, eax, ebx,  3, 14, 0f4d50d87h
	RND2	ebx, ecx, edx, eax,  8, 20, 0455a14edh
	RND2	eax, ebx, ecx, edx, 13,  5, 0a9e3e905h
	RND2	edx, eax, ebx, ecx,  2,  9, 0fcefa3f8h
	RND2	ecx, edx, eax, ebx,  7, 14, 0676f02d9h
	RND2	ebx, ecx, edx, eax, 12, 20, 08d2a4c8ah

	RND3	eax, ebx, ecx, edx,  5,  4, 0fffa3942h
	RND3	edx, eax, ebx, ecx,  8, 11, 08771f681h
	RND3	ecx, edx, eax, ebx, 11, 16, 06d9d6122h
	RND3	ebx, ecx, edx, eax, 14, 23, 0fde5380ch
	RND3	eax, ebx, ecx, edx,  1,  4, 0a4beea44h
	RND3	edx, eax, ebx, ecx,  4, 11, 04bdecfa9h
	RND3	ecx, edx, eax, ebx,  7, 16, 0f6bb4b60h
	RND3	ebx, ecx, edx, eax, 10, 23, 0bebfbc70h
	RND3	eax, ebx, ecx, edx, 13,  4, 0289b7ec6h
	RND3	edx, eax, ebx, ecx,  0, 11, 0eaa127fah
	RND3	ecx, edx, eax, ebx,  3, 16, 0d4ef3085h
	RND3	ebx, ecx, edx, eax,  6, 23, 004881d05h
	RND3	eax, ebx, ecx, edx,  9,  4, 0d9d4d039h
	RND3	edx, eax, ebx, ecx, 12, 11, 0e6db99e5h
	RND3	ecx, edx, eax, ebx, 15, 16, 01fa27cf8h
	RND3	ebx, ecx, edx, eax,  2, 23, 0c4ac5665h

	RND4	eax, ebx, ecx, edx,  0,  6, 0f4292244h
	RND4	edx, eax, ebx, ecx,  7, 10, 0432aff97h
	RND4	ecx, edx, eax, ebx, 14, 15, 0ab9423a7h
	RND4	ebx, ecx, edx, eax,  5, 21, 0fc93a039h
	RND4	eax, ebx, ecx, edx, 12,  6, 0655b59c3h
	RND4	edx, eax, ebx, ecx,  3, 10, 08f0ccc92h
	RND4	ecx, edx, eax, ebx, 10, 15, 0ffeff47dh
	RND4	ebx, ecx, edx, eax,  1, 21, 085845dd1h
	RND4	eax, ebx, ecx, edx,  8,  6, 06fa87e4fh
	RND4	edx, eax, ebx, ecx, 15, 10, 0fe2ce6e0h
	RND4	ecx, edx, eax, ebx,  6, 15, 0a3014314h
	RND4	ebx, ecx, edx, eax, 13, 21, 04e0811a1h
	RND4	eax, ebx, ecx, edx,  4,  6, 0f7537e82h
	RND4	edx, eax, ebx, ecx, 11, 10, 0bd3af235h
	RND4	ecx, edx, eax, ebx,  2, 15, 02ad7d2bbh
	RND4	ebx, ecx, edx, eax,  9, 21, 0eb86d391h

	mov	edi,[esp+4]		;restore ptr to working hash
	add	esi,64			;point to next input block

	add	eax,[edi+ 0]		;update registered working hash value
	add	ebx,[edi+ 4]
	add	ecx,[edi+ 8]
	add	edx,[edi+12]

	mov	[edi+ 0],eax		;store updated working hash
	mov	[edi+ 4],ebx
	mov	[edi+ 8],ecx
	mov	[edi+12],edx

	cmp	esi,[esp]		;loop if another input block exists
	jne	bloop			;exact match is safe: endpoint is pdata+64*n

	pop	esi			;discard bloop stack frame (esi/edi are
	pop	edi			;reloaded by the "uses" epilogue)

	pop	ebp			;restore frame pointer for the epilogue
	ret
Phmd5DoBlocks	endp

	end
