/******************************************************************************
*									      *
*  128x32 BIT MULTIPLY (UNSIGNED)  (assumes product can fit into 4 words)     *
*  01/14/07 (dkc)							      *
*									      *
******************************************************************************/
unsigned int carry(unsigned int a, unsigned int b, unsigned int sum);

/*
 * Add `addend` into *acc (wrapping unsigned add) and return whatever carry()
 * reports for that addition.
 *
 * NOTE(review): carry() is defined outside this file; from the way it is used
 * here it is presumably the carry out of a+b given the wrapped sum -- confirm
 * against its definition.
 */
static unsigned int mul128_32_addc(unsigned int *acc, unsigned int addend) {
    unsigned int before = *acc;
    unsigned int after = before + addend;

    *acc = after;
    return carry(before, addend, after);
}

/*
 * Multiply the eight halfwords in half[] (half[0] most significant) by the
 * single halfword `digit` and store the result in word[0..3] (word[0] most
 * significant).  Nothing above word[0] is kept, so the caller must ensure the
 * result fits in four words.
 */
static void mul128_32_by_half(const unsigned int half[8], unsigned int digit,
                              unsigned int word[4]) {
    unsigned int part[8];
    unsigned int cy1, cy2, cy3;
    int i;

    for (i = 0; i < 8; i++)
        part[i] = half[i] * digit;

    /*
     * Odd-indexed partial products line up with a result word; even-indexed
     * ones straddle two words, so their halves are added in separately.
     */
    word[3] = part[7];
    cy3 = mul128_32_addc(&word[3], part[6] << 16);

    word[2] = part[5];
    cy2 = mul128_32_addc(&word[2], part[6] >> 16);
    cy2 += mul128_32_addc(&word[2], part[4] << 16);

    word[1] = part[3];
    cy1 = mul128_32_addc(&word[1], part[4] >> 16);
    cy1 += mul128_32_addc(&word[1], part[2] << 16);

    /* No carry is tracked out of the top word. */
    word[0] = part[1] + (part[2] >> 16) + (part[0] << 16);

    /* Ripple the collected carries from the low word upward. */
    cy2 += mul128_32_addc(&word[2], cy3);
    cy1 += mul128_32_addc(&word[1], cy2);
    word[0] += cy1;
}

/*
 * 128x32 bit unsigned multiply.
 *
 * a0, a2, a4, a6  - multiplicand words, a0 most significant, a6 least
 * m0              - multiplier
 * product         - receives four result words, product[0] most significant
 *
 * The product is assumed to fit into four words; anything beyond that is
 * silently dropped.  The 16-bit splitting below assumes unsigned int is
 * 32 bits wide.
 */
void mul128_32(unsigned int a0, unsigned int a2, unsigned int a4,
               unsigned int a6, unsigned int *product, unsigned int m0) {
    unsigned int half[8];
    unsigned int lo[4], hi[4];
    unsigned int m_lo = m0 & 0xffff;
    unsigned int m_hi = m0 >> 16;
    unsigned int cy1, cy2, cy3;

    /* Break the multiplicand into halfwords, most significant first. */
    half[0] = a0 >> 16;
    half[1] = a0 & 0xffff;
    half[2] = a2 >> 16;
    half[3] = a2 & 0xffff;
    half[4] = a4 >> 16;
    half[5] = a4 & 0xffff;
    half[6] = a6 >> 16;
    half[7] = a6 & 0xffff;

    /* Contribution of the least significant half of the multiplier. */
    mul128_32_by_half(half, m_lo, lo);

    /* The most significant half contributes only when it is non-zero. */
    if (m_hi != 0) {
        mul128_32_by_half(half, m_hi, hi);

        /* Fold hi into lo, shifted up by one halfword. */
        cy3 = mul128_32_addc(&lo[3], hi[3] << 16);

        cy2 = mul128_32_addc(&lo[2], hi[3] >> 16);
        cy2 += mul128_32_addc(&lo[2], hi[2] << 16);

        cy1 = mul128_32_addc(&lo[1], hi[2] >> 16);
        cy1 += mul128_32_addc(&lo[1], hi[1] << 16);

        lo[0] += hi[1] >> 16;
        lo[0] += hi[0] << 16;

        /* Ripple the collected carries from the low word upward. */
        cy2 += mul128_32_addc(&lo[2], cy3);
        cy1 += mul128_32_addc(&lo[1], cy2);
        lo[0] += cy1;
    }

    product[0] = lo[0];
    product[1] = lo[1];
    product[2] = lo[2];
    product[3] = lo[3];
}