padddq (128bit + 64bit = 128bit):
// In/out:  xmma = 128-bit value; receives the 128-bit sum.
// In:      xmmb = 64-bit addend in the low qword (not modified).
//          Assumes the high qword of xmmb is zero; otherwise it is simply
//          added into the high qword of xmma as well.
// Scratch: xmmx, xmmy, xmmz.
//
// paddq adds each qword independently, so the carry out of the low qword
// has to be recovered by hand. Without a 64-bit unsigned compare, it is
// taken from bit 63 of:  (a & b) | ((a | b) & ~sum)
movdqa xmmx, xmma       // x = a
movdqa xmmy, xmma       // y = a
pand xmmx, xmmb         // x = a & b        (both top bits set -> carry)
por xmmy, xmmb          // y = a | b
paddq xmma, xmmb        // a = per-qword sum, inter-qword carry still missing
movdqa xmmz, xmma       // z = sum
pandn xmmz, xmmy        // z = ~sum & (a | b)  (one top bit set, sum top bit clear -> carry)
por xmmz, xmmx          // bit 63 of each qword of z = carry out of that qword
psrlq xmmz, 63          // z = 0 or 1 per qword
pslldq xmmz, 8          // move the low-qword carry into the high qword, low qword = 0
paddq xmma, xmmz        // high qword += carry