packusdw (saturated clamp from unsigned long to unsigned short),
packssduw (saturated clamp from signed long to unsigned short):
Condition for unsigned long inputs: the valid range is “only” [0x00000000, 0x7FFFFFFF].
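Condition for signed long inputs: the valid range is “only” [0x80008000, 0x7FFFFFFF], because the bias subtraction (psubd) wraps around instead of saturating for smaller values. The full signed long range would be possible if a saturating “psubsd” existed, but it does not.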
// Variant 1: via packssdw, with the bias constants written inline.
// Idea: subtract 0x8000 so the unsigned-short target range [0, 0xFFFF] lines up
// with the signed-short range [-0x8000, 0x7FFF], let packssdw do the signed
// saturation, then add 0x8000 back to every packed word.
// In:  xmmx, xmmy = 4 longs each
// Out: xmmx = 8 unsigned shorts (from xmmx in the low half, from xmmy in the high half)
// NOTE: the literal operands are shorthand - psubd/paddw take an xmm register or
// a 128-bit memory operand, not an immediate; presumably the value is replicated
// in every lane of such a constant.
psubd xmmx, 0x00008000 // bias each long down by 0x8000
psubd xmmy, 0x00008000
packssdw xmmx, xmmy // signed-saturating narrow; all 8 words land in xmmx
paddw xmmx, 0x8000 // undo the bias on the packed result (was xmmy, which packssdw never writes)
// Variant 2: one loaded constant; the word bias is derived from it with punpck.
// In:  xmmx, xmmy = 4 longs each
// Out: xmmx = 8 unsigned shorts
// Scratch: xmm?1 = dword bias, xmm?2 = word bias
movdqa xmm?1, 0x00008000 // shorthand: presumably a constant holding 0x00008000 in every dword
movdqa xmm?2, xmm?1
punpcklwd xmm?2, xmm?1 // dwords (high..low): 00000000 80008000 00000000 80008000
punpckldq xmm?2, xmm?2 // dwords (high..low): 00000000 00000000 80008000 80008000
punpcklqdq xmm?2, xmm?2 // duplicate the low qword: 0x8000 in all 8 words (needed for 128-bit registers)
psubd xmmx, xmm?1 // bias each long down by 0x8000
psubd xmmy, xmm?1
packssdw xmmx, xmmy // signed-saturating narrow; all 8 words land in xmmx
paddw xmmx, xmm?2 // undo the bias on the packed result
// Variant 3: one loaded constant; the word bias is derived with pshuflw/pshufhw.
// In:  xmmx, xmmy = 4 longs each
// Out: xmmx = 8 unsigned shorts
// Scratch: xmm?1 = dword bias, xmm?2 = word bias
// The selector 2|2|0|0 picks only even-indexed source words (e.g. imm8 0xA0),
// i.e. the words that hold 0x8000 in the dword bias.
movdqa xmm?1, 0x00008000 // shorthand: presumably a constant holding 0x00008000 in every dword
pshuflw xmm?2, xmm?1, 2|2|0|0 // low 4 words = 0x8000; high qword copied unchanged from xmm?1
pshufhw xmm?2, xmm?2, 2|2|0|0 // high 4 words = 0x8000; source must be xmm?2 so the low qword is kept
psubd xmmx, xmm?1 // bias each long down by 0x8000
psubd xmmy, xmm?1
packssdw xmmx, xmmy // signed-saturating narrow; all 8 words land in xmmx
paddw xmmx, xmm?2 // undo the bias on the packed result
// Variant 4: like the pshuflw/pshufhw variant, but the bias is generated in
// registers, so no constant has to be loaded from memory.
// In:  xmmx, xmmy = 4 longs each
// Out: xmmx = 8 unsigned shorts
// Scratch: xmm?1 = dword bias, xmm?2 = word bias
// The selector 2|2|0|0 picks only even-indexed source words (e.g. imm8 0xA0),
// i.e. the words that hold 0x8000 in the dword bias.
pcmpeqd xmm?1, xmm?1 // every dword = 0xFFFFFFFF (a register always equals itself)
pslld xmm?1, 31 // every dword = 0x80000000
psrld xmm?1, 16 // every dword = 0x00008000 (logical right shift)
pshuflw xmm?2, xmm?1, 2|2|0|0 // low 4 words = 0x8000; high qword copied unchanged from xmm?1
pshufhw xmm?2, xmm?2, 2|2|0|0 // high 4 words = 0x8000; source must be xmm?2 so the low qword is kept
psubd xmmx, xmm?1 // bias each long down by 0x8000
psubd xmmy, xmm?1
packssdw xmmx, xmmy // signed-saturating narrow; all 8 words land in xmmx
paddw xmmx, xmm?2 // undo the bias on the packed result