51 #    include <mmintrin.h> 
   53 #  include <SDL_cpuinfo.h> 
   61 #define SWAP_32(x) (((x) >> 24) | (((x) & 0x00ff0000) >> 8)  | (((x) & 0x0000ff00) << 8)  | ((x) << 24)) 
   68 static int SDL_imageFilterUseMMX = 1;
 
   83         if (SDL_imageFilterUseMMX == 0) {
 
   95         SDL_imageFilterUseMMX = 0;
 
  103         SDL_imageFilterUseMMX = 1;
 
  118 static int SDL_imageFilterAddMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
  145         __m64 *mSrc1 = (__m64*)Src1;
 
  146         __m64 *mSrc2 = (__m64*)Src2;
 
  147         __m64 *mDest = (__m64*)Dest;
 
  149         for (i = 0; i < SrcLength/8; i++) {
 
  150                 *mDest = _m_paddusb(*mSrc1, *mSrc2);    
 
  173 int SDL_imageFilterAdd(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int length)
 
  175         unsigned int i, istart;
 
  176         unsigned char *cursrc1, *cursrc2, *curdst;
 
  180         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  188                 SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
 
  191                 if ((length & 7) > 0) {
 
  193                         istart = length & 0xfffffff8;
 
  194                         cursrc1 = &Src1[istart];
 
  195                         cursrc2 = &Src2[istart];
 
  196                         curdst = &Dest[istart];
 
  210         for (i = istart; i < length; i++) {
 
  211                 result = (int) *cursrc1 + (
int) *cursrc2;
 
  214                 *curdst = (
unsigned char) result;
 
  235 static int SDL_imageFilterMeanMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength,
 
  273         __m64 *mSrc1 = (__m64*)Src1;
 
  274         __m64 *mSrc2 = (__m64*)Src2;
 
  275         __m64 *mDest = (__m64*)Dest;
 
  276         __m64 *mMask = (__m64*)Mask;
 
  278         for (i = 0; i < SrcLength/8; i++) {
 
  281                 mm1 = _m_psrlwi(mm1, 1);        
 
  282                 mm2 = _m_psrlwi(mm2, 1);        
 
  283                 mm1 = _m_pand(mm1, *mMask);     
 
  284                 mm2 = _m_pand(mm2, *mMask);     
 
  285                 *mDest = _m_paddusb(mm1, mm2);  
 
  308 int SDL_imageFilterMean(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int length)
 
  310         static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
 
  311         unsigned int i, istart;
 
  312         unsigned char *cursrc1, *cursrc2, *curdst;
 
  316         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  323                 SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
 
  326                 if ((length & 7) > 0) {
 
  328                         istart = length & 0xfffffff8;
 
  329                         cursrc1 = &Src1[istart];
 
  330                         cursrc2 = &Src2[istart];
 
  331                         curdst = &Dest[istart];
 
  345         for (i = istart; i < length; i++) {
 
  346                 result = (int) *cursrc1 / 2 + (
int) *cursrc2 / 2;
 
  347                 *curdst = (
unsigned char) result;
 
  367 static int SDL_imageFilterSubMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
  394         __m64 *mSrc1 = (__m64*)Src1;
 
  395         __m64 *mSrc2 = (__m64*)Src2;
 
  396         __m64 *mDest = (__m64*)Dest;
 
  398         for (i = 0; i < SrcLength/8; i++) {
 
  399                 *mDest = _m_psubusb(*mSrc1, *mSrc2);    
 
  422 int SDL_imageFilterSub(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int length)
 
  424         unsigned int i, istart;
 
  425         unsigned char *cursrc1, *cursrc2, *curdst;
 
  429         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  436                 SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
 
  439                 if ((length & 7) > 0) {
 
  441                         istart = length & 0xfffffff8;
 
  442                         cursrc1 = &Src1[istart];
 
  443                         cursrc2 = &Src2[istart];
 
  444                         curdst = &Dest[istart];
 
  458         for (i = istart; i < length; i++) {
 
  459                 result = (int) *cursrc1 - (
int) *cursrc2;
 
  462                 *curdst = (
unsigned char) result;
 
  482 static int SDL_imageFilterAbsDiffMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
  512         __m64 *mSrc1 = (__m64*)Src1;
 
  513         __m64 *mSrc2 = (__m64*)Src2;
 
  514         __m64 *mDest = (__m64*)Dest;
 
  516         for (i = 0; i < SrcLength/8; i++) {
 
  517                 __m64 mm1 = _m_psubusb(*mSrc2, *mSrc1); 
 
  518                 __m64 mm2 = _m_psubusb(*mSrc1, *mSrc2); 
 
  519                 *mDest = _m_por(mm1, mm2);              
 
  544         unsigned int i, istart;
 
  545         unsigned char *cursrc1, *cursrc2, *curdst;
 
  549         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  556                 SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
 
  559                 if ((length & 7) > 0) {
 
  561                         istart = length & 0xfffffff8;
 
  562                         cursrc1 = &Src1[istart];
 
  563                         cursrc2 = &Src2[istart];
 
  564                         curdst = &Dest[istart];
 
  578         for (i = istart; i < length; i++) {
 
  579                 result = abs((
int) *cursrc1 - (
int) *cursrc2);
 
  580                 *curdst = (
unsigned char) result;
 
  600 static int SDL_imageFilterMultMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
  687         __m64 *mSrc1 = (__m64*)Src1;
 
  688         __m64 *mSrc2 = (__m64*)Src2;
 
  689         __m64 *mDest = (__m64*)Dest;
 
  690         __m64 mm0 = _m_from_int(0); 
 
  692         for (i = 0; i < SrcLength/8; i++) {
 
  693                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
 
  694                 mm1 = _m_punpcklbw(*mSrc1, mm0);        
 
  695                 mm2 = _m_punpckhbw(*mSrc1, mm0);        
 
  696                 mm3 = _m_punpcklbw(*mSrc2, mm0);        
 
  697                 mm4 = _m_punpckhbw(*mSrc2, mm0);        
 
  698                 mm1 = _m_pmullw(mm1, mm3);              
 
  699                 mm2 = _m_pmullw(mm2, mm4);              
 
  700                 mm5 = _m_psrawi(mm1, 15);               
 
  701                 mm6 = _m_psrawi(mm2, 15);               
 
  702                 mm1 = _m_pxor(mm1, mm5);                
 
  703                 mm2 = _m_pxor(mm2, mm6);                
 
  704                 mm1 = _m_psubsw(mm1, mm5);              
 
  705                 mm2 = _m_psubsw(mm2, mm6);              
 
  706                 *mDest = _m_packuswb(mm1, mm2);         
 
  729 int SDL_imageFilterMult(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int length)
 
  731         unsigned int i, istart;
 
  732         unsigned char *cursrc1, *cursrc2, *curdst;
 
  736         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  743                 SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
 
  746                 if ((length & 7) > 0) {
 
  748                         istart = length & 0xfffffff8;
 
  749                         cursrc1 = &Src1[istart];
 
  750                         cursrc2 = &Src2[istart];
 
  751                         curdst = &Dest[istart];
 
  765         for (i = istart; i < length; i++) {
 
  769                 result = (int) *cursrc1 * (
int) *cursrc2;
 
  772                 *curdst = (
unsigned char) result;
 
  821                 "1:mov  (%%edx), %%al \n\t"       
  823                 "mov %%al, (%%edi)  \n\t"        
  828 #  elif defined(__x86_64__) 
  829                 "1:mov  (%%rdx), %%al \n\t"       
  831                 "mov %%al, (%%rdi)  \n\t"        
  864         unsigned int i, istart;
 
  865         unsigned char *cursrc1, *cursrc2, *curdst;
 
  868         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
  879                         if ((length & 7) > 0) {
 
  881                                 istart = length & 0xfffffff8;
 
  882                                 cursrc1 = &Src1[istart];
 
  883                                 cursrc2 = &Src2[istart];
 
  884                                 curdst = &Dest[istart];
 
  902         for (i = istart; i < length; i++) {
 
  903                 *curdst = (int)*cursrc1 * (
int)*cursrc2;  
 
  923 static int SDL_imageFilterMultDivby2MMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
  962         __m64 *mSrc1 = (__m64*)Src1;
 
  963         __m64 *mSrc2 = (__m64*)Src2;
 
  964         __m64 *mDest = (__m64*)Dest;
 
  965         __m64 mm0 = _m_from_int(0); 
 
  967         for (i = 0; i < SrcLength/8; i++) {
 
  968                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
 
  969                 mm1 = _m_punpcklbw(*mSrc1, mm0);        
 
  970                 mm2 = _m_punpckhbw(*mSrc1, mm0);        
 
  971                 mm3 = _m_punpcklbw(*mSrc2, mm0);        
 
  972                 mm4 = _m_punpckhbw(*mSrc2, mm0);        
 
  973                 mm1 = _m_psrlwi(mm1, 1);                
 
  974                 mm2 = _m_psrlwi(mm2, 1);                
 
  975                 mm1 = _m_pmullw(mm1, mm3);              
 
  976                 mm2 = _m_pmullw(mm2, mm4);              
 
  977                 *mDest = _m_packuswb(mm1, mm2);         
 
 1002         unsigned int i, istart;
 
 1003         unsigned char *cursrc1, *cursrc2, *curdst;
 
 1007         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
 1014                 SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
 
 1017                 if ((length & 7) > 0) {
 
 1019                         istart = length & 0xfffffff8;
 
 1020                         cursrc1 = &Src1[istart];
 
 1021                         cursrc2 = &Src2[istart];
 
 1022                         curdst = &Dest[istart];
 
 1036         for (i = istart; i < length; i++) {
 
 1037                 result = ((int) *cursrc1 / 2) * (int) *cursrc2;
 
 1040                 *curdst = (
unsigned char) result;
 
 1060 static int SDL_imageFilterMultDivby4MMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
 1101         __m64 *mSrc1 = (__m64*)Src1;
 
 1102         __m64 *mSrc2 = (__m64*)Src2;
 
 1103         __m64 *mDest = (__m64*)Dest;
 
 1104         __m64 mm0 = _m_from_int(0); 
 
 1106         for (i = 0; i < SrcLength/8; i++) {
 
 1107                 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
 
 1108                 mm1 = _m_punpcklbw(*mSrc1, mm0);        
 
 1109                 mm2 = _m_punpckhbw(*mSrc1, mm0);        
 
 1110                 mm3 = _m_punpcklbw(*mSrc2, mm0);        
 
 1111                 mm4 = _m_punpckhbw(*mSrc2, mm0);        
 
 1112                 mm1 = _m_psrlwi(mm1, 1);                
 
 1113                 mm2 = _m_psrlwi(mm2, 1);                
 
 1114                 mm3 = _m_psrlwi(mm3, 1);                
 
 1115                 mm4 = _m_psrlwi(mm4, 1);                
 
 1116                 mm1 = _m_pmullw(mm1, mm3);              
 
 1117                 mm2 = _m_pmullw(mm2, mm4);              
 
 1118                 *mDest = _m_packuswb(mm1, mm2);         
 
 1143         unsigned int i, istart;
 
 1144         unsigned char *cursrc1, *cursrc2, *curdst;
 
 1148         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
 1155                 SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);
 
 1158                 if ((length & 7) > 0) {
 
 1160                         istart = length & 0xfffffff8;
 
 1161                         cursrc1 = &Src1[istart];
 
 1162                         cursrc2 = &Src2[istart];
 
 1163                         curdst = &Dest[istart];
 
 1177         for (i = istart; i < length; i++) {
 
 1178                 result = ((int) *cursrc1 / 2) * ((int) *cursrc2 / 2);
 
 1181                 *curdst = (
unsigned char) result;
 
 1201 static int SDL_imageFilterBitAndMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
 1250         __m64 *mSrc1 = (__m64*)Src1;
 
 1251         __m64 *mSrc2 = (__m64*)Src2;
 
 1252         __m64 *mDest = (__m64*)Dest;
 
 1254         for (i = 0; i < SrcLength/8; i++) {
 
 1255                 *mDest = _m_pand(*mSrc1, *mSrc2);       
 
 1280         unsigned int i, istart;
 
 1281         unsigned char *cursrc1, *cursrc2, *curdst;
 
 1284         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
 1293                 SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);
 
 1296                 if ((length & 7) > 0) {
 
 1299                         istart = length & 0xfffffff8;
 
 1300                         cursrc1 = &Src1[istart];
 
 1301                         cursrc2 = &Src2[istart];
 
 1302                         curdst = &Dest[istart];
 
 1316         for (i = istart; i < length; i++) {
 
 1317                 *curdst = (*cursrc1) & (*cursrc2);
 
 1337 static int SDL_imageFilterBitOrMMX(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
 1364         __m64 *mSrc1 = (__m64*)Src1;
 
 1365         __m64 *mSrc2 = (__m64*)Src2;
 
 1366         __m64 *mDest = (__m64*)Dest;
 
 1368         for (i = 0; i < SrcLength/8; i++) {
 
 1369                 *mDest = _m_por(*mSrc1, *mSrc2);        
 
 1394         unsigned int i, istart;
 
 1395         unsigned char *cursrc1, *cursrc2, *curdst;
 
 1398         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
 1406                 SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
 
 1409                 if ((length & 7) > 0) {
 
 1411                         istart = length & 0xfffffff8;
 
 1412                         cursrc1 = &Src1[istart];
 
 1413                         cursrc2 = &Src2[istart];
 
 1414                         curdst = &Dest[istart];
 
 1428         for (i = istart; i < length; i++) {
 
 1429                 *curdst = *cursrc1 | *cursrc2;
 
 1448 static int SDL_imageFilterDivASM(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int SrcLength)
 
 1487                 "1: mov (%%esi), %%bl  \n\t"     
 1490                 "movb  $255, (%%edi)   \n\t"     
 1492                 "2: xor %%ah, %%ah     \n\t"     
 1493                 "mov   (%%edx), %%al   \n\t"     
 1495                 "mov   %%al, (%%edi)   \n\t"     
 1508 #  elif defined(__x86_64__) 
 1510                 "1: mov (%%rsi), %%bl  \n\t"     
 1513                 "movb  $255, (%%rdi)   \n\t"     
 1515                 "2: xor %%ah, %%ah     \n\t"     
 1516                 "mov   (%%rdx), %%al   \n\t"     
 1518                 "mov   %%al, (%%rdi)   \n\t"     
 1529                 : 
"memory", 
"rax", 
"rbx" 
 1549 int SDL_imageFilterDiv(
unsigned char *Src1, 
unsigned char *Src2, 
unsigned char *Dest, 
unsigned int length)
 
 1551         unsigned int i, istart;
 
 1552         unsigned char *cursrc1, *cursrc2, *curdst;
 
 1555         if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
 
 1563                         SDL_imageFilterDivASM(Src1, Src2, Dest, length);
 
 1591         for (i = istart; i < length; i++) {
 
 1592                 if (*cursrc2 == 0) {
 
 1595                         *curdst = (int)*cursrc1 / (
int)*cursrc2;  
 
 1617 static int SDL_imageFilterBitNegationMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength)
 
 1643         __m64 *mSrc1 = (__m64*)Src1;
 
 1644         __m64 *mDest = (__m64*)Dest;
 
 1646         mm1 = _m_pcmpeqb(mm1, mm1);             
 
 1648         for (i = 0; i < SrcLength/8; i++) {
 
 1649                 *mDest = _m_pxor(*mSrc1, mm1);  
 
 1673         unsigned int i, istart;
 
 1674         unsigned char *cursrc1, *curdst;
 
 1677         if ((Src1 == NULL) || (Dest == NULL))
 
 1684                 SDL_imageFilterBitNegationMMX(Src1, Dest, length);
 
 1687                 if ((length & 7) > 0) {
 
 1689                         istart = length & 0xfffffff8;
 
 1690                         cursrc1 = &Src1[istart];
 
 1691                         curdst = &Dest[istart];
 
 1704         for (i = istart; i < length; i++) {
 
 1705                 *curdst = ~(*cursrc1);
 
 1724 static int SDL_imageFilterAddByteMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char C)
 
 1758         __m64 *mSrc1 = (__m64*)Src1;
 
 1759         __m64 *mDest = (__m64*)Dest;
 
 1763         __m64 mm1 = _m_from_int(i);
 
 1764         __m64 mm2 = _m_from_int(i);
 
 1765         mm1 = _m_punpckldq(mm1, mm2);                   
 
 1767         for (i = 0; i < SrcLength/8; i++) {
 
 1768                 *mDest = _m_paddusb(*mSrc1, mm1);       
 
 1793         unsigned int i, istart;
 
 1795         unsigned char *cursrc1, *curdest;
 
 1799         if ((Src1 == NULL) || (Dest == NULL))
 
 1806                 memcpy(Src1, Dest, length);
 
 1813                 SDL_imageFilterAddByteMMX(Src1, Dest, length, C);
 
 1816                 if ((length & 7) > 0) {
 
 1818                         istart = length & 0xfffffff8;
 
 1819                         cursrc1 = &Src1[istart];
 
 1820                         curdest = &Dest[istart];
 
 1834         for (i = istart; i < length; i++) {
 
 1835                 result = (int) *cursrc1 + iC;
 
 1838                 *curdest = (
unsigned char) result;
 
 1857 static int SDL_imageFilterAddUintMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned int C, 
unsigned int D)
 
 1888         __m64 *mSrc1 = (__m64*)Src1;
 
 1889         __m64 *mDest = (__m64*)Dest;
 
 1891         __m64 mm1 = _m_from_int(C);
 
 1892         __m64 mm2 = _m_from_int(C);
 
 1893         mm1 = _m_punpckldq(mm1, mm2);                   
 
 1896         for (i = 0; i < SrcLength/8; i++) {
 
 1897                 *mDest = _m_paddusb(*mSrc1, mm1);       
 
 1921         unsigned int i, j, istart, D;
 
 1923         unsigned char *cursrc1;
 
 1924         unsigned char *curdest;
 
 1928         if ((Src1 == NULL) || (Dest == NULL))
 
 1935                 memcpy(Src1, Dest, length);
 
 1943                 SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);
 
 1946                 if ((length & 7) > 0) {
 
 1948                         istart = length & 0xfffffff8;
 
 1949                         cursrc1 = &Src1[istart];
 
 1950                         curdest = &Dest[istart];
 
 1963         iC[3] = (int) ((C >> 24) & 0xff);
 
 1964         iC[2] = (int) ((C >> 16) & 0xff);
 
 1965         iC[1] = (int) ((C >>  8) & 0xff);
 
 1966         iC[0] = (int) ((C >>  0) & 0xff);
 
 1967         for (i = istart; i < length; i += 4) {
 
 1968                 for (j = 0; j < 4; j++) {
 
 1970                                 result = (int) *cursrc1 + iC[j];
 
 1971                                 if (result > 255) result = 255;
 
 1972                                 *curdest = (
unsigned char) result;
 
 1993 static int SDL_imageFilterAddByteToHalfMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char C,
 
 1994                                                                         unsigned char *Mask)
 
 2032         __m64 *mSrc1 = (__m64*)Src1;
 
 2033         __m64 *mDest = (__m64*)Dest;
 
 2034         __m64 *mMask = (__m64*)Mask;
 
 2038         __m64 mm1 = _m_from_int(i);
 
 2039         __m64 mm2 = _m_from_int(i);
 
 2040         mm1 = _m_punpckldq(mm1, mm2);                   
 
 2042         for (i = 0; i < SrcLength/8; i++) {
 
 2043                 __m64 mm2 = _m_psrlwi(*mSrc1, 1);       
 
 2044                 mm2 = _m_pand(mm2, *mMask);             
 
 2046                 *mDest = _m_paddusb(mm1, mm2);          
 
 2070         static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
 
 2071         unsigned int i, istart;
 
 2073         unsigned char *cursrc1;
 
 2074         unsigned char *curdest;
 
 2078         if ((Src1 == NULL) || (Dest == NULL))
 
 2086                 SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
 
 2089                 if ((length & 7) > 0) {
 
 2091                         istart = length & 0xfffffff8;
 
 2092                         cursrc1 = &Src1[istart];
 
 2093                         curdest = &Dest[istart];
 
 2107         for (i = istart; i < length; i++) {
 
 2108                 result = (int) (*cursrc1 / 2) + iC;
 
 2111                 *curdest = (
unsigned char) result;
 
 2164         __m64 *mSrc1 = (__m64*)Src1;
 
 2165         __m64 *mDest = (__m64*)Dest;
 
 2169         __m64 mm1 = _m_from_int(i);
 
 2170         __m64 mm2 = _m_from_int(i);
 
 2171         mm1 = _m_punpckldq(mm1, mm2);                   
 
 2173         for (i = 0; i < SrcLength/8; i++) {
 
 2174                 *mDest = _m_psubusb(*mSrc1, mm1);       
 
 2198         unsigned int i, istart;
 
 2200         unsigned char *cursrc1;
 
 2201         unsigned char *curdest;
 
 2205         if ((Src1 == NULL) || (Dest == NULL))
 
 2212                 memcpy(Src1, Dest, length);
 
 2222                 if ((length & 7) > 0) {
 
 2224                         istart = length & 0xfffffff8;
 
 2225                         cursrc1 = &Src1[istart];
 
 2226                         curdest = &Dest[istart];
 
 2240         for (i = istart; i < length; i++) {
 
 2241                 result = (int) *cursrc1 - iC;
 
 2244                 *curdest = (
unsigned char) result;
 
 2263 static int SDL_imageFilterSubUintMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned int C, 
unsigned int D)
 
 2294         __m64 *mSrc1 = (__m64*)Src1;
 
 2295         __m64 *mDest = (__m64*)Dest;
 
 2297         __m64 mm1 = _m_from_int(C);
 
 2298         __m64 mm2 = _m_from_int(C);
 
 2299         mm1 = _m_punpckldq(mm1, mm2);                   
 
 2302         for (i = 0; i < SrcLength/8; i++) {
 
 2303                 *mDest = _m_psubusb(*mSrc1, mm1);       
 
 2327         unsigned int i, j, istart, D;
 
 2329         unsigned char *cursrc1;
 
 2330         unsigned char *curdest;
 
 2334         if ((Src1 == NULL) || (Dest == NULL))
 
 2341                 memcpy(Src1, Dest, length);
 
 2349                 SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);
 
 2352                 if ((length & 7) > 0) {
 
 2354                         istart = length & 0xfffffff8;
 
 2355                         cursrc1 = &Src1[istart];
 
 2356                         curdest = &Dest[istart];
 
 2369         iC[3] = (int) ((C >> 24) & 0xff);
 
 2370         iC[2] = (int) ((C >> 16) & 0xff);
 
 2371         iC[1] = (int) ((C >>  8) & 0xff);
 
 2372         iC[0] = (int) ((C >>  0) & 0xff);
 
 2373         for (i = istart; i < length; i += 4) {
 
 2374                 for (j = 0; j < 4; j++) {
 
 2376                                 result = (int) *cursrc1 - iC[j];
 
 2377                                 if (result < 0) result = 0;
 
 2378                                 *curdest = (
unsigned char) result;
 
 2399 static int SDL_imageFilterShiftRightMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N,
 
 2400                                                                  unsigned char *Mask)
 
 2440         __m64 *mSrc1 = (__m64*)Src1;
 
 2441         __m64 *mDest = (__m64*)Dest;
 
 2442         __m64 *mMask = (__m64*)Mask;
 
 2445         mm1 = _m_pcmpeqb(mm1, mm1);                     
 
 2447         for (i = 0; i < N; i++) {
 
 2448                 mm1 = _m_psrlwi(mm1, 1);                
 
 2449                 mm1 = _m_pand(mm1, *mMask);             
 
 2452         for (i = 0; i < SrcLength/8; i++) {
 
 2453                 __m64 mm0 = _m_psrlwi(*mSrc1, N);       
 
 2454                 *mDest = _m_pand(mm0, mm1);             
 
 2478         static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
 
 2479         unsigned int i, istart;
 
 2480         unsigned char *cursrc1;
 
 2481         unsigned char *curdest;
 
 2484         if ((Src1 == NULL) || (Dest == NULL))
 
 2496                 memcpy(Src1, Dest, length);
 
 2503                 SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);
 
 2506                 if ((length & 7) > 0) {
 
 2508                         istart = length & 0xfffffff8;
 
 2509                         cursrc1 = &Src1[istart];
 
 2510                         curdest = &Dest[istart];
 
 2523         for (i = istart; i < length; i++) {
 
 2524                 *curdest = (
unsigned char) *cursrc1 >> N;
 
 2543 static int SDL_imageFilterShiftRightUintMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N)
 
 2568         __m64 *mSrc1 = (__m64*)Src1;
 
 2569         __m64 *mDest = (__m64*)Dest;
 
 2571         for (i = 0; i < SrcLength/8; i++) {
 
 2572                 *mDest = _m_psrldi(*mSrc1, N);
 
 2596         unsigned int i, istart;
 
 2597         unsigned char *cursrc1, *curdest;
 
 2598         unsigned int *icursrc1, *icurdest;
 
 2599         unsigned int result;
 
 2602         if ((Src1 == NULL) || (Dest == NULL))
 
 2613                 memcpy(Src1, Dest, length);
 
 2619                 SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);
 
 2622                 if ((length & 7) > 0) {
 
 2624                         istart = length & 0xfffffff8;
 
 2625                         cursrc1 = &Src1[istart];
 
 2626                         curdest = &Dest[istart];
 
 2639         icursrc1=(
unsigned int *)cursrc1;
 
 2640         icurdest=(
unsigned int *)curdest;
 
 2641         for (i = istart; i < length; i += 4) {
 
 2643                         result = ((
unsigned int)*icursrc1 >> N);
 
 2664 static int SDL_imageFilterMultByByteMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char C)
 
 2731         __m64 *mSrc1 = (__m64*)Src1;
 
 2732         __m64 *mDest = (__m64*)Dest;
 
 2733         __m64 mm0 = _m_from_int(0);                             
 
 2737         __m64 mm1 = _m_from_int(i);
 
 2738         __m64 mm2 = _m_from_int(i);
 
 2739         mm1 = _m_punpckldq(mm1, mm2);                           
 
 2743                 for (i = 0; i < SrcLength/8; i++) {
 
 2745                         mm3 = _m_punpcklbw(*mSrc1, mm0);        
 
 2746                         mm4 = _m_punpckhbw(*mSrc1, mm0);        
 
 2747                         mm3 = _m_pmullw(mm3, mm1);              
 
 2748                         mm4 = _m_pmullw(mm4, mm1);              
 
 2749                         *mDest = _m_packuswb(mm3, mm4);         
 
 2754                 for (i = 0; i < SrcLength/8; i++) {
 
 2755                         __m64 mm3, mm4, mm5, mm6;
 
 2756                         mm3 = _m_punpcklbw(*mSrc1, mm0);        
 
 2757                         mm4 = _m_punpckhbw(*mSrc1, mm0);        
 
 2758                         mm3 = _m_pmullw(mm3, mm1);              
 
 2759                         mm4 = _m_pmullw(mm4, mm1);              
 
 2761                         mm5 = _m_psrawi(mm3, 15);               
 
 2762                         mm6 = _m_psrawi(mm4, 15);               
 
 2763                         mm3 = _m_pxor(mm3, mm5);                
 
 2764                         mm4 = _m_pxor(mm4, mm6);                
 
 2765                         mm3 = _m_psubsw(mm3, mm5);              
 
 2766                         mm4 = _m_psubsw(mm4, mm6);              
 
 2767                         *mDest = _m_packuswb(mm3, mm4);         
 
 2792         unsigned int i, istart;
 
 2794         unsigned char *cursrc1;
 
 2795         unsigned char *curdest;
 
 2799         if ((Src1 == NULL) || (Dest == NULL))
 
 2806                 memcpy(Src1, Dest, length);
 
 2812                 SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);
 
 2815                 if ((length & 7) > 0) {
 
 2817                         istart = length & 0xfffffff8;
 
 2818                         cursrc1 = &Src1[istart];
 
 2819                         curdest = &Dest[istart];
 
 2833         for (i = istart; i < length; i++) {
 
 2834                 result = (int) *cursrc1 * iC;
 
 2837                 *curdest = (
unsigned char) result;
 
 2857 static int SDL_imageFilterShiftRightAndMultByByteMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N,
 
 2903         __m64 *mSrc1 = (__m64*)Src1;
 
 2904         __m64 *mDest = (__m64*)Dest;
 
 2905         __m64 mm0 = _m_from_int(0);                     
 
 2909         __m64 mm1 = _m_from_int(i);
 
 2910         __m64 mm2 = _m_from_int(i);
 
 2911         mm1 = _m_punpckldq(mm1, mm2);                   
 
 2912         for (i = 0; i < SrcLength/8; i++) {
 
 2913                 __m64 mm3, mm4, mm5, mm6;
 
 2914                 mm3 = _m_punpcklbw(*mSrc1, mm0);        
 
 2915                 mm4 = _m_punpckhbw(*mSrc1, mm0);        
 
 2916                 mm3 = _m_psrlwi(mm3, N);                
 
 2917                 mm4 = _m_psrlwi(mm4, N);                
 
 2918                 mm3 = _m_pmullw(mm3, mm1);              
 
 2919                 mm4 = _m_pmullw(mm4, mm1);              
 
 2920                 *mDest = _m_packuswb(mm3, mm4);         
 
 2946         unsigned int i, istart;
 
 2948         unsigned char *cursrc1;
 
 2949         unsigned char *curdest;
 
 2953         if ((Src1 == NULL) || (Dest == NULL))
 
 2964         if ((N == 0) && (C == 1)) {
 
 2965                 memcpy(Src1, Dest, length);
 
 2971                 SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);
 
 2974                 if ((length & 7) > 0) {
 
 2976                         istart = length & 0xfffffff8;
 
 2977                         cursrc1 = &Src1[istart];
 
 2978                         curdest = &Dest[istart];
 
 2992         for (i = istart; i < length; i++) {
 
 2993                 result = (int) (*cursrc1 >> N) * iC;
 
 2996                 *curdest = (
unsigned char) result;
 
 3016 static int SDL_imageFilterShiftLeftByteMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N,
 
 3017                                                                         unsigned char *Mask)
 
 3057         __m64 *mSrc1 = (__m64*)Src1;
 
 3058         __m64 *mDest = (__m64*)Dest;
 
 3059         __m64 *mMask = (__m64*)Mask;
 
 3062         mm1 = _m_pcmpeqb(mm1, mm1);                     
 
 3064         for (i = 0; i < N; i++) {
 
 3065                 mm1 = _m_psllwi(mm1, 1);                
 
 3066                 mm1 = _m_pand(mm1, *mMask);             
 
 3069         for (i = 0; i < SrcLength/8; i++) {
 
 3070                 __m64 mm0 = _m_psllwi(*mSrc1, N);       
 
 3071                 *mDest = _m_pand(mm0, mm1);             
 
 3095         static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
 
 3096         unsigned int i, istart;
 
 3097         unsigned char *cursrc1, *curdest;
 
 3101         if ((Src1 == NULL) || (Dest == NULL))
 
 3112                 memcpy(Src1, Dest, length);
 
 3118                 SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);
 
 3121                 if ((length & 7) > 0) {
 
 3123                         istart = length & 0xfffffff8;
 
 3124                         cursrc1 = &Src1[istart];
 
 3125                         curdest = &Dest[istart];
 
 3138         for (i = istart; i < length; i++) {
 
 3139                 result = ((int) *cursrc1 << N) & 0xff;
 
 3140                 *curdest = (
unsigned char) result;
 
 /*
  * MMX helper that treats the data as 32-bit units: each loop pass reads
  * 8 bytes through mSrc1, shifts both 32-bit halves left by N and stores the
  * result through mDest. Only SrcLength/8 full groups are processed.
  *
  * Src1      - source byte array
  * Dest      - destination byte array
  * SrcLength - number of bytes in the arrays
  * N         - shift count
  */
 3159 static int SDL_imageFilterShiftLeftUintMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N)
 
 3184         __m64 *mSrc1 = (__m64*)Src1;
 
 3185         __m64 *mDest = (__m64*)Dest;
 
 3187         for (i = 0; i < SrcLength/8; i++) {	/* one pass per full 8-byte group */
 
 3188                 *mDest = _m_pslldi(*mSrc1, N);  /* logical left shift of each 32-bit lane */
 
 /*
  * Excerpt of the wrapper around SDL_imageFilterShiftLeftUintMMX (signature not
  * part of this listing). Src1 and Dest must be non-NULL. The MMX helper
  * handles whole 8-byte groups; the loop at the end walks the bytes from
  * istart in steps of 4, reading them as unsigned int values and shifting
  * each left by N.
  */
 3212         unsigned int i, istart;
 
 3213         unsigned char *cursrc1, *curdest;
 
 3214         unsigned int *icursrc1, *icurdest;
 
 3215         unsigned int result;
 
 3218         if ((Src1 == NULL) || (Dest == NULL))
 
 3229                 memcpy(Dest, Src1, length);	/* pass-through copy; memcpy takes (destination, source, n), so Dest comes first */
 
 3235                 SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N);
 
 3238                 if ((length & 7) > 0) {	/* bytes left over after the last full 8-byte group */
 
 3240                         istart = length & 0xfffffff8;	/* index of the first leftover byte */
 
 3241                         cursrc1 = &Src1[istart];
 
 3242                         curdest = &Dest[istart];
 
 3255         icursrc1=(
unsigned int *)cursrc1;	/* view the remaining bytes as unsigned int units */
 
 3256         icurdest=(
unsigned int *)curdest;
 
 3257         for (i = istart; i < length; i += 4) {
 
 3259                         result = ((
unsigned int)*icursrc1 << N);
 
 /*
  * MMX helper for a saturating per-byte left shift. Each loop pass widens
  * 8 source bytes to eight 16-bit words, shifts the words left by N and packs
  * them back to bytes with unsigned saturation before storing through mDest.
  * Two loop variants are shown; the condition that selects between them is
  * not part of this listing. Only SrcLength/8 full groups are processed.
  *
  * Src1      - source byte array
  * Dest      - destination byte array
  * SrcLength - number of bytes in the arrays
  * N         - shift count
  */
 3280 static int SDL_imageFilterShiftLeftMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char N)
 
 3341         __m64 *mSrc1 = (__m64*)Src1;
 
 3342         __m64 *mDest = (__m64*)Dest;
 
 3343         __m64 mm0 = _m_from_int(0);                             /* all-zero register used as the high byte when widening */
 
 3346                 for (i = 0; i < SrcLength/8; i++) {	/* variant 1: shift and pack directly */
 
 3348                         mm3 = _m_punpcklbw(*mSrc1, mm0);        /* low 4 bytes -> four zero-extended words */
 
 3349                         mm4 = _m_punpckhbw(*mSrc1, mm0);        /* high 4 bytes -> four zero-extended words */
 
 3350                         mm3 = _m_psllwi(mm3, N);                /* shift each word left by N */
 
 3351                         mm4 = _m_psllwi(mm4, N);                
 
 3352                         *mDest = _m_packuswb(mm3, mm4);         /* signed words -> unsigned bytes, saturating to 0..255 */
 
 3357                 for (i = 0; i < SrcLength/8; i++) {	/* variant 2: take |word| before packing */
 
 3358                         __m64 mm3, mm4, mm5, mm6;
 
 3359                         mm3 = _m_punpcklbw(*mSrc1, mm0);        /* low 4 bytes -> four zero-extended words */
 
 3360                         mm4 = _m_punpckhbw(*mSrc1, mm0);        /* high 4 bytes -> four zero-extended words */
 
 3361                         mm3 = _m_psllwi(mm3, N);                /* shift each word left by N */
 
 3362                         mm4 = _m_psllwi(mm4, N);                
 
 3364                         mm5 = _m_psrawi(mm3, 15);               /* sign mask: 0xFFFF where bit 15 is set, else 0 */
 
 3365                         mm6 = _m_psrawi(mm4, 15);               
 
 3366                         mm3 = _m_pxor(mm3, mm5);                /* (x ^ mask) - mask yields |x| per word ... */
 
 3367                         mm4 = _m_pxor(mm4, mm6);                
 
 3368                         mm3 = _m_psubsw(mm3, mm5);              /* ... with signed saturation on the subtract */
 
 3369                         mm4 = _m_psubsw(mm4, mm6);              
 
 3370                         *mDest = _m_packuswb(mm3, mm4);         /* presumably so words with bit 15 set pack to 255 rather than 0 -- TODO confirm */
 
 /*
  * Excerpt of the wrapper around SDL_imageFilterShiftLeftMMX (signature not
  * part of this listing). Src1 and Dest must be non-NULL. The MMX helper
  * handles whole 8-byte groups; the scalar loop at the end shifts the bytes
  * from istart up to length.
  */
 3395         unsigned int i, istart;
 
 3396         unsigned char *cursrc1, *curdest;
 
 3400         if ((Src1 == NULL) || (Dest == NULL))
 
 3411                 memcpy(Dest, Src1, length);	/* pass-through copy; memcpy takes (destination, source, n), so Dest comes first */
 
 3417                 SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);
 
 3420                 if ((length & 7) > 0) {	/* bytes left over after the last full 8-byte group */
 
 3422                         istart = length & 0xfffffff8;	/* index of the first leftover byte */
 
 3423                         cursrc1 = &Src1[istart];
 
 3424                         curdest = &Dest[istart];
 
 3437         for (i = istart; i < length; i++) {
 
 3438                 result = (int) *cursrc1 << N;	/* computed in int so bits above bit 7 are still visible */
 
 3441                 *curdest = (
unsigned char) result;
 
 /*
  * MMX helper for thresholding: each loop pass adds a constant to 8 source
  * bytes with unsigned saturation and writes 0xFF for every byte that reached
  * 0xFF and 0x00 for every other byte. Only SrcLength/8 full groups are
  * processed.
  *
  * Src1      - source byte array
  * Dest      - destination byte array
  * SrcLength - number of bytes in the arrays
  * T         - threshold
  *
  * NOTE(review): mm1 and mm2 are read inside their own initialisers. The
  * compare-with-self yields all ones whatever the input, but the read of an
  * uninitialised object is still worth a second look.
  */
 3460 static int SDL_imageFilterBinarizeUsingThresholdMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char T)
 
 3498         __m64 *mSrc1 = (__m64*)Src1;
 
 3499         __m64 *mDest = (__m64*)Dest;
 
 3501         __m64 mm1 = _m_pcmpeqb(mm1, mm1);                       /* all bytes 0xFF: the value compared against in the loop */
 
 3502         __m64 mm2 = _m_pcmpeqb(mm2, mm2);                       /* all bytes 0xFF */
 
 3505         __m64 mm3 = _m_from_int(i);	/* i is set on a line not in this listing; presumably T in every byte -- TODO confirm */
 
 3506         __m64 mm4 = _m_from_int(i);
 
 3507         mm3 = _m_punpckldq(mm3, mm4);                   /* same 32-bit value in both halves of mm3 */
 
 3508         mm2 = _m_psubusb(mm2, mm3);                     /* per byte: 0xFF - mm3 */
 
 3510         for (i = 0; i < SrcLength/8; i++) {	/* one pass per full 8-byte group */
 
 3511                 __m64 mm0 = _m_paddusb(*mSrc1, mm2);    /* saturating add: large enough bytes hit 0xFF */
 
 3512                 *mDest = _m_pcmpeqb(mm0, mm1);          /* 0xFF where the sum is 0xFF, otherwise 0x00 */
 
 /*
  * Excerpt of the wrapper around SDL_imageFilterBinarizeUsingThresholdMMX
  * (signature not part of this listing). Src1 and Dest must be non-NULL. The
  * MMX helper handles whole 8-byte groups; the scalar loop at the end writes
  * 255 for each byte from istart onward that is >= T and 0 otherwise.
  */
 3536         unsigned int i, istart;
 
 3537         unsigned char *cursrc1;
 
 3538         unsigned char *curdest;
 
 3541         if ((Src1 == NULL) || (Dest == NULL))
 
 3548                 memset(Dest, 255, length);	/* guarding condition not in this listing; presumably the T == 0 case, where every byte passes -- TODO confirm */
 
 3554                 SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);
 
 3557                 if ((length & 7) > 0) {	/* bytes left over after the last full 8-byte group */
 
 3559                         istart = length & 0xfffffff8;	/* index of the first leftover byte */
 
 3560                         cursrc1 = &Src1[istart];
 
 3561                         curdest = &Dest[istart];
 
 3574         for (i = istart; i < length; i++) {
 
 3575                 *curdest = (
unsigned char)(((
unsigned char)*cursrc1 >= T) ? 255 : 0);
 
 /*
  * MMX helper that clamps bytes into [Tmin, Tmax] using only saturating byte
  * arithmetic. Per byte: add (255 - Tmax) so values above Tmax saturate at
  * 255, subtract (Tmin + 255 - Tmax) so values below Tmin saturate at 0, then
  * add Tmin back. Only SrcLength/8 full groups are processed.
  *
  * Src1      - source byte array
  * Dest      - destination byte array
  * SrcLength - number of bytes in the arrays
  * Tmin      - lower bound
  * (the rest of the parameter list, including Tmax, is on a line not in this
  * listing)
  */
 3595 static int SDL_imageFilterClipToRangeMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
unsigned char Tmin,
 
 3645         __m64 *mSrc1 = (__m64*)Src1;
 
 3646         __m64 *mDest = (__m64*)Dest;
 
 3647         __m64 mm1 = _m_pcmpeqb(mm1, mm1);       /* all bytes 0xFF */
 
 3651         memset(&i, Tmax, 4);	/* fill 4 bytes of i with Tmax (assumes i is 4 bytes wide; its declaration is not shown) */
 
 3652         mm3 = _m_from_int(i);
 
 3653         mm4 = _m_from_int(i);
 
 3654         mm3 = _m_punpckldq(mm3, mm4);           /* Tmax in all 8 bytes */
 
 3655         mm1 = _m_psubusb(mm1, mm3);             /* per byte: 255 - Tmax */
 
 3659         memset(&i, Tmin, 4);	/* fill 4 bytes of i with Tmin */
 
 3660         mm5 = _m_from_int(i);
 
 3661         mm4 = _m_from_int(i);
 
 3662         mm5 = _m_punpckldq(mm5, mm4);           /* Tmin in all 8 bytes */
 
 3663         mm7 = _m_paddusb(mm5, mm1);     /* per byte: Tmin + (255 - Tmax), saturating */
 
 3664         for (i = 0; i < SrcLength/8; i++) {	/* one pass per full 8-byte group */
 
 3666                 mm0 = _m_paddusb(*mSrc1, mm1);  /* values above Tmax saturate at 255 */
 
 3667                 mm0 = _m_psubusb(mm0, mm7);     /* values below Tmin saturate at 0 */
 
 3668                 *mDest = _m_paddusb(mm0, mm5);  /* shift the result back up by Tmin */
 
 /*
  * Excerpt of the wrapper around SDL_imageFilterClipToRangeMMX (signature not
  * part of this listing). Src1 and Dest must be non-NULL. When the requested
  * range is the full byte range [0, 255] nothing can be clipped, so the input
  * is copied to the output as is. Otherwise the MMX helper handles whole
  * 8-byte groups and the scalar loop at the end clamps the bytes from istart
  * up to length.
  */
 3694         unsigned int i, istart;
 
 3695         unsigned char *cursrc1;
 
 3696         unsigned char *curdest;
 
 3699         if ((Src1 == NULL) || (Dest == NULL))
 
 3705         if ((Tmin == 0) && (Tmax == 255)) {	/* full range: clipping is a no-op */
 
 3706                 memcpy(Dest, Src1, length);	/* memcpy takes (destination, source, n), so Dest comes first */
 
 3712                 SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax);
 
 3715                 if ((length & 7) > 0) {	/* bytes left over after the last full 8-byte group */
 
 3717                         istart = length & 0xfffffff8;	/* index of the first leftover byte */
 
 3718                         cursrc1 = &Src1[istart];
 
 3719                         curdest = &Dest[istart];
 
 3732         for (i = istart; i < length; i++) {
 
 3733                 if (*cursrc1 < Tmin) {
 
 3735                 } 
else if (*cursrc1 > Tmax) {
 
 3738                         *curdest = *cursrc1;	/* already inside [Tmin, Tmax]: keep the value */
 
 /*
  * MMX helper for a linear remapping of byte values. Each loop pass widens
  * 8 source bytes to eight 16-bit words, subtracts the Cmin constant,
  * multiplies by the constant held in mm0, adds the Nmin constant, takes the
  * absolute value per word and packs back to bytes with unsigned saturation.
  * Only SrcLength/8 full groups are processed.
  *
  * Src1      - source byte array
  * Dest      - destination byte array
  * SrcLength - number of bytes in the arrays
  * Cmin/Cmax - bounds of the input range
  * (the rest of the parameter list, including Nmin and Nmax, is on a line not
  * in this listing)
  *
  * The six `mov`/`sub` lines below are Intel-syntax instructions, presumably
  * from an alternative inline-assembly variant of the same routine whose
  * surrounding block is not part of this listing.
  */
 3761 static int SDL_imageFilterNormalizeLinearMMX(
unsigned char *Src1, 
unsigned char *Dest, 
unsigned int SrcLength, 
int Cmin, 
int Cmax,
 
 3769                         mov ax, WORD PTR Nmax           
 
 3770                         mov bx, WORD PTR Cmax           
 
 3771                         sub ax, WORD PTR Nmin           
 
 3772                         sub bx, WORD PTR Cmin           
 
 3787                         mov ax, WORD PTR Cmin           
 
 3795                         mov ax, WORD PTR Nmin           
 
 3839         __m64 *mSrc1 = (__m64*)Src1;
 
 3840         __m64 *mDest = (__m64*)Dest;
 
 3841         __m64 mm0, mm1, mm2, mm3;
 
 3845         unsigned short a = Nmax - Nmin;	/* width of the output range, truncated to 16 bits */
 
 3846         unsigned short b = Cmax - Cmin;	/* width of the input range, truncated to 16 bits */
 
 3853         mm0 = _m_from_int(i);	/* i is set on a line not in this listing; presumably derived from a and b -- TODO confirm */
 
 3854         mm1 = _m_from_int(i);
 
 3855         mm0 = _m_punpckldq(mm0, mm1);                   /* same 32-bit value in both halves: the multiplier */
 
 3857         i = (Cmin<<16)|(
short)Cmin;	/* Cmin in both 16-bit halves; a negative Cmin would sign-extend into the upper half */
 
 3858         mm1 = _m_from_int(i);
 
 3859         mm2 = _m_from_int(i);
 
 3860         mm1 = _m_punpckldq(mm1, mm2);                   /* Cmin replicated across the register */
 
 3862         i = (Nmin<<16)|(
short)Nmin;	/* Nmin in both 16-bit halves, same caveat as for Cmin */
 
 3863         mm2 = _m_from_int(i);
 
 3864         mm3 = _m_from_int(i);
 
 3865         mm2 = _m_punpckldq(mm2, mm3);                   /* Nmin replicated across the register */
 
 3866         __m64 mm7 = _m_from_int(0);                     /* all-zero register used as the high byte when widening */
 
 3867         for (i = 0; i < SrcLength/8; i++) {	/* one pass per full 8-byte group */
 
 3868                 __m64 mm3, mm4, mm5, mm6;	/* this mm3 shadows the mm3 declared before the loop */
 
 3869                 mm3 = _m_punpcklbw(*mSrc1, mm7);        /* low 4 bytes -> four zero-extended words */
 
 3870                 mm4 = _m_punpckhbw(*mSrc1, mm7);        /* high 4 bytes -> four zero-extended words */
 
 3871                 mm3 = _m_psubusb(mm3, mm1);             /* subtract Cmin; note this is a byte-wise saturating subtract applied to word data */
 
 3872                 mm4 = _m_psubusb(mm4, mm1);             
 
 3873                 mm3 = _m_pmullw(mm3, mm0);              /* multiply per word, keeping the low 16 bits */
 
 3874                 mm4 = _m_pmullw(mm4, mm0);              
 
 3875                 mm3 = _m_paddusb(mm3, mm2);             /* add Nmin; again a byte-wise saturating add on word data */
 
 3876                 mm4 = _m_paddusb(mm4, mm2);             
 
 3878                 mm5 = _m_psrawi(mm3, 15);               /* sign mask: 0xFFFF where bit 15 is set, else 0 */
 
 3879                 mm6 = _m_psrawi(mm4, 15);               
 
 3880                 mm3 = _m_pxor(mm3, mm5);                /* (x ^ mask) - mask yields |x| per word ... */
 
 3881                 mm4 = _m_pxor(mm4, mm6);                
 
 3882                 mm3 = _m_psubsw(mm3, mm5);              /* ... with signed saturation on the subtract */
 
 3883                 mm4 = _m_psubsw(mm4, mm6);              
 
 3884                 *mDest = _m_packuswb(mm3, mm4);         /* signed words -> unsigned bytes, saturating to 0..255 */
 
 /*
  * Excerpt of the wrapper around SDL_imageFilterNormalizeLinearMMX (signature
  * not part of this listing). Src and Dest must be non-NULL. The MMX helper
  * handles whole 8-byte groups; the scalar loop at the end remaps the bytes
  * from istart up to length as factor * (value - Cmin) + Nmin.
  */
 3912         unsigned int i, istart;
 
 3913         unsigned char *cursrc;
 
 3914         unsigned char *curdest;
 
 3919         if ((Src == NULL) || (Dest == NULL))
 
 3926                 SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
 
 3929                 if ((length & 7) > 0) {	/* bytes left over after the last full 8-byte group */
 
 3931                         istart = length & 0xfffffff8;	/* index of the first leftover byte */
 
 3932                         cursrc = &Src[istart];
 
 3933                         curdest = &Dest[istart];
 
 3951         for (i = istart; i < length; i++) {
 
 3952                 result = factor * ((int) (*cursrc) - Cmin) + Nmin;	/* factor is computed on a line not in this listing */
 
 3955                 *curdest = (
unsigned char) result;
 
 3981                                                                                    signed short *Kernel, 
unsigned char Divisor)
 
 3984         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 3987         if ((columns < 3) || (rows < 3) || (Divisor == 0))
 
 3992 #if defined(USE_MMX) && defined(i386) 
 4071                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 4072                         "xor       %%ebx, %%ebx \n\t"    
 4074                         "mov          %4, %%edx \n\t"    
 4075                         "movq    (%%edx), %%mm5 \n\t"    
 4076                         "add          $8, %%edx \n\t"    
 4077                         "movq    (%%edx), %%mm6 \n\t"    
 4078                         "add          $8, %%edx \n\t"    
 4079                         "movq    (%%edx), %%mm7 \n\t"    
 4081                         "mov          %3, %%eax \n\t"    
 4082                         "mov          %1, %%esi \n\t"    
 4083                         "mov          %0, %%edi \n\t"    
 4084                         "add       %%eax, %%edi \n\t"    
 4086                         "mov          %2, %%edx \n\t"    
 4087                         "sub          $2, %%edx \n\t"    
 4089                         ".L10320:               \n\t" "mov       %%eax, %%ecx \n\t"      
 4090                         "sub          $2, %%ecx \n\t"    
 4094                         "movq    (%%esi), %%mm1 \n\t"    
 4095                         "add       %%eax, %%esi \n\t"    
 4096                         "movq    (%%esi), %%mm2 \n\t"    
 4097                         "add       %%eax, %%esi \n\t"    
 4098                         "movq    (%%esi), %%mm3 \n\t"    
 4099                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4100                         "punpcklbw %%mm0, %%mm2 \n\t"    
 4101                         "punpcklbw %%mm0, %%mm3 \n\t"    
 4102                         "pmullw    %%mm5, %%mm1 \n\t"    
 4103                         "pmullw    %%mm6, %%mm2 \n\t"    
 4104                         "pmullw    %%mm7, %%mm3 \n\t"    
 4105                         "paddsw    %%mm2, %%mm1 \n\t"    
 4106                         "paddsw    %%mm3, %%mm1 \n\t"    
 4107                         "movq      %%mm1, %%mm2 \n\t"    
 4108                         "psrlq       $32, %%mm1 \n\t"    
 4109                         "paddsw    %%mm2, %%mm1 \n\t"    
 4110                         "movq      %%mm1, %%mm3 \n\t"    
 4111                         "psrlq       $16, %%mm1 \n\t"    
 4112                         "paddsw    %%mm3, %%mm1 \n\t"    
 4114                         "movd      %%eax, %%mm2 \n\t"    
 4115                         "movd      %%edx, %%mm3 \n\t"    
 4116                         "movd      %%mm1, %%eax \n\t"    
 4117                         "psraw       $15, %%mm1 \n\t"    
 4118                         "movd      %%mm1, %%edx \n\t"    
 4120                         "movd      %%eax, %%mm1 \n\t"    
 4121                         "packuswb  %%mm0, %%mm1 \n\t"    
 4122                         "movd      %%mm1, %%eax \n\t"    
 4123                         "mov      %%al, (%%edi) \n\t"    
 4124                         "movd      %%mm3, %%edx \n\t"    
 4125                         "movd      %%mm2, %%eax \n\t"    
 4127                         "sub       %%eax, %%esi \n\t"    
 4128                         "sub       %%eax, %%esi \n\t"    
 4134                         "add          $2, %%esi \n\t"    
 4135                         "add          $2, %%edi \n\t"    
 4140                         "popa                   \n\t":
"=m" (Dest)       
 
 4171                                                                                    signed short *Kernel, 
unsigned char Divisor)
 
 4174         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 4177         if ((columns < 5) || (rows < 5) || (Divisor == 0))
 
 4182 #if defined(USE_MMX) && defined(i386) 
 4318                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 4319                         "xor       %%ebx, %%ebx \n\t"    
 4321                         "movd      %%ebx, %%mm5 \n\t"    
 4322                         "mov          %4, %%edx \n\t"    
 4323                         "mov          %1, %%esi \n\t"    
 4324                         "mov          %0, %%edi \n\t"    
 4325                         "add          $2, %%edi \n\t"    
 4326                         "mov          %3, %%eax \n\t"    
 4327                         "shl          $1, %%eax \n\t"    
 4328                         "add       %%eax, %%edi \n\t"    
 4329                         "shr          $1, %%eax \n\t"    
 4330                         "mov          %2, %%ebx \n\t"    
 4331                         "sub          $4, %%ebx \n\t"    
 4333                         ".L10330:               \n\t" "mov       %%eax, %%ecx \n\t"      
 4334                         "sub          $4, %%ecx \n\t"    
 4336                         ".L10332:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 4337                         "movd      %%esi, %%mm6 \n\t"    
 4339                         "movq    (%%esi), %%mm1 \n\t"    
 4340                         "movq      %%mm1, %%mm2 \n\t"    
 4341                         "add       %%eax, %%esi \n\t"    
 4342                         "movq    (%%edx), %%mm3 \n\t"    
 4343                         "add          $8, %%edx \n\t"    
 4344                         "movq    (%%edx), %%mm4 \n\t"    
 4345                         "add          $8, %%edx \n\t"    
 4346                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4347                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4348                         "pmullw    %%mm3, %%mm1 \n\t"    
 4349                         "pmullw    %%mm4, %%mm2 \n\t"    
 4350                         "paddsw    %%mm2, %%mm1 \n\t"    
 4351                         "paddsw    %%mm1, %%mm7 \n\t"    
 4353                         "movq    (%%esi), %%mm1 \n\t"    
 4354                         "movq      %%mm1, %%mm2 \n\t"    
 4355                         "add       %%eax, %%esi \n\t"    
 4356                         "movq    (%%edx), %%mm3 \n\t"    
 4357                         "add          $8, %%edx \n\t"    
 4358                         "movq    (%%edx), %%mm4 \n\t"    
 4359                         "add          $8, %%edx \n\t"    
 4360                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4361                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4362                         "pmullw    %%mm3, %%mm1 \n\t"    
 4363                         "pmullw    %%mm4, %%mm2 \n\t"    
 4364                         "paddsw    %%mm2, %%mm1 \n\t"    
 4365                         "paddsw    %%mm1, %%mm7 \n\t"    
 4367                         "movq    (%%esi), %%mm1 \n\t"    
 4368                         "movq      %%mm1, %%mm2 \n\t"    
 4369                         "add       %%eax, %%esi \n\t"    
 4370                         "movq    (%%edx), %%mm3 \n\t"    
 4371                         "add          $8, %%edx \n\t"    
 4372                         "movq    (%%edx), %%mm4 \n\t"    
 4373                         "add          $8, %%edx \n\t"    
 4374                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4375                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4376                         "pmullw    %%mm3, %%mm1 \n\t"    
 4377                         "pmullw    %%mm4, %%mm2 \n\t"    
 4378                         "paddsw    %%mm2, %%mm1 \n\t"    
 4379                         "paddsw    %%mm1, %%mm7 \n\t"    
 4381                         "movq    (%%esi), %%mm1 \n\t"    
 4382                         "movq      %%mm1, %%mm2 \n\t"    
 4383                         "add       %%eax, %%esi \n\t"    
 4384                         "movq    (%%edx), %%mm3 \n\t"    
 4385                         "add          $8, %%edx \n\t"    
 4386                         "movq    (%%edx), %%mm4 \n\t"    
 4387                         "add          $8, %%edx \n\t"    
 4388                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4389                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4390                         "pmullw    %%mm3, %%mm1 \n\t"    
 4391                         "pmullw    %%mm4, %%mm2 \n\t"    
 4392                         "paddsw    %%mm2, %%mm1 \n\t"    
 4393                         "paddsw    %%mm1, %%mm7 \n\t"    
 4395                         "movq    (%%esi), %%mm1 \n\t"    
 4396                         "movq      %%mm1, %%mm2 \n\t"    
 4397                         "movq    (%%edx), %%mm3 \n\t"    
 4398                         "add          $8, %%edx \n\t"    
 4399                         "movq    (%%edx), %%mm4 \n\t"    
 4400                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4401                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4402                         "pmullw    %%mm3, %%mm1 \n\t"    
 4403                         "pmullw    %%mm4, %%mm2 \n\t"    
 4404                         "paddsw    %%mm2, %%mm1 \n\t"    
 4405                         "paddsw    %%mm1, %%mm7 \n\t"    
 4407                         "movq      %%mm7, %%mm3 \n\t"    
 4408                         "psrlq       $32, %%mm7 \n\t"    
 4409                         "paddsw    %%mm3, %%mm7 \n\t"    
 4410                         "movq      %%mm7, %%mm2 \n\t"    
 4411                         "psrlq       $16, %%mm7 \n\t"    
 4412                         "paddsw    %%mm2, %%mm7 \n\t"    
 4414                         "movd      %%eax, %%mm1 \n\t"    
 4415                         "movd      %%ebx, %%mm2 \n\t"    
 4416                         "movd      %%edx, %%mm3 \n\t"    
 4417                         "movd      %%mm7, %%eax \n\t"    
 4418                         "psraw       $15, %%mm7 \n\t"    
 4419                         "movd      %%mm5, %%ebx \n\t"    
 4420                         "movd      %%mm7, %%edx \n\t"    
 4422                         "movd      %%eax, %%mm7 \n\t"    
 4423                         "packuswb  %%mm0, %%mm7 \n\t"    
 4424                         "movd      %%mm7, %%eax \n\t"    
 4425                         "mov      %%al, (%%edi) \n\t"    
 4426                         "movd      %%mm3, %%edx \n\t"    
 4427                         "movd      %%mm2, %%ebx \n\t"    
 4428                         "movd      %%mm1, %%eax \n\t"    
 4430                         "movd      %%mm6, %%esi \n\t"    
 4431                         "sub         $72, %%edx \n\t"    
 4437                         "add          $4, %%esi \n\t"    
 4438                         "add          $4, %%edi \n\t"    
 4443                         "popa                   \n\t":
"=m" (Dest)       
 
 4474                                                                                    signed short *Kernel, 
unsigned char Divisor)
 
 4477         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 4480         if ((columns < 7) || (rows < 7) || (Divisor == 0))
 
 4485 #if defined(USE_MMX) && defined(i386) 
 4649                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 4650                         "xor       %%ebx, %%ebx \n\t"    
 4652                         "movd      %%ebx, %%mm5 \n\t"    
 4653                         "mov          %4, %%edx \n\t"    
 4654                         "mov          %1, %%esi \n\t"    
 4655                         "mov          %0, %%edi \n\t"    
 4656                         "add          $3, %%edi \n\t"    
 4657                         "mov          %3, %%eax \n\t"    
 4658                         "add       %%eax, %%edi \n\t"    
 4659                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"        
 4660                         "sub          $6, %%ebx \n\t"    
 4662                         ".L10340:               \n\t" "mov       %%eax, %%ecx \n\t"      
 4663                         "sub          $6, %%ecx \n\t"    
 4665                         ".L10342:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 4666                         "movd      %%esi, %%mm6 \n\t"    
 4668                         "movq    (%%esi), %%mm1 \n\t"    
 4669                         "movq      %%mm1, %%mm2 \n\t"    
 4670                         "add       %%eax, %%esi \n\t"    
 4671                         "movq    (%%edx), %%mm3 \n\t"    
 4672                         "add          $8, %%edx \n\t"    
 4673                         "movq    (%%edx), %%mm4 \n\t"    
 4674                         "add          $8, %%edx \n\t"    
 4675                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4676                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4677                         "pmullw    %%mm3, %%mm1 \n\t"    
 4678                         "pmullw    %%mm4, %%mm2 \n\t"    
 4679                         "paddsw    %%mm2, %%mm1 \n\t"    
 4680                         "paddsw    %%mm1, %%mm7 \n\t"    
 4682                         "movq    (%%esi), %%mm1 \n\t"    
 4683                         "movq      %%mm1, %%mm2 \n\t"    
 4684                         "add       %%eax, %%esi \n\t"    
 4685                         "movq    (%%edx), %%mm3 \n\t"    
 4686                         "add          $8, %%edx \n\t"    
 4687                         "movq    (%%edx), %%mm4 \n\t"    
 4688                         "add          $8, %%edx \n\t"    
 4689                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4690                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4691                         "pmullw    %%mm3, %%mm1 \n\t"    
 4692                         "pmullw    %%mm4, %%mm2 \n\t"    
 4693                         "paddsw    %%mm2, %%mm1 \n\t"    
 4694                         "paddsw    %%mm1, %%mm7 \n\t"    
 4696                         "movq    (%%esi), %%mm1 \n\t"    
 4697                         "movq      %%mm1, %%mm2 \n\t"    
 4698                         "add       %%eax, %%esi \n\t"    
 4699                         "movq    (%%edx), %%mm3 \n\t"    
 4700                         "add          $8, %%edx \n\t"    
 4701                         "movq    (%%edx), %%mm4 \n\t"    
 4702                         "add          $8, %%edx \n\t"    
 4703                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4704                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4705                         "pmullw    %%mm3, %%mm1 \n\t"    
 4706                         "pmullw    %%mm4, %%mm2 \n\t"    
 4707                         "paddsw    %%mm2, %%mm1 \n\t"    
 4708                         "paddsw    %%mm1, %%mm7 \n\t"    
 4710                         "movq    (%%esi), %%mm1 \n\t"    
 4711                         "movq      %%mm1, %%mm2 \n\t"    
 4712                         "add       %%eax, %%esi \n\t"    
 4713                         "movq    (%%edx), %%mm3 \n\t"    
 4714                         "add          $8, %%edx \n\t"    
 4715                         "movq    (%%edx), %%mm4 \n\t"    
 4716                         "add          $8, %%edx \n\t"    
 4717                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4718                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4719                         "pmullw    %%mm3, %%mm1 \n\t"    
 4720                         "pmullw    %%mm4, %%mm2 \n\t"    
 4721                         "paddsw    %%mm2, %%mm1 \n\t"    
 4722                         "paddsw    %%mm1, %%mm7 \n\t"    
 4724                         "movq    (%%esi), %%mm1 \n\t"    
 4725                         "movq      %%mm1, %%mm2 \n\t"    
 4726                         "add       %%eax, %%esi \n\t"    
 4727                         "movq    (%%edx), %%mm3 \n\t"    
 4728                         "add          $8, %%edx \n\t"    
 4729                         "movq    (%%edx), %%mm4 \n\t"    
 4730                         "add          $8, %%edx \n\t"    
 4731                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4732                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4733                         "pmullw    %%mm3, %%mm1 \n\t"    
 4734                         "pmullw    %%mm4, %%mm2 \n\t"    
 4735                         "paddsw    %%mm2, %%mm1 \n\t"    
 4736                         "paddsw    %%mm1, %%mm7 \n\t"    
 4738                         "movq    (%%esi), %%mm1 \n\t"    
 4739                         "movq      %%mm1, %%mm2 \n\t"    
 4740                         "add       %%eax, %%esi \n\t"    
 4741                         "movq    (%%edx), %%mm3 \n\t"    
 4742                         "add          $8, %%edx \n\t"    
 4743                         "movq    (%%edx), %%mm4 \n\t"    
 4744                         "add          $8, %%edx \n\t"    
 4745                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4746                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4747                         "pmullw    %%mm3, %%mm1 \n\t"    
 4748                         "pmullw    %%mm4, %%mm2 \n\t"    
 4749                         "paddsw    %%mm2, %%mm1 \n\t"    
 4750                         "paddsw    %%mm1, %%mm7 \n\t"    
 4752                         "movq    (%%esi), %%mm1 \n\t"    
 4753                         "movq      %%mm1, %%mm2 \n\t"    
 4754                         "movq    (%%edx), %%mm3 \n\t"    
 4755                         "add          $8, %%edx \n\t"    
 4756                         "movq    (%%edx), %%mm4 \n\t"    
 4757                         "punpcklbw %%mm0, %%mm1 \n\t"    
 4758                         "punpckhbw %%mm0, %%mm2 \n\t"    
 4759                         "pmullw    %%mm3, %%mm1 \n\t"    
 4760                         "pmullw    %%mm4, %%mm2 \n\t"    
 4761                         "paddsw    %%mm2, %%mm1 \n\t"    
 4762                         "paddsw    %%mm1, %%mm7 \n\t"    
 4764                         "movq      %%mm7, %%mm3 \n\t"    
 4765                         "psrlq       $32, %%mm7 \n\t"    
 4766                         "paddsw    %%mm3, %%mm7 \n\t"    
 4767                         "movq      %%mm7, %%mm2 \n\t"    
 4768                         "psrlq       $16, %%mm7 \n\t"    
 4769                         "paddsw    %%mm2, %%mm7 \n\t"    
 4771                         "movd      %%eax, %%mm1 \n\t"    
 4772                         "movd      %%ebx, %%mm2 \n\t"    
 4773                         "movd      %%edx, %%mm3 \n\t"    
 4774                         "movd      %%mm7, %%eax \n\t"    
 4775                         "psraw       $15, %%mm7 \n\t"    
 4776                         "movd      %%mm5, %%ebx \n\t"    
 4777                         "movd      %%mm7, %%edx \n\t"    
 4779                         "movd      %%eax, %%mm7 \n\t"    
 4780                         "packuswb  %%mm0, %%mm7 \n\t"    
 4781                         "movd      %%mm7, %%eax \n\t"    
 4782                         "mov      %%al, (%%edi) \n\t"    
 4783                         "movd      %%mm3, %%edx \n\t"    
 4784                         "movd      %%mm2, %%ebx \n\t"    
 4785                         "movd      %%mm1, %%eax \n\t"    
 4787                         "movd      %%mm6, %%esi \n\t"    
 4788                         "sub        $104, %%edx \n\t"    
 4794                         "add          $6, %%esi \n\t"    
 4795                         "add          $6, %%edi \n\t"    
 4800                         "popa                   \n\t":
"=m" (Dest)       
 
 4831                                                                                    signed short *Kernel, 
unsigned char Divisor)
 /*
  * Remainder of a convolution routine that takes a Kernel pointer and a
  * Divisor. Judging by the 9-row/9-column minimum and the nine coefficient
  * groups below this is the 9x9 variant; the function name and the leading
  * parameters sit on a listing line that is not part of this excerpt.
  *
  * NOTE(review): the numeric prefixes are listing line numbers and they skip
  * values, so some statements (loop-closing jumps, the remaining asm operand
  * lists, the bodies of the guards) are not visible here. Comments below
  * describe only the instructions that are shown.
  */
 
 /* Guard: any NULL pointer argument (the action taken is not visible here). */
 4834         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 /* Guard: image smaller than 9 in either dimension, or a zero Divisor. */
 4837         if ((columns < 9) || (rows < 9) || (Divisor == 0))
 
 /* MMX inline assembly, compiled only when USE_MMX and i386 are defined. */
 4842 #if defined(USE_MMX) && defined(i386) 
 5106                         (
                              /* Save all general registers; MM0 = 0 (zero source for byte->word unpacking). */
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
                              /* EBX cleared, then copied to MM5 (listing line 5108 between these two is not shown). */
 5107                         "xor       %%ebx, %%ebx \n\t"    
 5109                         "movd      %%ebx, %%mm5 \n\t"    
                              /* EDX = operand %4: base of the 8-byte coefficient loads (presumably Kernel - operand list not visible). */
 5110                         "mov          %4, %%edx \n\t"    
                              /* ESI = operand %1: read pointer (presumably Src - TODO confirm). */
 5111                         "mov          %1, %%esi \n\t"    
                              /* EDI = operand %0: write pointer; %0 is the "=m" (Dest) output operand. */
 5112                         "mov          %0, %%edi \n\t"    
                              /* EDI += 4 + 4 * EAX, with EAX = operand %3 (presumably the row width, columns - TODO confirm). */
 5113                         "add          $4, %%edi \n\t"    
 5114                         "mov          %3, %%eax \n\t"    
 5115                         "add       %%eax, %%edi \n\t"    
                              /* ...then EBX = operand %2 - 8 (presumably the outer loop counter derived from rows). */
 5116                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"  
 5117                         "sub          $8, %%ebx \n\t"    
                              /* Outer label: ECX = EAX - 8. */
 5119                         ".L10350:               \n\t" "mov       %%eax, %%ecx \n\t"      
 5120                         "sub          $8, %%ecx \n\t"    
                              /* Inner label: clear the accumulator MM7 and remember ESI in MM6. */
 5122                         ".L10352:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 5123                         "movd      %%esi, %%mm6 \n\t"    
                              /* Group 1 of 9: load 8 bytes from (ESI) and duplicate them into MM2. */
 5125                         "movq    (%%esi), %%mm1 \n\t"    
 5126                         "movq      %%mm1, %%mm2 \n\t"    
                              /* Fetch two 8-byte coefficient quads into MM3/MM4, advancing EDX by 8 each time. */
 5128                         "movq    (%%edx), %%mm3 \n\t"    
 5129                         "add          $8, %%edx \n\t"    
 5130                         "movq    (%%edx), %%mm4 \n\t"    
 5131                         "add          $8, %%edx \n\t"    
                              /* Widen low 4 bytes (MM1) and high 4 bytes (MM2) to 16-bit words using the zero in MM0. */
 5132                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5133                         "punpckhbw %%mm0, %%mm2 \n\t"    
                              /* Word-wise multiply by the coefficients, then saturating signed add into MM7. */
 5134                         "pmullw    %%mm3, %%mm1 \n\t"    
 5135                         "pmullw    %%mm4, %%mm2 \n\t"    
 5136                         "paddsw    %%mm2, %%mm1 \n\t"    
 5137                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Third load of the group: only its low 4 bytes are used (punpcklbw only). */
 5138                         "movq    (%%esi), %%mm1 \n\t"    
                              /* ESI = ESI - 1 + EAX. */
 5139                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5140                         "movq    (%%edx), %%mm3 \n\t"    
 5141                         "add          $8, %%edx \n\t"    
 5142                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5143                         "pmullw    %%mm3, %%mm1 \n\t"    
 5144                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 2 of 9: same sequence as group 1. */
 5146                         "movq    (%%esi), %%mm1 \n\t"    
 5147                         "movq      %%mm1, %%mm2 \n\t"    
 5149                         "movq    (%%edx), %%mm3 \n\t"    
 5150                         "add          $8, %%edx \n\t"    
 5151                         "movq    (%%edx), %%mm4 \n\t"    
 5152                         "add          $8, %%edx \n\t"    
 5153                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5154                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5155                         "pmullw    %%mm3, %%mm1 \n\t"    
 5156                         "pmullw    %%mm4, %%mm2 \n\t"    
 5157                         "paddsw    %%mm2, %%mm1 \n\t"    
 5158                         "paddsw    %%mm1, %%mm7 \n\t"    
 5159                         "movq    (%%esi), %%mm1 \n\t"    
 5160                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5161                         "movq    (%%edx), %%mm3 \n\t"    
 5162                         "add          $8, %%edx \n\t"    
 5163                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5164                         "pmullw    %%mm3, %%mm1 \n\t"    
 5165                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 3 of 9: same sequence as group 1. */
 5167                         "movq    (%%esi), %%mm1 \n\t"    
 5168                         "movq      %%mm1, %%mm2 \n\t"    
 5170                         "movq    (%%edx), %%mm3 \n\t"    
 5171                         "add          $8, %%edx \n\t"    
 5172                         "movq    (%%edx), %%mm4 \n\t"    
 5173                         "add          $8, %%edx \n\t"    
 5174                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5175                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5176                         "pmullw    %%mm3, %%mm1 \n\t"    
 5177                         "pmullw    %%mm4, %%mm2 \n\t"    
 5178                         "paddsw    %%mm2, %%mm1 \n\t"    
 5179                         "paddsw    %%mm1, %%mm7 \n\t"    
 5180                         "movq    (%%esi), %%mm1 \n\t"    
 5181                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5182                         "movq    (%%edx), %%mm3 \n\t"    
 5183                         "add          $8, %%edx \n\t"    
 5184                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5185                         "pmullw    %%mm3, %%mm1 \n\t"    
 5186                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 4 of 9: same sequence as group 1. */
 5188                         "movq    (%%esi), %%mm1 \n\t"    
 5189                         "movq      %%mm1, %%mm2 \n\t"    
 5191                         "movq    (%%edx), %%mm3 \n\t"    
 5192                         "add          $8, %%edx \n\t"    
 5193                         "movq    (%%edx), %%mm4 \n\t"    
 5194                         "add          $8, %%edx \n\t"    
 5195                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5196                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5197                         "pmullw    %%mm3, %%mm1 \n\t"    
 5198                         "pmullw    %%mm4, %%mm2 \n\t"    
 5199                         "paddsw    %%mm2, %%mm1 \n\t"    
 5200                         "paddsw    %%mm1, %%mm7 \n\t"    
 5201                         "movq    (%%esi), %%mm1 \n\t"    
 5202                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5203                         "movq    (%%edx), %%mm3 \n\t"    
 5204                         "add          $8, %%edx \n\t"    
 5205                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5206                         "pmullw    %%mm3, %%mm1 \n\t"    
 5207                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 5 of 9: same sequence as group 1. */
 5209                         "movq    (%%esi), %%mm1 \n\t"    
 5210                         "movq      %%mm1, %%mm2 \n\t"    
 5212                         "movq    (%%edx), %%mm3 \n\t"    
 5213                         "add          $8, %%edx \n\t"    
 5214                         "movq    (%%edx), %%mm4 \n\t"    
 5215                         "add          $8, %%edx \n\t"    
 5216                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5217                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5218                         "pmullw    %%mm3, %%mm1 \n\t"    
 5219                         "pmullw    %%mm4, %%mm2 \n\t"    
 5220                         "paddsw    %%mm2, %%mm1 \n\t"    
 5221                         "paddsw    %%mm1, %%mm7 \n\t"    
 5222                         "movq    (%%esi), %%mm1 \n\t"    
 5223                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5224                         "movq    (%%edx), %%mm3 \n\t"    
 5225                         "add          $8, %%edx \n\t"    
 5226                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5227                         "pmullw    %%mm3, %%mm1 \n\t"    
 5228                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 6 of 9: same sequence as group 1. */
 5230                         "movq    (%%esi), %%mm1 \n\t"    
 5231                         "movq      %%mm1, %%mm2 \n\t"    
 5233                         "movq    (%%edx), %%mm3 \n\t"    
 5234                         "add          $8, %%edx \n\t"    
 5235                         "movq    (%%edx), %%mm4 \n\t"    
 5236                         "add          $8, %%edx \n\t"    
 5237                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5238                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5239                         "pmullw    %%mm3, %%mm1 \n\t"    
 5240                         "pmullw    %%mm4, %%mm2 \n\t"    
 5241                         "paddsw    %%mm2, %%mm1 \n\t"    
 5242                         "paddsw    %%mm1, %%mm7 \n\t"    
 5243                         "movq    (%%esi), %%mm1 \n\t"    
 5244                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5245                         "movq    (%%edx), %%mm3 \n\t"    
 5246                         "add          $8, %%edx \n\t"    
 5247                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5248                         "pmullw    %%mm3, %%mm1 \n\t"    
 5249                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 7 of 9: same sequence as group 1. */
 5251                         "movq    (%%esi), %%mm1 \n\t"    
 5252                         "movq      %%mm1, %%mm2 \n\t"    
 5254                         "movq    (%%edx), %%mm3 \n\t"    
 5255                         "add          $8, %%edx \n\t"    
 5256                         "movq    (%%edx), %%mm4 \n\t"    
 5257                         "add          $8, %%edx \n\t"    
 5258                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5259                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5260                         "pmullw    %%mm3, %%mm1 \n\t"    
 5261                         "pmullw    %%mm4, %%mm2 \n\t"    
 5262                         "paddsw    %%mm2, %%mm1 \n\t"    
 5263                         "paddsw    %%mm1, %%mm7 \n\t"    
 5264                         "movq    (%%esi), %%mm1 \n\t"    
 5265                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5266                         "movq    (%%edx), %%mm3 \n\t"    
 5267                         "add          $8, %%edx \n\t"    
 5268                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5269                         "pmullw    %%mm3, %%mm1 \n\t"    
 5270                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 8 of 9: same sequence as group 1. */
 5272                         "movq    (%%esi), %%mm1 \n\t"    
 5273                         "movq      %%mm1, %%mm2 \n\t"    
 5275                         "movq    (%%edx), %%mm3 \n\t"    
 5276                         "add          $8, %%edx \n\t"    
 5277                         "movq    (%%edx), %%mm4 \n\t"    
 5278                         "add          $8, %%edx \n\t"    
 5279                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5280                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5281                         "pmullw    %%mm3, %%mm1 \n\t"    
 5282                         "pmullw    %%mm4, %%mm2 \n\t"    
 5283                         "paddsw    %%mm2, %%mm1 \n\t"    
 5284                         "paddsw    %%mm1, %%mm7 \n\t"    
 5285                         "movq    (%%esi), %%mm1 \n\t"    
 5286                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 5287                         "movq    (%%edx), %%mm3 \n\t"    
 5288                         "add          $8, %%edx \n\t"    
 5289                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5290                         "pmullw    %%mm3, %%mm1 \n\t"    
 5291                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 9 of 9: as above, but ESI is not moved on and EDX is not advanced after the last coefficient load. */
 5293                         "movq    (%%esi), %%mm1 \n\t"    
 5294                         "movq      %%mm1, %%mm2 \n\t"    
 5296                         "movq    (%%edx), %%mm3 \n\t"    
 5297                         "add          $8, %%edx \n\t"    
 5298                         "movq    (%%edx), %%mm4 \n\t"    
 5299                         "add          $8, %%edx \n\t"    
 5300                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5301                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5302                         "pmullw    %%mm3, %%mm1 \n\t"    
 5303                         "pmullw    %%mm4, %%mm2 \n\t"    
 5304                         "paddsw    %%mm2, %%mm1 \n\t"    
 5305                         "paddsw    %%mm1, %%mm7 \n\t"    
 5306                         "movq    (%%esi), %%mm1 \n\t"    
 5307                         "movq    (%%edx), %%mm3 \n\t"    
 5308                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5309                         "pmullw    %%mm3, %%mm1 \n\t"    
 5310                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Horizontal sum: fold the upper dword, then the upper word, so the low word of MM7 holds the sum of all four words. */
 5312                         "movq      %%mm7, %%mm3 \n\t"    
 5313                         "psrlq       $32, %%mm7 \n\t"    
 5314                         "paddsw    %%mm3, %%mm7 \n\t"    
 5315                         "movq      %%mm7, %%mm2 \n\t"    
 5316                         "psrlq       $16, %%mm7 \n\t"    
 5317                         "paddsw    %%mm2, %%mm7 \n\t"    
                              /* Park EAX, EBX and EDX in MM1, MM2 and MM3 so the registers can be reused. */
 5319                         "movd      %%eax, %%mm1 \n\t"    
 5320                         "movd      %%ebx, %%mm2 \n\t"    
 5321                         "movd      %%edx, %%mm3 \n\t"    
                              /* EAX = low dword of the sum; psraw 15 turns each word of MM7 into its sign fill; EBX = MM5; EDX = sign fill. */
 5322                         "movd      %%mm7, %%eax \n\t"    
 5323                         "psraw       $15, %%mm7 \n\t"    
 5324                         "movd      %%mm5, %%ebx \n\t"    
 5325                         "movd      %%mm7, %%edx \n\t"    
                              /* NOTE(review): listing line 5326 is not shown; presumably the division by the value in EBX happens there - TODO confirm. */
                              /* EAX back into MM7, pack words to unsigned-saturated bytes, store the lowest byte at (EDI). */
 5327                         "movd      %%eax, %%mm7 \n\t"    
 5328                         "packuswb  %%mm0, %%mm7 \n\t"    
 5329                         "movd      %%mm7, %%eax \n\t"    
 5330                         "mov      %%al, (%%edi) \n\t"    
                              /* Restore EDX, EBX and EAX from MM3, MM2 and MM1. */
 5331                         "movd      %%mm3, %%edx \n\t"    
 5332                         "movd      %%mm2, %%ebx \n\t"    
 5333                         "movd      %%mm1, %%eax \n\t"    
                              /* Restore ESI saved at the inner label; rewind EDX by 208 = 26 * 8, undoing the 26 "add $8, %%edx" steps above. */
 5335                         "movd      %%mm6, %%esi \n\t"    
 5336                         "sub        $208, %%edx \n\t"    
                              /* (Listing lines 5337-5341 are not shown.) ESI and EDI each advance by 8. */
 5342                         "add          $8, %%esi \n\t"    
 5343                         "add          $8, %%edi \n\t"    
                              /* Restore the registers saved by pusha; first output operand (%0) is Dest in memory. */
 5348                         "popa                   \n\t":
"=m" (Dest)       
 
 5379                                                                                            signed short *Kernel, 
unsigned char NRightShift)
 /*
  * Remainder of a convolution routine that takes a Kernel pointer and a
  * right-shift count. Judging by the 3-row/3-column minimum and the three
  * coefficient quads preloaded below this is the 3x3 variant; the function
  * name and leading parameters are on a listing line not in this excerpt.
  *
  * NOTE(review): the numeric prefixes are listing line numbers and they skip
  * values, so some statements (loop-closing jumps, remaining asm operands,
  * guard bodies) are not visible here.
  */
 
 /* Guard: any NULL pointer argument (the action taken is not visible here). */
 5382         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 /* Guard: image smaller than 3 in either dimension, or a shift count above 7. */
 5385         if ((columns < 3) || (rows < 3) || (NRightShift > 7))
 
 /* MMX inline assembly, compiled only when USE_MMX and i386 are defined. */
 5390 #if defined(USE_MMX) && defined(i386) 
 5463                         (
                              /* Save all general registers; MM0 = 0 (zero source for byte->word unpacking). */
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
                              /* EBX cleared, then copied to MM4, the psrlw shift count below (listing line 5465 between these is not shown). */
 5464                         "xor       %%ebx, %%ebx \n\t"    
 5466                         "movd      %%ebx, %%mm4 \n\t"    
                              /* EDX = operand %4 (presumably Kernel); preload three 8-byte coefficient quads into MM5, MM6, MM7. */
 5467                         "mov          %4, %%edx \n\t"    
 5468                         "movq    (%%edx), %%mm5 \n\t"    
 5469                         "add          $8, %%edx \n\t"    
 5470                         "movq    (%%edx), %%mm6 \n\t"    
 5471                         "add          $8, %%edx \n\t"    
 5472                         "movq    (%%edx), %%mm7 \n\t"    
                              /* EAX = operand %3 (presumably columns), ESI = operand %1 (read pointer), EDI = operand %0 (Dest) + EAX. */
 5474                         "mov          %3, %%eax \n\t"    
 5475                         "mov          %1, %%esi \n\t"    
 5476                         "mov          %0, %%edi \n\t"    
 5477                         "add       %%eax, %%edi \n\t"    
                              /* EDX = operand %2 - 2 (presumably the outer loop counter derived from rows). */
 5479                         "mov          %2, %%edx \n\t"    
 5480                         "sub          $2, %%edx \n\t"    
                              /* Outer label: ECX = EAX - 2 (listing lines 5484-5486 are not shown). */
 5482                         ".L10360:               \n\t" "mov       %%eax, %%ecx \n\t"      
 5483                         "sub          $2, %%ecx \n\t"    
                              /* Load 8 bytes from (ESI), (ESI + EAX) and (ESI + 2*EAX) into MM1, MM2, MM3. */
 5487                         "movq    (%%esi), %%mm1 \n\t"    
 5488                         "add       %%eax, %%esi \n\t"    
 5489                         "movq    (%%esi), %%mm2 \n\t"    
 5490                         "add       %%eax, %%esi \n\t"    
 5491                         "movq    (%%esi), %%mm3 \n\t"    
                              /* Widen the low 4 bytes of each load to 16-bit words. */
 5492                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5493                         "punpcklbw %%mm0, %%mm2 \n\t"    
 5494                         "punpcklbw %%mm0, %%mm3 \n\t"    
                              /* Logical right shift of every word by the count held in MM4. */
 5495                         "psrlw     %%mm4, %%mm1 \n\t"    
 5496                         "psrlw     %%mm4, %%mm2 \n\t"    
 5497                         "psrlw     %%mm4, %%mm3 \n\t"    
                              /* Multiply by the preloaded coefficients and add the three results with signed saturation. */
 5498                         "pmullw    %%mm5, %%mm1 \n\t"    
 5499                         "pmullw    %%mm6, %%mm2 \n\t"    
 5500                         "pmullw    %%mm7, %%mm3 \n\t"    
 5501                         "paddsw    %%mm2, %%mm1 \n\t"    
 5502                         "paddsw    %%mm3, %%mm1 \n\t"    
                              /* Horizontal sum: low word of MM1 ends up holding the sum of its four words. */
 5503                         "movq      %%mm1, %%mm2 \n\t"    
 5504                         "psrlq       $32, %%mm1 \n\t"    
 5505                         "paddsw    %%mm2, %%mm1 \n\t"    
 5506                         "movq      %%mm1, %%mm3 \n\t"    
 5507                         "psrlq       $16, %%mm1 \n\t"    
 5508                         "paddsw    %%mm3, %%mm1 \n\t"    
                              /* Pack words to unsigned-saturated bytes and store the lowest byte at (EDI). */
 5509                         "packuswb  %%mm0, %%mm1 \n\t"    
 5510                         "movd      %%mm1, %%ebx \n\t"    
 5511                         "mov      %%bl, (%%edi) \n\t"    
                              /* Undo the two "add EAX" steps and move ESI one byte forward. */
 5513                         "sub       %%eax, %%esi \n\t"    
 5514                         "sub       %%eax, %%esi \n\t" "inc              %%esi \n\t"      
                              /* (Listing lines 5515-5518 are not shown.) ESI and EDI each advance by 2. */
 5519                         "add          $2, %%esi \n\t"    
 5520                         "add          $2, %%edi \n\t"    
                              /* Restore the registers saved by pusha; first output operand (%0) is Dest in memory. */
 5525                         "popa                   \n\t":
"=m" (Dest)       
 
 5556                                                                                            signed short *Kernel, 
unsigned char NRightShift)
 /*
  * Remainder of a convolution routine that takes a Kernel pointer and a
  * right-shift count. Judging by the 5-row/5-column minimum and the five
  * coefficient groups below this is the 5x5 variant; the function name and
  * leading parameters are on a listing line not in this excerpt.
  *
  * NOTE(review): the numeric prefixes are listing line numbers and they skip
  * values, so some statements (loop-closing jumps, remaining asm operands,
  * guard bodies) are not visible here.
  */
 
 /* Guard: any NULL pointer argument (the action taken is not visible here). */
 5559         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 /* Guard: image smaller than 5 in either dimension, or a shift count above 7. */
 5562         if ((columns < 5) || (rows < 5) || (NRightShift > 7))
 
 /* MMX inline assembly, compiled only when USE_MMX and i386 are defined. */
 5567 #if defined(USE_MMX) && defined(i386) 
 5702                         (
                              /* Save all general registers; MM0 = 0 (zero source for byte->word unpacking). */
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
                              /* EBX cleared, then copied to MM5, the psrlw shift count below (listing line 5704 between these is not shown). */
 5703                         "xor       %%ebx, %%ebx \n\t"    
 5705                         "movd      %%ebx, %%mm5 \n\t"    
                              /* EDX = operand %4 (presumably Kernel), ESI = operand %1 (read pointer), EDI = operand %0 (Dest). */
 5706                         "mov          %4, %%edx \n\t"    
 5707                         "mov          %1, %%esi \n\t"    
 5708                         "mov          %0, %%edi \n\t"    
                              /* EDI += 2 + 2 * EAX, with EAX = operand %3 (presumably columns); EAX is doubled and then halved back. */
 5709                         "add          $2, %%edi \n\t"    
 5710                         "mov          %3, %%eax \n\t"    
 5711                         "shl          $1, %%eax \n\t"    
 5712                         "add       %%eax, %%edi \n\t"    
 5713                         "shr          $1, %%eax \n\t"    
                              /* EBX = operand %2 - 4 (presumably the outer loop counter derived from rows). */
 5714                         "mov          %2, %%ebx \n\t"    
 5715                         "sub          $4, %%ebx \n\t"    
                              /* Outer label: ECX = EAX - 4. */
 5717                         ".L10370:               \n\t" "mov       %%eax, %%ecx \n\t"      
 5718                         "sub          $4, %%ecx \n\t"    
                              /* Inner label: clear the accumulator MM7 and remember ESI in MM6. */
 5720                         ".L10372:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 5721                         "movd      %%esi, %%mm6 \n\t"    
                              /* Group 1 of 5: load 8 bytes, duplicate, move ESI on by EAX. */
 5723                         "movq    (%%esi), %%mm1 \n\t"    
 5724                         "movq      %%mm1, %%mm2 \n\t"    
 5725                         "add       %%eax, %%esi \n\t"    
                              /* Fetch two 8-byte coefficient quads into MM3/MM4, advancing EDX by 8 each time. */
 5726                         "movq    (%%edx), %%mm3 \n\t"    
 5727                         "add          $8, %%edx \n\t"    
 5728                         "movq    (%%edx), %%mm4 \n\t"    
 5729                         "add          $8, %%edx \n\t"    
                              /* Widen low/high 4 bytes to words, shift each word right by the count in MM5. */
 5730                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5731                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5732                         "psrlw     %%mm5, %%mm1 \n\t"    
 5733                         "psrlw     %%mm5, %%mm2 \n\t"    
                              /* Multiply by the coefficients and accumulate into MM7 with signed saturation. */
 5734                         "pmullw    %%mm3, %%mm1 \n\t"    
 5735                         "pmullw    %%mm4, %%mm2 \n\t"    
 5736                         "paddsw    %%mm2, %%mm1 \n\t"    
 5737                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 2 of 5: same sequence as group 1. */
 5739                         "movq    (%%esi), %%mm1 \n\t"    
 5740                         "movq      %%mm1, %%mm2 \n\t"    
 5741                         "add       %%eax, %%esi \n\t"    
 5742                         "movq    (%%edx), %%mm3 \n\t"    
 5743                         "add          $8, %%edx \n\t"    
 5744                         "movq    (%%edx), %%mm4 \n\t"    
 5745                         "add          $8, %%edx \n\t"    
 5746                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5747                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5748                         "psrlw     %%mm5, %%mm1 \n\t"    
 5749                         "psrlw     %%mm5, %%mm2 \n\t"    
 5750                         "pmullw    %%mm3, %%mm1 \n\t"    
 5751                         "pmullw    %%mm4, %%mm2 \n\t"    
 5752                         "paddsw    %%mm2, %%mm1 \n\t"    
 5753                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 3 of 5: same sequence as group 1. */
 5755                         "movq    (%%esi), %%mm1 \n\t"    
 5756                         "movq      %%mm1, %%mm2 \n\t"    
 5757                         "add       %%eax, %%esi \n\t"    
 5758                         "movq    (%%edx), %%mm3 \n\t"    
 5759                         "add          $8, %%edx \n\t"    
 5760                         "movq    (%%edx), %%mm4 \n\t"    
 5761                         "add          $8, %%edx \n\t"    
 5762                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5763                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5764                         "psrlw     %%mm5, %%mm1 \n\t"    
 5765                         "psrlw     %%mm5, %%mm2 \n\t"    
 5766                         "pmullw    %%mm3, %%mm1 \n\t"    
 5767                         "pmullw    %%mm4, %%mm2 \n\t"    
 5768                         "paddsw    %%mm2, %%mm1 \n\t"    
 5769                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 4 of 5: same sequence as group 1. */
 5771                         "movq    (%%esi), %%mm1 \n\t"    
 5772                         "movq      %%mm1, %%mm2 \n\t"    
 5773                         "add       %%eax, %%esi \n\t"    
 5774                         "movq    (%%edx), %%mm3 \n\t"    
 5775                         "add          $8, %%edx \n\t"    
 5776                         "movq    (%%edx), %%mm4 \n\t"    
 5777                         "add          $8, %%edx \n\t"    
 5778                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5779                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5780                         "psrlw     %%mm5, %%mm1 \n\t"    
 5781                         "psrlw     %%mm5, %%mm2 \n\t"    
 5782                         "pmullw    %%mm3, %%mm1 \n\t"    
 5783                         "pmullw    %%mm4, %%mm2 \n\t"    
 5784                         "paddsw    %%mm2, %%mm1 \n\t"    
 5785                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 5 of 5: as above, but ESI is not moved on and EDX is advanced only once. */
 5787                         "movq    (%%esi), %%mm1 \n\t"    
 5788                         "movq      %%mm1, %%mm2 \n\t"    
 5789                         "movq    (%%edx), %%mm3 \n\t"    
 5790                         "add          $8, %%edx \n\t"    
 5791                         "movq    (%%edx), %%mm4 \n\t"    
 5792                         "punpcklbw %%mm0, %%mm1 \n\t"    
 5793                         "punpckhbw %%mm0, %%mm2 \n\t"    
 5794                         "psrlw     %%mm5, %%mm1 \n\t"    
 5795                         "psrlw     %%mm5, %%mm2 \n\t"    
 5796                         "pmullw    %%mm3, %%mm1 \n\t"    
 5797                         "pmullw    %%mm4, %%mm2 \n\t"    
 5798                         "paddsw    %%mm2, %%mm1 \n\t"    
 5799                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Horizontal sum: low word of MM7 ends up holding the sum of its four words. */
 5801                         "movq      %%mm7, %%mm3 \n\t"    
 5802                         "psrlq       $32, %%mm7 \n\t"    
 5803                         "paddsw    %%mm3, %%mm7 \n\t"    
 5804                         "movq      %%mm7, %%mm2 \n\t"    
 5805                         "psrlq       $16, %%mm7 \n\t"    
 5806                         "paddsw    %%mm2, %%mm7 \n\t"    
                              /* Park EAX in MM1, pack the sum to unsigned-saturated bytes, store the lowest byte at (EDI), restore EAX. */
 5807                         "movd      %%eax, %%mm1 \n\t"    
 5808                         "packuswb  %%mm0, %%mm7 \n\t"    
 5809                         "movd      %%mm7, %%eax \n\t"    
 5810                         "mov      %%al, (%%edi) \n\t"    
 5811                         "movd      %%mm1, %%eax \n\t"    
                              /* Restore ESI saved at the inner label; rewind EDX by 72 = 9 * 8, undoing the nine "add $8, %%edx" steps above. */
 5813                         "movd      %%mm6, %%esi \n\t"    
 5814                         "sub         $72, %%edx \n\t"    
                              /* (Listing lines 5815-5819 are not shown.) ESI and EDI each advance by 4. */
 5820                         "add          $4, %%esi \n\t"    
 5821                         "add          $4, %%edi \n\t"    
                              /* Restore the registers saved by pusha; first output operand (%0) is Dest in memory. */
 5826                         "popa                   \n\t":
"=m" (Dest)       
 
 5857                                                                                            signed short *Kernel, 
unsigned char NRightShift)
 /*
  * Remainder of a convolution routine that takes a Kernel pointer and a
  * right-shift count. Judging by the 7-row/7-column minimum and the seven
  * coefficient groups below this is the 7x7 variant; the function name and
  * leading parameters are on a listing line not in this excerpt.
  *
  * NOTE(review): the numeric prefixes are listing line numbers and they skip
  * values, so some statements (loop-closing jumps, remaining asm operands,
  * guard bodies) are not visible here.
  */
 
 /* Guard: any NULL pointer argument (the action taken is not visible here). */
 5860         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 /* Guard: image smaller than 7 in either dimension, or a shift count above 7. */
 5863         if ((columns < 7) || (rows < 7) || (NRightShift > 7))
 
 /* MMX inline assembly, compiled only when USE_MMX and i386 are defined. */
 5868 #if defined(USE_MMX) && defined(i386) 
 6035                         (
                              /* Save all general registers; MM0 = 0 (zero source for byte->word unpacking). */
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
                              /* EBX cleared, then copied to MM5, the psrlw shift count below (listing line 6037 between these is not shown). */
 6036                         "xor       %%ebx, %%ebx \n\t"    
 6038                         "movd      %%ebx, %%mm5 \n\t"    
                              /* EDX = operand %4 (presumably Kernel), ESI = operand %1 (read pointer), EDI = operand %0 (Dest). */
 6039                         "mov          %4, %%edx \n\t"    
 6040                         "mov          %1, %%esi \n\t"    
 6041                         "mov          %0, %%edi \n\t"    
                              /* EDI += 3 + 3 * EAX, with EAX = operand %3 (presumably columns - TODO confirm). */
 6042                         "add          $3, %%edi \n\t"    
 6043                         "mov          %3, %%eax \n\t"    
 6044                         "add       %%eax, %%edi \n\t"    
                              /* ...then EBX = operand %2 - 6 (presumably the outer loop counter derived from rows). */
 6045                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"        
 6046                         "sub          $6, %%ebx \n\t"    
                              /* Outer label: ECX = EAX - 6. */
 6048                         ".L10380:               \n\t" "mov       %%eax, %%ecx \n\t"      
 6049                         "sub          $6, %%ecx \n\t"    
                              /* Inner label: clear the accumulator MM7 and remember ESI in MM6. */
 6051                         ".L10382:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 6052                         "movd      %%esi, %%mm6 \n\t"    
                              /* Group 1 of 7: load 8 bytes, duplicate, move ESI on by EAX. */
 6054                         "movq    (%%esi), %%mm1 \n\t"    
 6055                         "movq      %%mm1, %%mm2 \n\t"    
 6056                         "add       %%eax, %%esi \n\t"    
                              /* Fetch two 8-byte coefficient quads into MM3/MM4, advancing EDX by 8 each time. */
 6057                         "movq    (%%edx), %%mm3 \n\t"    
 6058                         "add          $8, %%edx \n\t"    
 6059                         "movq    (%%edx), %%mm4 \n\t"    
 6060                         "add          $8, %%edx \n\t"    
                              /* Widen low/high 4 bytes to words, shift each word right by the count in MM5. */
 6061                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6062                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6063                         "psrlw     %%mm5, %%mm1 \n\t"    
 6064                         "psrlw     %%mm5, %%mm2 \n\t"    
                              /* Multiply by the coefficients and accumulate into MM7 with signed saturation. */
 6065                         "pmullw    %%mm3, %%mm1 \n\t"    
 6066                         "pmullw    %%mm4, %%mm2 \n\t"    
 6067                         "paddsw    %%mm2, %%mm1 \n\t"    
 6068                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 2 of 7: same sequence as group 1. */
 6070                         "movq    (%%esi), %%mm1 \n\t"    
 6071                         "movq      %%mm1, %%mm2 \n\t"    
 6072                         "add       %%eax, %%esi \n\t"    
 6073                         "movq    (%%edx), %%mm3 \n\t"    
 6074                         "add          $8, %%edx \n\t"    
 6075                         "movq    (%%edx), %%mm4 \n\t"    
 6076                         "add          $8, %%edx \n\t"    
 6077                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6078                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6079                         "psrlw     %%mm5, %%mm1 \n\t"    
 6080                         "psrlw     %%mm5, %%mm2 \n\t"    
 6081                         "pmullw    %%mm3, %%mm1 \n\t"    
 6082                         "pmullw    %%mm4, %%mm2 \n\t"    
 6083                         "paddsw    %%mm2, %%mm1 \n\t"    
 6084                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 3 of 7: same sequence as group 1. */
 6086                         "movq    (%%esi), %%mm1 \n\t"    
 6087                         "movq      %%mm1, %%mm2 \n\t"    
 6088                         "add       %%eax, %%esi \n\t"    
 6089                         "movq    (%%edx), %%mm3 \n\t"    
 6090                         "add          $8, %%edx \n\t"    
 6091                         "movq    (%%edx), %%mm4 \n\t"    
 6092                         "add          $8, %%edx \n\t"    
 6093                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6094                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6095                         "psrlw     %%mm5, %%mm1 \n\t"    
 6096                         "psrlw     %%mm5, %%mm2 \n\t"    
 6097                         "pmullw    %%mm3, %%mm1 \n\t"    
 6098                         "pmullw    %%mm4, %%mm2 \n\t"    
 6099                         "paddsw    %%mm2, %%mm1 \n\t"    
 6100                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 4 of 7: same sequence as group 1. */
 6102                         "movq    (%%esi), %%mm1 \n\t"    
 6103                         "movq      %%mm1, %%mm2 \n\t"    
 6104                         "add       %%eax, %%esi \n\t"    
 6105                         "movq    (%%edx), %%mm3 \n\t"    
 6106                         "add          $8, %%edx \n\t"    
 6107                         "movq    (%%edx), %%mm4 \n\t"    
 6108                         "add          $8, %%edx \n\t"    
 6109                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6110                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6111                         "psrlw     %%mm5, %%mm1 \n\t"    
 6112                         "psrlw     %%mm5, %%mm2 \n\t"    
 6113                         "pmullw    %%mm3, %%mm1 \n\t"    
 6114                         "pmullw    %%mm4, %%mm2 \n\t"    
 6115                         "paddsw    %%mm2, %%mm1 \n\t"    
 6116                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 5 of 7: same sequence as group 1. */
 6118                         "movq    (%%esi), %%mm1 \n\t"    
 6119                         "movq      %%mm1, %%mm2 \n\t"    
 6120                         "add       %%eax, %%esi \n\t"    
 6121                         "movq    (%%edx), %%mm3 \n\t"    
 6122                         "add          $8, %%edx \n\t"    
 6123                         "movq    (%%edx), %%mm4 \n\t"    
 6124                         "add          $8, %%edx \n\t"    
 6125                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6126                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6127                         "psrlw     %%mm5, %%mm1 \n\t"    
 6128                         "psrlw     %%mm5, %%mm2 \n\t"    
 6129                         "pmullw    %%mm3, %%mm1 \n\t"    
 6130                         "pmullw    %%mm4, %%mm2 \n\t"    
 6131                         "paddsw    %%mm2, %%mm1 \n\t"    
 6132                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 6 of 7: same sequence as group 1. */
 6134                         "movq    (%%esi), %%mm1 \n\t"    
 6135                         "movq      %%mm1, %%mm2 \n\t"    
 6136                         "add       %%eax, %%esi \n\t"    
 6137                         "movq    (%%edx), %%mm3 \n\t"    
 6138                         "add          $8, %%edx \n\t"    
 6139                         "movq    (%%edx), %%mm4 \n\t"    
 6140                         "add          $8, %%edx \n\t"    
 6141                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6142                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6143                         "psrlw     %%mm5, %%mm1 \n\t"    
 6144                         "psrlw     %%mm5, %%mm2 \n\t"    
 6145                         "pmullw    %%mm3, %%mm1 \n\t"    
 6146                         "pmullw    %%mm4, %%mm2 \n\t"    
 6147                         "paddsw    %%mm2, %%mm1 \n\t"    
 6148                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Group 7 of 7: as above, but ESI is not moved on and EDX is advanced only once. */
 6150                         "movq    (%%esi), %%mm1 \n\t"    
 6151                         "movq      %%mm1, %%mm2 \n\t"    
 6152                         "movq    (%%edx), %%mm3 \n\t"    
 6153                         "add          $8, %%edx \n\t"    
 6154                         "movq    (%%edx), %%mm4 \n\t"    
 6155                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6156                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6157                         "psrlw     %%mm5, %%mm1 \n\t"    
 6158                         "psrlw     %%mm5, %%mm2 \n\t"    
 6159                         "pmullw    %%mm3, %%mm1 \n\t"    
 6160                         "pmullw    %%mm4, %%mm2 \n\t"    
 6161                         "paddsw    %%mm2, %%mm1 \n\t"    
 6162                         "paddsw    %%mm1, %%mm7 \n\t"    
                              /* Horizontal sum: low word of MM7 ends up holding the sum of its four words. */
 6164                         "movq      %%mm7, %%mm3 \n\t"    
 6165                         "psrlq       $32, %%mm7 \n\t"    
 6166                         "paddsw    %%mm3, %%mm7 \n\t"    
 6167                         "movq      %%mm7, %%mm2 \n\t"    
 6168                         "psrlq       $16, %%mm7 \n\t"    
 6169                         "paddsw    %%mm2, %%mm7 \n\t"    
                              /* Park EAX in MM1, pack the sum to unsigned-saturated bytes, store the lowest byte at (EDI), restore EAX. */
 6170                         "movd      %%eax, %%mm1 \n\t"    
 6171                         "packuswb  %%mm0, %%mm7 \n\t"    
 6172                         "movd      %%mm7, %%eax \n\t"    
 6173                         "mov      %%al, (%%edi) \n\t"    
 6174                         "movd      %%mm1, %%eax \n\t"    
                              /* Restore ESI saved at the inner label; rewind EDX by 104 = 13 * 8, undoing the thirteen "add $8, %%edx" steps above. */
 6176                         "movd      %%mm6, %%esi \n\t"    
 6177                         "sub        $104, %%edx \n\t"    
                              /* (Listing lines 6178-6182 are not shown.) ESI and EDI each advance by 6. */
 6183                         "add          $6, %%esi \n\t"    
 6184                         "add          $6, %%edi \n\t"    
                              /* Restore the registers saved by pusha; first output operand (%0) is Dest in memory. */
 6189                         "popa                   \n\t":
"=m" (Dest)       
 
 6220                                                                                            signed short *Kernel, 
unsigned char NRightShift)
 
 6223         if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
 
 6226         if ((columns < 9) || (rows < 9) || (NRightShift > 7))
 
 6231 #if defined(USE_MMX) && defined(i386) 
 6511                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 6512                         "xor       %%ebx, %%ebx \n\t"    
 6514                         "movd      %%ebx, %%mm5 \n\t"    
 6515                         "mov          %4, %%edx \n\t"    
 6516                         "mov          %1, %%esi \n\t"    
 6517                         "mov          %0, %%edi \n\t"    
 6518                         "add          $4, %%edi \n\t"    
 6519                         "mov          %3, %%eax \n\t"    
 6520                         "add       %%eax, %%edi \n\t"    
 6521                         "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov          %2, %%ebx \n\t"  
 6522                         "sub          $8, %%ebx \n\t"    
 6524                         ".L10390:               \n\t" "mov       %%eax, %%ecx \n\t"      
 6525                         "sub          $8, %%ecx \n\t"    
 6527                         ".L10392:               \n\t" "pxor      %%mm7, %%mm7 \n\t"      
 6528                         "movd      %%esi, %%mm6 \n\t"    
 6530                         "movq    (%%esi), %%mm1 \n\t"    
 6531                         "movq      %%mm1, %%mm2 \n\t"    
 6533                         "movq    (%%edx), %%mm3 \n\t"    
 6534                         "add          $8, %%edx \n\t"    
 6535                         "movq    (%%edx), %%mm4 \n\t"    
 6536                         "add          $8, %%edx \n\t"    
 6537                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6538                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6539                         "psrlw     %%mm5, %%mm1 \n\t"    
 6540                         "psrlw     %%mm5, %%mm2 \n\t"    
 6541                         "pmullw    %%mm3, %%mm1 \n\t"    
 6542                         "pmullw    %%mm4, %%mm2 \n\t"    
 6543                         "paddsw    %%mm2, %%mm1 \n\t"    
 6544                         "paddsw    %%mm1, %%mm7 \n\t"    
 6545                         "movq    (%%esi), %%mm1 \n\t"    
 6546                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6547                         "movq    (%%edx), %%mm3 \n\t"    
 6548                         "add          $8, %%edx \n\t"    
 6549                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6550                         "psrlw     %%mm5, %%mm1 \n\t"    
 6551                         "pmullw    %%mm3, %%mm1 \n\t"    
 6552                         "paddsw    %%mm1, %%mm7 \n\t"    
 6554                         "movq    (%%esi), %%mm1 \n\t"    
 6555                         "movq      %%mm1, %%mm2 \n\t"    
 6557                         "movq    (%%edx), %%mm3 \n\t"    
 6558                         "add          $8, %%edx \n\t"    
 6559                         "movq    (%%edx), %%mm4 \n\t"    
 6560                         "add          $8, %%edx \n\t"    
 6561                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6562                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6563                         "psrlw     %%mm5, %%mm1 \n\t"    
 6564                         "psrlw     %%mm5, %%mm2 \n\t"    
 6565                         "pmullw    %%mm3, %%mm1 \n\t"    
 6566                         "pmullw    %%mm4, %%mm2 \n\t"    
 6567                         "paddsw    %%mm2, %%mm1 \n\t"    
 6568                         "paddsw    %%mm1, %%mm7 \n\t"    
 6569                         "movq    (%%esi), %%mm1 \n\t"    
 6570                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6571                         "movq    (%%edx), %%mm3 \n\t"    
 6572                         "add          $8, %%edx \n\t"    
 6573                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6574                         "psrlw     %%mm5, %%mm1 \n\t"    
 6575                         "pmullw    %%mm3, %%mm1 \n\t"    
 6576                         "paddsw    %%mm1, %%mm7 \n\t"    
 6578                         "movq    (%%esi), %%mm1 \n\t"    
 6579                         "movq      %%mm1, %%mm2 \n\t"    
 6581                         "movq    (%%edx), %%mm3 \n\t"    
 6582                         "add          $8, %%edx \n\t"    
 6583                         "movq    (%%edx), %%mm4 \n\t"    
 6584                         "add          $8, %%edx \n\t"    
 6585                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6586                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6587                         "psrlw     %%mm5, %%mm1 \n\t"    
 6588                         "psrlw     %%mm5, %%mm2 \n\t"    
 6589                         "pmullw    %%mm3, %%mm1 \n\t"    
 6590                         "pmullw    %%mm4, %%mm2 \n\t"    
 6591                         "paddsw    %%mm2, %%mm1 \n\t"    
 6592                         "paddsw    %%mm1, %%mm7 \n\t"    
 6593                         "movq    (%%esi), %%mm1 \n\t"    
 6594                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6595                         "movq    (%%edx), %%mm3 \n\t"    
 6596                         "add          $8, %%edx \n\t"    
 6597                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6598                         "psrlw     %%mm5, %%mm1 \n\t"    
 6599                         "pmullw    %%mm3, %%mm1 \n\t"    
 6600                         "paddsw    %%mm1, %%mm7 \n\t"    
 6602                         "movq    (%%esi), %%mm1 \n\t"    
 6603                         "movq      %%mm1, %%mm2 \n\t"    
 6605                         "movq    (%%edx), %%mm3 \n\t"    
 6606                         "add          $8, %%edx \n\t"    
 6607                         "movq    (%%edx), %%mm4 \n\t"    
 6608                         "add          $8, %%edx \n\t"    
 6609                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6610                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6611                         "psrlw     %%mm5, %%mm1 \n\t"    
 6612                         "psrlw     %%mm5, %%mm2 \n\t"    
 6613                         "pmullw    %%mm3, %%mm1 \n\t"    
 6614                         "pmullw    %%mm4, %%mm2 \n\t"    
 6615                         "paddsw    %%mm2, %%mm1 \n\t"    
 6616                         "paddsw    %%mm1, %%mm7 \n\t"    
 6617                         "movq    (%%esi), %%mm1 \n\t"    
 6618                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6619                         "movq    (%%edx), %%mm3 \n\t"    
 6620                         "add          $8, %%edx \n\t"    
 6621                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6622                         "psrlw     %%mm5, %%mm1 \n\t"    
 6623                         "pmullw    %%mm3, %%mm1 \n\t"    
 6624                         "paddsw    %%mm1, %%mm7 \n\t"    
 6626                         "movq    (%%esi), %%mm1 \n\t"    
 6627                         "movq      %%mm1, %%mm2 \n\t"    
 6629                         "movq    (%%edx), %%mm3 \n\t"    
 6630                         "add          $8, %%edx \n\t"    
 6631                         "movq    (%%edx), %%mm4 \n\t"    
 6632                         "add          $8, %%edx \n\t"    
 6633                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6634                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6635                         "psrlw     %%mm5, %%mm1 \n\t"    
 6636                         "psrlw     %%mm5, %%mm2 \n\t"    
 6637                         "pmullw    %%mm3, %%mm1 \n\t"    
 6638                         "pmullw    %%mm4, %%mm2 \n\t"    
 6639                         "paddsw    %%mm2, %%mm1 \n\t"    
 6640                         "paddsw    %%mm1, %%mm7 \n\t"    
 6641                         "movq    (%%esi), %%mm1 \n\t"    
 6642                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6643                         "movq    (%%edx), %%mm3 \n\t"    
 6644                         "add          $8, %%edx \n\t"    
 6645                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6646                         "psrlw     %%mm5, %%mm1 \n\t"    
 6647                         "pmullw    %%mm3, %%mm1 \n\t"    
 6648                         "paddsw    %%mm1, %%mm7 \n\t"    
 6650                         "movq    (%%esi), %%mm1 \n\t"    
 6651                         "movq      %%mm1, %%mm2 \n\t"    
 6653                         "movq    (%%edx), %%mm3 \n\t"    
 6654                         "add          $8, %%edx \n\t"    
 6655                         "movq    (%%edx), %%mm4 \n\t"    
 6656                         "add          $8, %%edx \n\t"    
 6657                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6658                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6659                         "psrlw     %%mm5, %%mm1 \n\t"    
 6660                         "psrlw     %%mm5, %%mm2 \n\t"    
 6661                         "pmullw    %%mm3, %%mm1 \n\t"    
 6662                         "pmullw    %%mm4, %%mm2 \n\t"    
 6663                         "paddsw    %%mm2, %%mm1 \n\t"    
 6664                         "paddsw    %%mm1, %%mm7 \n\t"    
 6665                         "movq    (%%esi), %%mm1 \n\t"    
 6666                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6667                         "movq    (%%edx), %%mm3 \n\t"    
 6668                         "add          $8, %%edx \n\t"    
 6669                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6670                         "psrlw     %%mm5, %%mm1 \n\t"    
 6671                         "pmullw    %%mm3, %%mm1 \n\t"    
 6672                         "paddsw    %%mm1, %%mm7 \n\t"    
 6674                         "movq    (%%esi), %%mm1 \n\t"    
 6675                         "movq      %%mm1, %%mm2 \n\t"    
 6677                         "movq    (%%edx), %%mm3 \n\t"    
 6678                         "add          $8, %%edx \n\t"    
 6679                         "movq    (%%edx), %%mm4 \n\t"    
 6680                         "add          $8, %%edx \n\t"    
 6681                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6682                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6683                         "psrlw     %%mm5, %%mm1 \n\t"    
 6684                         "psrlw     %%mm5, %%mm2 \n\t"    
 6685                         "pmullw    %%mm3, %%mm1 \n\t"    
 6686                         "pmullw    %%mm4, %%mm2 \n\t"    
 6687                         "paddsw    %%mm2, %%mm1 \n\t"    
 6688                         "paddsw    %%mm1, %%mm7 \n\t"    
 6689                         "movq    (%%esi), %%mm1 \n\t"    
 6690                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6691                         "movq    (%%edx), %%mm3 \n\t"    
 6692                         "add          $8, %%edx \n\t"    
 6693                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6694                         "psrlw     %%mm5, %%mm1 \n\t"    
 6695                         "pmullw    %%mm3, %%mm1 \n\t"    
 6696                         "paddsw    %%mm1, %%mm7 \n\t"    
 6698                         "movq    (%%esi), %%mm1 \n\t"    
 6699                         "movq      %%mm1, %%mm2 \n\t"    
 6701                         "movq    (%%edx), %%mm3 \n\t"    
 6702                         "add          $8, %%edx \n\t"    
 6703                         "movq    (%%edx), %%mm4 \n\t"    
 6704                         "add          $8, %%edx \n\t"    
 6705                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6706                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6707                         "psrlw     %%mm5, %%mm1 \n\t"    
 6708                         "psrlw     %%mm5, %%mm2 \n\t"    
 6709                         "pmullw    %%mm3, %%mm1 \n\t"    
 6710                         "pmullw    %%mm4, %%mm2 \n\t"    
 6711                         "paddsw    %%mm2, %%mm1 \n\t"    
 6712                         "paddsw    %%mm1, %%mm7 \n\t"    
 6713                         "movq    (%%esi), %%mm1 \n\t"    
 6714                         "dec              %%esi \n\t" "add       %%eax, %%esi \n\t"      
 6715                         "movq    (%%edx), %%mm3 \n\t"    
 6716                         "add          $8, %%edx \n\t"    
 6717                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6718                         "psrlw     %%mm5, %%mm1 \n\t"    
 6719                         "pmullw    %%mm3, %%mm1 \n\t"    
 6720                         "paddsw    %%mm1, %%mm7 \n\t"    
 6722                         "movq    (%%esi), %%mm1 \n\t"    
 6723                         "movq      %%mm1, %%mm2 \n\t"    
 6725                         "movq    (%%edx), %%mm3 \n\t"    
 6726                         "add          $8, %%edx \n\t"    
 6727                         "movq    (%%edx), %%mm4 \n\t"    
 6728                         "add          $8, %%edx \n\t"    
 6729                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6730                         "punpckhbw %%mm0, %%mm2 \n\t"    
 6731                         "psrlw     %%mm5, %%mm1 \n\t"    
 6732                         "psrlw     %%mm5, %%mm2 \n\t"    
 6733                         "pmullw    %%mm3, %%mm1 \n\t"    
 6734                         "pmullw    %%mm4, %%mm2 \n\t"    
 6735                         "paddsw    %%mm2, %%mm1 \n\t"    
 6736                         "paddsw    %%mm1, %%mm7 \n\t"    
 6737                         "movq    (%%esi), %%mm1 \n\t"    
 6738                         "movq    (%%edx), %%mm3 \n\t"    
 6739                         "punpcklbw %%mm0, %%mm1 \n\t"    
 6740                         "psrlw     %%mm5, %%mm1 \n\t"    
 6741                         "pmullw    %%mm3, %%mm1 \n\t"    
 6742                         "paddsw    %%mm1, %%mm7 \n\t"    
 6744                         "movq      %%mm7, %%mm3 \n\t"    
 6745                         "psrlq       $32, %%mm7 \n\t"    
 6746                         "paddsw    %%mm3, %%mm7 \n\t"    
 6747                         "movq      %%mm7, %%mm2 \n\t"    
 6748                         "psrlq       $16, %%mm7 \n\t"    
 6749                         "paddsw    %%mm2, %%mm7 \n\t"    
 6750                         "movd      %%eax, %%mm1 \n\t"    
 6751                         "packuswb  %%mm0, %%mm7 \n\t"    
 6752                         "movd      %%mm7, %%eax \n\t"    
 6753                         "mov      %%al, (%%edi) \n\t"    
 6754                         "movd      %%mm1, %%eax \n\t"    
 6756                         "movd      %%mm6, %%esi \n\t"    
 6757                         "sub        $208, %%edx \n\t"    
 6763                         "add          $8, %%esi \n\t"    
 6764                         "add          $8, %%edi \n\t"    
 6769                         "popa                   \n\t":
"=m" (Dest)       
 
 6802         if ((Src == NULL) || (Dest == NULL))
 
 6805         if ((columns < 8) || (rows < 3))
 
 6810 #if defined(USE_MMX) && defined(i386) 
 6923                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 6924                         "mov          %3, %%eax \n\t"    
 6926                         "mov          %1, %%esi \n\t"    
 6927                         "mov          %0, %%edi \n\t"    
 6928                         "add       %%eax, %%edi \n\t"    
 6930                         "mov          %2, %%edx \n\t"    
 6931                         "sub          $2, %%edx \n\t"    
 6933                         ".L10400:                \n\t" "mov       %%eax, %%ecx \n\t"     
 6934                         "shr          $3, %%ecx \n\t"    
 6935                         "mov       %%esi, %%ebx \n\t"    
 6936                         "movd      %%edi, %%mm1 \n\t"    
 6940                         "movq    (%%esi), %%mm4 \n\t"    
 6941                         "movq      %%mm4, %%mm5 \n\t"    
 6942                         "add          $2, %%esi \n\t"    
 6943                         "punpcklbw %%mm0, %%mm4 \n\t"    
 6944                         "punpckhbw %%mm0, %%mm5 \n\t"    
 6945                         "movq    (%%esi), %%mm6 \n\t"    
 6946                         "movq      %%mm6, %%mm7 \n\t"    
 6947                         "sub          $2, %%esi \n\t"    
 6948                         "punpcklbw %%mm0, %%mm6 \n\t"    
 6949                         "punpckhbw %%mm0, %%mm7 \n\t"    
 6950                         "add       %%eax, %%esi \n\t"    
 6951                         "movq    (%%esi), %%mm2 \n\t"    
 6952                         "movq      %%mm2, %%mm3 \n\t"    
 6953                         "add          $2, %%esi \n\t"    
 6954                         "punpcklbw %%mm0, %%mm2 \n\t"    
 6955                         "punpckhbw %%mm0, %%mm3 \n\t"    
 6956                         "paddw     %%mm2, %%mm4 \n\t"    
 6957                         "paddw     %%mm3, %%mm5 \n\t"    
 6958                         "paddw     %%mm2, %%mm4 \n\t"    
 6959                         "paddw     %%mm3, %%mm5 \n\t"    
 6960                         "movq    (%%esi), %%mm2 \n\t"    
 6961                         "movq      %%mm2, %%mm3 \n\t"    
 6962                         "sub          $2, %%esi \n\t"    
 6963                         "punpcklbw %%mm0, %%mm2 \n\t"    
 6964                         "punpckhbw %%mm0, %%mm3 \n\t"    
 6965                         "paddw     %%mm2, %%mm6 \n\t"    
 6966                         "paddw     %%mm3, %%mm7 \n\t"    
 6967                         "paddw     %%mm2, %%mm6 \n\t"    
 6968                         "paddw     %%mm3, %%mm7 \n\t"    
 6969                         "add       %%eax, %%esi \n\t"    
 6970                         "movq    (%%esi), %%mm2 \n\t"    
 6971                         "movq      %%mm2, %%mm3 \n\t"    
 6972                         "add          $2, %%esi \n\t"    
 6973                         "punpcklbw %%mm0, %%mm2 \n\t"    
 6974                         "punpckhbw %%mm0, %%mm3 \n\t"    
 6975                         "paddw     %%mm2, %%mm4 \n\t"    
 6976                         "paddw     %%mm3, %%mm5 \n\t"    
 6977                         "movq    (%%esi), %%mm2 \n\t"    
 6978                         "movq      %%mm2, %%mm3 \n\t"    
 6979                         "sub          $2, %%esi \n\t"    
 6980                         "punpcklbw %%mm0, %%mm2 \n\t"    
 6981                         "punpckhbw %%mm0, %%mm3 \n\t"    
 6982                         "paddw     %%mm2, %%mm6 \n\t"    
 6983                         "paddw     %%mm3, %%mm7 \n\t"    
 6985                         "movq      %%mm4, %%mm2 \n\t"    
 6986                         "psrlq       $32, %%mm4 \n\t"    
 6987                         "psubw     %%mm2, %%mm4 \n\t"    
 6988                         "movq      %%mm6, %%mm3 \n\t"    
 6989                         "psrlq       $32, %%mm6 \n\t"    
 6990                         "psubw     %%mm3, %%mm6 \n\t"    
 6991                         "punpckldq %%mm6, %%mm4 \n\t"    
 6992                         "movq      %%mm5, %%mm2 \n\t"    
 6993                         "psrlq       $32, %%mm5 \n\t"    
 6994                         "psubw     %%mm2, %%mm5 \n\t"    
 6995                         "movq      %%mm7, %%mm3 \n\t"    
 6996                         "psrlq       $32, %%mm7 \n\t"    
 6997                         "psubw     %%mm3, %%mm7 \n\t"    
 6998                         "punpckldq %%mm7, %%mm5 \n\t"    
 7000                         "movq      %%mm4, %%mm6 \n\t"    
 7001                         "movq      %%mm5, %%mm7 \n\t"    
 7002                         "psraw       $15, %%mm6 \n\t"    
 7003                         "psraw       $15, %%mm7 \n\t"    
 7004                         "pxor      %%mm6, %%mm4 \n\t"    
 7005                         "pxor      %%mm7, %%mm5 \n\t"    
 7006                         "psubsw    %%mm6, %%mm4 \n\t"    
 7007                         "psubsw    %%mm7, %%mm5 \n\t"    
 7008                         "packuswb  %%mm5, %%mm4 \n\t"    
 7009                         "movq    %%mm4, (%%edi) \n\t"    
 7011                         "sub       %%eax, %%esi \n\t"    
 7012                         "sub       %%eax, %%esi \n\t" "add $8,          %%esi \n\t"      
 7013                         "add $8,          %%edi \n\t"    
 7017                         "mov       %%ebx, %%esi \n\t"    
 7018                         "movd      %%mm1, %%edi \n\t"    
 7019                         "add       %%eax, %%esi \n\t"    
 7020                         "add       %%eax, %%edi \n\t"    
 7025                         "popa                   \n\t":
"=m" (Dest)       
 
 7053                                                                         unsigned char NRightShift)
 
 7056         if ((Src == NULL) || (Dest == NULL))
 
 7058         if ((columns < 8) || (rows < 3) || (NRightShift > 7))
 
 7063 #if defined(USE_MMX) && defined(i386) 
 7191                         (
"pusha              \n\t" "pxor      %%mm0, %%mm0 \n\t"         
 7192                         "mov          %3, %%eax \n\t"    
 7193                         "xor       %%ebx, %%ebx \n\t"    
 7195                         "movd      %%ebx, %%mm1 \n\t"    
 7197                         "mov          %1, %%esi \n\t"    
 7198                         "mov          %0, %%edi \n\t"    
 7199                         "add       %%eax, %%edi \n\t"    
 7204                         ".L10410:                \n\t" "mov       %%eax, %%ecx \n\t"     
 7205                         "shr          $3, %%ecx \n\t"    
 7206                         "mov       %%esi, %%ebx \n\t"    
 7207                         "mov       %%edi, %%edx \n\t"    
 7211                         "movq    (%%esi), %%mm4 \n\t"    
 7212                         "movq      %%mm4, %%mm5 \n\t"    
 7213                         "add          $2, %%esi \n\t"    
 7214                         "punpcklbw %%mm0, %%mm4 \n\t"    
 7215                         "punpckhbw %%mm0, %%mm5 \n\t"    
 7216                         "psrlw     %%mm1, %%mm4 \n\t"    
 7217                         "psrlw     %%mm1, %%mm5 \n\t"    
 7218                         "movq    (%%esi), %%mm6 \n\t"    
 7219                         "movq      %%mm6, %%mm7 \n\t"    
 7220                         "sub          $2, %%esi \n\t"    
 7221                         "punpcklbw %%mm0, %%mm6 \n\t"    
 7222                         "punpckhbw %%mm0, %%mm7 \n\t"    
 7223                         "psrlw     %%mm1, %%mm6 \n\t"    
 7224                         "psrlw     %%mm1, %%mm7 \n\t"    
 7225                         "add       %%eax, %%esi \n\t"    
 7226                         "movq    (%%esi), %%mm2 \n\t"    
 7227                         "movq      %%mm2, %%mm3 \n\t"    
 7228                         "add          $2, %%esi \n\t"    
 7229                         "punpcklbw %%mm0, %%mm2 \n\t"    
 7230                         "punpckhbw %%mm0, %%mm3 \n\t"    
 7231                         "psrlw     %%mm1, %%mm2 \n\t"    
 7232                         "psrlw     %%mm1, %%mm3 \n\t"    
 7233                         "paddw     %%mm2, %%mm4 \n\t"    
 7234                         "paddw     %%mm3, %%mm5 \n\t"    
 7235                         "paddw     %%mm2, %%mm4 \n\t"    
 7236                         "paddw     %%mm3, %%mm5 \n\t"    
 7237                         "movq    (%%esi), %%mm2 \n\t"    
 7238                         "movq      %%mm2, %%mm3 \n\t"    
 7239                         "sub          $2, %%esi \n\t"    
 7240                         "punpcklbw %%mm0, %%mm2 \n\t"    
 7241                         "punpckhbw %%mm0, %%mm3 \n\t"    
 7242                         "psrlw     %%mm1, %%mm2 \n\t"    
 7243                         "psrlw     %%mm1, %%mm3 \n\t"    
 7244                         "paddw     %%mm2, %%mm6 \n\t"    
 7245                         "paddw     %%mm3, %%mm7 \n\t"    
 7246                         "paddw     %%mm2, %%mm6 \n\t"    
 7247                         "paddw     %%mm3, %%mm7 \n\t"    
 7248                         "add       %%eax, %%esi \n\t"    
 7249                         "movq    (%%esi), %%mm2 \n\t"    
 7250                         "movq      %%mm2, %%mm3 \n\t"    
 7251                         "add          $2, %%esi \n\t"    
 7252                         "punpcklbw %%mm0, %%mm2 \n\t"    
 7253                         "punpckhbw %%mm0, %%mm3 \n\t"    
 7254                         "psrlw     %%mm1, %%mm2 \n\t"    
 7255                         "psrlw     %%mm1, %%mm3 \n\t"    
 7256                         "paddw     %%mm2, %%mm4 \n\t"    
 7257                         "paddw     %%mm3, %%mm5 \n\t"    
 7258                         "movq    (%%esi), %%mm2 \n\t"    
 7259                         "movq      %%mm2, %%mm3 \n\t"    
 7260                         "sub          $2, %%esi \n\t"    
 7261                         "punpcklbw %%mm0, %%mm2 \n\t"    
 7262                         "punpckhbw %%mm0, %%mm3 \n\t"    
 7263                         "psrlw     %%mm1, %%mm2 \n\t"    
 7264                         "psrlw     %%mm1, %%mm3 \n\t"    
 7265                         "paddw     %%mm2, %%mm6 \n\t"    
 7266                         "paddw     %%mm3, %%mm7 \n\t"    
 7268                         "movq      %%mm4, %%mm2 \n\t"    
 7269                         "psrlq       $32, %%mm4 \n\t"    
 7270                         "psubw     %%mm2, %%mm4 \n\t"    
 7271                         "movq      %%mm6, %%mm3 \n\t"    
 7272                         "psrlq       $32, %%mm6 \n\t"    
 7273                         "psubw     %%mm3, %%mm6 \n\t"    
 7274                         "punpckldq %%mm6, %%mm4 \n\t"    
 7275                         "movq      %%mm5, %%mm2 \n\t"    
 7276                         "psrlq       $32, %%mm5 \n\t"    
 7277                         "psubw     %%mm2, %%mm5 \n\t"    
 7278                         "movq      %%mm7, %%mm3 \n\t"    
 7279                         "psrlq       $32, %%mm7 \n\t"    
 7280                         "psubw     %%mm3, %%mm7 \n\t"    
 7281                         "punpckldq %%mm7, %%mm5 \n\t"    
 7283                         "movq      %%mm4, %%mm6 \n\t"    
 7284                         "movq      %%mm5, %%mm7 \n\t"    
 7285                         "psraw       $15, %%mm6 \n\t"    
 7286                         "psraw       $15, %%mm7 \n\t"    
 7287                         "pxor      %%mm6, %%mm4 \n\t"    
 7288                         "pxor      %%mm7, %%mm5 \n\t"    
 7289                         "psubsw    %%mm6, %%mm4 \n\t"    
 7290                         "psubsw    %%mm7, %%mm5 \n\t"    
 7291                         "packuswb  %%mm5, %%mm4 \n\t"    
 7292                         "movq    %%mm4, (%%edi) \n\t"    
 7294                         "sub       %%eax, %%esi \n\t"    
 7295                         "sub       %%eax, %%esi \n\t" "add $8,          %%esi \n\t"      
 7296                         "add $8,          %%edi \n\t"    
 7300                         "mov       %%ebx, %%esi \n\t"    
 7301                         "mov       %%edx, %%edi \n\t"    
 7302                         "add       %%eax, %%esi \n\t"    
 7303                         "add       %%eax, %%edi \n\t"    
 7308                         "popa                   \n\t":
"=m" (Dest)       
 
 7341                 "mov       %%esp, %%ebx \n\t"    
 7342                 "sub          $4, %%ebx \n\t"    
 7343                 "and        $-32, %%ebx \n\t"    
 7344                 "mov     %%esp, (%%ebx) \n\t"    
 7345                 "mov       %%ebx, %%esp \n\t"    
 7366                 "mov     (%%esp), %%ebx \n\t"    
 7367                 "mov       %%ebx, %%esp \n\t"    
int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeftByte: D = (S << N) 
int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, unsigned char NRightShift)
Filter using SobelXShiftRight: Dij = saturation255( ... ) 
void SDL_imageFilterMMXon()
Enable MMX check for filter functions and use MMX code if available. 
int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
Filter using AddUint: D = saturation255(S[i] + Cs[i % 4]), Cs=Swap32((uint)C) 
int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... ) 
int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Div: D = S1 / S2. 
int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... ) 
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Mean: D = S1/2 + S2/2. 
int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin, unsigned char Tmax)
Filter using ClipToRange: D = (S < Tmin) ? Tmin : ((S > Tmax) ? Tmax : S). 
int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using SubByte: D = saturation0(S - C) 
int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftRight: D = saturation0(S >> N) 
int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... ) 
int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C) 
int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... ) 
int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultDivby4: D = saturation255(S1/2 * S2/2) 
int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
Internal ASM Filter using MultNor: D = S1 * S2. 
#define SWAP_32(x)
Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.). 
int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)
Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0. 
int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using MultByByte: D = saturation255(S * C) 
int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultNor: D = S1 * S2. 
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using AbsDiff: D = | S1 - S2 |. 
int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using BitOr: D = S1 | S2. 
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Add: D = saturation255(S1 + S2) 
int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultDivby2: D = saturation255(S1/2 * S2) 
void SDL_imageFilterMMXoff()
Disable MMX check for filter functions and force to use non-MMX C based code. ...
int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using BitAnd: D = S1 & S2. 
int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N) 
int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeft: D = saturation255(S << N) 
int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Mult: D = saturation255(S1 * S2) 
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Sub: D = saturation0(S1 - S2) 
int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using AddByteToHalf: D = saturation255(S/2 + C) 
int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int columns)
Filter using SobelX: Dij = saturation255( ... ) 
int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin, int Nmax)
Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin) ...
int SDL_imageFilterMMXdetect(void)
MMX detection routine (with override flag). 
int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeftUint: D = ((uint)S << N) 
int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
Internal MMX Filter using SubByte: D = saturation0(S - C) 
int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... ) 
int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length)
Filter using BitNegation: D = ~S. 
int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... ) 
int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N, unsigned char C)
Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C) 
int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using AddByte: D = saturation255(S + C) 
int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... ) 
void SDL_imageFilterRestoreStack(void)
Restore previously aligned stack. 
void SDL_imageFilterAlignStack(void)
Align stack to 32 byte boundary. 
int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel9x9ShiftRight: Dij = saturation0and255( ... )