00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <string.h>
00017
00018 #include "SDL_imageFilter.h"
00019
/* Byte-swap a 32-bit value (endianness conversion).
 * The operand is cast to unsigned int so the shifts are well defined even
 * when the argument is a signed type (the original sign-extended on
 * `(x) >> 24` and invoked UB on `(x) << 24` for negative values).
 * Assumes a 32-bit unsigned int, as the rest of this file does. */
#define SWAP_32(x) \
    ((((unsigned int)(x)) >> 24) | \
     ((((unsigned int)(x)) & 0x00ff0000) >> 8) | \
     ((((unsigned int)(x)) & 0x0000ff00) << 8) | \
     (((unsigned int)(x)) << 24))
00024
00025
00026
/* Run-time switch for the MMX code paths: non-zero (default) allows them,
 * zero forces the plain C fallbacks. Toggled by SDL_imageFilterMMXon()/
 * SDL_imageFilterMMXoff(). */
static int SDL_imageFilterUseMMX = 1;
00031
00032
00033 #if defined(__GNUC__)
00034 #define GCC__
00035 #endif
00036
/* Read the CPU feature flags (EDX result of CPUID function 1).
 * Returns 0 when the library is built without USE_MMX, so MMX detection
 * then always fails and the C fallbacks are used. */
unsigned int _cpuFlags()
{
    int flags = 0;

#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC-style inline assembly variant. */
    __asm
    {
        pusha
        mov eax, 1
        cpuid
        mov flags,edx
        popa
    }
#else
    /* GCC extended inline assembly variant. */
    asm volatile ("pusha \n\t" "mov %1, %%eax \n\t"
                  "cpuid \n\t"
                  "mov %%edx, %0 \n\t"
                  "popa \n\t":"=m" (flags)
                  :"i"(0x00000001)
        );
#endif
#endif

    return (flags);
}
00068
00074 int SDL_imageFilterMMXdetect(void)
00075 {
00076 unsigned int mmx_bit;
00077
00078
00079 if (SDL_imageFilterUseMMX == 0) {
00080 return (0);
00081 }
00082
00083 mmx_bit = _cpuFlags();
00084 mmx_bit &= 0x00800000;
00085 mmx_bit = (mmx_bit && 0x00800000);
00086
00087 return (mmx_bit);
00088 }
00089
/* Disable MMX acceleration: all filters fall back to the plain C loops. */
void SDL_imageFilterMMXoff()
{
    SDL_imageFilterUseMMX = 0;
}
00097
/* Re-enable MMX acceleration (it is still subject to CPU capability
 * detection in SDL_imageFilterMMXdetect()). */
void SDL_imageFilterMMXon()
{
    SDL_imageFilterUseMMX = 1;
}
00105
00106
00107
/* MMX kernel: Dest[i] = saturated(Src1[i] + Src2[i]) using PADDUSB,
 * 8 bytes per iteration. Only the first (SrcLength / 8) * 8 bytes are
 * processed; the caller (SDL_imageFilterAdd) handles any tail in C.
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L1010:
        movq mm1, [eax]
        paddusb mm1, [ebx]
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1010
        emms
        popa
    }
#else
    /* GCC extended-asm variant (same algorithm). */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "paddusb (%%ebx), %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00170
/* Dest = saturated (Src1 + Src2), byte-wise.
 * Uses the MMX kernel for the bulk when available, then finishes any
 * remaining tail bytes in C. Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    /* Validate pointers; a zero-length request is a no-op. */
    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* remaining tail handled below */
    }

    /* C fallback: per-byte addition clamped at 255. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int sum = (int) *p1++ + (int) *p2++;
            *out++ = (unsigned char) (sum > 255 ? 255 : sum);
        }
    }
    return (0);
}
00231
/* MMX kernel: Dest[i] ~ Src1[i]/2 + Src2[i]/2 (approximate mean).
 * Each source is halved with PSRLW then masked with *Mask (must point to
 * 8 bytes of 0x7F) to clear the bit shifted in from the neighboring byte
 * lane, then combined with PADDUSB. 8 bytes per iteration; the tail is
 * left to the C loop in SDL_imageFilterMean().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
                           unsigned char *Mask)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov edx, Mask
        movq mm0, [edx]
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L21011:
        movq mm1, [eax]
        movq mm2, [ebx]
        psrlw mm1, 1
        psrlw mm2, 1
        pand mm1, mm0
        pand mm2, mm0
        paddusb mm1, mm2
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L21011
        emms
        popa
    }
#else
    /* GCC variant. The raw `.byte 0x0f, 0xdb, ...` sequences are the
     * machine encodings of `pand %%mm0, %%mm1` and `pand %%mm0, %%mm2`
     * (apparently a workaround for an old assembler). */
    asm volatile
     ("pusha \n\t" "movl %4, %%edx \n\t"
      "movq (%%edx), %%mm0 \n\t"
      "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: \n\t"
      "movq (%%eax), %%mm1 \n\t"
      "movq (%%ebx), %%mm2 \n\t"
      "psrlw $1, %%mm1 \n\t"
      "psrlw $1, %%mm2 \n\t"
      ".byte 0x0f, 0xdb, 0xc8 \n\t"
      ".byte 0x0f, 0xdb, 0xd0 \n\t"
      "paddusb %%mm2, %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t"
      "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength),
      "m"(Mask)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00319
/* Dest = Src1/2 + Src2/2, byte-wise (approximate mean; both halvings
 * truncate, matching the MMX kernel). Returns 0 on success, -1 on NULL. */
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    /* 0x7F per byte: clears the bit PSRLW shifts in across byte lanes. */
    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: truncating half of each source, then sum (max 254). */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            *out++ = (unsigned char) ((int) *p1++ / 2 + (int) *p2++ / 2);
        }
    }
    return (0);
}
00378
/* MMX kernel: Dest[i] = saturated(Src1[i] - Src2[i]) using PSUBUSB
 * (clamps at 0), 8 bytes per iteration. Processes (SrcLength / 8) * 8
 * bytes; tail is handled by SDL_imageFilterSub().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L1012:
        movq mm1, [eax]
        psubusb mm1, [ebx]
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1012
        emms
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "psubusb (%%ebx), %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00441
/* Dest = saturated (Src1 - Src2), byte-wise, clamped at 0.
 * MMX bulk path plus C tail. Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: per-byte subtraction clamped at 0. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int diff = (int) *p1++ - (int) *p2++;
            *out++ = (unsigned char) (diff < 0 ? 0 : diff);
        }
    }
    return (0);
}
00501
/* MMX kernel: Dest[i] = |Src1[i] - Src2[i]|. Computes both saturated
 * differences (a-b and b-a, each clamping at 0) and ORs them — exactly
 * one is non-zero, yielding the absolute difference. 8 bytes/iteration;
 * tail handled by SDL_imageFilterAbsDiff().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L1013:
        movq mm1, [eax]
        movq mm2, [ebx]
        psubusb mm1, [ebx]
        psubusb mm2, [eax]
        por mm1, mm2
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1013
        emms
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "movq (%%ebx), %%mm2 \n\t"
      "psubusb (%%ebx), %%mm1 \n\t"
      "psubusb (%%eax), %%mm2 \n\t"
      "por %%mm2, %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00570
/* Dest = |Src1 - Src2|, byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: absolute per-byte difference. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int diff = (int) *p1++ - (int) *p2++;
            *out++ = (unsigned char) (diff < 0 ? -diff : diff);
        }
    }
    return (0);
}
00628
/* MMX kernel: Dest[i] = clamp255(Src1[i] * Src2[i]).
 * Bytes are widened to words (punpck with zero), multiplied with PMULLW,
 * then an absolute-value fixup (psraw/pxor/psubsw) corrects products
 * whose low 16 bits look negative before PACKUSWB clamps to 0..255.
 * 8 bytes per iteration; tail handled by SDL_imageFilterMult().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        pxor mm0, mm0
        align 16
L1014:
        movq mm1, [eax]
        movq mm3, [ebx]
        movq mm2, mm1
        movq mm4, mm3
        punpcklbw mm1, mm0
        punpckhbw mm2, mm0
        punpcklbw mm3, mm0
        punpckhbw mm4, mm0
        pmullw mm1, mm3
        pmullw mm2, mm4
        movq mm5, mm1
        movq mm6, mm2
        psraw mm5, 15
        psraw mm6, 15
        pxor mm1, mm5
        pxor mm2, mm6
        psubsw mm1, mm5
        psubsw mm2, mm6
        packuswb mm1, mm2
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1014
        emms
        popa
    }
#else
    /* GCC extended-asm variant (same algorithm). */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      "pxor %%mm0, %%mm0 \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "movq (%%ebx), %%mm3 \n\t"
      "movq %%mm1, %%mm2 \n\t"
      "movq %%mm3, %%mm4 \n\t"
      "punpcklbw %%mm0, %%mm1 \n\t"
      "punpckhbw %%mm0, %%mm2 \n\t"
      "punpcklbw %%mm0, %%mm3 \n\t"
      "punpckhbw %%mm0, %%mm4 \n\t"
      "pmullw %%mm3, %%mm1 \n\t"
      "pmullw %%mm4, %%mm2 \n\t"
      "movq %%mm1, %%mm5 \n\t"
      "movq %%mm2, %%mm6 \n\t"
      "psraw $15, %%mm5 \n\t"
      "psraw $15, %%mm6 \n\t"
      "pxor %%mm5, %%mm1 \n\t"
      "pxor %%mm6, %%mm2 \n\t"
      "psubsw %%mm5, %%mm1 \n\t"
      "psubsw %%mm6, %%mm2 \n\t"
      "packuswb %%mm2, %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00729
/* Dest = clamp255(Src1 * Src2), byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: per-byte product clamped at 255. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int prod = (int) *p1++ * (int) *p2++;
            *out++ = (unsigned char) (prod > 255 ? 255 : prod);
        }
    }
    return (0);
}
00792
/* x86 (non-MMX) kernel: Dest[i] = low byte of (Src1[i] * Src2[i]), i.e.
 * product modulo 256, one byte per iteration over the FULL SrcLength
 * (unlike the *MMX kernels, this routine leaves no tail).
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov edx, Src1
        mov esi, Src2
        mov edi, Dest
        mov ecx, SrcLength
        align 16
L10141:
        mov al, [edx]
        mul [esi]
        mov [edi], al
        inc edx
        inc esi
        inc edi
        dec ecx
        jnz L10141
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "mov %2, %%edx \n\t"
      "mov %1, %%esi \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      ".align 16 \n\t"
      "1:mov (%%edx), %%al \n\t"
      "mulb (%%esi) \n\t"
      "mov %%al, (%%edi) \n\t"
      "inc %%edx \n\t"
      "inc %%esi \n\t"
      "inc %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
00851
/* Dest = (Src1 * Src2) mod 256, byte-wise (no clamping).
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect()) {
        /* The ASM routine iterates one byte at a time over the full
         * length, so it already produced every output byte. */
        SDL_imageFilterMultNorASM(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        /* NOTE(review): the tail pass below recomputes bytes the ASM
         * already wrote. The results are identical (product mod 256),
         * so this is redundant but harmless; kept for behavioral parity. */
        start = length & 0xfffffff8;
    }

    /* C fallback: per-byte product, truncated to the low byte. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            *out++ = (unsigned char) ((int) *p1++ * (int) *p2++);
        }
    }
    return (0);
}
00914
/* MMX kernel: Dest[i] = clamp255((Src1[i] / 2) * Src2[i]).
 * Bytes are widened to words, Src1 halved with PSRLW, multiplied with
 * PMULLW, and PACKUSWB clamps the 16-bit results to 0..255.
 * 8 bytes per iteration; tail handled by SDL_imageFilterMultDivby2().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        pxor mm0, mm0
        align 16
L1015:
        movq mm1, [eax]
        movq mm3, [ebx]
        movq mm2, mm1
        movq mm4, mm3
        punpcklbw mm1, mm0
        punpckhbw mm2, mm0
        punpcklbw mm3, mm0
        punpckhbw mm4, mm0
        psrlw mm1, 1
        psrlw mm2, 1
        pmullw mm1, mm3
        pmullw mm2, mm4
        packuswb mm1, mm2
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1015
        emms
        popa
    }
#else
    /* GCC extended-asm variant (same algorithm). */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      "pxor %%mm0, %%mm0 \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "movq (%%ebx), %%mm3 \n\t"
      "movq %%mm1, %%mm2 \n\t"
      "movq %%mm3, %%mm4 \n\t"
      "punpcklbw %%mm0, %%mm1 \n\t"
      "punpckhbw %%mm0, %%mm2 \n\t"
      "punpcklbw %%mm0, %%mm3 \n\t"
      "punpckhbw %%mm0, %%mm4 \n\t"
      "psrlw $1, %%mm1 \n\t"
      "psrlw $1, %%mm2 \n\t"
      "pmullw %%mm3, %%mm1 \n\t"
      "pmullw %%mm4, %%mm2 \n\t"
      "packuswb %%mm2, %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01001
/* Dest = clamp255((Src1 / 2) * Src2), byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: halve Src1 (truncating), multiply, clamp at 255. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int v = ((int) *p1++ / 2) * (int) *p2++;
            *out++ = (unsigned char) (v > 255 ? 255 : v);
        }
    }
    return (0);
}
01061
/* MMX kernel: Dest[i] = clamp255((Src1[i] / 2) * (Src2[i] / 2)).
 * Like MultDivby2MMX but both sources are halved with PSRLW before the
 * PMULLW multiply; PACKUSWB clamps to 0..255. 8 bytes per iteration;
 * tail handled by SDL_imageFilterMultDivby4().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        pxor mm0, mm0
        align 16
L1016:
        movq mm1, [eax]
        movq mm3, [ebx]
        movq mm2, mm1
        movq mm4, mm3
        punpcklbw mm1, mm0
        punpckhbw mm2, mm0
        punpcklbw mm3, mm0
        punpckhbw mm4, mm0
        psrlw mm1, 1
        psrlw mm2, 1
        psrlw mm3, 1
        psrlw mm4, 1
        pmullw mm1, mm3
        pmullw mm2, mm4
        packuswb mm1, mm2
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1016
        emms
        popa
    }
#else
    /* GCC extended-asm variant (same algorithm). */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      "pxor %%mm0, %%mm0 \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "movq (%%ebx), %%mm3 \n\t"
      "movq %%mm1, %%mm2 \n\t"
      "movq %%mm3, %%mm4 \n\t"
      "punpcklbw %%mm0, %%mm1 \n\t"
      "punpckhbw %%mm0, %%mm2 \n\t"
      "punpcklbw %%mm0, %%mm3 \n\t"
      "punpckhbw %%mm0, %%mm4 \n\t"
      "psrlw $1, %%mm1 \n\t"
      "psrlw $1, %%mm2 \n\t"
      "psrlw $1, %%mm3 \n\t"
      "psrlw $1, %%mm4 \n\t"
      "pmullw %%mm3, %%mm1 \n\t"
      "pmullw %%mm4, %%mm2 \n\t"
      "packuswb %%mm2, %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01152
/* Dest = clamp255((Src1 / 2) * (Src2 / 2)), byte-wise. MMX bulk path plus
 * C tail. Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: halve both sources (truncating), multiply, clamp. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            int v = ((int) *p1++ / 2) * ((int) *p2++ / 2);
            *out++ = (unsigned char) (v > 255 ? 255 : v);
        }
    }
    return (0);
}
01212
/* MMX kernel: Dest[i] = Src1[i] & Src2[i] using PAND, 8 bytes per
 * iteration. Processes (SrcLength / 8) * 8 bytes; tail handled by
 * SDL_imageFilterBitAnd().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L1017:
        movq mm1, [eax]
        pand mm1, [ebx]
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L1017
        emms
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "pand (%%ebx), %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01275
/* Dest = Src1 & Src2, byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() > 0 && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: per-byte bitwise AND. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            *out++ = (unsigned char) (*p1++ & *p2++);
        }
    }
    return (0);
}
01334
/* MMX kernel: Dest[i] = Src1[i] | Src2[i] using POR, 8 bytes per
 * iteration. Processes (SrcLength / 8) * 8 bytes; tail handled by
 * SDL_imageFilterBitOr().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov eax, Src1
        mov ebx, Src2
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L91017:
        movq mm1, [eax]
        por mm1, [ebx]
        movq [edi], mm1
        add eax, 8
        add ebx, 8
        add edi, 8
        dec ecx
        jnz L91017
        emms
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "mov %2, %%eax \n\t"
      "mov %1, %%ebx \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm1 \n\t"
      "por (%%ebx), %%mm1 \n\t"
      "movq %%mm1, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%ebx \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01397
/* Dest = Src1 | Src2, byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Src2 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: per-byte bitwise OR. */
    {
        const unsigned char *p1 = Src1 + start;
        const unsigned char *p2 = Src2 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            *out++ = (unsigned char) (*p1++ | *p2++);
        }
    }
    return (0);
}
01453
/* x86 (non-MMX) kernel: Dest[i] = Src1[i] / Src2[i], one byte per
 * iteration over the FULL SrcLength. A zero divisor yields 255 instead
 * of faulting (explicit cmp/branch around the DIV).
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov edx, Src1
        mov esi, Src2
        mov edi, Dest
        mov ecx, SrcLength
        align 16
L10191:
        mov bl, [esi]
        cmp bl, 0
        jnz L10192
        mov [edi], 255
        jmp L10193
L10192:
        xor ah, ah
        mov al, [edx]
        div bl
        mov [edi], al
L10193:
        inc edx
        inc esi
        inc edi
        dec ecx
        jnz L10191
        popa
    }
#else
    /* GCC extended-asm variant (same algorithm). */
    asm volatile
     ("pusha \n\t" "mov %2, %%edx \n\t"
      "mov %1, %%esi \n\t"
      "mov %0, %%edi \n\t"
      "mov %3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: mov (%%esi), %%bl \n\t"
      "cmp $0, %%bl \n\t"
      "jnz 2f \n\t" "movb $255, (%%edi) \n\t"
      "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t"
      "mov (%%edx), %%al \n\t"
      "div %%bl \n\t"
      "mov %%al, (%%edi) \n\t"
      "3: inc %%edx \n\t"
      "inc %%esi \n\t"
      "inc %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src2),
      "m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01524
/* Dest = Src1 / Src2, byte-wise; a zero divisor produces 255 (matching
 * the ASM kernel's convention).
 * Returns 0 on success, -1 on NULL input.
 *
 * Bug fix: the original C fallback performed `*cursrc1 / *cursrc2`
 * unguarded, which is undefined behavior (typically SIGFPE) whenever a
 * Src2 byte is 0 — the ASM path already emitted 255 in that case. The
 * fallback now matches the ASM semantics. */
int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
{
    unsigned int i;
    unsigned char *cursrc1, *cursrc2, *curdst;

    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect()) {
        /* The ASM routine walks the full buffer one byte at a time. */
        SDL_imageFilterDivASM(Src1, Src2, Dest, length);
        return (0);
    }

    /* C fallback: per-byte division with a divide-by-zero guard. */
    cursrc1 = Src1;
    cursrc2 = Src2;
    curdst = Dest;
    for (i = 0; i < length; i++) {
        if (*cursrc2 == 0) {
            *curdst = 255;  /* same sentinel the ASM kernel writes */
        } else {
            *curdst = (unsigned char) ((int) *cursrc1 / (int) *cursrc2);
        }
        cursrc1++;
        cursrc2++;
        curdst++;
    }

    return (0);
}
01577
01578
01579
/* MMX kernel: Dest[i] = ~Src1[i]. Builds an all-ones register with
 * PCMPEQB and XORs each 8-byte chunk against it. Processes
 * (SrcLength / 8) * 8 bytes; tail handled by SDL_imageFilterBitNegation().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        pcmpeqb mm1, mm1
        mov eax, Src1
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L91117:
        movq mm0, [eax]
        pxor mm0, mm1
        movq [edi], mm0
        add eax, 8
        add edi, 8
        dec ecx
        jnz L91117
        emms
        popa
    }
#else
    /* GCC extended-asm variant. */
    asm volatile
     ("pusha \n\t" "pcmpeqb %%mm1, %%mm1 \n\t"
      "mov %1, %%eax \n\t"
      "mov %0, %%edi \n\t"
      "mov %2, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: movq (%%eax), %%mm0 \n\t"
      "pxor %%mm1, %%mm0 \n\t"
      "movq %%mm0, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%edi \n\t" "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src1),
      "m"(SrcLength)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01638
/* Dest = ~Src1, byte-wise. MMX bulk path plus C tail.
 * Returns 0 on success, -1 on NULL input. */
int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length)
{
    unsigned int start = 0;

    if (Src1 == NULL || Dest == NULL)
        return (-1);
    if (length == 0)
        return (0);

    if (SDL_imageFilterMMXdetect() && length > 7) {
        /* MMX path covers the largest multiple of 8 bytes. */
        SDL_imageFilterBitNegationMMX(Src1, Dest, length);
        if ((length & 7) == 0)
            return (0);
        start = length & 0xfffffff8;    /* tail handled below */
    }

    /* C fallback: per-byte bitwise complement. */
    {
        const unsigned char *p = Src1 + start;
        unsigned char *out = Dest + start;
        unsigned int k;

        for (k = start; k < length; k++) {
            *out++ = (unsigned char) ~(*p++);
        }
    }
    return (0);
}
01690
/* MMX kernel: Dest[i] = saturated(Src1[i] + C). Replicates the constant
 * C into all 8 byte lanes of an MMX register, then applies PADDUSB,
 * 8 bytes per iteration. Processes (SrcLength / 8) * 8 bytes; tail
 * handled by SDL_imageFilterAddByte().
 * Returns 0 on success, -1 when compiled without USE_MMX. */
int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
{
#ifdef USE_MMX
#if !defined(GCC__)
    /* MSVC inline assembly variant. */
    __asm
    {
        pusha
        mov al, C
        mov ah, al
        mov bx, ax
        shl eax, 16
        mov ax, bx
        movd mm1, eax
        movd mm2, eax
        punpckldq mm1, mm2
        mov eax, Src1
        mov edi, Dest
        mov ecx, SrcLength
        shr ecx, 3
        align 16
L1021:
        movq mm0, [eax]
        paddusb mm0, mm1
        movq [edi], mm0
        add eax, 8
        add edi, 8
        dec ecx
        jnz L1021
        emms
        popa
    }
#else
    /* GCC extended-asm variant (same lane-replication then PADDUSB). */
    asm volatile
     ("pusha \n\t"
      "mov %3, %%al \n\t"
      "mov %%al, %%ah \n\t"
      "mov %%ax, %%bx \n\t"
      "shl $16, %%eax \n\t"
      "mov %%bx, %%ax \n\t"
      "movd %%eax, %%mm1 \n\t"
      "movd %%eax, %%mm2 \n\t"
      "punpckldq %%mm2, %%mm1 \n\t"
      "mov %1, %%eax \n\t"
      "mov %0, %%edi \n\t"
      "mov %2, %%ecx \n\t"
      "shr $3, %%ecx \n\t"
      ".align 16 \n\t"
      "1: \n\t"
      "movq (%%eax), %%mm0 \n\t"
      "paddusb %%mm1, %%mm0 \n\t"
      "movq %%mm0, (%%edi) \n\t"
      "add $8, %%eax \n\t"
      "add $8, %%edi \n\t"
      "dec %%ecx \n\t"
      "jnz 1b \n\t"
      "emms \n\t"
      "popa \n\t":"=m" (Dest)
      :"m"(Src1),
      "m"(SrcLength),
      "m"(C)
        );
#endif
    return (0);
#else
    return (-1);
#endif
}
01770
/*
 * SDL_imageFilterAddByte: D = saturation255(S + C)
 *
 * Adds the constant C to every byte of Src1 and stores the result,
 * clamped to 255, in Dest. Uses the MMX core for the 8-byte-aligned
 * bulk when available and a C loop for the tail / full fallback.
 *
 * Returns 0 on success, -1 if either buffer pointer is NULL.
 */
int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
	unsigned int i, istart;
	int iC;
	unsigned char *cursrc1, *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Special case: C == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (C == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterAddByteMMX(Src1, Dest, length, C);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	iC = (int) C;
	for (i = istart; i < length; i++) {
		result = (int) *cursrc1 + iC;
		if (result > 255)
			result = 255;
		*curdest = (unsigned char) result;
		cursrc1++;
		curdest++;
	}
	return (0);
}
01836
/*
 * SDL_imageFilterAddUintMMX: per-byte unsigned-saturating add of a
 * repeating 8-byte pattern built from C and D.
 *
 * mm1 is loaded with C in its low dword and D in its high dword; the
 * caller (SDL_imageFilterAddUint) passes D = SWAP_32(C).
 * NOTE(review): because of that swap, the byte order added to the
 * second dword of each group differs from the first — verify against
 * the intended pixel layout.
 *
 * Processes SrcLength/8 groups; the tail is left to the caller.
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterAddUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* mm1 = D:C (high:low dword) */
		mov eax, C
		movd mm1, eax
		mov eax, D
		movd mm2, eax
		punpckldq mm1, mm2
		/* Pointers and 8-byte group count */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L11023:
		movq mm0, [eax]
		paddusb mm0, mm1	/* unsigned saturating byte add */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L11023
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* mm1 = D:C (high:low dword) */
	     "mov %3, %%eax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "mov %4, %%eax \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "1: \n\t"
	     "movq (%%eax), %%mm0 \n\t"
	     "paddusb %%mm1, %%mm0 \n\t"	/* unsigned saturating byte add */
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(C),
	     "m"(D)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
01912
/*
 * SDL_imageFilterAddUint: adds the 4-byte constant C (per byte,
 * saturated to 255) to the buffer, repeating every 4 bytes.
 *
 * The MMX core handles whole 8-byte groups (with D = SWAP_32(C) for
 * the upper dword); the C loop handles the tail / full fallback.
 *
 * Returns 0 on success, -1 if either buffer pointer is NULL.
 */
int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
{
	unsigned int i, j, istart, D;
	int iC[4];
	unsigned char *cursrc1;
	unsigned char *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Special case: C == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (C == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* MMX core needs the byte-swapped constant for the
		 * upper dword of its 8-byte pattern */
		D = SWAP_32(C);
		SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation: split C into its 4 bytes (iC[0] = lowest)
	 * and add them cyclically with saturation */
	iC[3] = (int) ((C >> 24) & 0xff);
	iC[2] = (int) ((C >> 16) & 0xff);
	iC[1] = (int) ((C >> 8) & 0xff);
	iC[0] = (int) ((C >> 0) & 0xff);
	for (i = istart; i < length; i += 4) {
		for (j = 0; j < 4; j++) {
			if ((i + j) < length) {
				result = (int) *cursrc1 + iC[j];
				if (result > 255)
					result = 255;
				*curdest = (unsigned char) result;
				cursrc1++;
				curdest++;
			}
		}
	}
	return (0);
}
01985
/*
 * SDL_imageFilterAddByteToHalfMMX: D = saturation255(S/2 + C)
 *
 * Halves each source byte (psrlw + mask, since MMX has no byte
 * shift) and adds C with unsigned saturation. Mask must point to
 * 8 bytes of 0x7F (clears the bit shifted in from the neighbour).
 * Processes SrcLength/8 groups; tail bytes are left to the caller.
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C,
									unsigned char *Mask)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* Replicate C into all 8 bytes of mm1 */
		mov al, C
		mov ah, al
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm1, eax
		movd mm2, eax
		punpckldq mm1, mm2
		/* mm0 = 8 x 0x7F mask */
		mov edx, Mask
		movq mm0, [edx]
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L1022:
		movq mm2, [eax]
		psrlw mm2, 1	/* word shift emulating per-byte >> 1 */
		pand mm2, mm0	/* clear bits leaked from neighbour bytes */
		paddusb mm2, mm1	/* + C with unsigned saturation */
		movq [edi], mm2
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1022
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* Replicate C into all 8 bytes of mm1 */
	     "mov %3, %%al \n\t"
	     "mov %%al, %%ah \n\t"
	     "mov %%ax, %%bx \n\t"
	     "shl $16, %%eax \n\t"
	     "mov %%bx, %%ax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     /* mm0 = 8 x 0x7F mask */
	     "movl %4, %%edx \n\t"
	     "movq (%%edx), %%mm0 \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "1: \n\t"
	     "movq (%%eax), %%mm2 \n\t"
	     "psrlw $1, %%mm2 \n\t"
	     /* raw opcode: pand %%mm0, %%mm2 */
	     ".byte 0x0f, 0xdb, 0xd0 \n\t"
	     "paddusb %%mm1, %%mm2 \n\t"
	     "movq %%mm2, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(C),
	     "m"(Mask)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02078
02089 int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
02090 {
02091 static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
02092 unsigned int i, istart;
02093 int iC;
02094 unsigned char *cursrc1;
02095 unsigned char *curdest;
02096 int result;
02097
02098
02099 if ((Src1 == NULL) || (Dest == NULL))
02100 return(-1);
02101 if (length == 0)
02102 return(0);
02103
02104 if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
02105
02106
02107 SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
02108
02109
02110 if ((length & 7) > 0) {
02111
02112 istart = length & 0xfffffff8;
02113 cursrc1 = &Src1[istart];
02114 curdest = &Dest[istart];
02115 } else {
02116
02117 return (0);
02118 }
02119 } else {
02120
02121 istart = 0;
02122 cursrc1 = Src1;
02123 curdest = Dest;
02124 }
02125
02126
02127 iC = (int) C;
02128 for (i = istart; i < length; i++) {
02129 result = (int) (*cursrc1 / 2) + iC;
02130 if (result > 255)
02131 result = 255;
02132 *curdest = (unsigned char) result;
02133
02134 cursrc1++;
02135 curdest++;
02136 }
02137
02138 return (0);
02139 }
02140
/*
 * SDL_imageFilterSubByteMMX: D = saturation0(S - C)
 *
 * Subtracts the constant C from each byte with unsigned saturation
 * (results below 0 clamp to 0). Processes SrcLength/8 eight-byte
 * groups; tail bytes are left to the caller (SDL_imageFilterSubByte).
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* Replicate C into all 8 bytes of mm1 */
		mov al, C
		mov ah, al
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm1, eax
		movd mm2, eax
		punpckldq mm1, mm2
		/* Pointers and 8-byte group count */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L1023:
		movq mm0, [eax]
		psubusb mm0, mm1	/* subtract C, clamp at 0 */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1023
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* Replicate C into all 8 bytes of mm1 */
	     "mov %3, %%al \n\t"
	     "mov %%al, %%ah \n\t"
	     "mov %%ax, %%bx \n\t"
	     "shl $16, %%eax \n\t"
	     "mov %%bx, %%ax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "1: movq (%%eax), %%mm0 \n\t"
	     "psubusb %%mm1, %%mm0 \n\t"	/* subtract C, clamp at 0 */
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(C)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02219
/*
 * SDL_imageFilterSubByte: D = saturation0(S - C)
 *
 * Subtracts the constant C from every byte of Src1 and stores the
 * result, clamped at 0, in Dest. MMX core for the aligned bulk,
 * C loop for the tail / full fallback.
 *
 * Returns 0 on success, -1 if either buffer pointer is NULL.
 */
int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
	unsigned int i, istart;
	int iC;
	unsigned char *cursrc1;
	unsigned char *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Special case: C == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (C == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterSubByteMMX(Src1, Dest, length, C);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	iC = (int) C;
	for (i = istart; i < length; i++) {
		result = (int) *cursrc1 - iC;
		if (result < 0)
			result = 0;
		*curdest = (unsigned char) result;
		cursrc1++;
		curdest++;
	}
	return (0);
}
02285
/*
 * SDL_imageFilterSubUintMMX: per-byte unsigned-saturating subtract of
 * a repeating 8-byte pattern built from C and D.
 *
 * mm1 = D:C (high:low dword); the caller passes D = SWAP_32(C).
 * NOTE(review): as with AddUintMMX, the swapped byte order in the
 * upper dword deserves verification against the intended layout.
 *
 * Processes SrcLength/8 groups; the tail is left to the caller.
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterSubUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* mm1 = D:C (high:low dword) */
		mov eax, C
		movd mm1, eax
		mov eax, D
		movd mm2, eax
		punpckldq mm1, mm2
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L11024:
		movq mm0, [eax]
		psubusb mm0, mm1	/* subtract pattern, clamp at 0 */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L11024
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* mm1 = D:C (high:low dword) */
	     "mov %3, %%eax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "mov %4, %%eax \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "1: movq (%%eax), %%mm0 \n\t"
	     "psubusb %%mm1, %%mm0 \n\t"	/* subtract pattern, clamp at 0 */
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(C),
	     "m"(D)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02360
/*
 * SDL_imageFilterSubUint: subtracts the 4-byte constant C (per byte,
 * clamped at 0) from the buffer, repeating every 4 bytes.
 *
 * MMX core for whole 8-byte groups (D = SWAP_32(C) for the upper
 * dword); C loop for the tail / full fallback.
 *
 * Returns 0 on success, -1 if either buffer pointer is NULL.
 */
int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
{
	unsigned int i, j, istart, D;
	int iC[4];
	unsigned char *cursrc1;
	unsigned char *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Special case: C == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (C == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* MMX core needs the byte-swapped constant for the
		 * upper dword of its 8-byte pattern */
		D = SWAP_32(C);
		SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation: split C into its 4 bytes (iC[0] = lowest)
	 * and subtract them cyclically, clamping at 0 */
	iC[3] = (int) ((C >> 24) & 0xff);
	iC[2] = (int) ((C >> 16) & 0xff);
	iC[1] = (int) ((C >> 8) & 0xff);
	iC[0] = (int) ((C >> 0) & 0xff);
	for (i = istart; i < length; i += 4) {
		for (j = 0; j < 4; j++) {
			if ((i + j) < length) {
				result = (int) *cursrc1 - iC[j];
				if (result < 0)
					result = 0;
				*curdest = (unsigned char) result;
				cursrc1++;
				curdest++;
			}
		}
	}
	return (0);
}
02433
/*
 * SDL_imageFilterShiftRightMMX: D = saturation0(S >> N), per byte.
 *
 * MMX lacks a per-byte shift, so the code first builds a byte mask
 * (0xFF >> N replicated) in mm1 by repeatedly shifting words right
 * and AND-ing with Mask (which must point to 8 bytes of 0x7F), then
 * shifts whole words by N and masks off bits leaked from the
 * neighbouring byte. Tail bytes are left to the caller.
 *
 * NOTE(review): N is assumed to be >= 1 here (the dec/jnz mask loop
 * runs N times); N == 0 is filtered out by the caller.
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
								 unsigned char *Mask)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		mov edx, Mask		/* mm0 = 8 x 0x7F */
		movq mm0, [edx]
		xor ecx, ecx
		mov cl, N
		movd mm3, ecx		/* mm3 = shift count */
		pcmpeqb mm1, mm1	/* mm1 = all ones */
	L10240:				/* build byte mask 0xFF >> N in mm1 */
		psrlw mm1, 1
		pand mm1, mm0
		dec cl
		jnz L10240
		/* Pointers and 8-byte group count */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L10241:
		movq mm0, [eax]
		psrlw mm0, mm3		/* word shift */
		pand mm0, mm1		/* mask leaked neighbour bits */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10241
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t" "movl %4, %%edx \n\t"
	     "movq (%%edx), %%mm0 \n\t"	/* mm0 = 8 x 0x7F */
	     "xor %%ecx, %%ecx \n\t"
	     "mov %3, %%cl \n\t"
	     "movd %%ecx, %%mm3 \n\t"	/* mm3 = shift count */
	     "pcmpeqb %%mm1, %%mm1 \n\t"	/* mm1 = all ones */
	     "1: \n\t"			/* build byte mask 0xFF >> N */
	     "psrlw $1, %%mm1 \n\t"
	     /* raw opcode: pand %%mm0, %%mm1 */
	     ".byte 0x0f, 0xdb, 0xc8 \n\t"
	     "dec %%cl \n\t"
	     "jnz 1b \n\t"
	     /* Pointers and 8-byte group count */
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "2: \n\t"
	     "movq (%%eax), %%mm0 \n\t"
	     "psrlw %%mm3, %%mm0 \n\t"	/* word shift */
	     /* raw opcode: pand %%mm1, %%mm0 */
	     ".byte 0x0f, 0xdb, 0xc1 \n\t"
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 2b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N),
	     "m"(Mask)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02527
/*
 * SDL_imageFilterShiftRight: D = S >> N, per byte.
 *
 * Shifts every byte of Src1 right by N (0..8) and stores the result
 * in Dest. MMX core for the aligned bulk, C loop for the tail.
 *
 * Returns 0 on success, -1 on NULL pointers or N > 8.
 */
int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
	/* 0x7F mask required by the MMX core's per-byte shift trick */
	static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
	unsigned int i, istart;
	unsigned char *cursrc1;
	unsigned char *curdest;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Check shift range */
	if (N > 8) {
		return (-1);
	}

	/* Special case: N == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (N == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	for (i = istart; i < length; i++) {
		*curdest = (unsigned char) *cursrc1 >> N;
		cursrc1++;
		curdest++;
	}

	return (0);
}
02594
/*
 * SDL_imageFilterShiftRightUintMMX: D = S >> N, per 32-bit dword.
 *
 * Shifts each 32-bit lane of every 8-byte group right by N (psrld).
 * Processes SrcLength/8 groups; the tail is left to the caller
 * (SDL_imageFilterShiftRightUint).
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftRightUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3		/* number of 8-byte groups */
		align 16
	L13023:
		movq mm0, [eax]
		psrld mm0, N		/* shift both dwords right by N */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L13023
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"	/* number of 8-byte groups */
	     ".align 16 \n\t"
	     "1: movq (%%eax), %%mm0 \n\t"
	     "psrld %3, %%mm0 \n\t"	/* shift both dwords right by N */
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02655
/*
 * SDL_imageFilterShiftRightUint: D = S >> N, per 32-bit dword.
 *
 * Treats the buffer as little-endian 32-bit values and shifts each
 * right by N (0..32). MMX core for whole 8-byte groups, C loop for
 * the tail. Trailing bytes that do not form a complete 32-bit value
 * are left untouched.
 *
 * Returns 0 on success, -1 on NULL pointers or N > 32.
 */
int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
	unsigned int i, istart;
	unsigned char *cursrc1, *curdest;
	unsigned int *icursrc1, *icurdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	if (N > 32) {
		return (-1);
	}

	/* Special case: N == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (N == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation, one complete 32-bit value at a time.
	 * BUGFIX: the condition was previously (i+4) < length, which
	 * skipped the final complete dword (e.g. a 4-byte buffer was
	 * never processed at all). */
	icursrc1 = (unsigned int *) cursrc1;
	icurdest = (unsigned int *) curdest;
	for (i = istart; i < length; i += 4) {
		if ((i + 4) <= length) {
			result = ((unsigned int) *icursrc1 >> N);
			*icurdest = (unsigned int) result;
		}
		icursrc1++;
		icurdest++;
	}

	return (0);
}
02725
/*
 * SDL_imageFilterMultByByteMMX: D = saturation255(S * C)
 *
 * Unpacks each 8-byte group to two 4x16-bit halves, multiplies by C
 * (pmullw) and repacks with unsigned saturation (packuswb). A second
 * code path is intended for large C, where the 16-bit product can
 * exceed 0x7FFF: it converts the (as-signed negative) product back
 * via a sign-mask absolute-value sequence before packing.
 *
 * NOTE(review): the path selection uses `cmp al, 128 / jg`, a SIGNED
 * 8-bit compare (128 == -128 as imm8), so which C values take the
 * second path deserves verification.
 *
 * Tail bytes are left to the caller (SDL_imageFilterMultByByte).
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* Replicate C into all 4 words of mm1 */
		mov al, C
		xor ah, ah
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm1, eax
		movd mm2, eax
		punpckldq mm1, mm2
		pxor mm0, mm0		/* mm0 = 0, for byte->word unpack */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		cmp al, 128		/* choose small-C / large-C path */
		jg L10251
		align 16
	L10250:				/* path 1: product fits packuswb */
		movq mm3, [eax]
		movq mm4, mm3
		punpcklbw mm3, mm0
		punpckhbw mm4, mm0
		pmullw mm3, mm1
		pmullw mm4, mm1
		packuswb mm3, mm4
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10250
		jmp L10252
		align 16
	L10251:				/* path 2: abs() of signed product */
		movq mm3, [eax]
		movq mm4, mm3
		punpcklbw mm3, mm0
		punpckhbw mm4, mm0
		pmullw mm3, mm1
		pmullw mm4, mm1
		movq mm5, mm3
		movq mm6, mm4
		psraw mm5, 15		/* sign masks */
		psraw mm6, 15
		pxor mm3, mm5		/* abs = (x ^ sign) - sign */
		pxor mm4, mm6
		psubsw mm3, mm5
		psubsw mm4, mm6
		packuswb mm3, mm4
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10251
	L10252:
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* Replicate C into all 4 words of mm1 */
	     "mov %3, %%al \n\t"
	     "xor %%ah, %%ah \n\t"
	     "mov %%ax, %%bx \n\t"
	     "shl $16, %%eax \n\t"
	     "mov %%bx, %%ax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     "pxor %%mm0, %%mm0 \n\t"	/* mm0 = 0 for unpacking */
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     "cmp $128, %%al \n\t"	/* choose path (signed compare) */
	     "jg 2f \n\t" ".align 16 \n\t"
	     /* path 1: product fits packuswb */
	     "1: movq (%%eax), %%mm3 \n\t"
	     "movq %%mm3, %%mm4 \n\t"
	     "punpcklbw %%mm0, %%mm3 \n\t"
	     "punpckhbw %%mm0, %%mm4 \n\t"
	     "pmullw %%mm1, %%mm3 \n\t"
	     "pmullw %%mm1, %%mm4 \n\t"
	     "packuswb %%mm4, %%mm3 \n\t"
	     "movq %%mm3, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "jmp 3f \n\t" ".align 16 \n\t"
	     /* path 2: abs() of signed product before packing */
	     "2: movq (%%eax), %%mm3 \n\t"
	     "movq %%mm3, %%mm4 \n\t"
	     "punpcklbw %%mm0, %%mm3 \n\t"
	     "punpckhbw %%mm0, %%mm4 \n\t"
	     "pmullw %%mm1, %%mm3 \n\t"
	     "pmullw %%mm1, %%mm4 \n\t"
	     "movq %%mm3, %%mm5 \n\t"
	     "movq %%mm4, %%mm6 \n\t"
	     "psraw $15, %%mm5 \n\t"	/* sign masks */
	     "psraw $15, %%mm6 \n\t"
	     "pxor %%mm5, %%mm3 \n\t"	/* abs = (x ^ sign) - sign */
	     "pxor %%mm6, %%mm4 \n\t"
	     "psubsw %%mm5, %%mm3 \n\t"
	     "psubsw %%mm6, %%mm4 \n\t"
	     "packuswb %%mm4, %%mm3 \n\t"
	     "movq %%mm3, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 2b \n\t"
	     "3: emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(C)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
02866
/*
 * SDL_imageFilterMultByByte: D = saturation255(S * C)
 *
 * Multiplies every byte of Src1 by C and stores the result, clamped
 * to 255, in Dest. MMX core for the aligned bulk, C loop for the
 * tail / full fallback.
 *
 * Returns 0 on success, -1 if either buffer pointer is NULL.
 */
int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
{
	unsigned int i, istart;
	int iC;
	unsigned char *cursrc1;
	unsigned char *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Special case: C == 1 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (C == 1) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	iC = (int) C;
	for (i = istart; i < length; i++) {
		result = (int) *cursrc1 * iC;
		if (result > 255)
			result = 255;
		*curdest = (unsigned char) result;
		cursrc1++;
		curdest++;
	}

	return (0);
}
02932
/*
 * SDL_imageFilterShiftRightAndMultByByteMMX:
 * D = saturation255((S >> N) * C)
 *
 * Unpacks each 8-byte group to 16-bit words, shifts right by N,
 * multiplies by C and repacks with unsigned saturation. Because the
 * shift happens on zero-extended words, no byte mask is needed.
 * Tail bytes are left to the caller.
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
											  unsigned char C)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		/* Replicate C into all 4 words of mm1 */
		mov al, C
		xor ah, ah
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm1, eax
		movd mm2, eax
		punpckldq mm1, mm2
		/* mm7 = shift count */
		xor ecx, ecx
		mov cl, N
		movd mm7, ecx
		pxor mm0, mm0		/* mm0 = 0 for unpacking */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L1026:
		movq mm3, [eax]
		movq mm4, mm3
		punpcklbw mm3, mm0	/* zero-extend bytes to words */
		punpckhbw mm4, mm0
		psrlw mm3, mm7		/* word shift (safe: high byte 0) */
		psrlw mm4, mm7
		pmullw mm3, mm1		/* multiply by C */
		pmullw mm4, mm1
		packuswb mm3, mm4	/* repack with saturation */
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1026
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     /* Replicate C into all 4 words of mm1 */
	     "mov %4, %%al \n\t"
	     "xor %%ah, %%ah \n\t"
	     "mov %%ax, %%bx \n\t"
	     "shl $16, %%eax \n\t"
	     "mov %%bx, %%ax \n\t"
	     "movd %%eax, %%mm1 \n\t"
	     "movd %%eax, %%mm2 \n\t"
	     "punpckldq %%mm2, %%mm1 \n\t"
	     /* mm7 = shift count */
	     "xor %%ecx, %%ecx \n\t"
	     "mov %3, %%cl \n\t"
	     "movd %%ecx, %%mm7 \n\t"
	     "pxor %%mm0, %%mm0 \n\t"	/* mm0 = 0 for unpacking */
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "1: movq (%%eax), %%mm3 \n\t"
	     "movq %%mm3, %%mm4 \n\t"
	     "punpcklbw %%mm0, %%mm3 \n\t"	/* zero-extend to words */
	     "punpckhbw %%mm0, %%mm4 \n\t"
	     "psrlw %%mm7, %%mm3 \n\t"	/* shift */
	     "psrlw %%mm7, %%mm4 \n\t"
	     "pmullw %%mm1, %%mm3 \n\t"	/* multiply by C */
	     "pmullw %%mm1, %%mm4 \n\t"
	     "packuswb %%mm4, %%mm3 \n\t"	/* repack with saturation */
	     "movq %%mm3, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N),
	     "m"(C)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
03036
/*
 * SDL_imageFilterShiftRightAndMultByByte:
 * D = saturation255((S >> N) * C)
 *
 * Shifts every byte of Src1 right by N (0..8), multiplies by C and
 * stores the result, clamped to 255, in Dest. MMX core for the
 * aligned bulk, C loop for the tail / full fallback.
 *
 * Returns 0 on success, -1 on NULL pointers or N > 8.
 */
int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N,
										   unsigned char C)
{
	unsigned int i, istart;
	int iC;
	unsigned char *cursrc1;
	unsigned char *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Check shift range */
	if (N > 8) {
		return (-1);
	}

	/* Special case: N == 0 and C == 1 is a plain copy of the source
	 * to the destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if ((N == 0) && (C == 1)) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	iC = (int) C;
	for (i = istart; i < length; i++) {
		result = (int) (*cursrc1 >> N) * iC;
		if (result > 255)
			result = 255;
		*curdest = (unsigned char) result;
		cursrc1++;
		curdest++;
	}

	return (0);
}
03109
/*
 * SDL_imageFilterShiftLeftByteMMX: D = (S << N) & 0xFF, per byte.
 *
 * Mirror of ShiftRightMMX: builds a byte mask ((0xFF << N) & 0xFF
 * replicated) in mm1 via repeated psllw + AND with Mask (8 bytes of
 * 0xFE), then shifts whole words left by N and masks off bits leaked
 * into the neighbouring byte. Tail bytes are left to the caller.
 *
 * NOTE(review): N is assumed to be >= 1 (the mask loop runs N
 * times); N == 0 is filtered out by the caller.
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
									unsigned char *Mask)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		mov edx, Mask		/* mm0 = 8 x 0xFE */
		movq mm0, [edx]
		xor ecx, ecx
		mov cl, N
		movd mm3, ecx		/* mm3 = shift count */
		pcmpeqb mm1, mm1	/* mm1 = all ones */
	L10270:				/* build byte mask (0xFF << N) & 0xFF */
		psllw mm1, 1
		pand mm1, mm0
		dec cl
		jnz L10270
		/* Pointers and 8-byte group count */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		align 16
	L10271:
		movq mm0, [eax]
		psllw mm0, mm3		/* word shift */
		pand mm0, mm1		/* mask leaked neighbour bits */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10271
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t" "movl %4, %%edx \n\t"
	     "movq (%%edx), %%mm0 \n\t"	/* mm0 = 8 x 0xFE */
	     "xor %%ecx, %%ecx \n\t"
	     "mov %3, %%cl \n\t"
	     "movd %%ecx, %%mm3 \n\t"	/* mm3 = shift count */
	     "pcmpeqb %%mm1, %%mm1 \n\t"	/* mm1 = all ones */
	     "1: \n\t"			/* build byte mask (0xFF << N) & 0xFF */
	     "psllw $1, %%mm1 \n\t"
	     /* raw opcode: pand %%mm0, %%mm1 */
	     ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t"
	     "jnz 1b \n\t"
	     /* Pointers and 8-byte group count */
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     ".align 16 \n\t"
	     "2: movq (%%eax), %%mm0 \n\t"
	     "psllw %%mm3, %%mm0 \n\t"	/* word shift */
	     /* raw opcode: pand %%mm1, %%mm0 */
	     ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 2b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N),
	     "m"(Mask)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
03200
/*
 * SDL_imageFilterShiftLeftByte: D = (S << N) & 0xFF, per byte.
 *
 * Shifts every byte of Src1 left by N (0..8), truncating to 8 bits,
 * and stores the result in Dest. MMX core for the aligned bulk,
 * C loop for the tail / full fallback.
 *
 * Returns 0 on success, -1 on NULL pointers or N > 8.
 */
int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
	/* 0xFE mask required by the MMX core's per-byte shift trick */
	static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
	unsigned int i, istart;
	unsigned char *cursrc1, *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	/* Check shift range */
	if (N > 8) {
		return (-1);
	}

	/* Special case: N == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (N == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation for the remaining bytes */
	for (i = istart; i < length; i++) {
		result = ((int) *cursrc1 << N) & 0xff;
		*curdest = (unsigned char) result;
		cursrc1++;
		curdest++;
	}

	return (0);
}
03266
/*
 * SDL_imageFilterShiftLeftUintMMX: D = S << N, per 32-bit dword.
 *
 * Shifts each 32-bit lane of every 8-byte group left by N (pslld).
 * Processes SrcLength/8 groups; the tail is left to the caller
 * (SDL_imageFilterShiftLeftUint).
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftLeftUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3		/* number of 8-byte groups */
		align 16
	L12023:
		movq mm0, [eax]
		pslld mm0, N		/* shift both dwords left by N */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L12023
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t"
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"	/* number of 8-byte groups */
	     ".align 16 \n\t"
	     "1: movq (%%eax), %%mm0 \n\t"
	     "pslld %3, %%mm0 \n\t"	/* shift both dwords left by N */
	     "movq %%mm0, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
03327
/*
 * SDL_imageFilterShiftLeftUint: D = S << N, per 32-bit dword.
 *
 * Treats the buffer as little-endian 32-bit values and shifts each
 * left by N (0..32). MMX core for whole 8-byte groups, C loop for
 * the tail. Trailing bytes that do not form a complete 32-bit value
 * are left untouched.
 *
 * Returns 0 on success, -1 on NULL pointers or N > 32.
 */
int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
	unsigned int i, istart;
	unsigned char *cursrc1, *curdest;
	unsigned int *icursrc1, *icurdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return (-1);
	if (length == 0)
		return (0);

	if (N > 32) {
		return (-1);
	}

	/* Special case: N == 0 is a plain copy of the source to the
	 * destination. BUGFIX: arguments were previously reversed
	 * (memcpy(Src1, Dest, length)), which overwrote the source. */
	if (N == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* Process whole 8-byte groups with MMX */
		SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N);

		/* Check for a non-aligned tail */
		if ((length & 7) > 0) {
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8: all done */
			return (0);
		}
	} else {
		/* No MMX or short buffer: process everything in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C implementation, one complete 32-bit value at a time.
	 * BUGFIX: the condition was previously (i+4) < length, which
	 * skipped the final complete dword (e.g. a 4-byte buffer was
	 * never processed at all). */
	icursrc1 = (unsigned int *) cursrc1;
	icurdest = (unsigned int *) curdest;
	for (i = istart; i < length; i += 4) {
		if ((i + 4) <= length) {
			result = ((unsigned int) *icursrc1 << N);
			*icurdest = (unsigned int) result;
		}
		icursrc1++;
		icurdest++;
	}

	return (0);
}
03397
/*
 * SDL_imageFilterShiftLeftMMX: D = saturation255(S << N), per byte.
 *
 * Unpacks bytes to 16-bit words, shifts left by N and repacks with
 * unsigned saturation. Two paths selected on N: for N <= 7 the
 * shifted word fits the packuswb range directly; for larger N the
 * (as-signed negative) word is converted via a sign-mask
 * absolute-value sequence before packing.
 *
 * Tail bytes are left to the caller (SDL_imageFilterShiftLeft,
 * which continues below this block).
 *
 * Returns 0 when MMX is compiled in, -1 otherwise.
 */
int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline assembly version */
	__asm
	{
		pusha
		xor eax, eax
		mov al, N
		movd mm7, eax		/* mm7 = shift count */
		pxor mm0, mm0		/* mm0 = 0 for unpacking */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3
		cmp al, 7		/* choose small-N / large-N path */
		jg L10281
		align 16
	L10280:				/* path 1: N <= 7 */
		movq mm3, [eax]
		movq mm4, mm3
		punpcklbw mm3, mm0
		punpckhbw mm4, mm0
		psllw mm3, mm7
		psllw mm4, mm7
		packuswb mm3, mm4	/* repack with saturation */
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10280
		jmp L10282
		align 16
	L10281:				/* path 2: abs() before packing */
		movq mm3, [eax]
		movq mm4, mm3
		punpcklbw mm3, mm0
		punpckhbw mm4, mm0
		psllw mm3, mm7
		psllw mm4, mm7
		movq mm5, mm3
		movq mm6, mm4
		psraw mm5, 15		/* sign masks */
		psraw mm6, 15
		pxor mm3, mm5		/* abs = (x ^ sign) - sign */
		pxor mm4, mm6
		psubsw mm3, mm5
		psubsw mm4, mm6
		packuswb mm3, mm4
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L10281
	L10282:
		emms
		popa
	}
#else
	/* GCC inline assembly version */
	asm volatile
	    ("pusha \n\t" "xor %%eax, %%eax \n\t"
	     "mov %3, %%al \n\t"
	     "movd %%eax, %%mm7 \n\t"	/* mm7 = shift count */
	     "pxor %%mm0, %%mm0 \n\t"	/* mm0 = 0 for unpacking */
	     "mov %1, %%eax \n\t"
	     "mov %0, %%edi \n\t"
	     "mov %2, %%ecx \n\t"
	     "shr $3, %%ecx \n\t"
	     "cmp $7, %%al \n\t"	/* choose small-N / large-N path */
	     "jg 2f \n\t" ".align 16 \n\t"
	     /* path 1: N <= 7 */
	     "1: movq (%%eax), %%mm3 \n\t"
	     "movq %%mm3, %%mm4 \n\t"
	     "punpcklbw %%mm0, %%mm3 \n\t"
	     "punpckhbw %%mm0, %%mm4 \n\t"
	     "psllw %%mm7, %%mm3 \n\t"
	     "psllw %%mm7, %%mm4 \n\t"
	     "packuswb %%mm4, %%mm3 \n\t"	/* repack with saturation */
	     "movq %%mm3, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 1b \n\t"
	     "jmp 3f \n\t" ".align 16 \n\t"
	     /* path 2: abs() of signed word before packing */
	     "2: movq (%%eax), %%mm3 \n\t"
	     "movq %%mm3, %%mm4 \n\t"
	     "punpcklbw %%mm0, %%mm3 \n\t"
	     "punpckhbw %%mm0, %%mm4 \n\t"
	     "psllw %%mm7, %%mm3 \n\t"
	     "psllw %%mm7, %%mm4 \n\t"
	     "movq %%mm3, %%mm5 \n\t"
	     "movq %%mm4, %%mm6 \n\t"
	     "psraw $15, %%mm5 \n\t"	/* sign masks */
	     "psraw $15, %%mm6 \n\t"
	     "pxor %%mm5, %%mm3 \n\t"	/* abs = (x ^ sign) - sign */
	     "pxor %%mm6, %%mm4 \n\t"
	     "psubsw %%mm5, %%mm3 \n\t"
	     "psubsw %%mm6, %%mm4 \n\t"
	     "packuswb %%mm4, %%mm3 \n\t"
	     "movq %%mm3, (%%edi) \n\t"
	     "add $8, %%eax \n\t"
	     "add $8, %%edi \n\t"
	     "dec %%ecx \n\t"
	     "jnz 2b \n\t"
	     "3: emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src1),
	     "m"(SrcLength),
	     "m"(N)
	    );
#endif
	return (0);
#else
	/* MMX support not compiled in */
	return (-1);
#endif
}
03525
/*!
\brief Filter using ShiftLeft: D = saturation255(S << N)

Shifts every source byte left by N bits, clamping the result to 255, and
stores it in Dest. Uses the MMX routine for the bulk of the buffer when
available and finishes any tail bytes in C.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param length Number of bytes to process.
\param N Number of bit positions to shift left (0..8).

\return 0 on success, -1 on invalid parameters (NULL pointers or N > 8).
*/
int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
{
	unsigned int i, istart;
	unsigned char *cursrc1, *curdest;
	int result;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return(-1);
	if (length == 0)
		return(0);

	if (N > 8) {
		return (-1);
	}

	/* Shift by zero is a plain copy of the source into the destination.
	   BUGFIX: the original called memcpy(Src1, Dest, length), i.e. with
	   destination and source swapped, which clobbered the source buffer
	   and left Dest unchanged. */
	if (N == 0) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* MMX routine handles the leading multiple-of-8 portion */
		SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);

		/* Check for unaligned tail bytes */
		if ((length & 7) > 0) {
			/* Setup to process the tail in C */
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8 - all done */
			return (0);
		}
	} else {
		/* No MMX - process the whole buffer in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C routine: shift and clamp to 255 */
	for (i = istart; i < length; i++) {
		result = (int) *cursrc1 << N;
		if (result > 255)
			result = 255;
		*curdest = (unsigned char) result;

		cursrc1++;
		curdest++;
	}

	return (0);
}
03592
/*!
\brief Internal MMX Filter using BinarizeUsingThreshold: D = (S >= T) ? 255 : 0

Trick: mm2 is loaded with (255 - T) replicated into every byte. Then for each
source byte s, paddusb computes min(s + (255 - T), 255), which saturates to
exactly 255 iff s >= T; comparing the sum against all-ones (mm1) with pcmpeqb
yields 0xFF for s >= T and 0x00 otherwise. Only floor(SrcLength/8)*8 bytes are
processed; the caller handles the tail in C.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param SrcLength Number of bytes in the source array.
\param T Threshold value.

\return 0 when the MMX code ran, -1 when the library was built without USE_MMX.
*/
int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline-assembly variant */
	__asm
	{
		pusha

		pcmpeqb mm1, mm1	/* mm1 = all 0xFF (comparison reference) */
		pcmpeqb mm2, mm2	/* mm2 = all 0xFF */
		mov al, T		/* replicate T into all 4 bytes of eax */
		mov ah, al
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm3, eax
		movd mm4, eax
		punpckldq mm3, mm4	/* mm3 = T replicated into 8 bytes */
		psubusb mm2, mm3	/* mm2 = (255 - T) per byte */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3		/* ecx = number of 8-byte quadwords */
		align 16
		L1029:
		movq mm0, [eax]		/* load 8 source bytes */
		paddusb mm0, mm2	/* saturates to 255 exactly when s >= T */
		pcmpeqb mm0, mm1	/* 0xFF where saturated, 0x00 otherwise */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1029
		emms
		popa
	}
#else
	/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
	asm volatile
		("pusha \n\t"

		 "pcmpeqb %%mm1, %%mm1 \n\t"
		 "pcmpeqb %%mm2, %%mm2 \n\t"
		 "mov %3, %%al \n\t"
		 "mov %%al, %%ah \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm3 \n\t"
		 "movd %%eax, %%mm4 \n\t"
		 "punpckldq %%mm4, %%mm3 \n\t"
		 "psubusb %%mm3, %%mm2 \n\t"
		 "mov %1, %%eax \n\t"
		 "mov %0, %%edi \n\t"
		 "mov %2, %%ecx \n\t"
		 "shr $3, %%ecx \n\t"
		 ".align 16 \n\t"
		 "1: \n\t"
		 "movq (%%eax), %%mm0 \n\t"
		 "paddusb %%mm2, %%mm0 \n\t"
		 "pcmpeqb %%mm1, %%mm0 \n\t"
		 "movq %%mm0, (%%edi) \n\t"
		 "add $8, %%eax \n\t"
		 "add $8, %%edi \n\t"
		 "dec %%ecx \n\t"
		 "jnz 1b \n\t"
		 "emms \n\t"
		 "popa \n\t":"=m" (Dest)
		 :"m"(Src1),
		 "m"(SrcLength),
		 "m"(T)
		 );
#endif
	return (0);
#else
	return (-1);
#endif
}
03680
/*!
\brief Filter using BinarizeUsingThreshold: D = (S >= T) ? 255 : 0

Maps every source byte to 255 when it is at least T and to 0 otherwise.
Uses the MMX routine for the leading multiple-of-8 portion when available
and finishes any remaining bytes in C.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param length Number of bytes to process.
\param T Threshold value.

\return 0 on success, -1 on invalid parameters.
*/
int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)
{
	unsigned int index;
	unsigned int tail_start;
	unsigned char *src_ptr;
	unsigned char *dst_ptr;

	/* Validate input parameters */
	if (Src1 == NULL || Dest == NULL)
		return (-1);
	if (length == 0)
		return (0);

	/* A zero threshold maps every pixel to 255 */
	if (T == 0) {
		memset(Dest, 255, length);
		return (0);
	}

	if (SDL_imageFilterMMXdetect() && length > 7) {
		/* MMX routine handles the leading multiple-of-8 portion */
		SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);

		if ((length & 7) == 0) {
			/* Length was a multiple of 8 - all done */
			return (0);
		}
		/* Point at the unprocessed tail bytes */
		tail_start = length & 0xfffffff8;
		src_ptr = &Src1[tail_start];
		dst_ptr = &Dest[tail_start];
	} else {
		/* No MMX - process the whole buffer in C */
		tail_start = 0;
		src_ptr = Src1;
		dst_ptr = Dest;
	}

	/* Scalar binarization of the remaining bytes */
	for (index = tail_start; index < length; index++) {
		*dst_ptr++ = (*src_ptr++ >= T) ? 255 : 0;
	}

	return (0);
}
03740
/*!
\brief Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) ? S : Tmin/Tmax

Trick: with mm1 = (255 - Tmax) and mm7 = Tmin + (255 - Tmax) replicated per
byte, the loop computes for each source byte s:
  paddusb s + (255-Tmax)   -> saturates anything above Tmax to 255
  psubusb   - mm7          -> clamps anything below Tmin to 0
  paddusb   + Tmin         -> shifts the result back into [Tmin, Tmax]
Only floor(SrcLength/8)*8 bytes are processed; the caller handles the tail.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param SrcLength Number of bytes in the source array.
\param Tmin Lower clip bound.
\param Tmax Upper clip bound.

\return 0 when the MMX code ran, -1 when the library was built without USE_MMX.
*/
int SDL_imageFilterClipToRangeMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin,
								  unsigned char Tmax)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline-assembly variant */
	__asm
	{
		pusha
		pcmpeqb mm1, mm1	/* mm1 = all 0xFF */

		/* replicate Tmax into 8 bytes, then mm1 = (255 - Tmax) per byte */
		mov al, Tmax
		mov ah, al
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm3, eax
		movd mm4, eax
		punpckldq mm3, mm4
		psubusb mm1, mm3

		/* replicate Tmin into mm5; mm7 = Tmin + (255 - Tmax) per byte */
		mov al, Tmin
		mov ah, al
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm5, eax
		movd mm4, eax
		punpckldq mm5, mm4
		movq mm7, mm5
		paddusb mm7, mm1
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3		/* ecx = number of 8-byte quadwords */
		align 16
		L1030:
		movq mm0, [eax]		/* load 8 source bytes */
		paddusb mm0, mm1	/* clamp top: values > Tmax saturate to 255 */
		psubusb mm0, mm7	/* clamp bottom: values < Tmin drop to 0 */
		paddusb mm0, mm5	/* shift result back up by Tmin */
		movq [edi], mm0
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1030
		emms
		popa
	}
#else
	/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
	asm volatile
		("pusha \n\t" "pcmpeqb %%mm1, %%mm1 \n\t"

		 "mov %4, %%al \n\t"
		 "mov %%al, %%ah \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm3 \n\t"
		 "movd %%eax, %%mm4 \n\t"
		 "punpckldq %%mm4, %%mm3 \n\t"
		 "psubusb %%mm3, %%mm1 \n\t"

		 "mov %3, %%al \n\t"
		 "mov %%al, %%ah \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm5 \n\t"
		 "movd %%eax, %%mm4 \n\t"
		 "punpckldq %%mm4, %%mm5 \n\t"
		 "movq %%mm5, %%mm7 \n\t"
		 "paddusb %%mm1, %%mm7 \n\t"
		 "mov %1, %%eax \n\t"
		 "mov %0, %%edi \n\t"
		 "mov %2, %%ecx \n\t"
		 "shr $3, %%ecx \n\t"
		 ".align 16 \n\t"
		 "1: \n\t"
		 "movq (%%eax), %%mm0 \n\t"
		 "paddusb %%mm1, %%mm0 \n\t"
		 "psubusb %%mm7, %%mm0 \n\t"
		 "paddusb %%mm5, %%mm0 \n\t"
		 "movq %%mm0, (%%edi) \n\t"
		 "add $8, %%eax \n\t"
		 "add $8, %%edi \n\t"
		 "dec %%ecx \n\t"
		 "jnz 1b \n\t"
		 "emms \n\t"
		 "popa \n\t":"=m" (Dest)
		 :"m"(Src1),
		 "m"(SrcLength),
		 "m"(Tmin),
		 "m"(Tmax)
		 );
#endif
	return (0);
#else
	return (-1);
#endif
}
03852
/*!
\brief Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) ? S : Tmin/Tmax

Clamps every source byte into the inclusive range [Tmin, Tmax] and stores
the result in Dest. Uses the MMX routine for the bulk of the buffer when
available and finishes any tail bytes in C.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param length Number of bytes to process.
\param Tmin Lower clip bound.
\param Tmax Upper clip bound.

\return 0 on success, -1 on invalid parameters.
*/
int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin,
			       unsigned char Tmax)
{
	unsigned int i, istart;
	unsigned char *cursrc1;
	unsigned char *curdest;

	/* Validate input parameters */
	if ((Src1 == NULL) || (Dest == NULL))
		return(-1);
	if (length == 0)
		return(0);

	/* The full range [0, 255] clips nothing: just copy the buffer.
	   BUGFIX: the original tested (Tmax == 25) - an obvious typo for
	   255 - and called memcpy(Src1, Dest, length) with destination and
	   source swapped, which clobbered the source buffer. */
	if ((Tmin == 0) && (Tmax == 255)) {
		memcpy(Dest, Src1, length);
		return (0);
	}

	if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
		/* MMX routine handles the leading multiple-of-8 portion */
		SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax);

		/* Check for unaligned tail bytes */
		if ((length & 7) > 0) {
			/* Setup to process the tail in C */
			istart = length & 0xfffffff8;
			cursrc1 = &Src1[istart];
			curdest = &Dest[istart];
		} else {
			/* Length was a multiple of 8 - all done */
			return (0);
		}
	} else {
		/* No MMX - process the whole buffer in C */
		istart = 0;
		cursrc1 = Src1;
		curdest = Dest;
	}

	/* C routine: clamp each byte into [Tmin, Tmax] */
	for (i = istart; i < length; i++) {
		if (*cursrc1 < Tmin) {
			*curdest = Tmin;
		} else if (*cursrc1 > Tmax) {
			*curdest = Tmax;
		} else {
			*curdest = *cursrc1;
		}

		cursrc1++;
		curdest++;
	}

	return (0);
}
03920
/*!
\brief Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)

The integer factor (Nmax-Nmin)/(Cmax-Cmin) is computed once in 16 bits
(255 is used when Cmax == Cmin) and replicated into mm0; Cmin is replicated
into mm1 and Nmin into mm2. Each quadword of pixels is unpacked to words,
offset by -Cmin, scaled, offset by +Nmin, absolute-valued and repacked with
unsigned saturation. Only floor(SrcLength/8)*8 bytes are processed.
NOTE(review): only the low 16 bits of Cmin/Cmax/Nmin/Nmax are used - the
int parameters are assumed to hold small values.

\param Src1 Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param SrcLength Number of bytes in the source array.
\param Cmin Normalization constant (current range minimum).
\param Cmax Normalization constant (current range maximum).
\param Nmin Normalization constant (new range minimum).
\param Nmax Normalization constant (new range maximum).

\return 0 when the MMX code ran, -1 when the library was built without USE_MMX.
*/
int SDL_imageFilterNormalizeLinearMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax,
									  int Nmin, int Nmax)
{
#ifdef USE_MMX
#if !defined(GCC__)
	/* MSVC inline-assembly variant */
	__asm
	{
		pusha
		mov ax, WORD PTR Nmax	/* compute 16-bit factor = (Nmax-Nmin)/(Cmax-Cmin) */
		mov bx, WORD PTR Cmax
		sub ax, WORD PTR Nmin
		sub bx, WORD PTR Cmin
		jz L10311		/* Cmax == Cmin: use 255 instead of dividing by zero */
		xor dx, dx
		div bx
		jmp L10312
		L10311:
		mov ax, 255
		L10312:
		mov bx, ax		/* replicate the factor into all 4 words of mm0 */
		shl eax, 16
		mov ax, bx
		movd mm0, eax
		movd mm1, eax
		punpckldq mm0, mm1

		mov ax, WORD PTR Cmin	/* mm1 = Cmin replicated into 4 words */
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm1, eax
		movd mm2, eax
		punpckldq mm1, mm2

		mov ax, WORD PTR Nmin	/* mm2 = Nmin replicated into 4 words */
		mov bx, ax
		shl eax, 16
		mov ax, bx
		movd mm2, eax
		movd mm3, eax
		punpckldq mm2, mm3
		pxor mm7, mm7		/* mm7 = 0, used to unpack bytes to words */
		mov eax, Src1
		mov edi, Dest
		mov ecx, SrcLength
		shr ecx, 3		/* ecx = number of 8-byte quadwords */
		align 16
		L1031:
		movq mm3, [eax]		/* load 8 source bytes */
		movq mm4, mm3
		punpcklbw mm3, mm7	/* low 4 bytes -> words */
		punpckhbw mm4, mm7	/* high 4 bytes -> words */
		psubusb mm3, mm1	/* subtract Cmin (unsigned saturated) */
		psubusb mm4, mm1
		pmullw mm3, mm0		/* scale by the factor */
		pmullw mm4, mm0
		paddusb mm3, mm2	/* add Nmin */
		paddusb mm4, mm2

		/* absolute value of each word (sign-mask trick) before packing */
		movq mm5, mm3
		movq mm6, mm4
		psraw mm5, 15
		psraw mm6, 15
		pxor mm3, mm5
		pxor mm4, mm6
		psubsw mm3, mm5
		psubsw mm4, mm6
		packuswb mm3, mm4	/* repack to bytes with unsigned saturation */
		movq [edi], mm3
		add eax, 8
		add edi, 8
		dec ecx
		jnz L1031
		emms
		popa
	}
#else
	/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
	asm volatile
		("pusha \n\t" "mov %6, %%ax \n\t"
		 "mov %4, %%bx \n\t"
		 "sub %5, %%ax \n\t"
		 "sub %3, %%bx \n\t"
		 "jz 1f \n\t"
		 "xor %%dx, %%dx \n\t"
		 "div %%bx \n\t"
		 "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t"
		 "2: \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm0 \n\t"
		 "movd %%eax, %%mm1 \n\t"
		 "punpckldq %%mm1, %%mm0 \n\t"

		 "mov %3, %%ax \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm1 \n\t"
		 "movd %%eax, %%mm2 \n\t"
		 "punpckldq %%mm2, %%mm1 \n\t"

		 "mov %5, %%ax \n\t"
		 "mov %%ax, %%bx \n\t"
		 "shl $16, %%eax \n\t"
		 "mov %%bx, %%ax \n\t"
		 "movd %%eax, %%mm2 \n\t"
		 "movd %%eax, %%mm3 \n\t"
		 "punpckldq %%mm3, %%mm2 \n\t"
		 "pxor %%mm7, %%mm7 \n\t"
		 "mov %1, %%eax \n\t"
		 "mov %0, %%edi \n\t"
		 "mov %2, %%ecx \n\t"
		 "shr $3, %%ecx \n\t"
		 ".align 16 \n\t"
		 "1: \n\t"
		 "movq (%%eax), %%mm3 \n\t"
		 "movq %%mm3, %%mm4 \n\t"
		 "punpcklbw %%mm7, %%mm3 \n\t"
		 "punpckhbw %%mm7, %%mm4 \n\t"
		 "psubusb %%mm1, %%mm3 \n\t"
		 "psubusb %%mm1, %%mm4 \n\t"
		 "pmullw %%mm0, %%mm3 \n\t"
		 "pmullw %%mm0, %%mm4 \n\t"
		 "paddusb %%mm2, %%mm3 \n\t"
		 "paddusb %%mm2, %%mm4 \n\t"

		 "movq %%mm3, %%mm5 \n\t"
		 "movq %%mm4, %%mm6 \n\t"
		 "psraw $15, %%mm5 \n\t"
		 "psraw $15, %%mm6 \n\t"
		 "pxor %%mm5, %%mm3 \n\t"
		 "pxor %%mm6, %%mm4 \n\t"
		 "psubsw %%mm5, %%mm3 \n\t"
		 "psubsw %%mm6, %%mm4 \n\t"
		 "packuswb %%mm4, %%mm3 \n\t"
		 "movq %%mm3, (%%edi) \n\t"
		 "add $8, %%eax \n\t"
		 "add $8, %%edi \n\t"
		 "dec %%ecx \n\t"
		 "jnz 1b \n\t"
		 "emms \n\t"
		 "popa \n\t":"=m" (Dest)
		 :"m"(Src1),
		 "m"(SrcLength),
		 "m"(Cmin),
		 "m"(Cmax),
		 "m"(Nmin),
		 "m"(Nmax)
		 );
#endif
	return (0);
#else
	return (-1);
#endif
}
04090
04104 int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin,
04105 int Nmax)
04106 {
04107 unsigned int i, istart;
04108 unsigned char *cursrc;
04109 unsigned char *curdest;
04110 int dN, dC, factor;
04111 int result;
04112
04113
04114 if ((Src == NULL) || (Dest == NULL))
04115 return(-1);
04116 if (length == 0)
04117 return(0);
04118
04119 if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
04120
04121 SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
04122
04123
04124 if ((length & 7) > 0) {
04125
04126 istart = length & 0xfffffff8;
04127 cursrc = &Src[istart];
04128 curdest = &Dest[istart];
04129 } else {
04130
04131 return (0);
04132 }
04133 } else {
04134
04135 istart = 0;
04136 cursrc = Src;
04137 curdest = Dest;
04138 }
04139
04140
04141 dC = Cmax - Cmin;
04142 if (dC == 0)
04143 return (0);
04144 dN = Nmax - Nmin;
04145 factor = dN / dC;
04146 for (i = istart; i < length; i++) {
04147 result = factor * ((int) (*cursrc) - Cmin) + Nmin;
04148 if (result > 255)
04149 result = 255;
04150 *curdest = (unsigned char) result;
04151
04152 cursrc++;
04153 curdest++;
04154 }
04155
04156 return (0);
04157 }
04158
04159
04160
/*!
\brief Internal MMX Filter using a 3x3 convolution kernel with a post-divide.

For every output pixel, three image rows are multiplied by the three kernel
rows held in mm5/mm6/mm7, the partial word products are summed horizontally,
divided by Divisor and stored with unsigned saturation. The kernel is read
as 3 consecutive quadwords (4 signed shorts each); only the low 4 bytes of
each row are unpacked, so the 4th kernel entry of each row is presumably
expected to be 0 - TODO confirm against the caller's kernel layout.
Output starts at Dest + columns + 1 (one-pixel border is left untouched);
(rows-2) x (columns-2) pixels are produced. MMX-only: requires USE_MMX.

\param Src Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param rows Number of rows in the image.
\param columns Number of columns (bytes per row) in the image.
\param Kernel Pointer to the convolution kernel (signed shorts).
\param Divisor Value to divide each convolution sum by (must be non-zero).

\return 0 on success, -1 on invalid parameters or when MMX is unavailable.
*/
int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
					   signed short *Kernel, unsigned char Divisor)
{
	/* Validate input parameters */
	if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
		return(-1);

	if ((columns < 3) || (rows < 3) || (Divisor == 0))
		return (-1);

	if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
		/* MSVC inline-assembly variant */
		__asm
		{
			pusha
			pxor mm0, mm0		/* mm0 = 0, used for unpack and pack */
			xor ebx, ebx
			mov bl, Divisor		/* bx = divisor for idiv */
			mov edx, Kernel		/* preload the 3 kernel rows into mm5..mm7 */
			movq mm5, [edx]
			add edx, 8
			movq mm6, [edx]
			add edx, 8
			movq mm7, [edx]

			mov eax, columns
			mov esi, Src
			mov edi, Dest
			add edi, eax		/* first output pixel is at Dest + columns + 1 */
			inc edi
			mov edx, rows
			sub edx, 2		/* edx = row counter (rows - 2) */

			L10320:			/* outer loop over rows */
			mov ecx, eax
			sub ecx, 2		/* ecx = column counter (columns - 2) */
			align 16
			L10322:			/* inner loop over columns */
			/* multiply-accumulate the 3 rows against the kernel */
			movq mm1, [esi]
			add esi, eax
			movq mm2, [esi]
			add esi, eax
			movq mm3, [esi]
			punpcklbw mm1, mm0
			punpcklbw mm2, mm0
			punpcklbw mm3, mm0
			pmullw mm1, mm5
			pmullw mm2, mm6
			pmullw mm3, mm7
			paddsw mm1, mm2
			paddsw mm1, mm3
			/* horizontal sum of the 4 word products into the low word */
			movq mm2, mm1
			psrlq mm1, 32
			paddsw mm1, mm2
			movq mm3, mm1
			psrlq mm1, 16
			paddsw mm1, mm3

			/* save eax/edx in MMX regs, sign-extend the sum into dx:ax,
			   divide by the divisor and store the saturated byte */
			movd mm2, eax
			movd mm3, edx
			movd eax, mm1
			psraw mm1, 15
			movd edx, mm1
			idiv bx
			movd mm1, eax
			packuswb mm1, mm0
			movd eax, mm1
			mov [edi], al
			movd edx, mm3
			movd eax, mm2

			/* step source back up 2 rows and right 1 column */
			sub esi, eax
			sub esi, eax
			inc esi
			inc edi

			dec ecx
			jnz L10322
			add esi, 2		/* skip the 2-pixel border at the row end */
			add edi, 2
			dec edx
			jnz L10320

			emms
			popa
		}
#else
		/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
		asm volatile
			("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
			 "xor %%ebx, %%ebx \n\t"
			 "mov %5, %%bl \n\t"
			 "mov %4, %%edx \n\t"
			 "movq (%%edx), %%mm5 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm6 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm7 \n\t"

			 "mov %3, %%eax \n\t"
			 "mov %1, %%esi \n\t"
			 "mov %0, %%edi \n\t"
			 "add %%eax, %%edi \n\t"
			 "inc %%edi \n\t"
			 "mov %2, %%edx \n\t"
			 "sub $2, %%edx \n\t"

			 ".L10320: \n\t" "mov %%eax, %%ecx \n\t"
			 "sub $2, %%ecx \n\t"
			 ".align 16 \n\t"
			 ".L10322: \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%esi), %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%esi), %%mm3 \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpcklbw %%mm0, %%mm2 \n\t"
			 "punpcklbw %%mm0, %%mm3 \n\t"
			 "pmullw %%mm5, %%mm1 \n\t"
			 "pmullw %%mm6, %%mm2 \n\t"
			 "pmullw %%mm7, %%mm3 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm3, %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "psrlq $32, %%mm1 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "movq %%mm1, %%mm3 \n\t"
			 "psrlq $16, %%mm1 \n\t"
			 "paddsw %%mm3, %%mm1 \n\t"

			 "movd %%eax, %%mm2 \n\t"
			 "movd %%edx, %%mm3 \n\t"
			 "movd %%mm1, %%eax \n\t"
			 "psraw $15, %%mm1 \n\t"
			 "movd %%mm1, %%edx \n\t"
			 "idivw %%bx \n\t"
			 "movd %%eax, %%mm1 \n\t"
			 "packuswb %%mm0, %%mm1 \n\t"
			 "movd %%mm1, %%eax \n\t"
			 "mov %%al, (%%edi) \n\t"
			 "movd %%mm3, %%edx \n\t"
			 "movd %%mm2, %%eax \n\t"

			 "sub %%eax, %%esi \n\t"
			 "sub %%eax, %%esi \n\t"
			 "inc %%esi \n\t"
			 "inc %%edi \n\t"

			 "dec %%ecx \n\t"
			 "jnz .L10322 \n\t"
			 "add $2, %%esi \n\t"
			 "add $2, %%edi \n\t"
			 "dec %%edx \n\t"
			 "jnz .L10320 \n\t"

			 "emms \n\t"
			 "popa \n\t":"=m" (Dest)
			 :"m"(Src),
			 "m"(rows),
			 "m"(columns),
			 "m"(Kernel),
			 "m"(Divisor)
			 );
#endif
#endif
		return (0);
	} else {
		/* No MMX available - this filter has no C fallback */
		return (-1);
	}
}
04349
/*!
\brief Internal MMX Filter using a 5x5 convolution kernel with a post-divide.

For every output pixel, five image rows of 8 pixels each are multiplied by
the kernel (read as 5 rows of 8 signed shorts = 16 bytes per row; entries
beyond the 5 kernel columns are presumably expected to be 0 - TODO confirm
against the caller's kernel layout), the word products are summed
horizontally, divided by Divisor and stored with unsigned saturation.
Output starts at Dest + 2*columns + 2 (two-pixel border is left untouched);
(rows-4) x (columns-4) pixels are produced. The kernel pointer is rewound
by 72 bytes (4*16 + 8 consumed) after each pixel. MMX-only: requires USE_MMX.

\param Src Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param rows Number of rows in the image.
\param columns Number of columns (bytes per row) in the image.
\param Kernel Pointer to the convolution kernel (signed shorts).
\param Divisor Value to divide each convolution sum by (must be non-zero).

\return 0 on success, -1 on invalid parameters or when MMX is unavailable.
*/
int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
					   signed short *Kernel, unsigned char Divisor)
{
	/* Validate input parameters */
	if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
		return(-1);

	if ((columns < 5) || (rows < 5) || (Divisor == 0))
		return (-1);

	if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
		/* MSVC inline-assembly variant */
		__asm
		{
			pusha
			pxor mm0, mm0		/* mm0 = 0, used for unpack and pack */
			xor ebx, ebx
			mov bl, Divisor
			movd mm5, ebx		/* keep the divisor parked in mm5 */
			mov edx, Kernel
			mov esi, Src
			mov edi, Dest
			add edi, 2		/* first output pixel: Dest + 2*columns + 2 */
			mov eax, columns
			shl eax, 1
			add edi, eax
			shr eax, 1
			mov ebx, rows
			sub ebx, 4		/* ebx = row counter (rows - 4) */

			L10330:			/* outer loop over rows */
			mov ecx, eax
			sub ecx, 4		/* ecx = column counter (columns - 4) */
			align 16
			L10332:			/* inner loop over columns */
			pxor mm7, mm7		/* mm7 = running word accumulator */
			movd mm6, esi		/* save the row-start source pointer */

			/* rows 1..4: unpack 8 pixels, multiply by 8 kernel shorts,
			   accumulate into mm7, advance one image row */
			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			/* row 5 (no further row advance) */
			movq mm1, [esi]
			movq mm2, mm1
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			/* horizontal sum of the 4 accumulated words into the low word */
			movq mm3, mm7
			psrlq mm7, 32
			paddsw mm7, mm3
			movq mm2, mm7
			psrlq mm7, 16
			paddsw mm7, mm2

			/* save registers, sign-extend the sum into dx:ax, divide by
			   the divisor (from mm5) and store the saturated byte */
			movd mm1, eax
			movd mm2, ebx
			movd mm3, edx
			movd eax, mm7
			psraw mm7, 15
			movd ebx, mm5
			movd edx, mm7
			idiv bx
			movd mm7, eax
			packuswb mm7, mm0
			movd eax, mm7
			mov [edi], al
			movd edx, mm3
			movd ebx, mm2
			movd eax, mm1

			/* restore the source pointer, rewind the kernel pointer,
			   advance one column */
			movd esi, mm6
			sub edx, 72
			inc esi
			inc edi

			dec ecx
			jnz L10332
			add esi, 4		/* skip the 4-pixel border at the row end */
			add edi, 4
			dec ebx
			jnz L10330

			emms
			popa
		}
#else
		/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
		asm volatile
			("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
			 "xor %%ebx, %%ebx \n\t"
			 "mov %5, %%bl \n\t"
			 "movd %%ebx, %%mm5 \n\t"
			 "mov %4, %%edx \n\t"
			 "mov %1, %%esi \n\t"
			 "mov %0, %%edi \n\t"
			 "add $2, %%edi \n\t"
			 "mov %3, %%eax \n\t"
			 "shl $1, %%eax \n\t"
			 "add %%eax, %%edi \n\t"
			 "shr $1, %%eax \n\t"
			 "mov %2, %%ebx \n\t"
			 "sub $4, %%ebx \n\t"

			 ".L10330: \n\t" "mov %%eax, %%ecx \n\t"
			 "sub $4, %%ecx \n\t"
			 ".align 16 \n\t"
			 ".L10332: \n\t" "pxor %%mm7, %%mm7 \n\t"
			 "movd %%esi, %%mm6 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq %%mm7, %%mm3 \n\t"
			 "psrlq $32, %%mm7 \n\t"
			 "paddsw %%mm3, %%mm7 \n\t"
			 "movq %%mm7, %%mm2 \n\t"
			 "psrlq $16, %%mm7 \n\t"
			 "paddsw %%mm2, %%mm7 \n\t"

			 "movd %%eax, %%mm1 \n\t"
			 "movd %%ebx, %%mm2 \n\t"
			 "movd %%edx, %%mm3 \n\t"
			 "movd %%mm7, %%eax \n\t"
			 "psraw $15, %%mm7 \n\t"
			 "movd %%mm5, %%ebx \n\t"
			 "movd %%mm7, %%edx \n\t"
			 "idivw %%bx \n\t"
			 "movd %%eax, %%mm7 \n\t"
			 "packuswb %%mm0, %%mm7 \n\t"
			 "movd %%mm7, %%eax \n\t"
			 "mov %%al, (%%edi) \n\t"
			 "movd %%mm3, %%edx \n\t"
			 "movd %%mm2, %%ebx \n\t"
			 "movd %%mm1, %%eax \n\t"

			 "movd %%mm6, %%esi \n\t"
			 "sub $72, %%edx \n\t"
			 "inc %%esi \n\t"
			 "inc %%edi \n\t"

			 "dec %%ecx \n\t"
			 "jnz .L10332 \n\t"
			 "add $4, %%esi \n\t"
			 "add $4, %%edi \n\t"
			 "dec %%ebx \n\t"
			 "jnz .L10330 \n\t"

			 "emms \n\t"
			 "popa \n\t":"=m" (Dest)
			 :"m"(Src),
			 "m"(rows),
			 "m"(columns),
			 "m"(Kernel),
			 "m"(Divisor)
			 );
#endif
#endif
		return (0);
	} else {
		/* No MMX available - this filter has no C fallback */
		return (-1);
	}
}
04651
/*!
\brief Internal MMX Filter using a 7x7 convolution kernel with a post-divide.

Same scheme as the 5x5 variant: for every output pixel, seven image rows of
8 pixels each are multiplied by the kernel (read as 7 rows of 8 signed
shorts = 16 bytes per row; entries beyond the 7 kernel columns are
presumably expected to be 0 - TODO confirm against the caller's kernel
layout), the word products are summed horizontally, divided by Divisor and
stored with unsigned saturation. Output starts at Dest + 3*columns + 3
(three-pixel border is left untouched); (rows-6) x (columns-6) pixels are
produced. The kernel pointer is rewound by 104 bytes (6*16 + 8 consumed)
after each pixel. MMX-only: requires USE_MMX.

\param Src Pointer to the start of the source byte array.
\param Dest Pointer to the start of the destination byte array.
\param rows Number of rows in the image.
\param columns Number of columns (bytes per row) in the image.
\param Kernel Pointer to the convolution kernel (signed shorts).
\param Divisor Value to divide each convolution sum by (must be non-zero).

\return 0 on success, -1 on invalid parameters or when MMX is unavailable.
*/
int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
					   signed short *Kernel, unsigned char Divisor)
{
	/* Validate input parameters */
	if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
		return(-1);

	if ((columns < 7) || (rows < 7) || (Divisor == 0))
		return (-1);

	if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
		/* MSVC inline-assembly variant */
		__asm
		{
			pusha
			pxor mm0, mm0		/* mm0 = 0, used for unpack and pack */
			xor ebx, ebx
			mov bl, Divisor
			movd mm5, ebx		/* keep the divisor parked in mm5 */
			mov edx, Kernel
			mov esi, Src
			mov edi, Dest
			add edi, 3		/* first output pixel: Dest + 3*columns + 3 */
			mov eax, columns
			add edi, eax
			add edi, eax
			add edi, eax
			mov ebx, rows
			sub ebx, 6		/* ebx = row counter (rows - 6) */

			L10340:			/* outer loop over rows */
			mov ecx, eax
			sub ecx, 6		/* ecx = column counter (columns - 6) */
			align 16
			L10342:			/* inner loop over columns */
			pxor mm7, mm7		/* mm7 = running word accumulator */
			movd mm6, esi		/* save the row-start source pointer */

			/* rows 1..6: unpack 8 pixels, multiply by 8 kernel shorts,
			   accumulate into mm7, advance one image row */
			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			movq mm1, [esi]
			movq mm2, mm1
			add esi, eax
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			add edx, 8
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			/* row 7 (no further row advance) */
			movq mm1, [esi]
			movq mm2, mm1
			movq mm3, [edx]
			add edx, 8
			movq mm4, [edx]
			punpcklbw mm1, mm0
			punpckhbw mm2, mm0
			pmullw mm1, mm3
			pmullw mm2, mm4
			paddsw mm1, mm2
			paddsw mm7, mm1

			/* horizontal sum of the 4 accumulated words into the low word */
			movq mm3, mm7
			psrlq mm7, 32
			paddsw mm7, mm3
			movq mm2, mm7
			psrlq mm7, 16
			paddsw mm7, mm2

			/* save registers, sign-extend the sum into dx:ax, divide by
			   the divisor (from mm5) and store the saturated byte */
			movd mm1, eax
			movd mm2, ebx
			movd mm3, edx
			movd eax, mm7
			psraw mm7, 15
			movd ebx, mm5
			movd edx, mm7
			idiv bx
			movd mm7, eax
			packuswb mm7, mm0
			movd eax, mm7
			mov [edi], al
			movd edx, mm3
			movd ebx, mm2
			movd eax, mm1

			/* restore the source pointer, rewind the kernel pointer,
			   advance one column */
			movd esi, mm6
			sub edx, 104
			inc esi
			inc edi

			dec ecx
			jnz L10342
			add esi, 6		/* skip the 6-pixel border at the row end */
			add edi, 6
			dec ebx
			jnz L10340

			emms
			popa
		}
#else
		/* GCC inline-assembly variant (same algorithm as the MSVC block above) */
		asm volatile
			("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
			 "xor %%ebx, %%ebx \n\t"
			 "mov %5, %%bl \n\t"
			 "movd %%ebx, %%mm5 \n\t"
			 "mov %4, %%edx \n\t"
			 "mov %1, %%esi \n\t"
			 "mov %0, %%edi \n\t"
			 "add $3, %%edi \n\t"
			 "mov %3, %%eax \n\t"
			 "add %%eax, %%edi \n\t"
			 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
			 "sub $6, %%ebx \n\t"

			 ".L10340: \n\t" "mov %%eax, %%ecx \n\t"
			 "sub $6, %%ecx \n\t"
			 ".align 16 \n\t"
			 ".L10342: \n\t" "pxor %%mm7, %%mm7 \n\t"
			 "movd %%esi, %%mm6 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "add %%eax, %%esi \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "add $8, %%edx \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq (%%esi), %%mm1 \n\t"
			 "movq %%mm1, %%mm2 \n\t"
			 "movq (%%edx), %%mm3 \n\t"
			 "add $8, %%edx \n\t"
			 "movq (%%edx), %%mm4 \n\t"
			 "punpcklbw %%mm0, %%mm1 \n\t"
			 "punpckhbw %%mm0, %%mm2 \n\t"
			 "pmullw %%mm3, %%mm1 \n\t"
			 "pmullw %%mm4, %%mm2 \n\t"
			 "paddsw %%mm2, %%mm1 \n\t"
			 "paddsw %%mm1, %%mm7 \n\t"

			 "movq %%mm7, %%mm3 \n\t"
			 "psrlq $32, %%mm7 \n\t"
			 "paddsw %%mm3, %%mm7 \n\t"
			 "movq %%mm7, %%mm2 \n\t"
			 "psrlq $16, %%mm7 \n\t"
			 "paddsw %%mm2, %%mm7 \n\t"

			 "movd %%eax, %%mm1 \n\t"
			 "movd %%ebx, %%mm2 \n\t"
			 "movd %%edx, %%mm3 \n\t"
			 "movd %%mm7, %%eax \n\t"
			 "psraw $15, %%mm7 \n\t"
			 "movd %%mm5, %%ebx \n\t"
			 "movd %%mm7, %%edx \n\t"
			 "idivw %%bx \n\t"
			 "movd %%eax, %%mm7 \n\t"
			 "packuswb %%mm0, %%mm7 \n\t"
			 "movd %%mm7, %%eax \n\t"
			 "mov %%al, (%%edi) \n\t"
			 "movd %%mm3, %%edx \n\t"
			 "movd %%mm2, %%ebx \n\t"
			 "movd %%mm1, %%eax \n\t"

			 "movd %%mm6, %%esi \n\t"
			 "sub $104, %%edx \n\t"
			 "inc %%esi \n\t"
			 "inc %%edi \n\t"

			 "dec %%ecx \n\t"
			 "jnz .L10342 \n\t"
			 "add $6, %%esi \n\t"
			 "add $6, %%edi \n\t"
			 "dec %%ebx \n\t"
			 "jnz .L10340 \n\t"

			 "emms \n\t"
			 "popa \n\t":"=m" (Dest)
			 :"m"(Src),
			 "m"(rows),
			 "m"(columns),
			 "m"(Kernel),
			 "m"(Divisor)
			 );
#endif
#endif
		return (0);
	} else {
		/* No MMX available - this filter has no C fallback */
		return (-1);
	}
}
05007
/*!
\brief Convolve a byte image with a 9x9 signed 16-bit kernel and divide the result (MMX only).

For each interior pixel, accumulates a saturated signed-16-bit sum of the 9x9
neighborhood weighted by \c Kernel, divides it by \c Divisor via a signed
16-bit \c idiv, clamps to 0..255 with \c packuswb and writes it to \c Dest
offset by 4 rows + 4 columns (the kernel center); the 4-pixel border of
\c Dest is left untouched.  Processes (rows-8) x (columns-8) pixels.
Falls through to the error return when MMX is unavailable (no C fallback).

\param Src     Source byte buffer of rows*columns pixels.
\param Dest    Destination byte buffer, same dimensions as Src.
\param rows    Number of image rows; must be >= 9.
\param columns Number of image columns; must be >= 9.
\param Kernel  Signed 16-bit kernel; the code consumes 208 bytes of kernel data
               per pixel (three quadwords for each of the first 8 kernel rows,
               two for the ninth) and rewinds by 208 afterwards.
               NOTE(review): the exact expected kernel layout/padding is not
               visible here — confirm against the SDL_imageFilter docs.
\param Divisor Divisor applied to each convolution sum; must not be 0.

\return 0 on success, -1 on bad arguments or when MMX is disabled/absent.
*/
05022 int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05023 signed short *Kernel, unsigned char Divisor)
05024 {
05025
05026 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05027 return(-1);
05028
05029 if ((columns < 9) || (rows < 9) || (Divisor == 0))
05030 return (-1);
05031
05032 if ((SDL_imageFilterMMXdetect())) {
05033 #ifdef USE_MMX
05034 #if !defined(GCC__)
/* MSVC inline-assembly variant (Intel syntax). */
05035 __asm
05036 {
05037 pusha
05038 pxor mm0, mm0
05039 xor ebx, ebx
05040 mov bl, Divisor
05041 movd mm5, ebx
05042 mov edx, Kernel
05043 mov esi, Src
05044 mov edi, Dest
/* Point edi at the kernel-center output position: Dest + 4*columns + 4. */
05045 add edi, 4
05046 mov eax, columns
05047 add edi, eax
05048 add edi, eax
05049 add edi, eax
05050 add edi, eax
05051 mov ebx, rows
05052 sub ebx, 8
05053
05054 L10350:
05055 mov ecx, eax
05056 sub ecx, 8
05057 align 16
05058 L10352:
/* mm7 accumulates the convolution sum; mm6 saves the Src pixel pointer. */
05059 pxor mm7, mm7
05060 movd mm6, esi
05061
05062 movq mm1, [esi]
05063 movq mm2, mm1
05064 inc esi
05065 movq mm3, [edx]
05066 add edx, 8
05067 movq mm4, [edx]
05068 add edx, 8
05069 punpcklbw mm1, mm0
05070 punpckhbw mm2, mm0
05071 pmullw mm1, mm3
05072 pmullw mm2, mm4
05073 paddsw mm1, mm2
05074 paddsw mm7, mm1
05075 movq mm1, [esi]
05076 dec esi
05077 add esi, eax
05078 movq mm3, [edx]
05079 add edx, 8
05080 punpcklbw mm1, mm0
05081 pmullw mm1, mm3
05082 paddsw mm7, mm1
05083
05084 movq mm1, [esi]
05085 movq mm2, mm1
05086 inc esi
05087 movq mm3, [edx]
05088 add edx, 8
05089 movq mm4, [edx]
05090 add edx, 8
05091 punpcklbw mm1, mm0
05092 punpckhbw mm2, mm0
05093 pmullw mm1, mm3
05094 pmullw mm2, mm4
05095 paddsw mm1, mm2
05096 paddsw mm7, mm1
05097 movq mm1, [esi]
05098 dec esi
05099 add esi, eax
05100 movq mm3, [edx]
05101 add edx, 8
05102 punpcklbw mm1, mm0
05103 pmullw mm1, mm3
05104 paddsw mm7, mm1
05105
05106 movq mm1, [esi]
05107 movq mm2, mm1
05108 inc esi
05109 movq mm3, [edx]
05110 add edx, 8
05111 movq mm4, [edx]
05112 add edx, 8
05113 punpcklbw mm1, mm0
05114 punpckhbw mm2, mm0
05115 pmullw mm1, mm3
05116 pmullw mm2, mm4
05117 paddsw mm1, mm2
05118 paddsw mm7, mm1
05119 movq mm1, [esi]
05120 dec esi
05121 add esi, eax
05122 movq mm3, [edx]
05123 add edx, 8
05124 punpcklbw mm1, mm0
05125 pmullw mm1, mm3
05126 paddsw mm7, mm1
05127
05128 movq mm1, [esi]
05129 movq mm2, mm1
05130 inc esi
05131 movq mm3, [edx]
05132 add edx, 8
05133 movq mm4, [edx]
05134 add edx, 8
05135 punpcklbw mm1, mm0
05136 punpckhbw mm2, mm0
05137 pmullw mm1, mm3
05138 pmullw mm2, mm4
05139 paddsw mm1, mm2
05140 paddsw mm7, mm1
05141 movq mm1, [esi]
05142 dec esi
05143 add esi, eax
05144 movq mm3, [edx]
05145 add edx, 8
05146 punpcklbw mm1, mm0
05147 pmullw mm1, mm3
05148 paddsw mm7, mm1
05149
05150 movq mm1, [esi]
05151 movq mm2, mm1
05152 inc esi
05153 movq mm3, [edx]
05154 add edx, 8
05155 movq mm4, [edx]
05156 add edx, 8
05157 punpcklbw mm1, mm0
05158 punpckhbw mm2, mm0
05159 pmullw mm1, mm3
05160 pmullw mm2, mm4
05161 paddsw mm1, mm2
05162 paddsw mm7, mm1
05163 movq mm1, [esi]
05164 dec esi
05165 add esi, eax
05166 movq mm3, [edx]
05167 add edx, 8
05168 punpcklbw mm1, mm0
05169 pmullw mm1, mm3
05170 paddsw mm7, mm1
05171
05172 movq mm1, [esi]
05173 movq mm2, mm1
05174 inc esi
05175 movq mm3, [edx]
05176 add edx, 8
05177 movq mm4, [edx]
05178 add edx, 8
05179 punpcklbw mm1, mm0
05180 punpckhbw mm2, mm0
05181 pmullw mm1, mm3
05182 pmullw mm2, mm4
05183 paddsw mm1, mm2
05184 paddsw mm7, mm1
05185 movq mm1, [esi]
05186 dec esi
05187 add esi, eax
05188 movq mm3, [edx]
05189 add edx, 8
05190 punpcklbw mm1, mm0
05191 pmullw mm1, mm3
05192 paddsw mm7, mm1
05193
05194 movq mm1, [esi]
05195 movq mm2, mm1
05196 inc esi
05197 movq mm3, [edx]
05198 add edx, 8
05199 movq mm4, [edx]
05200 add edx, 8
05201 punpcklbw mm1, mm0
05202 punpckhbw mm2, mm0
05203 pmullw mm1, mm3
05204 pmullw mm2, mm4
05205 paddsw mm1, mm2
05206 paddsw mm7, mm1
05207 movq mm1, [esi]
05208 dec esi
05209 add esi, eax
05210 movq mm3, [edx]
05211 add edx, 8
05212 punpcklbw mm1, mm0
05213 pmullw mm1, mm3
05214 paddsw mm7, mm1
05215
05216 movq mm1, [esi]
05217 movq mm2, mm1
05218 inc esi
05219 movq mm3, [edx]
05220 add edx, 8
05221 movq mm4, [edx]
05222 add edx, 8
05223 punpcklbw mm1, mm0
05224 punpckhbw mm2, mm0
05225 pmullw mm1, mm3
05226 pmullw mm2, mm4
05227 paddsw mm1, mm2
05228 paddsw mm7, mm1
05229 movq mm1, [esi]
05230 dec esi
05231 add esi, eax
05232 movq mm3, [edx]
05233 add edx, 8
05234 punpcklbw mm1, mm0
05235 pmullw mm1, mm3
05236 paddsw mm7, mm1
05237
/* Ninth (final) kernel row: no further row advance of esi/edx. */
05238 movq mm1, [esi]
05239 movq mm2, mm1
05240 inc esi
05241 movq mm3, [edx]
05242 add edx, 8
05243 movq mm4, [edx]
05244 add edx, 8
05245 punpcklbw mm1, mm0
05246 punpckhbw mm2, mm0
05247 pmullw mm1, mm3
05248 pmullw mm2, mm4
05249 paddsw mm1, mm2
05250 paddsw mm7, mm1
05251 movq mm1, [esi]
05252 movq mm3, [edx]
05253 punpcklbw mm1, mm0
05254 pmullw mm1, mm3
05255 paddsw mm7, mm1
05256
/* Fold the four 16-bit partial sums in mm7 into the low word. */
05257 movq mm3, mm7
05258 psrlq mm7, 32
05259 paddsw mm7, mm3
05260 movq mm2, mm7
05261 psrlq mm7, 16
05262 paddsw mm7, mm2
05263
/* Save eax/ebx/edx in MMX regs, then do signed dx:ax / Divisor and clamp. */
05264 movd mm1, eax
05265 movd mm2, ebx
05266 movd mm3, edx
05267 movd eax, mm7
05268 psraw mm7, 15
05269 movd ebx, mm5
05270 movd edx, mm7
05271 idiv bx
05272 movd mm7, eax
05273 packuswb mm7, mm0
05274 movd eax, mm7
05275 mov [edi], al
05276 movd edx, mm3
05277 movd ebx, mm2
05278 movd eax, mm1
05279
/* Restore Src pointer, rewind Kernel by the 208 bytes consumed, advance. */
05280 movd esi, mm6
05281 sub edx, 208
05282 inc esi
05283 inc edi
05284
05285 dec ecx
05286 jnz L10352
/* Skip the 8-pixel border at the end of each row. */
05287 add esi, 8
05288 add edi, 8
05289 dec ebx
05290 jnz L10350
05291
05292 emms
05293 popa
05294 }
05295 #else
/* GCC extended-asm variant (AT&T syntax); operands:
   %0=Dest, %1=Src, %2=rows, %3=columns, %4=Kernel, %5=Divisor. */
05296 asm volatile
05297 ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
05298 "xor %%ebx, %%ebx \n\t"
05299 "mov %5, %%bl \n\t"
05300 "movd %%ebx, %%mm5 \n\t"
05301 "mov %4, %%edx \n\t"
05302 "mov %1, %%esi \n\t"
05303 "mov %0, %%edi \n\t"
/* edi = Dest + 4*columns + 4 (kernel-center output position). */
05304 "add $4, %%edi \n\t"
05305 "mov %3, %%eax \n\t"
05306 "add %%eax, %%edi \n\t"
05307 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
05308 "sub $8, %%ebx \n\t"
05309
05310 ".L10350: \n\t" "mov %%eax, %%ecx \n\t"
05311 "sub $8, %%ecx \n\t"
05312 ".align 16 \n\t"
/* Per-pixel: clear accumulator mm7, save Src pointer in mm6. */
05313 ".L10352: \n\t" "pxor %%mm7, %%mm7 \n\t"
05314 "movd %%esi, %%mm6 \n\t"
05315
/* Kernel row 1: 8-pixel MAC (low/high halves) plus 9th pixel, then next row. */
05316 "movq (%%esi), %%mm1 \n\t"
05317 "movq %%mm1, %%mm2 \n\t"
05318 "inc %%esi \n\t"
05319 "movq (%%edx), %%mm3 \n\t"
05320 "add $8, %%edx \n\t"
05321 "movq (%%edx), %%mm4 \n\t"
05322 "add $8, %%edx \n\t"
05323 "punpcklbw %%mm0, %%mm1 \n\t"
05324 "punpckhbw %%mm0, %%mm2 \n\t"
05325 "pmullw %%mm3, %%mm1 \n\t"
05326 "pmullw %%mm4, %%mm2 \n\t"
05327 "paddsw %%mm2, %%mm1 \n\t"
05328 "paddsw %%mm1, %%mm7 \n\t"
05329 "movq (%%esi), %%mm1 \n\t"
05330 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05331 "movq (%%edx), %%mm3 \n\t"
05332 "add $8, %%edx \n\t"
05333 "punpcklbw %%mm0, %%mm1 \n\t"
05334 "pmullw %%mm3, %%mm1 \n\t"
05335 "paddsw %%mm1, %%mm7 \n\t"
05336
05337 "movq (%%esi), %%mm1 \n\t"
05338 "movq %%mm1, %%mm2 \n\t"
05339 "inc %%esi \n\t"
05340 "movq (%%edx), %%mm3 \n\t"
05341 "add $8, %%edx \n\t"
05342 "movq (%%edx), %%mm4 \n\t"
05343 "add $8, %%edx \n\t"
05344 "punpcklbw %%mm0, %%mm1 \n\t"
05345 "punpckhbw %%mm0, %%mm2 \n\t"
05346 "pmullw %%mm3, %%mm1 \n\t"
05347 "pmullw %%mm4, %%mm2 \n\t"
05348 "paddsw %%mm2, %%mm1 \n\t"
05349 "paddsw %%mm1, %%mm7 \n\t"
05350 "movq (%%esi), %%mm1 \n\t"
05351 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05352 "movq (%%edx), %%mm3 \n\t"
05353 "add $8, %%edx \n\t"
05354 "punpcklbw %%mm0, %%mm1 \n\t"
05355 "pmullw %%mm3, %%mm1 \n\t"
05356 "paddsw %%mm1, %%mm7 \n\t"
05357
05358 "movq (%%esi), %%mm1 \n\t"
05359 "movq %%mm1, %%mm2 \n\t"
05360 "inc %%esi \n\t"
05361 "movq (%%edx), %%mm3 \n\t"
05362 "add $8, %%edx \n\t"
05363 "movq (%%edx), %%mm4 \n\t"
05364 "add $8, %%edx \n\t"
05365 "punpcklbw %%mm0, %%mm1 \n\t"
05366 "punpckhbw %%mm0, %%mm2 \n\t"
05367 "pmullw %%mm3, %%mm1 \n\t"
05368 "pmullw %%mm4, %%mm2 \n\t"
05369 "paddsw %%mm2, %%mm1 \n\t"
05370 "paddsw %%mm1, %%mm7 \n\t"
05371 "movq (%%esi), %%mm1 \n\t"
05372 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05373 "movq (%%edx), %%mm3 \n\t"
05374 "add $8, %%edx \n\t"
05375 "punpcklbw %%mm0, %%mm1 \n\t"
05376 "pmullw %%mm3, %%mm1 \n\t"
05377 "paddsw %%mm1, %%mm7 \n\t"
05378
05379 "movq (%%esi), %%mm1 \n\t"
05380 "movq %%mm1, %%mm2 \n\t"
05381 "inc %%esi \n\t"
05382 "movq (%%edx), %%mm3 \n\t"
05383 "add $8, %%edx \n\t"
05384 "movq (%%edx), %%mm4 \n\t"
05385 "add $8, %%edx \n\t"
05386 "punpcklbw %%mm0, %%mm1 \n\t"
05387 "punpckhbw %%mm0, %%mm2 \n\t"
05388 "pmullw %%mm3, %%mm1 \n\t"
05389 "pmullw %%mm4, %%mm2 \n\t"
05390 "paddsw %%mm2, %%mm1 \n\t"
05391 "paddsw %%mm1, %%mm7 \n\t"
05392 "movq (%%esi), %%mm1 \n\t"
05393 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05394 "movq (%%edx), %%mm3 \n\t"
05395 "add $8, %%edx \n\t"
05396 "punpcklbw %%mm0, %%mm1 \n\t"
05397 "pmullw %%mm3, %%mm1 \n\t"
05398 "paddsw %%mm1, %%mm7 \n\t"
05399
05400 "movq (%%esi), %%mm1 \n\t"
05401 "movq %%mm1, %%mm2 \n\t"
05402 "inc %%esi \n\t"
05403 "movq (%%edx), %%mm3 \n\t"
05404 "add $8, %%edx \n\t"
05405 "movq (%%edx), %%mm4 \n\t"
05406 "add $8, %%edx \n\t"
05407 "punpcklbw %%mm0, %%mm1 \n\t"
05408 "punpckhbw %%mm0, %%mm2 \n\t"
05409 "pmullw %%mm3, %%mm1 \n\t"
05410 "pmullw %%mm4, %%mm2 \n\t"
05411 "paddsw %%mm2, %%mm1 \n\t"
05412 "paddsw %%mm1, %%mm7 \n\t"
05413 "movq (%%esi), %%mm1 \n\t"
05414 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05415 "movq (%%edx), %%mm3 \n\t"
05416 "add $8, %%edx \n\t"
05417 "punpcklbw %%mm0, %%mm1 \n\t"
05418 "pmullw %%mm3, %%mm1 \n\t"
05419 "paddsw %%mm1, %%mm7 \n\t"
05420
05421 "movq (%%esi), %%mm1 \n\t"
05422 "movq %%mm1, %%mm2 \n\t"
05423 "inc %%esi \n\t"
05424 "movq (%%edx), %%mm3 \n\t"
05425 "add $8, %%edx \n\t"
05426 "movq (%%edx), %%mm4 \n\t"
05427 "add $8, %%edx \n\t"
05428 "punpcklbw %%mm0, %%mm1 \n\t"
05429 "punpckhbw %%mm0, %%mm2 \n\t"
05430 "pmullw %%mm3, %%mm1 \n\t"
05431 "pmullw %%mm4, %%mm2 \n\t"
05432 "paddsw %%mm2, %%mm1 \n\t"
05433 "paddsw %%mm1, %%mm7 \n\t"
05434 "movq (%%esi), %%mm1 \n\t"
05435 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05436 "movq (%%edx), %%mm3 \n\t"
05437 "add $8, %%edx \n\t"
05438 "punpcklbw %%mm0, %%mm1 \n\t"
05439 "pmullw %%mm3, %%mm1 \n\t"
05440 "paddsw %%mm1, %%mm7 \n\t"
05441
05442 "movq (%%esi), %%mm1 \n\t"
05443 "movq %%mm1, %%mm2 \n\t"
05444 "inc %%esi \n\t"
05445 "movq (%%edx), %%mm3 \n\t"
05446 "add $8, %%edx \n\t"
05447 "movq (%%edx), %%mm4 \n\t"
05448 "add $8, %%edx \n\t"
05449 "punpcklbw %%mm0, %%mm1 \n\t"
05450 "punpckhbw %%mm0, %%mm2 \n\t"
05451 "pmullw %%mm3, %%mm1 \n\t"
05452 "pmullw %%mm4, %%mm2 \n\t"
05453 "paddsw %%mm2, %%mm1 \n\t"
05454 "paddsw %%mm1, %%mm7 \n\t"
05455 "movq (%%esi), %%mm1 \n\t"
05456 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05457 "movq (%%edx), %%mm3 \n\t"
05458 "add $8, %%edx \n\t"
05459 "punpcklbw %%mm0, %%mm1 \n\t"
05460 "pmullw %%mm3, %%mm1 \n\t"
05461 "paddsw %%mm1, %%mm7 \n\t"
05462
05463 "movq (%%esi), %%mm1 \n\t"
05464 "movq %%mm1, %%mm2 \n\t"
05465 "inc %%esi \n\t"
05466 "movq (%%edx), %%mm3 \n\t"
05467 "add $8, %%edx \n\t"
05468 "movq (%%edx), %%mm4 \n\t"
05469 "add $8, %%edx \n\t"
05470 "punpcklbw %%mm0, %%mm1 \n\t"
05471 "punpckhbw %%mm0, %%mm2 \n\t"
05472 "pmullw %%mm3, %%mm1 \n\t"
05473 "pmullw %%mm4, %%mm2 \n\t"
05474 "paddsw %%mm2, %%mm1 \n\t"
05475 "paddsw %%mm1, %%mm7 \n\t"
05476 "movq (%%esi), %%mm1 \n\t"
05477 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
05478 "movq (%%edx), %%mm3 \n\t"
05479 "add $8, %%edx \n\t"
05480 "punpcklbw %%mm0, %%mm1 \n\t"
05481 "pmullw %%mm3, %%mm1 \n\t"
05482 "paddsw %%mm1, %%mm7 \n\t"
05483
/* Ninth (final) kernel row: no further row advance of esi/edx. */
05484 "movq (%%esi), %%mm1 \n\t"
05485 "movq %%mm1, %%mm2 \n\t"
05486 "inc %%esi \n\t"
05487 "movq (%%edx), %%mm3 \n\t"
05488 "add $8, %%edx \n\t"
05489 "movq (%%edx), %%mm4 \n\t"
05490 "add $8, %%edx \n\t"
05491 "punpcklbw %%mm0, %%mm1 \n\t"
05492 "punpckhbw %%mm0, %%mm2 \n\t"
05493 "pmullw %%mm3, %%mm1 \n\t"
05494 "pmullw %%mm4, %%mm2 \n\t"
05495 "paddsw %%mm2, %%mm1 \n\t"
05496 "paddsw %%mm1, %%mm7 \n\t"
05497 "movq (%%esi), %%mm1 \n\t"
05498 "movq (%%edx), %%mm3 \n\t"
05499 "punpcklbw %%mm0, %%mm1 \n\t"
05500 "pmullw %%mm3, %%mm1 \n\t"
05501 "paddsw %%mm1, %%mm7 \n\t"
05502
/* Fold the four 16-bit partial sums in mm7 into the low word. */
05503 "movq %%mm7, %%mm3 \n\t"
05504 "psrlq $32, %%mm7 \n\t"
05505 "paddsw %%mm3, %%mm7 \n\t"
05506 "movq %%mm7, %%mm2 \n\t"
05507 "psrlq $16, %%mm7 \n\t"
05508 "paddsw %%mm2, %%mm7 \n\t"
05509
/* Save eax/ebx/edx in MMX regs, then do signed dx:ax / Divisor and clamp. */
05510 "movd %%eax, %%mm1 \n\t"
05511 "movd %%ebx, %%mm2 \n\t"
05512 "movd %%edx, %%mm3 \n\t"
05513 "movd %%mm7, %%eax \n\t"
05514 "psraw $15, %%mm7 \n\t"
05515 "movd %%mm5, %%ebx \n\t"
05516 "movd %%mm7, %%edx \n\t"
05517 "idivw %%bx \n\t"
05518 "movd %%eax, %%mm7 \n\t"
05519 "packuswb %%mm0, %%mm7 \n\t"
05520 "movd %%mm7, %%eax \n\t"
05521 "mov %%al, (%%edi) \n\t"
05522 "movd %%mm3, %%edx \n\t"
05523 "movd %%mm2, %%ebx \n\t"
05524 "movd %%mm1, %%eax \n\t"
05525
/* Restore Src pointer, rewind Kernel by the 208 bytes consumed, advance. */
05526 "movd %%mm6, %%esi \n\t"
05527 "sub $208, %%edx \n\t"
05528 "inc %%esi \n\t"
05529 "inc %%edi \n\t"
05530
05531 "dec %%ecx \n\t"
05532 "jnz .L10352 \n\t"
/* Skip the 8-pixel border at the end of each row. */
05533 "add $8, %%esi \n\t"
05534 "add $8, %%edi \n\t"
05535 "dec %%ebx \n\t"
05536 "jnz .L10350 \n\t"
05537
05538 "emms \n\t"
05539 "popa \n\t":"=m" (Dest)
05540 :"m"(Src),
05541 "m"(rows),
05542 "m"(columns),
05543 "m"(Kernel),
05544 "m"(Divisor)
05545 );
05546 #endif
05547 #endif
05548 return (0);
05549 } else {
/* No C fallback for this filter; non-MMX systems get an error. */
05550
05551 return (-1);
05552 }
05553 }
05554
/*!
\brief Convolve a byte image with a 3x3 signed 16-bit kernel, right-shifting pixels first (MMX only).

For each interior pixel, unpacks 3 rows of source bytes to 16-bit words,
shifts each word right by \c NRightShift (psrlw), multiplies by one kernel
quadword per row (mm5/mm6/mm7), saturate-adds the products, folds the four
partial word sums, clamps to 0..255 with \c packuswb and stores the result at
\c Dest offset by columns+1 (the 3x3 center).  Processes
(rows-2) x (columns-2) pixels; the 1-pixel border of \c Dest is untouched.
NOTE(review): each kernel row is loaded as a full quadword (4 shorts), so
the kernel buffer layout/padding beyond 3 values per row must match the
library's documented convention — confirm.

\param Src         Source byte buffer of rows*columns pixels.
\param Dest        Destination byte buffer, same dimensions as Src.
\param rows        Number of image rows; must be >= 3.
\param columns     Number of image columns; must be >= 3.
\param Kernel      Signed 16-bit kernel data (three quadwords are read).
\param NRightShift Pre-multiply right shift applied to pixels; must be <= 7.

\return 0 on success, -1 on bad arguments or when MMX is disabled/absent.
*/
05569 int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05570 signed short *Kernel, unsigned char NRightShift)
05571 {
05572
05573 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05574 return(-1);
05575
05576 if ((columns < 3) || (rows < 3) || (NRightShift > 7))
05577 return (-1);
05578
05579 if ((SDL_imageFilterMMXdetect())) {
05580 #ifdef USE_MMX
05581 #if !defined(GCC__)
/* MSVC inline-assembly variant (Intel syntax). */
05582 __asm
05583 {
05584 pusha
05585 pxor mm0, mm0
05586 xor ebx, ebx
05587 mov bl, NRightShift
05588 movd mm4, ebx
/* Preload the three kernel rows into mm5/mm6/mm7 (kept for the whole image). */
05589 mov edx, Kernel
05590 movq mm5, [edx]
05591 add edx, 8
05592 movq mm6, [edx]
05593 add edx, 8
05594 movq mm7, [edx]
05595
/* edi = Dest + columns + 1 (3x3 center output position). */
05596 mov eax, columns
05597 mov esi, Src
05598 mov edi, Dest
05599 add edi, eax
05600 inc edi
05601 mov edx, rows
05602 sub edx, 2
05603
05604 L10360:
05605 mov ecx, eax
05606 sub ecx, 2
05607 align 16
05608 L10362:
05609
/* Load the three source rows, widen to words, pre-shift, MAC with kernel. */
05610 movq mm1, [esi]
05611 add esi, eax
05612 movq mm2, [esi]
05613 add esi, eax
05614 movq mm3, [esi]
05615 punpcklbw mm1, mm0
05616 punpcklbw mm2, mm0
05617 punpcklbw mm3, mm0
05618 psrlw mm1, mm4
05619 psrlw mm2, mm4
05620 psrlw mm3, mm4
05621 pmullw mm1, mm5
05622 pmullw mm2, mm6
05623 pmullw mm3, mm7
05624 paddsw mm1, mm2
05625 paddsw mm1, mm3
/* Fold the four 16-bit partial sums, clamp to a byte, store at center. */
05626 movq mm2, mm1
05627 psrlq mm1, 32
05628 paddsw mm1, mm2
05629 movq mm3, mm1
05630 psrlq mm1, 16
05631 paddsw mm1, mm3
05632 packuswb mm1, mm0
05633 movd ebx, mm1
05634 mov [edi], bl
05635
/* Rewind esi by the two row advances and step one pixel right. */
05636 sub esi, eax
05637 sub esi, eax
05638 inc esi
05639 inc edi
05640
05641 dec ecx
05642 jnz L10362
/* Skip the 2-pixel border at the end of each row. */
05643 add esi, 2
05644 add edi, 2
05645 dec edx
05646 jnz L10360
05647
05648 emms
05649 popa
05650 }
05651 #else
/* GCC extended-asm variant (AT&T syntax); operands:
   %0=Dest, %1=Src, %2=rows, %3=columns, %4=Kernel, %5=NRightShift. */
05652 asm volatile
05653 ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
05654 "xor %%ebx, %%ebx \n\t"
05655 "mov %5, %%bl \n\t"
05656 "movd %%ebx, %%mm4 \n\t"
/* Preload the three kernel rows into mm5/mm6/mm7 (kept for the whole image). */
05657 "mov %4, %%edx \n\t"
05658 "movq (%%edx), %%mm5 \n\t"
05659 "add $8, %%edx \n\t"
05660 "movq (%%edx), %%mm6 \n\t"
05661 "add $8, %%edx \n\t"
05662 "movq (%%edx), %%mm7 \n\t"
05663
/* edi = Dest + columns + 1 (3x3 center output position). */
05664 "mov %3, %%eax \n\t"
05665 "mov %1, %%esi \n\t"
05666 "mov %0, %%edi \n\t"
05667 "add %%eax, %%edi \n\t"
05668 "inc %%edi \n\t"
05669 "mov %2, %%edx \n\t"
05670 "sub $2, %%edx \n\t"
05671
05672 ".L10360: \n\t" "mov %%eax, %%ecx \n\t"
05673 "sub $2, %%ecx \n\t"
05674 ".align 16 \n\t"
05675 ".L10362: \n\t"
05676
/* Load the three source rows, widen to words, pre-shift, MAC with kernel. */
05677 "movq (%%esi), %%mm1 \n\t"
05678 "add %%eax, %%esi \n\t"
05679 "movq (%%esi), %%mm2 \n\t"
05680 "add %%eax, %%esi \n\t"
05681 "movq (%%esi), %%mm3 \n\t"
05682 "punpcklbw %%mm0, %%mm1 \n\t"
05683 "punpcklbw %%mm0, %%mm2 \n\t"
05684 "punpcklbw %%mm0, %%mm3 \n\t"
05685 "psrlw %%mm4, %%mm1 \n\t"
05686 "psrlw %%mm4, %%mm2 \n\t"
05687 "psrlw %%mm4, %%mm3 \n\t"
05688 "pmullw %%mm5, %%mm1 \n\t"
05689 "pmullw %%mm6, %%mm2 \n\t"
05690 "pmullw %%mm7, %%mm3 \n\t"
05691 "paddsw %%mm2, %%mm1 \n\t"
05692 "paddsw %%mm3, %%mm1 \n\t"
/* Fold the four 16-bit partial sums, clamp to a byte, store at center. */
05693 "movq %%mm1, %%mm2 \n\t"
05694 "psrlq $32, %%mm1 \n\t"
05695 "paddsw %%mm2, %%mm1 \n\t"
05696 "movq %%mm1, %%mm3 \n\t"
05697 "psrlq $16, %%mm1 \n\t"
05698 "paddsw %%mm3, %%mm1 \n\t"
05699 "packuswb %%mm0, %%mm1 \n\t"
05700 "movd %%mm1, %%ebx \n\t"
05701 "mov %%bl, (%%edi) \n\t"
05702
/* Rewind esi by the two row advances and step one pixel right. */
05703 "sub %%eax, %%esi \n\t"
05704 "sub %%eax, %%esi \n\t" "inc %%esi \n\t"
05705 "inc %%edi \n\t"
05706
05707 "dec %%ecx \n\t"
05708 "jnz .L10362 \n\t"
/* Skip the 2-pixel border at the end of each row. */
05709 "add $2, %%esi \n\t"
05710 "add $2, %%edi \n\t"
05711 "dec %%edx \n\t"
05712 "jnz .L10360 \n\t"
05713
05714 "emms \n\t"
05715 "popa \n\t":"=m" (Dest)
05716 :"m"(Src),
05717 "m"(rows),
05718 "m"(columns),
05719 "m"(Kernel),
05720 "m"(NRightShift)
05721 );
05722 #endif
05723 #endif
05724 return (0);
05725 } else {
/* No C fallback for this filter; non-MMX systems get an error. */
05726
05727 return (-1);
05728 }
05729 }
05730
/*!
\brief Convolve a byte image with a 5x5 signed 16-bit kernel, right-shifting pixels first (MMX only).

For each interior pixel, performs five row MACs: each row loads 8 source
bytes, widens to words, shifts right by \c NRightShift, multiplies by two
kernel quadwords and saturate-accumulates into mm7.  The four partial sums
are folded, clamped to 0..255 and stored at \c Dest offset by
2*columns + 2 (the 5x5 center).  Processes (rows-4) x (columns-4) pixels;
the 2-pixel border of \c Dest is untouched.
NOTE(review): 16 bytes of kernel data are read per row (80 total, rewound
via "sub edx, 72" after the final un-advanced row) — confirm the expected
kernel layout/padding against the library documentation.

\param Src         Source byte buffer of rows*columns pixels.
\param Dest        Destination byte buffer, same dimensions as Src.
\param rows        Number of image rows; must be >= 5.
\param columns     Number of image columns; must be >= 5.
\param Kernel      Signed 16-bit kernel data.
\param NRightShift Pre-multiply right shift applied to pixels; must be <= 7.

\return 0 on success, -1 on bad arguments or when MMX is disabled/absent.
*/
05745 int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
05746 signed short *Kernel, unsigned char NRightShift)
05747 {
05748
05749 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
05750 return(-1);
05751
05752 if ((columns < 5) || (rows < 5) || (NRightShift > 7))
05753 return (-1);
05754
05755 if ((SDL_imageFilterMMXdetect())) {
05756 #ifdef USE_MMX
05757 #if !defined(GCC__)
/* MSVC inline-assembly variant (Intel syntax). */
05758 __asm
05759 {
05760 pusha
05761 pxor mm0, mm0
05762 xor ebx, ebx
05763 mov bl, NRightShift
05764 movd mm5, ebx
05765 mov edx, Kernel
05766 mov esi, Src
05767 mov edi, Dest
/* edi = Dest + 2*columns + 2 (5x5 center output position). */
05768 add edi, 2
05769 mov eax, columns
05770 shl eax, 1
05771 add edi, eax
05772 shr eax, 1
05773 mov ebx, rows
05774 sub ebx, 4
05775
05776 L10370:
05777 mov ecx, eax
05778 sub ecx, 4
05779 align 16
05780 L10372:
/* mm7 accumulates the convolution sum; mm6 saves the Src pixel pointer. */
05781 pxor mm7, mm7
05782 movd mm6, esi
05783
/* Kernel row 1: widen 8 pixels, pre-shift, MAC with two kernel quads. */
05784 movq mm1, [esi]
05785 movq mm2, mm1
05786 add esi, eax
05787 movq mm3, [edx]
05788 add edx, 8
05789 movq mm4, [edx]
05790 add edx, 8
05791 punpcklbw mm1, mm0
05792 punpckhbw mm2, mm0
05793 psrlw mm1, mm5
05794 psrlw mm2, mm5
05795 pmullw mm1, mm3
05796 pmullw mm2, mm4
05797 paddsw mm1, mm2
05798 paddsw mm7, mm1
05799
05800 movq mm1, [esi]
05801 movq mm2, mm1
05802 add esi, eax
05803 movq mm3, [edx]
05804 add edx, 8
05805 movq mm4, [edx]
05806 add edx, 8
05807 punpcklbw mm1, mm0
05808 punpckhbw mm2, mm0
05809 psrlw mm1, mm5
05810 psrlw mm2, mm5
05811 pmullw mm1, mm3
05812 pmullw mm2, mm4
05813 paddsw mm1, mm2
05814 paddsw mm7, mm1
05815
05816 movq mm1, [esi]
05817 movq mm2, mm1
05818 add esi, eax
05819 movq mm3, [edx]
05820 add edx, 8
05821 movq mm4, [edx]
05822 add edx, 8
05823 punpcklbw mm1, mm0
05824 punpckhbw mm2, mm0
05825 psrlw mm1, mm5
05826 psrlw mm2, mm5
05827 pmullw mm1, mm3
05828 pmullw mm2, mm4
05829 paddsw mm1, mm2
05830 paddsw mm7, mm1
05831
05832 movq mm1, [esi]
05833 movq mm2, mm1
05834 add esi, eax
05835 movq mm3, [edx]
05836 add edx, 8
05837 movq mm4, [edx]
05838 add edx, 8
05839 punpcklbw mm1, mm0
05840 punpckhbw mm2, mm0
05841 psrlw mm1, mm5
05842 psrlw mm2, mm5
05843 pmullw mm1, mm3
05844 pmullw mm2, mm4
05845 paddsw mm1, mm2
05846 paddsw mm7, mm1
05847
/* Fifth (final) kernel row: no further row advance of esi/edx. */
05848 movq mm1, [esi]
05849 movq mm2, mm1
05850 movq mm3, [edx]
05851 add edx, 8
05852 movq mm4, [edx]
05853 punpcklbw mm1, mm0
05854 punpckhbw mm2, mm0
05855 psrlw mm1, mm5
05856 psrlw mm2, mm5
05857 pmullw mm1, mm3
05858 pmullw mm2, mm4
05859 paddsw mm1, mm2
05860 paddsw mm7, mm1
05861
/* Fold the four 16-bit partial sums, clamp to a byte, store. */
05862 movq mm3, mm7
05863 psrlq mm7, 32
05864 paddsw mm7, mm3
05865 movq mm2, mm7
05866 psrlq mm7, 16
05867 paddsw mm7, mm2
05868 movd mm1, eax
05869 packuswb mm7, mm0
05870 movd eax, mm7
05871 mov [edi], al
05872 movd eax, mm1
05873
/* Restore Src pointer, rewind Kernel by the 72 bytes consumed, advance. */
05874 movd esi, mm6
05875 sub edx, 72
05876 inc esi
05877 inc edi
05878
05879 dec ecx
05880 jnz L10372
/* Skip the 4-pixel border at the end of each row. */
05881 add esi, 4
05882 add edi, 4
05883 dec ebx
05884 jnz L10370
05885
05886 emms
05887 popa
05888 }
05889 #else
/* GCC extended-asm variant (AT&T syntax); operands:
   %0=Dest, %1=Src, %2=rows, %3=columns, %4=Kernel, %5=NRightShift. */
05890 asm volatile
05891 ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
05892 "xor %%ebx, %%ebx \n\t"
05893 "mov %5, %%bl \n\t"
05894 "movd %%ebx, %%mm5 \n\t"
05895 "mov %4, %%edx \n\t"
05896 "mov %1, %%esi \n\t"
05897 "mov %0, %%edi \n\t"
/* edi = Dest + 2*columns + 2 (5x5 center output position). */
05898 "add $2, %%edi \n\t"
05899 "mov %3, %%eax \n\t"
05900 "shl $1, %%eax \n\t"
05901 "add %%eax, %%edi \n\t"
05902 "shr $1, %%eax \n\t"
05903 "mov %2, %%ebx \n\t"
05904 "sub $4, %%ebx \n\t"
05905
05906 ".L10370: \n\t" "mov %%eax, %%ecx \n\t"
05907 "sub $4, %%ecx \n\t"
05908 ".align 16 \n\t"
/* Per-pixel: clear accumulator mm7, save Src pointer in mm6. */
05909 ".L10372: \n\t" "pxor %%mm7, %%mm7 \n\t"
05910 "movd %%esi, %%mm6 \n\t"
05911
/* Kernel row 1: widen 8 pixels, pre-shift, MAC with two kernel quads. */
05912 "movq (%%esi), %%mm1 \n\t"
05913 "movq %%mm1, %%mm2 \n\t"
05914 "add %%eax, %%esi \n\t"
05915 "movq (%%edx), %%mm3 \n\t"
05916 "add $8, %%edx \n\t"
05917 "movq (%%edx), %%mm4 \n\t"
05918 "add $8, %%edx \n\t"
05919 "punpcklbw %%mm0, %%mm1 \n\t"
05920 "punpckhbw %%mm0, %%mm2 \n\t"
05921 "psrlw %%mm5, %%mm1 \n\t"
05922 "psrlw %%mm5, %%mm2 \n\t"
05923 "pmullw %%mm3, %%mm1 \n\t"
05924 "pmullw %%mm4, %%mm2 \n\t"
05925 "paddsw %%mm2, %%mm1 \n\t"
05926 "paddsw %%mm1, %%mm7 \n\t"
05927
05928 "movq (%%esi), %%mm1 \n\t"
05929 "movq %%mm1, %%mm2 \n\t"
05930 "add %%eax, %%esi \n\t"
05931 "movq (%%edx), %%mm3 \n\t"
05932 "add $8, %%edx \n\t"
05933 "movq (%%edx), %%mm4 \n\t"
05934 "add $8, %%edx \n\t"
05935 "punpcklbw %%mm0, %%mm1 \n\t"
05936 "punpckhbw %%mm0, %%mm2 \n\t"
05937 "psrlw %%mm5, %%mm1 \n\t"
05938 "psrlw %%mm5, %%mm2 \n\t"
05939 "pmullw %%mm3, %%mm1 \n\t"
05940 "pmullw %%mm4, %%mm2 \n\t"
05941 "paddsw %%mm2, %%mm1 \n\t"
05942 "paddsw %%mm1, %%mm7 \n\t"
05943
05944 "movq (%%esi), %%mm1 \n\t"
05945 "movq %%mm1, %%mm2 \n\t"
05946 "add %%eax, %%esi \n\t"
05947 "movq (%%edx), %%mm3 \n\t"
05948 "add $8, %%edx \n\t"
05949 "movq (%%edx), %%mm4 \n\t"
05950 "add $8, %%edx \n\t"
05951 "punpcklbw %%mm0, %%mm1 \n\t"
05952 "punpckhbw %%mm0, %%mm2 \n\t"
05953 "psrlw %%mm5, %%mm1 \n\t"
05954 "psrlw %%mm5, %%mm2 \n\t"
05955 "pmullw %%mm3, %%mm1 \n\t"
05956 "pmullw %%mm4, %%mm2 \n\t"
05957 "paddsw %%mm2, %%mm1 \n\t"
05958 "paddsw %%mm1, %%mm7 \n\t"
05959
05960 "movq (%%esi), %%mm1 \n\t"
05961 "movq %%mm1, %%mm2 \n\t"
05962 "add %%eax, %%esi \n\t"
05963 "movq (%%edx), %%mm3 \n\t"
05964 "add $8, %%edx \n\t"
05965 "movq (%%edx), %%mm4 \n\t"
05966 "add $8, %%edx \n\t"
05967 "punpcklbw %%mm0, %%mm1 \n\t"
05968 "punpckhbw %%mm0, %%mm2 \n\t"
05969 "psrlw %%mm5, %%mm1 \n\t"
05970 "psrlw %%mm5, %%mm2 \n\t"
05971 "pmullw %%mm3, %%mm1 \n\t"
05972 "pmullw %%mm4, %%mm2 \n\t"
05973 "paddsw %%mm2, %%mm1 \n\t"
05974 "paddsw %%mm1, %%mm7 \n\t"
05975
/* Fifth (final) kernel row: no further row advance of esi/edx. */
05976 "movq (%%esi), %%mm1 \n\t"
05977 "movq %%mm1, %%mm2 \n\t"
05978 "movq (%%edx), %%mm3 \n\t"
05979 "add $8, %%edx \n\t"
05980 "movq (%%edx), %%mm4 \n\t"
05981 "punpcklbw %%mm0, %%mm1 \n\t"
05982 "punpckhbw %%mm0, %%mm2 \n\t"
05983 "psrlw %%mm5, %%mm1 \n\t"
05984 "psrlw %%mm5, %%mm2 \n\t"
05985 "pmullw %%mm3, %%mm1 \n\t"
05986 "pmullw %%mm4, %%mm2 \n\t"
05987 "paddsw %%mm2, %%mm1 \n\t"
05988 "paddsw %%mm1, %%mm7 \n\t"
05989
/* Fold the four 16-bit partial sums, clamp to a byte, store. */
05990 "movq %%mm7, %%mm3 \n\t"
05991 "psrlq $32, %%mm7 \n\t"
05992 "paddsw %%mm3, %%mm7 \n\t"
05993 "movq %%mm7, %%mm2 \n\t"
05994 "psrlq $16, %%mm7 \n\t"
05995 "paddsw %%mm2, %%mm7 \n\t"
05996 "movd %%eax, %%mm1 \n\t"
05997 "packuswb %%mm0, %%mm7 \n\t"
05998 "movd %%mm7, %%eax \n\t"
05999 "mov %%al, (%%edi) \n\t"
06000 "movd %%mm1, %%eax \n\t"
06001
/* Restore Src pointer, rewind Kernel by the 72 bytes consumed, advance. */
06002 "movd %%mm6, %%esi \n\t"
06003 "sub $72, %%edx \n\t"
06004 "inc %%esi \n\t"
06005 "inc %%edi \n\t"
06006
06007 "dec %%ecx \n\t"
06008 "jnz .L10372 \n\t"
/* Skip the 4-pixel border at the end of each row. */
06009 "add $4, %%esi \n\t"
06010 "add $4, %%edi \n\t"
06011 "dec %%ebx \n\t"
06012 "jnz .L10370 \n\t"
06013
06014 "emms \n\t"
06015 "popa \n\t":"=m" (Dest)
06016 :"m"(Src),
06017 "m"(rows),
06018 "m"(columns),
06019 "m"(Kernel),
06020 "m"(NRightShift)
06021 );
06022 #endif
06023 #endif
06024 return (0);
06025 } else {
/* No C fallback for this filter; non-MMX systems get an error. */
06026
06027 return (-1);
06028 }
06029 }
06030
/*!
\brief Convolve a byte image with a 7x7 signed 16-bit kernel, right-shifting pixels first (MMX only).

For each interior pixel, performs seven row MACs: each row loads 8 source
bytes, widens to words, shifts right by \c NRightShift, multiplies by two
kernel quadwords and saturate-accumulates into mm7.  The four partial sums
are folded, clamped to 0..255 and stored at \c Dest offset by
3*columns + 3 (the 7x7 center).  Processes (rows-6) x (columns-6) pixels;
the 3-pixel border of \c Dest is untouched.
NOTE(review): 16 bytes of kernel data are read per row (112 total, rewound
via "sub edx, 104" after the final un-advanced row) — confirm the expected
kernel layout/padding against the library documentation.

\param Src         Source byte buffer of rows*columns pixels.
\param Dest        Destination byte buffer, same dimensions as Src.
\param rows        Number of image rows; must be >= 7.
\param columns     Number of image columns; must be >= 7.
\param Kernel      Signed 16-bit kernel data.
\param NRightShift Pre-multiply right shift applied to pixels; must be <= 7.

\return 0 on success, -1 on bad arguments or when MMX is disabled/absent.
*/
06045 int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
06046 signed short *Kernel, unsigned char NRightShift)
06047 {
06048
06049 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
06050 return(-1);
06051
06052 if ((columns < 7) || (rows < 7) || (NRightShift > 7))
06053 return (-1);
06054
06055 if ((SDL_imageFilterMMXdetect())) {
06056 #ifdef USE_MMX
06057 #if !defined(GCC__)
/* MSVC inline-assembly variant (Intel syntax). */
06058 __asm
06059 {
06060 pusha
06061 pxor mm0, mm0
06062 xor ebx, ebx
06063 mov bl, NRightShift
06064 movd mm5, ebx
06065 mov edx, Kernel
06066 mov esi, Src
06067 mov edi, Dest
/* edi = Dest + 3*columns + 3 (7x7 center output position). */
06068 add edi, 3
06069 mov eax, columns
06070 add edi, eax
06071 add edi, eax
06072 add edi, eax
06073 mov ebx, rows
06074 sub ebx, 6
06075
06076 L10380:
06077 mov ecx, eax
06078 sub ecx, 6
06079 align 16
06080 L10382:
/* mm7 accumulates the convolution sum; mm6 saves the Src pixel pointer. */
06081 pxor mm7, mm7
06082 movd mm6, esi
06083
/* Kernel row 1: widen 8 pixels, pre-shift, MAC with two kernel quads. */
06084 movq mm1, [esi]
06085 movq mm2, mm1
06086 add esi, eax
06087 movq mm3, [edx]
06088 add edx, 8
06089 movq mm4, [edx]
06090 add edx, 8
06091 punpcklbw mm1, mm0
06092 punpckhbw mm2, mm0
06093 psrlw mm1, mm5
06094 psrlw mm2, mm5
06095 pmullw mm1, mm3
06096 pmullw mm2, mm4
06097 paddsw mm1, mm2
06098 paddsw mm7, mm1
06099
06100 movq mm1, [esi]
06101 movq mm2, mm1
06102 add esi, eax
06103 movq mm3, [edx]
06104 add edx, 8
06105 movq mm4, [edx]
06106 add edx, 8
06107 punpcklbw mm1, mm0
06108 punpckhbw mm2, mm0
06109 psrlw mm1, mm5
06110 psrlw mm2, mm5
06111 pmullw mm1, mm3
06112 pmullw mm2, mm4
06113 paddsw mm1, mm2
06114 paddsw mm7, mm1
06115
06116 movq mm1, [esi]
06117 movq mm2, mm1
06118 add esi, eax
06119 movq mm3, [edx]
06120 add edx, 8
06121 movq mm4, [edx]
06122 add edx, 8
06123 punpcklbw mm1, mm0
06124 punpckhbw mm2, mm0
06125 psrlw mm1, mm5
06126 psrlw mm2, mm5
06127 pmullw mm1, mm3
06128 pmullw mm2, mm4
06129 paddsw mm1, mm2
06130 paddsw mm7, mm1
06131
06132 movq mm1, [esi]
06133 movq mm2, mm1
06134 add esi, eax
06135 movq mm3, [edx]
06136 add edx, 8
06137 movq mm4, [edx]
06138 add edx, 8
06139 punpcklbw mm1, mm0
06140 punpckhbw mm2, mm0
06141 psrlw mm1, mm5
06142 psrlw mm2, mm5
06143 pmullw mm1, mm3
06144 pmullw mm2, mm4
06145 paddsw mm1, mm2
06146 paddsw mm7, mm1
06147
06148 movq mm1, [esi]
06149 movq mm2, mm1
06150 add esi, eax
06151 movq mm3, [edx]
06152 add edx, 8
06153 movq mm4, [edx]
06154 add edx, 8
06155 punpcklbw mm1, mm0
06156 punpckhbw mm2, mm0
06157 psrlw mm1, mm5
06158 psrlw mm2, mm5
06159 pmullw mm1, mm3
06160 pmullw mm2, mm4
06161 paddsw mm1, mm2
06162 paddsw mm7, mm1
06163
06164 movq mm1, [esi]
06165 movq mm2, mm1
06166 add esi, eax
06167 movq mm3, [edx]
06168 add edx, 8
06169 movq mm4, [edx]
06170 add edx, 8
06171 punpcklbw mm1, mm0
06172 punpckhbw mm2, mm0
06173 psrlw mm1, mm5
06174 psrlw mm2, mm5
06175 pmullw mm1, mm3
06176 pmullw mm2, mm4
06177 paddsw mm1, mm2
06178 paddsw mm7, mm1
06179
/* Seventh (final) kernel row: no further row advance of esi/edx. */
06180 movq mm1, [esi]
06181 movq mm2, mm1
06182 movq mm3, [edx]
06183 add edx, 8
06184 movq mm4, [edx]
06185 punpcklbw mm1, mm0
06186 punpckhbw mm2, mm0
06187 psrlw mm1, mm5
06188 psrlw mm2, mm5
06189 pmullw mm1, mm3
06190 pmullw mm2, mm4
06191 paddsw mm1, mm2
06192 paddsw mm7, mm1
06193
/* Fold the four 16-bit partial sums, clamp to a byte, store. */
06194 movq mm3, mm7
06195 psrlq mm7, 32
06196 paddsw mm7, mm3
06197 movq mm2, mm7
06198 psrlq mm7, 16
06199 paddsw mm7, mm2
06200 movd mm1, eax
06201 packuswb mm7, mm0
06202 movd eax, mm7
06203 mov [edi], al
06204 movd eax, mm1
06205
/* Restore Src pointer, rewind Kernel by the 104 bytes consumed, advance. */
06206 movd esi, mm6
06207 sub edx, 104
06208 inc esi
06209 inc edi
06210
06211 dec ecx
06212 jnz L10382
/* Skip the 6-pixel border at the end of each row. */
06213 add esi, 6
06214 add edi, 6
06215 dec ebx
06216 jnz L10380
06217
06218 emms
06219 popa
06220 }
06221 #else
/* GCC extended-asm variant (AT&T syntax); operands:
   %0=Dest, %1=Src, %2=rows, %3=columns, %4=Kernel, %5=NRightShift. */
06222 asm volatile
06223 ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
06224 "xor %%ebx, %%ebx \n\t"
06225 "mov %5, %%bl \n\t"
06226 "movd %%ebx, %%mm5 \n\t"
06227 "mov %4, %%edx \n\t"
06228 "mov %1, %%esi \n\t"
06229 "mov %0, %%edi \n\t"
/* edi = Dest + 3*columns + 3 (7x7 center output position). */
06230 "add $3, %%edi \n\t"
06231 "mov %3, %%eax \n\t"
06232 "add %%eax, %%edi \n\t"
06233 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
06234 "sub $6, %%ebx \n\t"
06235
06236 ".L10380: \n\t" "mov %%eax, %%ecx \n\t"
06237 "sub $6, %%ecx \n\t"
06238 ".align 16 \n\t"
/* Per-pixel: clear accumulator mm7, save Src pointer in mm6. */
06239 ".L10382: \n\t" "pxor %%mm7, %%mm7 \n\t"
06240 "movd %%esi, %%mm6 \n\t"
06241
/* Kernel row 1: widen 8 pixels, pre-shift, MAC with two kernel quads. */
06242 "movq (%%esi), %%mm1 \n\t"
06243 "movq %%mm1, %%mm2 \n\t"
06244 "add %%eax, %%esi \n\t"
06245 "movq (%%edx), %%mm3 \n\t"
06246 "add $8, %%edx \n\t"
06247 "movq (%%edx), %%mm4 \n\t"
06248 "add $8, %%edx \n\t"
06249 "punpcklbw %%mm0, %%mm1 \n\t"
06250 "punpckhbw %%mm0, %%mm2 \n\t"
06251 "psrlw %%mm5, %%mm1 \n\t"
06252 "psrlw %%mm5, %%mm2 \n\t"
06253 "pmullw %%mm3, %%mm1 \n\t"
06254 "pmullw %%mm4, %%mm2 \n\t"
06255 "paddsw %%mm2, %%mm1 \n\t"
06256 "paddsw %%mm1, %%mm7 \n\t"
06257
06258 "movq (%%esi), %%mm1 \n\t"
06259 "movq %%mm1, %%mm2 \n\t"
06260 "add %%eax, %%esi \n\t"
06261 "movq (%%edx), %%mm3 \n\t"
06262 "add $8, %%edx \n\t"
06263 "movq (%%edx), %%mm4 \n\t"
06264 "add $8, %%edx \n\t"
06265 "punpcklbw %%mm0, %%mm1 \n\t"
06266 "punpckhbw %%mm0, %%mm2 \n\t"
06267 "psrlw %%mm5, %%mm1 \n\t"
06268 "psrlw %%mm5, %%mm2 \n\t"
06269 "pmullw %%mm3, %%mm1 \n\t"
06270 "pmullw %%mm4, %%mm2 \n\t"
06271 "paddsw %%mm2, %%mm1 \n\t"
06272 "paddsw %%mm1, %%mm7 \n\t"
06273
06274 "movq (%%esi), %%mm1 \n\t"
06275 "movq %%mm1, %%mm2 \n\t"
06276 "add %%eax, %%esi \n\t"
06277 "movq (%%edx), %%mm3 \n\t"
06278 "add $8, %%edx \n\t"
06279 "movq (%%edx), %%mm4 \n\t"
06280 "add $8, %%edx \n\t"
06281 "punpcklbw %%mm0, %%mm1 \n\t"
06282 "punpckhbw %%mm0, %%mm2 \n\t"
06283 "psrlw %%mm5, %%mm1 \n\t"
06284 "psrlw %%mm5, %%mm2 \n\t"
06285 "pmullw %%mm3, %%mm1 \n\t"
06286 "pmullw %%mm4, %%mm2 \n\t"
06287 "paddsw %%mm2, %%mm1 \n\t"
06288 "paddsw %%mm1, %%mm7 \n\t"
06289
06290 "movq (%%esi), %%mm1 \n\t"
06291 "movq %%mm1, %%mm2 \n\t"
06292 "add %%eax, %%esi \n\t"
06293 "movq (%%edx), %%mm3 \n\t"
06294 "add $8, %%edx \n\t"
06295 "movq (%%edx), %%mm4 \n\t"
06296 "add $8, %%edx \n\t"
06297 "punpcklbw %%mm0, %%mm1 \n\t"
06298 "punpckhbw %%mm0, %%mm2 \n\t"
06299 "psrlw %%mm5, %%mm1 \n\t"
06300 "psrlw %%mm5, %%mm2 \n\t"
06301 "pmullw %%mm3, %%mm1 \n\t"
06302 "pmullw %%mm4, %%mm2 \n\t"
06303 "paddsw %%mm2, %%mm1 \n\t"
06304 "paddsw %%mm1, %%mm7 \n\t"
06305
06306 "movq (%%esi), %%mm1 \n\t"
06307 "movq %%mm1, %%mm2 \n\t"
06308 "add %%eax, %%esi \n\t"
06309 "movq (%%edx), %%mm3 \n\t"
06310 "add $8, %%edx \n\t"
06311 "movq (%%edx), %%mm4 \n\t"
06312 "add $8, %%edx \n\t"
06313 "punpcklbw %%mm0, %%mm1 \n\t"
06314 "punpckhbw %%mm0, %%mm2 \n\t"
06315 "psrlw %%mm5, %%mm1 \n\t"
06316 "psrlw %%mm5, %%mm2 \n\t"
06317 "pmullw %%mm3, %%mm1 \n\t"
06318 "pmullw %%mm4, %%mm2 \n\t"
06319 "paddsw %%mm2, %%mm1 \n\t"
06320 "paddsw %%mm1, %%mm7 \n\t"
06321
06322 "movq (%%esi), %%mm1 \n\t"
06323 "movq %%mm1, %%mm2 \n\t"
06324 "add %%eax, %%esi \n\t"
06325 "movq (%%edx), %%mm3 \n\t"
06326 "add $8, %%edx \n\t"
06327 "movq (%%edx), %%mm4 \n\t"
06328 "add $8, %%edx \n\t"
06329 "punpcklbw %%mm0, %%mm1 \n\t"
06330 "punpckhbw %%mm0, %%mm2 \n\t"
06331 "psrlw %%mm5, %%mm1 \n\t"
06332 "psrlw %%mm5, %%mm2 \n\t"
06333 "pmullw %%mm3, %%mm1 \n\t"
06334 "pmullw %%mm4, %%mm2 \n\t"
06335 "paddsw %%mm2, %%mm1 \n\t"
06336 "paddsw %%mm1, %%mm7 \n\t"
06337
/* Seventh (final) kernel row: no further row advance of esi/edx. */
06338 "movq (%%esi), %%mm1 \n\t"
06339 "movq %%mm1, %%mm2 \n\t"
06340 "movq (%%edx), %%mm3 \n\t"
06341 "add $8, %%edx \n\t"
06342 "movq (%%edx), %%mm4 \n\t"
06343 "punpcklbw %%mm0, %%mm1 \n\t"
06344 "punpckhbw %%mm0, %%mm2 \n\t"
06345 "psrlw %%mm5, %%mm1 \n\t"
06346 "psrlw %%mm5, %%mm2 \n\t"
06347 "pmullw %%mm3, %%mm1 \n\t"
06348 "pmullw %%mm4, %%mm2 \n\t"
06349 "paddsw %%mm2, %%mm1 \n\t"
06350 "paddsw %%mm1, %%mm7 \n\t"
06351
/* Fold the four 16-bit partial sums, clamp to a byte, store. */
06352 "movq %%mm7, %%mm3 \n\t"
06353 "psrlq $32, %%mm7 \n\t"
06354 "paddsw %%mm3, %%mm7 \n\t"
06355 "movq %%mm7, %%mm2 \n\t"
06356 "psrlq $16, %%mm7 \n\t"
06357 "paddsw %%mm2, %%mm7 \n\t"
06358 "movd %%eax, %%mm1 \n\t"
06359 "packuswb %%mm0, %%mm7 \n\t"
06360 "movd %%mm7, %%eax \n\t"
06361 "mov %%al, (%%edi) \n\t"
06362 "movd %%mm1, %%eax \n\t"
06363
/* Restore Src pointer, rewind Kernel by the 104 bytes consumed, advance. */
06364 "movd %%mm6, %%esi \n\t"
06365 "sub $104, %%edx \n\t"
06366 "inc %%esi \n\t"
06367 "inc %%edi \n\t"
06368
06369 "dec %%ecx \n\t"
06370 "jnz .L10382 \n\t"
/* Skip the 6-pixel border at the end of each row. */
06371 "add $6, %%esi \n\t"
06372 "add $6, %%edi \n\t"
06373 "dec %%ebx \n\t"
06374 "jnz .L10380 \n\t"
06375
06376 "emms \n\t"
06377 "popa \n\t":"=m" (Dest)
06378 :"m"(Src),
06379 "m"(rows),
06380 "m"(columns),
06381 "m"(Kernel),
06382 "m"(NRightShift)
06383 );
06384 #endif
06385 #endif
06386 return (0);
06387 } else {
/* No C fallback for this filter; non-MMX systems get an error. */
06388
06389 return (-1);
06390 }
06391 }
06392
/*!
\brief Convolve Src with a 9x9 kernel of signed 16-bit weights, right-shifting
each unpacked source byte by NRightShift before multiplication.

For every output pixel, each source byte in the 9x9 window is zero-extended to
16 bits, shifted right by \c NRightShift, multiplied by the corresponding kernel
weight, and accumulated with signed saturation (\c paddsw); the accumulator is
horizontally summed and packed back to one byte with unsigned saturation
(\c packuswb). Output starts at row 4, column 4 (the window center); a 4-pixel
border on every side is left untouched.

NOTE(review): the asm consumes 24 bytes (12 int16) of kernel data per kernel
row and rewinds the kernel pointer by 208 bytes per pixel, i.e. it assumes a
9-row kernel stored with 12 int16 per row — confirm this layout against the
library's kernel-preparation documentation/callers.

\param Src Source byte buffer (rows x columns), must not be NULL.
\param Dest Destination byte buffer, same dimensions, must not be NULL.
\param rows Number of image rows; must be >= 9.
\param columns Number of image columns; must be >= 9.
\param Kernel Signed 16-bit convolution weights, must not be NULL.
\param NRightShift Pre-multiplication right shift, 0..7.

\return 0 on success (MMX path taken), -1 on bad arguments or when MMX is
unavailable/disabled. Only an MMX implementation exists; there is no C fallback.
*/
int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
                                               signed short *Kernel, unsigned char NRightShift)
{
    /* Validate pointers. */
    if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
        return(-1);

    /* Image must fit at least one 9x9 window; shift must stay within a byte. */
    if ((columns < 9) || (rows < 9) || (NRightShift > 7))
        return (-1);

    if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
	__asm
	{
	    pusha
	    pxor mm0, mm0          /* mm0 = 0, used to zero-extend bytes to words */
	    xor ebx, ebx
	    mov bl, NRightShift
	    movd mm5, ebx          /* mm5 = shift count for psrlw */
	    mov edx, Kernel        /* edx walks the kernel weights */
	    mov esi, Src           /* esi walks the source window */
	    mov edi, Dest
	    add edi, 4             /* output starts at column 4 ... */
	    mov eax, columns       /* eax = row stride in bytes */
	    add edi, eax
	    add edi, eax
	    add edi, eax
	    add edi, eax           /* ... and row 4 (window center) */
	    mov ebx, rows
	    sub ebx, 8             /* ebx = number of output rows (rows - 8) */

	    /* Outer loop: one iteration per output row. */
	    L10390:
	    mov ecx, eax
	    sub ecx, 8             /* ecx = output pixels per row (columns - 8) */
	    align 16
	    /* Inner loop: one iteration per output pixel. */
	    L10392:
	    pxor mm7, mm7          /* mm7 = accumulator, 4 signed word lanes */
	    movd mm6, esi          /* stash window start; restored after the 9 rows */

	    /* --- kernel row 1: 8 bytes x 8 weights, then 4 bytes x 4 weights --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax           /* advance to next source row */
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 2 (same pattern) --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 3 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 4 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 5 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 6 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 7 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 8 --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    dec esi
	    add esi, eax
	    movq mm3, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* --- kernel row 9: last row, kernel pointer is not advanced past it --- */
	    movq mm1, [esi]
	    movq mm2, mm1
	    inc esi
	    movq mm3, [edx]
	    add edx, 8
	    movq mm4, [edx]
	    add edx, 8
	    punpcklbw mm1, mm0
	    punpckhbw mm2, mm0
	    psrlw mm1, mm5
	    psrlw mm2, mm5
	    pmullw mm1, mm3
	    pmullw mm2, mm4
	    paddsw mm1, mm2
	    paddsw mm7, mm1
	    movq mm1, [esi]
	    movq mm3, [edx]
	    punpcklbw mm1, mm0
	    psrlw mm1, mm5
	    pmullw mm1, mm3
	    paddsw mm7, mm1

	    /* Horizontal sum of mm7's four word lanes, then pack to one byte. */
	    movq mm3, mm7
	    psrlq mm7, 32
	    paddsw mm7, mm3
	    movq mm2, mm7
	    psrlq mm7, 16
	    paddsw mm7, mm2
	    movd mm1, eax          /* save eax (stride) around the byte store */
	    packuswb mm7, mm0      /* clamp result to 0..255 */
	    movd eax, mm7
	    mov [edi], al          /* store output pixel */
	    movd eax, mm1

	    movd esi, mm6          /* restore window start */
	    sub edx, 208           /* rewind kernel pointer to its first weight */
	    inc esi                /* slide window one pixel right */
	    inc edi

	    dec ecx
	    jnz L10392
	    add esi, 8             /* skip the 8-pixel border to the next row's window */
	    add edi, 8
	    dec ebx
	    jnz L10390

	    emms                   /* leave MMX state, re-enable x87 */
	    popa
	}
#else
	asm volatile
	    ("pusha		     \n\t" "pxor      %%mm0, %%mm0 \n\t"   /* mm0 = 0 for byte->word unpack */
	     "xor       %%ebx, %%ebx \n\t"
	     "mov       %5, %%bl \n\t"
	     "movd      %%ebx, %%mm5 \n\t"   /* mm5 = NRightShift */
	     "mov       %4, %%edx \n\t"      /* edx = Kernel */
	     "mov       %1, %%esi \n\t"      /* esi = Src */
	     "mov       %0, %%edi \n\t"      /* edi = Dest + 4*columns + 4 (window center) */
	     "add       $4, %%edi \n\t"
	     "mov       %3, %%eax \n\t"      /* eax = columns (row stride) */
	     "add       %%eax, %%edi \n\t"
	     "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "add       %%eax, %%edi \n\t" "mov       %2, %%ebx \n\t"
	     "sub       $8, %%ebx \n\t"      /* ebx = rows - 8 output rows */

	     /* Outer loop over output rows; inner loop over output pixels. */
	     ".L10390: \n\t" "mov       %%eax, %%ecx \n\t"
	     "sub       $8, %%ecx \n\t"      /* ecx = columns - 8 output pixels */
	     ".align 16              \n\t"
	     ".L10392: \n\t" "pxor      %%mm7, %%mm7 \n\t"   /* mm7 = accumulator */
	     "movd      %%esi, %%mm6 \n\t"   /* stash window start pointer */

	     /* --- kernel row 1: 8 bytes x 8 weights, then 4 bytes x 4 weights --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"   /* next source row */
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 2 (same pattern) --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 3 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 4 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 5 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 6 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 7 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 8 --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "dec       %%esi \n\t" "add       %%eax, %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* --- kernel row 9: last row, kernel pointer not advanced past it --- */
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      %%mm1, %%mm2 \n\t"
	     "inc       %%esi \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "add       $8, %%edx \n\t"
	     "movq      (%%edx), %%mm4 \n\t"
	     "add       $8, %%edx \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "punpckhbw %%mm0, %%mm2 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm2 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "pmullw    %%mm4, %%mm2 \n\t"
	     "paddsw    %%mm2, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"
	     "movq      (%%esi), %%mm1 \n\t"
	     "movq      (%%edx), %%mm3 \n\t"
	     "punpcklbw %%mm0, %%mm1 \n\t"
	     "psrlw     %%mm5, %%mm1 \n\t"
	     "pmullw    %%mm3, %%mm1 \n\t"
	     "paddsw    %%mm1, %%mm7 \n\t"

	     /* Horizontal sum of mm7's four word lanes, pack to one byte, store. */
	     "movq      %%mm7, %%mm3 \n\t"
	     "psrlq     $32, %%mm7 \n\t"
	     "paddsw    %%mm3, %%mm7 \n\t"
	     "movq      %%mm7, %%mm2 \n\t"
	     "psrlq     $16, %%mm7 \n\t"
	     "paddsw    %%mm2, %%mm7 \n\t"
	     "movd      %%eax, %%mm1 \n\t"   /* save eax (stride) around the byte store */
	     "packuswb  %%mm0, %%mm7 \n\t"   /* clamp to 0..255 */
	     "movd      %%mm7, %%eax \n\t"
	     "mov       %%al, (%%edi) \n\t"
	     "movd      %%mm1, %%eax \n\t"

	     "movd      %%mm6, %%esi \n\t"   /* restore window start */
	     "sub       $208, %%edx \n\t"    /* rewind kernel pointer to first weight */
	     "inc       %%esi \n\t"          /* slide window one pixel right */
	     "inc       %%edi \n\t"

	     "dec       %%ecx \n\t"
	     "jnz       .L10392 \n\t"
	     "add       $8, %%esi \n\t"      /* skip 8-pixel border to next row */
	     "add       $8, %%edi \n\t"
	     "dec       %%ebx \n\t"
	     "jnz       .L10390 \n\t"

	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src),
	     "m"(rows),
	     "m"(columns),
	     "m"(Kernel),
	     "m"(NRightShift)
	    );
#endif
#endif
	return (0);
    } else {
	/* No non-MMX fallback implemented. */
	return (-1);
    }
}
06971
06972
06973
/*!
\brief Apply a horizontal Sobel-style edge filter: Dest = |SobelX(Src)|.

Processes 8 output pixels per inner-loop iteration. For each 3-row window the
asm forms two column sums with [1 2 1] vertical weighting (left column at
offset 0, right column at offset +2), combines them into signed differences
via the psrlq/psubw/punpckldq shuffles, and takes the absolute value with the
sign-mask trick (psraw $15 / pxor / psubsw) before packing to bytes with
unsigned saturation. Output is written starting at row 1, column 1.

\param Src Source byte buffer (rows x columns), must not be NULL.
\param Dest Destination byte buffer, same dimensions, must not be NULL.
\param rows Number of image rows; must be >= 3.
\param columns Number of image columns; must be >= 8.

\return 0 on success (MMX path), -1 on bad arguments or when MMX is
unavailable/disabled. Only an MMX implementation exists; no C fallback.
*/
int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int columns)
{
    /* Validate pointers. */
    if ((Src == NULL) || (Dest == NULL))
	return(-1);

    /* Need at least one full 3x3 window and an 8-pixel-wide row. */
    if ((columns < 8) || (rows < 3))
	return (-1);

    if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
	__asm
	{
	    pusha
	    pxor mm0, mm0          /* mm0 = 0 for byte->word unpack */
	    mov eax, columns       /* eax = row stride */

	    mov esi, Src
	    mov edi, Dest
	    add edi, eax           /* output starts at row 1, column 1 */
	    inc edi
	    mov edx, rows
	    sub edx, 2             /* edx = number of output rows */

	    L10400:                /* outer loop: one output row */
	    mov ecx, eax
	    shr ecx, 3             /* ecx = columns / 8 groups of 8 pixels */
	    mov ebx, esi           /* remember row start in ebx */
	    movd mm1, edi          /* and output row start in mm1 */
	    align 16
	    L10402:                /* inner loop: 8 output pixels per pass */

	    /* Row 0: left column (offset 0) into mm4/mm5, right (+2) into mm6/mm7. */
	    movq mm4, [esi]
	    movq mm5, mm4
	    add esi, 2
	    punpcklbw mm4, mm0
	    punpckhbw mm5, mm0
	    movq mm6, [esi]
	    movq mm7, mm6
	    sub esi, 2
	    punpcklbw mm6, mm0
	    punpckhbw mm7, mm0
	    /* Row 1: added twice => vertical weight 2. */
	    add esi, eax
	    movq mm2, [esi]
	    movq mm3, mm2
	    add esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    paddw mm4, mm2
	    paddw mm5, mm3
	    paddw mm4, mm2
	    paddw mm5, mm3
	    movq mm2, [esi]
	    movq mm3, mm2
	    sub esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    paddw mm6, mm2
	    paddw mm7, mm3
	    paddw mm6, mm2
	    paddw mm7, mm3
	    /* Row 2: added once => vertical weight 1. */
	    add esi, eax
	    movq mm2, [esi]
	    movq mm3, mm2
	    add esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    paddw mm4, mm2
	    paddw mm5, mm3
	    movq mm2, [esi]
	    movq mm3, mm2
	    sub esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    paddw mm6, mm2
	    paddw mm7, mm3

	    /* Combine the column sums into signed horizontal differences. */
	    movq mm2, mm4
	    psrlq mm4, 32
	    psubw mm4, mm2
	    movq mm3, mm6
	    psrlq mm6, 32
	    psubw mm6, mm3
	    punpckldq mm4, mm6
	    movq mm2, mm5
	    psrlq mm5, 32
	    psubw mm5, mm2
	    movq mm3, mm7
	    psrlq mm7, 32
	    psubw mm7, mm3
	    punpckldq mm5, mm7

	    /* Absolute value via sign mask: x = (x ^ sign) - sign. */
	    movq mm6, mm4
	    movq mm7, mm5
	    psraw mm6, 15
	    psraw mm7, 15
	    pxor mm4, mm6
	    pxor mm5, mm7
	    psubsw mm4, mm6
	    psubsw mm5, mm7
	    packuswb mm4, mm5      /* clamp to 0..255 */
	    movq [edi], mm4        /* store 8 result pixels */

	    /* Undo the two row advances, step 8 pixels right. */
	    sub esi, eax
	    sub esi, eax
	    add esi, 8
	    add edi, 8

	    dec ecx
	    jnz L10402
	    mov esi, ebx           /* back to row start, then down one row */
	    movd edi, mm1
	    add esi, eax
	    add edi, eax
	    dec edx
	    jnz L10400

	    emms                   /* leave MMX state */
	    popa
	}
#else
	asm volatile
	    ("pusha		     \n\t" "pxor      %%mm0, %%mm0 \n\t"   /* mm0 = 0 for unpack */
	     "mov       %3, %%eax \n\t"      /* eax = columns (row stride) */

	     "mov       %1, %%esi \n\t"      /* esi = Src */
	     "mov       %0, %%edi \n\t"      /* edi = Dest + columns + 1 (row 1, col 1) */
	     "add       %%eax, %%edi \n\t"
	     "inc       %%edi \n\t"
	     "mov       %2, %%edx \n\t"
	     "sub       $2, %%edx \n\t"      /* edx = output row count */

	     ".L10400: \n\t" "mov       %%eax, %%ecx \n\t"   /* outer loop: per output row */
	     "shr       $3, %%ecx \n\t"      /* ecx = columns/8 pixel groups */
	     "mov       %%esi, %%ebx \n\t"   /* remember row start */
	     "movd      %%edi, %%mm1 \n\t"   /* and output row start */
	     ".align 16              \n\t"
	     ".L10402: \n\t"                 /* inner loop: 8 pixels per pass */

	     /* Row 0: left column (offset 0) -> mm4/mm5, right (+2) -> mm6/mm7. */
	     "movq      (%%esi), %%mm4 \n\t"
	     "movq      %%mm4, %%mm5 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm4 \n\t"
	     "punpckhbw %%mm0, %%mm5 \n\t"
	     "movq      (%%esi), %%mm6 \n\t"
	     "movq      %%mm6, %%mm7 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm6 \n\t"
	     "punpckhbw %%mm0, %%mm7 \n\t"
	     /* Row 1: added twice => vertical weight 2. */
	     "add       %%eax, %%esi \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"
	     /* Row 2: added once => vertical weight 1. */
	     "add       %%eax, %%esi \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"

	     /* Combine column sums into signed horizontal differences. */
	     "movq      %%mm4, %%mm2 \n\t"
	     "psrlq     $32, %%mm4 \n\t"
	     "psubw     %%mm2, %%mm4 \n\t"
	     "movq      %%mm6, %%mm3 \n\t"
	     "psrlq     $32, %%mm6 \n\t"
	     "psubw     %%mm3, %%mm6 \n\t"
	     "punpckldq %%mm6, %%mm4 \n\t"
	     "movq      %%mm5, %%mm2 \n\t"
	     "psrlq     $32, %%mm5 \n\t"
	     "psubw     %%mm2, %%mm5 \n\t"
	     "movq      %%mm7, %%mm3 \n\t"
	     "psrlq     $32, %%mm7 \n\t"
	     "psubw     %%mm3, %%mm7 \n\t"
	     "punpckldq %%mm7, %%mm5 \n\t"

	     /* Absolute value via sign mask: x = (x ^ sign) - sign. */
	     "movq      %%mm4, %%mm6 \n\t"
	     "movq      %%mm5, %%mm7 \n\t"
	     "psraw     $15, %%mm6 \n\t"
	     "psraw     $15, %%mm7 \n\t"
	     "pxor      %%mm6, %%mm4 \n\t"
	     "pxor      %%mm7, %%mm5 \n\t"
	     "psubsw    %%mm6, %%mm4 \n\t"
	     "psubsw    %%mm7, %%mm5 \n\t"
	     "packuswb  %%mm5, %%mm4 \n\t"   /* clamp to 0..255 */
	     "movq      %%mm4, (%%edi) \n\t" /* store 8 result pixels */

	     /* Undo the two row advances, step 8 pixels right. */
	     "sub       %%eax, %%esi \n\t"
	     "sub       %%eax, %%esi \n\t" "add       $8, %%esi \n\t"
	     "add       $8, %%edi \n\t"

	     "dec       %%ecx \n\t"
	     "jnz       .L10402 \n\t"
	     "mov       %%ebx, %%esi \n\t"   /* back to row start, then down one row */
	     "movd      %%mm1, %%edi \n\t"
	     "add       %%eax, %%esi \n\t"
	     "add       %%eax, %%edi \n\t"
	     "dec       %%edx \n\t"
	     "jnz       .L10400 \n\t"

	     "emms \n\t"
	     "popa \n\t":"=m" (Dest)
	     :"m"(Src),
	     "m"(rows),
	     "m"(columns)
	    );
#endif
#endif
	return (0);
    } else {
	/* No non-MMX fallback implemented. */
	return (-1);
    }
}
07224
/*!
\brief Apply a horizontal Sobel-style edge filter with pre-shift:
Dest = |SobelX(Src >> NRightShift)|.

Identical structure to SDL_imageFilterSobelX, except every unpacked source
word is shifted right by \c NRightShift before the [1 2 1]-weighted column
sums are formed; the signed horizontal difference is then taken, its absolute
value computed via the sign-mask trick (psraw $15 / pxor / psubsw), and the
result packed to bytes with unsigned saturation. Output starts at row 1,
column 1.

Fix (GCC path): \c rows was listed as an input-only \c "m" operand yet the
asm modified it ("subl $2, %2" / "decl %2"); GCC's extended-asm contract
forbids writing input operands, so \c rows is now a read-write \c "+m"
output. A \c "memory" clobber is also declared since the asm stores through
\c Dest into memory the compiler cannot see via the operand list. The
register state itself is preserved by pusha/popa. The MSVC path is unchanged
(MSVC inline asm may reference and modify the local parameter directly).

\param Src Source byte buffer (rows x columns), must not be NULL.
\param Dest Destination byte buffer, same dimensions, must not be NULL.
\param rows Number of image rows; must be >= 3. (Clobbered locally by the
asm loop counter; not used after the asm.)
\param columns Number of image columns; must be >= 8.
\param NRightShift Pre-sum right shift, 0..7.

\return 0 on success (MMX path), -1 on bad arguments or when MMX is
unavailable/disabled. Only an MMX implementation exists; no C fallback.
*/
int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns,
                                    unsigned char NRightShift)
{
    /* Validate pointers and parameter ranges. */
    if ((Src == NULL) || (Dest == NULL))
	return(-1);
    if ((columns < 8) || (rows < 3) || (NRightShift > 7))
	return (-1);

    if ((SDL_imageFilterMMXdetect())) {
#ifdef USE_MMX
#if !defined(GCC__)
	__asm
	{
	    pusha
	    pxor mm0, mm0          /* mm0 = 0 for byte->word unpack */
	    mov eax, columns       /* eax = row stride */
	    xor ebx, ebx
	    mov bl, NRightShift
	    movd mm1, ebx          /* mm1 = shift count for psrlw */

	    mov esi, Src
	    mov edi, Dest
	    add edi, eax           /* output starts at row 1, column 1 */
	    inc edi

	    sub rows, 2            /* rows now counts output rows */

	    L10410:                /* outer loop: one output row */
	    mov ecx, eax
	    shr ecx, 3             /* ecx = columns / 8 pixel groups */
	    mov ebx, esi           /* remember source row start */
	    mov edx, edi           /* and output row start */
	    align 16
	    L10412:                /* inner loop: 8 output pixels per pass */

	    /* Row 0: left column -> mm4/mm5, right (+2) -> mm6/mm7, pre-shifted. */
	    movq mm4, [esi]
	    movq mm5, mm4
	    add esi, 2
	    punpcklbw mm4, mm0
	    punpckhbw mm5, mm0
	    psrlw mm4, mm1
	    psrlw mm5, mm1
	    movq mm6, [esi]
	    movq mm7, mm6
	    sub esi, 2
	    punpcklbw mm6, mm0
	    punpckhbw mm7, mm0
	    psrlw mm6, mm1
	    psrlw mm7, mm1
	    /* Row 1: added twice => vertical weight 2. */
	    add esi, eax
	    movq mm2, [esi]
	    movq mm3, mm2
	    add esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    psrlw mm2, mm1
	    psrlw mm3, mm1
	    paddw mm4, mm2
	    paddw mm5, mm3
	    paddw mm4, mm2
	    paddw mm5, mm3
	    movq mm2, [esi]
	    movq mm3, mm2
	    sub esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    psrlw mm2, mm1
	    psrlw mm3, mm1
	    paddw mm6, mm2
	    paddw mm7, mm3
	    paddw mm6, mm2
	    paddw mm7, mm3
	    /* Row 2: added once => vertical weight 1. */
	    add esi, eax
	    movq mm2, [esi]
	    movq mm3, mm2
	    add esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    psrlw mm2, mm1
	    psrlw mm3, mm1
	    paddw mm4, mm2
	    paddw mm5, mm3
	    movq mm2, [esi]
	    movq mm3, mm2
	    sub esi, 2
	    punpcklbw mm2, mm0
	    punpckhbw mm3, mm0
	    psrlw mm2, mm1
	    psrlw mm3, mm1
	    paddw mm6, mm2
	    paddw mm7, mm3

	    /* Combine column sums into signed horizontal differences. */
	    movq mm2, mm4
	    psrlq mm4, 32
	    psubw mm4, mm2
	    movq mm3, mm6
	    psrlq mm6, 32
	    psubw mm6, mm3
	    punpckldq mm4, mm6
	    movq mm2, mm5
	    psrlq mm5, 32
	    psubw mm5, mm2
	    movq mm3, mm7
	    psrlq mm7, 32
	    psubw mm7, mm3
	    punpckldq mm5, mm7

	    /* Absolute value via sign mask: x = (x ^ sign) - sign. */
	    movq mm6, mm4
	    movq mm7, mm5
	    psraw mm6, 15
	    psraw mm7, 15
	    pxor mm4, mm6
	    pxor mm5, mm7
	    psubsw mm4, mm6
	    psubsw mm5, mm7
	    packuswb mm4, mm5      /* clamp to 0..255 */
	    movq [edi], mm4        /* store 8 result pixels */

	    /* Undo the two row advances, step 8 pixels right. */
	    sub esi, eax
	    sub esi, eax
	    add esi, 8
	    add edi, 8

	    dec ecx
	    jnz L10412
	    mov esi, ebx           /* back to row start, then down one row */
	    mov edi, edx
	    add esi, eax
	    add edi, eax
	    dec rows
	    jnz L10410

	    emms                   /* leave MMX state */
	    popa
	}
#else
	/* Operand map: %0 = Dest (out), %1 = rows (in/out, decremented as the
	   row counter), %2 = Src, %3 = columns, %4 = NRightShift. */
	asm volatile
	    ("pusha		     \n\t" "pxor      %%mm0, %%mm0 \n\t"   /* mm0 = 0 for unpack */
	     "mov       %3, %%eax \n\t"      /* eax = columns (row stride) */
	     "xor       %%ebx, %%ebx \n\t"
	     "mov       %4, %%bl \n\t"
	     "movd      %%ebx, %%mm1 \n\t"   /* mm1 = NRightShift */

	     "mov       %2, %%esi \n\t"      /* esi = Src */
	     "mov       %0, %%edi \n\t"      /* edi = Dest + columns + 1 */
	     "add       %%eax, %%edi \n\t"
	     "inc       %%edi \n\t"

	     "subl      $2, %1 \n\t"         /* rows now counts output rows */

	     ".L10410: \n\t" "mov       %%eax, %%ecx \n\t"   /* outer loop per row */
	     "shr       $3, %%ecx \n\t"      /* ecx = columns/8 pixel groups */
	     "mov       %%esi, %%ebx \n\t"   /* remember source row start */
	     "mov       %%edi, %%edx \n\t"   /* and output row start */
	     ".align 16              \n\t"
	     ".L10412: \n\t"                 /* inner loop: 8 pixels per pass */

	     /* Row 0: left column -> mm4/mm5, right (+2) -> mm6/mm7, pre-shifted. */
	     "movq      (%%esi), %%mm4 \n\t"
	     "movq      %%mm4, %%mm5 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm4 \n\t"
	     "punpckhbw %%mm0, %%mm5 \n\t"
	     "psrlw     %%mm1, %%mm4 \n\t"
	     "psrlw     %%mm1, %%mm5 \n\t"
	     "movq      (%%esi), %%mm6 \n\t"
	     "movq      %%mm6, %%mm7 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm6 \n\t"
	     "punpckhbw %%mm0, %%mm7 \n\t"
	     "psrlw     %%mm1, %%mm6 \n\t"
	     "psrlw     %%mm1, %%mm7 \n\t"
	     /* Row 1: added twice => vertical weight 2. */
	     "add       %%eax, %%esi \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "psrlw     %%mm1, %%mm2 \n\t"
	     "psrlw     %%mm1, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "psrlw     %%mm1, %%mm2 \n\t"
	     "psrlw     %%mm1, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"
	     /* Row 2: added once => vertical weight 1. */
	     "add       %%eax, %%esi \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "add       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "psrlw     %%mm1, %%mm2 \n\t"
	     "psrlw     %%mm1, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm4 \n\t"
	     "paddw     %%mm3, %%mm5 \n\t"
	     "movq      (%%esi), %%mm2 \n\t"
	     "movq      %%mm2, %%mm3 \n\t"
	     "sub       $2, %%esi \n\t"
	     "punpcklbw %%mm0, %%mm2 \n\t"
	     "punpckhbw %%mm0, %%mm3 \n\t"
	     "psrlw     %%mm1, %%mm2 \n\t"
	     "psrlw     %%mm1, %%mm3 \n\t"
	     "paddw     %%mm2, %%mm6 \n\t"
	     "paddw     %%mm3, %%mm7 \n\t"

	     /* Combine column sums into signed horizontal differences. */
	     "movq      %%mm4, %%mm2 \n\t"
	     "psrlq     $32, %%mm4 \n\t"
	     "psubw     %%mm2, %%mm4 \n\t"
	     "movq      %%mm6, %%mm3 \n\t"
	     "psrlq     $32, %%mm6 \n\t"
	     "psubw     %%mm3, %%mm6 \n\t"
	     "punpckldq %%mm6, %%mm4 \n\t"
	     "movq      %%mm5, %%mm2 \n\t"
	     "psrlq     $32, %%mm5 \n\t"
	     "psubw     %%mm2, %%mm5 \n\t"
	     "movq      %%mm7, %%mm3 \n\t"
	     "psrlq     $32, %%mm7 \n\t"
	     "psubw     %%mm3, %%mm7 \n\t"
	     "punpckldq %%mm7, %%mm5 \n\t"

	     /* Absolute value via sign mask: x = (x ^ sign) - sign. */
	     "movq      %%mm4, %%mm6 \n\t"
	     "movq      %%mm5, %%mm7 \n\t"
	     "psraw     $15, %%mm6 \n\t"
	     "psraw     $15, %%mm7 \n\t"
	     "pxor      %%mm6, %%mm4 \n\t"
	     "pxor      %%mm7, %%mm5 \n\t"
	     "psubsw    %%mm6, %%mm4 \n\t"
	     "psubsw    %%mm7, %%mm5 \n\t"
	     "packuswb  %%mm5, %%mm4 \n\t"   /* clamp to 0..255 */
	     "movq      %%mm4, (%%edi) \n\t" /* store 8 result pixels */

	     /* Undo the two row advances, step 8 pixels right. */
	     "sub       %%eax, %%esi \n\t"
	     "sub       %%eax, %%esi \n\t" "add       $8, %%esi \n\t"
	     "add       $8, %%edi \n\t"

	     "dec       %%ecx \n\t"
	     "jnz       .L10412 \n\t"
	     "mov       %%ebx, %%esi \n\t"   /* back to row start, then down one row */
	     "mov       %%edx, %%edi \n\t"
	     "add       %%eax, %%esi \n\t"
	     "add       %%eax, %%edi \n\t"
	     "decl      %1 \n\t"             /* rows is a "+m" operand: writable */
	     "jnz       .L10410 \n\t"

	     "emms \n\t"
	     "popa \n\t":"=m" (Dest),
	     "+m"(rows)                      /* modified by the asm: must be output */
	     :"m"(Src),
	     "m"(columns),
	     "m"(NRightShift)
	     :"memory"                       /* stores through Dest into *Dest */
	    );
#endif
#endif
	return (0);
    } else {
	/* No non-MMX fallback implemented. */
	return (-1);
    }
}
07507
/*!
\brief Align the stack pointer down to a 32-byte boundary, saving the old esp.

Reserves one slot below the current stack pointer, rounds down to a multiple
of 32 (and $-32 masks the low 5 bits), stores the previous esp at the new top
of stack, and installs the aligned pointer. Must be paired with
SDL_imageFilterRestoreStack() before the enclosing function returns, or the
caller's stack frame is lost. x86-32 only; a no-op when USE_MMX is not defined.
NOTE(review): clobbers ebx without declaring it in the GCC variant — callers
presumably rely on the surrounding code not holding ebx live; verify.
*/
void SDL_imageFilterAlignStack(void)
{
#ifdef USE_MMX
#if !defined(GCC__)
    __asm
    {				/* --- stack alignment --- */
	mov ebx, esp		/* ebx = esp */
	sub ebx, 4		/* make room for the saved esp */
	and ebx, -32		/* align down to 32 bytes */
	mov [ebx], esp		/* save old esp at the new top of stack */
	mov esp, ebx		/* switch to the aligned stack pointer */
    }
#else
    asm volatile
	(				/* --- stack alignment --- */
	 "mov       %%esp, %%ebx \n\t"	/* ebx = esp */
	 "sub       $4, %%ebx \n\t"	/* make room for the saved esp */
	 "and       $-32, %%ebx \n\t"	/* align down to 32 bytes */
	 "mov       %%esp, (%%ebx) \n\t"	/* save old esp at new top of stack */
	 "mov       %%ebx, %%esp \n\t"	/* switch to the aligned stack pointer */
	 ::);
#endif
#endif
}
07535
/*!
\brief Restore the stack pointer saved by SDL_imageFilterAlignStack().

Reads the previous esp from the current top of stack (where AlignStack stored
it) and reinstalls it. Must only be called after a matching
SDL_imageFilterAlignStack() in the same function. x86-32 only; a no-op when
USE_MMX is not defined. NOTE(review): clobbers ebx without declaring it in
the GCC variant — verify against surrounding code.
*/
void SDL_imageFilterRestoreStack(void)
{
#ifdef USE_MMX
#if !defined(GCC__)
    __asm
    {				/* --- restore stack pointer --- */
	mov ebx, [esp]		/* fetch the esp saved by AlignStack */
	mov esp, ebx		/* reinstall it */
    }
#else
    asm volatile
	(				/* --- restore stack pointer --- */
	 "mov       (%%esp), %%ebx \n\t"	/* fetch the esp saved by AlignStack */
	 "mov       %%ebx, %%esp \n\t"	/* reinstall it */
	 ::);
#endif
#endif
}