BBC BASIC for Windows - 4x supersampling

BBC BASIC for Windows

Programming

Graphics and Games (Moderator: admin)

4x supersampling

« Previous Topic | Next Topic »

Pages: 1

Author

Topic: 4x supersampling (Read 141 times)

admin
Administrator

member is offline

Posts: 1145

Re: 4x supersampling
« Reply #7 on: Oct 22nd, 2011, 9:13pm »

on Oct 22nd, 2011, 7:49pm, David Williams wrote:

I'd say that 2x may be just about sufficient for 2D bitmap-based games.

When you did the previous test it was the straight lines which seemed to be the most critical - probably because the aliasing is periodic and therefore particularly noticeable. Might it be worth incorporating some filled polygons as well as the circles?

Richard.

Logged

David Williams
Developer

member is offline
Avatar

meh

Gender:

Posts: 452

Re: 4x supersampling
« Reply #8 on: Oct 23rd, 2011, 4:05pm »

on Oct 22nd, 2011, 9:13pm, Richard Russell wrote:

I've tried some rotated bitmaps, and 4x supersampling takes reasonably good care of the stray pixels stemming from rounding errors in the largely fixed-point calculations.

My attempt early this morning at translating my BASIC implementation of a generalised supersampler (1x, 2x, 4x, ... etc.) to assembly language resulted in nothing but crashes after an hour or so of bug-hunting. How annoying. I'm starting to hate assembly language.

EDIT: Perseverance often pays off. This time it did, because I've got the code working. Will upload it later for those very few who may be interested.

David.

« Last Edit: Oct 23rd, 2011, 5:36pm by David Williams »

Logged

David Williams
Developer

member is offline
Avatar

meh

Gender:

Posts: 452

Re: 4x supersampling
« Reply #9 on: Oct 23rd, 2011, 6:47pm »

For those it might interest, here's a demo program which draws filled circles on a large bitmap, which is then resized to the dimensions chosen by you. The size of this large 'internal' bitmap is dependent on the dimensions you choose, and the chosen sample size.

~~http://www.bb4wgames.com/misc/xn_supersample_asm.zip~~

Please first try these parameters:

Bitmap width: 640
Bitmap height: 480
Sample size: 8

If your choice of parameters results in memory requirements exceeding 255 MB, then you'll be told to revise your parameters.

If you want to see what the circles look like without supersampling, then enter 1 as the sample size.

I think a fast (optimised), and necessarily MMX-based 2x2 (or even 4x4) supersampler would make a very handy addition to GFXLIB. So I suppose that's next on the agenda. :)

Obviously, this generalised (and in any case totally unoptimised) supersampler is no good for realtime situations such as a game would demand.

David.

The assembler code (very rough around the edges):

Code:

      DEF PROC_asm
      REM Assembles the machine-code routine supersample%, which shrinks a large
      REM bitmap (bm2) into a smaller one (bm1) by averaging every
      REM smpSz% x smpSz% block of source pixels into one destination pixel.
      REM
      REM supersample% takes five 32-bit parameters on the stack, in this order,
      REM and removes them itself on return (ret 20):
      REM
      REM     pBm%    address of the destination bitmap, bmW% x bmH% pixels
      REM     pBm2%   address of the source bitmap,
      REM             (bmW%*smpSz%) x (bmH%*smpSz%) pixels
      REM     bmW%    destination width in pixels
      REM     bmH%    destination height in pixels
      REM     smpSz%  number of source pixels per destination pixel, per axis
      REM
      REM Both bitmaps use 4 bytes per pixel with no row padding; byte 0 is blue,
      REM byte 1 is green, byte 2 is red.  Byte 3 is neither read nor written.
      REM
      REM All general registers are preserved (pushad/popad).  The FPU is not
      REM used, so the caller's FPU state and rounding mode are left alone.
      REM If bmW%, bmH% or smpSz% is zero or negative the routine returns at once
      REM without touching either bitmap.
      REM
      REM Each channel is averaged with an exact integer division by smpSz%^2
      REM (rounding down), so a uniformly coloured block keeps its exact colour
      REM for every sample size, not just powers of two.
      REM
      REM NOTE: inside the assembler a colon ends a comment, so the comments
      REM below deliberately contain no colons.

      LOCAL C%, I%, P%

      REM 512 bytes of code space; the routine plus alignment padding fits.
      DIM C% 511

      FOR I% = 0 TO 2 STEP 2
        P% = C%
        [OPT I%

        ; align the entry point to a 32-byte boundary
        ] : P% = (P% + 31) AND -32 : [OPT I%

        .supersample%

        pushad                             ; 32 bytes of saved registers

        mov ebp, esp
        sub esp, 128                       ; local workspace, see layout below

        ; -----------------------------------------------------------------------------------------------
        ;
        ; Parameters (32 bytes of pushad + 4 bytes of return address = 36)
        ;
        ;     EBP!36 = pBm%
        ;     EBP!40 = pBm2%
        ;     EBP!44 = bmW%
        ;     EBP!48 = bmH%
        ;     EBP!52 = smpSz%
        ;
        ; Locals (offsets are +4 per value pushed inside the inner loops)
        ;
        ;     ESP!0  = smpSzSq% (= smpSz%^2)
        ;     ESP!20 = rowBytesLen% (= 4 * bmW% * smpSz%)
        ;     ESP!32 = rowBytesLen% * smpSz%
        ;     ESP!36 = 4 * smpSz%
        ;     ESP!40 = X% (destination column)
        ;     ESP!44 = Y% (destination row)
        ;     ESP!48 = rSum%
        ;     ESP!52 = gSum%
        ;     ESP!56 = bSum%
        ;
        ; -----------------------------------------------------------------------------------------------

        ;
        ; Reject empty or negative dimensions.  The loops below are
        ; bottom-tested and would otherwise run once, and a zero sample
        ; size would make the division further down fault.
        ;

        cmp DWORD [ebp + 44], 0            ; bmW% <= 0 ?
        jle near supersample_exit%
        cmp DWORD [ebp + 48], 0            ; bmH% <= 0 ?
        jle near supersample_exit%
        cmp DWORD [ebp + 52], 0            ; smpSz% <= 0 ?
        jle near supersample_exit%

        ;
        ; smpSzSq% = smpSz%^2  (the divisor used when averaging)
        ;

        mov eax, [ebp + 52]                ; smpSz%
        imul eax, eax
        mov [esp + 0], eax                 ; ESP!0  =  smpSzSq%

        ;
        ; rowBytesLen% = 4 * bmW% * smpSz%  (bytes per source row)
        ;

        mov eax, [ebp + 44]                ; bmW%
        imul eax, [ebp + 52]               ; bmW% * smpSz%  =  source width
        shl eax, 2                         ; 4 bytes per pixel
        mov [esp + 20], eax                ; ESP!20  =  rowBytesLen%

        ;
        ; rowBytesLen% * smpSz%  (source bytes spanned by one destination row)
        ;

        imul eax, [ebp + 52]
        mov [esp + 32], eax                ; ESP!32  =  rowBytesLen% * smpSz%

        ;
        ; 4 * smpSz%  (source bytes spanned by one destination column)
        ;

        mov eax, [ebp + 52]                ; smpSz%
        shl eax, 2
        mov [esp + 36], eax                ; ESP!36  =  4 * smpSz%

        mov ebx, [ebp + 40]                ; EBX = pBm2 for the whole routine

        mov DWORD [esp + 44], 0            ; Y% = 0

        .supersample_yLoop%                ; Y% runs from 0 to bmH%-1

        mov DWORD [esp + 40], 0            ; X% = 0

        .supersample_xLoop%                ; X% runs from 0 to bmW%-1

        ;
        ; O% = (rowBytesLen% * smpSz% * Y%) + (4 * smpSz% * X%)
        ;    = byte offset of the top-left source pixel of this block
        ;

        mov edi, [esp + 32]                ; rowBytesLen% * smpSz%
        mov esi, [esp + 36]                ; 4 * smpSz%
        imul edi, [esp + 44]               ; ... * Y%
        imul esi, [esp + 40]               ; ... * X%
        add edi, esi                       ; EDI = O%

        mov DWORD [esp + 48], 0            ; rSum% = 0
        mov DWORD [esp + 52], 0            ; gSum% = 0
        mov DWORD [esp + 56], 0            ; bSum% = 0

        xor esi, esi                       ; y% = 0 (row within the block)

        .supersample_innerYloop%

        push esi                           ; save y%, locals are now at +4

        xor esi, esi                       ; x% = 0 (column within the block)

        .supersample_innerXloop%

        ; EBX = pBm2, EDI = offset of current block row, ESI = x%

        push esi                           ; save x%, locals are now at +8
        shl esi, 2                         ; 4 * x%
        add esi, edi                       ; ESI = offset of the source pixel

        movzx ecx, BYTE [ebx + esi + 0]    ; blue byte
        add [esp + (56 +8)], ecx           ; bSum% += blue

        movzx ecx, BYTE [ebx + esi + 1]    ; green byte
        add [esp + (52 +8)], ecx           ; gSum% += green

        movzx ecx, BYTE [ebx + esi + 2]    ; red byte
        add [esp + (48 +8)], ecx           ; rSum% += red

        pop esi                            ; ESI = x%
        add esi, 1                         ; x% += 1
        cmp esi, [ebp + 52]                ; loop while x% < smpSz%
        jl supersample_innerXloop%

        add edi, [esp + (20 +4)]           ; step down one source row
        pop esi                            ; ESI = y%
        add esi, 1                         ; y% += 1
        cmp esi, [ebp + 52]                ; loop while y% < smpSz%
        jl supersample_innerYloop%

        ;
        ; ESI = pBm + 4*(Y%*bmW% + X%) = address of the destination pixel
        ;

        mov esi, [esp + 44]                ; Y%
        imul esi, [ebp + 44]               ; Y%*bmW%
        add esi, [esp + 40]                ; Y%*bmW% + X%
        shl esi, 2                         ; 4*(Y%*bmW% + X%)
        add esi, [ebp + 36]                ; + pBm

        ;
        ; Store each channel sum divided by smpSzSq%.  The division is
        ; unsigned and exact, so the result is always in 0..255.
        ;

        mov eax, [esp + 48]                ; rSum%
        xor edx, edx                       ; EDX.EAX = rSum%
        div DWORD [esp + 0]                ; EAX = rSum% DIV smpSzSq%
        mov [esi + 2], al                  ; red

        mov eax, [esp + 52]                ; gSum%
        xor edx, edx
        div DWORD [esp + 0]
        mov [esi + 1], al                  ; green

        mov eax, [esp + 56]                ; bSum%
        xor edx, edx
        div DWORD [esp + 0]
        mov [esi + 0], al                  ; blue

        mov edx, [ebp + 44]                ; bmW%
        add DWORD [esp + 40], 1            ; X% += 1
        cmp DWORD [esp + 40], edx          ; loop while X% < bmW%
        jl near supersample_xLoop%

        mov edx, [ebp + 48]                ; bmH%
        add DWORD [esp + 44], 1            ; Y% += 1
        cmp DWORD [esp + 44], edx          ; loop while Y% < bmH%
        jl near supersample_yLoop%

        .supersample_exit%

        add esp, 128                       ; release local workspace
        popad
        ret 20                             ; discard the five parameters

        ]
      NEXT I%
      ENDPROC

« Last Edit: Jan 2nd, 2013, 05:05am by David Williams »

Logged

Pages: 1


« Previous Topic | Next Topic »