Author |
Topic: 4x supersampling (Read 141 times) |
|
admin
Administrator
member is offline


Posts: 1145
|
 |
Re: 4x supersampling
« Reply #7 on: Oct 22nd, 2011, 9:13pm » |
|
on Oct 22nd, 2011, 7:49pm, David Williams wrote:| I'd say that 2x may be just about sufficient for 2D bitmap-based games. |
|
When you did the previous test it was the straight lines which seemed to be the most critical - probably because the aliasing is periodic and therefore particularly noticeable. Might it be worth incorporating some filled polygons as well as the circles?
Richard.
|
|
Logged
|
|
|
|
David Williams
Developer
member is offline

meh

Gender: 
Posts: 452
|
 |
Re: 4x supersampling
« Reply #8 on: Oct 23rd, 2011, 4:05pm » |
|
on Oct 22nd, 2011, 9:13pm, Richard Russell wrote:When you did the previous test it was the straight lines which seemed to be the most critical - probably because the aliasing is periodic and therefore particularly noticeable. Might it be worth incorporating some filled polygons as well as the circles?
Richard. |
|
I've tried some rotated bitmaps, and 4x supersampling takes reasonably good care of the stray pixels stemming from rounding errors in the largely fixed-point calculations.
My attempt early this morning at translating my BASIC implementation of a generalised supersampler (1x, 2x, 4x, ... etc.) to assembly language resulted in nothing but crashes after an hour or so of bug-hunting. How annoying. I'm starting to hate assembly language.
EDIT: Perseverance often pays off. This time it did, because I've got the code working. Will upload it later for those very few who may be interested.
David.
|
|
|
|
David Williams
Developer
member is offline

meh

Gender: 
Posts: 452
|
 |
Re: 4x supersampling
« Reply #9 on: Oct 23rd, 2011, 6:47pm » |
|
For those it might interest, here's a demo program which draws filled circles on a large bitmap, which is then resized to the dimensions chosen by you. The size of this large 'internal' bitmap is dependent on the dimensions you choose, and the chosen sample size.
http://www.bb4wgames.com/misc/xn_supersample_asm.zip
Please first try these parameters:
Bitmap width: 640 Bitmap height: 480 Sample size: 8
If your choice of parameters results in memory requirements exceeding 255 MB, then you'll be told to revise your parameters.
If you want to see what the circles look like without supersampling, then enter 1 as the sample size.
I think a fast (optimised), and necessarily MMX-based 2x2 (or even 4x4) supersampler would make a very handy addition to GFXLIB. So I suppose that's next on the agenda. :)
Obviously, this generalised (and in any case totally unoptimised) supersampler is no good for realtime situations such as a game would demand.
David.
The assembler code (very rough around the edges):
Code:
DEF PROC_asm
REM Assembles the machine-code routine supersample%, which shrinks a large
REM 32-bpp bitmap by an integer factor by averaging each smpSz% x smpSz%
REM cell of source pixels into one destination pixel.
LOCAL C%, I%, P%
DIM C% 511
FOR I% = 0 TO 2 STEP 2
P% = C%
[OPT I%
; // ALIGN
] : P% = (P% + 31) AND -32 : [OPT I%
.supersample%
; -----------------------------------------------------------------------------------------------
; supersample%
;
; Parameters (five dwords on the stack; removed on return by RET 20):
;
;   pBm%, pBm2%, bmW%, bmH%, smpSz%
;
;   pBm%   = address of the destination bitmap (bmW% x bmH% pixels)
;   pBm2%  = address of the source bitmap ((bmW%*smpSz%) x (bmH%*smpSz%) pixels)
;   bmW%   = destination width in pixels
;   bmH%   = destination height in pixels
;   smpSz% = sample size (each destination pixel averages smpSz% x smpSz% source pixels)
;
; Both bitmaps use 4 bytes per pixel; bytes 0, 1, 2 are blue, green, red.
; Byte 3 of each pixel is neither read nor written.
;
; All general-purpose registers are preserved (PUSHAD/POPAD). The FPU is not used,
; so the caller's FPU control word and register stack are left untouched.
;
; The routine returns without writing anything if bmW%, bmH% or smpSz% is <= 0,
; or if smpSz%^2 overflows 32 bits.
;
; NOTE: each colour sum is a 32-bit value, so the result is only meaningful
; while 255 * smpSz%^2 fits in 32 bits (smpSz% <= 4104).
; -----------------------------------------------------------------------------------------------
  pushad
  mov ebp, esp
  sub esp, 64
; -----------------------------------------------------------------------------------------------
; EBP!36 = pBm%
; EBP!40 = pBm2%
; EBP!44 = bmW%
; EBP!48 = bmH%
; EBP!52 = smpSz%
;
; ESP!0  = smpSzSq%               (= smpSz%^2, the divisor for the averages)
; ESP!4  = rowBytesLen%           (= 4 * bmW% * smpSz%, bytes per source row)
; ESP!8  = rowBytesLen% * smpSz%  (bytes per row of source cells)
; ESP!12 = 4 * smpSz%             (bytes per source cell, horizontally)
; ESP!16 = X%                     (destination column, 0 to bmW%-1)
; ESP!20 = Y%                     (destination row, 0 to bmH%-1)
; ESP!24 = rSum%
; ESP!28 = gSum%
; ESP!32 = bSum%
;
; Nothing is pushed after this point, so these ESP offsets stay fixed.
; -----------------------------------------------------------------------------------------------
;
; Reject parameters that would make the bottom-tested loops run with a
; non-positive count, or make the divisor zero.
;
  cmp DWORD [ebp + 44], 0             ; bmW% <= 0 ?
  jle near supersample_exit%
  cmp DWORD [ebp + 48], 0             ; bmH% <= 0 ?
  jle near supersample_exit%
  mov eax, [ebp + 52]                 ; smpSz%
  cmp eax, 0                          ; smpSz% <= 0 ?
  jle near supersample_exit%
;
; calc. smpSzSq% = smpSz%^2
;
  imul eax, eax
  jo near supersample_exit%           ; smpSz%^2 does not fit in 32 bits
  mov [esp + 0], eax                  ; ESP!0 = smpSzSq%
;
; Calc. rowBytesLen% = 4 * bmW% * smpSz%, and rowBytesLen% * smpSz%
;
  mov eax, [ebp + 44]                 ; bmW%
  imul eax, [ebp + 52]                ; bmW% * smpSz% (source width in pixels)
  shl eax, 2                          ; 4 bytes per pixel
  mov [esp + 4], eax                  ; ESP!4 = rowBytesLen%
  imul eax, [ebp + 52]
  mov [esp + 8], eax                  ; ESP!8 = rowBytesLen% * smpSz%
;
; Calc. 4*smpSz%
;
  mov eax, [ebp + 52]                 ; smpSz%
  shl eax, 2
  mov [esp + 12], eax                 ; ESP!12 = 4 * smpSz%
  mov ebx, [ebp + 40]                 ; EBX = pBm2 (stays constant from here on)
  mov DWORD [esp + 20], 0             ; Y% = 0
.supersample_yLoop%                   ; Y% goes from 0 to bmH%-1
  mov DWORD [esp + 16], 0             ; X% = 0
.supersample_xLoop%                   ; X% goes from 0 to bmW%-1
;
; Calc. O% = (rowBytesLen% * smpSz% * Y%) + (4 * smpSz% * X%)
; = byte offset of the top-left pixel of this source cell
;
  mov edi, [esp + 8]                  ; rowBytesLen% * smpSz%
  mov esi, [esp + 12]                 ; 4 * smpSz%
  imul edi, [esp + 20]                ; * Y%
  imul esi, [esp + 16]                ; * X%
  add edi, esi                        ; EDI = O%
  mov DWORD [esp + 24], 0             ; rSum% = 0
  mov DWORD [esp + 28], 0             ; gSum% = 0
  mov DWORD [esp + 32], 0             ; bSum% = 0
  mov edx, [ebp + 52]                 ; EDX = source rows still to add (smpSz% down to 1)
.supersample_innerYloop%
  mov esi, ebx
  add esi, edi                        ; ESI -> first pixel of this row of the cell
  mov ecx, [ebp + 52]                 ; ECX = pixels still to add in this row
.supersample_innerXloop%
  movzx eax, BYTE [esi + 0]           ; blue byte
  add [esp + 32], eax                 ; bSum% += blueVal
  movzx eax, BYTE [esi + 1]           ; green byte
  add [esp + 28], eax                 ; gSum% += greenVal
  movzx eax, BYTE [esi + 2]           ; red byte
  add [esp + 24], eax                 ; rSum% += redVal
  add esi, 4                          ; next source pixel
  sub ecx, 1
  jnz supersample_innerXloop%
  add edi, [esp + 4]                  ; O% += rowBytesLen% (next source row)
  sub edx, 1
  jnz supersample_innerYloop%
;
; Calc. destination address = pBm + 4*(Y%*bmW% + X%)
;
  mov esi, [esp + 20]                 ; Y%
  imul esi, [ebp + 44]                ; Y%*bmW%
  add esi, [esp + 16]                 ; Y%*bmW% + X%
  shl esi, 2                          ; 4*(Y%*bmW% + X%)
  add esi, [ebp + 36]                 ; ESI -> destination pixel
;
; Write averaged red, green, blue values to the destination pixel.
; Each sum is at most 255 * smpSzSq%, so every quotient fits in AL.
;
  mov ecx, [esp + 0]                  ; ECX = smpSzSq%
  mov eax, [esp + 24]                 ; rSum%
  xor edx, edx                        ; DIV divides EDX:EAX
  div ecx
  mov [esi + 2], al
  mov eax, [esp + 28]                 ; gSum%
  xor edx, edx
  div ecx
  mov [esi + 1], al
  mov eax, [esp + 32]                 ; bSum%
  xor edx, edx
  div ecx
  mov [esi + 0], al
  mov eax, [esp + 16]
  add eax, 1                          ; X% += 1
  mov [esp + 16], eax
  cmp eax, [ebp + 44]                 ; X% < bmW% ?
  jl near supersample_xLoop%
  mov eax, [esp + 20]
  add eax, 1                          ; Y% += 1
  mov [esp + 20], eax
  cmp eax, [ebp + 48]                 ; Y% < bmH% ?
  jl near supersample_yLoop%
.supersample_exit%
  mov esp, ebp                        ; discard locals
  popad
  ret 20                              ; return and drop the five dword parameters
]
NEXT I%
ENDPROC
|
|
|
|
|