; Juddpeg -- A jpeg decoder for the C64 ; ; SLJ 9/17/99 ; ; MCM-lace version 10/1/99 ; Generic version 10/7/99 ; Last update: 11/04/99 ; IFLI version: 11/22/99 ; ; ; Atari 8-bit port: Raphael Espino ; started 22-Nov-00 ; last updated 20-Jul-07 ; ;; ;; decoder memory map: ;; ;; --- zero page --- ;; $0080 - $00BF decoder zero page addresses ;; $00C0 - $00FF available for viewer ;; ;; ;; $0480 - $057F input buffer ;; ;; --- page 6 --- ;; $0600 - $061F decoder interface variables ;; $0620 - $062F viewer jump vectors ;; $0630 - $06FF available for viewer ;; ;; --- main RAM --- ;; $2000 - $25FF Huffman tree buffer ;; $2600 - $2FFF Y component image buffer ;; $3000 - $39FF Cr component image buffer ;; $3A00 - $43FF Cb component image buffer ;; $4400 - $52FF decoder code ;; $5300 - $70BF data tables ;; ;; $70C0 - $FFFF Available for decoder to use ;; ---------------- renderer interface addresses ----------------------- ;; mixed case addresses can be modified by viewer, changes made to ;; these will be read by the decoder LastCol = $600 ; Last column number to display LastRow = $601 ; Last row number to display FirstCol = $602 ; Column offset (left edge of output image) FirstRow = $603 ; Row offset (top edge of output image) IocbNum = $604 ; IOCB to read jpeg data from Error = $605 ; non 0 if error ocurred decoding jpeg ; error codes are as shown below ;; upper case addresses are set by decoder and should not be modified ;; by viewer WIDTH = $606 ; Width of image in pixels (2 bytes) HEIGHT = $608 ; Height of image in pixels (2 bytes) STACKPT = $60A ; stack pointer at program start VERSION = $60B ; decoder version number RERUN = $60C ; 2 bytes - restart address MEMTOP = $60E ; End of output buffer, above this is free for viewer - not any more it isn't ; not for viewer use, this is used to keep track of current position ; in huffman buffer SKIPERR = $610 ; Error in an image with restart markers ; decoder will continue decoding and set this to non 0 BUFCOUNT = $611 ; horizontal buffer count, this is increased by 1 for ; every buffer full of data on the current row ; the last buffer full will have msb set COMPONENTS = $612 ; number of colour components in this JPEG image ;; version 0 first Atari port of decoder ;; version 1 uses $0480 - $057F as input buffer ;; added ABORT code ;; fixed repeated rows+cols at end of image ;; version 2 added HuffTop ;; version 3 made width available to viewer ;; version 4 fixed DRI with restart interval = 0 bug ;; version 5 ignores HUFFMAN err if restart markers are being used ;; added SKIPERR ;; fixed repeated columns at end of image when coloff > 0 ;; version 6 rearranged interface addresses ;; added HEIGHT and BUFCOUNT ;; special handling of images with WIDTH > 320 ;; changed DISPCOLS, DISPROWS, coloff, rowoff, HuffTop to ;; LastCol, LastRow, FirstCol, FirstRow, MEMTOP ;; version 7 rearrange code and data areas to allow for bank switching ;; extended RAM without swaping out the output image buffer ;; renderer jump vectors RENDINIT = $620 ; renderer init vector RENDSTART = RENDINIT+3 ; renderer, image data about to start RENDDRAW = RENDINIT+6 ; renderer, draw 8 lines of image RENDEND = RENDINIT+9 ; renderer, image finished RENDUNUSED = RENDINIT+12 ; unused for now, point at an rts ;; Rest of page 6 from $630 - $6FF is available to renderer ;; ------------ end of renderer interface addresses ---------------- NOTJPG = 1 ; Error codes READERR = 2 BADQT = 3 ; bad data in DQT segment BADHT = 4 ; bad data in DHT segment HUFFERR = 5 PROGJPG = 6 ; progressive jpeg not supported ABORT = 128 ; abort decoding ; ; Atari addresses ; ; ; O.S. ROM ; CIOV = 58454 ; ; O.S. RAM ; COLOR4 = 712 ICCOM = 834 ICSTA = 835 ICBAL = 836 ICBAH = 837 ICBLL = 840 ICBLH = 841 ICAX1 = 842 ICAX2 = 843 ; ; PRESETS ; OPEN = 3 ; open channel GETBUF = 7 ; get buffer PUTBUF = 11 ; put buffer CLOSE = 12 ; close channel ;; decoder also uses empty space at 1152 to buffer input data INBUFFSZ = $FF ; input buffer size, must be <= 255 INBUFF = $0480 ; input buffer ;; ;; page 0 addresses used by decoder ;; mult1lo = 128 ; data tables mult1hi = 130 mult2lo = 132 mult2hi = 134 point = 136 ; image buffer pointer temp = 138 quantp = 140 ; Quant table huff = 142 ; Huffman pointers temp2 = 144 ; 1 byte count = 145 ; Used by GetBits, 1 byte ; and AddNode Bitslo = 146 ; and Dequantize Bitshi = 147 ; dest = 148 index = 150 ; 1 byte t1 = 151 t2 = 153 t3 = 155 DCT = 157 ; 16 bytes worth here vsamp = 173 ;Desample hsamp = 174 nbits = 175 ; number of bits left in current input byte byte = 176 ; current input byte inbytecnt= 177 ; number of bytes left in current buffer ;; ;; page 0 addresses from 178 upwards are available to renderer ;; ;; ;; data table addresses, the data is included at the end of this file ;; DataStart = $5300 Negmlo = DataStart ; $5300 Posmlo = Negmlo+$100 ; $5400 Negmhi = Posmlo+$200 ; $5600 Posmhi = Negmhi+$100 ; $5700 ; 2 pages a1lo = Posmhi+$200 ; $5900 ; cos(2a), a=pi/8 a1hi = a1lo+$100 ; $5A00 a2lo = a1hi+$100 ; $5B00 ; cos(a)-cos(3a) a2hi = a2lo+$100 ; $5C00 a3lo = a1lo ; cos(2a) a3hi = a1hi a4lo = a2hi+$100 ; $5D00 ; cos(a)+cos(3a) a4hi = a4lo+$100 ; $5E00 a4gh = a4hi+$100 ; $5F00 a5lo = a4gh+$100 ; $6000 ; cos(3a) a5hi = a5lo+$100 ; $6100 sec1 = a5hi+$100 ; $6200 sec2 = sec1+$200 ; $6300 sec3 = sec2+$200 ; $6500 sec4 = sec3+$200 ; $6700 sec5 = sec4+$200 ; $6900 sec6 = sec5+$200 ; $6B00 sec7 = sec6+$200 ; $6D00 ; - 2 pages worth a1216 = 46341 ;a1*2^16 a2216 = 35468 ;a2*2^16 a3216 = a1216 a4216 = 20091 ;... a5216 = 25080 trans = sec7+$200 ; $6F00 Veclo = trans+$80 ; $6F80 ; Vec to be quantized Vechi = Veclo+$40 ; $6FC0 QT0 = Vechi+$40 ; $7000 ; Quantization tables QT1 = QT0+$40 ; $7040 QT2 = QT1+$40 ; $7080 MAXBUFWID = 20 ; can fit 20 columns (160 pixels) in buffer LINELEN = MAXBUFWID * 8 ; number of bytes per line BUFLEN = LINELEN * 8 ; size of one buffer (8 lines worth) NUMBUFS = 2 ; number of buffer for each component ImgBuf = $2600 ; Image data buffer ImgBufSize = BUFLEN*NUMBUFS*3 YBuf = ImgBuf ; Y component image buffers - $2600 CrBuf = YBuf+BUFLEN*NUMBUFS ; Cr component image buffers - $3000 CbBuf = CrBuf+BUFLEN*NUMBUFS ; Cb component image buffers - $3A00 Huffmem = $2000 ; Huffman trees - 6 pages worth? $2000-$25FF .addr VERSION .addr RERUN+1 .byte 128+7 ; version 7 of Atari port of colour decoder .addr RunAgain ; set up address to restart decoder .addr segstart .addr segend-1 .org $4400 segstart: Start: tsx stx STACKPT RunAgain: ldx STACKPT ; make sure stack pointer is reset txs ; if we are re-running ldx #0 stx Error stx SKIPERR stx AtEOF stx SkipFF stx lastchar ; reset last character printed - comments not ; displayed in colour version, so this not used stx inbytecnt ; no data in input buffer yet stx reslen ; restart interval length stx reslen+1 stx nbits dex ; x = 255 stx filepos stx filepos+1 stx filepos+2 lda #>Posmlo sta mult1lo+1 lda #>Negmlo sta mult2lo+1 lda #>Posmhi sta mult1hi+1 lda #>Negmhi sta mult2hi+1 jsr OPNGR ; make sure screen open for gr.0 jsr strout ; display title info .byte 125, 155, "based on jpz-ifli, slj 12/8", 155,0 jsr RENDINIT ; let renderer initialise jsr GETIN ;check JPEG SOI cmp #$ff ;marker = $FFD8 bne @err1 jsr GETIN cmp #$d8 bne @err1 lda #Huffmem ; in Huff buffer sta MEMTOP+1 ; Zero out image buffer lda #ImgBuf sta point+1 ldx #>ImgBufSize lda #0 tay @loopbf: sta (point),y dey bne @loopbf inc point+1 dex bne @loopbf jsr GetAPP0 @loop: lda Error bne @err lda AtEOF ; exit if end of file reached bne @done2 jsr DoMarker ; decode next segment jmp @loop @err1: lda #NOTJPG sta Error @err: ;.A = err no cmp #ABORT ; did user abort? beq @done2 pha jsr OPNGR ; switch to GR.0 jsr strout .byte 155 .byte "error @ $",0 lda filepos+2 jsr HexOut lda filepos+1 jsr HexOut lda filepos jsr HexOut jsr PRINTNL pla asl tax lda ERRTAB-2,x tay lda ERRTAB-1,x tax jsr findlen jsr putbuf @done2: jsr RENDEND ; tell renderer we are about to finish @done: ;; return to environment jmp OPNGR ; make sure we're in GR.0 before returning AtEOF: .byte 0 ERRTAB: .addr @notjpg .addr @readerr .addr @dqt .addr @dht .addr @huff .addr @prgerr @notjpg: .byte "Not a jpg!",0 @readerr: .byte "Read err",0 @dqt: .byte "dqt err",0 @dht: .byte "dht err",0 @huff: .byte "Huff err",0 @prgerr: .byte "Unsupported jpg",0 ; Bit patterns (masks) BITP: .byte 0 .byte $01, $02, $04, $08, $10, $20, $40, $80 .byte $01, $02, $04, $08, $10, $20, $40, $80 ; ; DoMarker -- Read Marker and call ; appropriate routine. ; Unknown: jsr Ignore DoMarker: lda AtEOF beq @c1 rts @c1: jsr GetHeader ;find next bcs DoMarker Do2: lda Header cmp #$fe ; enable this to display comments in JPEG file beq @com ; disable to save RAM cmp #$dd beq @dri cmp #$db beq @dqt cmp #$c4 beq @dht cmp #$c0 beq @sof cmp #$c2 ; check for a progressive jpeg bne @notprg lda #PROGJPG sta Error rts @notprg: cmp #$da bne Unknown ; unknown segment start, just skip it @sos: jmp SOS ; start of scan - this is the image data @com: jmp Comment ; comment @dri: jmp DRI ; define restart interval marker @dqt: jmp DQT ; define quantization table @dht: jmp DHT ; define huffman table @sof: jmp SOF ; start of frame ; ; GetAPP0 -- Read JFIF header ; GetAPP0: jsr GetHeader bcs @jmp lda Header cmp #$e0 ;APP0 marker beq Ignore jmp Do2 @jmp: jmp DoMarker ; Ignore rest of segment Ignore: jsr GetByte bcs @rts lda AtEOF bne @rts jsr DecLen bne Ignore @rts: rts Header: .byte 0 Length: .word 0 ;lo,hi ;; keep numcols and numrows together, in that order numcols: .byte 0 ; image width (pixels/8) numrows: .byte 0 ; image height (pixels/8) ; ; GetHeader -- Read in header bytes. ; On exit: ; C set -> error ; Z set -> end of file ; GetHeader: lda #0 sta Header jsr GetByte bcs @rts cmp #$ff bne @sec jsr GetByte sta Header cmp #$d8 ;Start of JPEG beq @clc ;lame Photoshop cmp #$d9 ;End of file bne @c2 sta AtEOF beq @clc @c2: jsr GetByte bcs @rts sta Length+1 jsr GetByte bcs @rts sec sbc #2 sta Length bcs @c3 dec Length+1 @c3: ora Length+1 ;Empty segment beq GetHeader @clc: clc rts @sec: sec @rts: rts ; ; GetByte ; SkipFF: .byte 0 ;Flag GetByte: jsr GETIN cmp #$ff bne @rts ldx SkipFF beq @rts jsr GETIN sta Header tay bne @rts lda #$ff @rts: ldx AtEOF cpx #1 ; set carry bit as appropriate rts ;C set -> error GETIN: inc filepos bne @c1 inc filepos+1 bne @c1 inc filepos+2 @c1: ;; READ IN 1 BYTE FROM OPEN FILE ;; this uses an open IOCB to read in a buffer full of data from the ;; jpeg file and then returns a byte at a time from the buffer ;; if you don't want to use the IOCB, then replace this with something ;; else. This routine should return the next byte to decode in the ;; Accumulator. Return with carry clear for success, and carry set ;; for error lda inbytecnt bne @retbyte ;; no more data left in buffer, so go get another buffer full lda COLOR4 ; flash backgroud as image is being decoded clc adc #16 sta COLOR4 ldx IocbNum lda #INBUFFSZ ; read in a buffer's worth of data, sta ICBLL,x lda #0 ; should be faster than calling CIO for sta ICBLH,x ; just 1 byte at a time lda #INBUFF sta ICBAH,x lda #GETBUF jsr SetIcc lda ICBLL,x bne @noteof ; not reached eof yet inc AtEOF sec ; eof reached, set N flag rts @noteof: sta inbytecnt lda #0 sta @nextbyte @retbyte: ; return next byte available in buffer ldx @nextbyte lda INBUFF,x inc @nextbyte dec inbytecnt clc ; clear C flag rts ; C flag set on error @nextbyte: .byte 0 ; pointer to next byte ; ; GetBit -- get next bit! ; GetBit: dec nbits bpl @get lda #7 sta nbits jsr GetByte sta byte @get: asl byte @rts: rts ; ; OPEN SCREEN FOR GR.0 ; OPNGR: ldx #0 lda #CLOSE jsr SetIcc lda #E sta ICBAH lda #12 sta ICAX1 stx ICAX2 ; open for gr.0 lda #OPEN SetIcc: sta ICCOM,x jmp CIOV E: .byte "E:" ; display embeded string on screen ; string data should follow strout call and be terminated ; with a 0. Limited to 255 byte strings strout: pla ; get string address from stack tay pla tax iny bne @NIN2 inx @NIN2: jsr findlen tay txa pha ; modify return address to tya ; return immediately after pha ; terminating 0 putbuf: lda #PUTBUF ; found end of string ldx #0 jmp SetIcc findlen: sty ICBAL ; point at start of string sty @loop+1 stx @loop+2 stx ICBAH ldy #0 ; no data yet sty ICBLH @loop: lda $c000,y ; - this address gets modified beq @exit ; continue until we find terminating 0 iny bne @loop ; forced branch @exit: tya sta ICBLL clc adc @loop+1 bcc @noinx inx @noinx: rts ; Print hex number on screen HexOut: pha ; save original value lsr ; move hi hex digit to bottom nibble lsr lsr lsr jsr @print ; display hi hex digit pla ; restore original value and #$0F ; and get lo hex digit @print: cmp #10 ; if digit is > 9, then convert it to A bcc @c1 adc #6 @c1: adc #48 jmp PRINT1BYTE ;****************************** ; ; Main header processing routines ; ;******************************* DecLen: lda Length bne @c1 ora Length+1 beq @rts dec Length+1 @c1: dec Length lda Length ora Length+1 @rts: rts ;; handle a comment segment by displaying text on the screen Comment: jsr GetByte bcs @rts ldx lastchar ; get last character printed sta lastchar ; remember last char before converting cmp #$0D ; CR beq @oops2 cmp #$0A ; LF bne @c0 cpx #$0D beq @skip ; if CR/LF only display one return char @c0: cmp #32 ; only want printing chars bcc @oops cmp #125 bcc @c1 @oops: lda #'@' ; display @ instead of non printing char .byte $2C @oops2: lda #155 ; CR or LF to 155 @c1: jsr PRINT1BYTE ; display character on screen @skip: jsr DecLen bne Comment @rts: ; drop through to print final newline ;; print a newline PRINTNL: lda #155 ; print a return char ;; use CIO to put 1 byte on screen PRINT1BYTE: sta @dispchar jsr strout @dispchar: .byte 0,0 rts lastchar: .byte 0 ; last character displayed ; Lame restart markers reslen: .word 0 cres: .word 0 ;; define restart interval marker DRI: jsr GETIN ; remember restart interval length sta reslen+1 sta cres+1 jsr GETIN sta reslen sta cres rts ;; ;; check if we have reached restart interval ;; Make sure you only call this if (reslen || reslen+1) > 0 ;; DecRes: dec cres bne @rts lda cres+1 beq @restart dec cres+1 @rts: rts @restart: sta nbits ;Skip bits - set nbits to 0 jsr GetByte ;Read $FFxx lda reslen sta cres lda reslen+1 sta cres+1 Restart: lda #0 ldx #5 @l2: sta DClo,x sta DChi,x dex bpl @l2 rts ; Define Quantization table DQT: jsr DecLen beq @rts jsr GetByte bcs @err tay and #$0F ;number of QT bne @c1 ldx #QT0 bne @ok @c1: cmp #1 bne @c2 ldx #QT1 bne @ok @c2: cmp #2 bne @err ldx #QT2 @ok: stx point ;QT addr sta point+1 tya and #$F0 bne @err ;0 = 8-bit ldy #0 @loop: sty temp ;Counter lda Length ora Length+1 beq @err jsr GetByte bcs @err ldy temp sta (point),y jsr DecLen iny cpy #64 bne @loop jmp DQT ;Multiple QT's allowed @err: lda #BADQT ;Only 0-3 allowed sta Error @rts: rts ; Define Huffman table hufflen: .byte 0 DHT: jsr DecLen beq @jerr @getb: jsr GetByte bcc @cont @jerr: jmp @err @cont: tay ;Info byte and #$0F cmp #$04 bcs @jerr asl tax ;table num 0-3 tya and #$F0 beq @ok ;DC table cmp #$10 bne @jerr txa ;AC table ora #$08 ;+8 tax @ok: lda MEMTOP sta DCHuff0,x sta huff lda MEMTOP+1 sta DCHuff0+1,x sta huff+1 stx temp2 ldy #1 ;Right node jsr NewNode ;Root node ldx #1 @l1: stx temp lda Length ora Length+1 beq @err jsr GetByte bcs @err ldx temp sta symbols-1,x jsr DecLen inx cpx #17 bne @l1 lda #$FF sta huffbits sta huffbits+1 lda #1 sta hufflen @loop: inc huffbits+1 ;hi,lo! bne @c1 inc huffbits @c1: @l2: ldx hufflen dec symbols-1,x bpl @c2 cpx #16 beq @next asl huffbits+1 rol huffbits inc hufflen bne @l2 @c2: ldx temp2 lda DCHuff0,x sta huff lda DCHuff0+1,x sta huff+1 jsr GetByte bcs @err ldx hufflen jsr AddNode ; AddNode always returns with carry clear jsr DecLen jmp @loop @next: jsr DecLen beq @rts jmp @getb ;Multiple HTs @err: lda #BADHT sta Error @rts: rts ; Start of Frame SOF: ldx #5 lda #0 @l1: sta csampv,x sta csamph,x dex bpl @l1 jsr Get cmp #8 beq @ok lda #BADQT sta Error rts @ok: jsr Get sta HEIGHT+1 jsr Get sta HEIGHT sec sbc #1 sta numrows lda HEIGHT+1 sbc #0 ldx #1 jsr @multrowcols ; multiply numrows by 8 and get next byte sta WIDTH+1 jsr Get sta WIDTH sec sbc #1 ;0..7 instead of 1..8 sta numcols lda WIDTH+1 sbc #0 ldx #0 jsr @multrowcols ; multiply numcols by 8 and get next byte sta COMPONENTS sta temp @loop: jsr Get sta temp+1 ;ID jsr Get ldx temp+1 pha and #$0F sta csampv,x pla lsr lsr lsr lsr sta csamph,x jsr Get ldx temp+1 sta cquant,x dec temp bne @loop ldx #5 ;Find max sample lda #0 @l2: cmp csamph,x bcs @c2 lda csamph,x @c2: dex bne @l2 sta csamph ;Store in +0 ldx #5 lda #0 @l3: cmp csampv,x bcs @c3 lda csampv,x @c3: dex bne @l3 sta csampv ldx COMPONENTS @samplp: ldy #0 ;Compute expansion factors tya ;maxsamp/samp clc @cshlp: iny adc csamph,x cmp csamph ;max bcc @cshlp tya sta hsampst-1,x lda #0 tay clc @csvlp: iny adc csampv,x cmp csampv bcc @csvlp tya sta vsampst-1,x dex bne @samplp rts ;; ;; Multiply numrows or numcols by 8 and get next byte from input ;; X = 0 - multiply numcols by 8 ;; X = 1 - multiply numrows by 8 ;; returns ;; A = next input byte ;; @multrowcols: ldy #3 @rowlp: lsr ror numcols,x ; 1 col/row = 8 pixels, so mulitply by 8 dey bne @rowlp inc numcols,x ; drop through to get next byte Get: lda Length ora Length+1 beq @err2 jsr DecLen jsr GetByte bcc @rts @err2: pla pla @err: lda #READERR sta Error @rts: rts ; And finally -- start of scan! row: .byte 0 col: .byte 0 SOS: dec SkipFF ;Skip $FF bytes jsr RENDSTART ldy #2 @adjlp: ;; ;; Adjust a value to a csamph boundary, this ensures that colour ;; and greyscale data are aligned correctly ;; ;; Acc = value to aligne ;; ;; Returns ;; Acc = aligned value ;; ldx csamph dex txa eor #$FF and LastCol,y sta LastCol,y ; data will be incorrecty aligned compared dey dey bpl @adjlp jsr SetBufLimit jsr Get sta temp ;# of components sta COMPONENTS @l1: jsr Get sta temp+1 ;Component ID jsr Get ldx temp+1 pha and #$0F sta ACHuff,x pla lsr lsr lsr lsr sta DCHuff,x dec temp bne @l1 jsr Get ;Scan parameters jsr Get ;(progressive) jsr Get ;(ignore) ; Image data begins here lda #0 sta row sta col sta BUFCOUNT jsr Restart ReadY: ;Intensity ldx #1 ;Component lda #YBuf jsr ReadDU ReadCb: ;Chrominance ldx COMPONENTS dex beq ReadDone ldx #2 lda #CbBuf jsr ReadDU ReadCr: ;Chrominance ldx #3 lda #CrBuf jsr ReadDU ReadDone: lda reslen+1 ; check if restart interval defined ora reslen beq @norsin ; no restart interval, don't decrement it jsr DecRes ; decrement byte count till Restart marker expected @norsin: lda AtEOF ; exit at end of file ora Error ; check for ABORT code from renderer beq @skdone jmp @done @skdone: lda csamph ; Max sample clc adc col sta col ; check if we've reached end of buffer/image cmp FirstCol ; have we reached first visible column? bcc ReadY ; no, go do another column cmp LastCol ; have we reached last visible column? bcs @cknumcol ; yes, check if this is last column in image cmp buflimit ; check if we've reached buffer limit bcs @dobuf ; if so, then call viewer to handle data @cknumcol: cmp numcols ; reached last column in image? @bccrdy: bcc ReadY ; no, go back and do another column @dobuf: asl BUFCOUNT cmp numcols ; set carry if this is the last buffer ror BUFCOUNT ; last buffer of data will have msb set ; we've got a buffer worth of data for the viewer to display ldx row ldy csampv ; maximum vertical sampling factor lda #0 @calclines: inx cpx FirstRow ; is current row before starting row? bcc @noadd cpx LastRow ; is current row after last row? bcs @noadd ; accumulator holds # of lines in current buffer ; depending on vertical sampling factor, we adc curbufrows ; can have up to 24 lines (3 rows) of image @noadd: dey bne @calclines stx currow tax ; tell viewer how many lines are in buffer(s) beq @norend ; no lines to display, ignores all before 1st row ldy #>ImgBuf ;; this gets high byte of output buffer, passes it to decoder, jpy ;; uses buftab, jpz uses curbuf for current buffer, this may not ;; work though, needs looking at in a lot more detail! lda #BUFLEN sta curbuf+1 dex bne @next dec temp bne @loopy rtsdu: rts ; ; Fetch the data ; ; Parameters: ; Acc = curcol Fetch: ldx #0 stx dest+1 inx stx RendFlag ; 0 = render, 1 = don't render ldx row ; don't decode the rows before the 1st one inx ; 2 inx's because we may need bottom half of inx cpx FirstRow ; this row at top of screen if rows are bcc @decode ; being doubled and the check before calling ; viewer checks against row+1 cmp FirstCol ; blocks before the 1st visible column bcc @decode ; won't be displayed cmp LastCol ; blocks after the last visible column bcs @decode ; won't be displayed clc adc #MAXBUFWID lsr RendFlag ; render this column ;; carry will be set by lsr RendFlag sbc buflimit ; last column in current buffer asl ;offset = col*8 asl asl rol dest+1 adc curbuf ;Ybuf, etc. sta dest ;Data storage lda curbuf+1 adc dest+1 sta dest+1 @decode: jsr DecodeDC lda Error bne rtsdu jsr DecodeAC lda Error ora RendFlag bne rtsdu ldx curcomp lda cquant,x tax lda quanttablo,x sta quantp lda quanttabhi,x sta quantp+1 ldx #63 @loop: txa tay lda (quantp),y sta mult1lo sta mult1hi eor #$FF clc adc #1 sta mult2lo sta mult2hi ldy Veclo,x bne @c1 sty Bitslo sty Bitshi beq @high ; forced branch @c1: lda (mult1lo),y sec sbc (mult2lo),y sta Bitslo lda (mult1hi),y sbc (mult2hi),y sta Bitshi @high: ldy Vechi,x lda (mult1lo),y sec sbc (mult2lo),y clc adc Bitshi ldy zigzag,x ;Un-zigzag iny sta trans,y dey lda Bitslo sta trans,y dex bpl @loop ; drops through ;******************************* ; ; IDCT routines ; ;******************************* ; First the columns ldx #0 @l0: stx index ldy #0 @l1: lda trans,x sta DCT,y lda trans+1,x sta DCT+1,y txa clc adc #16 tax iny iny cpy #16 bne @l1 jsr IDCT ldy #0 ldx index @l1b: lda Coeff,y sta trans,x lda Coeff+1,y sta trans+1,x txa clc adc #16 tax iny iny cpy #16 bne @l1b ldx index inx inx cpx #16 bcc @l0 ;; this copies an 8x8 block of decoded pixels into the output ;; buffer. Change this to modify the layout of the data in the ;; output buffer. ; Then the rows rows: ldx curcol inx cpx numcols ; is current column >= to image max column? bcc @notlast ; is less than, continue as normal beq @lstcol ; is equal to, check how many pixels to do rts ; is greater than, so skip them @lstcol: lda WIDTH ; check if width is not a multiple of 8 and #%00000111 asl ; data is 2 bytes worth for each pixel bne @skp16 ; not a multiple, so do less than 8 lines @notlast: lda #16 @skp16: sta @endcol ldx row inx cpx numrows bne @notlastr lda HEIGHT and #%00000111 bne @not0 @notlastr: lda #8 ; we've got a full buffer's worth @not0: sta curbufrows asl ; multiply by 16 asl asl asl sta @endrow ldx #0 stx index @l0: ldy #0 ; copy 16 bytes from trans to DCT @l1: lda trans,x sta DCT,y lda trans+1,x sta DCT+1,y inx inx iny iny cpy #16 bne @l1 stx index jsr IDCT lda vsamp sta huff ; temporary index for vertical sample @vsamplp: ldy #0 ldx #0 @l1b: lda Coeff,x sta Bitslo lda Coeff+1,x cmp #$80 ror ror Bitslo cmp #$80 ror ror Bitslo sta Bitshi lda Bitslo adc #128 ;C determines rounding sta Bitslo lda Bitshi ;Range check adc #0 beq @cont bpl @pos ; if hi byte > 0 then store $FF lda #0 .byte $2C @pos: lda #$FF .byte $2C @cont: lda Bitslo stx temp2 ldx hsamp @hsamplp: sta (dest),y iny dex bne @hsamplp ldx temp2 inx inx cpx @endcol bne @l1b @nxtlne: clc lda dest ; move down to next line adc #LINELEN sta dest+1 .endif dec huff ; have we drawn vsamp lines vertically? bne @vsamplp ldx index ; do 64 pixels (8x8) worth of data cpx @endrow ; unless we have less than 8 lines to draw bcc @l0 ; (2 bytes per pixel converted to 1 byte ; per pixel) cpx #128 ; we drew all 8 lines, then exit bcc @clrlines rts @clrlines: ; we drew less than 8 lines, so store 0 lda #4 ; acc will be multiplied by 2 below, 4*2=8 ldy hsamp ; we need to take hsamp into account ; so multiply number of times we will @hsampmlt: ; loop by 2^hsamp asl dey bne @hsampmlt tay dey ; number of times to loop - 1 lda #0 ; in the rest @clrlp2: ldx hsamp @clrlp: sta (dest),y dey dex bne @clrlp inc index ; we've done the equivalent of 1 source inc index ; pixel, so increase index now cpy #0 ; have we done enough pixels yet? bpl @clrlp2 lda vsamp ; reset temporary value ready for next time sta huff ; round the loop bne @nxtlne ; forced branch @endcol: .byte 0 ; last column to copy, catches images ; that don't have a width of a multiple of 8 @endrow: .byte 0 ; last row to copy, catches images ; that don't have a height of a multiple of 8 ; ; Decode DC coeff. ; DecodeDC: ldx curcomp ;Set Huffman lda DCHuff,x asl tax lda DCHuff0,x sta huff lda DCHuff0+1,x sta huff+1 jsr GetHuff ;Get category ldx Error bne @rts jsr GetBits ;Get the bits ldx curcomp lda Bitslo clc adc DClo,x sta DClo,x sta Veclo lda DChi,x adc Bitshi sta DChi,x sta Vechi @rts: rts ; ; Decode AC coeffs ; DecodeAC: ldx curcomp ;Set Huffman lda ACHuff,x asl sta @tmphuf ldy #1 @loop: sty temp2 ;Index ldx @tmphuf lda ACHuff0,x sta huff lda ACHuff0+1,x sta huff+1 jsr GetHuff ;Get RLE len beq @fill ldx Error bne @done sta count ;temp lsr lsr lsr lsr ;# of zeros beq @skip @fill: tax lda #0 ldy temp2 @lout: sta Veclo,y sta Vechi,y iny cpy #64 bcs @done dex bne @lout sty temp2 @skip: lda count and #$0F ;category jsr GetBits ldy temp2 lda Bitslo sta Veclo,y lda Bitshi sta Vechi,y iny cpy #64 bcc @loop @done: rts @tmphuf: .byte 0 ; ; Dequantize the vector Vec ; ; Mult is 16 bit signed x 8 bit unsigned ; with 16-bit result, so sign etc. are ; taken care of automatically. ; ; result -> trans ; quanttablo: .byte QT0 .byte >QT1 .byte >QT2 ; Table to un-zigzag coeffs; multiples ; of 2, since 2 byte result. zigzag: .byte 0,2,16,32,18,4,6,20 .byte 34,48,64,50,36,22,8,10 .byte 24,38,52,66,80,96,82,68 .byte 54,40,26,12,14,28,42,56 .byte 70,84,98,112,114,100,86,72 .byte 58,44,30,46,60,74,88,102 .byte 116,118,104,90,76,62,78,92 .byte 106,120,122,108,94,110,124,126 ; ; Retrieve .A bits and convert ; to signed number in (Bitslo, Bitshi) ; GetBits: sta count tax beq @zero jsr GetBit lda #0 bcs @c1 lda #$FF ;0-> negative @c1: sta Bitshi rol sta Bitslo sta @sign dec count beq @done @loop: jsr GetBit rol Bitslo rol Bitshi dec count bne @loop @done: lda @sign bpl @rts inc Bitslo ;Make 2's comp bne @rts inc Bitshi @rts: rts @zero: sta Bitslo sta Bitshi rts @sign: .byte 0 ; ; Huffman tree routines. ; ; The Huffman tree is implemented as ; a series of 2-byte nodes. Left ; nodes are at huff+2, right nodes ; are at (huff) if link < $8000. ; Link = $80xx means xx=leaf value, ; link = $FFxx means no right link, ; link+2 = MEMTOP -> no left link. ; DCHuff0: .addr 0 ;Addresses DCHuff1: .addr 0 DCHuff2: .addr 0 DCHuff3: .addr 0 ACHuff0: .addr 0 ACHuff1: .addr 0 ACHuff2: .addr 0 ACHuff3: .addr 0 ; Create new node and make current node ; point to it. ; ; On entry: .Y = 0 -> right node, ; otherwise left node NewNode: tya bne @skip ; Y!=0 is a left node lda MEMTOP ; Y = 0, this is a right node sec ; point previous node at new one sbc huff ; new node will be at MEMTOP sta (huff),y ; so find offset from previous node to iny ; new one, and store that value in previous lda MEMTOP+1 ; node's pointer sbc huff+1 sta (huff),y @skip: lda MEMTOP ; move MEMTOP to take account of the new sta point ; node clc adc #2 sta MEMTOP lda MEMTOP+1 sta point+1 adc #0 sta MEMTOP+1 ldy #1 lda #$FF sta (point),y ;Init new node rts ; Add a new node; .X = length ; (huff) -> tree root huffbits: .word 0 ;hi,lo AddNode: sta @huffval @loop: ldy #1 cpx #9 bcc @c1 dey @c1: lda BITP,x and huffbits,y bne @right @left: lda huff ;Check if at end clc adc #2 pha tay lda huff+1 adc #0 pha cpy MEMTOP sbc MEMTOP+1 bcc @skip1 ;Not a new node ldy #$80 ;Create left node jsr NewNode @skip1: pla sta huff+1 pla sta huff jmp @dex @right: ldy #1 lda (huff),y ;Check for rt ptr bpl @skip2 dey ;.Y=0 -> rt node jsr NewNode @skip2: ldy #0 lda (huff),y clc adc huff pha iny lda (huff),y adc huff+1 sta huff+1 pla sta huff @dex: dex bne @loop lda #$80 ldy #1 sta (huff),y ;Store value lda @huffval dey sta (huff),y ;$80xx rts @huffval: .byte 0 ; ; GetHuff -- Get valid Huffman code ; from (huff) ; GetHuff: lda #16 sta @bitcnt ldy #1 lda (huff),y cmp #$80 beq @found dec @bitcnt jsr GetBit bcs @right lda huff adc #2 ;C clear tax lda huff+1 adc #0 tay cpx MEMTOP sbc MEMTOP+1 bcs @err sty huff+1 stx huff bcc GetHuff ; forced branch @right: ldy #1 lda (huff),y bmi @err pha dey lda (huff),y clc adc huff sta huff pla adc huff+1 sta huff+1 bne GetHuff ; forced branch @found: dey lda (huff),y rts @err: @ckcnt: dec @bitcnt bmi @bitend jsr GetBit jmp @ckcnt @bitend: lda reslen+1 ; if there are restart markers in ora reslen ; the file, then ignore the error sta SKIPERR bne @rts ; and let the image re-sync at next lda #HUFFERR ; if no restart markers, then give an error sta Error @rts: lda #0 rts @bitcnt: .byte 0 F0 = DCT F1 = DCT+2 F2 = DCT+4 F3 = DCT+6 F4 = DCT+8 F5 = DCT+10 F6 = DCT+12 F7 = DCT+14 C0 = Coeff C1 = Coeff+2 C2 = Coeff+4 C3 = Coeff+6 C4 = Coeff+8 C5 = Coeff+10 C6 = Coeff+12 C7 = Coeff+14 ; ; Compute the inverse DCT (1D) ; ; Uses modified reversed flowgraph from ; Pennebaker & Mitchell, p. 52 ; ; Input: DCT coeffs contained in Flo/Fhi ; Output: Original coeffs in COEFFS ; IDCT: ; Since the algorithm is really an ; FFT converted into a DCT, the ; coefficients need a little massaging ; before tranformation. ; ; Specifically, ; F(i) = S(i)/(2cos(i*pi/16)) i=0..7 ; with F(0)=F(0)*2/sqrt(2), which can ; be combined with the first step ; using the table for i=4. ; ; These multipliers can in part be ; incorporated in the quantization ; table, but for now they're out in ; the open. ; ldx #0 lda #sec4 sta point+1 lda F0 sta Bitslo lda F0+1 jsr PMult sta F0+1 lda Bitslo sta F0 ldx #0 lda #sec1 sta point+1 lda F1 sta Bitslo lda F1+1 jsr PMult sta F1+1 lda Bitslo sta F1 ldx #0 lda #sec2 sta point+1 lda F2 sta Bitslo lda F2+1 jsr PMult sta F2+1 lda Bitslo sta F2 ldx #0 lda #sec3 sta point+1 lda F3 sta Bitslo lda F3+1 jsr PMult sta F3+1 lda Bitslo sta F3 ldx #0 lda #sec4 sta point+1 lda F4 sta Bitslo lda F4+1 jsr PMult sta F4+1 lda Bitslo sta F4 ldx #0 lda #sec5 sta point+1 lda F5 sta Bitslo lda F5+1 jsr PMult sta F5+1 lda Bitslo sta F5 ldx #0 lda #sec6 sta point+1 lda F6 sta Bitslo lda F6+1 jsr PMult sta F6+1 lda Bitslo sta F6 ldx #0 lda #sec7 sta point+1 lda F7 sta Bitslo lda F7+1 jsr PMult sta F7+1 lda Bitslo sta F7 ; Stage 1: F(5) <- F(5) - F(3) ; F(1) <- F(1) + F(7) ; F(7) <- F(1) - F(7) ; F(3) <- F(5) + F(3) lda F5 sec sbc F3 sta t1 lda F5+1 sbc F3+1 sta t1+1 lda F1 clc adc F7 sta t2 lda F1+1 adc F7+1 sta t2+1 lda F1 sec sbc F7 sta t3 lda F1+1 sbc F7+1 sta t3+1 lda F5 clc adc F3 sta F3 lda F5+1 adc F3+1 sta F3+1 lda t3 sta F7 lda t3+1 sta F7+1 lda t2 sta F1 lda t2+1 sta F1+1 lda t1 sta F5 lda t1+1 sta F5+1 ; Stage 2: F(2) <- F(2) - F(6) ; F(6) <- F(2) + F(6) ; F(1) <- F(1) - F(3) ; F(3) <- F(1) + F(3) lda F2 sec sbc F6 sta t1 lda F2+1 sbc F6+1 sta t1+1 lda F2 clc adc F6 sta F6 lda F2+1 adc F6+1 sta F6+1 lda t1 sta F2 lda t1+1 sta F2+1 lda F1 sec sbc F3 sta t1 lda F1+1 sbc F3+1 sta t1+1 lda F1 clc adc F3 sta F3 lda F1+1 adc F3+1 sta F3+1 lda t1 sta F1 lda t1+1 sta F1+1 ; Stage 3: F(2) <- a1*F(2) ; F(5) <- -a2*F(5) + t1 ; F(1) <- a3*F(1) ; F(7) <- a4*F(7) + t1 ; where t1 = -a5*(F(5) + F(7)) ; F(2) <- a1*F(2) ldx F2 ;Lo ldy F2+1 ;Hi lda a1lo,y clc adc a1hi,x sta Bitslo ;lo byte lda a1hi,y adc #0 cpy #$80 bcc @pos1 sta Bitshi lda Bitslo sbc #a1216 @pos1: sta F2+1 lda Bitslo sta F2 ; F(1) = a3*F(1) ldx F1 ;Lo ldy F1+1 ;Hi lda a3lo,y clc adc a3hi,x sta Bitslo lda a3hi,y adc #0 cpy #$80 bcc @pos1b sta Bitshi lda Bitslo sbc #a3216 @pos1b: sta F1+1 lda Bitslo sta F1 ; t1 = -a5*(F(5) + F(7)) lda F5 clc adc F7 tax ;Lo lda F5+1 adc F7+1 tay ;Hi lda a5lo,y clc adc a5hi,x sta Bitslo lda a5hi,y adc #0 sta Bitshi cpy #$80 bcc @pos2 lda Bitslo sbc #a5216 sta Bitshi @pos2: lda Bitslo eor #$FF clc adc #1 sta t1 lda Bitshi eor #$FF adc #0 sta t1+1 ; F(5) = t1 - a2*F(5) ldx F5 ;Lo ldy F5+1 ;Hi lda a2lo,y clc adc a2hi,x sta Bitslo lda a2hi,y adc #0 cpy #$80 bcc @pos3 sta Bitshi lda Bitslo sbc #a2216 @pos3: sta Bitshi lda t1 sec sbc Bitslo sta F5 lda t1+1 sbc Bitshi sta F5+1 ; F(7) = a4*F(7) + t1 ldx F7 ;Lo ldy F7+1 ;Hi lda a4lo,y clc adc a4hi,x sta Bitslo lda a4hi,y adc a4gh,x ;a4*.X can be >255 cpy #$80 bcc @pos4 sta Bitshi lda Bitslo sbc #a4216 @pos4: sta Bitshi lda Bitslo clc adc t1 sta F7 lda Bitshi adc t1+1 sta F7+1 ; Stage 4: ; F(0) <- F(0) + F(4) ; F(4) <- F(0) - F(4) ; F(6) <- F(2) + F(6) lda F0 clc adc F4 sta t1 lda F0+1 adc F4+1 sta t1+1 lda F0 sec sbc F4 sta F4 lda F0+1 sbc F4+1 sta F4+1 lda t1 sta F0 lda t1+1 sta F0+1 lda F2 clc adc F6 sta F6 lda F2+1 adc F6+1 sta F6+1 ; Stage 5: ; F(0) <- F(0) + F(6) ; F(4) <- F(2) + F(4) ; F(2) <- F(4) - F(2) ; F(6) <- F(0) - F(6) ; F(3) <- F(3) + F(7) ; F(7) <- F(7) + F(1) ; F(1) <- F(1) - F(5) ; F(5) <- -F(5) lda F0 clc adc F6 sta t1 lda F0+1 adc F6+1 sta t1+1 lda F0 sec sbc F6 sta F6 lda F0+1 sbc F6+1 sta F6+1 lda t1 sta F0 lda t1+1 sta F0+1 lda F4 clc adc F2 sta t1 lda F4+1 adc F2+1 sta t1+1 lda F4 sec sbc F2 sta F2 lda F4+1 sbc F2+1 sta F2+1 lda t1 sta F4 lda t1+1 sta F4+1 lda F3 clc adc F7 sta F3 lda F3+1 adc F7+1 sta F3+1 lda F7 clc adc F1 sta F7 lda F7+1 adc F1+1 sta F7+1 lda F1 sec sbc F5 sta F1 lda F1+1 sbc F5+1 sta F1+1 lda #0 sec sbc F5 sta F5 lda #0 sbc F5+1 sta F5+1 ; Final stage: ; c(0) = F(0) + F(3) ; c(1) = F(4) + F(7) ; c(2) = F(2) + F(1) ; c(3) = F(6) + F(5) ; c(4) = F(6) - F(5) ; c(5) = F(2) - F(1) ; c(6) = F(4) - F(7) ; c(7) = F(0) - F(3) ; ; Note: values are offset -128 lda F0 clc adc F3 sta C0 lda F0+1 adc F3+1 sta C0+1 lda F4 clc adc F7 sta C1 lda F4+1 adc F7+1 sta C1+1 lda F2 clc adc F1 sta C2 lda F2+1 adc F1+1 sta C2+1 lda F6 clc adc F5 sta C3 lda F6+1 adc F5+1 sta C3+1 lda F6 sec sbc F5 sta C4 lda F6+1 sbc F5+1 sta C4+1 lda F2 sec sbc F1 sta C5 lda F2+1 sbc F1+1 sta C5+1 lda F4 sec sbc F7 sta C6 lda F4+1 sbc F7+1 sta C6+1 lda F0 sec sbc F3 sta C7 lda F0+1 sbc F3+1 sta C7+1 rts ;Sheeew! PMult: ;exit .A = Bitshi bmi @neg beq @ok @l1: inx ;Shift count lsr ror Bitslo cmp #0 bne @l1 @ok: sta Bitshi lda Bitslo asl rol Bitshi adc point sta point lda Bitshi adc point+1 sta point+1 ldy #0 lda (point),y sta Bitslo iny lda (point),y dex bmi @rts @l1b: asl Bitslo rol dex bpl @l1b @rts: rts @neg: sta Bitshi lda #0 sec sbc Bitslo sta Bitslo lda #0 sbc Bitshi beq @ok2 @l2: inx ;Shift count lsr ror Bitslo cmp #0 bne @l2 @ok2: asl Bitslo rol sta Bitshi lda Bitslo adc point sta point lda Bitshi adc point+1 sta point+1 ldy #0 lda (point),y sta Bitslo iny lda (point),y dex bmi @rts2 @l2b: asl Bitslo rol dex bpl @l2b @rts2: sta Bitshi lda #0 sec sbc Bitslo sta Bitslo lda #0 sbc Bitshi rts filepos: ; 3 bytes for current file position symbols = filepos + 3 csampv = symbols + 16 ; Sampling factors csamph = csampv + 6 ; (horizontal) hsampst = csamph + 6 vsampst = hsampst + 5 cquant = vsampst + 5 ; Quantization table DClo = cquant + 6 ; DC coeffs DChi = DClo + 6 ACHuff = DChi + 6 ; AC table to use DCHuff = ACHuff + 6 ; DC table to use Coeff = DCHuff + 6 segend: .org Coeff + 16 ;; need to add check here for overrun .if (* >= DataStart) .out .concat("Error - Decoder code overrun into data area ", .string(*)) .else .out .concat("decoder code ends at ", .string(*)) .endif ;; include data too .include "jpydata.inc" ;; file header data .segment "EXEHDR" .byte 255, 255 ;; run address .segment "AUTOSTRT" .addr 736 .addr 737 .addr segstart