; Juddpeg -- A jpeg decoder for the C64 ; ; SLJ 9/17/99 ; ; MCM-lace version 10/1/99 ; Generic version 10/7/99 ; Last update: 12/18/99 ;; ;; Atari 8-bit port: Raphael Espino ;; started 16-Oct-00 ;; last updated 08-Oct-03 ;; ;; to assemble: ;; ca65 jpy1223-8.asm ;; then link with one of the viewers ;; ;; ;; decoder memory map: ;; ;; --- zero page --- ;; $0080 - $00BF decoder zero page addresses ;; $00C0 - $00FF available for viewer ;; ;; $0480 - $057F input buffer ;; ;; --- page 6 --- ;; $0600 - $061F decoder interface variables ;; $0620 - $062F viewer jump vectors ;; $0630 - $06FF available for viewer ;; ;; --- main RAM --- ;; $2000 - $2EFF code ;; $2F00 - $4BFF data tables ;; $4C00 - $4DFF decoding buffers ;; $4E00 - $6BFF output buffer ;; $6C00 - $78FF Huffman buffer ;; ;; $7900 - $FFFF is guaranteed free for renderer and screen RAM while ;; decoder is running ;; MEMTOP-$7900 is also free for renderer and screen RAM while ;; decoder is decoding. MEMTOP available in $60C ;; $4C00 - $8BFF is free for viewer after decoding has finished ;; (when RENDEND vector is called) but not before! ;; ;; ---------------- renderer interface addresses ----------------------- ;; mixed case addresses can be modified by viewer, changes made to ;; these will be read by the decoder LastCol = $600 ; Last column number to display LastRow = $601 ; Last row number to display FirstCol = $602 ; Column offset (left edge of output image) FirstRow = $603 ; Row offset (top edge of output image) IocbNum = $604 ; IOCB to read jpeg data from Error = $605 ; non 0 if error ocurred decoding jpeg ; error codes are as shown below ;; upper case addresses are set by decoder and should not be modified ;; by viewer WIDTH = $606 ; Width of image in pixels (2 bytes) HEIGHT = $608 ; Height of image in pixels (2 bytes) STACKPT = $60A ; stack pointer at program start VERSION = $60B ; decoder version number RERUN = $60C ; 2 bytes - restart address MEMTOP = $60E ; End of output buffer, above this is free for viewer SKIPERR = $610 ; Error in an image with restart markers ; decoder will continue decoding and set this to non 0 BUFCOUNT = $611 ; horizontal buffer count, this is increased by 1 for ; every buffer full of data on the current row ; the last buffer full will have msb set ;; page 6 addresses from $630 up are available to viewer ;; ------------- end of decoder info addresses ---------------------- ;; version 0 first Atari port of decoder ;; version 1 uses $0480 - $057F as input buffer ;; added ABORT code ;; fixed repeated rows+cols at end of image ;; version 2 added MEMTOP ;; version 3 made WIDTH available to viewer ;; version 4 fixed DRI with restart interval = 0 bug ;; version 5 ignores HUFFMAN err if restart markers are being used ;; uses jpz HUFFMAN code to reduce buffer size ;; added SKIPERR ;; fixed repeated columns at end of image when ColOff > 0 ;; version 6 rearranged interface addresses ;; added HEIGHT and BUFCOUNT ;; special handling of images with WIDTH > 320 ;; changed DispCols, DipsRows, ColOff, RowOff to ;; LastCol, LastRow, FirstCol, FirstRow ;; renderer jump vectors RENDINIT = $620 ; renderer init vector RENDSTART = RENDINIT+3 ; renderer, image data about to start RENDDRAW = RENDINIT+6 ; renderer, draw 8 lines of image RENDEND = RENDINIT+9 ; renderer, image finished RENDUNUSED = RENDINIT+12 ; unused for now, point at an rts ;; Rest of page 6 from $630 - $6FF is available to renderer ;; ------------ end of renderer interface addresses ---------------- NOTJPG = 1 ; Error codes READERR = 2 BADQT = 3 ; bad data in DQT segment BADHT = 4 ; bad data in DHT segment HUFFERR = 5 PROGJPG = 6 ; progressive jpeg not supported ABORT = 128 ; abort decoding YBuf = 0 ; ; Atari addresses ; ; ; O.S. ROM ; CIOV = 58454 ; ; O.S. RAM ; COLOR4 = 712 ICCOM = 834 ICSTA = 835 ICBAL = 836 ICBAH = 837 ICBLL = 840 ICBLH = 841 ICAX1 = 842 ICAX2 = 843 ; ; PRESETS ; OPEN = 3 ; open channel GETBUF = 7 ; get buffer PUTBUF = 11 ; put buffer CLOSE = 12 ; close channel ;; decoder also uses empty space at 1152 to buffer input data INBUFFSZ = $FF ; input buffer size, must be <= 255 INBUFF = $0480 ; input buffer ;; ;; page 0 addresses used by decoder ;; mult1lo = 128 ; data tables mult1hi = 130 mult2lo = 132 mult2hi = 134 point = 136 ; image buffer pointer temp = 138 quantp = 140 ; Quant table huff = 142 ; Huffman pointers temp2 = 144 ; 1 byte count = 145 ; Used by GetBits, 1 byte ; and AddNode Bitslo = 146 ; and Dequantize Bitshi = 147 ; dest = 148 index = 150 ; 1 byte t1 = 151 t2 = 153 t3 = 155 DCT = 157 ; 16 bytes worth here Coeff = 173 ; 16 bytes worth here ;; ;; page 0 addresses from 192 upwards are available to renderer ;; ;; ;; data table addresses, the data is included at the end of this file ;; Negmlo = $2F00 Posmlo = Negmlo+$100 Negmhi = Posmlo+$200 Posmhi = Negmhi+$100 ; 2 pages a1lo = Posmhi+$200 ; cos(2a), a=pi/8 a1hi = a1lo+$100 a2lo = a1hi+$100 ; cos(a)-cos(3a) a2hi = a2lo+$100 a3lo = a1lo ; cos(2a) a3hi = a1hi a4lo = a2hi+$100 ; cos(a)+cos(3a) a4hi = a4lo+$100 a4gh = a4hi+$100 a5lo = a4gh+$100 ; cos(3a) a5hi = a5lo+$100 sec1 = a5hi+$100 sec2 = sec1+$200 sec3 = sec2+$200 sec4 = sec3+$200 sec5 = sec4+$200 sec6 = sec5+$200 sec7 = sec6+$200 ; - 2 pages worth a1216 = 46341 ;a1*2^16 a2216 = 35468 ;a2*2^16 a3216 = a1216 a4216 = 20091 ;... a5216 = 25080 ;; ;; decoding buffer addresses, these can be reused by viewer ;; when decoding has finished, but not until! ;; trans = sec7+$200 ; Transform Veclo = trans+$80 ; Vec to be quantized Vechi = Veclo+$40 QT0 = Vechi+$40 ; Quantization tables QT1 = QT0+$40 QT2 = QT1+$40 QT3 = QT2+$40 ImgBuf = QT3+$40 ; Image data output buffer ($4E00) ; Renderer gets its decoded image data from here BUFLEN = $A00 ; buffer size ImgBufSize = BUFLEN*3 ; have 3 buffers available for image Huffmem = ImgBuf+ImgBufSize ; Huffman trees, $D00 worth .addr VERSION .addr RERUN+1 .byte 6 ; version 6 of Atari port of decoder .addr RunAgain ; set up address to restart decoder .addr segstart .addr segend-1 .org $2000 segstart: Start: tsx stx STACKPT RunAgain: ldx STACKPT ; make sure stack pointer is reset txs ; if we are re-running lda #0 sta Error ; no error sta SKIPERR sta AtEOF ; not and end of file sta SkipFF sta lastchar ; reset last character printed sta inbytecnt ; no data in input buffer yet sta reslen ; restart interval length sta reslen+1 sta nbits lda #$FF sta filepos ; initialise current file pos sta filepos+1 sta filepos+2 lda #>Posmlo ; set up positions in RAM for mult tables sta mult1lo+1 lda #>Negmlo sta mult2lo+1 lda #>Posmhi sta mult1hi+1 lda #>Negmhi sta mult2hi+1 jsr OPNGR ; make sure screen open for gr.0 jsr strout ; display title info .byte 125, 155,"based on juddpeg, slj 11/11",155,0 jsr RENDINIT ; let renderer initialise jsr GETIN ; check JPEG SOI cmp #$ff ; marker = $FFD8 bne @err1 jsr GETIN cmp #$d8 bne @err1 ;; clear buffers here in case renderer uses them for something else jsr InitHuff ; set up pointer to huff memory jsr InitBuff ; set up pointer to image buffer and init it jsr GetAPP0 @loop: lda Error ; exit on error bne @err lda AtEOF ; exit if end of file reached bne @done2 jsr DoMarker ; decode next segment jmp @loop @err1: lda #NOTJPG ; not a jpeg image error sta Error @err: ; Acc = err no cmp #ABORT ; did user abort? beq @done2 pha jsr OPNGR ; switch to GR.0 jsr strout ; display error message .byte 155 .byte "error at pos=$",0 lda filepos+2 ; display file position jsr HexOut lda filepos+1 jsr HexOut lda filepos jsr HexOut jsr PRINTNL pla asl tax lda ERRTAB-2,X tay lda ERRTAB-1,X tax jsr findlen jsr putbuf @done2: jsr RENDEND ; tell renderer we are about to finish ;; return to environment jmp OPNGR ; make sure we're in GR.0 before returning AtEOF: .byte 0 ; has end of file been reached? ERRTAB: ; error messages table .addr @notjpg .addr @readerr .addr @dqt .addr @dht .addr @huff .addr @prgerr @notjpg: .byte "Not a jpg file",0 @readerr:.byte "Read error",0 @dqt: .byte "Quantization table error",0 @dht: .byte "Huffman table error",0 @huff: .byte "Huffman error",0 @prgerr: .byte "Unsupported jpg",0 ; Bit patterns (masks) BITP: .byte $0 .byte $01, $02, $04, $08, $10, $20, $40, $80 .byte $01, $02, $04, $08, $10, $20, $40, $80 ; Zero out image output buffer InitBuff: lda #ImgBuf sta point+1 ldx #>ImgBufSize lda #0 tay ; now initialise image buffer @loop: sta (point),Y dey bne @loop inc point+1 dex bne @loop rts ; ; DoMarker -- Read Marker and call ; appropriate routine. ; Unknown: jsr strout .byte "Unknown header:FF",0 lda Header jsr HexOut jsr anyway jsr Ignore ; ignore this segment DoMarker: lda AtEOF ; if we are at end of file, then return beq @c1 rts @c1: jsr GetHeader ; find next bcs DoMarker Do2: ; Handle diffent segments lda Header cmp #$fe beq @com cmp #$dd beq @dri cmp #$db beq @dqt cmp #$c4 beq @dht cmp #$c0 beq @sof cmp #$c2 ; check for a progressive jpeg bne @notprg lda #PROGJPG sta Error rts @notprg: cmp #$da bne Unknown ; unknown segment start, just skip it @sos: jmp SOS ; start of scan - this is the image data @com: jmp Comment ; comment @dri: jmp DRI ; define restart interval marker @dqt: jmp DQT ; define quantization table @dht: jmp DHT ; define huffman table @sof: jmp SOF ; start of frame ; ; display trying anyway message ; anyway: jsr strout .byte " trying anyway" .byte 155,0 rts ; ; GetAPP0 -- Read JFIF header ; GetAPP0: jsr GetHeader bcs @jmp lda Header cmp #$e0 ;APP0 marker beq Ignore jsr strout .byte "Not JFIF format" jsr anyway jmp Do2 @jmp: jmp DoMarker ; Ignore rest of segment Ignore: jsr GetByte ; exit if error reading next byte bcs @rts lda AtEOF ; exit if end of file reached bne @rts jsr DecLen bne Ignore ; skip bytes until end of segment reached @rts: rts Header: .byte 0 Length: .word 0 ;lo,hi ;; keep numcols and numrows together, in that order numcols: .byte 0 ; image width (pixels/8) numrows: .byte 0 ; image height (pixels/8) ; ; GetHeader -- Read in header bytes. ; On exit: ; C set -> error ; Z set -> end of file ; GetHeader: lda #0 sta Header jsr GetByte bcs @rts cmp #$ff bne @sec jsr GetByte sta Header cmp #$d8 ;Start of JPEG beq @clc ;lame Photoshop cmp #$d9 ;End of file bne @c2 sta AtEOF beq @clc @c2: jsr GetByte bcs @rts sta Length+1 jsr GetByte bcs @rts sec sbc #2 ; 2 bytes of length info included in count sta Length bcs @c3 dec Length+1 @c3: ora Length+1 ;Empty segment beq GetHeader @clc: clc rts @sec: sec @rts: rts ; ; GetByte ; SkipFF: .byte 0 ;Flag GetByte: lda COLOR4 ; flash backgroud as image is being decoded clc adc #16 sta COLOR4 jsr GETIN cmp #$ff bne @rts ldx SkipFF beq @rts ; we're not skipping $FF's so return byte jsr GETIN sta Header tay ; $ff00 -> $ff bne @rts lda #$ff @rts: ; at end of file? ldx AtEOF cpx #1 ; set carry bit as appropriate rts ; C set -> error ;; get one byte from file and return it in acc, updating file position GETIN: inc filepos bne @c1 inc filepos+1 bne @c1 inc filepos+2 @c1: ;; READ IN 1 BYTE FROM OPEN FILE ;; this uses an open IOCB to read in a buffer full of data from the ;; jpeg file and then returns a byte at a time from the buffer ;; if you don't want to use the IOCB, then replace this with something ;; else. This routine should return the next byte to decode in the ;; Accumulator. Return with carry clear for success, and carry set ;; for error lda inbytecnt bne @retbyte ;; no more data left in buffer, so go get another buffer full ldx IocbNum lda #INBUFFSZ ; read in a buffer's worth of data, sta ICBLL,X lda #0 ; should be faster than calling CIO for sta ICBLH,X ; just 1 byte at a time lda #INBUFF sta ICBAH,X lda #GETBUF jsr SetIcc lda ICBLL,X bne @noteof ; not reached eof yet @err: inc AtEOF sec ; eof reached, set C flag rts @noteof: sta inbytecnt lda #0 sta @nextbyte @retbyte: ; return next byte available in buffer ldx @nextbyte lda INBUFF,x inc @nextbyte dec inbytecnt clc ; clear C flag rts ; C flag set on error @nextbyte: .byte 0 ; pointer to next byte inbytecnt: .byte 0 ; number of bytes currently in buffer ; ; GetBit -- get next bit! ; nbits: .byte 0 ;# of bits left byte: .byte 0 GetBit: dec nbits bpl @get lda #7 sta nbits jsr GetByte sta byte @get: asl byte @rts: rts ; ; OPEN SCREEN FOR GR.0 ; OPNGR: ldx #0 lda #CLOSE jsr SetIcc lda #E sta ICBAH lda #12 sta ICAX1 stx ICAX2 ; open for gr.0 lda #OPEN SetIcc: sta ICCOM,x jmp CIOV E: .byte "E:" ; display embeded string on screen ; string data should follow strout call and be terminated ; with a 0 strout: pla ; get string address from stack tay pla tax iny bne @NIN2 inx @NIN2: jsr findlen tay txa pha ; modify return address to tya ; return immediately after pha ; terminating 0 putbuf: lda #PUTBUF ; found end of string ldx #0 jmp SetIcc ; let CIO display it findlen: sty ICBAL ; point at start of string sty @loop+1 stx @loop+2 stx ICBAH ldy #0 ; no data yet sty ICBLH @loop: lda $c000,y ; - this address gets modified beq @exit ; continue until we find terminating 0 iny bne @loop ; forced branch @exit: tya sta ICBLL clc adc @loop+1 bcc @noinx inx @noinx: rts ;; print a newline PRINTNL: lda #155 ; print a return char ;; use CIO to put 1 byte on screen PRINT1BYTE: sta @dispchar jsr strout @dispchar: .byte 0,0 rts ; Print hex number on screen HexOut: pha ; save original value lsr ; move hi hex digit to bottom nibble lsr lsr lsr jsr @print ; display hi hex digit pla ; restore original value and #$0F ; and get lo hex digit @print: cmp #10 ; if digit is > 9, then convert it to A bcc @c1 adc #6 @c1: adc #48 jmp PRINT1BYTE ;******************************* ; ; Main header processing routines ; ;******************************* DecLen: lda Length bne @c1 ora Length+1 beq @rts dec Length+1 @c1: dec Length lda Length ora Length+1 @rts: rts ;; handle a comment segment by displaying text on the screen Comment: jsr GetByte bcs @rts ldx lastchar ; get last character printed sta lastchar ; remember last char before converting cmp #$0D ; CR beq @oops2 cmp #$0A ; LF bne @c0 cpx #$0D beq @skip ; if CR/LF only display one return char @c0: cmp #32 ; only want printing chars bcc @oops cmp #125 bcc @c1 @oops: lda #'@' ; display @ instead of non printing char .byte $2C @oops2: lda #155 ; CR or LF to 155 @c1: jsr PRINT1BYTE ; display character on screen @skip: jsr DecLen bne Comment @rts: jmp PRINTNL ; display final newline lastchar: .byte 0 ; last character displayed ; Lame restart markers reslen: .word 0 cres: .word 0 ;; define restart interval marker DRI: jsr GETIN ; remember restart interval length sta reslen+1 sta cres+1 jsr GETIN sta reslen sta cres rts ;; check if we have reached restart interval DecRes: lda reslen+1 ; check if restart interval defined ora reslen beq @rts ; no restart interval, don't decrement it dec cres bne @rts lda cres+1 beq @restart dec cres+1 @rts: rts @restart: sta nbits ;Skip bits - set nbits to 0 jsr GetByte ;Read $FFxx lda reslen sta cres lda reslen+1 sta cres+1 Restart: ldx #5 @l2: sta DClo,X ; could save 3 bytes here by setting X to sta DChi,X ; 11 and keeping DClo and DChi allocations dex ; together bpl @l2 rts ; Define Quantization table DQT: jsr DecLen beq @rts jsr GetByte bcs @err tay and #$0F ;number of QT bne @c1 ldx #QT0 bne @ok @c1: cmp #1 bne @c2 ldx #QT1 bne @ok @c2: cmp #2 bne @c3 ldx #QT2 bne @ok @c3: cmp #3 bne @err ldx #QT3 @ok: stx point ;QT addr sta point+1 tya and #$F0 bne @err ;0 = 8-bit ldy #0 @loop: sty temp ;Counter lda Length ora Length+1 beq @err jsr GetByte bcs @err ldy temp sta (point),Y jsr DecLen iny cpy #64 bne @loop jmp DQT ;Multiple QT's allowed @err: lda #BADQT ;Only 0-3 allowed sta Error @rts: rts ; Define Huffman table hufflen: .byte 0 DHT: jsr DecLen beq @jerr @getb: jsr GetByte bcc @cont @jerr: jmp @err @cont: tay ;Info byte and #$0F cmp #$04 ; table number must be < 4 bcs @jerr asl tax ;table num 0-3 tya and #$F0 beq @ok ;DC table cmp #$10 bne @jerr txa ;AC table ora #$08 ;+8 tax @ok: lda MEMTOP sta DCHuff0,X sta huff lda MEMTOP+1 sta DCHuff0+1,X sta huff+1 stx temp2 ldy #1 ; start with a right node jsr NewNode ;Root node ldx #1 @l1: stx temp lda Length ora Length+1 beq @err ; ran out of data to read jsr GetByte bcs @err ldx temp sta symbols-1,X jsr DecLen ; byte has been read inx cpx #17 ; read in 16 bytes bne @l1 lda #$FF sta huffbits sta huffbits+1 lda #1 sta hufflen @loop: inc huffbits+1 ;hi,lo! bne @c1 inc huffbits @c1: @l2: ldx hufflen dec symbols-1,X bpl @c2 cpx #16 beq @next asl huffbits+1 rol huffbits inc hufflen bne @l2 @c2: ldx temp2 lda DCHuff0,X sta huff lda DCHuff0+1,X sta huff+1 jsr GetByte bcs @err ldx hufflen jsr AddNode jsr DecLen jmp @loop @next: jsr DecLen beq @rts jmp @getb ;Multiple HTs @err: lda #BADHT sta Error @rts: rts ; Start of Frame ncomps: .byte 0 ; Num components SOF: ldx #5 ; could save 3 bytes here by setting X to 11 lda #0 ; and keeping csampv/h allocations together. @l1: sta csampv,X ; initialise horizontal and vertical sta csamph,X ; sampling factors dex bpl @l1 jsr Get cmp #8 beq @ok lda #BADQT sta Error rts @ok: jsr Get sta HEIGHT+1 jsr Get sta HEIGHT ; get image height sec sbc #1 sta numrows ; then convert it to number of rows lda HEIGHT+1 sbc #0 ldx #1 jsr @multrowcols ; multiply numrows by 8 and get next byte sta WIDTH+1 ; set up image width jsr Get sta WIDTH sec sbc #1 ; 0..7 instead of 1..8 sta numcols ; then covert it to number of columns lda WIDTH+1 sbc #0 ldx #0 jsr @multrowcols ; multiply numcols by 8 and get next byte sta ncomps sta temp @loop: jsr Get sta temp+1 ;ID jsr Get ldx temp+1 pha and #$0F sta csampv,X pla lsr lsr lsr lsr sta csamph,X jsr Get ldx temp+1 sta cquant,X dec temp bne @loop ldx #5 ;Find max sample lda #0 @l2: cmp csamph,X bcs @c2 lda csamph,X @c2: dex bne @l2 sta csamph ;Store in +0 ldx #5 lda #0 @l3: cmp csampv,X bcs @c3 lda csampv,X @c3: dex bne @l3 sta csampv rts ;; ;; Multiply numrows or numcols by 8 and get next byte from input ;; X = 0 - multiply numcols by 8 ;; X = 1 - multiply numrows by 8 ;; returns ;; A = next input byte ;; @multrowcols: ldy #3 @rowlp: lsr ror numcols,x ; 1 col/row = 8 pixels, so mulitply by 8 dey bne @rowlp inc numcols,x ; drop through to get next byte Get: lda Length ora Length+1 beq @err2 jsr DecLen jsr GetByte bcc @rts @err2: pla pla @err: lda #READERR sta Error @rts: rts ; And finally -- start of scan! row: .byte 0 col: .byte 0 ; current column in image SOS: dec SkipFF ; Skip $FF bytes jsr RENDSTART ; tell renderer we're about to start image jsr SetBufLimit jsr Get sta temp ; # of components sta ncomps @l1: jsr Get sta temp+1 ; Component ID jsr Get ldx temp+1 pha and #$0F sta ACHuff,X pla lsr lsr lsr lsr sta DCHuff,X dec temp bne @l1 jsr Get ;Scan parameters jsr Get ;(progressive) jsr Get ;(ignore) ; Image data begins here lda #0 sta row sta col sta BUFCOUNT jsr Restart ReadY: ;Intensity ldx #1 ;Component ldy #0 ;Render flag - 0 = render jsr ReadDU ReadCb: ;Chrominance ldx ncomps ; is there a Cb component? dex beq ReadDone ldx #2 jsr ReadDUNoRend ; don't render Cb component ReadCr: ; Chrominance ldx #3 jsr ReadDUNoRend ; don't render Cr component ReadDone: jsr DecRes ; decrement byte count till Restart marker expected lda AtEOF ; exit at end of file ora Error ; check for ABORT code from renderer beq @skdone jmp @done @skdone: lda csamph ; Max sample clc adc col sta col ; check if we've reached end of buffer/image cmp FirstCol ; have we reached first visible column? bcc ReadY ; no, go do another column cmp LastCol ; have we reached last visible column? bcs @cknumcol ; yes, check if this is last column in image cmp buflimit ; check if we've reached buffer limit bcs @dobuf ; if so, then call viewer to handle data @cknumcol: cmp numcols ; reached last column in image? @bccrdy: bcc ReadY ; no, go back and do another column @dobuf: asl BUFCOUNT cmp numcols ; set carry if this is the last buffer ror BUFCOUNT ; last buffer of data will have msb set ; we've got a buffer worth of data for the viewer to display lda row sta currow lda csampv ; vertical sampling factor sta temp lda #0 tay @calclines: inc currow ldx currow cpx FirstRow ; is current row before starting row? bcc @noadd cpx LastRow ; is current row after last row? bcs @noadd ; depending on vertical sampling factor, we adc curbufrows ; can have up to 24 lines (3 rows) of image .byte $2C ; skip next two bytes (iny, iny) @noadd: iny ; current row is before FirstRow, there won't be iny ; any data for viewer in current buffer, so ; move to next buffer instead dec temp ; have we reached last row in current buffer? bne @calclines tax ; tell viewer how many lines are in buffer(s) beq @norend ; no lines to display, ignores all before 1st row lda buftab+1,y ; get high byte of buffer address where data is tay ; stored lda # trans ; quanttab: .addr QT0 .addr QT1 .addr QT2 .addr QT3 ; Table to un-zigzag coeffs; multiples ; of 2, since 2 byte result. zigzag: .byte 0,2,16,32,18,4,6,20 .byte 34,48,64,50,36,22,8,10 .byte 24,38,52,66,80,96,82,68 .byte 54,40,26,12,14,28,42,56 .byte 70,84,98,112,114,100,86,72 .byte 58,44,30,46,60,74,88,102 .byte 116,118,104,90,76,62,78,92 .byte 106,120,122,108,94,110,124,126 Dequantize: ldx curcomp lda cquant,X asl tax lda quanttab,X sta quantp lda quanttab+1,X sta quantp+1 ldx #63 @loop: txa tay lda (quantp),Y sta mult1lo sta mult1hi eor #$FF clc adc #1 sta mult2lo sta mult2hi ldy Veclo,X bne @c1 sty Bitslo sty Bitshi beq @high @c1: lda (mult1lo),Y sec sbc (mult2lo),Y sta Bitslo lda (mult1hi),Y sbc (mult2hi),Y sta Bitshi @high: ldy Vechi,X lda (mult1lo),Y sec sbc (mult2lo),Y clc adc Bitshi ldy zigzag,X ;Un-zigzag iny sta trans,Y dey lda Bitslo sta trans,Y dex bpl @loop rts ; ; Retrieve .A bits and convert ; to signed number in (Bitslo, Bitshi) ; sign: .byte 0 GetBits: sta count tax beq @zero jsr GetBit lda #0 bcs @c1 lda #$FF ;0-> negative @c1: sta Bitshi rol sta Bitslo sta sign dec count beq @done @loop: jsr GetBit rol Bitslo rol Bitshi dec count bne @loop @done: lda sign bpl @rts inc Bitslo ;Make 2's comp bne @rts inc Bitshi @rts: rts @zero: sta Bitslo sta Bitshi rts ; ; Huffman tree routines. ; ; The Huffman tree is implemented as ; a series of 2-byte nodes. Left ; nodes are at huff+2, right nodes ; are at (huff) if link < $8000. ; Link = $80xx means xx=leaf value, ; link = $FFxx means no right link, ; link+2 = MEMTOP -> no left link. ; DCHuff0: .addr 0 ;Addresses DCHuff1: .addr 0 DCHuff2: .addr 0 DCHuff3: .addr 0 ACHuff0: .addr 0 ACHuff1: .addr 0 ACHuff2: .addr 0 ACHuff3: .addr 0 InitHuff: lda #Huffmem ; in Huff buffer sta MEMTOP+1 rts ; Create new node and make current node ; point to it. ; ; On entry: .Y = 0 -> right node, ; otherwise left node NewNode: tya bne @skip ; Y!=0 is a left node lda MEMTOP ; Y = 0, this is a right node sec ; point previous node at new one sbc huff ; new node will be at MEMTOP sta (huff),Y ; so find offset from previous node to iny ; new one, and store that value in previous lda MEMTOP+1 ; node's pointer sbc huff+1 sta (huff),Y @skip: lda MEMTOP ; move MEMTOP to take account of the new sta point ; node clc adc #2 ; each node is 2 bytes worth sta MEMTOP lda MEMTOP+1 sta point+1 adc #0 sta MEMTOP+1 ldy #01 lda #$FF sta (point),Y ;Init new node rts ; Add a new node; .X = length ; (huff) -> tree root huffbits: .word 0 ;hi,lo huffval: .byte 0 AddNode: sta huffval @loop: ldy #1 cpx #9 bcc @c1 dey @c1: lda BITP,X and huffbits,Y bne @right @left: lda huff ;Check if at end clc adc #2 pha tay lda huff+1 adc #0 pha cpy MEMTOP sbc MEMTOP+1 bcc @skip1 ;Not a new node ldy #$80 ;Create left node jsr NewNode @skip1: pla sta huff+1 pla sta huff jmp @dex @right: ldy #1 lda (huff),Y ;Check for rt ptr bpl @skip2 dey ;.Y=0 -> rt node jsr NewNode @skip2: ldy #0 lda (huff),Y clc adc huff pha iny lda (huff),Y adc huff+1 sta huff+1 pla sta huff @dex: dex bne @loop lda #$80 ldy #01 sta (huff),Y ;Store value lda huffval dey sta (huff),Y ;$80xx rts ; ; GetHuff -- Get valid Huffman code ; from (huff) ; GetHuff: lda #16 sta bitcnt ldy #01 lda (huff),Y cmp #$80 beq @found dec bitcnt jsr GetBit bcs @right lda huff adc #2 ; C is already clear tax lda huff+1 adc #0 tay cpx MEMTOP sbc MEMTOP+1 bcs @err sty huff+1 stx huff bcc GetHuff @right: ldy #01 lda (huff),Y bmi @err pha dey lda (huff),Y clc adc huff sta huff pla adc huff+1 sta huff+1 bne GetHuff @found: dey lda (huff),Y rts @err: @ckcnt: dec bitcnt bmi @bitend jsr GetBit jmp @ckcnt @bitend: lda reslen+1 ; if there are restart markers in ora reslen ; the file, then ignore the error sta SKIPERR bne @rts ; and let the image re-sync at next ; restart marker lda #HUFFERR ; if no restart markers, then give an error sta Error @rts: lda #0 rts bitcnt: .byte 0 ;******************************* ; ; IDCT routines ; ;******************************* IDCT2D: ; First the columns ldx #0 @l0: stx index ldy #0 @l1: lda trans,X sta DCT,Y lda trans+1,X sta DCT+1,Y txa clc adc #16 tax iny iny cpy #16 bne @l1 jsr IDCT ldy #0 ldx index @l1b: lda Coeff,Y sta trans,X lda Coeff+1,Y sta trans+1,X txa clc adc #16 tax iny iny cpy #16 bne @l1b ldx index inx inx cpx #16 bcc @l0 ;; this copies an 8x8 block of decoded pixels into the output ;; buffer. Change this to modify the layout of the data in the ;; output buffer. ; Then the rows rows: ldx curcol inx cpx numcols ; is current column >= to image max column? bcc @notlast ; is less than, continue as normal beq @lstcol ; is equal to, check how many pixels to do rts ; is greater than, so skip them @lstcol: lda WIDTH ; check if width is not a multiple of 8 and #%00000111 asl ; data is 2 bytes worth for each pixel bne @skp16 ; not a multiple, so do less than 8 lines @notlast: lda #16 @skp16: sta @endcol ldx row inx cpx numrows bne @notlastr lda HEIGHT and #%00000111 bne @not0 @notlastr: lda #8 ; we've got a full buffer's worth @not0: sta curbufrows asl ; multiply by 16 asl asl asl sta @endrow ldx #0 stx index @l0: ldy #0 ; copy 16 bytes from trans to DCT @l1: lda trans,X sta DCT,Y lda trans+1,X sta DCT+1,Y inx inx iny iny cpy #16 bne @l1 stx index jsr IDCT ldy #0 ldx #0 @l1b: lda Coeff,X sta Bitslo lda Coeff+1,X cmp #$80 ror ror Bitslo cmp #$80 ror ror Bitslo sta Bitshi lda Bitslo adc #128 ;C determines rounding sta (dest),Y lda Bitshi ;Range check adc #0 beq @cont bpl @pos ; if hi byte > 0 then store $FF lda #0 .byte $2C @pos: lda #$FF sta (dest),Y @cont: iny inx inx cpx @endcol bne @l1b @nxtlne: clc lda dest ; move down to next line adc #<320 ; change this if max # of pixels in sta dest ; a line changes lda dest+1 adc #>320 ; ditto sta dest+1 ldx index ; do 64 pixels (8x8) worth of data cpx @endrow ; unless we have less than 8 lines to draw bcc @l0 ; (2 bytes per pixel converted to 1 byte ; per pixel) cpx #128 ; we drew all 8 lines, then exit bcc @clrlines rts @clrlines: ; we drew less than 8 lines, so store 0 lda #0 ; in the rest ldy #7 ; we're doing 8 rows of data at a time @clrlp: sta (dest),y inx inx dey bpl @clrlp stx index bne @nxtlne ; forced branch @endcol: .byte 0 ; last column to copy, catches images ; that don't have a width of a multiple of 8 @endrow: .byte 0 ; last row to copy, catches images ; that don't have a height of a multiple of 8 F0 = DCT F1 = DCT+2 F2 = DCT+4 F3 = DCT+6 F4 = DCT+8 F5 = DCT+10 F6 = DCT+12 F7 = DCT+14 C0 = Coeff C1 = Coeff+2 C2 = Coeff+4 C3 = Coeff+6 C4 = Coeff+8 C5 = Coeff+10 C6 = Coeff+12 C7 = Coeff+14 ; ; Compute the inverse DCT (1D) ; ; Uses modified reversed flowgraph from ; Pennebaker & Mitchell, p. 52 ; ; Input: DCT coeffs contained in Flo/Fhi ; Output: Original coeffs in COEFFS ; IDCT: jsr PrepDat ;Shift and such ; Stage 1: F(5) <- F(5) - F(3) ; F(1) <- F(1) + F(7) ; F(7) <- F(1) - F(7) ; F(3) <- F(5) + F(3) lda F5 sec sbc F3 sta t1 lda F5+1 sbc F3+1 sta t1+1 lda F1 clc adc F7 sta t2 lda F1+1 adc F7+1 sta t2+1 lda F1 sec sbc F7 sta t3 lda F1+1 sbc F7+1 sta t3+1 lda F5 clc adc F3 sta F3 lda F5+1 adc F3+1 sta F3+1 lda t3 sta F7 lda t3+1 sta F7+1 lda t2 sta F1 lda t2+1 sta F1+1 lda t1 sta F5 lda t1+1 sta F5+1 ; Stage 2: F(2) <- F(2) - F(6) ; F(6) <- F(2) + F(6) ; F(1) <- F(1) - F(3) ; F(3) <- F(1) + F(3) lda F2 sec sbc F6 sta t1 lda F2+1 sbc F6+1 sta t1+1 lda F2 clc adc F6 sta F6 lda F2+1 adc F6+1 sta F6+1 lda t1 sta F2 lda t1+1 sta F2+1 lda F1 sec sbc F3 sta t1 lda F1+1 sbc F3+1 sta t1+1 lda F1 clc adc F3 sta F3 lda F1+1 adc F3+1 sta F3+1 lda t1 sta F1 lda t1+1 sta F1+1 ; Stage 3: F(2) <- a1*F(2) ; F(5) <- -a2*F(5) + t1 ; F(1) <- a3*F(1) ; F(7) <- a4*F(7) + t1 ; where t1 = -a5*(F(5) + F(7)) ; F(2) <- a1*F(2) ldx F2 ;Lo ldy F2+1 ;Hi lda a1lo,Y clc adc a1hi,X sta Bitslo ;lo byte lda a1hi,Y adc #0 cpy #$80 bcc @pos1 sta Bitshi lda Bitslo sbc #a1216 @pos1: sta F2+1 lda Bitslo sta F2 ; F(1) = a3*F(1) ldx F1 ;Lo ldy F1+1 ;Hi lda a3lo,Y clc adc a3hi,X sta Bitslo lda a3hi,Y adc #0 cpy #$80 bcc @pos1b sta Bitshi lda Bitslo sbc #a3216 @pos1b: sta F1+1 lda Bitslo sta F1 ; t1 = -a5*(F(5) + F(7)) lda F5 clc adc F7 tax ;Lo lda F5+1 adc F7+1 tay ;Hi lda a5lo,Y clc adc a5hi,X sta Bitslo lda a5hi,Y adc #0 sta Bitshi cpy #$80 bcc @pos2 lda Bitslo sbc #a5216 sta Bitshi @pos2: lda Bitslo eor #$FF clc adc #1 sta t1 lda Bitshi eor #$FF adc #0 sta t1+1 ; F(5) = t1 - a2*F(5) ldx F5 ;Lo ldy F5+1 ;Hi lda a2lo,Y clc adc a2hi,X sta Bitslo lda a2hi,Y adc #0 cpy #$80 bcc @pos3 sta Bitshi lda Bitslo sbc #a2216 @pos3: sta Bitshi lda t1 sec sbc Bitslo sta F5 lda t1+1 sbc Bitshi sta F5+1 ; F(7) = a4*F(7) + t1 ldx F7 ;Lo ldy F7+1 ;Hi lda a4lo,Y clc adc a4hi,X sta Bitslo lda a4hi,Y adc a4gh,X ;a4*.X can be >255 cpy #$80 bcc @pos4 sta Bitshi lda Bitslo sbc #a4216 @pos4: sta Bitshi lda Bitslo clc adc t1 sta F7 lda Bitshi adc t1+1 sta F7+1 ; Stage 4: ; F(0) <- F(0) + F(4) ; F(4) <- F(0) - F(4) ; F(6) <- F(2) + F(6) lda F0 clc adc F4 sta t1 lda F0+1 adc F4+1 sta t1+1 lda F0 sec sbc F4 sta F4 lda F0+1 sbc F4+1 sta F4+1 lda t1 sta F0 lda t1+1 sta F0+1 lda F2 clc adc F6 sta F6 lda F2+1 adc F6+1 sta F6+1 ; Stage 5: ; F(0) <- F(0) + F(6) ; F(4) <- F(2) + F(4) ; F(2) <- F(4) - F(2) ; F(6) <- F(0) - F(6) ; F(3) <- F(3) + F(7) ; F(7) <- F(7) + F(1) ; F(1) <- F(1) - F(5) ; F(5) <- -F(5) lda F0 clc adc F6 sta t1 lda F0+1 adc F6+1 sta t1+1 lda F0 sec sbc F6 sta F6 lda F0+1 sbc F6+1 sta F6+1 lda t1 sta F0 lda t1+1 sta F0+1 lda F4 clc adc F2 sta t1 lda F4+1 adc F2+1 sta t1+1 lda F4 sec sbc F2 sta F2 lda F4+1 sbc F2+1 sta F2+1 lda t1 sta F4 lda t1+1 sta F4+1 lda F3 clc adc F7 sta F3 lda F3+1 adc F7+1 sta F3+1 lda F7 clc adc F1 sta F7 lda F7+1 adc F1+1 sta F7+1 lda F1 sec sbc F5 sta F1 lda F1+1 sbc F5+1 sta F1+1 lda #0 sec sbc F5 sta F5 lda #0 sbc F5+1 sta F5+1 ; Final stage: ; c(0) = F(0) + F(3) ; c(1) = F(4) + F(7) ; c(2) = F(2) + F(1) ; c(3) = F(6) + F(5) ; c(4) = F(6) - F(5) ; c(5) = F(2) - F(1) ; c(6) = F(4) - F(7) ; c(7) = F(0) - F(3) ; ; Note: values are offset -128 lda F0 clc adc F3 sta C0 lda F0+1 adc F3+1 sta C0+1 lda F4 clc adc F7 sta C1 lda F4+1 adc F7+1 sta C1+1 lda F2 clc adc F1 sta C2 lda F2+1 adc F1+1 sta C2+1 lda F6 clc adc F5 sta C3 lda F6+1 adc F5+1 sta C3+1 lda F6 sec sbc F5 sta C4 lda F6+1 sbc F5+1 sta C4+1 lda F2 sec sbc F1 sta C5 lda F2+1 sbc F1+1 sta C5+1 lda F4 sec sbc F7 sta C6 lda F4+1 sbc F7+1 sta C6+1 lda F0 sec sbc F3 sta C7 lda F0+1 sbc F3+1 sta C7+1 rts ;Sheeew! ; ; Since the algorithm is really an ; FFT converted into a DCT, the ; coefficients need a little massaging ; before tranformation. ; ; Specifically, ; F(i) = S(i)/(2cos(i*pi/16)) i=0..7 ; with F(0)=F(0)*2/sqrt(2), which can ; be combined with the first step ; using the table for i=4. ; ; These multipliers can in part be ; incorporated in the quantization ; table, but for now they're out in ; the open. ; PrepDat: ldx #0 lda #sec4 sta point+1 lda F0 sta Bitslo lda F0+1 jsr PMult sta F0+1 lda Bitslo sta F0 ldx #0 lda #sec1 sta point+1 lda F1 sta Bitslo lda F1+1 jsr PMult sta F1+1 lda Bitslo sta F1 ldx #0 lda #sec2 sta point+1 lda F2 sta Bitslo lda F2+1 jsr PMult sta F2+1 lda Bitslo sta F2 ldx #0 lda #sec3 sta point+1 lda F3 sta Bitslo lda F3+1 jsr PMult sta F3+1 lda Bitslo sta F3 ldx #0 lda #sec4 sta point+1 lda F4 sta Bitslo lda F4+1 jsr PMult sta F4+1 lda Bitslo sta F4 ldx #0 lda #sec5 sta point+1 lda F5 sta Bitslo lda F5+1 jsr PMult sta F5+1 lda Bitslo sta F5 ldx #0 lda #sec6 sta point+1 lda F6 sta Bitslo lda F6+1 jsr PMult sta F6+1 lda Bitslo sta F6 ldx #0 lda #sec7 sta point+1 lda F7 sta Bitslo lda F7+1 jsr PMult sta F7+1 lda Bitslo sta F7 rts PMult: ;exit .A = Bitshi bmi @neg beq @ok @l1: inx ;Shift count lsr ror Bitslo cmp #0 bne @l1 @ok: sta Bitshi lda Bitslo asl rol Bitshi adc point sta point lda Bitshi adc point+1 sta point+1 ldy #0 lda (point),Y sta Bitslo iny lda (point),Y dex bmi @rts @l1b: asl Bitslo rol dex bpl @l1b @rts: rts @neg: sta Bitshi lda #0 sec sbc Bitslo sta Bitslo lda #0 sbc Bitshi beq @ok2 @l2: inx ;Shift count lsr ror Bitslo cmp #0 bne @l2 @ok2: asl Bitslo rol sta Bitshi lda Bitslo adc point sta point lda Bitshi adc point+1 sta point+1 ldy #0 lda (point),Y sta Bitslo iny lda (point),Y dex bmi @rts2 @l2b: asl Bitslo rol dex bpl @l2b @rts2: sta Bitshi lda #0 sec sbc Bitslo sta Bitslo lda #0 sbc Bitshi rts filepos: ; 3 bytes for current file position symbols = filepos + 3 ;; keep csampv and csamph together and in this order csampv = symbols + 16 ; Sampling factors csamph = csampv + 6 ; (horizontal) cquant = csamph + 6 ; Quantization table DClo = cquant + 6 ; DC coeffs DChi = DClo + 6 ACHuff = DChi + 6 ; AC table to use DCHuff = ACHuff + 6 ; DC table to use segend: .org DCHuff+6 ;; end of jpeg decoder .if (* >= $2F00) .out .concat("Error - Decoder code overrun into data area ", .string(*), " Max=12031") .else .out .concat("decoder code ends at ", .string(*)) .endif .include "jpydata.inc" ;; file header data .segment "EXEHDR" .byte 255, 255 ;; run address .segment "AUTOSTRT" .addr 736 .addr 737 .addr segstart