/* $Header: /Faultline/com/adobe/acrobat/filters/DCTInputStream.java 1      $ */
/*
  DCTInputStream.java

  (C) Copyright 1997 Adobe Systems Inc.  All rights reserved.

  This filter is based on the Postscript 3010 version of the JPEG decoder
*/

package com.adobe.internal.pdftoolkit.core.filter;

import java.io.IOException;
import java.io.InputStream;

import com.adobe.internal.pdftoolkit.core.exceptions.PDFFilterFormatException;

public class DCTInputStream
  extends DecodeInputStream
{
/* gvl main DCT data record.  All fields in this record should be of a
 forced size; DO NOT USE, for example, int, integer, unsigned, or boolean
 in gvl or in any subsidiary structs because of problems passing the struct
 between PC configurations compiled for one model (16-bit ints, say) to
 other code compiled for a different model (32-bit ints, say).

 This is declared independently in class DCTgvl, but included here for efficiency,
 since only one instance is needed by the decoder

 In Java, on the other hand, all sizes are defined...
 */
    int dg_bs_byte;
    int dg_byteAhead;    /* DCTDecode */
    int dg_unused;       /* no. unused bits @ head of bitstream */

    int dg_state;        /* DCTDecode:
                           0 if no SOI marker seen yet;
                           1 if between scans;
                           2 if EOI marker seen;
                           3 if source exhausted;
                           4 if next strip should be decoded;
                           5 to deliver next scanline in current strip.
                          DCTEncode:
                           0 if no output yet;
                           1 after outputting initial markers;
                           2 after scan setup for the rest of the image;
                           3 after all JPEG output or an error. */
    short dg_bigcolorline[]; /* Scanline buffer read by fill(): dg_lx * dg_framecn
                          samples per line.  fill() emits (sample >>> 3), so each
                          entry carries 3 extra low-order bits; negative values
                          are output as 0 and values above 2047 as 255. */
    int dg_hFast, dg_vFast; /* Denote fast strip-handling options. */
    int dg_stripv, dg_stripmv;
    int dg_out_lines_processed;
    int dg_in_lines_processed; /* DCTEncode */
    int dg_old_in_lines_processed; /* DCTEncode */

    int dg_maxh, dg_maxv;   /* max(hy, hu, hv, hk) and max(vy, vu, vv, vk) */

    int dg_resyncmod8;
    int dg_resync;       /* Resync interval (0 = none or 1 to 65535) */
    int dg_resync_count; /* MCUs to next resync */

    int dg_lx;           /* no. of pixels per line */
    int dg_ly;           /* no. of lines */
    int dg_raster_size_in, dg_raster_size_out;

    int dg_framecn;      /* no. colors in the frame == total in the image */
    int dg_cn;           /* no. of color components in scan */

    int dg_encoding; /* ASJPEGBaseline: (SOF0) Basic sequential, 2 HT limit.
                         ASJPEGSequential: (SOF1) Extensions to baseline
                         (extra HuffTables, Sum(H*V) > 10, 2 other
                         extensions (16-bit QT with 8-bit input).
                         ASJPEGProgressive: (SOF2)
                         ASJPEGLossless: unimp.
                         ASJPEGHierarchical: unimp.
                         differential, arithmetic coding unimp. */
    int dg_colorconv;    /* Color transform options: 0, RGBtoYUV, or CMYKtoYUVK
                         (should change names to RGBtoYUV or CMYKtoYUVK).
                         initDCTgvl() presets this to 0x3fff; DCTDAdobeMarker()
                         overwrites it with the transform code from the Adobe
                         APPE marker. */
    int dg_blend;        /* For encoder downsampling: 0==chop, 1==blend1.
                         Decoder implements Radius bug kludge with
                         blend==16384  NON-REDBOOK.  NOT IMPLEMENTED BY SOME
                         HW JPEG DEVICES. */

//#if QuantCoefRaster
    boolean dg_haveQCoefRaster; /* 1 if a quantized coefficient array is in use;
                         needed for progressive encode/decode, sequential
                         decode with block smoothing, or converting between
                         sequential and progressive encodings. */
    int dg_scansCompleted; /* no. scans decoded/encoded */
    int dg_scansInImage; /* DCTEncode: Total no. scans in the compressed image.
                         DCTDecode: number of last scan for which an image
                         was reconstructed on the output ASStm. */
    boolean dg_eoiDecoded;   /* DCTDecode: has processed an EOI marker. */
    int dg_scanTabGroup; /* DCTEncode: group of 4 scan tables; framecn then
                         selects the scan table from the group. */

    int dg_blockSmooth;  /* DCTDecode: 1 enables inter-block interpolation to
                         fix 8x8 block-boundary defects; requires
                         haveQCoefRaster 1; NOT IMPLEMENTED */
    int dg_ss;           /* 1st zig-zag coef in band (prog.), 0 (seq.) */
    int dg_se;           /* last zig-zag coef in band (63 if seq.) */
    int dg_ah;           /* 0 on 1st scan of band, else pt. trans. of last */
    int dg_al;           /* pt trans. of this scan (1 less than last scan) */
//#endif /* QuantCoefRaster */
    int dg_nmdus_per_strip;
    int dg_nstrips_this_scan;

    int dg_nblocks;      /* Number of the 8x8 block being processed. */
    int dg_nsamps;       /* ((lx + maxh - 1) / maxh) * cn * maxh */
    int dg_cnmaxh;       /* cn * maxh */

  /* The CMYK-to-YCCK and YCCK-to-CMYK transforms use the same structures
   as the RGB-to-YCC and YCC-to-RGB transforms but with negated values and
   different offsets.  RUVtab, GUVtab, and BUVtab are larger when Blend=1
   because they are indexed by the sum of two sample values.
   */
  /* Encoder tables */
//  int RYtab[];
//  int GYtab[];
//  int BYtab[];
//  int RUVtab[];
//  int GUVtab[];
//  int BUVtab[];

  /* Decoder tables (also used by encoder DataStudy option) */
    int dg_UGtab[];
    int dg_VGtab[];

    int dg_equant[][];    /* Pointers to quantization tables */

    DCTjframe dg_jpframe[];
    DCTjscan  dg_jpscan[];

    int dg_frameqn;      /* DQT marker processed; DCTDGetDQT() stores the DQT
                         segment length minus 2 here. */
    int dg_framehn;      /* DHT marker processed; DCTDGetDHT() stores the DHT
                         segment length minus 2 here. */
    int dg_ps_version;   /* Encode: JPEG implementation version or 0 if Adobe APPE
                                   marker is not being written.
                         Decode: Implementation version number of the Adobe
                                   encoder that wrote the file, or 0 if the Adobe
                                   APPE marker was not present. */
    int dg_ps_flags;

    double dg_qfactor;

    int dg_qTableCount;  /* Highest quantization table ID seen in a DQT marker,
                         plus 1 (maintained by DCTDGetDQT()). */
    boolean dg_qFitsIn8[];  /* 1 if the quant. coef. computed with
                         dg->qtizers[i] all fit in 8 bits, else 0 */

    boolean dg_relax;        /* What is allowed: 0 Baseline only, 1 non-Baseline
                         options permitted. */
    boolean dg_picky;        /* What is detected as errors:
                           0 Tolerant,
                           1 Intolerant (errors reported for Huffman code
                             impossible events, for garbage bytes at end of
                             marker segments, for use of quantizers bigger
                             than 255 with 8 bit samples). */

    int dg_lastp;        /* Read by DCTDGetDHT() after each DCTDMakeHuffDec()
                         call: one table accounts for 17 + dg_lastp bytes of
                         the DHT segment. */

//#if BuildAdobeScanDir
    int dg_scanDirBytePos; /* DCTEncode: 0 if no scan directory.
                                      -1 if ASftell(dg->bs_fh) does not return
                                          a position.
                         Otherwise, there is a scan directory and
                         scanDirBytePos is the byte pos. of progScanBytes
                         table in the dg->bs_fh bit stream for ASfseek. */
    int dg_numLoggedScans; /* No. scans logged in the directory */
    int dg_progScanBytes[];
                        /* Byte pos. of the logged scan.
                         DCTDecode reads the (optional) progressive JPEG
                         scan directory from the APPE marker property
                         list.  To decode the logged scan, it must have
                         the indicated no. bytes + 2 ready. */
    byte dg_progScanNumber[];
                        /* Each byte is for a different logged scan; it is
                         the no. prog. JPEG scans to the logged scan. */
//#endif /* BuildAdobeScanDir */

    DCTdec_code_struct dg_dts[];   /* DC Huffman decode tables, indexed by table number */
    DCTdec_code_struct dg_ats[];   /* AC Huffman decode tables, indexed by table number */

    short dg_qtizers[][];    /* Quantizers; although 16-bit quantizers are illegal
                          with 8-bit sample values, Photoshop asked to have
                          them accommodated by the decoder because the
                          Independent JPEG Group implementation was producing
                          them on 7/16/96. */

    /**
     * Shared initialisation for the decoder state record; invoked by the
     * public constructors.  Resets the state machine to "no SOI seen",
     * selects tolerant (non-picky) decoding, presets the colour-transform
     * code to 0x3fff, and allocates the per-component and per-table arrays.
     */
    private void initDCTgvl()
    {
        final int maxColors = DCTTables.ASDCTMaxColors;
        final int maxCodeTables = DCTTables.MAX_CODE_TBLS;

        /* Scalar defaults. */
        this.dg_state = 0;
        this.dg_picky = false;
        this.dg_colorconv = 0x3fff;

        /* Frame and scan component descriptors; the elements are created
           later, when the SOFn and SOS markers are parsed. */
        this.dg_jpframe = new DCTjframe[maxColors];
        this.dg_jpscan = new DCTjscan[DCTTables.MAX_COMP_PER_SCAN];

        this.dg_qFitsIn8 = new boolean[maxColors];

        /* Huffman decode tables (DC and AC) and four 64-entry quantizer tables. */
        this.dg_dts = new DCTdec_code_struct[maxCodeTables];
        this.dg_ats = new DCTdec_code_struct[maxCodeTables];
        this.dg_qtizers = new short[4][64];
    }

    // Public constructors
    /**
     * Creates a DCT (JPEG) decoding stream with explicit size arguments.
     * All arguments are forwarded to the DecodeInputStream constructor
     * (together with a literal 0), after which the decoder state is
     * initialised.
     *
     * @param in       stream supplying the JPEG-compressed bytes
     * @param inSize   forwarded to the superclass (presumably the input
     *                 buffer size -- confirm against DecodeInputStream)
     * @param outSize  forwarded to the superclass (presumably the output
     *                 buffer size -- confirm against DecodeInputStream)
     * @param diparams filter parameters, forwarded to the superclass
     */
    public DCTInputStream(
        InputStream in,
        int         inSize,
        int         outSize,
        FilterParams diparams )
    {
        super(in, inSize, outSize, 0, diparams);
	    initDCTgvl();
    }

    /**
     * Creates a DCT (JPEG) decoding stream, forwarding the stream, a literal
     * 0 and the filter parameters to the DecodeInputStream constructor
     * before initialising the decoder state.
     *
     * @param in stream supplying the JPEG-compressed bytes
     * @param p  filter parameters, forwarded to the superclass
     */
    public DCTInputStream(InputStream  in, FilterParams p)
    {
        super(in, 0, p);
	    initDCTgvl();
    }

    /**
     * Creates a DCT (JPEG) decoding stream without filter parameters;
     * equivalent to {@code DCTInputStream(in, null)}.
     *
     * @param in stream supplying the JPEG-compressed bytes
     */
    public DCTInputStream(InputStream  in)
	{
		this(in, null);
	}

    /* Number of samples in the current decoded scanline; fill() sets it to
       dg_lx * dg_framecn after each successful DCTDFilBuf() call. */
    int dgCount = 0;
    /* Index of the next sample in dg_bigcolorline that fill() will copy out. */
    int dgPos = 0;

    /**
     * Refills {@code outBuf} with decoded 8-bit samples.
     *
     * On the first call (dg_state == 0) the markers and scans are consumed
     * by calling DCTDNextScan() until it returns non-zero.  Afterwards one
     * scanline at a time is fetched with DCTDFilBuf() into dg_bigcolorline
     * and converted to bytes until {@code outBuf} is full, the decoder
     * reports end of data (pendingEOF is set), or an error occurs.
     *
     * Errors are not thrown from here: they are recorded in
     * {@code pendingException} and the method returns with whatever output
     * has been produced so far.
     */
    @Override
    public void fill()
    {
        if (dg_state == 0) {
            try {
                /* If this is a progressive image, read all the scans in at once */
                while (DCTDNextScan() == 0) ;
            }
            catch (PDFFilterFormatException fe) {
                pendingException = fe;
                return;
            }
            catch (Exception e) {
                /* Any other failure while parsing the header/scans is reported
                   as a format error. */
                pendingException = new PDFFilterFormatException("DCT Error: "+e.getMessage());
                return;
            }
        }

        while (outCount < outBuf.length)
        {
            if (dgPos >= dgCount) {
                /* Current scanline fully delivered: decode the next one. */
                try {
                    if (DCTDFilBuf()) {
                        pendingEOF = true;
                        return;
                    }
                    dgCount = dg_lx * dg_framecn;
                    dgPos = 0;
                }
                catch (PDFFilterFormatException e) {
                    /* Format errors inside the image data are deliberately not
                       recorded (the assignment below was disabled in the
                       original); the lines decoded so far are kept.
                       NOTE(review): confirm callers cope with a fill() that
                       returns without setting pendingEOF or pendingException. */
                    // pendingException = e;
                    return;
                }
                catch (IOException io) {
                    pendingException =
                        new PDFFilterFormatException("DCT IO Error: "+io.getMessage());
                    /* Stop here.  Without this return the loop would continue
                       with no scanline available (dgPos >= dgCount), copy
                       nothing, and call DCTDFilBuf() again -- spinning forever
                       if the I/O failure persists. */
                    return;
                }
            }

            int   count = Math.min(outBuf.length - outCount, dgCount - dgPos);
            for (int i = 0; i < count; i++) {
                int ub = dg_bigcolorline[dgPos++];

                /* Samples carry 3 extra low-order bits.  Anything outside
                   0..2047 is clamped: negative values (sign bit set) become
                   0x00, positive overflow becomes 0xFF. */
                if ((ub & 0xf800) != 0)
                    outBuf[outCount++] = (byte)(~(ub >> 15));
                else
                    outBuf[outCount++] = (byte)  (ub >>> 3);
            }
        }
    }

/* Helpers */
  /* One-byte push-back: UNGETC() stores a byte in UngetByte and sets
     UngetAvail; the next FGETC() returns that byte and clears the flag. */
  boolean UngetAvail = false;
  int     UngetByte;

  /**
   * Reads the next source byte.
   *
   * A byte pushed back with UNGETC() is returned first.  Otherwise the next
   * byte of inBuf is returned as an unsigned value, refilling the input
   * buffer when it has been used up.
   *
   * @return the byte as 0..255 (or the pushed-back value), or -1 when
   *         fillInputBuffer() reports that no more input is available
   */
  private int FGETC()
  {
    /* Pushed-back byte takes priority. */
    if (UngetAvail) {
        UngetAvail = false;
        return UngetByte;
    }

    /* Refill only when the buffer is exhausted; give up on failure. */
    if (inPos >= inCount && !fillInputBuffer()) {
        return -1;
    }

    return inBuf[inPos++] & 0xFF;
  }

  /**
   * Pushes one byte back so that the next FGETC() call returns it.  Only a
   * single byte is remembered; a second call overwrites the first.
   *
   * @param theByte value to hand back on the next read
   * @return {@code theByte}
   */
  private int UNGETC(int theByte)
  {
    UngetByte = theByte;
    UngetAvail = true;
    return theByte;
  }

// DCTDmain
/* NOTE(review): neither array is used by the marker-parsing methods that
   follow; the names suggest quantized-coefficient storage for the
   QuantCoefRaster path -- confirm against the strip-decoding code. */
private int qstrip[];
private int last_qcoef_ptr[][];

/*--------------------------------------------------------------------------
 Extract parameters from SOF0 (baseline seq.), SOF1 (extended seq.), or SOF2
 (prog.) marker segment.  Must occur after SOI and before SOS marker, but
 order relative to DQT, DRI, DHT, and APPn markers is arbitrary.
 --------------------------------------------------------------------------*/
/**
 * Parses the body of an SOFn frame header: sample precision, image height
 * and width, number of frame components and, per component, its ID,
 * horizontal/vertical sampling factors and quantization table number.
 * The results go to dg_ly, dg_lx, dg_framecn and dg_jpframe[].
 *
 * A zero line count is rejected; the DNL-marker mechanism that would allow
 * it is not supported here.
 *
 * @return the declared segment length minus the bytes this parser accounts
 *         for (8 fixed bytes plus 3 per component); 0 for a well-formed
 *         segment
 * @throws PDFFilterFormatException for precision other than 8, a zero
 *         dimension, or a component count outside [1..4]
 */
private int DCTDGetFrameParms ()
throws PDFFilterFormatException
{
  int remaining = DCTDSGet16();

  if (DCTDSGet8() != 8)
    DCTError.err(DCTError.dctdmain1, "Precision > 8 bits/color disallowed",
              DCTError.filtSrcRangeErr);

  /* Height precedes width in the frame header. */
  dg_ly = DCTDSGet16();
  dg_lx = DCTDSGet16();

  if (dg_lx == 0)
    DCTError.err(DCTError.dctdmain2, "Image width is 0", DCTError.filtSrcLengthErr);
  if (dg_ly == 0)
    DCTError.err(DCTError.dctdmain3, "Image height is 0", DCTError.filtSrcLengthErr);

  /* JPEG allows up to 255 frame components; this decoder handles at most 4.
     The frame count may differ from the per-scan count, so both are kept. */
  final int componentCount = DCTDSGet8();
  dg_framecn = componentCount;
  if (componentCount < 1 || componentCount > 4)
    DCTError.err(DCTError.dctdmain4, "Number of colors is not [1..4]", DCTError.filtSrcRangeErr);

  for (int c = 0; c < componentCount; c++)
  {
    DCTjframe frame = new DCTjframe();
    dg_jpframe[c] = frame;

    frame.frameComponentId = DCTDSGet8();

    /* One byte packs both sampling factors: high nibble horizontal,
       low nibble vertical. */
    int sampling = DCTDSGet8();
    frame.hs = sampling >> 4;
    frame.vs = sampling & 0xF;

    frame.frameQTNum = DCTDSGet8();
    remaining -= 3;
  }

  return remaining - 8;
} /* DCTDGetFrameParms */


/*---------------------------------------------------------------------
 Extract parameters from DQT marker segment (mandatory), which this
 implementation allows to occur anywhere between SOI and SOS.
 ---------------------------------------------------------------------*/
/**
 * Parses a DQT marker segment, which may define several quantization
 * tables, and stores each table in dg_qtizers[tableID].
 *
 * Each table starts with one byte holding the precision (high nibble,
 * 0 = 8-bit entries, non-zero = 16-bit entries) and the table ID (low
 * nibble), followed by 64 quantizers.  The precision is evaluated per
 * table, so a segment may mix 8-bit and 16-bit tables.
 *
 * @return the number of segment bytes left unaccounted for; 0 for a
 *         well-formed segment, negative if the declared length was too short
 * @throws PDFFilterFormatException for a 16-bit table in picky mode, a table
 *         ID outside [0..3], or a quantizer equal to 0
 */
private int DCTDGetDQT()
throws PDFFilterFormatException
{ int i, qTableID, precision;
  int temp;
  short q[];
  int count;

  count = dg_frameqn = DCTDSGet16() - 2;
  while (count > 0)
  { int inbyte = DCTDSGet8();
    /* These two lines were interchanged to bypass a 68030 compiler problem. */
    qTableID = inbyte & 0xF;
    precision = inbyte >> 4;

    /* Entry width and byte count of THIS table.  They are reset for every
       table so that a 16-bit table does not cause later 8-bit tables in the
       same segment to be read with the wrong width. */
    boolean sgetq = false;
    int tcount = 1+64;

    /* JPEG disallows 16-bit quantizers with 8-bit samples, and with 8-bit
     quantizers a max. value of 255 is naturally enforced.  However, this
     code allows 16-bit quantizers unless dg->picky != 0 because they were
     used by Radius in 1991 (As I recollect, Radius used 16-bit containers
     but the values were in [1..255].) and by the Independent JPEG Group in
     1996.  Quantizers beyond a certain max. size will overflow the ASInt32
     range when a quant. coef. is requantized. (See DCTDQScale array in
     dctdxf.c.)  The smallest quantizer which can overflow is 4985 (4988?)
     in the final zig-zag scan position.  However, for any quantizer larger
     than 1023 x 2, the quant. coef. is always 0, so the overflow will not
     matter.  Therefore, no check for quantizer too large is needed below.
     */
    if (precision != 0)
    { if (dg_picky)
      { DCTError.err(DCTError.dctdmain5, "QuantTable precision is not 8 bits",
                  DCTError.filtSrcRangeErr);
      }
      else
      { sgetq = true;
        tcount = 1+128;
      }
    } /* if (precision != 0) */

    /* Validate the table ID before it is used to update dg_qTableCount or
       to index dg_qtizers (which has exactly 4 rows). */
    if (qTableID > 3)
    { DCTError.err(DCTError.dctdmain6, "QuantTable number is not [0..3]",
                DCTError.filtSrcRangeErr);
    }
    if ((qTableID + 1) > dg_qTableCount)
    { dg_qTableCount = qTableID + 1;
    }

    q = dg_qtizers[qTableID];
    for (i = 0; i < 64; i++)
    { temp = sgetq ? DCTDSGet16() : DCTDSGet8();
      if (temp == 0)
      { DCTError.err(DCTError.dctdmain7, "Quantizer is 0", DCTError.filtSrcRangeErr);
      }
      q[i] = (short)temp;
    }

    count -= tcount;
  } /* while (count > 0) */

  return(count); /* return(frame size error) */
} /* DCTDGetDQT */


/*---------------------------------------------------------------------
 Extract parameters from DRI marker segment (must be in header).
 ---------------------------------------------------------------------*/
/**
 * Parses a DRI (define restart interval) segment and primes the restart
 * bookkeeping: dg_resync receives the interval, dg_resyncmod8 is cleared
 * and dg_resync_count is set to interval + 1.
 *
 * (A former special case for a 1990 Radius encoder bug, keyed on
 * dg->blend == 16384, was removed on 6/1/96.)
 *
 * @return declared segment length minus the 4 bytes consumed; 0 when the
 *         segment is well formed
 * @throws PDFFilterFormatException if reading the source fails
 */
private int DCTDGetDRI()
throws PDFFilterFormatException
{
  final int segmentLength = DCTDSGet16();
  final int interval = DCTDSGet16();

  dg_resync = interval;
  dg_resyncmod8 = 0;
  dg_resync_count = interval + 1;

  return segmentLength - 4;
} /* DCTDGetDRI */


/*---------------------------------------------------------------------
 ---------------------------------------------------------------------*/
/**
 * Reports (via DCTError.err, code dctdmain8) that a marker requiring a
 * preceding SOI was encountered before any SOI marker.
 *
 * @throws PDFFilterFormatException as raised by DCTError.err
 */
private void DCTDNoSOI()
throws PDFFilterFormatException
{
  final String message = "Input did not begin with a JPEG SOI marker";
  DCTError.err(DCTError.dctdmain8, message, DCTError.filtSrcRangeErr);
} /* DCTDNoSOI */


/*---------------------------------------------------------------------
 Adobe's APPE marker began with the ASCII text `Adobe' and this procedure
 was called after verifying the text.  `Adobe' is followed by a two-byte
 PS version number, four-byte ps_flags, and one-byte color transform code.
 Beginning with PS version 2015.009, an optional property list, described
 below, may occur after these mandatory fields.

 The 1 bits or other fields in ps_flags left-half contain information
 which might modify the decoding strategy; but inability on the part
 of another decoder to understand 1 bits in this field should never
 result in an error.  (Bit 0 now means that the encoder used blend=1
 on downsampled colors; other bits are not used in Version 100.)

 1 bits in ps_flags right-half and uninterpretable Color Transform codes
 cause an abort in Picky mode.  (A better approach would be to
 describe the color space of the image in some way rather than the
 color transform applied to it; but that isn't done.)

 JPEG marker information (height, width, ncolors, bits/color, etc.) and
 the information passed in this marker is sufficient to decode the image
 without any commandline options.  The information NOT included in the
 JPEG markers is color transform information and some encoder options
 (e.g., blend vs. chop) which might conceivably affect decoding.

 Adobe will use flag bits beginning with the leading bits in ps_flags left
 half and in ps_flags right-half and working toward lower bits.  Adobe will
 use color transform codes beginning with 1 and working toward higher
 numbers.  Any non-Adobe developer reading this comment and extending the
 implementation should seek some agreement with Adobe.

 Each property on the property list begins with a ASUns8 propertyID.  The first
 two IDs (1 and 2) have a non-standard structure grandfathered in.  (This is
 possible because these are only used privately when both the encoder and
 decoder understand the properties or else the properties do not occur.)
 The rest follow propertyID with a ASUns16 length field followed by property
 content described below.  For a propertyID it does not recognize, the decoder
 uses the length field to skip the content and continue interpreting other
 properties.

 The property list ends when fewer than 3 bytes remain in the marker.  On the
 return, DCTDDecodeImage will skip segSize bytes without error unless
 dg->picky != 0.

 Private property list codes:

   1  DQT marker substitute for private filter uses (e.g., source list
      compression or frame buffer compression); scales DCTEncode default
      tables like DCTEncode using qmax, qfactor, cscale, and default threshold
      tables to save about 60 bytes per QTable over a DQT marker.
        byte 0: no. qTables
             1: qmax
        2 to 5: qfactor
        6 to 9: cscale
             A: 4-bit qTableID0, 4-bit defQTableID0
             B: 4-bit qTableID1, 4-bit defQTableID1

   2  DHT marker substitute for private filter uses
        byte 0:     no. HTables
        1, 2, 3, 4: cTableId   0 (& 0x03 == data structure number [0..3]
                                  & 0x04 == 0 -> DC, 1 -> AC
                                  & 0xF0 == default table index)

 Public property list codes and content (after ASUns8 propertyID and
 ASUns16 length L):

   3  Scan directory.  Written only when the image contains more than one
                scan (i.e., for progressive JPEG or, if it were implemented,
                a multi-scan sequential image).
        byte 0: no. directory entries N (<= MaxLoggedScans)
        [1..N]: Each byte is the scan number of the logged scan.
     [N+1..5N]: Each 4 bytes is a ASUns32 giving the no. bytes to the end of
                the logged scan counting from the first byte of the SOI
                marker.  To decode the compressed scans through this scan
                number, provide 2 more code bytes than this number.  (On the
                final scan the 2 extra code bytes will be the 2 bytes of the
                EOI marker.)

   4  Caption.
        [1..L]: Caption.  An old implementation or a new DCTDecode compiled
                with BuildCaption == 0 will skip and ignore this.  Otherwise,
                allocate storage for the caption with AllocSmall and copy it
                into the allocated storage; set dg->captionLen to the caption
                length.  (Also 0-terminate the string.)
 ---------------------------------------------------------------------*/
/**
 * Digests the body of an Adobe APPE marker after the caller has matched the
 * "Adobe" signature: the mandatory version, flags and colour-transform
 * fields, then the optional property list.
 *
 * Only property 3 (the scan directory) is interpreted, and only when
 * DCTTables.BuildAdobeScanDir is set; every other property is skipped.
 * Properties 1 and 2 have no length field and contribute a single byte.
 *
 * @param segSize number of marker bytes remaining on entry
 * @return number of marker bytes still unread; the caller decides whether
 *         leftovers are skipped or reported
 * @throws PDFFilterFormatException for invalid flags/transform in picky
 *         mode, a marker seen outside state 1 (no SOI yet), or an
 *         inconsistent property length
 */
private int DCTDAdobeMarker(int segSize)
throws PDFFilterFormatException
{
  /* Mandatory fields: 2-byte version, 4-byte flags, 1-byte colour transform.
     The flags are kept verbatim in dg_ps_flags; the decoder does not
     interpret the blend / accurate-transform bits itself. */
  segSize -= 7;
  dg_ps_version = DCTDSGet16();
  dg_ps_flags = DCTDSGet32();
  dg_colorconv = DCTDSGet8();

  if (dg_picky)
  { final boolean lowFlagBitsSet = (dg_ps_flags & 0xFFFF) != 0;
    if (lowFlagBitsSet || (dg_colorconv > 2))
      DCTError.err(DCTError.dctdmain9, "Invalid JPEG APPE marker",
                DCTError.filtSrcRangeErr);
  }

  if (dg_state != 1)
    DCTDNoSOI();

  /* Optional property list: runs while more than 3 bytes remain. */
  while (segSize > 3)
  {
    final int propertyID = DCTDSGet8();
    int unread = 0;           /* property bytes not yet consumed */

    if (propertyID < 3)
    { /* Grandfathered IDs without a length field. */
      segSize -= 1;
    }
    else
    { /* Standard layout: ID byte, 2-byte length, content. */
      unread = DCTDSGet16();
      segSize -= (unread + 3);
      if (segSize < 0)
        DCTError.err(DCTError.dctdmain9, "Invalid JPEG APPE marker",
                  DCTError.filtSrcRangeErr);
    }

    if (propertyID == 3 && DCTTables.BuildAdobeScanDir)
    {
      /* Scan directory: count N, N scan numbers, N 32-bit byte positions. */
      if (unread < 5)
        DCTError.err(DCTError.dctdmain9, "Invalid JPEG APPE marker",
                  DCTError.filtSrcRangeErr);

      dg_numLoggedScans = DCTDSGet8();
      unread -= (dg_numLoggedScans * 5 + 1);
      if (unread < 0)
        DCTError.err(DCTError.dctdmain9, "Invalid JPEG APPE marker",
                  DCTError.filtSrcRangeErr);

      dg_progScanNumber = DCTDSFRead(dg_numLoggedScans);
      dg_progScanBytes = new int[dg_numLoggedScans];
      for (int s = 0; s < dg_numLoggedScans; s++)
        dg_progScanBytes[s] = DCTDSGet32();  /* ASUns32 file positions */
    }

    /* Skip whatever is left of this property: the whole content of an
       uninterpreted property, or trailing tables appended to the scan
       directory by a newer writer. */
    for (; unread > 0; unread--)
      DCTDSGet8();
  } /* while (segSize > 3) */

  return(segSize);
} /* DCTDAdobeMarker */


/*---------------------------------------------------------------------
 Extract parameters from DHT marker segment.  (JPEG allows it to occur in
 the header between SOI and SOS markers or between scans.)  Setup the
 decoder's Huffman decoding tables.
 ---------------------------------------------------------------------*/
/**
 * Parses a DHT marker segment and (re)builds the Huffman decode table for
 * every table definition it contains.
 *
 * The leading byte of each definition selects the table: bit 0x10 set means
 * an AC table (stored in dg_ats), clear means a DC table (stored in dg_dts);
 * the low bits give the table number.  Decode structures are created on
 * first use and reused afterwards.
 *
 * @return segment bytes left unaccounted for; 0 for a well-formed segment
 * @throws PDFFilterFormatException for an invalid table selector
 */
private int DCTDGetDHT()
throws PDFFilterFormatException
{
  int remaining = DCTDSGet16() - 2;
  dg_framehn = remaining;

  while (remaining > 0)
  {
    final int tableSpec = DCTDSGet8();
    final int slot = tableSpec & 0x0F;   /* Baseline tables are 0 and 1 only. */

    /* Only the AC/DC bit (0x10) and table numbers 0..3 are acceptable, and
       numbers 2 and 3 additionally require room for 4 tables and a
       non-baseline frame. */
    final boolean reservedBitsSet = (tableSpec & 0xEC) != 0;
    final boolean upperSlot = (tableSpec & 0x2) != 0;
    final boolean upperSlotsBarred =
        (DCTTables.MAX_CODE_TBLS < 4) || (dg_encoding == DCTTables.ASJPEGBaseline);
    if (reservedBitsSet || (upperSlot && upperSlotsBarred))
    { DCTError.err(DCTError.dctdmain12, "Invalid table number in JPEG DHT marker",
                DCTError.filtSrcRangeErr);
    }

    /* AC and DC tables share one structure type, so both are handled by the
       same code path once the owning array is chosen. */
    final boolean isAC = (tableSpec & 0x10) != 0;
    final DCTdec_code_struct[] bank = isAC ? dg_ats : dg_dts;
    DCTdec_code_struct table = bank[slot];
    if (table == null)
    { table = isAC ? DCTdec_code_struct.newACdecode()
                   : DCTdec_code_struct.newDCdecode();
      bank[slot] = table;
    }
    DCTDMakeHuffDec(table, isAC);

    /* One selector byte + 16 code-length counts + dg_lastp further bytes
       (dg_lastp is presumably set by DCTDMakeHuffDec to the number of
       symbol values read -- confirm). */
    remaining -= 17 + dg_lastp;
  } /* while (remaining > 0) */

  return remaining;
} /* DCTDGetDHT */


/*--------------------------------------------------------------------------
 Extract parameters from the SOS marker segment (Start of Scan).  JPEG
 permits [1..255] frame comp., but only [1..4] of these per scan.  In a
 particular scan, components must occur in the same order as in the frame
 (i.e., a single pass through the frame components should match up the IDs;
 see 9-R7 B.2.3 Scan Header Syntax).  Although this implementation restricts
 the frame to only 4 components (DCTMaxColors in ASFiltExt.h), it does
 permit a subset of these for the scan.  JPEG specifies quantization and
 sampling parameters with SOF but Huffman-coding parameters with SOS.  This
 accommodates JPEG progressive encoding.

 For progressive JPEG, the Ss, Se, Ah, and Al scan parameters are used as
 follows (See CD 10918 or JPEG-9-R7 B.2.3 Scan Header Syntax):
   Ss: Specifies the first DCT coef. in each block coded in the scan.
   Se: Specifies the last DCT coef. in each block coded in the scan.
   Ah: Succ. approx. high bit pos.; specifies the point transform used in
       the preceding scan of the coef. in the band [Ss..Se].  It is 0 for
       the first scan of each band; on subsequent scans, it is the value
       of Al for the previous scan.
   Al: Succ. approx. low bit pos. for point transform.
 [So on the 1st scan of a band, Ah==0, Al==2 (say) causes qcoef/4 to be
  coded; the qcoef/4 calc. rounds toward 0.  On the 2nd scan, Ah==2, Al==1
  causes another low bit of qcoef/2 to be coded.  On the last scan, Ah==1,
  Al==0 causes the final bit of qcoef to be coded. ]
 --------------------------------------------------------------------------*/
/**
 * Parses an SOS (start of scan) header: the number of scan components, for
 * each component its ID and DC/AC Huffman table numbers, and finally the
 * Ss, Se, Ah and Al fields.  Each scan component is bound to the frame
 * component with the same ID (dg_jpscan[i].jpframe).
 *
 * Frame components are searched in order, resuming after the previous
 * match, because JPEG requires scan components to appear in frame order.
 *
 * @return segment bytes left unaccounted for: negative if the declared
 *         length is too short for the component list (returned before the
 *         components are read), otherwise 0 or, in picky mode, the count of
 *         extraneous bytes (in tolerant mode those bytes are skipped)
 * @throws PDFFilterFormatException for an invalid component count, a scan
 *         component that cannot be matched because no frame components
 *         remain, an undefined quantization table, or invalid
 *         Ss/Se/Ah/Al values
 */
private int DCTDGetScanParms()
throws PDFFilterFormatException
{ /* 16 bits representing the size of the scan segment */
  int scanSize = DCTDSGet16();
  int ii, jj, cn;
  int ss, se, ah, al;

  cn = dg_cn = DCTDSGet8();
  if (cn == 0 || cn > dg_framecn)
    DCTError.err(DCTError.dctdmain14, "Invalid number of colors in JPEG SOS marker",
              DCTError.filtSrcRangeErr);
  /* Fixed part is 6 bytes (length, count, Ss, Se, Ah/Al) plus 2 per component. */
  if ((scanSize -= (cn + cn + 6)) < 0)
    return(scanSize);
  jj = 0;
  for (ii = 0; ii < cn; ii++)
  { DCTjscan jpscan = dg_jpscan[ii]/* = new DCTjscan()*/;
    if (jpscan == null)
        jpscan = dg_jpscan[ii] = new DCTjscan();

    DCTjframe jpframe;
    int scanComponentId = DCTDSGet8();
    int acdcid = DCTDSGet8();
    /* On 5/20/96, I reversed DC and AC Id's here to fix a bug */
    jpscan.DCCodeTableId = acdcid >> 4;
    jpscan.ACCodeTableId = acdcid & 0xF;

    /* The previous component's search may have used up every frame
       component.  Report that as the format error it is, before the search
       below would index dg_jpframe past the frame's components (a null
       entry or the end of the array) and fail with a runtime exception. */
    if (jj >= dg_framecn)
        DCTError.err(DCTError.dctdmain15, "JPEG SOS marker component ID matches no frame ID",
              DCTError.filtSrcRangeErr);

    /* NOTE(review): if the remaining frame components are exhausted without
       an ID match, the last frame component is used (long-standing tolerant
       behaviour, kept unchanged); consider rejecting such scans. */
    do
    { jpframe = dg_jpframe[jj++];
      if (jpframe.frameComponentId == scanComponentId)
        break;
    } while (jj < dg_framecn);

    if (jpframe.frameQTNum >= dg_qTableCount)
    { DCTError.err(DCTError.dctdmain16, "Invalid QuantTable number in JPEG SOF marker",
                DCTError.filtSrcRangeErr);
    }
    jpscan.jpframe = jpframe;
  } /* for (ii = 0; ii < cn; ii++) */
  ss = DCTDSGet8();
  se = DCTDSGet8();
  /* Ah and Al share one byte: high nibble Ah, low nibble Al. */
  ah = DCTDSGet8();
  al = ah & 0xF;
  ah >>= 4;
  switch (dg_encoding)
  { case DCTTables.ASJPEGBaseline:
    case DCTTables.ASJPEGSequential:
      /* Sequential scans must cover the full band with no point transform;
         this is enforced only in picky mode. */
      if (dg_picky)
      { if ((ss != 0) || (se != 63) || (ah != 0) || (al != 0))
        DCTError.err(DCTError.dctdmain18, "Invalid Ss Se Ah or Al field in JPEG SOS marker",
                DCTError.filtSrcRangeErr);
      }
      break;

    case DCTTables.ASJPEGProgressive:
      if (   (se > 63)
          || (se < ss)
          || (ss == 0 && se != 0)
          || (ah > 0 && ah <= al)
          || (al > 11)
         )
        DCTError.err(DCTError.dctdmain18, "Invalid Ss Se Ah or Al field in JPEG SOS marker",
                DCTError.filtSrcRangeErr);

      /* AC bands (ss != 0) may only be coded one component at a time. */
      if (ss != 0 && cn != 1)
      { DCTError.err(DCTError.dctdmain17, "More than 1 AC scan color is invalid",
                  DCTError.filtSrcRangeErr);
      }
      dg_ss = ss;
      dg_se = se;
      dg_ah = ah;
      dg_al = al;
      break;

  } /* switch (dg->encoding) */

  if ((scanSize > 0) && (!dg_picky))
  { do /* Skip extraneous marker bytes */
    { DCTDSGet8();
      scanSize--;
    } while (scanSize > 0);
  }
  return(scanSize);
} /* DCTDGetScanParms */


/*--------------------------------------------------------------------------
 DCTDDecodeImage is called by DCTDFilBuf or DCTDNextScan to process all
 markers from the beginning through the first SOS (start-of-scan) marker,
 all markers between scans, and all markers after the final scan through the
 EOI (end-of-image) marker.  It returns after updating dg->state when
 it has processed an SOS or EOI marker or when the source bitstream
 cannot be read or returns an error.

 When a quantized coefficient raster is constructed (for progressive JPEG,
 for sequential JPEG with block smoothing, or for conversion between JPEG
 types), DCTDDecodeImage, after decoding an SOS marker, also decodes the
 next scan of the image into the quantized coefficient raster (qcoefBase)
 for each color.

 The JPEG sequencing rules are as follows (See 9-R7 B.2 or 8-R8 6.2.1):
 1) An SOI marker begins and an EOI marker ends compressed image data.
 2) Inside the SOI and EOI markers, a single frame can occur initiated by
    one of the SOFn markers.
 3) After the SOFn marker and before the first SOS marker, miscellaneous
    markers can occur (COM, APPn, DQT, DHT, DAC, DRI) in arbitrary sequence
    and replication.  A DQT and a DHT or DAC marker must precede the SOS.
    (DAC markers are not supported by this implementation.)
 4) An SOS marker initiates the (first) entropy coded segment.
 5) In the entropy coded segment, if a DRI marker was given before SOS,
    RST markers must occur at the designated places; no other markers may
    occur during the entropy coded part.  A DNL marker may terminate the
    image prematurely at the end of an 8*maxv scanline strip.
 6) For progressive JPEG, a number of scans occur; at the end of the first
    scan, miscellaneous markers may occur except DQT; another SOS marker
    initiates the next scan.
 7) An EOI marker must follow the last entropy coded segment or DNL marker,
    terminating the compressed image.

 The rules of this implementation differ from the JPEG rules as follows:
 1) DNL markers are unimplemented.
 2) Arithmetic coding (DAC markers), hierarchical and spatial (lossless)
    JPEG are unimplemented.
 3) Multiple scans within the frame are illegal for sequential JPEG.
 4) The input image to the encoder and the output image from the decoder
    must be in interleaved pixel format.
 5) COM and APPn markers are allowed to occur before the SOI and before the
    EOI markers as well as between the SOI and SOS markers except that the
    Adobe APPE marker must occur after SOI.
 6) DRI could occur before SOI, but SOI resets the resync parameters, so
    it is useless there.
 7) The DCTDecode filter terminates after an EOI marker.

 It is probably better to tolerate the following bugs except when `Picky=1':
 1) 16-bit quantizers (in old Radius; illegal in JPEG except for 12-bit
    input samples).
 2) Extra RST marker just before EOI (was in Tom Lane's impl.)
 3) RST0 marker occurring immediately after SOS (in 5/24/91 Radius code).

 Frame, quantizer, and scan parameters are organized as follows:
   1) Frame params are put in dg->jpframe[frameCompNum] during an SOFn marker.
   2) Quantization tables are loaded into dg->qtizers[j] arrays during the DQT
      marker.  Each quant. table has an ID in [0..3].  Pairing of frame
      parameters to these ID's can be arbitrary.
   3) Scan parameters are collected into the dg->jpscan[scanCompNum] struct
      during the SOS marker.  The jpframe[frameCompNum] struct is searched to
      find a frameComponentId matching scanComponentId, and jpscan->jpframe is
      set to point at it.  (Since JPEG requires scan comp. order to be the
      same as frame comp. order, the code below begins the search for the next
      frame comp. with the comp. after the last scan comp.'s frame comp.)
   4) After the return from DCTDGetScanParms for the first SOS marker,
      DCTAllocate is called.  (Storage allocated does not depend on any scan
      params, but the allocation must occur after the DQT marker, which might
      come after the SOFn marker, so the allocation cannot be done earlier.)
      DCTAllocate allocates some large blocks of storage for the color strip,
      scan line buffer and scaled quant. tables (pointed at by
      dg->equant[qtnum]).  For prog. JPEG, it allocates the coef. raster for
      each color; allocation does not change between scans.
   5) The QTableNum for each scan component is found through the jpframe
      pointer; the index of the quant. table was a frame parameter.
   6) DCTDScaleQT is called to scale the quantizers the way the reverse DCT
      wants them and put the result into dg->equant[qtnum].
 --------------------------------------------------------------------------*/
private void DCTDDecodeImage()
throws PDFFilterFormatException
{ int jj, origmCode, mCode;
  boolean reportMarkerError;
  int segSize = 0;
  /* Marker-interpretation loop.  Each pass reads one marker code with
   DCTDMarkerSGet and dispatches on it.  The method returns only from the
   SOS case (after setting dg_state = 4) and from the EOI / source-end case;
   every other marker either raises an error through DCTError.err or leaves
   a residual byte count in segSize that is handled after the switch.
   NOTE(review): the error paths below assume DCTError.err does not return
   normally (presumably it throws PDFFilterFormatException) -- confirm.
   */
//#if UserMarkers
//  ASJPEGMarker markerHdr;
//  ASUns8P markerBody;
//#endif

  for (;;)
  {
    reportMarkerError = true;      /* true if segSize should be 0 after switch */
    origmCode = mCode = DCTDMarkerSGet();
    /* Collapse the APP0..APPF and RST0..RST7 ranges onto one case label
     each; origmCode keeps the code actually read for later tests and
     error messages.
     */
    if ((mCode <= DCTTables.APPF_MARKER) && (mCode >= DCTTables.APP0_MARKER))
      mCode = DCTTables.APP0_MARKER;
    else if (mCode >= DCTTables.RST0_MARKER && mCode <= (DCTTables.RST0_MARKER + 7))
      mCode = DCTTables.RST0_MARKER;
    switch(mCode)
    {
      case DCTTables.SOI_MARKER:
        /* Only one SOI is accepted; dg_state != 0 means one was already seen. */
        if (dg_state != 0)
          DCTError.err(DCTError.dctdmain30, "Second SOI or SOFn JPEG marker is disallowed",
                    DCTError.filtSrcRangeErr);

        dg_state = 1;
        /* Reset the restart interval. */
        dg_resync_count = 1;
        dg_resync = dg_resyncmod8 = 0;
        segSize = 0;
        break;

      case DCTTables.SOF0_MARKER:
      case DCTTables.SOF1_MARKER:
      case DCTTables.SOF2_MARKER: /* Progressive */
      /* Record the encoding implied by the SOFn flavor.  SOF1 and SOF2 also
       set dg_relax; SOF2 additionally requests the quantized coefficient
       raster (dg_haveQCoefRaster).
       */
      if (mCode == DCTTables.SOF0_MARKER)
      {
        dg_encoding = DCTTables.ASJPEGBaseline;
      }
      else if (mCode == DCTTables.SOF1_MARKER)
      {
        dg_encoding = DCTTables.ASJPEGSequential;
        dg_relax = true;
      }
      else /* if (mCode == DCTTables.SOF2_MARKER) */
      {
        dg_encoding = DCTTables.ASJPEGProgressive;
        dg_haveQCoefRaster = true;
        dg_relax = true;
      }

      /* A non-null first frame entry means a frame header was already parsed. */
      if (dg_jpframe[0] != null)
        DCTError.err(DCTError.dctdmain30, "Second SOI or SOFn JPEG marker is disallowed",
                  DCTError.filtSrcRangeErr);

      segSize = DCTDGetFrameParms();
      DCTInitCommon(0);
      break;

      /* Adobe's reserved APPE marker, which begins with the ASCII text `Adobe'
       and is >= 14 chars. long, is digested by the DCTDAdobeMarker procedure.
       Pass every other COM and APPn marker to the dg->decodeUserMarker
       callback procedure; if it returns 1, put the marker at the tail of the
       dg->markers chain, else deallocate it.
       (In this Java port the user-marker callback code is compiled out, so
       non-Adobe COM/APPn segments are simply skipped after the switch.)
       */
      case DCTTables.COM_MARKER:
      case DCTTables.APP0_MARKER:
        { int c0 = 0;
          int c1 = 0;
          /* The 16-bit length field counts itself, hence the -2. */
          segSize = DCTDSGet16() - 2;
          reportMarkerError = false;  /* Skip bytes in segment */
          jj = 0;

          if ((origmCode == DCTTables.APPE_MARKER) && segSize >= 12)
          { /* Verify that "Adobe" appears big-endian at the beginning of the
             marker.  Otherwise, assume a different application.
             */
            segSize -= 5;           /* the 4 + 1 bytes read just below */
            c0 = DCTDSGet32();
            c1 = DCTDSGet8();
            /* 65,100,111,98 are ASCII 'A','d','o','b'; 101 is 'e'. */
            if ((c0 == (65*16777216 + 100*65536 + 111*256 + 98)) && (c1 == 101))
            { segSize = DCTDAdobeMarker(segSize);
              break;
            }
            /* Not an Adobe marker--fall through */
            jj = 5;   /* only read by the disabled UserMarkers code below */
          } /* if ((origmCode == APPE_MARKER...)) */
//#if UserMarkers
//          /* Not APPE or too small for `Adobe' marker */
//          if (dg->decodeUserMarker != NULL)
//          { ASJPEGMarker prevMarkerHdr = dg->markers;
//            ASJPEGMarker *prevMarkerHdrPtr;
//            if (prevMarkerHdr == NULL)
//            { prevMarkerHdrPtr = &dg->markers;
//            } else /* Put marker at end of chain */
//            { while (prevMarkerHdr->nextMarker != NULL)
//              { prevMarkerHdr = prevMarkerHdr->nextMarker;
//              }
//              prevMarkerHdrPtr = &(prevMarkerHdr->nextMarker);
//            }
//            segSize += jj;
//            markerHdr = (ASJPEGMarker)(AllocSmall(
//                segSize + sizeof(ASJPEGMarkerRec) + 4));
//            if (markerHdr == NULL)
//            { DCTAllocFail(dg, (ASInt32)(segSize + sizeof(ASJPEGMarkerRec) + 4));
//            }
//            /* To avoid a potential leak, storage must be chained here before
//             calling DCTDSGet8 below.
//             */
//            markerHdr->nextMarker = NULL;
//            markerHdr->markerLen = segSize + 4;
//            *prevMarkerHdrPtr = markerHdr;
//
//            /* Reconstruct/copy the marker into the allocated storage.
//             For the converter, retained markers will be handed off to
//             the encoder later, so preserve all bytes.
//             */
//            markerBody = (ASUns8P)(&markerHdr[1]);
//            markerBody[0] = FIL_MARKER;
//            markerBody[1] = (ASUns8)origmCode;
//            markerBody[2] = (ASUns8)((segSize + 2) >> 8);
//            markerBody[3] = (ASUns8)((segSize + 2) & 0xFF);
//            if (jj > 0)
//            { markerBody[4] = (ASUns8)(c0 >> 24);
//              markerBody[5] = (ASUns8)((c0 >> 16) & 0xFF);
//              markerBody[6] = (ASUns8)((c0 >> 8) & 0xFF);
//              markerBody[7] = (ASUns8)(c0 & 0xFF);
//              markerBody[8] = (ASUns8)c1;
//            }
//            DCTDSFRead(dg, &markerBody[jj + 4], segSize - jj);
//            if (! (*dg->decodeUserMarker)((char *)(&markerBody[4]),
//                                          (ASUns32)origmCode,
//                                          (ASUns32)segSize))
//            { FreeSmall(markerHdr);
//              *prevMarkerHdrPtr = NULL;
//            }
//            segSize = 0;
//          } /* if (dg->decodeUserMarker != NULL) */
//#endif /* UserMarkers */
	}
        break;

      case DCTTables.DHT_MARKER:
        if (dg_state == 0)
            DCTDNoSOI();
        segSize = DCTDGetDHT();
        break;

      case DCTTables.DQT_MARKER:
        /* For progressive images the quantizers may not change once a scan
         has been decoded.
         */
        if (dg_encoding == DCTTables.ASJPEGProgressive && dg_scansCompleted > 0)
        { DCTError.err(DCTError.dctdmain21, "JPEG DQT marker is invalid after first scan",
                    DCTError.filtSrcRangeErr);
        }

        if (dg_state == 0)
            DCTDNoSOI();
        segSize = DCTDGetDQT();
        break;

      case DCTTables.SOS_MARKER:
        if (dg_state == 0)
            DCTDNoSOI();
        /* A scan needs frame components, quantizers and Huffman tables. */
        if ((dg_framecn == 0) || (dg_frameqn == 0) || (dg_framehn == 0))
        { DCTError.err(DCTError.dctdmain22, "JPEG SOFn DQT or DHT marker missing before SOS marker",
                    DCTError.filtSrcRangeErr);
        }
        segSize = DCTDGetScanParms();
        /* A non-zero residual is handed to the common segSize handling
         after the switch instead of starting the scan.
         */
        if (segSize != 0)
          break;
        if (dg_scansCompleted == 0)
        {
          /* Allocate and initialize the non-gvl storage. */
          DCTAllocate(dg_raster_size_out);
          for (jj = 0; jj < dg_qTableCount; jj++)
          { DCTDScaleQT(dg_equant[jj], jj);
          }
          if (dg_colorconv != 0)
          {
            DCTDInitCTBig(); /* Initialize DCTDecode color transform tables */
          }

        } /* if (dg->scansCompleted == 0) */
        /* After doing the scan, maybe for a subset of the components,
         reinitialize scan variables for output.  Currently, only
         interleaved output is implemented, so set dg->cn == dg->framecn.
         */
        if (dg_haveQCoefRaster)
        { DCTInitForScan();
          DCTDDecodeScanToRaster();
          dg_scansCompleted++;
          dg_cn = dg_framecn;
        }

        DCTInitForScan();
        dg_out_lines_processed = 0;
        dg_nblocks = 0;
        DCTResetDC();
        dg_resync_count = dg_resync + 1; /* Reset RSTn marker sequence */
        dg_state = 4;                     /* indicate SOS marker seen */
        return;


      case 0x100: /* Source end or error when 0xFF followed by marker byte
                   was expected. */
        if (dg_state == 0)
          DCTDNoSOI();
        else if (dg_picky)
          DCTDReportEOF();
        /* Deliberate fall-through: a source end is then handled like EOI. */
      case DCTTables.EOI_MARKER:
        /* The EOI marker should cause filter EOD.  Closing the input
         bitstream is inappropriate for a PostScript filter; and it is
         not useful for the standalone program because os_stdin is used. */
        if (dg_state == 0)
            DCTDNoSOI();

        dg_eoiDecoded = true;
        if (dg_scansCompleted <= dg_scansInImage)
        { 
        	 dg_state = 2;  /* indicate no more output to process */
        	 
        }
        /* Otherwise (no else branch is needed): DCTDNextScan must have been
         called before completing output of a reconstructed image based upon
         the previous scan.  Do not change dg->state, dg->nblocks, or
         dg->out_lines_processed here; just return to resume image
         reconstruction.
         */

        return;

      case DCTTables.DRI_MARKER:
        segSize = DCTDGetDRI();
        break;

      case DCTTables.SOF3_MARKER: /* Spatial (sequential) lossless */
        DCTError.err(DCTError.dctdmain23, "Spatial/lossless JPEG disallowed",
                  DCTError.filtUnimpErr);
        /* NOTE(review): there is no break here; if DCTError.err ever returned,
         control would continue into the RSTn handling below.
         */

      case DCTTables.RST0_MARKER: /* RST0 to RST7 */
        /* RSTn markers are only valid in the Huff-coded part of a compressed
         image, not at the beginning or end of the Huff-coded part.  Control
         would come here if a RSTn marker erroneously occurred immediately
         before EOI/SOS or during a compressed image header before the first
         scan.  If !dg->picky, tolerate the marker; otherwise, give an error.
         */
        if (dg_picky)
        { DCTDInvalidMarker(origmCode);
	}
        /* Remember which restart number (mod 8) is expected next. */
        dg_resyncmod8 = (origmCode + 1) & 0x7;
        break;

      /* case VER_MARKER: */
      /* case DTI_MARKER: */
      /* case DTF_MARKER: */
      /* case SRF_MARKER: */
      /* case SRS_MARKER: */
      /* case DCR_MARKER: */
      /* case DQS_MARKER: */
      /* case DNL_MARKER: */
      default:
        /* Everything else is rejected; the ranges only select the message. */
        if ((origmCode >= DCTTables.SOF9_MARKER) && (origmCode <= DCTTables.SOFF_MARKER))
        { DCTError.err(DCTError.dctdmain24, "Arithmetic-coded JPEG disallowed",
                    DCTError.filtUnimpErr);
        } else if ((origmCode >= DCTTables.SOF5_MARKER) && (origmCode <= DCTTables.SOF7_MARKER))
        { DCTError.err(DCTError.dctdmain25, "Differential JPEG disallowed",
                    DCTError.filtUnimpErr);
        }
        else if ((origmCode >= 0xF0) && (origmCode <= 0xFD))
	{ DCTError.err(DCTError.dctdmain26, "JPEG extension marker "+origmCode+" disallowed",
                    DCTError.filtUnimpErr);
        } else
        { DCTError.err(DCTError.dctdmain27, "Unknown or invalid JPEG marker = "+origmCode,
                    DCTError.filtSrcRangeErr);
        }
    } /* switch (mCode) */

    /* The intention here is to report an irr. error if segSize < 0,
     to continue interpreting markers if segSize == 0, and to skip segSize
     unread bytes of the last marker if reportMarkerError==0 or dg->picky==0.
     A positive segSize with reportMarkerError set and dg->picky set is
     reported as a "marker too long" error instead of being skipped.
     */
    if (segSize == 0)
      continue;
    if (segSize < 0)
      DCTError.err(DCTError.dctdmain28, "JPEG "+origmCode+" marker too short",
                DCTError.filtSrcRangeErr);

    if ((!reportMarkerError) || (!dg_picky))
    {
      /* Debugging crock
      DCTDebugOut(dg->debugOutArg, "segSize = %ld\n", segSize);
       */
      /* Discard the unread remainder of the segment one byte at a time. */
      while (segSize-- > 0)
        DCTDSGet8();
      continue;
    }
    DCTError.err(DCTError.dctdmain29, "JPEG "+origmCode+" marker too long",
              DCTError.filtSrcRangeErr);
  }
} /* DCTDDecodeImage */


/*--------------------------------------------------------------------------
 DCTDNextScan is used to sequence through scans of a compressed image.

 If called before processing any part of the compressed image, DCTDNextScan
 processes initial markers through the first SOS, at which point, dg->state
 == 4; if this happens without error DCTDNextScan returns 0, else 1.
 DCTDNextScan can be used this way regardless of whether the QuantCoefRaster
 compile conditional is true or false, and regardless of what kind of JPEG
 image is being decoded.  Iff there is a coefficient raster, this initial
 decoding will process the first scan's Huffman-coded data as well as the
 initial markers in the compressed image.

 After starting to decompress an image, if dg->haveQCoefRaster == 0 (or
 the QuantCoefRaster compile conditional is false), any call to DCTDNextScan
 will return 1 (nothing changed) and do nothing.  (The caller should not be
 trying to advance scans or reset to the beginning of the image except when
 the coefficient raster is being used, but the extra calls are harmless.)

 On a multi-scan image (QuantCoefRaster compile conditional true and
 dg->haveQCoefRaster == 1), DCTDNextScan is also used to sequence through
 scans.  Each subsequent call will attempt to decode another scan; if another
 scan is decoded, the output scan line number is reset to 0, the prev. DC
 values are reset, and 0 is returned; otherwise, the state of the output
 is unchanged and 1 is returned.

 The output ASStm position is always somewhere in the last scan decoded; this
 is a consequence of the fact that the output ASStm position is reset when a
 new scan is decoded (0 returned by DCTDNextScan) and not reset when a new
 scan is not decoded (1 returned by DCTDNextScan).  dg->scansCompleted contains
 the number of the last scan decoded; it is incremented at the completion of
 decoding a scan.  dg->scansInImage contains the number of the last scan for
 which coefficients were used to completely reconstruct an image on the
 output ASStm.  When the previous scan was the final scan in the compressed
 image, then DCTDNextScan will decode an EOI marker; but the output ASStm
 position is not reset and 1 is returned when this happens.  dg->scansInImage
 is set equal to dg->scansCompleted when a decoding scan is completed.

 NOTE: DCTDNextScan should not be called after starting to output the final
 scan line in the image until that scan line is completely output because
 dg->scansInImage is set equal to dg->scansCompleted at the beginning of the
 final scan line, but this does not become correct until those pixels are
 actually consumed by the client.  (If such a premature DCTDNextScan call
 happened during the output of the final scan line after the final scan in
 the compressed image, then output of some pixels on the last scan line would
 never occur.)

 NOTE: For ASStm variations, the caller must reset the stm->cnt field and set
 the output image position to the beginning of the image after DCTDNextScan
 returns 0 and should not do either of these things when DCTDNextScan returns 1.

 After calling DCTDNextScan, the caller can determine compressed image
 characteristics from dg->scansCompleted, dg->scansInImage, dg->encoding,
 dg->lx, dg->ly, and dg->framecn.
 --------------------------------------------------------------------------*/
public int DCTDNextScan()
throws Exception
{
  /* Decide from the state at entry whether the marker stream should be
   advanced at all.  Every early exit returns 1 ("nothing changed").
   */
  final int entryState = dg_state;

  if (entryState == 4 || entryState == 5)
  { /* In the middle of a scan: only a coefficient-raster decode that has not
     yet met the EOI marker may move on to another scan.
     */
    if (dg_eoiDecoded || !dg_haveQCoefRaster)
    { return 1;     /* The standalone decoder stops with this. */
    }
  }
  else if (entryState == 0)
  { /* First call: prime the bitstream reader before any marker is read. */
    DCTDInitSGet();
  }
  else if (entryState == 1)
  { /* Between scans or after the last scan: without a coefficient raster
     there is nothing further to sequence through.
     */
    if (!dg_haveQCoefRaster)
    { return 1;     /* The standalone decoder stops with this. */
    }
  }
  else if (entryState == 2 || entryState == 3)
  { /* EOI already seen (2) or source exhausted (3): release storage and
     report that nothing changed.  DCTFree is a noop if nothing was
     allocated yet.
     */
    DCTFree();
    return 1;       /* The standalone decoder stops with this. */
  }

  /* Interpret markers up to the next SOS or to EOI / source end. */
  DCTDDecodeImage();

  /* 0 only when an SOS marker was decoded (state >= 4) and no EOI marker
   has been decoded; 1 for EOI or any other outcome.
   */
  final boolean scanStarted = (dg_state >= 4) && !dg_eoiDecoded;
  return scanStarted ? 0 : 1;

} /* DCTDNextScan */


/*--------------------------------------------------------------------------
 DCTDFilBuf tries to fill the dg->ncolorline buffer.  It returns 0 if
 successful or AS_EOF if the buffer cannot be filled (i.e., AS_EOF or error).
 dg->state, dg->stripmv, and dg->stripv are decoder state variables.
 There is no provision for returning any part of the final strip at the
 end of a truncated image; premature AS_EOF discards this strip.

 The lowest-level bitstream reading code sets dg->state==3 when a premature
 AS_EOF or error occurs.  This error condition can only happen during decoding
 of markers by DCTDDecodeImage or of a strip by DCTDDecodeStrip.

 The normal sequence of states is as follows:

 1) A PSL2 client creates the filter with a call to DCTDecodeOpen, which
 sets dg->state == 0.  The kind of JPEG encoding at this point is unknown.

 2) The client will next call DCTDNextScan or DCTDFilBuf.  In dg->state == 0,
 the filter will then process the SOI marker and set dg->state == 1.  It will
 then process other markers in the compressed image until an SOS marker is
 decoded.  If building a quantized coefficient raster (e.g., progressive JPEG
 or sequential JPEG with block smoothing), it will decode the scan and fill
 the jpframe->qcoef raster for each color with the quant. coef. values
 resulting from the first scan.  After this for progressive, or immediately
 for sequential, it will set dg->state == 4 and return.

 3) The first attempt to read from the filter, which may occur either
 immediately after creating the filter or after one or more calls to
 DCTDNextScan and/or DCTDReset, finds the input buffer empty and calls
 DCTDFilBuf; if dg->state is 0, this call will do the processing
 described in (2) above, which winds up with dg->state == 4.

 With no quant. coef. raster:

   4a) In dg->state == 4, decode, dequantize, and reverse DCT the first
   strip MCU by MCU into the color strip buffers.  Change dg->state to 5
   after the color strip buffers are filled, and reverse color transform the
   first scanline in the strip into the dg->ncolorline buffer.

   5a) In dg->state == 5, reverse color transform the next scan line of the
   strip into the dg->ncolorline buffer.  Subsequent calls to DCTDFilBuf are
   in state 5 for all but 1 scan line in a strip, and are in state 4 when
   another strip must be decoded.  On the call which delivers the final
   scan line in the scan, dg->state is set to 1 again.

   6a) On the call after that, the EOI marker is processed and DCTFree is
   called to release the big pieces of storage owned by the filter.

 With a quant. coef. raster:

   4b) For progressive, one or more calls to DCTDNextScan may occur; each call
   made prior to decoding of the final scan in the compressed image will reset
   the decoding raster position to the beginning of the image, set
   dg->state == 1, process data up to the next SOS marker, and decode a scan
   into the jpframe->qcoef rasters; i.e., it will update the approx. to the
   quant. coef. values that are currently in the raster.  Then it will set
   dg->state to 4.

   5b) Like sequential JPEG (4a) above except that the qcoefBase values of
   the quant. coef. are used and no decoding occurs.

   6b) Same as sequential JPEG (5a) above.

   7b) When DCTDFilBuf is called after the final scan line in the compressed
   image, it resets the image position to the beginning except after the
   last scan, when it expects to process an EOI marker.

 FilterStmFilBufFinish is the procedure which assures that an extra call
 will be made, so that step (6) really happens.  At filter creation,
 fd.charBeyond is initialized to AS_EOF by BasicFilterStmCreate.  On the first
 call to DCTDFilBuf, FilterStmFilBufFinish squirrels the final buffer char.
 away in fd.charBeyond, so the consumer of the DCTDecode filter sees 1
 char. less than the first scanline; subsequent calls to DCTDFilBuf deliver
 the final char. of the preceding scanline plus all but one char. of the next
 scanline until the final scanline is delivered.  When DCTDFilBuf is called
 to get the final char. of the final scanline, then it goes through
 step (4) above.

 A problem is that a buffer to hold the ASungetc() of fd.charBeyond must
 exist after the dg->ncolorline buffer has been deallocated.  Currently,
 the crock of using &dg->unused for this is implemented; the previous
 item in the gvl struct before unused is the int bs_byte, which is ok to
 smash because it is empty after delivering the EOI marker.
 ---------------------------------------------------------------------------*/

private boolean DCTDFilBuf()
throws IOException
{
  /* The work done depends on the state at entry; later stages re-read
   dg_state because the decoding calls may change it.  Returns true when
   there is nothing more to deliver (storage has been released) and false
   otherwise.
   */
  final int entryState = dg_state;

  /* States outside 0..5 are not handled: report "no end" and do nothing. */
  if (entryState < 0 || entryState > 5)
  { return false;
  }

  if (entryState != 5)
  {
    if (entryState == 0)
    { DCTDInitSGet();         /* First call.  In dctdhuff.c */
    }
    if (entryState == 0 || entryState == 1)
    { /* Between scans or after the last scan: interpret markers. */
      DCTDDecodeImage();
    }
    if (entryState != 2 && entryState != 3)
    { /* Process next strip (at least 1 in every scan), but only if the
       decoder is now positioned at a strip.
       */
      if (dg_state == 4)
      { DCTDDecodeStrip();    /* In dctdcore.c */
      }
    }

    /* EOI processed, source exhausted, or still between scans. */
    if (dg_state < 4)
    { DCTFree();              /* A noop if no storage allocated yet. */
      return true;            /* The standalone decoder stops with this. */
    }

    /*** Worry about DNL markers here */
    dg_stripmv = 0;
    dg_stripv = 0;
    dg_state = 5;
  }

  /* Process next scanline (at least 1 in every strip): build it in
   dg->ncolorline.
   */
  DCTPutNCLine(dg_stripv, dg_stripmv);

  /* Step to the next line of the strip; once dg_stripv reaches 8 the next
   call must decode a fresh strip.
   */
  dg_stripmv++;
  if (dg_stripmv >= dg_maxv)
  { dg_stripmv = 0;
    dg_stripv++;
    if (dg_stripv >= 8)
    { dg_state = 4;
    }
  }

  dg_out_lines_processed++;
  if (dg_out_lines_processed >= dg_ly)
  {
    /* Final scan line of the image.  Without a coefficient raster the scan
     count is advanced here; dg_scansInImage then records the scan whose
     reconstruction has been produced.

     ***PROBLEM (inherited): dg->scansInImage is updated when dg->ncolorline
     is filled, BEFORE the final scan line has been output, so the caller
     must not execute DCTDNextScan after starting to output the last scan
     line until it has been fully output.
     */
    if (!dg_haveQCoefRaster)
    { dg_scansCompleted++;
    }
    dg_scansInImage = dg_scansCompleted;
    if (dg_eoiDecoded)
    { dg_state = 2;
    }
    else
    { dg_state = 1;
    }
  }
  return false;
} /* DCTDFilBuf */


//dctdhuff.c

/*--------------------------------------------------------------------------
The spec for the bitstream is a big-endian spec with the most significant byte
of multi-byte quantities occurring first and the root of a Huffman code placed
toward the msb (most significant bit) of the byte.

Annex B in JPEG 9-R7 or JPEG 8-R8 Sec. 6.2.1 defines the JPEG markers.  A
marker is any non-zero 8-bit value preceded by a byte-aligned 0xFF prefix.
Some marker codes are followed by an ASUns16 length (where the length includes
the two bytes specifying the length but does not include the 0xFF prefix or
the marker byte itself).  The Baseline decoder requires that a frame begin with
SOF0; the implemented extensions use SOF1; SOF2-SOF15, DHP, EXP, JPG, JPGn,
TEM, and RES markers are illegal and cause PostScript errors.  The Baseline
decoder skips bytes in APPn markers and handles RSTm markers.  APPn markers may
be inserted just before SOF or SOS markers.

Two sets of routines read the bitstream: (DCTDHuffSGet, DCTDmdusgetv, and
DCTDMCUPreSGet) read Huffman codes while processing an MCU; and (DCTDSGet8,
DCTDSGet16, and DCTDMarkerSGet) read non-MCU data.  The distinction is that
embedded marker codes may not occur during an MCU; at other times, the program
is either reading the next marker code, in which case any residual bits in the
last byte are skipped (and any other data up to the next marker prefix); or it
is reading marker data arguments where the 0xFF code is natural, not a marker
code.

DCTDHuffSGet interprets Huffman codes; and DCTDmdusgetv fetches the bit vector
following each Huffman code.  The normal byte fetch is open-coded in these
routines and DCTDMCUPreSGet handles any 0xFF marker prefixes.  If
ProcedureDecode=0, then the DCTDHuffSGet and DCTDmdusgetv procedures are not
compiled; in this case, their equivalent is open-coded in dctdcore.c.  (These
routines were responsible for some slowness of the DCT decoder since they were
called about 7 times/8x8 block.)

For Huffman decoding, the spec requires that a 0xFF byte (or FIL_MARKER) in
the bitstream be followed by a 0 byte; any other following value would be
interpreted as a marker code, all of which are illegal (except that an
indefinitely long sequence of 0xFF bytes is treated as a single such byte).

The only marker code which may occur unexpectedly is DNL, used at the end of a
scan to terminate it.  DNL may occur only at the end of an MCU strip to adjust
image length to retain between 0 (1?) and 8*maxv lines of the final strip; DNL
cannot be used to shorten the image to some point earlier than the
beginning of the final strip, nor can it be issued until the final strip is
complete.  This implementation does not support DNL.

Between MCUs, all other markers are illegal except RSTm markers.  If restart
was enabled by a DRI marker (prior to the SOS marker) which specified a
non-zero MCU count, then a RSTm marker code is placed between the MCUs at the
specified restart interval; since the restart interval is specified in MCUs,
the RSTm markers occur only between Huffman-coded parts of the compressed
bitstream, not during them.  The intent of JPEG is that RSTm permits image
editing on a smaller scale than strips; PostScript decodes RSTm markers.

The code here keeps the next byte in the bitstream in dg->byteAhead,
so that it can look ahead one byte after a 0xFF marker prefix.

NOTE: One performance hack attempted put 0xFF at the end of the good data in
the bs_fh stream buffer and attempted to coalesce several end checks into one.
This does not work inside PS because input might be coming from a string, in
which case writing the byte after the last good byte would smash the string and
might cause a memory fault.
--------------------------------------------------------------------------*/

/*-------------------------------------------------------------------------
 Problems with the decoded data cause these errors.
 -------------------------------------------------------------------------*/
private void DCTDInvalidMarker(int marker)
throws PDFFilterFormatException
{
  /* RSTn markers are the eight consecutive codes starting at RST0_MARKER;
   they get a message naming the restart number, every other code is
   reported by value.
   */
  final boolean isRestart =
      (marker >= DCTTables.RST0_MARKER) && (marker <= (DCTTables.RST0_MARKER + 7));

  if (!isRestart)
  { final String detail =
        "Invalid JPEG marker "+marker+" in scan "+dg_scansCompleted+" 8x8 block "+dg_nblocks;
    DCTError.err(DCTError.dctdhuff2, detail, DCTError.filtSrcRangeErr);
    return;
  }

  final String detail =
      "Invalid JPEG RST"+(marker & 7)+" marker in scan "+dg_scansCompleted+" block "+dg_nblocks;
  DCTError.err(DCTError.dctdhuff1, detail, DCTError.filtSrcRangeErr);
} /* DCTDInvalidMarker */


private void DCTDBadHuff()
throws PDFFilterFormatException
{
  /* Report an over-long Huffman code, tagged with the current scan and
   block counters.
   */
  final String detail =
      "Huffcode too big in scan "+dg_scansCompleted+" 8x8 block "+dg_nblocks;
  DCTError.err(DCTError.dctdhuff3, detail, DCTError.filtSrcRangeErr);
} /* DCTDBadHuff */


/*-------------------------------------------------------------------------
 Procedure to report AS_EOF or error during GETC or FREAD for a marker.
 ***It would be better to distinguish AS_EOF from error here.
 -------------------------------------------------------------------------*/
private void DCTDReportEOF()
throws PDFFilterFormatException
{
  /* Source end and source error are not distinguished; both are reported
   with the source-end error class and the current scan/block counters.
   */
  final String detail =
      "Source error or end in scan "+dg_scansCompleted+" 8x8 block "+dg_nblocks;
  DCTError.err(DCTError.dctdhuff4, detail, DCTError.filtSrcEndErr);
} /* DCTDReportEOF */


/*-------------------------------------------------------------------------
 DCTDTruncateError is called when dg->byteAhead holds 0xFF, the following
 byte is non-zero, and the number of preceding undecoded bits is
 insufficient to complete a code word.
 -------------------------------------------------------------------------*/
private void DCTDTruncateError()
throws PDFFilterFormatException
{
  /* Fetch the byte following the 0xFF prefix.  A positive value is a marker
   code and is reported as an invalid marker; anything else (or a return
   from that report) is reported as a source end/error.
   */
  final int markerCode = FGETC();
  final boolean haveMarker = markerCode > 0;
  if (haveMarker)
  { DCTDInvalidMarker(markerCode);
  }
  DCTDReportEOF();
} /* DCTDTruncateError */


/*-------------------------------------------------------------------------
 Error subroutine for DCTDMarkerSGet.
 -------------------------------------------------------------------------*/
private void DCTDNoMarker()
throws PDFFilterFormatException
{
  /* Report that a marker was expected but not found, tagged with the
   current scan and block counters.
   */
  final String detail =
      "Missing marker before scan "+dg_scansCompleted+" 8x8 block "+dg_nblocks;
  DCTError.err(DCTError.dctdhuff5, detail, DCTError.filtSrcRangeErr);
} /* DCTDNoMarker */


/*-------------------------------------------------------------------------
 Subroutine used by DCTDHuffSGet and DCTDmdusgetv when the next bitstream
 byte is any value outside [0..0xFE]; only 0xFF and AS_EOF are legal
 possibilities for FASTGETC in this case.  The code assumes that the byte
 is 0xFF; if it is really AS_EOF, then the FGETC call will return the same
 result one byte later.  The code skips any sequence of following 0xFF bytes
 and returns 0, iff the next non-0xFF byte is a 0.

 If the byte following the (sequence of) 0xFF byte(s) is not 0, then it
 is a marker code that is UNGETC'ed; in this case, it is impossible to
 refill any more bytes; give an error if unused < nbits; otherwise, return
 non-zero to the caller.  The decoder will then continue decoding any bits
 already buffered.  If it runs out before asking for the next marker, an
 error will occur.  If additional attempts to read more bits from the source
 occur, then the 0xFF byte in dg->byteAhead will be reread, and this
 procedure will again be called to reread the UNGETC'ed byte.

 The occurrence of any real (i.e., non-0) marker during the Huffman decoding of
 a strip is illegal; markers must occur either before the first scan, between
 scans, or between MCUs.  The marker prefix 0xFF (or FIL_MARKER) occurs
 naturally about once every 300 bytes during decoding; this would occur less
 frequently if attention were paid to the Huffman coding.  This procedure is
 supposed to work correctly on either big-endian or little-endian CPUs.

 ***On 5/21/91, Radius's JPEG issued the first RST marker at the beginning
 of the scan rather than after the specified no. MCUs.  By not detecting
 the error here, those images would print.  However, this procedure would
 then have to be passed a value and return a value and would become complex.
 The Blend=16384 krock is an alternative fix for this.
 -------------------------------------------------------------------------*/
private int DCTDMCUPreSGet(int unusedminusnbits)
throws PDFFilterFormatException
{
  /* Skip any run of additional 0xFF bytes after the prefix. */
  int following = FGETC();
  while (following == 0xFF)
  { following = FGETC();
  }

  /* A zero byte means the 0xFF prefix was literal data (the common case);
   the caller will refill dg->byteAhead.
   */
  if (following == 0)
  { return 0;
  }

  /* Anything else is treated as a marker code.  If the caller does not have
   enough buffered bits to finish its code word (negative argument), this is
   a truncation error.
   */
  if (unusedminusnbits < 0)
  { DCTDTruncateError();  /* Error; does not return. */
  }

  /* Push the byte back and hand the caller UNGETC's result: non-0,
   indicating `marker ahead'.
   */
  return UNGETC(following);
} /* DCTDMCUPreSGet */


/*-------------------------------------------------------------------------
 DCTDSGet8 is called to read a marker segment byte from the bitstream and
 return it to the caller; marker prefixes are ignored--the bitstream is read
 literally.  dg->unused == 0 always at the call.  It is supposed to work
 correctly on either big-endian or little-endian CPUs.
 -------------------------------------------------------------------------*/
private int DCTDSGet8()
throws PDFFilterFormatException
{
  /* Hand out the look-ahead byte and immediately replace it with the next
   byte from the source.  The end-of-source check is made on the byte being
   returned, after the refill has been attempted.
   */
  final int current = dg_byteAhead;
  dg_byteAhead = FGETC();

  final boolean sourceEnded = current < 0;
  if (sourceEnded)
  { DCTDReportEOF();
  }
  return current;
} /* DCTDSGet8 */


/*-------------------------------------------------------------------------
 Procedure to ASfread a marker string and report any error.
 -------------------------------------------------------------------------*/
private byte[] DCTDSFRead(int len)
throws PDFFilterFormatException
{
  /* Read len marker-segment bytes into a new byte array.  The first byte
     comes from the look-ahead (dg->byteAhead); the remaining len - 1 bytes
     are read directly with FGETC, and the look-ahead is reloaded afterwards.

     A zero-length request returns an empty array and leaves the look-ahead
     untouched.  Previously this case indexed element 0 of an empty array
     and failed with ArrayIndexOutOfBoundsException; it is reachable when a
     Huffman table defines no codes (dg->lastp == 0). */
  if (len == 0)
    return new byte[0];

  byte inStr[] = new byte[len];
  inStr[0] = (byte)dg_byteAhead; /* Consume the buffered byte */
  for (int i = 1; i < len; i++)
  { int cb = FGETC();
    if (cb < 0)
    { DCTDReportEOF();
      break;
    }
    inStr[i] = (byte)cb;
  }
  DCTDSGet8(); /* Reload dg->byteAhead */

  return inStr;
} /* DCTDSFRead */

/* DCTDSFReadShort */


private short[] DCTDSFReadShort(int len)
throws PDFFilterFormatException
{
  /* Same as DCTDSFRead, but each byte read is stored in a short element
     (so values read from the stream keep their 0..255 range instead of
     being narrowed to a signed byte).

     A zero-length request returns an empty array and leaves the look-ahead
     untouched, instead of failing on inStr[0] with
     ArrayIndexOutOfBoundsException. */
  if (len == 0)
    return new short[0];

  short inStr[] = new short[len];
  inStr[0] = (short)dg_byteAhead; /* Consume the buffered byte */
  for (int i = 1; i < len; i++)
  { int cb = FGETC();
    if (cb < 0)
    { DCTDReportEOF();
      break;
    }
    inStr[i] = (short)cb;
  }
  DCTDSGet8(); /* Reload dg->byteAhead */

  return inStr;
} /* DCTDSFReadShort */

/*-------------------------------------------------------------------------
 DCTDSGet16 reads two marker segment bytes from the bitstream and returns
 an ASUns16 to the caller.  See DCTDSGet8 comments.
 -------------------------------------------------------------------------*/
private int DCTDSGet16()
throws PDFFilterFormatException
{
  /* Two consecutive DCTDSGet8 reads, first byte most significant. */
  final int high = DCTDSGet8();
  final int low  = DCTDSGet8();
  return (high << 8) + low;
} /* DCTDSGet16 */



/*-------------------------------------------------------------------------
 DCTDSGet32 reads 4 marker segment bytes from the bitstream and returns
 a ASUns32 to the caller.  See DCTDSGet8 comments.
 -------------------------------------------------------------------------*/
private int DCTDSGet32()
throws PDFFilterFormatException
{
  /* Two consecutive DCTDSGet16 reads, first half most significant. */
  final int upper = DCTDSGet16();
  final int lower = DCTDSGet16();
  return (upper << 16) + lower;
} /* DCTDSGet32 */

/*-------------------------------------------------------------------------
 When DCTDMarkerSGet is called, several uninterpreted bits for the preceding
 Huffman-coded part of the last MCU might still be in dg->bs_byte, but the next
 byte in the bitstream should be a marker prefix 0xFF (or FIL_MARKER).
 If dg->picky != 0, then give an error if there are 8 or more bits preceding a
 marker prefix; otherwise, skip ahead until a marker prefix is found.
 -------------------------------------------------------------------------*/
private int DCTDMarkerSGet()
throws PDFFilterFormatException
 {
	/*
	 * Locate the next marker and return its code byte, or 0x100 when the
	 * input ends before a marker code is found.
	 *
	 * Phase 1 skips bytes until a 0xFF is read whose successor is not 0x00.
	 * Whole bytes still pending in the bit buffer (dg_unused >> 3) count as
	 * garbage; DCTDNoMarker() is invoked if any garbage is seen in picky
	 * mode, or after more than 10000 garbage bytes otherwise.
	 *
	 * Phase 2 skips any further 0xFF bytes and returns the marker code.
	 * Every code except EOI_MARKER is consumed from the look-ahead; EOI is
	 * left in dg_byteAhead.
	 */
	int bs_byte;

	int garb_bytes = dg_unused >> 3;
	dg_unused = 0;
	for (;;) {
		if ((dg_picky && garb_bytes > 0) || garb_bytes > 10000)
			DCTDNoMarker();
		bs_byte = DCTDSGet8Marker();
		if (bs_byte == 0xFF && dg_byteAhead != 0x00)
			break;
		else if (bs_byte == 0x100)
			return (bs_byte); /*
							 * For friendly non-error treatment of file
							 * missing the final EOI marker byte or the
							 * preceding 0xFF, pass EOF back to caller.
							 */
		garb_bytes++;
	}

	for (;;) {
		bs_byte = dg_byteAhead;

		/*
		 * End of input is stored in dg_byteAhead as a negative value (see
		 * the `< 0' tests in DCTDSGet8 and DCTDSGet8Marker), so the former
		 * `>= 0x100' test alone could never match and a stream ending
		 * right after the 0xFF prefix was reported as an EOF error.  Treat
		 * a negative look-ahead as EOF too, giving the same friendly 0x100
		 * result as phase 1.
		 */
		if (bs_byte < 0 || bs_byte >= 0x100)
			return (0x100);

		if (bs_byte == 0xFF) {
			/* Repeated 0xFF before the marker code: skip it. */
			DCTDSGet8();
			continue;
		}

		if (bs_byte != DCTTables.EOI_MARKER) {
			DCTDSGet8();
		}
		return (bs_byte);
	}
} /* DCTDMarkerSGet */


/*-------------------------------------------------------------------------
  DCTDSGet8 is called to read a marker segment byte from the bitstream and
  return it to the caller; marker prefixes are ignored--the bitstream is read
  literally.  dg->unused == 0 always at the call.  It is supposed to work
  correctly on either big-endian or little-endian CPUs.
 
  DCTDSGet8Marker is the same as DCTDSGet8 but returns 0x100 rather than
  giving a source end error if the input stream returns EOF.
  -------------------------------------------------------------------------*/
private int DCTDSGet8Marker()
throws PDFFilterFormatException
{
  /* Hand out the buffered look-ahead byte and fetch its successor; a
     negative (end-of-input) look-ahead is mapped to 0x100 instead of being
     reported as an error. */
  final int current = dg_byteAhead;
  dg_byteAhead = FGETC();
  return (current < 0) ? 0x100 : current;
} /* DCTDSGet8Marker */
		     
/*-------------------------------------------------------------------------
 Initialize input bitstream.
 -------------------------------------------------------------------------*/
private void DCTDInitSGet()
throws IOException
{
  /* Start with an empty bit buffer ... */
  this.dg_unused = 0;
  /* ... and prime the one-byte look-ahead from the source. */
  this.dg_byteAhead = FGETC();
} /* DCTDInitSGet */


/*-------------------------------------------------------------------------
 Get the next Huffman code for the decoder.  At typical quantization, this
 procedure is called once for the DC codeword and 10 times for AC codewords
 per 8x8 block.  Examine the code bit-by-bit until it is <= the max. code of
 the length so far shifted in.  Then hufcode - mincode[nbits] + valptr[nbits]
 is the displacement into the huffval table to find the value for this
 Huffman code.  The tables were changed so that valptr[nbits] contains
 valptr[nbits] - mincode[nbits] to simplify the calculation.

 Initially, dg->bs_byte (a ASUns32) is refilled full enough
 (MAX_CODE_LEN = 16 bits) to decode a complete Huffman code with no refill
 checks in the loop.  2 bytes are refilled each time.

 It would be possible in a good source image to encounter AS_EOF 17 bits ahead
 of the next DCTDHuffSGet bit.  This would happen if the last codeword of
 the final 8x8 block was 1-bit long and occurred on the final bit of a byte.
 It would then be followed by a 0xFF marker prefix byte and an EOI_MARKER
 byte.  This code and DCTDMCUPreSGet treat AS_EOF like a marker; there is no
 explicit check for it; but if the unused bits preceding it are exhausted,
 then a coding error will occur.

 It is important that bad input data not crash PS here.  The `biggest' check
 is necessary to absolutely ensure that the valptr[x] reference will not be
 out-of-range.
 -------------------------------------------------------------------------*/
/* WMASK[n] has the n low-order bits set (n in 0..32); WMASK[32] is all ones.
   The table is never meant to be reassigned, so it is declared final like
   the other constant tables in this class. */
private static final int WMASK[] = {0, 1, 3, 7, 0x0f, 0x1f, 0x3f, 0x7f,
                              0x0ff, 0x1ff, 0x3ff, 0x7ff,
                              0x0fff, 0x1fff, 0x3fff, 0x7fff,
                              0x0ffff, 0x1ffff, 0x3ffff, 0x7ffff,
                              0x0fffff, 0x1fffff, 0x3fffff, 0x7fffff,
                              0x0ffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff,
                              0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff,
                              0xffffffff };

private int VALOF(int nb)
{
  /* Extract the nb bits sitting just above the dg_unused still-unread bits
     of dg_bs_byte.  A field whose value is in the lower half of the range
     (<= mask >>> 1) is returned as value - mask, i.e. as a negative number;
     otherwise the field is returned unchanged. */
  final int mask  = WMASK[nb];
  final int field = (dg_bs_byte >>> dg_unused) & mask;

  return (field > (mask >>> 1)) ? field : field - mask;
}

/* Decode one Huffman symbol using the tables in acc and return it as an
   unsigned value (0..255).  On return dg_unused has been reduced by the
   length of the code that was consumed.  DCTDBadHuff() is invoked when the
   16-bit window is >= acc.biggest, i.e. matches no code in the table. */
private int DCTDHuffSGet(DCTdec_code_struct acc)
throws PDFFilterFormatException
{
  int hufc;
  int maxcode[] = acc.maxcode;
  int MAX_CODE_LEN = DCTdec_code_struct.MAX_CODE_LEN;

/* Leaving block DCTDSGetc1 early means hufc already holds the 16-bit
   window (fewer than 16 real bits were available, left-aligned).  Leaving
   block DCTDSGetc0 (early or by falling through) means at least 16 bits
   are buffered and the window is extracted right after it. */
DCTDSGetc1:
{
 DCTDSGetc0:
 {
  if (dg_unused < MAX_CODE_LEN) /* See comments above */
  { /* First refill byte comes from the look-ahead. */
    hufc = dg_byteAhead;
    /* Low byte 0xFF: either a literal 0xFF followed by 0, a marker, or a
       negative end-of-input value; DCTDMCUPreSGet tells them apart. */
    if (0xFF == (hufc & 0xFF))
    { if (DCTDMCUPreSGet(dg_unused - 1) != 0)
      { hufc = (dg_bs_byte << (MAX_CODE_LEN - dg_unused)) & WMASK[MAX_CODE_LEN];
        break DCTDSGetc1; /* Real marker; cannot refill */
      }
    }
    dg_bs_byte = (dg_bs_byte << 8) + hufc;    /* Natural 0xFF byte. */
    {
      /* Second refill byte is read straight from the source. */
      hufc = FGETC();

      if (0xFF == (hufc & 0xFF))
      { if (DCTDMCUPreSGet(dg_unused + 8 - 1) != 0)
        { /* Real marker or AS_EOF; cannot refill. */
          /* Keep the 0xFF (or EOF) value as the look-ahead and account
             only for the one byte that was shifted in. */
          dg_byteAhead = hufc;
          if ((dg_unused += 8) < MAX_CODE_LEN)
          { hufc = (dg_bs_byte << (MAX_CODE_LEN - dg_unused)) & WMASK[MAX_CODE_LEN];
            break DCTDSGetc1; /* < 16 bits for decode */
          }
          else
            break DCTDSGetc0; /* >= 16 bits for decode */
        }
      }
      dg_bs_byte = (dg_bs_byte << 8) + hufc; /* Natural 0xFF byte. */
      dg_byteAhead = FGETC();
    }
    dg_unused += 16;
  } /* if (dg_unused < MAX_CODE_LEN) */

  /* hufc is in [0..65535] and maxcode[s] is in ([0..65535]<<n) -1, I think.
   Position unused bits in hufc[16..31].  Entries in the maxcode table are
   left-shifted n so that breakout from the conditional test sequence below
   occurs correctly.  Average no. code bits is probably about 3 at 20:1,
   maybe 6 at 4:1 compression.
   */
 } /* DCTDSGetc0 */
  /* Take the 16 most recently unread bits as the decode window. */
  hufc = (dg_bs_byte >>> (dg_unused - MAX_CODE_LEN)) & WMASK[MAX_CODE_LEN];

} /* DCTDSGetc1 */

  /* This code timed at 51.0 sec on `dctrgb bigbal -hy=2 -vy=2 -qfac=0.0'
   and at 15.4 sec on `dctrgb bigbal -hy=2 -vy=2' gcc Sun3 size = 1712 code
   bytes for ProcedureDecode=1.
   */
  int i = 0;
  /* The `biggest' check guarantees the search loop below terminates
     inside the maxcode table. */
  if (hufc >= acc.biggest)
    DCTDBadHuff();    /* Does not return. */
  /* After the loop, i is the code length in bits (index i - 1 matched). */
  while (hufc > maxcode[i++]) { }
  dg_unused -= i;
  /*** if (unused < 0) DCTDBadHuff(dg); ***/
  /* Keep only the i code bits, then index huffval through the
     pre-biased valptr entry for this length. */
  hufc = hufc >>> (MAX_CODE_LEN - i);
  return(acc.huffval[acc.valptr[i - 1] + hufc] & 0xff);
} /* DCTDHuffSGet */



/*-------------------------------------------------------------------------
 In a Huffman-coded region, if dg->unused < nbits, fill dg->bs_byte with
 >= nbits.  The caller then will typically use the VALOF macro to extract a
 signed vector from dg->bs_byte; but different code is used for the 1-bit
 extensions of progressive JPEG and for the EOBn vector.
 -------------------------------------------------------------------------*/
/* Reserve nbits from the bit buffer: dg_unused is reduced by nbits first,
   and if that makes it negative up to two more bytes are shifted into
   dg_bs_byte (with dg_unused raised by 8 per byte shifted in). */
private void DCTDFillHuffBits(int nbits)
throws PDFFilterFormatException
{ int work;

  dg_unused -= nbits;

  if (dg_unused < 0)
  { /* First refill byte comes from the look-ahead. */
    work = dg_byteAhead;
    if (0xff == (work & 0xFF))
    { /* An != 0 return is impossible because it would have terminated on an
         error in DCTDMCUPreSGet.
       */
      /* (dg_unused is negative here, which is DCTDMCUPreSGet's truncation
         condition when a marker follows.) */
      DCTDMCUPreSGet(dg_unused);
    }
    dg_bs_byte = (dg_bs_byte << 8) + work;    /* Natural 0xFF */
    {
      /* Second refill byte is read straight from the source. */
      work = FGETC();
      if (0xFF == (work & 0xFF))
      { if (DCTDMCUPreSGet(dg_unused + 8) != 0)
        { /* Only one byte was shifted in; keep the 0xFF/EOF value as the
             look-ahead. */
          dg_byteAhead = work; dg_unused += 8;
          return;    /* Real marker or AS_EOF ahead; no refill */
        }
      }
      dg_bs_byte = (dg_bs_byte << 8) + work;   /* Natural 0xFF */
      dg_byteAhead = FGETC();
    }
    dg_unused += 16;
  } /* if (unused < 0) */
} /* DCTDFillHuffBits */


/*-------------------------------------------------------------------------
 Build Huffman decoding structures.
 -------------------------------------------------------------------------*/
private void DCTDGetHuffDec(DCTdec_code_struct acc,
                            int[]   huffcode)
{
  /* Fill acc.valptr, acc.maxcode and acc.biggest from acc.huffbits and the
     per-symbol code list huffcode.

     For every code length that is in use:
       valptr[len]  = index of the first symbol of that length minus its
                      code, so that (valptr + code) indexes huffval directly;
       maxcode[len] = (largest code of that length + 1), left-aligned in a
                      MAX_CODE_LEN-bit window, minus 1.
     Unused lengths get maxcode == -1. */
  final int maxLen = DCTdec_code_struct.MAX_CODE_LEN;
  int firstSymbol = 0;   /* index of the first symbol of the current length */
  int longestUsed = 0;   /* last length index that has any codes */

  for (int len = 0; len < maxLen; len++)
  { if (acc.huffbits[len] == 0)
    { acc.maxcode[len] = -1;
      continue;
    }

    acc.valptr[len] = (firstSymbol - huffcode[firstSymbol]);
    firstSymbol = firstSymbol + acc.huffbits[len];
    /* Pre-12/92: acc->huff.dec.maxcode[ss] = huffcode[p-1]; */
    acc.maxcode[len] =
      ((huffcode[firstSymbol - 1] + 1) << (maxLen - len - 1)) - 1;
    longestUsed = len;
  }

  /* One past the largest usable window value.  The decoder compares
     against this before searching maxcode, which guarantees the search
     loop exits (this replaced the pre-12/92 0x7FFFFFFF sentinel). */
  acc.biggest = acc.maxcode[longestUsed] + 1;
} /* DCTDGetHuffDec */


/*------------------------------------------------------------------------
 First, read the Huffman code specification from the bitstream checking for
 an impossible or unusable specification.  Then use DCTSizeCodes to setup
 arrays of Huffman codes and sizes and check for a valid Huffman spec.
 Next, read the values corresponding to the codes and check for unreasonable
 values.  Finally, create the &huffval[valptr - mincode][0] and maxcode arrays,
 where mincode and maxcode are the minimum and maximum codes within each
 code size, respectively, and valptr is the index into the huffval table for
 the first code of the size; and compute biggest.

 The maxcode array has to be arranged so that the DCTDHuffSGet procedure's
 `while' loop will be sure to exit.  The normal Huffman code is dense, and the
 largest legal codeword is 111...110 (i.e., all ones of some bitlength except
 the last bit).
 ------------------------------------------------------------------------*/
private void DCTDMakeHuffDec(DCTdec_code_struct acc, boolean ac)
throws PDFFilterFormatException
{
  /* Read one Huffman table definition from the marker segment into acc:
     the MAX_CODE_LEN per-length counts, then the symbol values (as many as
     DCTSizeCodes left in dg->lastp).  Every symbol is range checked and the
     decoding arrays are built with DCTDGetHuffDec.  ac selects AC-table
     rather than DC-table limits and checks. */
  final int capacity = DCTdec_code_struct.AC_CODE_TBL_SIZE + 1;
  final int codes[] = new int[capacity];
  final int sizes[] = new int[capacity];

  acc.huffbits = DCTDSFReadShort(DCTdec_code_struct.MAX_CODE_LEN);
  /* Returns dg->lastp */
  DCTSizeCodes(acc.huffbits, sizes, codes, ac);

  /* Read the values represented */
  final int symbolCount = dg_lastp;
  acc.huffval = DCTDSFRead(symbolCount);

  for (int idx = 0; idx < symbolCount; idx++)
  { final int symbol = acc.huffval[idx] & 0xff;

    if (!ac)
    { /* DC table */
      if (symbol >= DCTdec_code_struct.DC_CODE_TBL_SIZE)
      { DCTUnusableDCCode(DCTError.filtSrcRangeErr, symbol);
        /* Does not return */
      }
      continue;
    }

    /* AC table: low nibble is the magnitude category, high nibble the
       zero run length. */
    final int magnitude = symbol & 0xF;
    final int zeroRun   = symbol >> 4;
    /* Magnitude 0 is only accepted with run 0 or run 0xF, except in
       progressive mode where other runs are accepted too. */
    final boolean strayZeroMagnitude =
           (magnitude == 0)
        && (zeroRun != 0)
        && (zeroRun != 0xF)
        && (dg_encoding != DCTTables.ASJPEGProgressive);

    if ((magnitude >= 11) || strayZeroMagnitude)
    { DCTUnusableACCode(DCTError.filtSrcRangeErr, symbol);
      /* Does not return */
    }
  }

  DCTDGetHuffDec(acc, codes);
} /* DCTDMakeHuffDec */


/* dctcmmn.c */
/* This module contains code and tables shared by the DCTEncode and
 DCTDecode filters.
 */

/*------------------------------------------------------------------------
 Shared error message procedures.
 -----------------------------------------------------------------------*/
//private void DCTAllocFail(int nBytes)
//throws PDFFilterFormatException
//{ /* NOTE: This error must have the name 'dctcmmn1' for code in dctdopen.c */
//  DCTError.err(DCTError.dctcmmn1,
//    "Cannot allocate "+nBytes+" bytes", DCTError.filtAllocErr);
//} /* DCTAllocFail */


private void DCTUnusableACCode(String err, int  huffCode)
throws PDFFilterFormatException
{
  /* Report an AC Huffman table value the decoder cannot use; err is the
     error category handed through to DCTError.err. */
  final String message = "Unusable value "+huffCode+" in AC HuffTable";
  DCTError.err(DCTError.dctcmmn2, message, err);
} /* DCTUnusableACCode */


private void DCTUnusableDCCode(String err, int huffCode)
throws PDFFilterFormatException
{
  /* Report a DC Huffman table value the decoder cannot use; err is the
     error category handed through to DCTError.err. */
  final String message = "Unusable value "+huffCode+" in DC HuffTable";
  DCTError.err(DCTError.dctcmmn3, message, err);
} /* DCTUnusableDCCode */


//private void DCTCaptionTooLong(String  err)
//throws PDFFilterFormatException
//{
//  DCTError.err(DCTError.dctcmmn4, "Caption > 32767-byte limit", err);
//} /* DCTCaptionTooLong */


/*------------------------------------------------------------------------
 DCTunzigzag[snake pos. i] = normal position i.  This array has 16 extra
 entries to avoid an error check in dctdcore.c for going beyond
 block end; the extra entries point at the 64th position of the
 block which will be harmlessly overwritten.
 ------------------------------------------------------------------------*/
/* Read-only lookup table: declared static final so it is shared by all
   decoder instances instead of being rebuilt per instance.  Entries 0..63
   map zig-zag position to raster position within the 8x8 block; the 16
   trailing entries all map to position 0x40 (one past the block). */
private static final byte DCTunzigzag[] =
{ 0x00, 0x01, 0x08, 0x10, 0x09, 0x02, 0x03, 0x0A,
  0x11, 0x18, 0x20, 0x19, 0x12, 0x0B, 0x04, 0x05,
  0x0C, 0x13, 0x1A, 0x21, 0x28, 0x30, 0x29, 0x22,
  0x1B, 0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1C,
  0x23, 0x2A, 0x31, 0x38, 0x39, 0x32, 0x2B, 0x24,
  0x1D, 0x16, 0x0F, 0x17, 0x1E, 0x25, 0x2C, 0x33,
  0x3A, 0x3B, 0x34, 0x2D, 0x26, 0x1F, 0x27, 0x2E,
  0x35, 0x3C, 0x3D, 0x36, 0x2F, 0x37, 0x3E, 0x3F,
  /* Overrun padding: 16 entries pointing at the 64th position. */
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40
}; /* DCTunzigzag */

/*---------------------------------------------------------------------------
 DCTMin8BitQuant contains in zig-zag order, the minimum quantizer for
 each of the 64 quantizers at which quant. coef. max. range is in [-127..+127]
 allowing it to be stored in 8 bits.  This table is derived from DCTMaxCoef
 (commented out above) by solving:
   (2*MaxCoef + Q + 7) / (2*Q) = 127
 (where 7 is a safety factor against a poor implementation) which solves to:
   Q = (7 + (2 * MaxCoef)) / 253
 The safety factor does not affect the result anywhere.
 ---------------------------------------------------------------------------*/
/* 64 entries in zig-zag order; the source layout below puts one
   anti-diagonal of the 8x8 block on each row (1, 2, ... 8, ... 2, 1
   entries).  DCTQFitsIn8 compares each quantizer against the entry at the
   same index. */
private static final byte DCTMin8BitQuant[] =
{  9,                                /* diagonal  0 (DC term) */
   8,  8,                            /* diagonal  1 */
   8,  7,  8,                        /* diagonal  2 */
   8,  7,  7,  8,                    /* diagonal  3 */
   9,  7,  7,  7,  9,                /* diagonal  4 */
   8,  8,  7,  7,  8,  8,            /* diagonal  5 */
   8,  7,  8,  7,  8,  7,  8,        /* diagonal  6 */
   8,  7,  7,  8,  8,  7,  7,  8,    /* diagonal  7 (main anti-diagonal) */
   7,  7,  7,  9,  7,  7,  7,        /* diagonal  8 */
   7,  7,  8,  8,  7,  7,            /* diagonal  9 */
   7,  8,  7,  8,  7,                /* diagonal 10 */
   8,  7,  7,  8,                    /* diagonal 11 */
   7,  7,  7,                        /* diagonal 12 */
   7,  7,                            /* diagonal 13 */
   7                                 /* diagonal 14 */
}; /* DCTMin8BitQuant */


/*---------------------------------------------------------------------------
 Compute for each quant. table whether or not the max. quant. coef. fits in
 8 bits.  Store the result in qFitsIn8[qTabNo].
 ---------------------------------------------------------------------------*/
private void DCTQFitsIn8 ()
{
  /* For each of the dg_qTableCount quantization tables, record in
     dg_qFitsIn8 whether all 64 quantizers are at least the corresponding
     DCTMin8BitQuant entry. */
  for (int table = 0; table < dg_qTableCount; table++)
  { final short quant[] = dg_qtizers[table];

    /* Advance to the first quantizer below its minimum, if any. */
    int pos = 0;
    while ((pos < 64) && (quant[pos] >= DCTMin8BitQuant[pos]))
      pos++;

    /* Reaching the end means no quantizer was too small. */
    dg_qFitsIn8[table] = (pos == 64);
  }
} /* DCTQFitsIn8 */


/*---------------------------------------------------------------------------
 Initialize jpframe->vs quantized coefficient pointers for a color to point
 at consecutive strips of 8 rows.
 ---------------------------------------------------------------------------*/
private void DCTInitQCPtrs(int last_qcoef_ptr[],
                          DCTjframe   jpframe,
                          int         stripNo)
{
  /* Fill last_qcoef_ptr[0 .. vs-1] with offsets that start at
     vs * stripNo * stride and advance by one stride per entry, where
     stride = (coefx << 3) + (coefx >>> 3). */
  final int stride = (jpframe.coefx << 3) + (jpframe.coefx >>> 3);
  int offset = jpframe.vs * stripNo * stride;

  for (int row = 0; row < jpframe.vs; row++)
  { last_qcoef_ptr[row] = offset;
    offset += stride;
  }
} /* DCTInitQCPtrs */


/*---------------------------------------------------------------------------
 This procedure is called once/color/strip from dctecore.c and dctdcore.c
 to initialize pointers to the quantized coefficient array and color strip.
 jpframe->qcoef points at the beginning of the quantized coefficient array,
 which consists of NumMCUS x NumBlocksPerMCU x 64 elements x 2 bytes/element for
 each 8x8 block; the elements are in zigzag order.

 last_cptr for each color c points at the 0th element of the 8x8 block; it is
 marched along the MCU first horizontally by 8 samples per block and then
 vertically by 8*padx_c (if there is vertical sampling).  extra_ptr identifies
 the starting point of extra blocks at the end of a strip; when a block with a
 small horizontal sampling value is padded to a multiple of 8, more highly
 sampled blocks may be padded to a multiple of 16, 24, or 32 to complete the
 MCU; this padding can create extra unused 8x8 blocks for the highly sampled
 component.  These are identified so that they can be coded as compactly as
 possible.  Only the encoder uses extra_ptr, but since this is a low frequency
 initialization, it does no harm to also initialize it for the decoder.
 ---------------------------------------------------------------------------*/
private void DCTInitStripPtrs(DCTjframe      jpframe,
                              int            stripNo)
{
  /* Initialise jpframe.stripPtrs[0 .. vs-1] for strip stripNo:
       last_ptr       starts at 0 and advances by 8 * padx per entry;
       last_qcoef_ptr starts at stripNo * vs * coefStride and advances by
                      coefStride per entry,
     with coefStride = (coefx << 3) + (coefx >> 3). */
  final DCTStripPtrRec rows[] = jpframe.stripPtrs;
  final int coefStride = (jpframe.coefx << 3) + (jpframe.coefx >> 3);
  final int sampleStride = jpframe.padx << 3;     /* padx is in samples */

  rows[0].last_qcoef_ptr = (stripNo * jpframe.vs * coefStride);
  rows[0].last_ptr = 0;

  for (int row = 1; row < jpframe.vs; row++)
  { final DCTStripPtrRec current  = rows[row];
    final DCTStripPtrRec previous = rows[row - 1];

    current.last_ptr = previous.last_ptr + sampleStride;
//#if QuantCoefRaster
    current.last_qcoef_ptr = previous.last_qcoef_ptr + coefStride;
//#endif
  }
} /* DCTInitStripPtrs */


/*-----------------------------------------------------------------------
 Reset the DC predictions.
 -----------------------------------------------------------------------*/
private void DCTResetDC()
{
  /* Zero prev_dc in the first dg_framecn entries of dg_jpscan. */
  int comp = 0;
  while (comp < dg_framecn)
  { dg_jpscan[comp].prev_dc = 0;
    comp++;
  }
} /* DCTResetDC */


/*----------------------------------------------------------------------
 This procedure holds some initialization common to the encoder and decoder;
 it is called once for the frame.  One problem is that errors are handled
 differently; in the encoder, errors occur during the Open and appear as
 failures of the PS 'filter' operator; but in the decoder, they occur when
 the first byte is read from the stream.  (Note that defining rcerr
 differently for each allows the DCTEncode open and DCTDecode stream
 operations to share this procedure.)
 ------------------------------------------------------------------------*/
/* Parameters:
     encInit - non-zero selects the encoder's error categories
               (filtRangeErr for both kinds of error); zero selects the
               decoder's (filtSrcRangeErr / filtIncCombErr).
   Reads:  dg_framecn, dg_jpframe[].hs/.vs, dg_lx, dg_ly, dg_relax,
           dg_encoding, dg_colorconv, dg_haveQCoefRaster.
   Writes: dg_maxh, dg_maxv, dg_cnmaxh, dg_nsamps, dg_raster_size_out,
           dg_hFast, dg_vFast, and per component maxSampling, padx and
           (when dg_haveQCoefRaster) coefx / coefy; may also update
           dg_encoding, dg_relax and dg_colorconv. */
private void DCTInitCommon(int encInit)
throws PDFFilterFormatException
{ int maxh, maxv, ii, maxh_x8, nblks_per_mdu;
  String rcerr, strucerr;
  /* Pick the error categories used by the checks below. */
  if (encInit != 0)
  { rcerr    = DCTError.filtRangeErr;
    strucerr = DCTError.filtRangeErr;
  } else
  { rcerr    = DCTError.filtSrcRangeErr;
    strucerr = DCTError.filtIncCombErr;
  }

  /* Validate the sampling factors and find their maxima; also total the
     number of blocks per MCU (sum of hs * vs over all components). */
  maxh = maxv = 1;
  nblks_per_mdu = 0; /* Zero this for reuse of gvl after computing
                        custom HuffTables. */
  for (ii = 0; ii < dg_framecn; ii++)
  { int hs, vs, nbc;
    DCTjframe jpframe = dg_jpframe[ii];
    hs = jpframe.hs;
    vs = jpframe.vs;
    nbc = hs * vs;
    /* nbc == 0 catches a zero factor; factors above 4 are rejected. */
    if ((nbc == 0) || (hs > 4) || (vs > 4))
    { DCTError.err(DCTError.dctcmmn6, "Invalid sampling", rcerr);
    }
    if (hs > maxh) maxh = hs;
    if (vs > maxv) maxv = vs;
    nblks_per_mdu += nbc;
  } /* for (ii = 0; ii < dg->framecn; ii++) */

  /* nblks_per_mdu > 10 is non-JPEG, but allow it if the user has
     explicitly permitted this. */
  if (nblks_per_mdu > 10)
  { if (!dg_relax)
    { DCTError.err(DCTError.dctcmmn7, "Sum(HSamples * VSamples) > 10 limit",
                strucerr);
    }
    else if (dg_encoding == DCTTables.ASJPEGBaseline)
    { dg_encoding = DCTTables.ASJPEGSequential;
    }
  }
  dg_maxh = maxh;
  dg_maxv = maxv;
  maxh_x8 = maxh * 8;

  dg_cnmaxh = dg_framecn * maxh;
  dg_nsamps = ((dg_lx + maxh - 1) / maxh) * dg_cnmaxh;
  /* raster_size_out is needed by the decoder and by encoder study mode. */
  dg_raster_size_out = dg_lx * dg_framecn;

  /* Any encoding other than baseline switches relaxed checking on. */
  if (dg_encoding != DCTTables.ASJPEGBaseline)
    dg_relax = true;

  /* The dg->colorconv == 0x3FFF default means no ColorTransform entry was
   in the PS dictionary; for the decoder, it also means no APPE marker.
   0, 1, or 2 presently allowed (extensible); 1 is converted to 0 for 1 or
   2 colors, left 1 for 3 colors, or converted to 2 for 4 colors;
   command-line option of 2 is also allowed for framecn==4 colors.

   (For the decoder, if the number of frame parameters is greater than the
   number of scan parameters, then do not do default to any color
   transform; this is not useful because multiple scans are not handled by
   this implementation, so the other colors cannot be printed.)
   */
  if (dg_colorconv != 0)               /* 0 == no color conversion */
  { if (dg_colorconv != 0x3FFF)        /* Not the default */
    { if (dg_colorconv == DCTTables.RGBtoYUV)
      { if (dg_framecn == 4)
          dg_colorconv = DCTTables.CMYKtoYUVK;
        else if (dg_framecn < 3)
          dg_colorconv = 0;
      }
      else if ((dg_colorconv != DCTTables.CMYKtoYUVK) || (dg_framecn != 4))
      { DCTError.err(DCTError.dctcmmn8, "Unusable ColorTransform="+dg_colorconv, rcerr);
      }
    }
    else /* Default ColorTransform == 0x3FFF */
    { if (dg_framecn == 3)
        dg_colorconv = DCTTables.RGBtoYUV;
      else
        dg_colorconv = 0;
    } /* else */
  } /* if (dg->colorconv != 0) */

  /* Determine whether the sampling permits accelerated horizontal strip
   processing.  If so, indicate it in dg->hFast.  The acceleration consists
   of doing the color transform (if any), blending (if any), and downsampling
   or upsampling in a single loop rather than in separate loops.
   */
  dg_hFast = DCTTables.SLOWSTRIP;
  /* All fast cases need at least 3 components with hs == 1 for the 2nd and
     3rd component. */
  if ((dg_framecn >= 3) && (dg_jpframe[1].hs == 1) && (dg_jpframe[2].hs == 1))
  { if ((dg_jpframe[0].hs == 1) && (dg_colorconv != 0))
    { if (dg_framecn == 3)
        dg_hFast = DCTTables.FAST111CT;
      else
        dg_hFast = DCTTables.FAST1111CT;
    }
    else if (dg_jpframe[0].hs == 2)
    { if (dg_framecn == 3)
      { if (dg_colorconv != 0)
          dg_hFast = DCTTables.FAST211CT;
        else
          dg_hFast = DCTTables.FAST211;
      }
      /* Only the color transform case is fast for framecn == 4. */
      else if ((dg_framecn >= 4) && (dg_jpframe[3].hs == 2) && (dg_colorconv != 0))
        dg_hFast = DCTTables.FAST2112CT;
    }
  } /* if ((dg_framecn >= 3) && 2nd and 3rd components have hs == 1) */

  /* Flag the components that use the maximum sampling in both directions. */
  for (ii = 0; ii < dg_framecn; ii++)
  { DCTjframe jpframe = dg_jpframe[ii];
    if ((jpframe.hs == maxh) && (jpframe.vs == maxv))
      jpframe.maxSampling = true;
    else
      jpframe.maxSampling = false;
  }

  /* Indicate dg->vFast != 0 (fast vertical sampling) for 21, 31, 41,
   211, 311, 411, and for 4 colors (with color transform only) 2112, 3113,
   or 4114; these also require hy == maxh (and for 4 colors hk == maxh).
   A single code loop for a vertical fast case handles all of (21, 31, 41),
   another all of (211, 311, 411), etc.  Horizontal fast cases must all be
   individually coded.
   */
  /* NOTE(review): (maxv >> 2) != 0 is true only for maxv >= 4, so as
     written the 2x and 3x vertical cases listed above do not set vFast --
     confirm whether that is intended.  Also note that dg_jpframe[1] is
     read here without a dg_framecn check. */
  dg_vFast = 0;
  if (((maxv >> 2) != 0) && dg_jpframe[0].maxSampling && (dg_jpframe[1].vs == 1))
  { if (   (dg_framecn == 2)
        || (   (dg_jpframe[2].vs == 1)
            && (   (dg_framecn == 3)
                || ((dg_colorconv != 0) && dg_jpframe[3].maxSampling))
           )
       )
      dg_vFast = 1;
  }

  /* DCTDebugOut(dg->debugOutArg,
                 "vFast=%d, hFast=%d\n", dg->vFast, dg->hFast); */

  /* Each component is first represented as Ceiling[(lx * hs) / maxh] pixels
   [See JPEG CD 10918-1 A.1.1].

   For multi-component frames, interleaved components are then represented in
   MCUs where each component is hs * 8 pixels wide [A.2.1].  These two steps
   are equivalent, assuming hmin == 1, to rounding up the width of the most
   heavily sampled component to a multiple of 8 * maxh pixels and computing
   the no. MCUs which this represents.  Then each color in the frame has a
   width of 8 * hs * No. MCUs as follows:
     No. MCUs        == (lx + (8 * maxh) - 1) / (8 * maxh)
     No. pixels wide == 8 * hs * No. MCUs

   For a single-component frame, each MCU is only 8 pixels wide, so:
     No. MCUs        == (lx + 7) / 8
     No. pixels wide == 8 * No. MCUs
   NOTE: I think other implementations are using
     No. MCUs        == ((lx * hs) + (8 * hs) - 1) / (8 * hs)
     No. pixels wide == 8 * No. MCUs
   which is identical.

   The memory allocation for the color strips is computed according to the no.
   pixels wide calculation above.  For a single-component scan in a
   multi-component frame (e.g., progressive JPEG), however, the number of blocks
   in the width can be smaller than the above.  It is:
     No. MCUs   == Ceiling(Ceiling((lx * hs) / maxh) / 8)
     No. MCUs   == ((((lx * hs) + maxh - 1) / maxh) + 7) / 8
   which is exactly equivalent (proved by exhaustion with a script and also
   by a theorem) to:
     No. MCUs   == Ceiling((lx * hs) / (8 * maxh))
     No. MCUs   == ((lx * hs) + (8 * maxh) - 1) / (8 * maxh)

   Strip block width is computed from frame components and is
   ((dg->lx + maxh_x8 - 1) / maxh_x8) * maxh blocks for the most highly
   sampled component, or hs * ((dg->lx + maxh_x8 - 1) / maxh_x8) for any
   component in an interleaved scan.

   padx is the storage reserved for each scan line, enough to extend it to
   the exact interleave.  Because multi-color scans have an MCU that is
   computed differently from single-color scans, this must be done carefully.
   With a big CT, the values after the color transform and before the DCT or
   reverse DCT are stored in 16 bits to avoid double roundoff error; in this
   case padx (which is in bytes) must be doubled.  For progressive, coefx and
   coefy are the width and height of the quantized coefficient array in
   coefficients (not bytes).
   */
  if (dg_framecn >= 2)
  { /* Number of MCUs across (and down), times 8 samples per block. */
    int horMCUsTimes8 = ((dg_lx + maxh_x8 - 1) / maxh_x8) * 8;
//#if QuantCoefRaster
    int maxv_x8 = maxv * 8;
    int verMCUsTimes8 = ((dg_ly + maxv_x8 - 1) / maxv_x8) * 8;
//#endif
    for (ii = 0; ii < dg_framecn; ii++)
    { DCTjframe jpframe = dg_jpframe[ii];
      int horWidth = horMCUsTimes8 * jpframe.hs;
      jpframe.padx = horWidth;
//#if QuantCoefRaster
      if (dg_haveQCoefRaster)
      { jpframe.coefy = verMCUsTimes8 * jpframe.vs;
        jpframe.coefx = horWidth;
      }
//#endif
    }
  } else /* One-color frames compute the width differently. */
  { int horMCUsTimes8 = (   ((dg_lx * dg_jpframe[0].hs) + maxh_x8 - 1)
                             / maxh_x8) * 8;
//#if QuantCoefRaster
    int maxv_x8 = maxv * 8;
    int verMCUsTimes8 = (   ((dg_ly * dg_jpframe[0].vs) + maxv_x8 - 1)
                             / maxv_x8) * 8;
    if (dg_haveQCoefRaster)
    { dg_jpframe[0].coefy = verMCUsTimes8;
      dg_jpframe[0].coefx = horMCUsTimes8;
    }
//#endif
    dg_jpframe[0].padx = horMCUsTimes8;
  }
} /* DCTInitCommon */



/*-----------------------------------------------------------------------
 For progressive JPEG, some initialization differs with the scan.  Do the
 part of that which is common to the encoder and decoder here.  Single
 component scans define the minimum coding unit (MCU or MDU) differently
 than multi-component scans.  The caller has initialized dg->cn and the
 following jpscan elements for every component of the scan:
   ACCodeTableId, DCCodeTableId, jpframe.
 ----------------------------------------------------------------------*/
private void DCTInitForScan ()
{
  /* Compute dg_nmdus_per_strip and dg_nstrips_this_scan for the current
     scan, then attach the selected AC and DC code tables to each of the
     dg_cn scan components. */

  /* A single-component scan scales the image size by that component's own
     sampling factors; a multi-component scan uses the image size as is. */
  int acrossSamples = dg_lx;
  int downSamples   = dg_ly;
  if (dg_cn == 1)
  { final DCTjframe sole = dg_jpscan[0].jpframe;
    acrossSamples = dg_lx * sole.hs;
    downSamples   = dg_ly * sole.vs;
  }

  /* Round up to whole units of 8 * max sampling in each direction. */
  final int unitWidth  = dg_maxh * 8;
  final int unitHeight = dg_maxv * 8;
  dg_nmdus_per_strip   = (acrossSamples + unitWidth - 1) / unitWidth;
  dg_nstrips_this_scan = (downSamples + unitHeight - 1) / unitHeight;

  for (int comp = 0; comp < dg_cn; comp++)
  { final DCTjscan entry = dg_jpscan[comp];
    entry.acc = dg_ats[entry.ACCodeTableId];
    entry.dcc = dg_dts[entry.DCCodeTableId];
  }
} /* DCTInitForScan */


/*-----------------------------------------------------------------------
 Give an error for impossible Huffman code specification; otherwise, set
 up huffsize[] with the size of all Huffman codes in ascending order;
 leave the number of different codes in dg->lastp; and generate the Huffman
 codes for each codesize in the table.  This procedure is shared by the
 encoder and decoder.

 The 1st entry in huffbits is no. length==1 codewords.  The code space
 would be used up if 2 length==1 codewords existed.  On each step,
 subtract the number of codewords of the given length; then twice the
 residual == no. codewords of length 1 larger.  In this way, check for
 an impossible spec.  There must be at least 1 left over code because
 JPEG disallows any all-ones codewords, so the final one must be no
 bigger than 111...1110.

 ***For DCTEncode, the errors detected here should have been detected
 during DCTEncodeOpen.
 ----------------------------------------------------------------------*/
private void DCTSizeCodes(short huffbits[], int huffsize[],
  int huffcode[], boolean ac)
throws PDFFilterFormatException
{ /* Parameters:
     huffbits  in:  huffbits[i] is the number of codewords of length i+1.
     huffsize  out: code length of every codeword, ascending, followed by
                    a 0 terminator at huffsize[number of codes].
     huffcode  out: the codeword value for each entry of huffsize.
     ac        selects the AC or the DC table-size limit.
   Side effect: dg_lastp receives the number of codewords.
   Errors are reported through DCTError.err.
   */
  int p;
  int sizelim = (  ac
                     ? DCTdec_code_struct.AC_CODE_TBL_SIZE
                     : DCTdec_code_struct.DC_CODE_TBL_SIZE);
  int i, j;
  int nvals = 2;   /* code space still free at the current code length */
  int code;
  int si;

  /* Pass 1: validate the length counts and expand them into huffsize[]. */
  for (p = i = 0; i < DCTdec_code_struct.MAX_CODE_LEN; i++)
  { if ((nvals = (nvals - huffbits[i]) << 1) < 2)
    { String strucerr =   DCTError.filtSrcRangeErr;
      DCTError.err(DCTError.dctcmmn9, "Over-committed Huffman specification",
                strucerr);
    }
    for (j = 1; j <= huffbits[i]; j++)
    { if (p >= sizelim)
      { String lengerr =   DCTError.filtSrcLengthErr;
          DCTError.err(DCTError.dctcmmn10, "HuffTable exceeds "+sizelim+" entry limit", lengerr);
      }
      huffsize[p++] = i+1;
    }
  } /* for (p = i = ...) */
  huffsize[p] = 0;
  dg_lastp = p;

  /* An empty table has no codes to generate.  Without this guard the loop
   below stores one code and then compares huffsize[1] (never written by
   pass 1) against si == 0, so it would either run off the end of a
   zero-filled array or manufacture codes from stale array contents.
   */
  if (p == 0)
    return;

  /* Pass 2: assign consecutive code values within each length; on moving
   to a longer length, shift the running code left once per extra bit.
   */
  p = code = 0;
  si = huffsize[0];
  for (;;)
  { do
    { huffcode[p++] = code++;
    }
    while (huffsize[p] == si);
    if (huffsize[p] == 0)   /* reached the terminator written above */
      return;
    else
    { do
      { code = code << 1;
        si++;
      }
      while (huffsize[p] != si);
    }
  }
} /* DCTSizeCodes */


/* dctalloc.c */


/*----------------------------------------------------------------------
 Total non-stack storage required by the encoder or decoder is the size of
 the gvl struct, separately allocated at filter creation time, plus the
 storage allocated here.  gvl includes worst case storage for Huffman and
 quantization arrays and all ASStm and filter machinery storage; on 10/4/96,
 this was about 6216 bytes, if the DataStudy feature is compiled in, else
 about 5068? bytes; PSL2 does not include the DataStudy feature in EXPORT
 configurations.  Application products include it, if they use optimized
 Huffman tables.  Storage is allocated here for the scan line buffer, color
 strips, and color transform tables.

 The 6216 or 5068 bytes allocated for gvl at filter creation is marginal for
 harmony with PS AllocSmall (os_malloc); blocks this large could frequently
 find os_malloc's RAM too fragmented, even with plenty of free storage, so
 more 8k-byte blocks would be put under os_malloc control, permanently
 losing this storage from VM.  (I.e., if os_malloc is first fit with a rover,
 it could fragment.)

 Allocating a single giant block from the page allocator for the rest of the
 filter's needs is also marginal for low-RAM products, and is likely to be one
 of the first allocates to limit check during testing.  This is marginal both
 because it is a single block and because total size is large on big images.

 Scan line and strip sizes are unknown and unallocable until lx, ly, framecn,
 and sampling for the scan are known; so allocation cannot happen during
 DCTDecode filter creation because these scan parameters are in JPEG header
 markers, which may not be read until the first filter use.

 For sequential JPEG, the encoder and decoder require a one raster line buffer
 and storage for one strip of 8*maxv scan lines.  No storage is needed for
 quantized coefficients in the ordinary case because the MCUs (Minimum Coding
 Units) are generated singly in procedure temporary storage and coded or
 decoded one-by-one.  However, progressive JPEG, the JPEG-to-JPEG converter,
 the one-pass optimal sequential encoder, and the (as yet unimplemented)
 decoder block-boundary smoothing feature require a huge additional block of
 storage sufficient to hold all quantized coefficients plus a 64 entry x
 4-bytes/entry table to hold the unzigzag map.

 dg->ncolorline is both the DCTEncode input buffer or the DCTDecode output
 buffer and the scan line buffer for the inner implementation of these
 filters.  It must hold the framecn-color scan line preceded by a safetyZone
 of 1 byte for the ASStm machinery (increased to 4 bytes so that dg->ncolorline
 is 32-bit aligned), lengthened for replicating the final pixel 3 times and
 for aligning the buffer end at the next word boundary.  The part of the
 encoder that downsamples and does the color transform (DCTEGetNCLine in
 dctestr.c) always processes a multiple of maxh pixels during each inner loop
 iteration; this loop is permitted to be unrolled to handle up to 4
 pixels/iteration; therefore, max(maxh-1, 3) = 3 replications of the final
 pixel in ncolorline are required before beginning, so that any blending or
 selection of pixels beyond EOL will not use garbage.

 When the encoder reads a scan line into dg->ncolorline, the last pixel on
 each scan line must be replicated maxh-1 times before downsampling to ensure
 that any averaging of samples beyond EOL does not distort the last retained
 sample.  Thus the no. pixels (== bytes) needed for dg->ncolorline is:

   pixels/scan line = (lx + maxh - 1)*framecn

 The strip handling code processes a group of maxv scan lines, horizontally
 downsampling each line as it is read; then it vertically downsamples these
 maxv scan lines into vs retained scan lines.  This means that, although the
 final requirement is vs*8 scan lines/strip, the intermediate need is
 vs*7 + maxv scan lines.

 The strip scan line width in jpframe->padx (computed in dctcmmn.c) is a
 multiple of 8*maxh pixels, where the pixels are 1 byte if the compile
 option BigCT==0, or 2 bytes if BigCT==1; this is the width after the
 color transform.  (The BigCT option uses more storage/pixel to avoid an
 extra round off after the color transform before the reverse DCT.)
 In the bad case where maxh==4 and a color with hs==1 ends after the first
 pixel in its last MCU, 7 extra samples are needed to complete that color's
 MCU, and 28 extra samples are needed to complete the MCU of the component
 with hs==4.

 The size calculations for the color strip are summarized as follows:

   MCUs/strip       = (lx + (8*maxh - 1))/(8*maxh);
   pixels/strip     = MCUs/scan line * hs * 8;
   Pixel size       = 8 bits if BigCT==0 else 16 bits.
   scan lines/strip = (vs * 7) + maxv;
   pixels/strip     = pixels/scan line * scan lines/strip;
                    = ((lx + (8*maxh - 1))/(8*maxh))*hs*8 * (vs*7 + maxv);
     or approx.    ~= (lx * hs * (vs*7 + maxv)) / maxh;

 Note that horizontal downsampling of some colors reduces total storage, while
 vertical downsampling increases it.

 8 extra bytes are allocated in ncolorline and 8 extra for each color to
 protect against the above calculations being slightly wrong.  The same
 sizes are used for the decoder, which does not require as much extra.

 For QuantCoefRaster, a huge additional piece of storage is needed to
 hold the quantized coefficient array for each color, which requires:

   height = [(ly + (8*maxv - 1)) / (8*maxv)] * 8 * vs
   width  = pixels/strip (see above)

 The coefficients are stored in 1 or 2 bytes according to the worst case
 size of any coefficient.

 -----------------------------------------------------------------------*/
private int DCTStripBytes (DCTjframe jpframe, int maxv)
{
  /* Number of strip elements to allocate for one color component: padx
   samples per line times (7*vs + maxv) lines, plus 8 spare elements.  A
   component whose horizontal sampling factor is not positive gets none.
   */
  if (jpframe.hs > 0)
  { int linesPerStrip = 7*jpframe.vs + maxv;
    return jpframe.padx * linesPerStrip + 8;
  }
  return 0;
} /* DCTStripBytes */


/*----------------------------------------------------------------------
 Allocate the storage needed by the DCTEncode or DCTDecode filter (same
 structure is used for both so that code can be shared and so that the
 JPEG-to-JPEG converter can work).  Although the gvl structure was zeroed
 immediately after allocating it, only the quantized coefficient arrays
 for progressive JPEG and the error counters for the Study feature are
 zeroed here.
 -----------------------------------------------------------------------*/
private void DCTAllocate(int raster_size)
{
  /* Quantized coefficient rasters are only needed when dg_haveQCoefRaster
   is set.  Java zero-fills every new array.
   */
  if (dg_haveQCoefRaster)
  { DCTQFitsIn8();

    for (int comp = 0; comp < dg_framecn; comp++)
    { DCTjframe frame = dg_jpframe[comp];
      frame.qcSmall = dg_qFitsIn8[frame.frameQTNum];

      /* One extra element per 64 coefficients (the per-block "last
       non-zero position" slot) plus 14 elements of slack past the end for
       a bad Huffman code that overshoots the final block.
       */
      int coefCount = frame.coefx * frame.coefy;
      int rasterLen = coefCount + (coefCount >> 6) + 14;
      if (frame.qcSmall)
        frame.qcoef = new byte[rasterLen];
      else
        frame.qcoef = new short[rasterLen];
    }
  }

  dg_equant = new int[dg_qTableCount][64];

  for (int comp = 0; comp < dg_framecn; comp++)
  { DCTjframe frame = dg_jpframe[comp];
    frame.qmat  = dg_equant[frame.frameQTNum];
    frame.strip = new short[DCTStripBytes(frame, dg_maxv)];

    /* One strip pointer record for each of the component's vs block rows. */
    DCTStripPtrRec rowRecs[] = new DCTStripPtrRec[frame.vs];
    frame.stripPtrs = rowRecs;
    for (int row = 0; row < rowRecs.length; row++)
      rowRecs[row] = new DCTStripPtrRec();
  }

  /* Scratch storage shared by the block decoders. */
  qstrip = new int[65];             /* allocated once, reused per block */
  last_qcoef_ptr = new int[4][4];   /* used by the progressive decoder */
  dg_bigcolorline = new short[raster_size + (3 * dg_framecn) + 8];

} /* DCTAllocate */

private void DCTFree()
{
  /* Drop the references to the per-component buffers and to the shared
   quantizer and scan line arrays so they become eligible for collection.
   */
  int comp = 0;
  while (comp < dg_framecn)
  { DCTjframe frame = dg_jpframe[comp];
    frame.qcoef = null;
    frame.qmat  = null;
    frame.strip = null;
    comp++;
  }

  dg_equant       = null;
  dg_bigcolorline = null;

} /* DCTFree */


/* dctdcore.c */

private void DCTDBadZRun()
throws PDFFilterFormatException
{
  /* Report a zero run that went past the end of a block, identifying the
   scan and block counters at the time of the error.
   */
  String detail = "Missing EOB Huffcode in scan "+dg_scansCompleted+" 8x8 block "+ dg_nblocks;
  DCTError.err(DCTError.dctdcore1, detail, DCTError.filtSrcRangeErr);
} /* DCTDBadZRun */


/*----------------------------------------------------------------------------
 Decode snake format Huffman codes and build a quantized coefficient array for
 the image.  See comments for the DCTDDecodeColor procedure later.

 In addition to standard vector decode following a run-mag. Huffman code,
 progressive JPEG also has vectors after EOBn codes which are decoded as
 follows (etc. for more bits):
   nbits==0  nbits==1  nbits==2
       1      0 -> 2    0 -> 4
              1 -> 3    1 -> 5
                        2 -> 6
                        3 -> 7
 On the first progressive scan of a particular coef. (band), the prog. coder
 works like the sequential coder except that the coding is based upon
 qcoef >> Al instead of qcoef.  For DC, the shift is a pure right-shift such
 that -1 >> 1 stays -1 while 1 >> 1 becomes 0; the effect is that the
 reconstructed output is based upon -2 and 0, respectively.  For AC, however,
 the shift is toward 0, so -1 and +1 both shift to 0.

 Subsequent DC scans each provide 1 more bit of every DC coef.; these scans
 have no Huffman codes.  Subsequent AC scans include new Huffman codes for
 qcoef values which were 0 in the preceding scan; the only possible values
 are +1 and -1 since any value farther from 0 would have been non-0 in the
 preceding scan.  These Huffman codes, EOBn codes, and ZRun16 codes are each
 followed in the code stream by extra bits refining the qcoef values which
 appeared somewhere in the run between the last Huffman code and this new one.
 In other words, suppose that the new Huffman code specifies ZRun= 6; this
 code means that 6 zero-coef. values are skipped plus any qcoef values of
 larger magnitude than 1; after the Huffman code and its 1-bit magnitude
 vector, the extra bits are put literally into the code stream.  An EOBn
 code covering more than 1 block will be followed by all of the extra bits
 in those blocks.
 -----------------------------------------------------------------------------*/
private int DCTDDecodeProgMCU(int vblks, int hblks, DCTjscan jpscan,
                              int last_qcoef_ptr[], int nEOB)
throws PDFFilterFormatException
{
  /* Parameters:
     vblks, hblks    number of block rows and of blocks per row to decode.
                     The AC paths only use row 0 (last_qcoef_ptr[0]).
     jpscan          scan component: supplies the code tables, the running
                     DC predictor (prev_dc) and, via jpframe, the quantized
                     coefficient raster (byte[] when qcSmall, else short[]).
     last_qcoef_ptr  per-row offsets into the raster; each block occupies 65
                     elements (64 coefficients + index of the last non-zero
                     one) and every decoded block advances its row offset
                     by 65.
     nEOB            number of blocks still covered by a pending EOBn run.
   Returns the EOB run count remaining after these blocks.
   dg_nblocks is incremented once per block.

   The bit-level helpers (DCTDHuffSGet, DCTDFillHuffBits, VALOF) are defined
   elsewhere in this file; the extra bits of a code are read here as
   (dg_bs_byte >> dg_unused) after DCTDFillHuffBits.
   */
  boolean qcSmall  = jpscan.jpframe.qcSmall;
  Object  qcoefObj = jpscan.jpframe.qcoef;
  byte    qcoefB[] = null;
  short   qcoefS[] = null;
  int ii, jj;

  if (qcSmall)
    qcoefB = (byte[])qcoefObj;
  else
    qcoefS = (short[])qcoefObj;

  if (dg_ss == 0)   /* DC scan? */
  { if (dg_ah == 0)
    { /*-------------------------------------------------------------------
       Primary DC scan: decode the DC difference, update the predictor and
       store the predictor shifted left by Al.
       -------------------------------------------------------------------*/
      for (jj = 0; jj < vblks; jj++)
      {
        DCTdec_code_struct dcc = jpscan.dcc;
        for (ii = hblks; ii-- > 0;)
        { int qcoefBase = last_qcoef_ptr[jj];
          int nbits, xfval, qcoef;

          nbits = DCTDHuffSGet(dcc);

          if (nbits != 0)
          {
            DCTDFillHuffBits(nbits);
            xfval = VALOF(nbits);
            jpscan.prev_dc += xfval;
          } /* if (nbits != 0) */
          else xfval = 0;

          qcoef = (jpscan.prev_dc) << dg_al;
          if (qcSmall)
          { qcoefB[qcoefBase] = (byte)qcoef;
          } else
          { qcoefS[qcoefBase] = (short)qcoef;
          }
          last_qcoef_ptr[jj] += 65;
          dg_nblocks++;
        }  /* for (ii = hblks; ii-- > 0;) */
      } /* for (jj ...) */
    } else


    { /*-----------------------------------------------------------------
       DC scan not the first: one raw bit per block, added at bit
       position Al.
       -----------------------------------------------------------------*/
      for (jj = 0; jj < vblks; jj++)
      {
        for (ii = hblks; ii-- > 0;)
        { int qcoefBase = last_qcoef_ptr[jj];
          int work;
          DCTDFillHuffBits(1);
          work = ((dg_bs_byte >> dg_unused) & 1) << dg_al;
          if (qcSmall)
          { qcoefB[qcoefBase] += (byte)work;
          } else
          { qcoefS[qcoefBase] += (short)work;
          }
          last_qcoef_ptr[jj] += 65;
          dg_nblocks++;
        }  /* for (ii = hblks; ii-- > 0;) */
      } /* for (jj ...) */
    } /* else not the first DC scan */
  } /* DC scan */


  else               /* AC scan */
  { DCTdec_code_struct acc = jpscan.acc;
//  ASDebugAssert(vblks == 1);
    if (dg_ah == 0)
    { /*-------------------------------------------------------------------
       First AC scan for this band of this color.  The last block position
       filled was at index (initially dg->ss - 1).

       qcoefBase.qc.PI...[64] contains the index of the last NZ zig-zag
       quant. coef. in the block; it must be set to the index of the last
       new quant. coef. of this scan, if greater than the last for previous
       scans.

       *** Be careful not to set to ss-1 when no coef. are in the band.
       -------------------------------------------------------------------*/
      for (ii = hblks; ii-- > 0;)
      { int qcoefBase = last_qcoef_ptr[0];
        int index;
        if (nEOB > 0)
        { --nEOB;
            last_qcoef_ptr[0] += 65;
            dg_nblocks++;
            continue;    /* Skip this 8x8 block */
        }
        index = dg_ss - 1;
        do
        { int nbits, xfval;
          int ac_dist;
          int work;
          nbits = (ac_dist = DCTDHuffSGet(acc)) & 15;
          /* Finally have a codeword parsed into nbits and ac_dist */
          ac_dist = ac_dist >> 4;
          if (nbits == 0)      /* EOBn (end-of-block) or ZRUN16 */
          { if (ac_dist == 15) /* ZRUN16 */
            { /* Get here rarely with normal quantizers.  It is a bug for
               a coded image to go beyond index==dg->se with ZRUN16.
               */
              index += 16;
              continue;
            }
            if (ac_dist != 0)
            {
              /* EOBn: run length is 2^ac_dist plus ac_dist extra bits. */
              DCTDFillHuffBits(ac_dist);
              work = dg_bs_byte >> dg_unused;

              work &= WMASK[ac_dist];
              work += (1 << ac_dist);
            } else /* ac_dist == 0 */
              work = 1;
            nEOB = work - 1;   /* this block uses up one of the run */
            break; /* exit `do'; index == pos. of last NZ quant. coef. */
          } /* if (nbits == 0) */
          /* It is a bug for a coded image to go beyond index==dg->se.  If this
           happens, qcoefBase[index] = (ASInt16)xfval below will go into the next
           block of this color (harmless since it will be overwritten) or, at
           the end of the image, into the extra storage (allocated to permit
           this overflow) after that.  Then outside the loop, check for the
           overrun error.
           */
          index += (ac_dist + 1);
          /* Requantize and store in zigzag order. */
          DCTDFillHuffBits(nbits);
          xfval = VALOF(nbits);
          xfval = xfval << dg_al;
          if (qcSmall)
            qcoefB[qcoefBase+index] = (byte)xfval;
          else
            qcoefS[qcoefBase+index] = (short)xfval;
        } while (index < dg_se);

        if (index > dg_se)
        { if (dg_picky)
          { DCTDBadZRun();
          }
          index = dg_se;
        }
        if (qcSmall)
        { if (index > qcoefB[qcoefBase+64])
          { qcoefB[qcoefBase+64] = (byte)index; /* Last non-zero coef. */
          }
        } else
        { if (index > qcoefS[qcoefBase+64])
          { qcoefS[qcoefBase+64] = (short)index; /* Last non-zero coef. */
          }
        }
        last_qcoef_ptr[0] += 65;
        dg_nblocks++;
      }  /* for (ii = hblks; ii-- > 0;) */
    } else

    /*-------------------------------------------------------------------
     Not the first AC scan for this band of this color.  The last block
     position filled was at index (initially dg->ss - 1).  Each codeword
     is followed by a 1-bit refinement for each non-zero quant. coef.
     currently in the raster after the previous position and before the
     position of the new non-zero-mag. quant. coef.  If processing an
     EOB-run, refine non-zero quant. coef. in the entire block.

     qcoefBase[64] contains the index of the last NZ zig-zag quant.
     coef. in the block; it must be updated to the index of the last
     new quant. coef. of this scan, if that is greater than the last
     quant. coef. in previous scans.
     -------------------------------------------------------------------*/
    { DCTDProgMCUNextB1:
      for (ii = hblks; ii-- > 0;)
      { int qcoefBase = last_qcoef_ptr[0];
        int nbits, qcoef, xfval;
        int work;
        int ac_dist;
        int index = dg_ss - 1;

        if (nEOB > 0)
        {
          /* The last interpreted code was an EOBn code, and its count covered
           this block; the code could have been issued in a previous block, or
           it could have been issued after other codes in this block.  Refine
           each AC code started in a previous scan with 1 more bit for which
           0 means no change, 1 make the quant. coef farther from 0 by 1 << Al.
           */
          nEOB -= 1;
          /* DCTDProgACRefToEOB: */
          if (qcSmall)
          { while (index < dg_se)
            { index++;
              qcoef = qcoefB[qcoefBase+index];
              if (qcoef != 0)
              {
                DCTDFillHuffBits(1);
                xfval = (dg_bs_byte >> dg_unused) & 1;

                if (xfval != 0)
                { xfval <<= dg_al;
                  if (qcoef > 0) qcoef += xfval;
                  else           qcoef -= xfval;
                  qcoefB[qcoefBase+index] = (byte)qcoef;
                }
              } /* if (qcoef != 0) */
            } /* while (index < dg->se) */
            last_qcoef_ptr[0] += 65;
          } else /* !qcSmall */
          { while (index < dg_se)
            { index++;
              qcoef = qcoefS[qcoefBase+index];
              if (qcoef != 0)
              {
                DCTDFillHuffBits(1);
                xfval = (dg_bs_byte >> dg_unused) & 1;

                if (xfval != 0)
                { xfval <<= dg_al;
                  if (qcoef > 0) qcoef += xfval;
                  else           qcoef -= xfval;
                  qcoefS[qcoefBase+index] = (short)qcoef;
                }
              } /* if (qcoef != 0) */
            } /* while (index < dg->se) */
            last_qcoef_ptr[0] += 65;
          }
          dg_nblocks++;
          continue DCTDProgMCUNextB1;  /* Advance to next block;
                                     qcoefBase[64] unchanged */
          /* end of DCTDProgACRefToEOB: */
        } /* if (nEOB > 0) */

      DCTDProgACRefBlk:
        do
        {
          nbits = (ac_dist = DCTDHuffSGet(acc)) & 15;
          ac_dist = ac_dist >> 4;
          if (nbits == 0 && ac_dist == 15) /* ZRUN16 */
          {
            xfval = 0;
          }
          else
          {

            if (nbits == 0) /* EOBn (end-of-block) and not ZRUN16 */
            { if (ac_dist == 0)
                work = 1;
              else
              {
                DCTDFillHuffBits(ac_dist);
                work = dg_bs_byte >> dg_unused;

                work &= WMASK[ac_dist];
                work += (1 << ac_dist);
              } /* else */
              nEOB = work - 1;
              /* Beyond last NZ coef. of previous scans? */
              if (index > (  qcSmall
                           ? qcoefB[qcoefBase+64]
                           : qcoefS[qcoefBase+64]))
                break DCTDProgACRefBlk; /* exit `do' */
              else
              {
                /* Coefficients from earlier scans may remain in the rest
                 of the band: refine them before leaving the block.
                 */
                if (qcSmall)
                { while (index < dg_se)
                  { index++;
                    qcoef = qcoefB[qcoefBase+index];
                    if (qcoef != 0)
                    {
                      DCTDFillHuffBits(1);
                      xfval = (dg_bs_byte >> dg_unused) & 1;

                      if (xfval != 0)
                      { xfval <<= dg_al;
                        if (qcoef > 0) qcoef += xfval;
                        else           qcoef -= xfval;
                        qcoefB[qcoefBase+index] = (byte)qcoef;
                      }
                    } /* if (qcoef != 0) */
                  } /* while (index < dg->se) */
                  last_qcoef_ptr[0] += 65;
                } else /* !qcSmall */
                { while (index < dg_se)
                  { index++;
                    qcoef = qcoefS[qcoefBase+index];
                    if (qcoef != 0)
                    {
                      DCTDFillHuffBits(1);
                      xfval = (dg_bs_byte >> dg_unused) & 1;

                      if (xfval != 0)
                      { xfval <<= dg_al;
                        if (qcoef > 0) qcoef += xfval;
                        else           qcoef -= xfval;
                        qcoefS[qcoefBase+index] = (short)qcoef;
                      }
                    } /* if (qcoef != 0) */
                  } /* while (index < dg->se) */
                  last_qcoef_ptr[0] += 65;
                }
                dg_nblocks++;
                continue DCTDProgMCUNextB1;  /* Advance to next block;
                                           qcoefBase[64] unchanged */
              }
            } /* if (nbits == 0 && ac_dist != 15) */

            DCTDFillHuffBits(nbits);
            xfval = VALOF(nbits);
            xfval <<= dg_al;
          } /* not ZRUN16 */
          /* index is the position of the last non-zero coef. written
           (or dg->ss - 1 if none).  xfval is the new +1 or -1 mag.
           quant. coef. shifted by Al to be stored after skipping
           ac_dist zero-mag. coef. and updating any non-zero-mag.
           quant. coef. in between.
           */
          if (qcSmall)
          { for (;;)
            { index++;
              if (index > dg_se)
              { DCTDBadZRun();
              }
              qcoef = qcoefB[qcoefBase+index];
              if (qcoef != 0)
              {
                DCTDFillHuffBits(1);
                nbits = (dg_bs_byte >> dg_unused) & 1;

                if (nbits != 0)
                { nbits <<= dg_al;
                  if (qcoef > 0) qcoef += nbits; /* away from 0 */
                  else           qcoef -= nbits;
                  qcoefB[qcoefBase+index] = (byte)qcoef;
                }
                continue;
              } /* if (qcoef != 0) */
              if (ac_dist == 0) break;
              ac_dist--;
            } /* for (;;) */
            qcoefB[qcoefBase+index] = (byte)xfval;
          } else /* !qcSmall */
          { for (;;)
            { index++;
              if (index > dg_se)
              { DCTDBadZRun();
              }
              qcoef = qcoefS[qcoefBase+index];
              if (qcoef != 0)
              {
                DCTDFillHuffBits(1);
                nbits = (dg_bs_byte >> dg_unused) & 1;

                if (nbits != 0)
                { nbits <<= dg_al;
                  if (qcoef > 0) qcoef += nbits; /* away from 0 */
                  else           qcoef -= nbits;
                  qcoefS[qcoefBase+index] = (short)qcoef;
                }
                continue;
              } /* if (qcoef != 0) */
              if (ac_dist == 0) break;
              ac_dist--;
            } /* for (;;) */
            qcoefS[qcoefBase+index] = (short)xfval;
          } /* !qcSmall */
        } while (index < dg_se);

        /* Bigger than last NZ coef. from previous scans? */
        if (qcSmall)
        { if (index > qcoefB[qcoefBase+64])
          { qcoefB[qcoefBase+64] = (byte)index;
          }
        } else
        { if (index > qcoefS[qcoefBase+64])
          { qcoefS[qcoefBase+64] = (short)index;
          }
        }
        /* Always step to the next block record.  This used to happen only
         when the last-NZ slot was raised, so a block that ran to dg_se while
         its slot already held an index >= index left the offset in place
         and the following block was decoded over the same record.
         */
        last_qcoef_ptr[0] += 65;
        dg_nblocks++;
      }  /* for (ii = hblks; ii-- > 0;) */
    } /* else not the first AC scan */
  } /* else AC scan */

  return nEOB;
} /* DCTDDecodeProgMCU */


/*----------------------------------------------------------------------------
 Decode snake format Huffman codes and build a quantized coefficient array for
 the image.  See comments for the DCTDDecodeColor procedure later.
 -----------------------------------------------------------------------------*/
private void DCTDDecodeSeqMCU(int vblks, int hblks, DCTjscan jpscan,
                              int last_qcoef_ptr[])
throws PDFFilterFormatException
{
  /* Parameters:
     vblks, hblks    number of block rows and of blocks per row to decode.
     jpscan          scan component: supplies the DC/AC code tables, the
                     running DC predictor (prev_dc) and, via jpframe, the
                     quantized coefficient raster (byte[] when qcSmall,
                     else short[]).
     last_qcoef_ptr  per-row offsets into the raster.  Each block occupies
                     65 elements: 64 zig-zag coefficients followed by the
                     index of the last non-zero one.  Row vv's offset is
                     advanced by 65 for every block decoded in that row.
   dg_nblocks is incremented once per block.
   */
  int xfval;
  boolean qcSmall  = jpscan.jpframe.qcSmall;
  Object  qcoefObj = jpscan.jpframe.qcoef;
  byte    qcoefB[] = null;
  short   qcoefS[] = null;
  int ii, vv;

  if (qcSmall)
    qcoefB = (byte[])qcoefObj;
  else
    qcoefS = (short[])qcoefObj;

  for (vv = 0; vv < vblks; vv++)
  { for (ii = hblks; ii-- > 0;)
    { /*------------------------------------------------------------
       The raster was initialized to 0.  Decode and store non-zero
       quant. coef.
       ------------------------------------------------------------*/
      int qcoefBase = last_qcoef_ptr[vv];
      int nbits;

      /* DC coefficient */
      { DCTdec_code_struct dcc = jpscan.dcc;
        nbits = DCTDHuffSGet(dcc);

        if (nbits != 0)
        {
          DCTDFillHuffBits(nbits);
          xfval = VALOF(nbits);

          jpscan.prev_dc += xfval;
        } /* if (nbits != 0) */

        if (qcSmall)
          qcoefB[qcoefBase] = (byte)(jpscan.prev_dc);
        else
          qcoefS[qcoefBase] = (short)(jpscan.prev_dc);
      } /* DC codeword */

      { /* AC codewords */
        DCTdec_code_struct acc = jpscan.acc;

        int index = 0;
        do
        { int ac_dist;
          nbits = (ac_dist = DCTDHuffSGet(acc)) & 15;

          ac_dist = ac_dist >> 4;
          if (nbits == 0)
          { if (ac_dist == 0)      /* End-of-block code */
            { break;
            }
            /* Get here very rarely with normal quantizers.  It is a bug for
             a coded image to go beyond index==63 with a ZRUN16 codeword.
             */
            index += 16;
            continue;
          }
          /* It is a bug for a coded image to go beyond index==63.  If this
           happens, the qcoefBase.qc.PI*[index] = work write below will go into
           the next block of this color (harmless since it will be overwritten)
           or, at the end of the image, into the storage after that.
           Then outside the loop, check for the overrun error.
           */
          index += (ac_dist + 1);

          /* Requantize and store in zigzag order. */
          DCTDFillHuffBits(nbits);
          xfval = VALOF(nbits);

          if (qcSmall)
            qcoefB[qcoefBase+index] = (byte)xfval;
          else
            qcoefS[qcoefBase+index] = (short)xfval;
        } while (index < 63);
        if (index >= 64)
        { if (dg_picky)
          { DCTDBadZRun();
          }
          index = 63;
        }
        /* Record the last non-zero coef. index in the block's 65th slot. */
        if (qcSmall)
          qcoefB[qcoefBase+64] = (byte)index;
        else
          qcoefS[qcoefBase+64] = (short)index;
        last_qcoef_ptr[vv] += 65;
        dg_nblocks++;
      } /* AC codewords */
    }  /* for (ii = hblks; ii-- > 0;) */
    /* No further adjustment of last_qcoef_ptr[vv] here: the row is already
     selected by vv, and the raster is read back with a stride of exactly 65
     elements per block.  A former last_qcoef_ptr[vv]++ at this point moved
     the row's offset one element off the block boundary after every call.
     */
  } /* for (vv = 0; vv < vblks; vv++) */
} /* DCTDDecodeSeqMCU */


/*------------------------------------------------------------------------
 Dequantize (in snake order) and reverse transform all 8x8 blocks in an
 MCU from the quantized coefficient array into the color strip buffers.
 Convert from [-128..+127] to [0..255] range, if required, by xoring the
 sign bit while copying blk into the color block.
 -------------------------------------------------------------------------*/
private void DCTDStripFromRaster(int vblks, int hblks, DCTjframe jpframe)
{
  /* For each of vblks rows and hblks blocks per row: read one 65-element
   block record from the component's quantized coefficient raster,
   dequantize its non-zero entries into qstrip in DCTunzigzag order, and hand
   the block to DCTRevTransform together with the largest column and row
   that hold a non-zero value.  The row's raster offset advances by 65 and
   its strip position by 8 per block; dg_nblocks counts the blocks.
   */
  int dequant[] = jpframe.qmat;
  boolean narrow = jpframe.qcSmall;
  Object raster = jpframe.qcoef;
  DCTStripPtrRec rowRecs[] = jpframe.stripPtrs;
  byte  coefBytes[]  = narrow ? (byte[])raster : null;
  short coefShorts[] = narrow ? null : (short[])raster;

  for (int row = 0; row < vblks; row++)
  {
    for (int blocksLeft = hblks; blocksLeft > 0; blocksLeft--)
    {
      /* Start from an all-zero block. */
      for (int k = 0; k < qstrip.length; k++)
        qstrip[k] = 0;

      DCTStripPtrRec rec = rowRecs[row];
      int base = rec.last_qcoef_ptr;
      /* Slot 64 of the record holds the index of the last non-zero coef. */
      int lastNZ = narrow ? coefBytes[base+64] : coefShorts[base+64];

      int highPos = 0;
      int highCol = 0;
      for (int zz = lastNZ; zz >= 0; zz--)
      { int q = narrow ? coefBytes[base+zz] : coefShorts[base+zz];
        if (q == 0)
          continue;
        int where = DCTunzigzag[zz];
        qstrip[where] = dequant[zz] * q;
        if (where > highPos)
          highPos = where;
        int col = where & 0x7;
        if (col > highCol)
          highCol = col;
      }
      rec.last_qcoef_ptr += 65;

      DCTRevTransform(qstrip, rec.last_ptr,
                       highCol, highPos >> 3, jpframe);
      rec.last_ptr += 8;
      dg_nblocks++;
    }
  }
} /* DCTDStripFromRaster */



/*------------------------------------------------------------------------
 Decode, dequantize (in snake format); reverse transform and unsnake the color
 characters into the temporary blk array; convert from [-128..+127] to
 [0..255] range, if required, by xoring the sign bit while copying blk
 into the color block.  The decode is conditional so that this procedure
 can also be called by the coder to produce the recon file.

 The two calls to the DCTDHuffSGet and the two to DCTDFillHuffBits are
 open-coded here and commented out in dctdhuff.c

 Vector decoding after Huffman codes is as follows (etc. for more bits):
   nbits==0    nbits==1    nbits==2
      0         0 -> -1     0 -> -3
                1 -> +1     1 -> -2
                            2 -> +2
                            3 -> +3
 -------------------------------------------------------------------------*/
private void DCTDDecodeColor(int vblks, int hblks, DCTjscan jpscan)
throws PDFFilterFormatException
{
  /* Decodes vblks rows of hblks 8x8 blocks for one color component.
     For every block: clear qstrip, decode the DC difference and the AC
     run/size codewords, store each dequantized coefficient in qstrip in
     unzigzagged order, then hand the block to DCTRevTransform, which
     receives the current strip position (stripPtrs[row].last_ptr).

     vblks  - number of block rows; row r uses stripPtrs[r]
     hblks  - number of blocks decoded in each row
     jpscan - supplies the DC/AC decode tables, the running DC predictor
              (prev_dc, updated here) and the owning frame */
  final DCTjframe frame = jpscan.jpframe;
  final DCTStripPtrRec[] strips = frame.stripPtrs;
  final int[] dequant = frame.qmat;

  for (int row = 0; row < vblks; row++)
  {
    for (int remaining = hblks; remaining > 0; remaining--)
    {
      /* Clean slate: only the non-zero coefficients are written below. */
      for (int k = 0; k < qstrip.length; k++)
      {
        qstrip[k] = 0;
      }

      /* ---- DC coefficient: a difference against the previous DC ---- */
      final DCTdec_code_struct dcTable = jpscan.dcc;
      final int dcBits = DCTDHuffSGet(dcTable);
      if (dcBits != 0)
      {
        DCTDFillHuffBits(dcBits);
        final int dcDelta = VALOF(dcBits);
        jpscan.prev_dc += dcDelta;
      }
      /* The DC term sits at position 0 both zigzagged and unzigzagged. */
      qstrip[0] = dequant[0] * (jpscan.prev_dc);

      /* ---- AC coefficients ---- */
      final DCTdec_code_struct acTable = jpscan.acc;
      int zz = 0;       /* zigzag index of the last coefficient handled */
      int maxPos = 0;   /* largest unzigzagged position written */
      int maxCol = 0;   /* largest column (position & 7) written */
      do
      {
        final int code = DCTDHuffSGet(acTable);
        final int size = code & 15;   /* low nibble: bits in the value */
        final int run = code >> 4;    /* high nibble: preceding zero run */

        if (size != 0)
        {
          zz += (run + 1);

          /* A corrupt stream can push zz past 63.  The lookup is done
             unconditionally; per the original notes DCTunzigzag carries
             extra entries that send such an index to the spare 64th slot
             of qstrip, and the overrun is diagnosed after the loop. */
          final int where = DCTunzigzag[zz];
          DCTDFillHuffBits(size);
          final int value = VALOF(size);
          /* qmat has no entry past 63 (a block holds 64 coefficients),
             so clamp the quantizer index for a corrupt, overrunning code. */
          qstrip[where] = dequant[Math.min(zz, 63)] * value;

          maxPos = Math.max(maxPos, where);
          maxCol = Math.max(maxCol, where & 0x7);
        }
        else if (run == 0)
        {
          /* End-of-block code. */
          break;
        }
        else
        {
          /* Zero-run code with no value: skip 16 coefficients.  Running
             past index 63 this way is also an error, caught below. */
          zz += 16;
        }
      }
      while (zz < 63);

      if (zz >= 64)
      {
        if (!dg_picky)
        {
          /* Blunder on and hope for the best: transform the full block. */
          maxPos = 63;
          maxCol = 7;
        }
        else
        {
          DCTDBadZRun();
        }
      }

      DCTRevTransform(qstrip, strips[row].last_ptr,
                      maxCol, maxPos >> 3, frame);
      strips[row].last_ptr += 8;
      dg_nblocks++;
    }
  }
} /* DCTDDecodeColor */



/*-----------------------------------------------------------------------
 Handle RSTn markers.
 -----------------------------------------------------------------------*/
private void DCTDDecodeResync()
throws PDFFilterFormatException
{
  /* Counts down the restart interval.  When it expires, the counter is
     reloaded from dg_resync, DCTResetDC is called, and the next marker is
     read and (in picky mode only) checked against the expected RSTn. */
  dg_resync_count--;
  if (dg_resync_count > 0)
  {
    return;   /* still inside the current restart interval */
  }

  dg_resync_count = dg_resync;
  DCTResetDC();
  final int marker = DCTDMarkerSGet();

  /* Historical note: the 1990 Radius implementation emitted the first RST0
     marker before the first coded MCU (a bug); its later RSTn markers are
     in the right places but numerically 1 too big.  Because the sequence
     check below is applied only when dg_picky is set, such a file can be
     repaired with an editor by deleting that first RST0 marker. */
  if (marker == 0x100)
  {
    DCTDReportEOF();
  }
  if (dg_picky && (marker != (DCTTables.RST0_MARKER + dg_resyncmod8)))
  {
    DCTDInvalidMarker(marker);
  }

  /* Restart markers cycle through eight values. */
  dg_resyncmod8 = (dg_resyncmod8 + 1) & 0x7;
} /* DCTDDecodeResync */



//#if QuantCoefRaster
/*-----------------------------------------------------------------------
 After decoding an SOS marker, DCTDDecodeImage calls this procedure when
 a quantized coefficient raster is being used.  Here, decode the next scan
 and store the quantized coefficients in the last_qcoef_ptr array for
 each color.
 -----------------------------------------------------------------------*/
private void DCTDDecodeScanToRaster()
throws PDFFilterFormatException
{
  /* Decodes every strip of the current scan into the quantized
     coefficient raster addressed through last_qcoef_ptr.  Progressive
     scans go through DCTDDecodeProgMCU, whose returned count is carried
     from call to call; all other encodings use DCTDDecodeSeqMCU.
     dg_nblocks is reset to 0 on entry. */
  int eobRun = 0;

  dg_nblocks = 0;

  if (dg_cn != 1)
  {
    /* Multi-component scan: each MCU holds vs x hs blocks of every
       component, with a resync check in front of every MCU. */
    for (int strip = 0; strip < dg_nstrips_this_scan; strip++)
    {
      for (int f = 0; f < dg_framecn; f++)
      {
        DCTInitQCPtrs(last_qcoef_ptr[f], dg_jpframe[f], strip);
      }

      for (int mcu = dg_nmdus_per_strip; mcu > 0; mcu--)
      {
        if (dg_resync != 0)
        {
          DCTDDecodeResync();
        }
        final boolean progressive =
          (dg_encoding == DCTTables.ASJPEGProgressive);
        for (int c = 0; c < dg_cn; c++)
        {
          final DCTjscan scan = dg_jpscan[c];
          final DCTjframe frame = scan.jpframe;
          if (progressive)
          {
            eobRun = DCTDDecodeProgMCU(frame.vs, frame.hs, scan,
                                       last_qcoef_ptr[c], eobRun);
          }
          else
          {
            DCTDDecodeSeqMCU(frame.vs, frame.hs, scan,
                             last_qcoef_ptr[c]);
          }
        }
      }
    }
    return;
  }

  /* Single-component scan: an MCU is one block, so a whole run of blocks
     (up to the next restart point) is decoded per call. */
  final DCTjscan scan = dg_jpscan[0];
  final DCTjframe frame = scan.jpframe;
  final int coefPerStrip = (frame.coefx << 3) + (frame.coefx >> 3);

  for (int strip = 0; strip < dg_nstrips_this_scan; strip++)
  {
    last_qcoef_ptr[0][0] = strip * coefPerStrip;

    /* ASJPEGBaseline or ASJPEGSequential when not progressive. */
    final boolean progressive =
      (dg_encoding == DCTTables.ASJPEGProgressive);

    int pending = dg_nmdus_per_strip;
    while (pending > 0)
    {
      int run = pending;
      if (dg_resync != 0)
      {
        DCTDDecodeResync();
        if (dg_resync_count < pending)
        {
          run = dg_resync_count;
        }
        /* DCTDDecodeResync already counted one unit of this run. */
        dg_resync_count -= (run - 1);
      }
      if (progressive)
      {
        eobRun = DCTDDecodeProgMCU(1, run, scan, last_qcoef_ptr[0], eobRun);
      }
      else
      {
        DCTDDecodeSeqMCU(1, run, scan, last_qcoef_ptr[0]);
      }
      pending -= run;
    }
  }
} /* DCTDDecodeScanToRaster */
//#endif /* QuantCoefRaster */



/*----------------------------------------------------------------------
 DCTDDecodeStrip produces maxv*8 scan lines of pixels in the color strip
 buffers.  When a quant. coef. raster is used, the work is done by the
 DCTDStripFromRaster procedure, in which case Huffman-decoding and
 building of a quant. coef. raster occurred previously; DCTDStripFromRaster
 will dequantize and reverse transform each 8x8 block.  Otherwise, the
 work is done by the DCTDDecodeColor procedure which additionally must do
 Huffman-decoding.

 For multi-color scans (dg->cn > 1), DCTDDecodeStrip uses a loop that
 does a strip of maxv*8 scanlines, with resync checks between MCUs.  The
 inner loop, decodes and dequantizes an MCU consisting of hc*vc 8x8 blocks
 into the strip for each color c.

 For one-color scans, a different outer loop is used which processes
 jpframe->vs strips of 8 scan lines with hc==vc==1.  In this case, the MCU
 is a single 8x8 block.

 Handling end-of-scan correctly when vy > 1 also requires check for
 end-of-scan after each 8 scanlines rather than after 8*vy scanlines.
 Note that DCTPutNCLine can be called after each 8 scanlines at the end
 of the image because it will terminate before trying to output the
 remaining 8*(vy-1) scan lines in this case.
 ------------------------------------------------------------------------*/
private void DCTDDecodeStrip ()
throws PDFFilterFormatException
{
  /* Fills the color strip buffers for the next strip.  With a quantized
     coefficient raster (dg_haveQCoefRaster) the blocks come from
     DCTDStripFromRaster; otherwise they are decoded from the bitstream by
     DCTDDecodeColor, with resync handling between runs of MCUs. */
  if (dg_cn != 1)
  {
    /* ---- multi-component scan ---- */
    final int linesPerStrip = dg_maxv * 8;
    final int stripNo =
      (dg_out_lines_processed + linesPerStrip - 1) / linesPerStrip;

    for (int f = 0; f < dg_framecn; f++)
    {
      DCTInitStripPtrs(dg_jpframe[f], stripNo);
    }

    if (dg_haveQCoefRaster)
    {
      /* Coefficients are already in the raster: dequantize and reverse
         transform a whole strip of each frame component. */
      final int mcus = dg_nmdus_per_strip;
      for (int f = 0; f < dg_framecn; f++)
      {
        final DCTjframe frame = dg_jpframe[f];
        DCTDStripFromRaster(frame.vs, frame.hs * mcus, frame);
      }
      return;
    }

    for (int mcu = dg_nmdus_per_strip; mcu > 0; mcu--)
    {
      if (dg_resync != 0)
      {
        DCTDDecodeResync();
      }
      /* Decode, dequantize and reverse transform one MCU. */
      for (int c = 0; c < dg_cn; c++)
      {
        final DCTjscan scan = dg_jpscan[c];
        final DCTjframe frame = scan.jpframe;
        DCTDDecodeColor(frame.vs, frame.hs, scan);
      }
    }
    return;
  }

  /* ---- single-component scan: frame.vs sub-strips of 8 scan lines ---- */
  final DCTjscan scan = dg_jpscan[0];
  final DCTjframe frame = scan.jpframe;
  final int coefPerStrip = (frame.coefx << 3) + (frame.coefx >> 3);
  final int firstStrip = (dg_out_lines_processed + 7) / 8;
  int stripOffset = 0;

  for (int sub = 0; sub < frame.vs; sub++)
  {
    /* Stop once the remaining sub-strips lie beyond the last image line. */
    if ((dg_out_lines_processed + 8 * sub) >= dg_ly)
    {
      return;
    }

    /* last_ptr addresses the next block to be written in the strip. */
    frame.stripPtrs[0].last_ptr = stripOffset;

    if (dg_haveQCoefRaster)
    {
      frame.stripPtrs[0].last_qcoef_ptr = (firstStrip + sub) * coefPerStrip;
      DCTDStripFromRaster(1, dg_nmdus_per_strip, frame);
    }
    else
    {
      int pending = dg_nmdus_per_strip;
      while (pending > 0)
      {
        int run = pending;
        if (dg_resync != 0)
        {
          DCTDDecodeResync();
          if (dg_resync_count < pending)
          {
            run = dg_resync_count;
          }
          /* DCTDDecodeResync already counted one unit of this run. */
          dg_resync_count -= (run - 1);
        }
        DCTDDecodeColor(1, run, scan);
        pending -= run;
      }
    }

    stripOffset += (frame.padx << 3);
  }
} /* DCTDDecodeStrip */


/*  dctdxf.c */
/*-----------------------------------------------------------------------
 SINES AND COSINES
 CN, where N = 0 to 7 denotes cosine(N*pi/16) in radians or cosine(N*45/4) in
 degrees.  SN similarly denotes sine.

  C1 = S7 = cos(1pi/16) = cos(1*45/4) = 0.98078528
  C2 = S6 = cos(2pi/16) = cos(2*45/4) = 0.923879532
  C3 = S5 = cos(3pi/16) = cos(3*45/4) = 0.831469612
  C4 = S4 = cos(4pi/16) = cos(4*45/4) = 0.707106781
  C5 = S3 = cos(5pi/16) = cos(5*45/4) = 0.555570233
  C6 = S2 = cos(6pi/16) = cos(6*45/4) = 0.382683432
  C7 = S1 = cos(7pi/16) = cos(7*45/4) = 0.195090322
  C4 = SqRt(2)/2
  C2+C6 = 1.306562965
  C2-C6 = 0.5411961

 Multiplication of each coefficient by the quantizer was performed by the
 caller of DCTRevTransform; that multiplication simultaneously scaled the DCT
 coefficients by a row scaling factor and a column scaling factor to reduce
 multiplications per 8x8 coefficient block to at most 80.  The multiply
 operations are defined as macros to facilitate improving the code or varying
 it with the target platform.  ShiftRXform has 3 cases:
   0 13-bit accurate shift-and-add.
   1 16-bit accurate shift-and-add.
   2 integer constant multiplications.
 NOTE: Any variation in the value of ShiftRXform will result in a testing
 hiccup since output will change. ShiftRXform == 0 should be used in all
 PostScript products.
 ------------------------------------------------------------------------*/
/*------------------------------------------------------------------------
 Perform reverse transform multiplies by specifying an integer multiply to
 C (which the compiler might optimize into shift-add).
 ------------------------------------------------------------------------*/

/* Fixed-point multipliers for the reverse transform: each is the named
   cosine expression scaled by 65536 (2^16) and rounded to the nearest
   integer.  They are declared long so that multiplying an int coefficient
   by one of them is carried out in 64-bit arithmetic before the result is
   shifted right by 16 and narrowed back to int. */
private static final long C4p16 = 46341L;    /* 65536 * 0.707106781 = 46340.95 */
private static final long C6p16 = 25080L;    /* 65536 * 0.382683432 = 25079.5414 */
private static final long C2pC6p16 = 85627L; /* 65536 * 1.306562964 = 85626.9104 */
private static final long C2mC6p16 = 35468L; /* 65536 * 0.5411961   = 35467.82761 */
//private static final long C4p8 = 181L;       /* C4p8 is (by chance) accurate to 13 bits. */
//private static final long C4p13 = 5793L;     /* 8192 * 0.707106781 = 5792.61875 */
//private static final long C6p13 = 3135L;     /* 8192 * 0.382683432 = 3134.942675 */
//private static final long C2pC6p13 = 10703L; /* 8192 * 1.306562964 = 10703.3638 */
//private static final long C2mC6p13 = 4433L;  /* 8192 * 0.5411961 = 4433.478451 */

/*
#define C4mulA(cc, dd) dd = (cc * C4p8) >> 8
#define C2pC6mulA(cc, dd) dd = (cc * C2pC6p13) >> 13
#define C6mulA(cc, dd) dd = (cc * C6p13) >> 13
#define C2mC6mulA(cc, dd) dd = (cc * C2mC6p13) >> 13
*/

/*-----------------------------------------------------------------------
 Scale the dequantization 8x8 array by a scale factor which is 2^16 * Ki * Kj,
 where Ki and Kj are row and column factors removed from the reverse transform
 to simplify it.  Then multiplying by a quantizer simultaneously dequantizes
 and performs the first multiply for row i and column j.
 See dctnotes1.txt for an explanation.  The factors are:
   K0 = .707106781   K1 = .509795579   K2 = .541196099   K3 = .601344886
   K4 = .707106781   K5 = .899976222   K6 = 1.306562964  K7 = 2.562915448

 IDCT scaling values printed by the conditional below:
  0   32768.0000  23624.4114  25079.5414  27866.8933
      32768.0000  41705.7531  60547.3690 118767.9366
  1   23624.4114  17032.2515  18081.3417  20090.9104
      23624.4114  30068.1723  43652.2204  85626.9105
  2   25079.5414  18081.3417  19195.0499  21328.3967
      25079.5414  31920.2014  46340.9499  90901.0431
  3   27866.8933  20090.9104  21328.3967  23698.8447
      27866.8933  35467.8275  51491.3047 101003.8275
  4   32768.0000  23624.4114  25079.5414  27866.8933
      32768.0000  41705.7531  60547.3690 118767.9366
  5   41705.7531  30068.1723  31920.2014  35467.8275
      41705.7531  53081.3551  77062.1833 151162.9103
  6   60547.3690  43652.2204  46340.9499  51491.3047
      60547.3690  77062.1833 111876.9499 219454.5315
  7  118767.9366  85626.9105  90901.0431 101003.8275
     118767.9366 151162.9103 219454.5315 430475.5487
 -----------------------------------------------------------------------*/
private static int DCTDQScale[] =
{  32768,  23624,  23624,  25080,  17032,  25080,  27867,  18081,
   18081,  27867,  32768,  20091,  19195,  20091,  32768,  41706,
   23624,  21328,  21328,  23624,  41706,  60547,  30068,  25080,
   23699,  25080,  30068,  60547, 118768,  43652,  31920,  27867,
   27867,  31920,  43652, 118768,  85627,  46341,  35468,  32768,
   35468,  46341,  85627,  90901,  51491,  41706,  41706,  51491,
   90901, 101004,  60547,  53081,  60547, 101004, 118768,  77062,
   77062, 118768, 151163, 111877, 151163, 219455, 219455,  430476
};

private void DCTDScaleQT(int dqarray[], int qtabnum)
{
  /* Builds a combined dequantize-and-scale table: every quantizer of
     table qtabnum is multiplied by the DCTDQScale entry at the same
     index, and the 64 products are stored into dqarray.  No unzigzag
     mapping is applied; both tables are indexed identically. */
  final short[] quantizers = dg_qtizers[qtabnum];

  int k = 0;
  while (k < 64)
  {
    dqarray[k] = quantizers[k] * DCTDQScale[k];
    k++;
  }
} /* DCTDScaleQT */



/*------------------------------------------------------------------------
 DCTRevTransform is called from dctdcore.c on a DCTDecode and by dctecore.c
 on a data_study for (StudyQErr or StudyRecon).  (The data_study feature is
 not compiled into PS except in some debugging configurations.)  It performs
 a DCT reverse transform on the 8 x 8 ASInt32 xform array passed to it.

 The reverse transform is implemented as one loop with a 1D transform for
 the 8 columns and another loop with a 1D transform for the 8 rows.  In the
 column section, the no. columns transformed is maxCol, and the work done
 in each column is based on maxRow.  In the row section, all 8 rows are
 transformed, but work per row is based on maxCol.  The accelerated cases all
 derive from the general case by removing 0 terms.  On a data_study,
 maxCol = maxPos = 7, so all special code for a data_study resides in those
 switch arms.  The final part of the row transform is to adjust overflows down
 to 255 and underflows up to 0; the resulting 8 x 8 pixel array is then stored
 in the appropriate color strip.

 (I also tried a variation in which the maximum row for each column was
  remembered rather than the maximum for the array as a whole; but this tested
  slower.  The reason it was slower is that in the first half of the transform,
  the max. non-zero row switch occurred inside the `for' loop rather than
  outside in the first half of the transform, and additional calculation is
  also needed in dctdcore.c to remember the maximums separately for each
  column.)

 DCTTransform left coef. [-1024..+1016] which was divided by an 8-bit
 quantization value and rounded.  DCTRevTransform starts here with the
 array of regenerated coefficients; at entry here each original transform
 coefficient has been multiplied by a scaling constant * quantizer, where
 the scaling constant is a number between [0.25*2^16 .. 6.59*2^16].  The
 scaling constant is chosen differently for each coefficient to factor out
 some multiplications inside the IDCT.

 The worst case range of requantized coefficients for an accurate transform is
 [((-1024 + Q/2) / Q) * Q..((+1016 + Q/2) / Q) * Q], where Q is the largest
 quantizer which just barely rounds away from 0.  For 8-bit quantizers,
 Q==-227 and +225, which gives an input range of [-1135..+1125].  To allow
 for transform inaccuracies and poor rounding by some implementations, a
 larger input range than this must be tolerated.

 16-bit quantizers are illegal in JPEG with 8-bit samples.  For 16-bit
 quantizers with 8-bit sample values, the requantized coef. could be in
 about [-2047..+2032].  Although it would be possible to compare coef.
 against the maximums and force them into the legal range, this would be
 slow, and overflows would be extremely infrequent.  Instead, the approach
 here should be to work correctly for somewhat wider ranges than the
 maximums to tolerate transform inaccuracies.

 As with DCTTransform, the 0th xform term (or DC coefficient) figures
 positively into every element of the reverse transform, permitting a single
 constant to be added to this term to accomplish all of the following:

  1) Offset final result from the [-128..+127] to the [0..255] range.
  2) Round the final result prior to right-shifting.

 In other words, the single constant added is distributed positively over all
 elements to accomplish these functions.  The DC offset and rounding
 requirements are:

   128.5*8*0.5*2^16 = 0x101*2^17 = 0x2020000;

 where 128 is the DC offset, 0.5 rounds the final result; 8 accounts for the
 size gain of the transform coefficients with respect to original samples of
 SqRt(8) in each half of the reverse transform; and 0.5*2^16 accounts for
 pre-multiplication of the DC coefficient by its scaling constant.

 Using the 9/93 new quantizers with YCC conversion on kg.vrgb, the following
 were the maxRow and maxCol statistics:
        QFactor=1.0     QFactor=0.5     QFactor=0.5 2:1
        22-to-1 comp.  14.37-to-1 comp. 17.89-to-1 comp.
        Row     Col      Row    Col       Row     Col
       Count   Count    Count  Count     Count   Count   New vs. Old
  0    14753  13969     10403   9530      2220    2111      0
  1     6845   6375      6492   5945      2175    1853     -2
  2     5408   5975      6178   6744      3019    2923     -3
  3     2179   2280      3720   4032      3530    3386     -3
  4     1261   1387      2119   2203      2208    2322     -3
  5      713    914      1278   1443      1394    1621     +4
  6      232    381       931    962       979    1029    +18
  7       25    135       295    557       315     595    +18

 In determining whether this new method is a win over the old IDCT, we
 should compute Sum(maxCol*maxRow*V[maxRow]) + Sum(8*maxCol*V[maxCol]),
 where V[j] is the value in the `New vs. Old' column.  However, we would
 have to guess at the value of maxCol for each instance of maxRow, so it
 is easier to just look at the maxCol counts and compute the weighted
 sum.  For the QFactor = 0.5, 2:1 downsampling case, the weighted sum is
 4264, favorable to the new IDCT.  For the QFactor=0.5 1:1 case, it is
 -33,759, favorable to the old IDCT.


 UNDERSTANDING OVERFLOWS AND UNDERFLOWS

 Overflows and underflows empirically occur as the result of quantization
 errors to sample values near 0 or near 255.  As quantization is reduced,
 improving accuracy, overflows and underflows occur less frequently.

 Max. sample value range coming out of the reverse DCT is increased by
 quantization errors which produce coefficients of larger magnitude than
 unquantized coefficients; and max. range is reduced by quantization errors
 producing coefficients of smaller magnitude.  For small quantizers, max.
 range increase can be approximated from the basic IDCT definition as about
   0.125 * Sum( Q[i, j] / 2),
 where Q[i, j] is the quantizer for row i column j, and the sum is over the
 64 coefficients.  This approx. has a max. value for 8-bit quantizers of
 about 8 * 128 = 1024 (not a problem).

 If quantizers were allowed to be 16-bit values, the max. range example above
 would not apply.  If one attempted to make all quantizers very large, then all
 quantized coefficients would round to 0, reducing max. range rather than
 increasing it.  To construct the worst case sample value range with 16-bit
 quantizers, one would proceed in something like the following way:
   1) Choose sample values that result in a coefficient array with the
 largest possible Sum(|Coef[i,j]|).
   2) Pick Q[i, j] = 0.5 * |Coef[i, j]| for each coefficient so that that
 coefficient rounds away from 0.  I.e., effectively double each coefficient.
   3) The errors will be additive for one of the 64 sample values; diddle that
 sample value to have an initial value that is 0 or maximum, according to what
 will produce the greatest underflow or overflow.

 I haven't followed through this reasoning and actually constructed a worst
 case, but my guess is that no construction of quantizers and sample values
 can produce a reverse DCT sample value greater than 2 * correct value.


 CONVERGENCE OF THE REVERSE TRANSFORM

 The 11-bit coefficients may be thought of as consisting of an 8-bit integer
 part and 3-bit fractional part.  Before the call to DCTRevTransform, each
 coefficient was regenerated from the decoded quantized coefficients by
 multiplying by the quantizer x a scaling factor from the DCTDQScale table
 earlier (between [1.103..26.274] x 2^16).  So incoming coefficients appear in
 a 32-bit word as an 8-bit integer part and 19-bit fractional part multiplied
 by the scaling factor.

 The reverse transform is decomposed into separate row and column portions,
 each of which converges separately and reduces the coefficient range by
 cos(pi/4) = SqRt(2)/2.  So the two portions together reduce the range by
 1 bit and leave the 8-bit result in the left-most 14 bits of a 32-bit word
 before the final >> 18. For the BigCT case (implemented here) we save 11 bits
 of result (8 bits + 3 fraction bits), so the final shift is >> 15.

 In examining how the 1D forward DCT works, each coefficient except the DC
 sums 4 positively-weighted and 4 negatively-weighted samples; and the
 orthonormal relationship between any 2 coefficients is that 4 terms will be
 weighted with the same sign and 4 oppositely.  So the sum of two
 coefficients computed during the reverse DCT will have a range not larger
 than either coefficient by itself.  For example, during the reverse
 transform, a sum or difference such as xformx[0] + xformx[32], does not have
 a larger range than the original coefficients because the increase in range
 from the addition of 4 same-weighted components is cancelled by the reduction
 in range from 4 oppositely-weighted components of the terms being added.
 Also, if one term was at an extreme value of its range, then that would imply
 that each of its 8 components was maximum and added the same way, so no
 other term could be non-0.
  ------------------------------------------------------------------------*/

private void DCTRevTransform(int xform[], int lastPtr,
  int maxCol, int maxRow, DCTjframe jpframe)
{ int xformx =  maxCol ;
  int a0, a1, a2, a3, a4, a5, a6, a7, /*b0,*/ b1;
  int padx = jpframe.padx;

  /* TRANSFORM COLUMNS */
  xform[0] += 0x2004000;

  /* Accelerate column transforms for maxRow < 6.  Look at code for
   maxRow >= 6 to see the general code from which the simpler accelerated
   versions are derived.
   */
  if (maxRow < 4)
  { if (maxRow < 2)
    { if (maxRow < 1) /* maxRow == 0 (1 row) */
      { do
        { xform[xformx+8] = xform[xformx+16] = xform[xformx+24] = xform[xformx+32] =
            xform[xformx+40] = xform[xformx+48] = xform[xformx+56] = xform[xformx];
          xformx--;
        } while (xformx >= 0); /* for xformx over columns */
      } /* maxRow < 1 */

      else /* maxRow == 1 (2 rows) */
      { do
        { /* Work: 3M+4A here vs. 3M+2A in old IDCT. */
          a7 = xform[xformx+8];
          a5 = (int)((a7 * C4p16) >> 16); //C4mulA(a7, a5);
          a6 = (int)((a7 * C2pC6p16) >> 16); //C2pC6mulA(a7, a6);
          a4 = (int)((a7 * C6p16) >> 16); //C6mulA(a7, a4);
          a6 = a6 - a4;
          a7 = a7 + a6;
          a6 = a6 + a5;
          a5 = a5 + a4;

          a0 = xform[xformx];

          xform[xformx+ 8] = a0 + a6;
          xform[xformx+48] = a0 - a6;
          xform[xformx+16] = a0 + a5;
          xform[xformx+40] = a0 - a5;
          xform[xformx+24] = a0 + a4;
          xform[xformx+32] = a0 - a4;
          xform[xformx]    = a0 + a7;
          xform[xformx+56] = a0 - a7;
          xformx--;
        } while (xformx >= 0); /* for xformx over columns */
      } /* else maxRow == 1 */
    } /* if (maxRow < 2) */

    else if (maxRow == 2) /* (3 rows) */
    { do
      { /* Work: 4M+9A here vs. 4M+6A for old IDCT. */
/* The normal calculation is equivalent to the following, which saves 1 add:
        C6mulA(a7, a4);   C6*S1
        C4mulA(a7, a5);   C4*S1
        C2mulA(a7, a6);   C2*S1
        a7 = a7 + a6;     (1+C2)*S1
        a6 = a6 + a5;     (C2+C4)*S1
        a5 = a5 + a4;     (C4+C6)*S1
 Also, with a sequence of shifts and adds, it is possible to reuse shifted
 values of a7 when calculating the multiplies:
   C2p16 =      60547 = 65536 - (4096 + 1024) + 128 - 8 - 1
   C4p16 =      46341 = (32768 + 8192) + (4096 + 1024) + 256 + (4 + 1)
   C6p16 =      25080 = (32768 - 8192) + 512 - 8
 Hand code for the shift-and-add macros was 9S + 14A compared to
 5S+4A + 4S+3A + 5S+5A + 4A = 14S + 16A for the macros.  A problem is that a
 manual shift-and-add sequence will not get exactly the same result as the
 macros, so the study mode and regular mode results will not be identical.
 (I.e., the calculation of C4 and C6 is the same as the regular calculation,
 but the regular calc. computes C2 as (C2+C6)-C6), so low bit truncation will
 be minutely different.)
 NOTE: gcc with -O2 optimization beat this code on the Sparc, so it is
 optimizing the multiply sequences generated by the standard macros effectively
 and does not need this hack.
 */
        a7 = xform[xformx+8];
        a5 = (int)((a7 * C4p16) >> 16); //C4mulA(a7, a5);
        a6 = (int)((a7 * C2pC6p16) >> 16); //C2pC6mulA(a7, a6);
        a4 = (int)((a7 * C6p16) >> 16); //C6mulA(a7, a4);    /* C6*S1 */
        a6 = a6 - a4;
        a7 = a7 + a6;      /* (1+C2)*S1 */
        a6 = a6 + a5;      /* (C2+C4)*S1 */
        a5 = a5 + a4;      /* (C4+C6)*S1 */

        a2 = xform[xformx]; a1 = xform[xformx+16];
        b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
        a1 = b1 + a1;
        a0 = a2 + a1;
        a3 = a2 - a1;
        a1 = a2 + b1;
        a2 = a2 - b1;

        xform[xformx+ 8] = a1 + a6;
        xform[xformx+48] = a1 - a6;
        xform[xformx+16] = a2 + a5;
        xform[xformx+40] = a2 - a5;
        xform[xformx+24] = a3 + a4;
        xform[xformx+32] = a3 - a4;
        xform[xformx]    = a0 + a7;
        xform[xformx+56] = a0 - a7;
        xformx--;
      } while (xformx >= 0); /* for xformx over columns */
    } /* maxRow == 2 */

    else /* maxRow == 3 (4 rows) */
    { do
      { /* Work: 5M+13A here vs. 5M+10A for old IDCT. */
        a4 = xform[xformx+24]; a3 = xform[xformx+8];
        a5 = a3 - a4;
        a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
        a7 = a3 + a4;
        a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
        a3 = a3 - a4;
        a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
        a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
        a6 = a6 - a3;
        a4 = a3 - a4;
        a7 = a7 + a6;
        a6 = a6 + a5;
        a5 = a5 + a4;

        a2 = xform[xformx]; a1 = xform[xformx+16];
        b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
        a1 = b1 + a1;
        a0 = a2 + a1;
        a3 = a2 - a1;
        a1 = a2 + b1;
        a2 = a2 - b1;

        xform[xformx+ 8] = a1 + a6;
        xform[xformx+48] = a1 - a6;
        xform[xformx+16] = a2 + a5;
        xform[xformx+40] = a2 - a5;
        xform[xformx+24] = a3 + a4;
        xform[xformx+32] = a3 - a4;
        xform[xformx]    = a0 + a7;
        xform[xformx+56] = a0 - a7;
        xformx--;
      } while (xformx >= 0); /* for xformx over columns */
    } /* else maxRow == 3 */
  } /* if (maxRow < 4) */

  /* maxRow >= 4*/
  else if (maxRow < 6) /* maxRow == 4 or 5 */
  {
    if (maxRow <= 4) /* maxRow == 4 (5 rows) */
    { do
      { /* Work: 5M+15A here vs. 5M+12A in old IDCT */
        a4 = xform[xformx+24]; a3 = xform[xformx+ 8];
        a5 = a3 - a4;
        a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
        a7 = a3 + a4;
        a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
        a3 = a3 - a4;
        a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
        a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
        a6 = a6 - a3;
        a4 = a3 - a4;
        a7 = a7 + a6;
        a6 = a6 + a5;
        a5 = a5 + a4;

        a2 = xform[xformx]; b1 = xform[xformx+32];
        a3 = a2 + b1;
        a2 = a2 - b1;
        a1 = xform[xformx+16];
        b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
        a1 = b1 + a1;
        a0 = a3 + a1;
        a3 = a3 - a1;
        a1 = a2 + b1;
        a2 = a2 - b1;

        xform[xformx+ 8] = a1 + a6;
        xform[xformx+48] = a1 - a6;
        xform[xformx+16] = a2 + a5;
        xform[xformx+40] = a2 - a5;
        xform[xformx+24] = a3 + a4;
        xform[xformx+32] = a3 - a4;
        xform[xformx]    = a0 + a7;
        xform[xformx+56] = a0 - a7;
        xformx--;
      } while (xformx >= 0); /* for xformx over columns */
    } /* maxRow == 4 */

    else if (maxRow == 5) /* (6 rows) */
    { do
      { /* Work: 5M+17A here vs. 6M+14A for old IDCT */
        a4 = xform[xformx+24]; a6 = xform[xformx+40];
        a7 = a4 + a6;
        a4 = a4 - a6;
        a3 = xform[xformx+8];
        a5 = a3 - a7;
        a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
        a7 = a3 + a7;

        a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
        a3 = a3 - a4;
        a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
        a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
        a6 = a6 - a3;
        a4 = a3 - a4;
        a7 = a7 + a6;
        a6 = a6 + a5;
        a5 = a5 + a4;

        a2 = xform[xformx]; b1 = xform[xformx+32];
        a3 = a2 + b1;
        a2 = a2 - b1;
        a1 = xform[xformx+16];
        b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
        a1 = b1 + a1;
        a0 = a3 + a1;
        a3 = a3 - a1;
        a1 = a2 + b1;
        a2 = a2 - b1;

        xform[xformx+ 8] = a1 + a6;
        xform[xformx+48] = a1 - a6;
        xform[xformx+16] = a2 + a5;
        xform[xformx+40] = a2 - a5;
        xform[xformx+24] = a3 + a4;
        xform[xformx+32] = a3 - a4;
        xform[xformx]    = a0 + a7;
        xform[xformx+56] = a0 - a7;
        xformx--;
      } while (xformx >= 0); /* for xformx over columns */
    } /* maxRow == 5 */
  } /* if (maxRow < 6) */

  else /* maxRow == 6 or 7 */
  { do
    { /* The transform requires (not including xform[xformx+i] = n at the end)
       Work = 5M+21A; this compares to 8M+18A for the old IDCT.
       */
      a4 = xform[xformx+24]; a6 = xform[xformx+40];
      a7 = a4 + a6;      /* S35 */
      a4 = a4 - a6;      /* D35 */
      a3 = xform[xformx+8]; a5 = xform[xformx+56];
      a6 = a3 + a5;      /* S17 */
      a3 = a3 - a5;      /* D17 */
      a5 = a6 - a7;
      a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);    /* C4*(S17-S35) */
      a7 = a6 + a7;      /* S17+S35 */
      a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6); /* (C2+C6)*D17 */
      a3 = a3 - a4;      /* D17-D35 */
      a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);    /* C6*(D17-D35) */
      a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4); /* (C2-C6)*D35 */
      a6 = a6 - a3;      /* (C2+C6)*D17 - C6*(D17-D35) = C2*D17 + C6*D35 */
      a4 = a3 - a4;      /* C6*(D17-D35) - (C2-C6)*D35 = C6*D17 - C2*D35 */
      a7 = a7 + a6;      /* S17+S35 + C2*D17 + C6*D35 */
      a6 = a6 + a5;      /* C2*D17 + C6*D35 + C4*(S17-S35) */
      a5 = a5 + a4;      /* C4*(S17-S35) + C6*D17 - C2*D35 */

      a2 = xform[xformx]; b1 = xform[xformx+32];
      a3 = a2 + b1;      /* S04 */
      a2 = a2 - b1;      /* D04 */
      a0 = xform[xformx+48]; b1 = xform[xformx+16];
      a1 = b1 + a0;      /* S26 */
      b1 = b1 - a0;      /* D26 */
      b1 = (int)((b1 * C4p16) >> 16); //C4mulA(b1, b1);    /* C4*D26 */
      a1 = b1 + a1;      /* S26 + C4*D26 */
      a0 = a3 + a1;      /* S04 + S26 + C4*D26 */
      a3 = a3 - a1;      /* S04 - S26 - C4*D26 */
      a1 = a2 + b1;      /* D04 + C4*D26 */
      a2 = a2 - b1;      /* D04 - C4*D26 */

      xform[xformx+ 8] = a1 + a6;
      xform[xformx+48] = a1 - a6;
      xform[xformx+16] = a2 + a5;
      xform[xformx+40] = a2 - a5;
      xform[xformx+24] = a3 + a4;
      xform[xformx+32] = a3 - a4;
      xform[xformx]    = a0 + a7;
      xform[xformx+56] = a0 - a7;
      xformx--;
    } while (xformx >= 0); /* for xformx over columns */
  } /* maxRow == 6 or 7 */


  /* TRANSFORM ROWS.  Accelerate the cases for max. columns == 1, 2, 3, 4,
   and 5 or 6. */
  xformx = 0;
  { int ix = lastPtr;
    short cstrip[] = jpframe.strip;
//#if BigCT
    { if (maxCol < 4)
      { if (maxCol < 2)
        { if (maxCol < 1)
          { do
            { a0 = xform[xformx] >> 15;
              cstrip[ix] = cstrip[ix+1] = cstrip[ix+2] = cstrip[ix+3] =
                cstrip[ix+4] = cstrip[ix+5] = cstrip[ix+6] = cstrip[ix+7] = (short)a0;
//#endif
              ix += padx;
            } while ((xformx += 8) < 64); /* for(xformx ...) */
          } /* if (maxCol < 1) */

          else /* maxCol == 1 (Two non-zero columns) */
          { do
            { /* Work: 3M+4A vs. 3M+2A */
              a7 = xform[xformx+1];
              a5 = (int)((a7 * C4p16) >> 16); //C4mulA(a7, a5);
              a6 = (int)((a7 * C2pC6p16) >> 16); //C2pC6mulA(a7, a6);
              a4 = (int)((a7 * C6p16) >> 16); //C6mulA(a7, a4);  /* C6*S1 */
              a6 = a6 - a4;
              a7 = a7 + a6;    /* ( 1+C2)*S1 */
              a6 = a6 + a5;    /* (C2+C4)*S1 */
              a5 = a5 + a4;    /* (C4+C6)*S1 */

              a0 = xform[xformx];
              cstrip[ix]   = (short)((a0 + a7) >> 15);
              cstrip[ix+1] = (short)((a0 + a6) >> 15);
              cstrip[ix+2] = (short)((a0 + a5) >> 15);
              cstrip[ix+3] = (short)((a0 + a4) >> 15);
              cstrip[ix+4] = (short)((a0 - a4) >> 15);
              cstrip[ix+5] = (short)((a0 - a5) >> 15);
              cstrip[ix+6] = (short)((a0 - a6) >> 15);
              cstrip[ix+7] = (short)((a0 - a7) >> 15);
              ix += padx;
            } while ((xformx += 8) < 64); /* for(xformx ...) */
          } /* else (maxCol == 1) */
        } /* (maxCol < 2) */

        else if (maxCol == 2)
        { do
          { a7 = xform[xformx+1];
            a5 = (int)((a7 * C4p16) >> 16); //C4mulA(a7, a5);
            a6 = (int)((a7 * C2pC6p16) >> 16); //C2pC6mulA(a7, a6);
            a4 = (int)((a7 * C6p16) >> 16); //C6mulA(a7, a4);    /* C6*S1 */
            a6 = a6 - a4;
            a7 = a7 + a6;      /* (1+C2)*S1 */
            a6 = a6 + a5;      /* (C2+C4)*S1 */
            a5 = a5 + a4;      /* (C4+C6)*S1 */

            a2 = xform[xformx]; a1 = xform[xformx+2];
            b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
            a1 = b1 + a1;
            a0 = a2 + a1;      /* S0 + (1+C4)*S2 */
            a3 = a2 - a1;      /* S0 - (1+C4)*S2 */
            a1 = a2 + b1;      /* S0 + C4*S2 */
            a2 = a2 - b1;      /* S0 - C4*S2 */

            cstrip[ix]   = (short)((a0 + a7) >> 15);
            cstrip[ix+1] = (short)((a1 + a6) >> 15);
            cstrip[ix+2] = (short)((a2 + a5) >> 15);
            cstrip[ix+3] = (short)((a3 + a4) >> 15);
            cstrip[ix+4] = (short)((a3 - a4) >> 15);
            cstrip[ix+5] = (short)((a2 - a5) >> 15);
            cstrip[ix+6] = (short)((a1 - a6) >> 15);
            cstrip[ix+7] = (short)((a0 - a7) >> 15);
            ix += padx;
          } while ((xformx += 8) < 64); /* for(xformx ...) */
        } /* maxCol == 2 */

        else /* (maxCol == 3) */
        { do
          { /* Work: 5M+13A here vs. 5M+10A for old IDCT. */
            a4 = xform[xformx+3]; a3 = xform[xformx+1];
            a5 = a3 - a4;
            a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
            a7 = a3 + a4;
            a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
            a3 = a3 - a4;
            a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
            a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
            a6 = a6 - a3;
            a4 = a3 - a4; a7 = a7 + a6; a6 = a6 + a5; a5 = a5 + a4;

            a2 = xform[xformx]; a1 = xform[xformx+2];
            b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
            a1 = b1 + a1;
            a0 = a2 + a1; a3 = a2 - a1; a1 = a2 + b1; a2 = a2 - b1;

            cstrip[ix]   = (short)((a0 + a7) >> 15);
            cstrip[ix+1] = (short)((a1 + a6) >> 15);
            cstrip[ix+2] = (short)((a2 + a5) >> 15);
            cstrip[ix+3] = (short)((a3 + a4) >> 15);
            cstrip[ix+4] = (short)((a3 - a4) >> 15);
            cstrip[ix+5] = (short)((a2 - a5) >> 15);
            cstrip[ix+6] = (short)((a1 - a6) >> 15);
            cstrip[ix+7] = (short)((a0 - a7) >> 15);
            ix += padx;
          } while ((xformx += 8) < 64); /* for(xformx ...) */
        } /* (maxCol == 3) */
      } /* if (maxCol < 4) */

      else if (maxCol < 6) /* (maxCol == 4 or 5) */
      { if (maxCol < 5) /* maxCol == 4 */
        { do
          { /* Work: 5M+15A here vs. 5M+12A in old IDCT */
            a4 = xform[xformx+3]; a3 = xform[xformx+1]; a5 = a3 - a4;
            a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
            a7 = a3 + a4;
            a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
            a3 = a3 - a4;
            a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
            a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
            a6 = a6 - a3;
            a4 = a3 - a4; a7 = a7 + a6; a6 = a6 + a5; a5 = a5 + a4;

            a2 = xform[xformx]; b1 = xform[xformx+4]; a3 = a2 + b1; a2 = a2 - b1;
            a1 = xform[xformx+2];
            b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
            a1 = b1 + a1;
            a0 = a3 + a1; a3 = a3 - a1; a1 = a2 + b1; a2 = a2 - b1;

            cstrip[ix]   = (short)((a0 + a7) >> 15);
            cstrip[ix+1] = (short)((a1 + a6) >> 15);
            cstrip[ix+2] = (short)((a2 + a5) >> 15);
            cstrip[ix+3] = (short)((a3 + a4) >> 15);
            cstrip[ix+4] = (short)((a3 - a4) >> 15);
            cstrip[ix+5] = (short)((a2 - a5) >> 15);
            cstrip[ix+6] = (short)((a1 - a6) >> 15);
            cstrip[ix+7] = (short)((a0 - a7) >> 15);
            ix += padx;
          } while ((xformx += 8) < 64); /* for(xformx ...) */
        } /* maxCol == 4 */
        else /* maxCol == 5 */
        { do
          { /* Work: 5M+17A here vs. 6M+14A for old IDCT */
            a4 = xform[xformx+3]; a6 = xform[xformx+5]; a7 = a4 + a6; a4 = a4 - a6;
            a3 = xform[xformx+1]; a5 = a3 - a7;
            a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);
            a7 = a3 + a7;
            a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6);
            a3 = a3 - a4;
            a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);
            a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4);
            a6 = a6 - a3;
            a4 = a3 - a4; a7 = a7 + a6; a6 = a6 + a5; a5 = a5 + a4;

            a2 = xform[xformx]; b1 = xform[xformx+4];
            a3 = a2 + b1;
            a2 = a2 - b1;
            a1 = xform[xformx+2];
            b1 = (int)((a1 * C4p16) >> 16); //C4mulA(a1, b1);
            a1 = b1 + a1;
            a0 = a3 + a1; a3 = a3 - a1; a1 = a2 + b1; a2 = a2 - b1;

            cstrip[ix]   = (short)((a0 + a7) >> 15);
            cstrip[ix+1] = (short)((a1 + a6) >> 15);
            cstrip[ix+2] = (short)((a2 + a5) >> 15);
            cstrip[ix+3] = (short)((a3 + a4) >> 15);
            cstrip[ix+4] = (short)((a3 - a4) >> 15);
            cstrip[ix+5] = (short)((a2 - a5) >> 15);
            cstrip[ix+6] = (short)((a1 - a6) >> 15);
            cstrip[ix+7] = (short)((a0 - a7) >> 15);
            ix += padx;
          } while ((xformx += 8) < 64); /* for(xformx ...) */
        } /* maxCol == 5 */
      } /* maxCol == 4 or 5 */

      else /* (maxCol >= 6) || (data_study != 0) */
      { do
        { /* The transform requires (not including xform[xformx+i] = n at the end)
           Work = 5M+21A; this compares to 8M+18A for the old IDCT.
           */
          a4 = xform[xformx+3]; a6 = xform[xformx+5];
          a7 = a4 + a6;      /* S35 */
          a4 = a4 - a6;      /* D35 */
          a3 = xform[xformx+1]; a5 = xform[xformx+7];
          a6 = a3 + a5;      /* S17 */
          a3 = a3 - a5;      /* D17 */
          a5 = a6 - a7;
          a5 = (int)((a5 * C4p16) >> 16); //C4mulA(a5, a5);    /* C4*(S17-S35) */
          a7 = a6 + a7;      /* S17+S35 */
          a6 = (int)((a3 * C2pC6p16) >> 16); //C2pC6mulA(a3, a6); /* (C2+C6)*D17 */
          a3 = a3 - a4;      /* D17-D35 */
          a3 = (int)((a3 * C6p16) >> 16); //C6mulA(a3, a3);    /* C6*(D17-D35) */
          a4 = (int)((a4 * C2mC6p16) >> 16); //C2mC6mulA(a4, a4); /* (C2-C6)*D35 */
          a6 = a6 - a3;      /* (C2+C6)*D17 - C6*(D17-D35) = C2*D17 + C6*D35 */
          a4 = a3 - a4;      /* C6*(D17-D35) - (C2-C6)*D35 = C6*D17 - C2*D35 */
          a7 = a7 + a6;      /* S17+S35 + C2*D17 + C6*D35 */
          a6 = a6 + a5;      /* C2*D17 + C6*D35 + C4*(S17-S35) */
          a5 = a5 + a4;      /* C4*(S17-S35) + C6*D17 - C2*D35 */

          a2 = xform[xformx]; b1 = xform[xformx+4];
          a3 = a2 + b1;      /* S04 */
          a2 = a2 - b1;      /* D04 */
          a0 = xform[xformx+6]; b1 = xform[xformx+2];
          a1 = b1 + a0;      /* S26 */
          b1 = b1 - a0;      /* D26 */
          b1 = (int)((b1 * C4p16) >> 16); //C4mulA(b1, b1);    /* C4 * D26 */
          a1 = b1 + a1;      /* C4*D26 + S26 */
          a0 = a3 + a1;      /* S04 + C4*D26 + S26 */
          a3 = a3 - a1;      /* S04 - C4*D26 - S26 */
          a1 = a2 + b1;      /* D04 + C4*D26 */
          a2 = a2 - b1;      /* D04 - C4*D26 */

          cstrip[ix]   = (short)((a0 + a7) >> 15);
          cstrip[ix+1] = (short)((a1 + a6) >> 15);
          cstrip[ix+2] = (short)((a2 + a5) >> 15);
          cstrip[ix+3] = (short)((a3 + a4) >> 15);
          cstrip[ix+4] = (short)((a3 - a4) >> 15);
          cstrip[ix+5] = (short)((a2 - a5) >> 15);
          cstrip[ix+6] = (short)((a1 - a6) >> 15);
          cstrip[ix+7] = (short)((a0 - a7) >> 15);
          ix += padx;
        } while ((xformx += 8) < 64); /* for(xformx ...) */
      } /* else (maxCol >= 6 || dg->data_study) */
    }
  }
} /* DCTRevTransform */


/* dctdsbg.c */

/*-----------------------------------------------------------------------
 Initialize the decoder color transform tables.  This code is used by the
 DCTEncode study option and by DCTDecode.  A single 32-bit table entry
 for Cb contains the value for the G calculation in the left-half and the
 value for the B calculation in the right-half; and the table for Cr
 contains the G and R terms in the halves of the word.  The right-half
 values are 15-bit signed quantities offset by 0x4000 to fit in 15 bits.

 The RGB to YCC and YCC to RGB transforms are as follows:

   Y =  .299R    + .587G     + .114B
  Cb = -.168736R - .331264G  + .500B      + 128
  Cr =  .500R    - .4186876G - .08131241B + 128

   R = Y                          + 1.4020     * (Cr - 128)
   G = Y - 0.3441363 * (Cb - 128) - 0.71413636 * (Cr - 128)
   B = Y + 1.772     * (Cb - 128)

 Due to the retention of 3 extra bits from the reverse transform, the table
 index is an 11-bit Y, Cb, or Cr value.  Each 16-bit table entry is computed
 as accurately as possible from an initial value that consists of a
 [-128..+128] to [0..255] range conversion and the following rounding
 constants:
   0x 8000 to round the value stored in the table.
   0x10000 in UG to round (UGtab[u] + VGtab[v]) >> 17.
   0x80000 in UG to round (Y + ((UGtab[u] + VGtab[v]) >> 17)) >> 3
           (negative for RGB because uvg is subtracted).
   0x40000 in UB and VR to round (Y + UBtab[u]) >> 3
           (negative for CMYK because ub and vr are subtracted).
 Extra precision, which is shifted or masked off before storing, prevents the
 sums from drifting over 2047 additions.  Values stored for UG and VG contain
 an 8-bit integer and 4-bit fractional part; values stored for UB and VR
 contain an 8-bit integer and 3-bit fractional part.

 DCTDRGBUV(uvg, ub, vr, ubs, vrs) \
  uvg = (ASInt32)(((ASInt32)ubs * (ASInt32)UG + (ASInt32)vrs * (ASInt32)VG + \
               (ASInt32)0x9000 - \
               (ASInt32)0x80 * ((ASInt32)UG + (ASInt32)VG)) >> 17); \
  ub =  (ASInt32)(((ASInt32)ubs * (ASInt32)UB + (ASInt32)0x9000 - \
               (ASInt32)0x80*(ASInt32)UB) >> 16); \
  vr =  (ASInt32)(((ASInt32)vrs * (ASInt32)VR + (ASInt32)0x9000 - \
               (ASInt32)0x80*(ASInt32)VR) >> 16);

 DCTDCMYKUV(uvg, ub, vr, ubs, vrs) \
  uvg = (ASInt32)(((ASInt32)ubs * (ASInt32)UG + (ASInt32)vrs * (ASInt32)VG + \
               (ASInt32)0xFF0000 + (ASInt32)0x9000 - \
               (ASInt32)0x80*((ASInt32)VG + (ASInt32)UG)) >> 17); \
  ub =  (ASInt32)(((ASInt32)ubs * (ASInt32)UB - (ASInt32)0xFF0000 + \
               (ASInt32)0x9000 - (ASInt32)0x80*(ASInt32)UB) >> 16); \
  vr =  (ASInt32)(((ASInt32)vrs * (ASInt32)VR - (ASInt32)0x3FC000 + \
               (ASInt32)0x9000 - (ASInt32)0x80*(ASInt32)VR) >> 16);
 ----------------------------------------------------------------------*/
/* Fixed-point chroma coefficients of the YCC-to-RGB equations.  The two
 green coefficients are scaled by 2^17, the blue and red ones by 2^16. */
private static final int UG = 0x0B033;  /* 0.3441363  * 2^17 = 45106.63311 */
private static final int UB = 0x1C5A2;  /* 1.7720     * 2^16 = 116129.792  */
private static final int VR = 0x166E9;  /* 1.4020     * 2^16 = 91881.472   */
private static final int VG = 0x16DA3;  /* 0.71413636 * 2^17 = 93603.28098 */

/* The IDCT hands 3 extra bits of YCC precision to the reverse color
 transform, so sample-domain quantities below are scaled up by 8. */
private static final int CTBRRND = 1 << 18;    /* 0x40000: blue/red rounding term  */
private static final int CT128   = 128 << 3;   /* 0x400: 128 with the 3 extra bits */
private static final int CTBRFF  = 0xFF << 19; /* 0x7F80000: blue/red 0xFF term    */

/* Each coefficient multiplied by CT128; subtracted from the starting value
 of the matching running sum when the tables are initialized. */
private static final int CT128UB = CT128 * UB;
private static final int CT128VR = CT128 * VR;
private static final int CT128UG = CT128 * UG;
private static final int CT128VG = CT128 * VG;

/* Table length, and the green counterparts of the blue/red constants
 (doubled because the green terms carry one more fractional bit). */
private static final int CTLOOPCOUNT = 2 * CT128;
private static final int CTGRND = 2 * CTBRRND;
private static final int CTGFF = 2 * CTBRFF;

/* Build the Cb- and Cr-indexed lookup tables for the decoder's reverse
 color transform and store them in dg_UGtab and dg_VGtab.

 Both tables hold CTLOOPCOUNT entries, and every entry is a packed pair:
   dg_UGtab[j]: high 16 bits = green term for Cb, low 16 bits = blue term
   dg_VGtab[j]: high 16 bits = green term for Cr, low 16 bits = red term
 The terms are taken from four running sums.  Each sum starts at a bias
 selected by dg_colorconv and advances by UG, UB, VG or VR per table index. */
private void DCTDInitCTBig()
{
  final int[] cbTable = new int[CTLOOPCOUNT];
  dg_UGtab = cbTable;
  final int[] crTable = new int[CTLOOPCOUNT];
  dg_VGtab = crTable;

  /* The starting value of the Cr green sum is the same for both conversions. */
  int crGreen = 0x8000 - CT128VG;
  int cbGreen, cbBlue, crRed;

  if (dg_colorconv == DCTTables.RGBtoYUV)
  {
    /* Rounding terms are added for blue/red and subtracted for green. */
    cbBlue  = CTBRRND + 0x8000 - CT128UB;
    crRed   = CTBRRND + 0x8000 - CT128VR;
    cbGreen = 0x18000 - CTGRND - CT128UG;
  }
  else
  {
    /* Any other conversion (the original comments name CMYKtoYUVK): the
     signs of the rounding terms flip and the 0xFF terms are folded in. */
    cbBlue  = 0x8000 - CTBRRND - CT128UB - CTBRFF;
    crRed   = 0x8000 - CTBRRND - CT128VR - CTBRFF;
    cbGreen = CTGFF + CTGRND + 0x18000 - CT128UG;
  }

  int j = 0;
  while (j < CTLOOPCOUNT)
  {
    /* Keep the top half of the green sum and put the top half of the
     blue/red sum underneath it; the two bit ranges never overlap. */
    cbTable[j] = (cbGreen & 0xFFFF0000) | (cbBlue >>> 16);
    crTable[j] = (crGreen & 0xFFFF0000) | (crRed >>> 16);

    crGreen += VG;
    crRed   += VR;
    cbGreen += UG;
    cbBlue  += UB;
    j++;
  }
} /* DCTDInitCTBig */


/*------------------------------------------------------------------------
 Vertical resampling is accomplished by advancing or not advancing the strip
 pointer according to stripmv, maxv, and vs for a color c.
 The code below implements the following table:
   maxv   1         2        3        4
 vs
 1        0        0,0     0,0,0   0,0,0,0
 2       --        0,1     0,0,1   0,0,1,1
 3       --        --      0,1,2   0,1,1,2
 4       --        --       --     0,1,2,3
 The table shows that line 0 is used for the first of the maxv scanlines;
 if vs > 1, then the line used increases up to vs-1.  The above table is
 convenient for the encoder because only when maxv==4 and vs==2 is it
 necessary to copy color strip lines.  JPEG does not dictate any particular
 method of subsampling and resampling.
 -------------------------------------------------------------------------*/
private int DCTPointStrip(int stripv, int stripmv, int maxv, int vs)
{
  /* A component with vs <= 1 (vs == 0 is a don't-care) has one line per
   strip row, so the strip row index is the answer. */
  if (vs <= 1)
    return stripv;

  /* First line belonging to strip row stripv for this component. */
  final int firstLine = stripv * vs;

  /* stripmv itself is the right offset whenever vs == maxv.  Only
   (maxv, vs) = (3,2), (4,2) and (4,3) need a compressed offset. */
  if (maxv == 4)
  {
    switch (vs)
    {
    case 3:
      return firstLine + ((stripmv + 1) >>> 1); /* 0,1,1,2 */
    case 2:
      return firstLine + (stripmv >>> 1);       /* 0,0,1,1 */
    default:
      return firstLine + stripmv;
    }
  }

  final int offset = (vs == maxv - 1) ? (stripmv >>> 1) : stripmv;
  return firstLine + offset;
} /* DCTPointStrip */


/*----------------------------------------------------------------------
 Resample, perform any color transform on the strip data, and output the
 interleaved raster to ncolorline.

 With no complications, the number of samples handled per ncolorline
 is a multiple of cn*maxh, so in all maxh==2 fast cases, there will be
 an even number of samples handled.

 The color strips have no extra storage, but have 8*N sample values, so
 loops unrolled to 2, 4, or 8 iterations are ok.

 ncolorline has enough extra storage to permit an overshoot of 3 extra
 pixels + 4 bytes; i.e., it is/should be ok to overshoot up to 3*cn + 4
 bytes; (but on 12/20/92, I had a problem which made me question this).

 MTK: We delayed the rounding from DCTRevTransform to this step.
 This is really the DCTPutNCLineBig case, merged into the old PutNCLine
 -----------------------------------------------------------------------*/
private void DCTPutNCLine(int stripv, int stripmv)
{ short rbfr[] = dg_bigcolorline;
  int rblast = dg_nsamps;
//  int rmask = 0x700;
//  int rnegovf = 0x400;
  int colorconv = dg_colorconv;
  int cn = dg_framecn;
  int ix, y_ptr, u_ptr, v_ptr, k_ptr;
  short y_strip[], u_strip[], v_strip[], k_strip[];
  int /*rc, rm,  rk, rg,*/ rb,ry, ub, vr, uvg;

  y_strip = u_strip = v_strip = k_strip = null;
  y_ptr   = u_ptr   = v_ptr   = k_ptr   = 0;
  ix = 0;
  { DCTjframe jpframe = dg_jpframe[0];
    int maxv = dg_maxv;
    y_strip = jpframe.strip;
    y_ptr = (DCTPointStrip(stripv, stripmv, maxv, jpframe.vs)) * jpframe.padx;

    if (cn > 1)
    {
      jpframe = dg_jpframe[1];
      u_strip = jpframe.strip;
      u_ptr = (DCTPointStrip(stripv, stripmv, maxv, jpframe.vs)) * jpframe.padx;
    }

    if (cn > 2)
    {
      jpframe = dg_jpframe[2];
      v_strip = jpframe.strip;
      v_ptr = (DCTPointStrip(stripv, stripmv, maxv, jpframe.vs)) * jpframe.padx;
    }

    if (cn > 3)
    {
      jpframe = dg_jpframe[3];
      k_strip = jpframe.strip;
      k_ptr = (DCTPointStrip(stripv, stripmv, maxv, jpframe.vs)) * jpframe.padx;
    }
  }

  /* 3 color vertical fast cases require hy == maxh, vy == maxv, and
   vu == vv == 1; 4 color fast cases also require colorconv != 0 and
   vk == maxv.  On color conversions, the new Y value of the YUV or YUVK
   sample is computed, and Ynew - Yold is added to Cb and Cr computed
   last time and already stored in ncolorline.  For non-color-conversion
   vertical fast cases, the new y value is stored into the scanline computed
   last time.  Vertical fast cases apply to vertical sampling, 21, 31, or 41
   for 2 colors, to 211, 311, or 411 for 3 colors or to 2112, 3113, or 4114
   with color conversion.
   u_ptr is used to point at the previous scan line's y_strip.
   NOTE: When the result on the previous scanline was an overflow or
   underflow fixup, then the calculation here will differ slightly from a
   recomputed result, more accurate if the previous overflow was due to too
   large a Y value, less accurate if the previous overflow was due to too
   large a magnitude of Cb or Cr; overall, this code is less accurate.
   *** Decode blending might require vFast to be turned off.
   */
  if ((stripmv > 0) && (dg_vFast != 0))
  { switch (cn)
    {
    case 2: /* vFast != 0, cn == 2 */
      do
      {
        rbfr[ix]   = y_strip[y_ptr];
        rbfr[ix+2] = y_strip[y_ptr+1];
        rbfr[ix+4] = y_strip[y_ptr+2];
        rbfr[ix+6] = y_strip[y_ptr+3];
        y_ptr += 4;
        ix += 8;
      } while (ix < rblast);
      return;

    case 3: /* vFast != 0, cn == 3 */
      if (colorconv != 0)
      { u_ptr = y_ptr - dg_jpframe[0].padx;
        do
        {
          rb = y_strip[y_ptr] - y_strip[u_ptr];
          rbfr[ix]   += rb; rbfr[ix+1] += rb; rbfr[ix+2] += rb;

          rb = y_strip[y_ptr+1] - y_strip[u_ptr+1];
          rbfr[ix+3] += rb; rbfr[ix+4] += rb; rbfr[ix+5] += rb;

          rb = y_strip[y_ptr+2] - y_strip[u_ptr+2];
          rbfr[ix+6] += rb; rbfr[ix+7] += rb; rbfr[ix+8] += rb;

          rb = y_strip[y_ptr+3] - y_strip[u_ptr+3];
          rbfr[ix+9] += rb; rbfr[ix+10] += rb; rbfr[ix+11] += rb;
          y_ptr += 4; u_ptr += 4;
          ix += 12;
        } while(ix < rblast);
        return;
      } /* if (colorconv != 0) */
      else /* colorconv == 0 */
      { do
        {
          rbfr[ix]   = y_strip[y_ptr];
          rbfr[ix+3] = y_strip[y_ptr+1];
          rbfr[ix+6] = y_strip[y_ptr+2];
          rbfr[ix+9] = y_strip[y_ptr+3];
          ix += 12; y_ptr += 4;
        } while (ix < rblast);
        return;
      } /* colorconv == 0 */

    case 4: /* vFast != 0, cn == 4 */
      u_ptr = y_ptr - dg_jpframe[0].padx;
      do
      {
        ry = y_strip[y_ptr] - y_strip[u_ptr];
        rbfr[ix]  += ry; rbfr[ix+1] += ry; rbfr[ix+2] += ry;
        rbfr[ix+3] = k_strip[k_ptr];

        ry = y_strip[y_ptr+1] - y_strip[u_ptr+1];
        rbfr[ix+4] += ry; rbfr[ix+5] += ry; rbfr[ix+6] += ry;
        rbfr[ix+7] = k_strip[k_ptr+1];

        y_ptr += 2; u_ptr += 2; k_ptr += 2;
        ix += 8;
      } while (ix < rblast);
      return;
    } /* switch (cn) */
  } /* if ((blend == 0) && (stripmv > 0) && (vFast != 0)) */

  if (dg_hFast != 0)
  { /* The 111CT, 211CT, 1111CT, and 2112CT cases must be fast because they
     are used by PhotoShop and Acrobat.  The maxh==1, no color transform
     cases are not here because they are just as fast mixed in with the slow
     cases later.  The only case here not used by applications is FAST211,
     made fast because CIELAB or its equivalent might become important in
     the future.
     */
    int UGtab[] = dg_UGtab;
    int VGtab[] = dg_VGtab;

    switch (dg_hFast)
    {
    case DCTTables.FAST211:
      do
      { rbfr[ix]   = y_strip[y_ptr];
        rbfr[ix+1] = rbfr[ix+4] = u_strip[u_ptr];
        rbfr[ix+2] = rbfr[ix+5] = v_strip[v_ptr];
        rbfr[ix+3] = y_strip[y_ptr+1];
        rbfr[ix+6] = y_strip[y_ptr+2];
        rbfr[ix+7] = rbfr[ix+10] = u_strip[u_ptr+1];
        rbfr[ix+8] = rbfr[ix+11] = v_strip[v_ptr+1];
        rbfr[ix+9] = y_strip[y_ptr+3];

        y_ptr += 4; u_ptr += 2; v_ptr += 2;
        ix += 12;
      } while (ix < rblast);
      return;

    case DCTTables.FAST111CT:
      do
      {
        rb = y_strip[y_ptr];
        ub = u_strip[u_ptr];
        vr = v_strip[v_ptr];

        if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
        if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
        ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

        rbfr[ix]   = (short)(rb + vr);
        rbfr[ix+1] = (short)(rb - uvg);
        rbfr[ix+2] = (short)(rb + ub);

        ++y_ptr; ++u_ptr; ++v_ptr;
        ix += 3;
      } while (ix < rblast);
      return;

    /* Blended decoding.  I investigated this somewhat in jpeg97n but haven't
     found any type of interpolation significantly better than simple
     replication.  The investigation is in my private wishlist.doc and
     jpeg97n/dctdstr.c4.test.
     */
    case DCTTables.FAST211CT:
        do
        {
          ub = u_strip[u_ptr];
          vr = v_strip[v_ptr];

          if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
          if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
          ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

          rb = y_strip[y_ptr];
          rbfr[ix]   = (short)(rb + vr);
          rbfr[ix+1] = (short)(rb - uvg);
          rbfr[ix+2] = (short)(rb + ub);

          rb = y_strip[y_ptr+1];
          rbfr[ix+3] = (short)(rb + vr);
          rbfr[ix+4] = (short)(rb - uvg);
          rbfr[ix+5] = (short)(rb + ub);

          y_ptr += 2; ++u_ptr; ++v_ptr;
          ix += 6;
        } while (ix < rblast);
      return;


    case DCTTables.FAST1111CT:
       do
      {
        ry = -y_strip[y_ptr];
        ub =  u_strip[u_ptr];
        vr =  v_strip[v_ptr];

        if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
        if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
        ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

         rbfr[ix]   = (short)(ry - vr);
         rbfr[ix+1] = (short)(ry + uvg);
         rbfr[ix+2] = (short)(ry - ub);
         rbfr[ix+3] = k_strip[k_ptr];

        ++y_ptr; ++u_ptr; ++v_ptr; ++k_ptr;
        ix += 4;
      } while (ix < rblast);
      return;

    case DCTTables.FAST2112CT:
      do
      {
        ub = u_strip[u_ptr];
        vr = v_strip[v_ptr];

        if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
        if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
        ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

        ry = -y_strip[y_ptr];
        rbfr[ix]   = (short)(ry - vr);
        rbfr[ix+1] = (short)(ry + uvg);
        rbfr[ix+2] = (short)(ry - ub);
        rbfr[ix+3] = k_strip[k_ptr];

        ry = -y_strip[y_ptr+1];
        rbfr[ix+4] = (short)(ry - vr);
        rbfr[ix+5] = (short)(ry + uvg);
        rbfr[ix+6] = (short)(ry - ub);
        rbfr[ix+7] = k_strip[k_ptr+1];

        y_ptr += 2; ++u_ptr; ++v_ptr; k_ptr += 2;
        ix += 8;
      } while (ix < rblast);
      return;
    } /* switch (dg->hFast) */
  } /* if (dg->hFast != 0) */


  { /* Slow cases with separate color transform after the resampling.
     These cases are slower because the separate color transform involves
     storing and refetching sample values from the color strips.

     Moving these definitions from the beginning of the procedure to here
     and some others down to the particular cases reduced optimized code size
     by several hundred bytes on the Sparc.  I think that the optimizer keeps
     everything in registers up to some size, so localization helps on the
     blocks requiring fewer registers.
     */
    int hy, hu, hv, hk;

    hy = hu = hv = hk = 0;
    hy = dg_jpframe[0].hs;
    if (cn > 1) hu = dg_jpframe[1].hs;
    if (cn > 2) hv = dg_jpframe[2].hs;
    if (cn > 3) hk = dg_jpframe[3].hs;

    switch ((dg_maxh << 2) + cn - 5)
    {
    case 12: /* maxh == 4, cn == 1 */
    case  8: /* maxh == 3, cn == 1 */
    case  4: /* maxh == 2, cn == 1 */
    case  0: /* maxh == 1, cn == 1 */
      /* nsamps might be larger than lx if maxh > 1, probably an illegal
       case for an interleaved image.  But this implementation probably
       works anyway.
      do
      {
        rbfr[0] = y_ptr[0]; rbfr[1] = y_ptr[1];
        rbfr[2] = y_ptr[2]; rbfr[3] = y_ptr[3];
        rbfr += 4; y_ptr += 4;
      } while (rbfr < rblast);
       */
      System.arraycopy(y_strip, y_ptr, rbfr, ix, dg_nsamps);
      return;

    case 3:        /* maxh == 1, cn == 4 */
      /* The casts from ASUns8P to ASUns32P for rbfr and rblast are safe here
       because both are aligned on the proper boundaries for cn == 4.
       The DCTPutM1C4Line procedure improved performance slightly, but it
       seemed risky and did not gain very much, so I am leaving it out for now.
       */
      /* ASDebugAssert((ASInt32)((ASUns32P)rblast) == (ASInt32)rblast);
         DCTPutM1C4Line((ASUns32P)rbfr, (ASUns32P)rblast,
           (ASUns32P)y_ptr, (ASUns32P)u_ptr, (ASUns32P)v_ptr, (ASUns32P)k_ptr);
       */
      do
      {
        rbfr[ix]    = y_strip[y_ptr];
        rbfr[ix+ 4] = y_strip[y_ptr+1];
        rbfr[ix+ 8] = y_strip[y_ptr+2];
        rbfr[ix+12] = y_strip[y_ptr+3];
        y_ptr += 4;

        rbfr[ix+ 1] = u_strip[u_ptr];
        rbfr[ix+ 5] = u_strip[u_ptr+1];
        rbfr[ix+ 9] = u_strip[u_ptr+2];
        rbfr[ix+13] = u_strip[u_ptr+3];
        u_ptr += 4;

        rbfr[ix+ 2] = v_strip[v_ptr];
        rbfr[ix+ 6] = v_strip[v_ptr+1];
        rbfr[ix+10] = v_strip[v_ptr+2];
        rbfr[ix+14] = v_strip[v_ptr+3];
        v_ptr += 4;

        rbfr[ix+ 3] = k_strip[k_ptr];
        rbfr[ix+ 7] = k_strip[k_ptr+1];
        rbfr[ix+11] = k_strip[k_ptr+2];
        rbfr[ix+15] = k_strip[k_ptr+3];
        k_ptr += 4;
        ix += 16;
      } while (ix < rblast);
      return;

  case 2:        /* maxh == 1, cn == 3 */
      /* rblast is not ASUns32-aligned here, so round to larger value. */
      /* ASUns32P rblastw = (ASUns32P)((ASInt32)(rblast + 3) & ~3);
         DCTPutM1C3Line((ASUns32P)rbfr, (ASUns32P)rblastw,
           y_ptr, u_ptr, v_ptr);
         DCTPutM1C3Line(rbfr, rblast, y_ptr, u_ptr, v_ptr);
       */
      do
      {
        rbfr[ix]    = y_strip[y_ptr];
        rbfr[ix+ 3] = y_strip[y_ptr+1];
        rbfr[ix+ 6] = y_strip[y_ptr+2];
        rbfr[ix+ 9] = y_strip[y_ptr+3];
        y_ptr += 4;

        rbfr[ix+ 1] = u_strip[u_ptr];
        rbfr[ix+ 4] = u_strip[u_ptr+1];
        rbfr[ix+ 7] = u_strip[u_ptr+2];
        rbfr[ix+10] = u_strip[u_ptr+3];
        u_ptr += 4;

        rbfr[ix+ 2] = v_strip[v_ptr];
        rbfr[ix+ 5] = v_strip[v_ptr+1];
        rbfr[ix+ 8] = v_strip[v_ptr+2];
        rbfr[ix+11] = v_strip[v_ptr+3];
        v_ptr += 4;
        ix += 12;
      } while (ix < rblast);
      return;

    case 1:        /* maxh == 1, cn == 2 */
      do
      {
        rbfr[ix]   = y_strip[y_ptr];
        rbfr[ix+2] = y_strip[y_ptr+1];
        rbfr[ix+4] = y_strip[y_ptr+2];
        rbfr[ix+6] = y_strip[y_ptr+3];
        y_ptr += 4;

        rbfr[ix+1] = u_strip[u_ptr];
        rbfr[ix+3] = u_strip[u_ptr+1];
        rbfr[ix+5] = u_strip[u_ptr+2];
        rbfr[ix+7] = u_strip[u_ptr+3];
        u_ptr += 4;
        ix += 8;
      } while (ix < rblast);
      return;

    case 7:        /* maxh == 2, cn == 4 */
      do
      {
        rbfr[ix]   = y_strip[y_ptr]; y_ptr += hy;
        rbfr[ix+1] = u_strip[u_ptr]; u_ptr += hu;
        rbfr[ix+2] = v_strip[v_ptr]; v_ptr += hv;
        rbfr[ix+3] = k_strip[k_ptr]; k_ptr += hk;
        rbfr[ix+4] = y_strip[y_ptr-1];
        rbfr[ix+5] = u_strip[u_ptr-1];
        rbfr[ix+6] = v_strip[v_ptr-1];
        rbfr[ix+7] = k_strip[k_ptr-1];
        ix += 8;
      } while (ix < rblast);
      break;

    case 6:        /* maxh == 2, cn == 3 */
      do
      {
        rbfr[ix]   = y_strip[y_ptr]; y_ptr += hy;
        rbfr[ix+1] = u_strip[u_ptr]; u_ptr += hu;
        rbfr[ix+2] = v_strip[v_ptr]; v_ptr += hv;
        rbfr[ix+3] = y_strip[y_ptr-1];
        rbfr[ix+4] = u_strip[u_ptr-1];
        rbfr[ix+5] = v_strip[v_ptr-1];
        ix += 6;
      } while (ix < rblast);
      break;

    case 5:        /* maxh == 2, cn == 2 */
      do
      {
        rbfr[ix]   = y_strip[y_ptr]; y_ptr += hy;
        rbfr[ix+1] = u_strip[u_ptr]; u_ptr += hu;
        rbfr[ix+2] = y_strip[y_ptr-1];
        rbfr[ix+3] = u_strip[u_ptr-1];
        ix += 4;
      } while (ix < rblast);
      return;

    case 15: /* maxh == 4, cn == 4 */
    case 14: /* maxh == 4, cn == 3 */
    case 13: /* maxh == 4, cn == 2 */
      do
      { short y0, y1, y2, y3;
        short u0, u1, u2, u3;
        short v0, v1, v2, v3;
        short k0, k1, k2, k3;

        y0 = rbfr[ix++] = y_strip[y_ptr++];
        switch (hy)
        {
        case 4:
          y1 = y_strip[y_ptr++]; y2 = y_strip[y_ptr++]; y3 = y_strip[y_ptr++]; break;
        case 3:
          y2 = y1 = y_strip[y_ptr++]; y3 = y_strip[y_ptr++]; break;
        case 2:
          y1 = y0; y3 = y2 = y_strip[y_ptr++]; break;
        default: /* 1 */
          y3 = y2 = y1 = y0; break;
        }

        u0 = rbfr[ix++] = u_strip[u_ptr++];
        switch (hu)
        {
        case 4:
          u1 = u_strip[u_ptr++]; u2 = u_strip[u_ptr++]; u3 = u_strip[u_ptr++]; break;
        case 3:
          u2 = u1 = u_strip[u_ptr++]; u3 = u_strip[u_ptr++]; break;
        case 2:
          u1 = u0; u3 = u2 = u_strip[u_ptr++]; break;
        default: /* 1 */
          u3 = u2 = u1 = u0; break;
        }

        if (cn >= 3)
        { v0 = rbfr[ix++] = v_strip[v_ptr++];
          switch (hv)
          {
          case 4:
            v1 = v_strip[v_ptr++]; v2 = v_strip[v_ptr++]; v3 = v_strip[v_ptr++]; break;
          case 3:
            v2 = v1 = v_strip[v_ptr++]; v3 = v_strip[v_ptr++]; break;
          case 2:
            v1 = v0; v3 = v2 = v_strip[v_ptr++]; break;
          default: /* 1 */
            v3 = v2 = v1 = v0; break;
          }

          if (cn >= 4)
          { k0 = rbfr[ix++] = k_strip[k_ptr++];
            switch (hk)
            {
            case 4:
              k1 = k_strip[k_ptr++]; k2 = k_strip[k_ptr++]; k3 = k_strip[k_ptr++]; break;
            case 3:
              k2 = k1 = k_strip[k_ptr++]; k3 = k_strip[k_ptr++]; break;
            case 2:
              k1 = k0; k3 = k2 = k_strip[k_ptr++]; break;
            default: /* 1 */
              k3 = k2 = k1 = k0; break;
            }
            rbfr[ix]    = y1; rbfr[ix+ 1] = u1; rbfr[ix+ 2] = v1; rbfr[ix+ 3] = k1;
            rbfr[ix+ 4] = y2; rbfr[ix+ 5] = u2; rbfr[ix+ 6] = v2; rbfr[ix+ 7] = k2;
            rbfr[ix+ 8] = y3; rbfr[ix+ 9] = u3; rbfr[ix+10] = v3; rbfr[ix+11] = k3;
            ix += 12;
          }
          else /* cn == 3 */
          {
            rbfr[ix]   = y1; rbfr[ix+1] = u1; rbfr[ix+2] = v1;
            rbfr[ix+3] = y2; rbfr[ix+4] = u2; rbfr[ix+5] = v2;
            rbfr[ix+6] = y3; rbfr[ix+7] = u3; rbfr[ix+8] = v3;
            ix += 9;
          }
        }
        else /* cn == 2 */
        {
          rbfr[ix]   = y1; rbfr[ix+1] = u1;
          rbfr[ix+2] = y2; rbfr[ix+3] = u2;
          rbfr[ix+4] = y3; rbfr[ix+5] = u3;
          ix += 6;
        } /* cn == 2 */
      } while (ix < rblast);
      break;

    case 11: /* maxh == 3, cn == 4 */
    case 10: /* maxh == 3, cn == 3 */
    case  9: /* maxh == 3, cn == 2 */
      do
      { short y0, u0, v0, k0;
        short y1, u1, v1, k1;
        short y2, u2, v2, k2;

        y0 = rbfr[ix++] = y_strip[y_ptr++];
        switch (hy)
        {
        case 3:
          y1 = y_strip[y_ptr++]; y2 = y_strip[y_ptr++]; break;
        case 1:
          y2 = y1 = y0; break;
        default: /* case 2: */
          y2 = y_strip[y_ptr++];
          y1 = (short)((y0 + y2 + 1) >> 1); break;
        }

        u0 = rbfr[ix++] = u_strip[u_ptr++];
        switch (hu)
        {
        case 3:
          u1 = u_strip[u_ptr++]; u2 = u_strip[u_ptr++]; break;
        case 1:
          u2 = u1 = u0; break;
        default: /* case 2: */
          u2 = u_strip[u_ptr++];
          u1 = (short)((u0 + u2 + 1) >> 1); break;
        }

        if (cn >= 3)
        { v0 = rbfr[ix++] = v_strip[v_ptr++];
          switch (hv)
          {
          case 3:
            v1 = v_strip[v_ptr++]; v2 = v_strip[v_ptr++]; break;
          case 1:
            v2 = v1 = v0; break;
          default: /* case 2: */
            v2 = v_strip[v_ptr++];
            v1 = (short)((v0 + v2 + 1) >> 1); break;
          }

          if (cn >= 4)
          { k0 = rbfr[ix++] = k_strip[k_ptr++];
            switch (hk)
            {
            case 3:
              k1 = k_strip[k_ptr++]; k2 = k_strip[k_ptr++]; break;
            case 1:
              k2 = k1 = k0; break;
            default: /* case 2: */
              k2 = k_strip[k_ptr++];
              k1 = (short)((k0 + k2 + 1) >> 1); break;
            }
            rbfr[ix]   = y1; rbfr[ix+1] = u1; rbfr[ix+2] = v1; rbfr[ix+3] = k1;
            rbfr[ix+4] = y2; rbfr[ix+5] = u2; rbfr[ix+6] = v2; rbfr[ix+7] = k2;
            ix += 8;
          }
          else /* cn == 3 */
          {
            rbfr[ix]   = y1; rbfr[ix+1] = u1; rbfr[ix+2] = v1;
            rbfr[ix+3] = y2; rbfr[ix+4] = u2; rbfr[ix+5] = v2;
            ix += 6;
          }
        }
        else /* cn == 2 */
        {
          rbfr[ix]   = y1; rbfr[ix+1] = u1;
          rbfr[ix+2] = y2; rbfr[ix+3] = u2;
          ix += 4;
        } /* cn == 2 */
      } while (ix < rblast);
      break;
    } /* switch ((dg_maxh << 2) + cn - 5) */
  } /* slow cases bracket */

/*** Any blending of resampled lines would go here. ***/

  /* Ideal RGB to YCC and YCC to RGB matrices are as follows:

     Y =  .299R    + .587G    + .114B         R = Y + 1.4020V
     U = -.168736R - .331264G + .500B         G = Y - .3441363U - .71413636V
     V =  .500R    - .4186876G - .08131241B   B = Y + 1.772U

   As multiples of 16384, the RGB to YCC matrix is:
     4898.816   9617.408   1867.776     0x1323   0x2591   0x074C
    -2764.571  -5427.429   8192.000    -0x0ACD  -0x1533   0x2000
     8192.000  -6859.778  -1332.223     0x2000  -0x1ACC  -0x0534

   and the YCC to RGB matrix is:
     16384.000      0.000  22970.368    0x4000   0x0000   0x59BA
     16384.000  -5638.329 -11700.410    0x4000  -0x1606  -0x2DB4
     16384.000  29032.448      0.000    0x4000   0x7168   0x0000

   Going from RGB to YCC, there was no possibility of an overflow or
   underflow occurring during the conversion, but going YCC to RGB, an
   overflow or underflow could occur.  If that happens, the overflowing
   term must be lowered to 0xFF or the underflowing term raised up to 0.
   */
  if (colorconv != 0) /* Transform color coords? */
  { int UGtab[] = dg_UGtab;
    int VGtab[] = dg_VGtab;
    ix = 0;

    switch (colorconv)
    {
    case DCTTables.RGBtoYUV:
      do
      {
        rb = rbfr[ix];
        ub = rbfr[ix+1];
        vr = rbfr[ix+2];

        if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
        if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
        ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

        rbfr[ix]   = (short)(rb + vr);
        rbfr[ix+1] = (short)(rb - uvg);
        rbfr[ix+2] = (short)(rb + ub);
        ix += 3;
      } while (ix < rblast);
      break;

    case DCTTables.CMYKtoYUVK:
      do
      {
        ry = -rbfr[ix];
        ub =  rbfr[ix+1];
        vr =  rbfr[ix+2];

        if ((ub & 0xfffff800) != 0) ub = (~ub) >>> 21;
        if ((vr & 0xfffff800) != 0) vr = (~vr) >>> 21;
        ub = UGtab[ub]; vr = VGtab[vr]; uvg = (ub + vr) >> 17;

         rbfr[ix]   = (short)(ry - vr);
         rbfr[ix+1] = (short)(ry + uvg);
         rbfr[ix+2] = (short)(ry - ub);

        ix += 4;
      } while (ix < rblast);
      break;

    } /* switch (colorconv) */
  } /* if (colorconv != 0) */
} /* DCTPutNCLine */

}
