#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "CImg.h"
#include "fileString.h" //read video from string
using namespace cimg_library;

// define maximum picture size
#define MAX_WIDTH  1024
#define MAX_HEIGHT 1024
// maximum size of an encoded frame. should be multiple of 4 bytes
#define MAX_ENCODED_FRAME  10000

// define fixed MCU size (8)
#define MCU_sx     8
#define MCU_sy     8
// maximum number of MCUs per column / per row / per picture.
// Each expansion is fully parenthesized so the macros can be used inside
// larger expressions (e.g. `n / MAX_BLOCK`) without precedence surprises.
#define MAX_YMCU   (MAX_HEIGHT / MCU_sy)
#define MAX_XMCU   (MAX_WIDTH / MCU_sx)
#define MAX_BLOCK  (MAX_YMCU * MAX_XMCU)

/*----------------------------------*/
/* JPEG format parsing markers here */
/*----------------------------------*/
/* Values are compared against the 0xFF00|code words that get_next_MK2()   */
/* and the marker scans in process_frame() produce.                        */
#define SOI_MK     0xFFD8   /* start of image       */
#define APP_MK     0xFFE0   /* custom, up to FFEF   */
#define COM_MK     0xFFFE   /* comment segment      */
#define SOF_MK     0xFFC0   /* start of frame       */
#define SOS_MK     0xFFDA   /* start of scan        */
#define DHT_MK     0xFFC4   /* Huffman table        */
#define DQT_MK     0xFFDB   /* Quant. table         */
#define DRI_MK     0xFFDD   /* restart interval     */
#define EOI_MK     0xFFD9   /* end of image         */
/* mask that drops the low nibble; (mark & MK_MSK) == APP_MK matches FFE0..FFEF */
#define MK_MSK     0xFFF0
#define RST_MK(x)  ( (0xFFF8&(x)) == 0xFFD0 ) /* is x one of the markers 0xFFD0..0xFFD7 ? */

/*-------------------------------------------------------- */
/* all kinds of macros here                                */
/*-------------------------------------------------------- */
#define first_quad(c)   ((c)>>4)        /* first 4 bits in file order */
#define second_quad(c)  ((c)&15)
#define HUFF_ID(class, id)  (2*(class)+(id))
#define DC_CLASS        0
#define AC_CLASS        1
#define CASS            1
#define MAX_SIZE(Mclass) ((Mclass)?162:14) /* Memory size of HTables */

/*-------------------------------------------------------*/
/* JPEG data types here         */
/*-------------------------------------------------------*/
/* component descriptor structure */
/* Per-component decoding parameters handed to unpack_block2(). */
typedef struct {
  unsigned char CID;  /* component ID (not written or read by the code in this file) */
  char    QT;         /* QTable index, 2bits (not written or read by the code in this file) */
  char    DC_HT;      /* DC table index, 1bit; process_frame stores the high nibble of the SOS selector byte */
  char    AC_HT;      /* AC table index, 1bit; process_frame stores the low nibble of the SOS selector byte */
  int   PRED;         /* DC predictor value: unpack_block2 adds each decoded DC difference to it */
} cd_t;

/*-----------------------------------------*/
/* command for VLD                         */
/*-----------------------------------------*/
#define DHT_CMD 0xFA  /* command Define Huffman Table for VLD */
#define SOS_CMD 0xFB  /* command Start Of Scan for VLD        */
#define RPS_CMD 0xFC  /* command Read Picture Size for VLD    */
#define ABR_CMD 0xFD  /* command abort picture                */


#define SILENT    0
#define VERBOSE   1
#define INFO      2
#define VERBOSITY SILENT

void dbgprintf(int urgency, const char* format, ...);

/*
 * Zig-zag scan order: ZIGZAG_COEFFS[i] is the destination index of the
 * i-th coefficient of the scan. unZigZag() uses it as
 * dest[ZIGZAG_COEFFS[i]] = src[i]; the destination is an int[8][8] viewed
 * as a flat array, so an entry equals 8*row + column.
 */
static const int ZIGZAG_COEFFS[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

int* unZigZag(int* dest, int* src);

int* unquantify(int* block, unsigned char* qtable);

//minimum and maximum values a `signed int' can hold.
#define INT_MAX2  2147483647
#define INT_MIN2  (-INT_MAX2 - 1)

//useful constants
//ck = cos(k*pi/16) = s8-k = sin((8-k)*pi/16) times 1 << C_BITS and rounded
#define c0_1   16384
#define c0_s2  23170
#define c1_1   16069
#define c1_s2  22725
#define c2_1   15137
#define c2_s2  21407
#define c3_1   13623
#define c3_s2  19266
#define c4_1   11585
#define c4_s2  16384
#define c5_1   9102
#define c5_s2  12873
#define c6_1   6270
#define c6_s2  8867
#define c7_1   3196
#define c7_s2  4520
#define c8_1   0
#define c8_s2  0
#define sqrt2  c0_s2

// the number of bits of accuracy in all (signed) integer operations:
// may lie between 1 and 32 (bounds inclusive).
#define ARITH_BITS      16

// the minimum signed integer value that fits in ARITH_BITS.
// The shift is done on a positive value and negated afterwards, because
// left-shifting a negative value is undefined behaviour.
#define ARITH_MIN       (-(1 << (ARITH_BITS-1)))
// the maximum signed integer value that fits in ARITH_BITS:
#define ARITH_MAX       (~ARITH_MIN)

// the number of bits coefficients are scaled up before 2-D IDCT:
#define S_BITS           3
// the number of bits in the fractional part of a fixed point constant:
#define C_BITS          14

// scale x up by 2^n. Written as a multiplication so that negative
// coefficients are handled without shifting a negative value.
#define SCALE(x, n)     ((x) * (1 << (n)))

/* This version is vital in passing overall mean error test. */
inline int DESCALE(int x, int n);
inline int ADD(int x, int y);
inline int SUB(int x, int y);
inline int CMUL(int c, int x);

/* Rotate (x,y) over angle k*pi/16 (counter-clockwise) and scale with f. */
inline void rot(int f, int k, int x, int y, int *rx, int *ry);

/* Butterfly: but(a,b,x,y) = rot(sqrt(2),4,a,b,x,y) */
#define but(a,b,x,y) { x = SUB(a,b); y = ADD(a,b); }

inline void idct_1d(int *Y);
void IDCT(int input[8][8], unsigned char output[8][8]);

int intceil(int N, int D);
int intfloor(int N, int D);
int reformat(unsigned long s, int good);

int load_huff_tables();
int unpack_block2(unsigned char block[8][8], cd_t *comp);
void display_frame(unsigned x_size, unsigned y_size, unsigned char *buffer);
unsigned int get_size2();
void skip_segment2();
//int load_quant_tables2(const void *, const void *, DOLProcess *);
unsigned int get_next_MK2();
int get_bits2(int number, unsigned long *res);
int get_symbol2(int select, unsigned char *res);
int get_one_bit2(unsigned char *res);

CImgDisplay *display;

/*
 * Copy `len` bytes from the current read position of the frame buffer into
 * `ptr`, then advance the read pointer and decrease the remaining length.
 * No bounds check is performed; `len` is evaluated more than once, so it
 * must not have side effects.
 * The expansion deliberately has no trailing semicolon so that
 * `my_read(...);` behaves like a single statement (safe in if/else).
 */
#define my_read(ptr, len)                                      \
    do {                                                       \
        memcpy((ptr), processframe_state.fptr, (len));         \
        processframe_state.fptr += (len);                      \
        processframe_state.fLen -= (len);                      \
    } while (0)

/*
 * State of the variable-length decoder: picture geometry, Huffman tables,
 * the bit-reader window and the quantization table. One instance is
 * embedded in Processframe_State.
 */
typedef struct _vld_state {
    int mx_size;
    int my_size; //picture size in units of MCUs
    unsigned x_size;
    unsigned y_size; //picture size in pixels
    //raw symbol storage for the four Huffman tables
    unsigned char DC_Table0[MAX_SIZE(DC_CLASS)];
    unsigned char DC_Table1[MAX_SIZE(DC_CLASS)];
    unsigned char AC_Table0[MAX_SIZE(AC_CLASS)];
    unsigned char AC_Table1[MAX_SIZE(AC_CLASS)];
    //pointers to the storage above, set in process_frame_init();
    //indexed with HUFF_ID(class, id)
    unsigned char *HTable[4];
    //per table and per code-length index (0..15), filled by load_huff_tables():
    int MinCode[4][16]; //first code of that length
    int MaxCode[4][16]; //last code of that length, -1 if no code has that length
    int ValPtr[4][16];  //index in HTable of the first symbol of that length
    unsigned char window; //unread bits of the current byte; the next bit is the MSB
    unsigned char bit_count; //available bits in the window
    unsigned char QTable[64]; //copied from the DQT segment, applied by unquantify()
} VLD_State;

//local variables
/*
 * All state of the frame processor. A single global instance
 * (processframe_state) is used by every routine in this file.
 */
typedef struct _local_states {
    char frame[MAX_ENCODED_FRAME]; //frame (not referenced by the code in this file)
    int fLen;                      //frame size; decremented by my_read as bytes are consumed
    char *fptr;                    //current read ptr
    VLD_State vld;                 //VLD state
    int num_iter;                  //number of iteration (reset to 0 in process_frame_init)
    unsigned char ColorBuffer[MCU_sx * MCU_sy];
    unsigned char LineBuffer[MAX_WIDTH * MCU_sx];      //one row of decoded MCUs, x_size bytes per pixel line
    int dataBuff[(MAX_WIDTH * MCU_sy) / 4];            //LineBuffer repacked four bytes per int
    unsigned char displayBuff[MAX_WIDTH * MAX_HEIGHT]; //whole decoded picture passed to display_frame
    CImgDisplay *main_disp;                            //window created in process_frame_init
} Processframe_State;


Processframe_State processframe_state;

/*
 * One-time setup of the global decoder state: points the four HTable
 * slots at their backing arrays (DC0, DC1, AC0, AC1), clears the
 * iteration counter and creates the 320x240 output window, which is also
 * published through the global `display` pointer.
 */
void process_frame_init() {
    unsigned char *const tables[4] = {
        processframe_state.vld.DC_Table0,
        processframe_state.vld.DC_Table1,
        processframe_state.vld.AC_Table0,
        processframe_state.vld.AC_Table1
    };

    for (int slot = 0; slot < 4; slot++) {
        processframe_state.vld.HTable[slot] = tables[slot];
    }

    processframe_state.num_iter = 0;

    display = processframe_state.main_disp =
        new CImgDisplay(320, 240, "DOL MJPEG");
}

/**
 * Scan forward from the current read position for the next byte pair
 * 0xFF,code where code is neither 0x00 nor 0xD8, consuming everything up
 * to and including that pair.
 *
 * @param mark  receives 0xFF00|code when such a pair is found
 * @return 1 if a marker was found (read position is just after it),
 *         0 if the remaining data holds none. The scan never reads the
 *         byte after a 0xFF that is the last byte of the frame.
 */
static int seek_stream_marker(unsigned int *mark) {
    while (processframe_state.fLen > 1) {
        char *hit = (char *)memchr(processframe_state.fptr, 0xFF,
                                   processframe_state.fLen);
        int offset;
        unsigned char pot_mark;

        if (hit == NULL)
            break;
        offset = (int)(hit - processframe_state.fptr) + 2;
        if (offset > processframe_state.fLen)
            break; //0xFF is the last byte: there is no code byte to look at
        pot_mark = (unsigned char)hit[1];
        processframe_state.fLen -= offset;
        processframe_state.fptr += offset;
        if ((pot_mark != 0) && (pot_mark != 0xD8)) {
            *mark = (0xFF00 | (unsigned int)pot_mark);
            return 1;
        }
    }
    return 0;
}

/**
 * Decode and display one monochrome JPEG frame.
 *
 * Walks the marker segments of the frame: SOF sets the picture geometry,
 * DHT/DQT load the Huffman and quantization tables, SOS decodes all MCUs
 * into the display buffer and shows the picture, EOI (or an unknown
 * marker) ends processing.
 *
 * @param fLen  number of bytes available at fptr
 * @param fptr  start of the encoded frame
 * @return 0 when processing ended on EOI or an unknown marker, -1 when no
 *         SOI marker was found or the picture does not fit the decoder
 *         buffers. Running out of data while looking for a marker
 *         terminates the program.
 */
int process_frame(unsigned fLen, char *fptr) {
    unsigned int mark = 0;
    unsigned char buf, waste;
    int found_MK = 0;    //1 when `mark` was already fetched by a stream scan
    int x_size, y_size;  //picture size in pixel units
    int nblock;          //picture size in number of MCUs
    int running = 1;

    processframe_state.fLen = fLen;
    processframe_state.fptr = fptr;

    //now process segments as they appear first find the SOI marker
    do {
        mark = get_next_MK2();
        if (mark == (unsigned int)EOF) {
            dbgprintf(VERBOSE, "PROCESSFRAME\tNo SOI marker in frame\n");
            return -1;
        }
    } while (mark != SOI_MK);

    dbgprintf(VERBOSE, "PROCESSFRAME\tFound the SOI marker !\n");
    dbgprintf(VERBOSE, "PROCESSFRAME\tStart picture\n");

    while (running) {
        if (found_MK == 0) {
            mark = get_next_MK2();
        }
        //a marker handed over by a stream scan is consumed exactly once;
        //without this reset it would be dispatched again and again
        found_MK = 0;

        switch (mark) {
        case SOF_MK: {
            size_t stripe;

            dbgprintf(VERBOSE, "PROCESSFRAME\tFound the SOF marker\n");

            //header size, don't care
            get_size2();

            //precision, 8bit, don't care
            my_read(&waste, 1*(sizeof(waste)));

            //load basic image parameters
            y_size = get_size2();
            x_size = get_size2();

            //refuse pictures that do not fit the fixed-size work buffers
            stripe = (size_t)x_size * MCU_sy;
            if (stripe > sizeof(processframe_state.LineBuffer) ||
                stripe > sizeof(processframe_state.dataBuff) ||
                stripe * (size_t)intceil(y_size, MCU_sy) >
                    sizeof(processframe_state.displayBuff)) {
                dbgprintf(VERBOSE,
                          "ERROR PROCESSFRAME Picture size %d by %d exceeds decoder buffers\n",
                          x_size, y_size);
                return -1;
            }

            processframe_state.vld.x_size = x_size;
            processframe_state.vld.y_size = y_size;
            processframe_state.vld.mx_size = intceil(x_size, MCU_sx);
            processframe_state.vld.my_size = intceil(y_size, MCU_sy);

            dbgprintf(VERBOSE, "\tVLD\tpicture size: y_size=%d, x_size=%d\n",
                    y_size,x_size);
            dbgprintf(VERBOSE, "\tVLD\tpicture size: my_size=%d, mx_size=%d\n",
                   processframe_state.vld.my_size, processframe_state.vld.mx_size);

            //total number of MCU in picture
            nblock = (y_size/MCU_sy)*(x_size/MCU_sx);

            dbgprintf(VERBOSE, "\tPROCESSFRAME Number of blocks in picture is %d \n", nblock);
            dbgprintf(VERBOSE, "\tPROCESSFRAME Picture size is %d by %d\n", x_size, y_size);
            dbgprintf(VERBOSE, "\tPROCESSFRAME Monochrome JPEG picture!\n");

            //number of components,don't care
            my_read(&waste, 1*(sizeof(waste)));

            //component order
            my_read(&buf, 1*(sizeof(buf)));

            //sampling factor, don't care
            my_read(&buf, 1*(sizeof(buf)));

            //quantization table index,don't care for jfif
            my_read(&buf, 1*(sizeof(buf)));
            break;
        }

        case DHT_MK:
            dbgprintf(VERBOSE, "PROCESSFRAME\tDefining Huffman Tables\n");
            //VLD: loading Huffman table
            load_huff_tables();

            //remove the rest
            if (seek_stream_marker(&mark)) {
                dbgprintf(VERBOSE, "\tPROCESSFRAME\tfound marker while in vld=%x! 2\n",mark);
                found_MK = 1;
            }
            break;

        case DQT_MK:
            dbgprintf(VERBOSE, "PROCESSFRAME\tDefining Quantization Tables\n");
            get_size2(); //this is the table's size
            my_read(&waste, 1*(sizeof(waste))); //precision / table id, don't care
            //my_read keeps fptr and fLen in step while copying the table
            my_read(processframe_state.vld.QTable,
                    sizeof(processframe_state.vld.QTable));
            break;

        case DRI_MK:
            //skip size
            get_size2();
            get_size2();
            break;

        case SOS_MK: {
            cd_t comp; //descriptor of the single (luminance) component
            unsigned char *LineBuffer  = processframe_state.LineBuffer;
            int *dataBuff = processframe_state.dataBuff;
            unsigned char *displayBuff = processframe_state.displayBuff;
            unsigned stripe_bytes;

            dbgprintf(VERBOSE, "PROCESSFRAME\tFound the SOS marker\n");
            get_size2(); // don't care
            get_size2(); // don't care

            my_read(&buf, sizeof(buf));
            comp.DC_HT = first_quad(buf);
            comp.AC_HT = second_quad(buf);

            get_size2(); // don't care
            my_read(&buf, sizeof(buf));

            processframe_state.vld.bit_count = 0; // initialise vld decoder
            comp.PRED = 0; //initialise vld predictor

            stripe_bytes = processframe_state.vld.x_size * MCU_sy;

            //piece-wise processing: one row of MCUs at a time
            for (int j = 0; j < processframe_state.vld.my_size; j++) {
                for (int k = 0; k < processframe_state.vld.mx_size; k++) {
                    unsigned char block[8][8];
                    //only the columns inside the picture are copied, so the
                    //last MCU of a row cannot spill into the next pixel line
                    unsigned cols = processframe_state.vld.x_size - (unsigned)k * MCU_sx;
                    if (cols > MCU_sx)
                        cols = MCU_sx;

                    unpack_block2(block, &comp);
                    for (int l = 0; l < MCU_sy; l++) {
                        memcpy(LineBuffer + k * MCU_sx + l * processframe_state.vld.x_size,
                               &block[l][0], cols);
                    }
                }
                //pack four bytes per int, least significant byte first;
                //done in unsigned arithmetic to avoid signed overflow
                for (unsigned l = 0; l < stripe_bytes / 4; l++) {
                    dataBuff[l] = (int)((unsigned)LineBuffer[4 * l]
                        | ((unsigned)LineBuffer[4 * l + 1] << 8)
                        | ((unsigned)LineBuffer[4 * l + 2] << 16)
                        | ((unsigned)LineBuffer[4 * l + 3] << 24));
                }
                memcpy(displayBuff + j * stripe_bytes, dataBuff, stripe_bytes);
            }
            display_frame(processframe_state.vld.x_size, processframe_state.vld.y_size,
                          displayBuff);

            //remove the rest
            if (seek_stream_marker(&mark)) {
                dbgprintf(VERBOSE, "\tPROCESSFRAME\t found marker in data stream to vld:%x\n",
                        mark);
                found_MK = 1;
            }
            break;
        }

        case EOI_MK:
            dbgprintf(VERBOSE, "PROCESSFRAME\tpicture end\n");
            running = 0;
            break;

        case COM_MK:
            dbgprintf(VERBOSE, "PROCESSFRAME\tSkipping comments\n");
            skip_segment2();
            break;

        case (unsigned int)EOF:
            dbgprintf(VERBOSE | CASS, "ERROR PROCESSFRAME Ran out of input data !\n");
            exit(EXIT_FAILURE);

        default:
            if ((mark & MK_MSK) == APP_MK) {
                dbgprintf(VERBOSE, "PROCESSFRAME\tSkipping application data\n");
                skip_segment2();
                break;
            }
            if (RST_MK(mark)) {
                dbgprintf(VERBOSE, "PROCESSFRAME\tfound RST Marker\n");
                break;
            }
            running = 0; //unknown marker: give up on this frame
            break;
        }
    }
    return 0;
}

/**
 * Show a decoded picture in the global display window.
 *
 * @param x_size  picture width in pixels
 * @param y_size  picture height in pixels
 * @param buffer  x_size * y_size 8-bit samples, one pixel line after another
 */
void display_frame(unsigned x_size, unsigned y_size, unsigned char *buffer) {
    //create image from display_buffer and display it
    CImg<unsigned char> img(buffer, x_size, y_size);
    display->display(img);
    display->paint();
}

/**
 * Decode one 8x8 block from the bit stream into pixel values.
 *
 * Reads the DC difference and the run-length coded AC coefficients with
 * the Huffman tables selected by `comp`, multiplies by the quantization
 * table, undoes the zig-zag ordering and runs the inverse DCT.
 *
 * @param block  receives the 8x8 decoded samples
 * @param comp   table selectors and DC predictor; PRED is updated
 * @return always 0
 */
int unpack_block2(unsigned char block[8][8], cd_t *comp) {
    unsigned long bits = 0;
    unsigned char symbol;
    int T[64];
    int block2[8][8];

    memset((void *)T, 0, sizeof(T)); //zeroize block

    //first get the DC coefficient.
    //symbol is preset so that a failed table lookup (get_symbol2 leaves
    //it untouched) decodes as "no bits" instead of using garbage
    symbol = 0;
    get_symbol2(HUFF_ID(DC_CLASS, comp->DC_HT), &symbol);
    get_bits2(symbol, &bits);

    comp->PRED += reformat(bits, symbol);
    T[0] = comp->PRED;

    //then the AC ones
    //if symbol found is EOB and process not finish, missing values are
    //replaced by zero
    for (unsigned int i = 1; i < 64; i++) {
        unsigned int run, cat;

        symbol = 0; //a failed lookup is treated like EOB
        get_symbol2(HUFF_ID(AC_CLASS, comp->AC_HT), &symbol);

        if (symbol == 0x00) break;  //EOB
        if (symbol == 0xF0) {       //16 zero coefficients (15 here + loop increment)
            i += 15;
            continue;
        }
        cat = symbol & 15;
        run = (symbol >> 4) & 15;
        i += run;
        get_bits2(cat, &bits);
        if (i > 63) break;          //run points past the block: corrupt data
        T[i] = reformat(bits, cat);
        //when i == 63 the loop condition ends the block without an EOB
    }
    unquantify(T, processframe_state.vld.QTable);
    unZigZag((int*)block2, T);
    IDCT(block2, block);

    return 0;
}

//read `number` bits from the frame buffer and return them in *res,
//right aligned, first bit read ends up towards the MSB.
//A byte read directly after a 0xFF byte is dropped (presumably the JPEG
//stuffing byte). *res is 0 when number is 0 or negative. Always returns 0.
int get_bits2(int number, unsigned long *res) {
    unsigned long acc = 0;

    *res = 0;
    for (int n = 0; n < number; n++) {
        unsigned char cur;

        if (processframe_state.vld.bit_count != 0) {
            //bits left over from the previous byte
            cur = processframe_state.vld.window;
        } else {
            //window exhausted: fetch the next byte
            my_read(&cur, sizeof(cur));
            if (cur == 0xFF) {
                unsigned char dropped;
                my_read(&dropped, sizeof(dropped));
            }
            processframe_state.vld.bit_count = 8;
        }

        acc = (acc << 1) | ((cur >> 7) & 1);
        processframe_state.vld.window = cur << 1;
        processframe_state.vld.bit_count--;
    }
    *res = acc;
    return 0;
}

/*------------------------------------------------------------------*/
/* extract a single symbol from file using specified huffman table  */
/*                                                                  */
/* select: table index as produced by HUFF_ID (0..3)                */
/* res:    receives the decoded symbol on success, untouched on     */
/*         failure                                                  */
/* returns 0 on success, 1 when select is out of range, when no     */
/* code of 1..16 bits matches, or when the symbol index lies        */
/* outside the table                                                */
/*------------------------------------------------------------------*/
int get_symbol2(int select, unsigned char *res) {
    unsigned char bit;
    long code = 0;
    int length;
    int index;

    if (select < 0 || select > 3)
        return 1; //only four tables exist

    //extend the code one bit at a time until it fits a code length
    for (length = 0; length < 16; length++) {
        get_one_bit2(&bit);

        code = (2 * code) | bit;
        if (code <= processframe_state.vld.MaxCode[select][length])
            break;
    }
    if (length == 16)
        return 1; //no match: indexing the tables with 16 would be out of bounds

    index = processframe_state.vld.ValPtr[select][length] + code -
        processframe_state.vld.MinCode[select][length];
    if (index >= 0 && index < MAX_SIZE(select / 2)) {
        *(res)=processframe_state.vld.HTable[select][index];
        return 0;
    }
#ifndef CASS
    printf("\tWARNING:\tOverflowing symbol table !\n");
#endif
    return 1;
}

//read the next single bit of the frame buffer into *res (0 or 1).
//When the bit window is empty a new byte is fetched; the byte directly
//after a 0xFF byte is dropped (presumably the JPEG stuffing byte).
//Always returns 0.
int get_one_bit2(unsigned char *res) {
    unsigned char cur;

    *res = 0;
    if (processframe_state.vld.bit_count != 0) {
        //bits left over from the previous byte
        cur = processframe_state.vld.window;
    } else {
        my_read(&cur, sizeof(cur));
        if (cur == 0xFF) {
            unsigned char dropped;
            my_read(&dropped, sizeof(dropped));
        }
        processframe_state.vld.bit_count = 8;
    }

    //hand out the MSB and keep the rest left-aligned for the next call
    processframe_state.vld.window = cur << 1;
    processframe_state.vld.bit_count--;
    *res = (cur >> 7) & 1;
    return 0;
}



//------------------------------------------------------------------------
/* read two bytes from the frame buffer and return them as one 16-bit */
/* value, first byte most significant (big endian)                    */

inline unsigned int get_size2() {
    unsigned char high, low;

    my_read(&high, sizeof(high));
    my_read(&low, sizeof(low));
    return ((unsigned int)high << 8) | low;
}

//skip a segment we don't want
void skip_segment2()
{
    unsigned int size;
    unsigned char tag[5], waste;
    unsigned int i;

    size = get_size2();
    if (size > 5) {
        for (i = 0; i < 4; i++)
            my_read(&tag[i], 1*(sizeof(tag[i])));
        tag[4] = 0;
        size -= 4;
    }
    for(i=0; i<(size - 2); i++)
        my_read(&waste, 1*(sizeof(waste)));
}

/*----------------------------------------------------------------*/
/* find next marker of any type, returns it, positions just after */
/* EOF instead of marker if end of file met while searching ...   */
/*----------------------------------------------------------------*/
unsigned int get_next_MK2() {
    unsigned char bufp;
    unsigned int c;
    int ffmet = 0;
    int locpassed = -1;

    do {
        my_read(&bufp, sizeof(bufp));
        c = (unsigned int)bufp;
        switch (c) {
        case 0xFF:
            ffmet = 1;
            break;
        case 0x00:
            ffmet = 0;
            break;
        default:
            if (ffmet){
                dbgprintf(VERBOSE, "\tPROCESSFRAME\tfound marker %x\n",c);
                return (0xFF00 | c);
            }
            ffmet = 0;
            break;
        }
        locpassed++;
    } while (c!= EOF);
    return (unsigned int)EOF;
}

/*----------------------------------------------------------*/
/* Loading of Huffman table, with leaves drop ability       */
/*                                                          */
/* Reads one DHT segment, which may hold several tables.    */
/* For each table the MinCode/MaxCode/ValPtr entries of the */
/* 16 code lengths are derived from the per-length counts   */
/* and the symbols are stored in HTable; symbols beyond the */
/* table memory are read and dropped.                       */
/* returns 0 on success, 1 when a table header names a      */
/* class or id other than 0/1 (the rest of the segment is   */
/* skipped, no table memory is touched for that header)     */
/*----------------------------------------------------------*/
int load_huff_tables() {
    unsigned char aux, buf, waste;
    int size, Mclass, id, max;
    int LeavesN, LeavesT, i;
    int AuxCode;

    size = get_size2();/* this is the tables' size */

    size -= 2;
    while ((size > 0))  {
        my_read(&aux, 1*(sizeof(aux)));
        size--;

        Mclass = first_quad(aux);  /* AC or DC */
        id = second_quad(aux);    /* table no */

        if (Mclass > 1 || id > 1) {
            int skip = size;

            dbgprintf(INFO, "\tERROR:\tBad HTable identity %d!\n", id);
            /* HUFF_ID would index past the four tables: drop the rest */
            if (skip > processframe_state.fLen)
                skip = processframe_state.fLen;
            if (skip > 0) {
                processframe_state.fptr += skip;
                processframe_state.fLen -= skip;
            }
            return 1;
        }

        id = HUFF_ID(Mclass, id);

        dbgprintf(VERBOSE, "\tVLD\tLoading Table %d\n", id);

        LeavesT = 0;
        AuxCode = 0;
        for (i = 0; i < 16; i++) {
            my_read(&buf, 1*(sizeof(buf)));

            LeavesN = buf; /* number of codes with length i+1 */
            processframe_state.vld.ValPtr[id][i] = LeavesT;
            processframe_state.vld.MinCode[id][i] = AuxCode * 2;
            AuxCode = processframe_state.vld.MinCode[id][i] + LeavesN;
            processframe_state.vld.MaxCode[id][i] = (LeavesN) ? (AuxCode - 1) : (-1);
            LeavesT += LeavesN;
        }

        size -= 16;
        if (LeavesT > MAX_SIZE(Mclass)) {
            max = MAX_SIZE(Mclass);
            printf("\tWARNING:\tTruncating Table by %d symbols\n",
                   LeavesT - max);
        } else
            max = LeavesT;

        for (i = 0; i < max; i++) { /* get huffman table */
            my_read(&buf, 1*(sizeof(buf)));
            processframe_state.vld.HTable[id][i] = buf;  /* load in raw order */
        }

        for (i = max; i < LeavesT; i++) { /* drop what does not fit */
            my_read(&waste, 1*(sizeof(waste)));
        }
        size -= LeavesT;
        dbgprintf(VERBOSE, "\tVLD:\tUsing %d words of table memory\n", LeavesT);
    }
    return 0;
}

/**
 * debug print: printf-style output that is emitted only when `urgency`
 * shares at least one bit with the compile-time VERBOSITY setting.
 *
 * @param urgency  bit mask of message classes (SILENT, VERBOSE, INFO)
 * @param format   printf format string, followed by its arguments
 */
void dbgprintf(int urgency, const char* format, ...) {
    va_list argp;

    //parenthesized: `!=` binds tighter than `&`
    if ((urgency & VERBOSITY) == 0) {
        return;
    }
    va_start(argp, format);
    vprintf(format, argp); //printf cannot consume a va_list
    va_end(argp);
}

/**
 * Reorder 64 coefficients from zig-zag scan order: the i-th element of
 * src is stored at dest[ZIGZAG_COEFFS[i]].
 *
 * @return dest
 */
int* unZigZag(int dest[64], int src[64]) {
  const int *target = ZIGZAG_COEFFS;
  const int *in = src;

  for (int remaining = 64; remaining > 0; --remaining) {
    dest[*target] = *in;
    ++target;
    ++in;
  }
  return dest;
}

/**
 * Multiply each of the 64 coefficients in block by the quantization
 * table entry at the same index, in place.
 *
 * @return block
 */
int* unquantify(int block[64], unsigned char qtable[64]) {
  int *coef = block;
  const unsigned char *step = qtable;

  while (coef != block + 64) {
      *coef = *coef * *step;
      ++coef;
      ++step;
  }
  return block;
}

/**
 * Integer division rounded towards positive infinity: ceil(N / D).
 * Correct for every sign combination of N and D; D must not be 0.
 */
int intceil(int N, int D) {
   int quot = N / D; //truncated towards zero
   int rem = N % D;

   //truncation rounded down only if the exact quotient is positive and
   //inexact, i.e. the remainder is non-zero and has the sign of D
   if (rem != 0 && ((rem > 0) == (D > 0))) quot++;
   return quot;
}

/**
 * Integer division rounded towards negative infinity: floor(N / D).
 * Correct for every sign combination of N and D; D must not be 0.
 */
int intfloor(int N, int D) {
   int quot = N / D; //truncated towards zero
   int rem = N % D;

   //truncation rounded up only if the exact quotient is negative and
   //inexact, i.e. the remainder is non-zero and its sign differs from D's
   if (rem != 0 && ((rem < 0) != (D < 0))) quot--;
   return quot;
}

/**
 * transform JPEG number format into usual 2's complement format.
 *
 * @param s     the `good` raw bits read from the stream, right aligned
 * @param good  number of bits in s (the coefficient category)
 * @return s itself when its top bit is set (positive value), otherwise
 *         s - (2^good - 1) (negative value); 0 when good is not positive.
 *         `good` is expected to stay well below the width of int.
 */
int reformat(unsigned long s, int good) {
    unsigned long half;

    if (good <= 0)
        return 0;

    half = 1UL << (good - 1); //2^(good - 1)
    if (s < half) {
        //computed by subtraction; shifting -1 left would be undefined
        return (int)s - (int)((half << 1) - 1);
    }
    return (int)s;
}

/**
 * Divide x by 2^n with rounding: adds 2^(n-1) before the right shift,
 * and one less than that when x is negative.
 */
inline int DESCALE(int x, int n) {
    int bias = 1 << (n - 1);

    if (x < 0) {
        bias -= 1;
    }
    return (x + bias) >> n;
}

/**
 * Sum of x and y as a plain int; no clamping or masking is applied.
 */
inline int ADD(int x, int y) {
  return x + y;
}

/**
 * Difference x - y as a plain int; no clamping or masking is applied.
 */
inline int SUB(int x, int y) {
  return x - y;
}

/**
 * Multiply x by the fixed point constant c (C_BITS fractional bits) and
 * return the product rounded back to an integer.
 */
inline int CMUL(int c, int x) {
  const int half = 1 << (C_BITS - 1);
  int product = c * x;

  // less accurate rounding here also works fine
  return (product + half) >> C_BITS;
}

/**
 * rotate (x,y) over angle k * pi / 16 (counter-clockwise) and scale with f
 *
 * @param f       row of the cosine table: 0 uses the c*_1 constants,
 *                1 the c*_s2 constants
 * @param k       angle index, 0..8
 * @param x, y    input vector
 * @param rx, ry  receive the rotated vector
 */
inline void rot(int f, int k, int x, int y, int *rx, int *ry) {
  // nine entries per row: sin(k) is looked up as cos(8-k), so index 8
  // (cos(8*pi/16) = 0) is needed for k = 0
  static const int COS[2][9] = {
      {c0_1, c1_1, c2_1, c3_1, c4_1, c5_1, c6_1, c7_1, 0},
      {c0_s2, c1_s2, c2_s2, c3_s2, c4_s2, c5_s2, c6_s2, c7_s2, 0}
  };
  const int cos_k = COS[f][k];
  const int sin_k = COS[f][8 - k];

  *rx = SUB(CMUL(cos_k, x), CMUL(sin_k, y));
  *ry = ADD(CMUL(sin_k, x), CMUL(cos_k, y));
}


/**
 * inverse 1-D discrete cosine transform. The result Y is scaled
 * up by factor sqrt(8). original Loeffler algorithm
 *
 * Works in place on the 8 values at Y. Each stage reads only the results
 * of the previous one (Y -> z1 -> z2 -> z3 -> Y), so the input is not
 * overwritten before the last stage.
 * but(a,b,x,y) stores a-b in x and a+b in y.
 */
inline void idct_1d(int *Y) {
    int z1[8], z2[8], z3[8];

    //stage 1
    but(Y[0], Y[4], z1[1], z1[0]);
    rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]);
    but(Y[1], Y[7], z1[4], z1[7]);
    z1[5] = CMUL(sqrt2, Y[3]);
    z1[6] = CMUL(sqrt2, Y[5]);

    //stage 2
    but(z1[0], z1[3], z2[3], z2[0]);
    but(z1[1], z1[2], z2[2], z2[1]);
    but(z1[4], z1[6], z2[6], z2[4]);
    but(z1[7], z1[5], z2[5], z2[7]);

    //stage 3: the first four values pass through unchanged
    z3[0] = z2[0];
    z3[1] = z2[1];
    z3[2] = z2[2];
    z3[3] = z2[3];
    rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]);
    rot(0, 1, z2[5], z2[6], &z3[5], &z3[6]);

    //final stage 4: results are written back into Y
    but(z3[0], z3[7], Y[7], Y[0]);
    but(z3[1], z3[6], Y[6], Y[1]);
    but(z3[2], z3[5], Y[5], Y[2]);
    but(z3[3], z3[4], Y[4], Y[3]);
}

/**
 * inverse 2-D discrete cosine transform: a 1-D transform over every row
 * followed by one over every column. The result is offset by 128 and
 * clipped to 0..255.
 *
 * @param input   8x8 dequantized coefficients
 * @param output  receives the 8x8 samples
 */
void IDCT(int input[8][8], unsigned char output[8][8]) {
    int work[8][8];

    //pass 1: process rows.
    for (int row = 0; row < 8; row++) {
        //prescale the row
        for (int col = 0; col < 8; col++) {
            work[row][col] = SCALE(input[row][col], S_BITS);
        }
        //result is scaled up by factor sqrt(8) * 2^S_BITS.
        idct_1d(work[row]);
    }

    //pass 2: process columns.
    for (int col = 0; col < 8; col++) {
        int column[8];

        for (int row = 0; row < 8; row++) {
            column[row] = work[row][col];
        }
        idct_1d(column);

        //result is once more scaled up by a factor sqrt(8)
        for (int row = 0; row < 8; row++) {
            int pixel = 128 + DESCALE(column[row], S_BITS + 3);

            //clip to 8 bits unsigned
            if (pixel <= 0) {
                pixel = 0;
            } else if (pixel >= 255) {
                pixel = 255;
            }
            output[row][col] = pixel;
        }
    }
}


/**
 * Split the embedded video data (_STR) into frames and decode them.
 *
 * The data is scanned for 0xFF bytes; every 0xFF 0xD9 pair (end of image)
 * closes a frame, which is handed to process_frame() together with all
 * bytes since the end of the previous frame. Scanning stops when no
 * further 0xFF with a following byte exists.
 */
int main() {
    int len = sizeof(_STR);
    char *hptr = (char*)_STR; //scan position
    char *fptr = hptr;        //start of the frame being collected

    process_frame_init();

    while (len > 1) {
        //the last byte is excluded so that ptr[1] is always inside the data
        char *ptr = (char *)memchr(hptr, 0xFF, len - 1);
        int is_eoi;

        if (ptr == NULL)
            break; //no more markers: any trailing bytes are ignored

        is_eoi = (*(ptr + 1) == '\xD9'); //end of image 0xFFD9
        ptr += 2;
        if (is_eoi) {
            process_frame((unsigned)(ptr - fptr), fptr);
            fptr = ptr;
        }
        len -= (int)(ptr - hptr);
        hptr = ptr;
    }
    return 0;
}
