diff options
Diffstat (limited to 'k9vamps')
-rw-r--r-- | k9vamps/Makefile.am | 12 | ||||
-rw-r--r-- | k9vamps/ac.h | 83 | ||||
-rw-r--r-- | k9vamps/cputest.cpp | 274 | ||||
-rw-r--r-- | k9vamps/getvlc.h | 401 | ||||
-rw-r--r-- | k9vamps/k9fifo.cpp | 54 | ||||
-rw-r--r-- | k9vamps/k9fifo.h | 39 | ||||
-rw-r--r-- | k9vamps/k9requant.cpp | 2481 | ||||
-rw-r--r-- | k9vamps/k9requant.h | 278 | ||||
-rwxr-xr-x | k9vamps/k9requant2.h | 126 | ||||
-rwxr-xr-x | k9vamps/k9vamps.cpp | 1100 | ||||
-rwxr-xr-x | k9vamps/k9vamps.h | 168 | ||||
-rw-r--r-- | k9vamps/putvlc.h | 250 | ||||
-rw-r--r-- | k9vamps/qTable.h | 1141 | ||||
-rw-r--r-- | k9vamps/tcmemcpy.cpp | 483 |
14 files changed, 6890 insertions, 0 deletions
diff --git a/k9vamps/Makefile.am b/k9vamps/Makefile.am new file mode 100644 index 0000000..d432954 --- /dev/null +++ b/k9vamps/Makefile.am @@ -0,0 +1,12 @@ +AM_CPPFLAGS= -I$(srcdir) $(all_includes) + +KDE_CXXFLAGS = $(ENABLE_PERMISSIVE_FLAG) + +METASOURCES = AUTO +libk9vamps_la_LDFLAGS = $(all_libraries) +noinst_LTLIBRARIES = libk9vamps.la +libk9vamps_la_SOURCES = cputest.cpp getvlc.h k9fifo.cpp k9requant.cpp \ + k9vamps.cpp k9vamps.h putvlc.h qTable.h tcmemcpy.cpp +INCLUDES = -I$(top_srcdir)/dvdread -I$(top_srcdir)/k9vamps \ + -I$(top_srcdir)/libk9copy +noinst_HEADERS = k9requant.h ac.h k9fifo.h diff --git a/k9vamps/ac.h b/k9vamps/ac.h new file mode 100644 index 0000000..c79100f --- /dev/null +++ b/k9vamps/ac.h @@ -0,0 +1,83 @@ +/* + * ac.h + * + * Copyright (C) Thomas Östreich - November 2002 + * + * This file is part of transcode, a video stream processing tool + * + * transcode is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * transcode is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#ifndef _AC_H +#define _AC_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "k9common.h" + +#include <unistd.h> + +// #include "libtc/libtc.h" + +#ifdef __bsdi__ +typedef unsigned int uint32_t; +#endif + +//mm_support +#define MM_C 0x0000 //plain C (default) +#define MM_IA32ASM 0x0001 //32-bit assembler optimized code (non-MMX) +#define MM_AMD64ASM 0x0002 //64-bit assembler optimized code (non-MMX) +#define MM_CMOVE 0x0004 //CMOVcc instruction +#define MM_MMX 0x0008 //standard MMX +#define MM_MMXEXT 0x0010 //SSE integer functions or AMD MMX ext +#define MM_3DNOW 0x0020 //AMD 3DNOW +#define MM_SSE 0x0040 //SSE functions +#define MM_SSE2 0x0080 //PIV SSE2 functions +#define MM_3DNOWEXT 0x0100 //AMD 3DNow! ext. +#define MM_SSE3 0x0200 //Prescott SSE3 + +extern void * (*tc_memcpy)(void *, const void *, size_t); +extern void tc_memcpy_init(int verbose, int mmflags); + +extern int mm_flag; +int ac_mmflag(); +void ac_mmtest(); +char *ac_mmstr(int flag, int mode); + +//ac_memcpy +void * ac_memcpy_mmx(void *dest, const void *src, size_t bytes); +void * ac_memcpy_amdmmx(void *dest, const void *src, size_t bytes); +void * ac_memcpy_amd64(void *dest, const void *src, size_t bytes); + +//average (simple average over 2 rows) +int ac_average_mmx(char *row1, char *row2, char *out, int bytes); +int ac_average_sse(char *row1, char *row2, char *out, int bytes); +int ac_average_sse2(char *row1, char *row2, char *out, int bytes); + +//swap +int ac_swap_rgb2bgr_asm(char *im, int bytes); +int ac_swap_rgb2bgr_asm64(char *im, int pixels); + +//rescale +int ac_rescale_mmxext(char *row1, char *row2, char *out, int bytes, + unsigned long weight1, unsigned long weight2); +int ac_rescale_sse(char *row1, char *row2, char *out, int bytes, + unsigned long weight1, unsigned long weight2); +int ac_rescale_sse2(char *row1, char *row2, char *out, int bytes, + unsigned long weight1, unsigned long weight2); + +#endif diff --git a/k9vamps/cputest.cpp b/k9vamps/cputest.cpp new 
file mode 100644
index 0000000..643742b
--- /dev/null
+++ b/k9vamps/cputest.cpp
@@ -0,0 +1,274 @@
+/* Cpu detection code, extracted from mmx.h ((c)1997-99 by H. Dietz
+   and R. Fisher). Converted to C and improved by Fabrice Bellard */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ac.h"
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#endif
+
+#ifdef ARCH_X86_64
+# define REG_b "rbx"
+# define REG_S "rsi"
+#else
+# define REG_b "ebx"
+# define REG_S "esi"
+#endif
+
+//exported
+ int mm_flag;
+
+/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
+#define cpuid(index,eax,ebx,ecx,edx)\
+    __asm __volatile\
+    ("mov %%"REG_b", %%"REG_S"\n\t"\
+     "cpuid\n\t"\
+     "xchg %%"REG_b", %%"REG_S\
+     : "=a" (eax), "=S" (ebx),\
+       "=c" (ecx), "=d" (edx)\
+     : "0" (index));
+
+#define CPUID_STD_MMX 0x00800000
+#define CPUID_STD_SSE 0x02000000
+#define CPUID_STD_SSE2 0x04000000
+#define CPUID_STD_SSE3 0x00000001 // ECX!
+#define CPUID_EXT_AMD_3DNOW 0x80000000
+#define CPUID_EXT_AMD_3DNOWEXT 0x40000000
+#define CPUID_EXT_AMD_MMXEXT 0x00400000
+#define CPUID_EXT_CYR_MMX 0x00800000
+#define CPUID_EXT_CYR_MMXEXT 0x01000000
+
+/*
+ * Function to test if multimedia instructions are supported...
+ *
+ * Returns a bitmask of MM_* flags (see ac.h) built from CPUID leaf 1
+ * (standard features) and leaf 0x80000001 (vendor-specific extended
+ * features).  Returns 0 when the code is not compiled for x86/x86-64,
+ * when the CPUID instruction is not available, or when the highest
+ * standard leaf is 0.
+ */
+static int mm_support(void)
+{
+    int rval = 0;
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+    int eax, ebx, ecx, edx;
+    int max_std_level, max_ext_level, std_caps=0, std_caps2=0, ext_caps=0;
+    long a, c;
+
+    /* 12 vendor bytes + NUL; the initializer zero-fills bytes 11 and 12 */
+    char vendor[13] = "UnknownVndr";
+
+    __asm__ __volatile__ (
+        /* See if CPUID instruction is supported ... */
+        /* ... Get copies of EFLAGS into eax and ecx */
+        "pushf\n\t"
+        "pop %0\n\t"
+        "mov %0, %1\n\t"
+
+        /* ... Toggle the ID bit in one copy and store */
+        /* to the EFLAGS reg */
+        "xor $0x200000, %0\n\t"
+        "push %0\n\t"
+        "popf\n\t"
+
+        /* ... Get the (hopefully modified) EFLAGS */
+        "pushf\n\t"
+        "pop %0\n\t"
+        : "=a" (a), "=c" (c)
+        :
+        : "cc"
+        );
+
+    if (a == c)
+        return 0; /* CPUID not supported */
+
+    cpuid(0, max_std_level, ebx, ecx, edx);
+
+    /* highest cpuid is 0, no standard features */
+    if (max_std_level == 0)
+        return rval;
+
+    /* save the vendor string (register order is EBX, EDX, ECX);
+       memcpy instead of *(int *)vendor avoids the strict-aliasing and
+       alignment problems of writing ints through a char array */
+    memcpy(&vendor[0], &ebx, sizeof(ebx));
+    memcpy(&vendor[4], &edx, sizeof(edx));
+    memcpy(&vendor[8], &ecx, sizeof(ecx));
+
+    if (max_std_level >= 1){
+        /* leaf 1: std_caps receives EDX, std_caps2 receives ECX */
+        cpuid(1, eax, ebx, std_caps2, std_caps);
+        if (std_caps & (1<<15))
+            rval |= MM_CMOVE;
+        if (std_caps & (1<<23))
+            rval |= MM_MMX;
+        if (std_caps & (1<<25))
+            rval |= MM_MMXEXT | MM_SSE;
+        if (std_caps & (1<<26))
+            rval |= MM_SSE2;
+        /* SSE3 is a standard feature bit (leaf 1, ECX bit 0), so it is
+           reported for every vendor, not only inside the AMD branch */
+        if (std_caps2 & (1<<0))
+            rval |= MM_SSE3;
+    }
+
+    cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
+
+    if (max_ext_level >= 0x80000001) {
+        cpuid(0x80000001, eax, ebx, ecx, ext_caps);
+    }
+
+    /* leaf 0 is queried again here; its results are not read below */
+    cpuid(0, eax, ebx, ecx, edx);
+    if (strcmp(vendor, "AuthenticAMD") == 0) {
+        /* AMD */
+        if (ext_caps & (1<<22))
+            rval |= MM_MMXEXT;
+        /* 1U: shifting a signed 1 into bit 31 overflows int */
+        if (ext_caps & (1U<<31))
+            rval |= MM_3DNOW;
+        if (ext_caps & (1<<30))
+            rval |= MM_3DNOWEXT;
+    } else if (strcmp(vendor, "CentaurHauls") == 0) {
+        /* VIA C3 */
+        if (ext_caps & (1<<24))
+            rval |= MM_MMXEXT;
+    } else if (strcmp(vendor, "CyrixInstead") == 0) {
+        /* Cyrix */
+        if (ext_caps & (1<<24))
+            rval |= MM_MMXEXT;
+    }
+#if 0
+    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+        (rval&MM_MMX) ? "MMX ":"",
+        (rval&MM_MMXEXT) ? "MMX2 ":"",
+        (rval&MM_SSE) ? "SSE ":"",
+        (rval&MM_SSE2) ? "SSE2 ":"",
+        (rval&MM_3DNOW) ? "3DNow ":"",
+        (rval&MM_3DNOWEXT) ? "3DNowExt ":"");
+#endif
+#endif /* ARCH_X86(_64) */
+    return rval;
+}
+
+
+/*
+ * Re-detects the CPU capabilities, stores them in the global mm_flag and
+ * returns them.  MM_IA32ASM / MM_AMD64ASM are added according to the
+ * build architecture.  The "only if mm_flag==-1" caching is commented
+ * out, so detection runs on every call.
+ */
+int ac_mmflag(void)
+{
+    //if (mm_flag==-1) {
+    mm_flag = mm_support();
+#ifdef ARCH_X86
+    mm_flag |= MM_IA32ASM;
+#endif
+#ifdef ARCH_X86_64
+    mm_flag |= MM_AMD64ASM;
+#endif
+    //}
+    return(mm_flag);
+}
+
+/*
+ * Refreshes mm_flag via ac_mmflag().
+ * NOTE(review): the unconditional return below makes the whole report
+ * unreachable; presumably the output was silenced on purpose - confirm
+ * before removing either the return or the dead code.
+ */
+void ac_mmtest()
+{
+    mm_flag=-1;
+    int cc=ac_mmflag();
+    return;
+    printf("(%s) available multimedia extensions:", __FILE__);
+
+    if(cc & MM_SSE3) {
+        printf(" sse3\n");
+        return;
+    } else if(cc & MM_SSE2) {
+        printf(" sse2\n");
+        return;
+    } else if(cc & MM_SSE) {
+        printf(" sse\n");
+        return;
+    } else if(cc & MM_3DNOWEXT) {
+        printf(" 3dnowext\n");
+        return;
+    } else if(cc & MM_3DNOW) {
+        printf(" 3dnow\n");
+        return;
+    } else if(cc & MM_MMXEXT) {
+        printf(" mmxext\n");
+        return;
+    } else if(cc & MM_MMX) {
+        printf(" mmx\n");
+        return;
+    } else if(cc & MM_AMD64ASM) {
+        printf(" 64asm\n");
+        return;
+    } else if(cc & MM_IA32ASM) {
+        printf(" 32asm\n");
+        return;
+    } else printf(" C\n");
+}
+
+/*
+ * Appends src to string dst of size siz (unlike strncat, siz is the
+ * full size of dst, not space left). At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
+ * Returns strlen(src) + MIN(siz, strlen(initial dst)).
+ * If retval >= siz, truncation occurred.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t siz)
+{
+    char *d = dst;
+    const char *s = src;
+    size_t n = siz;
+    size_t dlen;
+
+    /* Find the end of dst and adjust bytes left but don't go past end */
+    while (n-- != 0 && *d != '\0')
+        d++;
+    dlen = d - dst;
+    n = siz - dlen;
+
+    if (n == 0)
+        return(dlen + strlen(s));
+    while (*s != '\0') {
+        if (n != 1) {
+            *d++ = *s;
+            n--;
+        }
+        s++;
+    }
+    *d = '\0';
+
+    return(dlen + (s - src)); /* count does not include NUL */
+}
+
+/*
+ * Describes a set of MM_* capability flags as text.
+ *
+ * flag: MM_* bitmask to describe, or -1 to detect the capabilities of
+ *       the running CPU with ac_mmflag().
+ * mode: 0 - returns the name of the single highest-ranked extension
+ *           present in the mask ("sse3" ... "mmx", "asm" or "C").
+ *       1 - returns a space-separated list of every extension present
+ *           in the mask, always ending in "C".
+ *       any other value returns "".
+ *
+ * The mode 1 result lives in a static buffer that is overwritten by
+ * the next mode 1 call, so the function is not reentrant and the
+ * caller must not free the returned pointer.
+ */
+char *ac_mmstr(int flag, int mode)
+{
+    static char mmstr[64]="";
+    int cc;
+
+    if(flag==-1)
+        //get full mm caps
+        cc=ac_mmflag();
+    else
+        cc=flag;
+
+    //return max supported mm extensions, or str for user provided flag
+    if(mode==0) {
+        if(cc & MM_SSE3) {
+            return("sse3");
+        } else if(cc & MM_SSE2) {
+            return("sse2");
+        } else if(cc & MM_SSE) {
+            return("sse");
+        } else if(cc & MM_3DNOWEXT) {
+            return("3dnowext");
+        } else if(cc & MM_3DNOW) {
+            return("3dnow");
+        } else if(cc & MM_MMXEXT) {
+            return("mmxext");
+        } else if(cc & MM_MMX) {
+            return("mmx");
+        } else if(cc & (MM_AMD64ASM|MM_IA32ASM)) {
+            return("asm");
+        } else return("C");
+    }
+
+    //return full capability list
+    if(mode==1) {
+        /* start from an empty string: the buffer is static, so without
+           this reset a second call would append to the previous list
+           and overflow into truncation */
+        mmstr[0] = '\0';
+        if(cc & MM_SSE3) strlcat(mmstr, "sse3 ", sizeof(mmstr));
+        if(cc & MM_SSE2) strlcat(mmstr, "sse2 ", sizeof(mmstr));
+        if(cc & MM_SSE) strlcat(mmstr, "sse ", sizeof(mmstr));
+        if(cc & MM_3DNOWEXT) strlcat(mmstr, "3dnowext ", sizeof(mmstr));
+        if(cc & MM_3DNOW) strlcat(mmstr, "3dnow ", sizeof(mmstr));
+        if(cc & MM_MMXEXT) strlcat(mmstr, "mmxext ", sizeof(mmstr));
+        if(cc & MM_MMX) strlcat(mmstr, "mmx ", sizeof(mmstr));
+        if(cc & (MM_AMD64ASM|MM_IA32ASM)) strlcat(mmstr, "asm ", sizeof(mmstr));
+        strlcat(mmstr, "C", sizeof(mmstr));
+        return(mmstr);
+    }
+
+    return("");
+}
diff --git a/k9vamps/getvlc.h b/k9vamps/getvlc.h
new file mode 100644
index 0000000..bcf82c0
--- /dev/null
+++ b/k9vamps/getvlc.h
@@ -0,0 +1,401 @@
+
+/* macroblock modes */
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 +#define DCT_TYPE_INTERLACED 32 + +/* motion_type */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) Flush_Bits(num) +#define COPYBITS(bit_buf,bits,num) Copy_Bits(num) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(inbitbuf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(inbitbuf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + 
+#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, 
{0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 
8}, {0x07, 8}, {0x07, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 
1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 
1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 
1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + 
{18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; diff --git a/k9vamps/k9fifo.cpp b/k9vamps/k9fifo.cpp new file mode 100644 index 0000000..7c32de7 --- /dev/null +++ b/k9vamps/k9fifo.cpp @@ -0,0 +1,54 @@ +// +// C++ Implementation: k9fifo +// +// Description: +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include "k9fifo.h" +#include "ac.h" + +uint32_t k9fifo::count() { + return m_count; +} + +void k9fifo::enqueue (uchar *_buffer, uint32_t _size) { + mutex.lock(); + if (_size+queue > INPUT_SIZE) { + uint32_t s1,s2; + s1=INPUT_SIZE-queue; + tc_memcpy(array+queue,_buffer,s1); + s2=_size-s1; + tc_memcpy(array,_buffer+s1,s2); + } else + tc_memcpy(array+queue,_buffer,_size); + queue=(queue+_size) %INPUT_SIZE; + m_count+=_size; + mutex.unlock(); +} + +void k9fifo::dequeue(uchar *_buffer,uint32_t _size) { + mutex.lock(); + if ( _size+head >INPUT_SIZE) { + uint32_t s1,s2; + s1=INPUT_SIZE - head; + tc_memcpy(_buffer,array+head,s1); + s2=_size-s1; + tc_memcpy(_buffer+s1,array,s2); + } else + tc_memcpy(_buffer,array+head,_size); + head =(head+_size)%INPUT_SIZE; + m_count -=_size; + mutex.unlock(); 
+} + +void k9fifo::clear() { + mutex.lock(); + head=queue; + m_count=0; + mutex.unlock(); +} diff --git a/k9vamps/k9fifo.h b/k9vamps/k9fifo.h new file mode 100644 index 0000000..42fbbef --- /dev/null +++ b/k9vamps/k9fifo.h @@ -0,0 +1,39 @@ +// +// C++ Interface: k9fifo +// +// Description: +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#ifndef K9FIFO_H +#define K9FIFO_H + +#include "k9common.h" +#include <qmutex.h> +/** + @author Jean-Michel PETIT <[email protected]> +*/ + +#define INPUT_SIZE ( 0x2000*1024) + +class k9fifo { +private: + uint32_t head,queue; + uchar *array; + uint32_t m_count; + QMutex mutex; +public: + k9fifo () { head=queue=m_count=0;array= new uchar[INPUT_SIZE];}; + uint32_t count(); // { return (queue-head)%INPUT_SIZE ;} + uint32_t freespace() {return INPUT_SIZE-count();}; + void enqueue (uchar *_buffer, uint32_t _size) ; + void dequeue(uchar *_buffer,uint32_t _size) ; + void clear(); + ~k9fifo() { delete[] array;}; +}; + +#endif diff --git a/k9vamps/k9requant.cpp b/k9vamps/k9requant.cpp new file mode 100644 index 0000000..009f76c --- /dev/null +++ b/k9vamps/k9requant.cpp @@ -0,0 +1,2481 @@ +// +// C++ Implementation: k9requant +// +// Description: +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include "k9requant.h" +#include "getvlc.h" +#include "putvlc.h" +#include "ac.h" +// Code from libmpeg2 and mpeg2enc copyright by their respective owners +// New code and modifications copyright Antoine Missout +// Thanks to Sven Goethel for error resilience patches +// Released under GPL license, see gnu.org + +// toggles: + +#define THREAD +// #define LOG_RATE_CONTROL // some stats +// #define DEMO // demo mode +// #define STAT // print stats on exit +// #define USE_FD // use 2 lasts args for input/output paths + +#define NDEBUG // turns off asserts 
+#define REMOVE_BYTE_STUFFING // removes series of 0x00 +// #define USE_GLOBAL_REGISTER // assign registers to bit buffers +#define MAX_ERRORS 0 // if above copy slice + +//#define CHANGE_BRIGHTNESS //add a param to command line, changing brightness: _will_not_recompress_, disables max_errors +//#define WIN // for windows fixes, use with USE_FD + +// params: + +// if not defined, non intra block in p frames are requantised +// if defined and >= 0, we keep coeff. in pos 0..n-1 in scan order +// and coeff which would have been non-null if requantised +// if defined and < 0 we drop max 1/x coeffs. +// experimental, looks better when undefined +// #define P_FRAME_NON_INTRA_DROP 8 + +// params for fact = 1.0, fact = 3.0 and fact = 10.0 +// we'll make a linear interpolation between +static const int i_factors[3] = { 5, 15, 65 }; +static const int p_factors[3] = { 5, 25, 85 }; +static const int b_factors[3] = { 25, 45, 105 }; + + +static const double i_min_stresses[3] = { 0.70, 0.40, 0.00 }; +static const double p_min_stresses[3] = { 0.60, 0.35, 0.00 }; +static const double b_min_stresses[3] = { 0.00, 0.00, 0.00 }; + + +// factor up to which alt table will be used +// (though alt_table gives better psnr up to factor around ~2.5 +// the result is less pleasing to watch than normal table +// so this is disabled) +static const double max_alt_table = 0.0; + +// includes +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <math.h> + +#ifndef USE_FD +#include <unistd.h> +#include <fcntl.h> +#endif + +// quant table +#include "qTable.h" + +// useful constants +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 + +// gcc +#ifdef HAVE_BUILTIN_EXPECT + #define likely(x) __builtin_expect ((x) != 0, 1) + #define unlikely(x) __builtin_expect ((x) != 0, 0) +#else + #define likely(x) (x) + #define unlikely(x) (x) +#endif + +#ifndef NDEBUG + #define DEB(msg) fprintf (stderr, "%s:%d " msg, __FILE__, __LINE__) + #define DEBF(format, args...) 
fprintf (stderr, "%s:%d " format, __FILE__, __LINE__, args) +#else + #define DEB(msg) + #ifdef WIN + #define DEBF(format, args) + #else + #define DEBF(format, args...) + #endif +#endif + +#ifndef THREAD +#define LOG(msg) fprintf (stderr, msg) +#ifdef WIN + #define LOGF(format, arg1) fprintf (stderr, format, arg1) +#else + #define LOGF(format, args...) fprintf (stderr, format, args) +#endif +#endif + + + +/*#define MOV_READ \ + mloka1 = rbuf - cbuf; if (mloka1) memmove(orbuf, cbuf, mloka1);\ + cbuf = rbuf = orbuf; rbuf += mloka1; +*/ + +#ifdef STAT + +#define RETURN \ + assert(rbuf >= cbuf);\ + mloka1 = rbuf - cbuf;\ + if (mloka1) { COPY(mloka1); }\ + WRITE \ + free(orbuf); \ + free(owbuf); \ + \ + LOG("Stats:\n");\ + \ + LOGF("Wanted fact_x: %.1f\n", fact_x);\ + \ + LOGF("cnt_i: %.0f ", (float)cnt_i); \ + if (cnt_i) LOGF("ori_i: %.0f new_i: %.0f fact_i: %.1f\n", (float)ori_i, (float)new_i, (float)ori_i/(float)new_i); \ + else LOG("\n");\ + \ + LOGF("cnt_p: %.0f ", (float)cnt_p); \ + if (cnt_p) LOGF("ori_p: %.0f new_p: %.0f fact_p: %.1f cnt_p_i: %.0f cnt_p_ni: %.0f propor: %.1f i\n", \ + (float)ori_p, (float)new_p, (float)ori_p/(float)new_p, (float)cnt_p_i, (float)cnt_p_ni, (float)cnt_p_i/((float)cnt_p_i+(float)cnt_p_ni)); \ + else LOG("\n");\ + \ + LOGF("cnt_b: %.0f ", (float)cnt_b); \ + if (cnt_b) LOGF("ori_b: %.0f new_b: %.0f fact_b: %.1f cnt_b_i: %.0f cnt_b_ni: %.0f propor: %.1f i\n", \ + (float)ori_b, (float)new_b, (float)ori_b/(float)new_b, (float)cnt_b_i, (float)cnt_b_ni, (float)cnt_b_i/((float)cnt_b_i+(float)cnt_b_ni)); \ + else LOG("\n");\ + \ + LOGF("Final fact_x: %.1f\n", (float)inbytecnt/(float)outbytecnt);\ + exit(0); + +#else + +#define RETURN \ + assert(rbuf >= cbuf);\ + mloka1 = rbuf - cbuf;\ + if (mloka1) { COPY(mloka1); }\ + WRITE \ + free(orbuf); \ + free(owbuf); \ + exit(0); + +#endif + #define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (f_code[0]); \ + if ((direction) & 
MACROBLOCK_MOTION_BACKWARD) \ + routine (f_code[1]); \ +} while (0) + +#define NEXT_MACROBLOCK \ +do { \ + h_offset += 16; \ + if (h_offset == horizontal_size_value) \ + { \ + v_offset += 16; \ + if (v_offset > (vertical_size_value - 16)) return; \ + h_offset = 0; \ + } \ +} while (0) + +#ifdef P_FRAME_NON_INTRA_DROP + #if (P_FRAME_NON_INTRA_DROP < 0) + #undef UPDATE_VAL + #define UPDATE_VAL + #define SAVE_VAL + #define WRITE_VAL \ + blk->level = val; \ + blk->run = i - li - 1; \ + li = i; \ + blk++; + #else + #define SAVE_VAL oval = val; + #define WRITE_VAL \ + if ((val) || (i < P_FRAME_NON_INTRA_DROP)) \ + { \ + blk->level = oval; \ + blk->run = i - li - 1; \ + li = i; \ + blk++; \ + } + #endif +#else + #define SAVE_VAL + #define WRITE_VAL \ + if (val) \ + { \ + blk->level = val; \ + blk->run = i - li - 1; \ + li = i; \ + blk++; \ + } +#endif + +#define UPDATE_VAL \ + val = curTable[val]; + +int quantisers[42] = + { + 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, + 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 72, 80, + 88, 96, 104, 112 + }; + +int non_linear_quantizer_scale [] = + { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 + }; + + +const uint8 non_linear_mquant_table[32] = + { + 0, 1, 2, 3, 4, 5, 6, 7, + 8,10,12,14,16,18,20,22, + 24,28,32,36,40,44,48,52, + 56,64,72,80,88,96,104,112 + }; +const uint8 map_non_linear_mquant[113] = + { + 0,1,2,3,4,5,6,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16, + 16,17,17,17,18,18,18,18,19,19,19,19,20,20,20,20,21,21,21,21,22,22, + 22,22,23,23,23,23,24,24,24,24,24,24,24,25,25,25,25,25,25,25,26,26, + 26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,29, + 29,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,31,31,31,31,31 + }; + + +k9requant::k9requant() +{ + + cbuf=rbuf=orbuf=wbuf=NULL; + quant_table_id = &quant_table_id_data[2048]; + rqt_run=false; + initvar(); +} +void 
k9requant::putbits(uint val, int n) +{ + assert(n < 32); + assert(!(val & (0xffffffffU << n))); + + while (unlikely(n >= outbitcnt)) + { + wbuf[0] = (outbitbuf << outbitcnt ) | (val >> (n - outbitcnt)); + SEEKW(1); + n -= outbitcnt; + outbitbuf = 0; + val &= ~(0xffffffffU << n); + outbitcnt = BITS_IN_BUF; + } + + if (likely(n)) + { + outbitbuf = (outbitbuf << n) | val; + outbitcnt -= n; + } + + assert(outbitcnt > 0); + assert(outbitcnt <= BITS_IN_BUF); +} + +void k9requant::Refill_bits(void) +{ + assert((rbuf - cbuf) >= 1); + inbitbuf |= cbuf[0] << (24 - inbitcnt); + inbitcnt += 8; + SEEKR(1) +} + +void k9requant::Flush_Bits(uint n) +{ + assert(inbitcnt >= n); + + inbitbuf <<= n; + inbitcnt -= n; + + assert( (!n) || ((n>0) && !(inbitbuf & 0x1)) ); + + while (unlikely(inbitcnt < 24)) Refill_bits(); +} + +uint k9requant::Show_Bits(uint n) +{ + return ((unsigned int)inbitbuf) >> (32 - n); +} + +uint k9requant::Get_Bits(uint n) +{ + uint Val = Show_Bits(n); + Flush_Bits(n); + return Val; +} + +uint k9requant::Copy_Bits(uint n) +{ + uint Val = Get_Bits(n); + putbits(Val, n); + return Val; +} + +void k9requant::flush_read_buffer() +{ + int i = inbitcnt & 0x7; + if (i) + { + if (inbitbuf >> (32 - i)) + { + DEBF("illegal inbitbuf: 0x%08X, %i, 0x%02X, %i\n", inbitbuf, inbitcnt, (inbitbuf >> (32 - i)), i); + sliceError++; + } + + inbitbuf <<= i; + inbitcnt -= i; + } + SEEKR(-1 * (inbitcnt >> 3)); + inbitcnt = 0; +} + +void k9requant::flush_write_buffer() +{ + if (outbitcnt != 8) putbits(0, outbitcnt); +} + +/////---- begin ext mpeg code +int k9requant::scale_quant(double quant ) +{ + int iquant; +#ifdef DEMO + if ((gopCount & 0x7F) < 10) // gop is ~ 0.5 sec, so 5 sec every ~minute (127 * 0.5 = 63.5 sec) + { + if (q_scale_type) return 112; + else return 62; + } +#endif + if (q_scale_type) + { + iquant = (int) floor(quant+0.5); + /* clip mquant to legal (linear) range */ + if (iquant<1) iquant = 1; + if (iquant>112) iquant = 112; + iquant = 
non_linear_mquant_table[map_non_linear_mquant[iquant]]; + } + else + { + /* clip mquant to legal (linear) range */ + iquant = (int)floor(quant+0.5); + if (iquant<2) iquant = 2; + if (iquant>62) iquant = 62; + iquant = (iquant/2)*2; // Must be *even* + } + return iquant; +} + +int k9requant::increment_quant(int quant) +{ +#ifdef DEMO + if ((gopCount & 0x7F) < 10) + { + if (q_scale_type) return 112; + else return 62; + } +#endif + if (q_scale_type) + { + if (quant < 1 || quant > 112) + { + DEBF("illegal quant: %d\n", quant); + if (quant > 112) quant = 112; + else if (quant < 1) quant = 1; + DEBF("illegal quant changed to : %d\n", quant); + sliceError++; + } + quant = map_non_linear_mquant[quant] + 1; + if (quant > 31) quant = 31; + quant = non_linear_mquant_table[quant]; + } + else + { + if ((quant & 1) || (quant < 2) || (quant > 62)) + { + DEBF("illegal quant: %d\n", quant); + if (quant & 1) quant--; + if (quant > 62) quant = 62; + else if (quant < 2) quant = 2; + DEBF("illegal quant changed to : %d\n", quant); + sliceError++; + } + quant += 2; + if (quant > 62) quant = 62; + } + return quant; +} + +int k9requant::intmax( register int x, register int y ) +{ return x < y ? y : x; } + +int k9requant::intmin( register int x, register int y ) +{ return x < y ? 
x : y; } + + +int k9requant::getNewQuant(int curQuant, int intra) +{ +#ifdef CHANGE_BRIGHTNESS + return curQuant; +#else + int mquant = 0; + double cStress; + + switch (picture_coding_type) + { + case I_TYPE: + cStress = (stress_factor - i_min_stress) / (1.0 - i_min_stress); + mquant = intmax(scale_quant(curQuant + i_factor*cStress), increment_quant(curQuant)); + break; + + case P_TYPE: + cStress = (stress_factor - p_min_stress) / (1.0 - p_min_stress); + if (intra) // since it might be used as a ref, treat it as an I frame block + mquant = intmax(scale_quant(curQuant + i_factor*cStress), increment_quant(curQuant)); + else + mquant = intmax(scale_quant(curQuant + p_factor*cStress), increment_quant(curQuant)); + break; + + case B_TYPE: + cStress = (stress_factor - b_min_stress) / (1.0 - b_min_stress); + mquant = intmax(scale_quant(curQuant + b_factor*cStress), increment_quant(curQuant)); + break; + + default: + assert(0); + break; + } + + assert(mquant >= curQuant); + + return mquant; +#endif +} + +int k9requant::isNotEmpty(RunLevel *blk) +{ + return (blk->level); +} + + +// return != 0 if error +int k9requant::putAC(int run, int signed_level, int vlcformat) +{ + int level, len; + const VLCtable *ptab = NULL; + + level = (signed_level<0) ? 
-signed_level : signed_level; /* abs(signed_level) */ + + // assert(!(run<0 || run>63 || level==0 || level>2047)); + if(run<0 || run>63) + { + DEBF("illegal run: %d\n", run); + sliceError++; + return 1; + } + if(level==0 || level>2047) + { + DEBF("illegal level: %d\n", level); + sliceError++; + return 1; + } + + len = 0; + + if (run<2 && level<41) + { + if (vlcformat) ptab = &dct_code_tab1a[run][level-1]; + else ptab = &dct_code_tab1[run][level-1]; + len = ptab->len; + } + else if (run<32 && level<6) + { + if (vlcformat) ptab = &dct_code_tab2a[run-2][level-1]; + else ptab = &dct_code_tab2[run-2][level-1]; + len = ptab->len; + } + + if (len) /* a VLC code exists */ + { + putbits(ptab->code, len); + putbits(signed_level<0, 1); /* sign */ + } + else + { + putbits(1l, 6); /* Escape */ + putbits(run, 6); /* 6 bit code for run */ + putbits(((uint)signed_level) & 0xFFF, 12); + } + + return 0; +} + +// return != 0 if error +int k9requant::putACfirst(int run, int val) +{ + if (run==0 && (val==1 || val==-1)) + { + putbits(2|((val<0) ? 1 : 0), 2); + return 0; + } + else return putAC(run,val,0); +} + +void k9requant::putnonintrablk(RunLevel *blk) +{ + assert(blk->level); + + if (putACfirst(blk->run, blk->level)) return; + blk++; + + while(blk->level) + { + if (putAC(blk->run, blk->level, 0)) return; + blk++; + } + + putbits(2,2); +} + +void k9requant::putcbp(int cbp) +{ + assert(cbp > 0 && cbp < 64); + putbits(cbptable[cbp].code,cbptable[cbp].len); +} + +void k9requant::putmbtype(int mb_type) +{ + putbits(mbtypetab[picture_coding_type-1][mb_type].code, + mbtypetab[picture_coding_type-1][mb_type].len); +} + +int k9requant::get_macroblock_modes () +{ + int macroblock_modes; + const MBtab * tab; + + switch (picture_coding_type) + { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(frame_pred_frame_dct)) && (picture_structure == FRAME_PICTURE)) + { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture_structure != FRAME_PICTURE) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } + else if (frame_pred_frame_dct) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } + else + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture_structure != FRAME_PICTURE) + { + if (! (macroblock_modes & MACROBLOCK_INTRA)) + { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } + else if (frame_pred_frame_dct) + { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } + else + { + if (macroblock_modes & MACROBLOCK_INTRA) goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + default: + return 0; + } + +} + +int k9requant::get_quantizer_scale () +{ + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (!quantizer_scale_code) + { + DEBF("illegal quant scale code: %d\n", quantizer_scale_code); + sliceError++; + quantizer_scale_code++; + } + + if (q_scale_type) return non_linear_quantizer_scale[quantizer_scale_code]; + else return quantizer_scale_code << 1; +} + +void k9requant::get_motion_delta (const int f_code) +{ +#define bit_buf (inbitbuf) + const MVtab * tab; + + if (bit_buf & 0x80000000) + { + COPYBITS (bit_buf, bits, 1); + return; + } + else if (bit_buf >= 0x0c000000) + { + + tab = MV_4 + UBITS (bit_buf, 4); + COPYBITS (bit_buf, bits, tab->len + 1); + if (f_code) COPYBITS (bit_buf, bits, f_code); + return; + } + else + { + + tab = MV_10 + UBITS (bit_buf, 10); + COPYBITS (bit_buf, bits, tab->len + 1); + if (f_code) COPYBITS (bit_buf, bits, f_code); + return; + } +} + + +void k9requant::get_dmv () +{ + const DMVtab * tab; + tab = DMV_2 + UBITS (bit_buf, 2); + COPYBITS (bit_buf, bits, tab->len); + return; +} + +int k9requant::get_coded_block_pattern () +{ +#define bit_buf (inbitbuf) + const CBPtab * tab; + + if (bit_buf >= 0x20000000) + { + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + else + { + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } +} + +int k9requant::get_luma_dc_dct_diff () +{ +#define bit_buf 
(inbitbuf) +#ifdef CHANGE_BRIGHTNESS + #define DOBITS(x, y, z) DUMPBITS(x, y, z) +#else + #define DOBITS(x, y, z) COPYBITS(x, y, z) +#endif + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) + { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) + { + DOBITS (bit_buf, bits, tab->len); + //dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + dc_diff = UBITS (bit_buf, size); if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size); + DOBITS (bit_buf, bits, size); + return dc_diff; + } + else + { + DOBITS (bit_buf, bits, 3); + return 0; + } + } + else + { + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DOBITS (bit_buf, bits, tab->len); + //dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + dc_diff = UBITS (bit_buf, size); if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size); + DOBITS (bit_buf, bits, size); + return dc_diff; + } +} + +int k9requant::get_chroma_dc_dct_diff () +{ +#define bit_buf (inbitbuf) + + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) + { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) + { + COPYBITS (bit_buf, bits, tab->len); + //dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + dc_diff = UBITS (bit_buf, size); if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size); + COPYBITS (bit_buf, bits, size); + return dc_diff; + } + else + { + COPYBITS (bit_buf, bits, 2); + return 0; + } + } + else + { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + COPYBITS (bit_buf, bits, tab->len + 1); + //dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + dc_diff = UBITS (bit_buf, size); if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size); + COPYBITS (bit_buf, bits, size); + return dc_diff; + } +} + + +void k9requant::get_intra_block_B14 () +{ +#define bit_buf (inbitbuf) + int i, li; + int val; 
+ const DCTtab * tab; + + li = i = 0; + + while (1) + { + if (bit_buf >= 0x28000000) + { + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) break; /* end of block */ + + normal_code: + DUMPBITS (bit_buf, bits, tab->len); + val = tab->level; + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + UPDATE_VAL + if (val) + { + if (putAC(i - li - 1, val, 0)) break; + li = i; + } + + DUMPBITS (bit_buf, bits, 1); + + continue; + } + else if (bit_buf >= 0x04000000) + { + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) goto normal_code; + + /* escape code */ + i += (UBITS (bit_buf, 12) & 0x3F) - 64; + if (i >= 64) + { + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + DUMPBITS (bit_buf, bits, 12); + val = SBITS (bit_buf, 12); + UPDATE_VAL + if (val) + { + if (putAC(i - li - 1, val, 0)) break; + li = i; + } + + DUMPBITS (bit_buf, bits, 12); + + continue; + } + else if (bit_buf >= 0x02000000) + { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00800000) + { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00200000) + { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else + { + tab = DCT_16 + UBITS (bit_buf, 16); + DUMPBITS (bit_buf, bits, 16); + i += tab->run; + if (i < 64) goto normal_code; + } + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + COPYBITS (bit_buf, bits, 2); /* end of block code */ +} + +void k9requant::get_intra_block_B15 () +{ +#define bit_buf (inbitbuf) + int i, li; + int val; + const DCTtab * tab; + + li = i = 0; + + while (1) + { + if (bit_buf >= 0x04000000) + { + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + { + normal_code: + DUMPBITS (bit_buf, bits, tab->len); + + val = tab->level; + val = 
(val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + UPDATE_VAL + if (val) + { + if (putAC(i - li - 1, val, 1)) break; + li = i; + } + + DUMPBITS (bit_buf, bits, 1); + + continue; + } + else + { + if (i >= 128) break; /* end of block */ + + i += (UBITS (bit_buf, 12) & 0x3F) - 64; + + if (i >= 64) + { + sliceError++; + break; /* illegal, check against buffer overflow */ + } + + DUMPBITS (bit_buf, bits, 12); + val = SBITS (bit_buf, 12); + UPDATE_VAL + if (val) + { + if (putAC(i - li - 1, val, 1)) break; + li = i; + } + + DUMPBITS (bit_buf, bits, 12); + + continue; + } + } + else if (bit_buf >= 0x02000000) + { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00800000) + { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00200000) + { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else + { + tab = DCT_16 + UBITS (bit_buf, 16); + DUMPBITS (bit_buf, bits, 16); + i += tab->run; + if (i < 64) goto normal_code; + } + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + COPYBITS (bit_buf, bits, 4); /* end of block code */ +} + +int k9requant::get_non_intra_block_rq (RunLevel *blk) +{ +#define bit_buf (inbitbuf) + //int q = quantizer_scale; + //int nq = new_quantizer_scale, tst = (nq / q) + ((nq % q) ? 
1 : 0); + int i, li; + int val; + const DCTtab * tab; + + li = i = -1; + + if (bit_buf >= 0x28000000) + { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } + else goto entry_2; + + while (1) + { + if (bit_buf >= 0x28000000) + { + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + + DUMPBITS (bit_buf, bits, tab->len); + val = tab->level; + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + UPDATE_VAL + if (val) + { + blk->level = val; + blk->run = i - li - 1; + li = i; + blk++; + } + + DUMPBITS (bit_buf, bits, 1); + + continue; + } + + entry_2: + if (bit_buf >= 0x04000000) + { + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) goto normal_code; + + /* escape code */ + + i += (UBITS (bit_buf, 12) & 0x3F) - 64; + + if (i >= 64) + { + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + DUMPBITS (bit_buf, bits, 12); + val = SBITS (bit_buf, 12); + UPDATE_VAL + if (val) + { + blk->level = val; + blk->run = i - li - 1; + li = i; + blk++; + } + + DUMPBITS (bit_buf, bits, 12); + + continue; + } + else if (bit_buf >= 0x02000000) + { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00800000) + { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00200000) + { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else + { + tab = DCT_16 + UBITS (bit_buf, 16); + DUMPBITS (bit_buf, bits, 16); + + i += tab->run; + if (i < 64) goto normal_code; + } + + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + + blk->level = 0; + + return i; +} + + +int k9requant::get_non_intra_block_sav (RunLevel *blk, int cc) +{ +#define bit_buf (inbitbuf) + 
int i, li; + int val; + const DCTtab * tab; + +#ifdef P_FRAME_NON_INTRA_DROP + #if (P_FRAME_NON_INTRA_DROP < 0) + RunLevel *oblk = blk; +#else + int oval; +#endif +#endif + + li = i = -1; + + if (bit_buf >= 0x28000000) + { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } + else goto entry_2; + + while (1) + { + if (bit_buf >= 0x28000000) + { + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + + DUMPBITS (bit_buf, bits, tab->len); + val = tab->level; + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + SAVE_VAL + if (li == -1) + { + if (abs(val) < abs(mb_sav_lev)) + { + mb_sav_c = cc; + mb_sav_lev = val; + mb_sav_run = i - li - 1; + } + } + UPDATE_VAL + WRITE_VAL + + DUMPBITS (bit_buf, bits, 1); + + continue; + } + + entry_2: + if (bit_buf >= 0x04000000) + { + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) goto normal_code; + + /* escape code */ + + i += (UBITS (bit_buf, 12) & 0x3F) - 64; + + if (i >= 64) + { + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + DUMPBITS (bit_buf, bits, 12); + val = SBITS (bit_buf, 12); + SAVE_VAL + if (li == -1) + { + if (abs(val) < abs(mb_sav_lev)) + { + mb_sav_c = cc; + mb_sav_lev = val; + mb_sav_run = i - li - 1; + } + } + UPDATE_VAL + WRITE_VAL + + DUMPBITS (bit_buf, bits, 12); + + continue; + } + else if (bit_buf >= 0x02000000) + { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00800000) + { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00200000) + { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else + { + tab = DCT_16 + UBITS (bit_buf, 16); + DUMPBITS (bit_buf, bits, 16); + + i += tab->run; + if (i < 64) goto normal_code; + } + + sliceError++; + 
break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + +#ifdef P_FRAME_NON_INTRA_DROP + #if (P_FRAME_NON_INTRA_DROP < 0) + blk -= (int)((blk - oblk) * (stress_factor / P_FRAME_NON_INTRA_DROP)); +#ifdef DEMO + if ((gopCount & 0x7F) < 10) blk = oblk; +#endif + #endif +#endif + + blk->level = 0; + + return i; +} + +#ifdef P_FRAME_NON_INTRA_DROP +int k9requant::get_non_intra_block_drop (RunLevel *blk, int cc) +{ +#define bit_buf (inbitbuf) + int i, li; + int val; + const DCTtab * tab; +#if (P_FRAME_NON_INTRA_DROP < 0) + RunLevel *oblk = blk; +#else + int oval; +#endif + + li = i = -1; + + if (bit_buf >= 0x28000000) + { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } + else goto entry_2; + + while (1) + { + if (bit_buf >= 0x28000000) + { + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + + DUMPBITS (bit_buf, bits, tab->len); + val = tab->level; + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + SAVE_VAL + UPDATE_VAL + WRITE_VAL + + DUMPBITS (bit_buf, bits, 1); + + continue; + } + + entry_2: + if (bit_buf >= 0x04000000) + { + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) goto normal_code; + + /* escape code */ + + i += (UBITS (bit_buf, 12) & 0x3F) - 64; + + if (i >= 64) + { + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + + DUMPBITS (bit_buf, bits, 12); + val = SBITS (bit_buf, 12); + SAVE_VAL + UPDATE_VAL + WRITE_VAL + + DUMPBITS (bit_buf, bits, 12); + + continue; + } + else if (bit_buf >= 0x02000000) + { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00800000) + { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) goto normal_code; + } + else if (bit_buf >= 0x00200000) + { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + 
i += tab->run; + if (i < 64) goto normal_code; + } + else + { + tab = DCT_16 + UBITS (bit_buf, 16); + DUMPBITS (bit_buf, bits, 16); + + i += tab->run; + if (i < 64) goto normal_code; + } + + sliceError++; + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + +#if (P_FRAME_NON_INTRA_DROP < 0) + blk -= (int)((blk - oblk) * (stress_factor / P_FRAME_NON_INTRA_DROP)); +#ifdef DEMO + if ((gopCount & 0x7F) < 10) blk = oblk; +#endif + #endif + + blk->level = 0; + + return i; +} +#endif + +#ifdef CHANGE_BRIGHTNESS +void k9requant::putDC(const sVLCtable *tab, int val) +{ + int absval, size; + absval = abs(val); + size = 0; + while (absval) + { + absval >>= 1; + size++; + } + putbits(tab[size].code,tab[size].len); + if (size!=0) + { + if (val>=0) absval = val; + else absval = val + (1<<size) - 1; /* val + (2 ^ size) - 1 */ + putbits(absval,size); + } +} +#endif +void k9requant::slice_intra_DCT (const int cc) +{ +#ifdef CHANGE_BRIGHTNESS + if (cc == 0) + { + int val; + int bri = get_luma_dc_dct_diff(); + if (dc_reset) + { + val = bri + (128 << intra_dc_precision); + old_dc_pred = val; + + val += delta_bright << intra_dc_precision; + if (val > (255 << intra_dc_precision)) val = 255 << intra_dc_precision; + else if (val < 0) val = 0; + + bri = val - (128 << intra_dc_precision); + new_dc_pred = val; + + dc_reset = 0; + } + else + { + val = bri + old_dc_pred; + old_dc_pred = val; + + val += delta_bright << intra_dc_precision; + if (val > (255 << intra_dc_precision)) val = 255 << intra_dc_precision; + else if (val < 0) val = 0; + + bri = val - new_dc_pred; + new_dc_pred = val; + } + putDC(DClumtab, bri); + } +#else + if (cc == 0) get_luma_dc_dct_diff (); +#endif + else get_chroma_dc_dct_diff (); + + if (intra_vlc_format) get_intra_block_B15 (); + else get_intra_block_B14 (); +} + +void k9requant::slice_non_intra_DCT (int cur_block) +{ +#ifdef P_FRAME_NON_INTRA_DROP + if (picture_coding_type == P_TYPE) + { + 
if ((h_offset == 0) || (h_offset == horizontal_size_value - 16)) + get_non_intra_block_sav(block[cur_block], cur_block); + else + get_non_intra_block_drop(block[cur_block], cur_block); + } + else + get_non_intra_block_rq(block[cur_block]); +#else + if ((picture_coding_type == P_TYPE) && ((h_offset == 0) || (h_offset == horizontal_size_value - 16))) + get_non_intra_block_sav(block[cur_block], cur_block); + else + get_non_intra_block_rq(block[cur_block]); +#endif +} + +void k9requant::motion_fr_frame ( uint f_code[2] ) +{ + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); +} + +void k9requant::motion_fr_field ( uint f_code[2] ) +{ + COPYBITS (bit_buf, bits, 1); + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); + + COPYBITS (bit_buf, bits, 1); + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); +} + +void k9requant::motion_fr_dmv ( uint f_code[2] ) +{ + get_motion_delta (f_code[0]); + get_dmv (); + + get_motion_delta (f_code[1]); + get_dmv (); +} + +void k9requant::motion_fr_conceal ( ) +{ + get_motion_delta (f_code[0][0]); + get_motion_delta (f_code[0][1]); + + COPYBITS (bit_buf, bits, 1); +} + +void k9requant::motion_fi_field ( uint f_code[2] ) +{ + COPYBITS (bit_buf, bits, 1); + + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); +} + +void k9requant::motion_fi_16x8 ( uint f_code[2] ) +{ + COPYBITS (bit_buf, bits, 1); + + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); + + COPYBITS (bit_buf, bits, 1); + + get_motion_delta (f_code[0]); + get_motion_delta (f_code[1]); +} + +void k9requant::motion_fi_dmv ( uint f_code[2] ) +{ + get_motion_delta (f_code[0]); + get_dmv (); + + get_motion_delta (f_code[1]); + get_dmv (); +} + +void k9requant::motion_fi_conceal () +{ + COPYBITS (bit_buf, bits, 1); + + get_motion_delta (f_code[0][0]); + get_motion_delta (f_code[0][1]); + + COPYBITS (bit_buf, bits, 1); +} + + +void k9requant::putmbdata(int macroblock_modes) +{ + putmbtype(macroblock_modes & 0x1F); + + /*switch 
(picture_coding_type) + { + case I_TYPE: + if ((! (frame_pred_frame_dct)) && (picture_structure == FRAME_PICTURE)) + putbits(macroblock_modes & DCT_TYPE_INTERLACED ? 1 : 0, 1); + break; + + case P_TYPE: + if (picture_structure != FRAME_PICTURE) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + break; + } + else if (frame_pred_frame_dct) break; + else + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + putbits(macroblock_modes & DCT_TYPE_INTERLACED ? 1 : 0, 1); + break; + } + + case B_TYPE: + if (picture_structure != FRAME_PICTURE) + { + if (! (macroblock_modes & MACROBLOCK_INTRA)) + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + break; + } + else if (frame_pred_frame_dct) break; + else + { + if (macroblock_modes & MACROBLOCK_INTRA) goto intra; + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { + intra: + putbits(macroblock_modes & DCT_TYPE_INTERLACED ? 1 : 0, 1); + } + break; + } + }*/ + + if (macroblock_modes & (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD)) + { + if (picture_structure == FRAME_PICTURE) + { + if (frame_pred_frame_dct == 0) + { + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + } + } + else + { + putbits((macroblock_modes & MOTION_TYPE_MASK) / MOTION_TYPE_BASE, 2); + } + } + if ((picture_structure == FRAME_PICTURE) && (frame_pred_frame_dct == 0) && (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) + { + putbits(macroblock_modes & DCT_TYPE_INTERLACED ? 1 : 0, 1); + } +} + +void k9requant::put_quantiser(int quantiser) +{ + putbits(q_scale_type ? 
map_non_linear_mquant[quantiser] : quantiser >> 1, 5); + last_coded_scale = quantiser; +} + +void k9requant::putaddrinc(int addrinc) +{ + mb_out += addrinc; + //LOGF("mb_out: %i\n", mb_out); + if (mb_out > (horizontal_size_value >> 4)) + { + sliceError++; + //LOGF("mb_out: %i, hsv: %i, curo: %i\n", mb_out, horizontal_size_value, (int)outbytecnt + (wbuf - owbuf)); + } + while (addrinc>33) + { + putbits(0x08,11); /* macroblock_escape */ + addrinc-= 33; + } + assert( addrinc >= 1 && addrinc <= 33 ); + putbits(addrinctab[addrinc-1].code,addrinctab[addrinc-1].len); +} + +int k9requant::slice_init (int code) +{ +#define bit_buf (inbitbuf) + + int offset; + const MBAtab * mba; + + mb_out = 0; + v_offset = (code - 1) * 16; + + quantizer_scale = get_quantizer_scale (); + new_quantizer_scale = getNewQuant(quantizer_scale, 0); + put_quantiser(new_quantizer_scale); + + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) + { + DUMPBITS (bit_buf, bits, 9); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) + { + if (bit_buf >= 0x08000000) + { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } + else if (bit_buf >= 0x01800000) + { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } + else switch (UBITS (bit_buf, 12)) + { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + continue; + default: /* error */ + sliceError++; + return 1; + } + } + mb_add = offset + mba->mba + 1; + mb_skip = 0; + COPYBITS (bit_buf, bits, 1); + DUMPBITS(bit_buf, bits, mba->len); + + h_offset = (offset + mba->mba) << 4; + + while (h_offset - (int)horizontal_size_value >= 0) + { + h_offset -= horizontal_size_value; + v_offset += 16; + } + + if (v_offset > (vertical_size_value - 16)) return 1; + + return 0; + +} + +void k9requant::mpeg2_slice ( const int code ) +{ +#define bit_buf (inbitbuf) + +#ifdef CHANGE_BRIGHTNESS + dc_reset = 1; +#endif + + if (slice_init (code)) return; + + while (1) + { + int 
macroblock_modes; + int mba_inc; + const MBAtab * mba; + + macroblock_modes = get_macroblock_modes (); + if (macroblock_modes & MACROBLOCK_QUANT) quantizer_scale = get_quantizer_scale (); + + if (macroblock_modes & MACROBLOCK_INTRA) + { +#ifdef STAT + if (picture_coding_type == P_TYPE) cnt_p_i++; + else if (picture_coding_type == B_TYPE) cnt_b_i++; +#endif + + new_quantizer_scale = getNewQuant(quantizer_scale, 1); + if (last_coded_scale == new_quantizer_scale) macroblock_modes &= 0xFFFFFFEF; // remove MACROBLOCK_QUANT + else macroblock_modes |= MACROBLOCK_QUANT; //add MACROBLOCK_QUANT + + putaddrinc(mb_add + mb_skip); mb_skip = 0; + putmbdata(macroblock_modes); + if (macroblock_modes & MACROBLOCK_QUANT) put_quantiser(new_quantizer_scale); + + if (concealment_motion_vectors) + { + if (picture_structure == FRAME_PICTURE) motion_fr_conceal (); + else motion_fi_conceal (); + } + + curTable = quant_tables[quant_equ[quantizer_scale]][quant_equ[new_quantizer_scale]]; + if (!curTable) + { + /*DEBF("Inv. 
curTable: qs: %i nqs: %i qe_qs: %i qe_nqs: %i\n", + quantizer_scale, new_quantizer_scale, + quant_equ[quantizer_scale], quant_equ[new_quantizer_scale]);*/ + curTable = quant_table_id; + } + + slice_intra_DCT ( 0); + slice_intra_DCT ( 0); + slice_intra_DCT ( 0); + slice_intra_DCT ( 0); + slice_intra_DCT ( 1); + slice_intra_DCT ( 2); + } + else + { + int new_coded_block_pattern = 0; + + // begin saving data + int batb; + uint8 n_owbuf[32], *n_wbuf, *o_owbuf, *o_wbuf; + uint32 n_outbitcnt, n_outbitbuf, o_outbitcnt, o_outbitbuf; + +#ifdef CHANGE_BRIGHTNESS + dc_reset = 1; +#endif + +#define PUSH_BIT_IO \ + o_owbuf = owbuf; o_wbuf = wbuf; \ + o_outbitcnt = outbitcnt; o_outbitbuf = outbitbuf; \ + owbuf = wbuf = n_owbuf; \ + outbitcnt = BITS_IN_BUF; outbitbuf = 0; + +#define POP_BIT_IO \ + n_wbuf = wbuf; \ + n_outbitcnt = outbitcnt; n_outbitbuf = outbitbuf; \ + owbuf = o_owbuf; wbuf = o_wbuf; \ + outbitcnt = o_outbitcnt; outbitbuf = o_outbitbuf; + + PUSH_BIT_IO + + if (picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) + { + case MC_FRAME: MOTION_CALL (motion_fr_frame, macroblock_modes); break; + case MC_FIELD: MOTION_CALL (motion_fr_field, macroblock_modes); break; + case MC_DMV: MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) + { + case MC_FIELD: MOTION_CALL (motion_fi_field, macroblock_modes); break; + case MC_16X8: MOTION_CALL (motion_fi_16x8, macroblock_modes); break; + case MC_DMV: MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); break; + } + + POP_BIT_IO + + // end saving data + +#ifdef STAT + if (picture_coding_type == P_TYPE) cnt_p_ni++; + else if (picture_coding_type == B_TYPE) cnt_b_ni++; +#endif + new_quantizer_scale = getNewQuant(quantizer_scale, 0); + + if (macroblock_modes & MACROBLOCK_PATTERN) + { + int coded_block_pattern = get_coded_block_pattern (); + + mb_sav_lev = 0xFFFF; + curTable = 
quant_tables[quant_equ[quantizer_scale]][quant_equ[new_quantizer_scale]]; + if (!curTable) + { + /*DEBF("Inv. curTable: qs: %i nqs: %i qe_qs: %i qe_nqs: %i\n", + quantizer_scale, new_quantizer_scale, + quant_equ[quantizer_scale], quant_equ[new_quantizer_scale]);*/ + curTable = quant_table_id; + } + + if (coded_block_pattern & 0x20) { slice_non_intra_DCT(0); if (isNotEmpty(block[0])) new_coded_block_pattern |= 0x20; } + if (coded_block_pattern & 0x10) { slice_non_intra_DCT(1); if (isNotEmpty(block[1])) new_coded_block_pattern |= 0x10; } + if (coded_block_pattern & 0x08) { slice_non_intra_DCT(2); if (isNotEmpty(block[2])) new_coded_block_pattern |= 0x08; } + if (coded_block_pattern & 0x04) { slice_non_intra_DCT(3); if (isNotEmpty(block[3])) new_coded_block_pattern |= 0x04; } + if (coded_block_pattern & 0x02) { slice_non_intra_DCT(4); if (isNotEmpty(block[4])) new_coded_block_pattern |= 0x02; } + if (coded_block_pattern & 0x01) { slice_non_intra_DCT(5); if (isNotEmpty(block[5])) new_coded_block_pattern |= 0x01; } +#ifdef P_FRAME_NON_INTRA_DROP + if (picture_coding_type == P_TYPE) new_quantizer_scale = quantizer_scale; +#endif + if (!new_coded_block_pattern) + { + macroblock_modes &= 0xFFFFFFED; // remove MACROBLOCK_PATTERN and MACROBLOCK_QUANT flag + if ( (picture_coding_type == P_TYPE) + && !(macroblock_modes & MACROBLOCK_MOTION_FORWARD)) + { + assert(n_wbuf == n_owbuf); + assert(n_outbitcnt == BITS_IN_BUF); + + if ((h_offset == 0) || (h_offset == horizontal_size_value - 16)) // can't skip last mb + { + // we can't transmit mv (0,0) since PMV could be different than 0 for last block + // so we transmit the single smallest coeff. instead unrequantised + // anyway this is likely to take no more bit than transmiting a null mv.... 
+ + assert((mb_sav_lev) && (mb_sav_lev != 0xFFFF)); + + new_coded_block_pattern = 1 << (5 - mb_sav_c); + macroblock_modes |= MACROBLOCK_PATTERN; + new_quantizer_scale = quantizer_scale; + block[mb_sav_c][0].run = mb_sav_run; block[mb_sav_c][0].level = mb_sav_lev; + block[mb_sav_c][1].run = 0; block[mb_sav_c][1].level = 0; + } + else + { + mb_skip += mb_add; + goto skip_mb; + } + } + } + } + + if (last_coded_scale == new_quantizer_scale) macroblock_modes &= 0xFFFFFFEF; // remove MACROBLOCK_QUANT + else if (macroblock_modes & MACROBLOCK_PATTERN) macroblock_modes |= MACROBLOCK_QUANT; //add MACROBLOCK_QUANT + assert( (macroblock_modes & MACROBLOCK_PATTERN) || !(macroblock_modes & MACROBLOCK_QUANT) ); + + putaddrinc(mb_add + mb_skip); mb_skip = 0; + putmbdata(macroblock_modes); + if (macroblock_modes & MACROBLOCK_QUANT) put_quantiser(new_quantizer_scale); + + // put saved motion data... + for (batb = 0; batb < (n_wbuf - n_owbuf); batb++) putbits(n_owbuf[batb], 8); + putbits(n_outbitbuf, BITS_IN_BUF - n_outbitcnt); + // end saved motion data... 
+ + if (macroblock_modes & MACROBLOCK_PATTERN) + { + putcbp(new_coded_block_pattern); + + if (new_coded_block_pattern & 0x20) putnonintrablk(block[0]); + if (new_coded_block_pattern & 0x10) putnonintrablk(block[1]); + if (new_coded_block_pattern & 0x08) putnonintrablk(block[2]); + if (new_coded_block_pattern & 0x04) putnonintrablk(block[3]); + if (new_coded_block_pattern & 0x02) putnonintrablk(block[4]); + if (new_coded_block_pattern & 0x01) putnonintrablk(block[5]); + } + } + + skip_mb: + + NEXT_MACROBLOCK; + + mba_inc = 0; + while (1) + { + if (bit_buf >= 0x10000000) + { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } + else if (bit_buf >= 0x03000000) + { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } + else + switch (UBITS (bit_buf, 11)) + { + case 8: /* macroblock_escape */ + mba_inc += 33; + DUMPBITS (bit_buf, bits, 11); + continue; + default: /* end of slice, or error */ + //LOGF("hoffset: %i, hsv: %i, curo: %i\n", h_offset, horizontal_size_value, (int)outbytecnt + (wbuf - owbuf)); + if (h_offset != 0) + sliceError++; + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); //PPP + + mba_inc += mba->mba; + mb_add = mba_inc + 1; + +#ifdef CHANGE_BRIGHTNESS + if (mba_inc) dc_reset = 1; +#endif + + if (mba_inc) do { NEXT_MACROBLOCK; } + while (--mba_inc); + } + +} + +/////---- end ext mpeg code + +void k9requant::run () +{ + uint8 ID, found; + int64 greedyFactor, greedyFactor2; + int i; + +#ifdef DEMO + gopCount = 0; +#endif + +#ifdef LOG_RATE_CONTROL + LOG_FILE = fopen("Logfile.txt", "w"); +#endif + +#ifdef STAT + ori_i = ori_p = ori_b = 0; + new_i = new_p = new_b = 0; + cnt_i = cnt_p = cnt_b = 0; + cnt_p_i = cnt_p_ni = 0; + cnt_b_i = cnt_b_ni = 0; +#endif + +#ifdef USE_FD + if (argc < 3) { USAGE } + ifd = fopen(argv[argc - 2], "rb"); + ofd = fopen(argv[argc - 1], "wb"); + if (!ifd) + { + LOGF("Bad input path! (%s)\n", argv[argc - 2]); + return 2; + } + if (!ofd) + { + LOGF("Bad output path! 
(%s)\n", argv[argc - 1]); + return 2; + } + argc -= 2; +#endif + +#ifndef THREAD + rbuf = cbuf = orbuf = malloc(BUF_SIZE); + wbuf = owbuf = malloc(BUF_SIZE); + inbytecnt = outbytecnt = 0; + eof = 0; +#endif + + validPicHeader = 0; + validSeqHeader = 0; + validExtHeader = 0; + +#ifndef THREAD + // argument parsing +#ifdef CHANGE_BRIGHTNESS + if (argc < 5) { USAGE } + delta_bright = atoi(argv[4]); +#else + if (argc < 4) { USAGE } +#endif + fact_x = atof(argv[1]); + sscanf(argv[3], "%lld", &orim2vsize); +#endif + +#ifdef THREAD + orim2vsize=rqt_visize; + fact_x = rqt_fact; +#endif + + greedyFactor = orim2vsize / 100; + greedyFactor2 = orim2vsize / 50; + +#ifndef THREAD + if (fact_x <= 1.0) + { + unsigned char buf[4096]; + + while(1) + { + int i = read(0, buf, 4096); + if (i > 0) write(1, buf, i); + else return 0; + } + } + +#endif + if (fact_x > 10.0) fact_x = 10.0; + + // factor and stresses setting + initRequant(); + + // fill quant table + // id table + for (i = -2048; i <= 2047; i++) quant_table_id[i] = i; + + // other tables + for (i = 0; i < 42; i++) + { + int q = quantisers[i]; + int j; + + for (j = i + 1; j < 42; j++) + { + int nq = quantisers[j]; + int k; + short *cTab = quant_tables[quant_equ[q]][quant_equ[nq]]; + + for (k = -2048; k <= 2047; k++) + { + int ov = k*q; + int t = ov / nq; + + if (fact_x <= max_alt_table) + { + int t2, t3; + int d, d2, d3; + int nv, nv2, nv3; + + t2 = t + 1; + t3 = t - 1; + + nv = t * nq; + nv2 = t2 * nq; + nv3 = t3 * nq; + + d = abs(nv - ov); + d2 = abs(nv2 - ov); + d3 = abs(nv3 - ov); + + if (d2 < d) { d = d2; t = t2; } + if (d3 < d) t = t3; + } + + if (t > 2047) t = 2047; + else if (t < -2048) t = -2048; + + cTab[k] = t; + } + } + } + +#ifndef THREAD + LOG("M2VRequantiser by Makira.\n"); +#ifdef WIN + fprintf(stderr, "Using %f as factor, %lld as m2v size.\n", fact_x, orim2vsize); +#else + LOGF("Using %f as factor, %lld as m2v size.\n", fact_x, orim2vsize); +#endif +#endif + + // recoding + while(1) + { + // get next start 
code prefix + found = 0; + while (!found) + { +#ifndef REMOVE_BYTE_STUFFING + LOCK(3) +#else + LOCK(8) + if ( (cbuf[7] == 0) && (cbuf[6] == 0) && (cbuf[5] == 0) && (cbuf[4] == 0) + && (cbuf[3] == 0) && (cbuf[2] == 0) && (cbuf[1] == 0) && (cbuf[0] == 0) ) { SEEKR(1) } + else +#endif + if ( (cbuf[0] == 0) && (cbuf[1] == 0) && (cbuf[2] == 1) ) found = 1; // start code ! + else { COPY(1) } // continue search + } + COPY(3) + + // get start code + LOCK(1) + ID = cbuf[0]; + COPY(1) + + if (ID == 0x00) // pic header + { + LOCK(4) + picture_coding_type = (cbuf[1] >> 3) & 0x7; + if (picture_coding_type < 1 || picture_coding_type > 3) + { + DEBF("illegal picture_coding_type: %i\n", picture_coding_type); + validPicHeader = 0; + } + else + { + validPicHeader = 1; + cbuf[1] |= 0x7; cbuf[2] = 0xFF; cbuf[3] |= 0xF8; // vbv_delay is now 0xFFFF + } + + validExtHeader = 0; + + COPY(4) + } + else if (ID == 0xB3) // seq header + { + LOCK(8) + horizontal_size_value = (cbuf[0] << 4) | (cbuf[1] >> 4); + vertical_size_value = ((cbuf[1] & 0xF) << 8) | cbuf[2]; + if ( horizontal_size_value > 720 || horizontal_size_value < 352 + || vertical_size_value > 576 || vertical_size_value < 480 + || (horizontal_size_value & 0xF) || (vertical_size_value & 0xF)) + { + DEBF("illegal size, hori: %i verti: %i\n", horizontal_size_value, vertical_size_value); + validSeqHeader = 0; + } + else + validSeqHeader = 1; + + validPicHeader = 0; + validExtHeader = 0; + + COPY(8) + } + else if (ID == 0xB5) // extension + { + LOCK(1) + if ((cbuf[0] >> 4) == 0x8) // pic coding ext + { + LOCK(5) + + f_code[0][0] = (cbuf[0] & 0xF) - 1; + f_code[0][1] = (cbuf[1] >> 4) - 1; + f_code[1][0] = (cbuf[1] & 0xF) - 1; + f_code[1][1] = (cbuf[2] >> 4) - 1; + + intra_dc_precision = (cbuf[2] >> 2) & 0x3; + picture_structure = cbuf[2] & 0x3; + frame_pred_frame_dct = (cbuf[3] >> 6) & 0x1; + concealment_motion_vectors = (cbuf[3] >> 5) & 0x1; + q_scale_type = (cbuf[3] >> 4) & 0x1; + intra_vlc_format = (cbuf[3] >> 3) & 0x1; + 
alternate_scan = (cbuf[3] >> 2) & 0x1; + + if ( (f_code[0][0] > 8 && f_code[0][0] < 14) + || (f_code[0][1] > 8 && f_code[0][1] < 14) + || (f_code[1][0] > 8 && f_code[1][0] < 14) + || (f_code[1][1] > 8 && f_code[1][1] < 14) + || picture_structure == 0) + { + DEBF("illegal ext, f_code[0][0]: %i f_code[0][1]: %i f_code[1][0]: %i f_code[1][1]: %i picture_structure:%i\n", + f_code[0][0], f_code[0][1], f_code[1][0], f_code[1][1], picture_structure); + validExtHeader = 0; + } + else + validExtHeader = 1; + COPY(5) + } + else + { + COPY(1) + } + } + else if (ID == 0xB8) // gop header + { + LOCK(4) + COPY(4) + +#ifdef DEMO + gopCount++; +#endif + } + else if ((ID >= 0x01) && (ID <= 0xAF) && validPicHeader && validSeqHeader && validExtHeader) // slice + { + uint8 *outTemp = wbuf, *inTemp = cbuf; + int64 threshold; + + bytediff = (outbytecnt + (wbuf - owbuf)) - ((inbytecnt - (rbuf - cbuf)) / fact_x); + + + if (inbytecnt < greedyFactor2) threshold = inbytecnt >> 1; + else if (orim2vsize - inbytecnt < greedyFactor2) threshold = (orim2vsize - inbytecnt) >> 1; + else threshold = greedyFactor; + + if (threshold < 1024) threshold = 1024; + + stress_factor = (float)(bytediff + threshold) / (float)(threshold << 1); + if (stress_factor > 1.0f) stress_factor = 1.0f; + else if (stress_factor < 0.0f) stress_factor = 0.0f; + + +#ifdef LOG_RATE_CONTROL + /*fprintf(LOG_FILE, "%f%%: Requested: %f Current: %f Delta: %lld Threshold: %f Stress: %f\n", + (float)(100.0f*inbytecnt)/orim2vsize, // percent + (float)fact_x, // requested + (float)(inbytecnt - (rbuf - cbuf))/(float)(outbytecnt + (wbuf - owbuf)), // current + (long long)bytediff, // delta + (float)threshold, // threshold + stress_factor // Stress + );*/ + fprintf(LOG_FILE, "inb: %.0f inb_c: %.0f oub: %.0f oub_c: %.0f cur: %.3f dif: %.0f thr: %.0f str: %.03f\n", + (float)inbytecnt, + (float)(inbytecnt - (rbuf - cbuf)), + (float)outbytecnt, + (float)(outbytecnt + (wbuf - owbuf)), + (float)(inbytecnt - (rbuf - cbuf))/(float)(outbytecnt + 
(wbuf - owbuf)), + (float)bytediff, + (float)threshold, + (float)stress_factor ); +#endif + + +#ifndef CHANGE_BRIGHTNESS + if ( ((picture_coding_type == I_TYPE) && ( stress_factor > i_min_stress)) + || ((picture_coding_type == P_TYPE) && ( stress_factor > p_min_stress)) + || ((picture_coding_type == B_TYPE) && ( stress_factor > b_min_stress)) +#ifdef DEMO + || ((gopCount & 0x7F) < 10) +#endif + ) +#endif + { + // init error + sliceError = 0; + + // init bit buffer + inbitbuf = 0; inbitcnt = 0; + outbitbuf = 0; outbitcnt = BITS_IN_BUF; + + // get 32 bits + Refill_bits(); + Refill_bits(); + Refill_bits(); + Refill_bits(); + + // begin bit level recoding + mpeg2_slice(ID); + flush_read_buffer(); + flush_write_buffer(); + // end bit level recoding + +#ifndef CHANGE_BRIGHTNESS + if ((wbuf - outTemp > cbuf - inTemp) || (sliceError > MAX_ERRORS)) // yes that might happen, rarely + { +#ifndef NDEBUG + if (sliceError > MAX_ERRORS) + { + DEBF("sliceError (%i) > MAX_ERRORS (%i)\n", sliceError, MAX_ERRORS); + } +#endif + + // in this case, we'll just use the original slice ! + tc_memcpy(outTemp, inTemp, cbuf - inTemp); + wbuf = outTemp + (cbuf - inTemp); + + // adjust outbytecnt + outbytecnt -= (wbuf - outTemp) - (cbuf - inTemp); + } +#endif + +#ifdef STAT + #ifdef LOG_RATE_CONTROL + if (picture_coding_type == I_TYPE) fprintf(LOG_FILE, "-I-\n"); +#endif + switch(picture_coding_type) + { + case I_TYPE: + ori_i += cbuf - inTemp; + new_i += (wbuf - outTemp > cbuf - inTemp) ? (cbuf - inTemp) : (wbuf - outTemp); + cnt_i ++; + break; + + case P_TYPE: + ori_p += cbuf - inTemp; + new_p += (wbuf - outTemp > cbuf - inTemp) ? (cbuf - inTemp) : (wbuf - outTemp); + cnt_p ++; + break; + + case B_TYPE: + ori_b += cbuf - inTemp; + new_b += (wbuf - outTemp > cbuf - inTemp) ? 
(cbuf - inTemp) : (wbuf - outTemp); + cnt_b ++; + break; + + default: + assert(0); + break; + } +#endif + } + } + +#ifndef NDEBUG + if ((ID >= 0x01) && (ID <= 0xAF) && (!validPicHeader || !validSeqHeader || !validExtHeader)) + { + if (!validPicHeader) DEBF("missing pic header (%02X)\n", ID); + if (!validSeqHeader) DEBF("missing seq header (%02X)\n", ID); + if (!validExtHeader) DEBF("missing ext header (%02X)\n", ID); + } +#endif +if (rbuf - orbuf > MAX_READ) { MOV_READ } + if (wbuf - owbuf > MIN_WRITE) { WRITE } + } + + +#ifdef LOG_RATE_CONTROL + fclose(LOG_FILE); +#endif + rqt_run=false; + // keeps gcc happy + return ; +} + +void k9requant::initvar() +{ + cbuf = NULL; + rbuf = NULL; + wbuf = NULL; + orbuf = NULL; + owbuf = NULL; + inbitcnt = outbitcnt = 0; + inbitbuf = outbitbuf = 0; + inbytecnt = outbytecnt = 0; + fact_x = 0; + mloka1 = mloka2 = eof = 0; + orim2vsize = 0; + bytediff = 0; + stress_factor = 0; + i_factor = 0; + p_factor = 0; + b_factor = 0; + i_min_stress = 0; + p_min_stress = 0; + b_min_stress = 0; + quant_table_id = &quant_table_id_data[2048]; + horizontal_size_value = 0; + vertical_size_value = 0; + + picture_coding_type = 0; + + memset( f_code,0 , sizeof(f_code)); + intra_dc_precision = 0; + picture_structure = 0; + frame_pred_frame_dct = 0; + concealment_motion_vectors = 0; + q_scale_type = 0; + intra_vlc_format = 0; + alternate_scan = 0; + + validPicHeader = 0; + validSeqHeader = 0; + validExtHeader = 0; + sliceError = 0; + + quantizer_scale = 0; + new_quantizer_scale = 0; + last_coded_scale = 0; + h_offset = v_offset = 0; + mb_skip = mb_add = 0; + mb_out = 0; + + mb_sav_run = mb_sav_lev = mb_sav_c = 0; + curTable = NULL; + memset( block, 0, sizeof(block)); +} + + + +void k9requant::initRequant() { + int i; + if (fact_x <= 1.0) + { + i_factor = i_factors[0]; + p_factor = p_factors[0]; + b_factor = b_factors[0]; + i_min_stress = i_min_stresses[0]; + p_min_stress = p_min_stresses[0]; + b_min_stress = b_min_stresses[0]; + } + else if (fact_x >= 
10.0) + { + i_factor = i_factors[2]; + p_factor = p_factors[2]; + b_factor = b_factors[2]; + i_min_stress = i_min_stresses[2]; + p_min_stress = p_min_stresses[2]; + b_min_stress = b_min_stresses[2]; + } + else if (fact_x <= 3.0) // 1.0 .. 3.0 + { + double inter = (fact_x - 1.0)/(3.0 - 1.0); + i_factor = i_factors[0] + inter * (i_factors[1] - i_factors[0]); + p_factor = p_factors[0] + inter * (p_factors[1] - p_factors[0]); + b_factor = b_factors[0] + inter * (b_factors[1] - b_factors[0]); + i_min_stress = i_min_stresses[0] + inter * (i_min_stresses[1] - i_min_stresses[0]); + p_min_stress = p_min_stresses[0] + inter * (p_min_stresses[1] - p_min_stresses[0]); + b_min_stress = b_min_stresses[0] + inter * (b_min_stresses[1] - b_min_stresses[0]); + } + else // 3.0 .. 10.0 + { + double inter = (fact_x - 3.0)/(10.0 - 3.0); + i_factor = i_factors[1] + inter * (i_factors[2] - i_factors[1]); + p_factor = p_factors[1] + inter * (p_factors[2] - p_factors[1]); + b_factor = b_factors[1] + inter * (b_factors[2] - b_factors[1]); + i_min_stress = i_min_stresses[1] + inter * (i_min_stresses[2] - i_min_stresses[1]); + p_min_stress = p_min_stresses[1] + inter * (p_min_stresses[2] - p_min_stresses[1]); + b_min_stress = b_min_stresses[1] + inter * (b_min_stresses[2] - b_min_stresses[1]); + } + + /*LOGF( "i_factor: %i p_factor: %i b_factor: %i\n" + "i_min_stress: %.02f p_min_stress: %.02f b_min_stress: %.02f\n", + i_factor, p_factor, b_factor, + i_min_stress, p_min_stress, b_min_stress);*/ + + + +} + +bool k9requant::lock( int64 x) { + if (unlikely ((x) > (rbuf - cbuf))) + { + if (likely (wbuf)) + { + QMutexLocker locker( &mutw ); + //mutw.lock(); + rqt_wcnt = wbuf - owbuf; + condw.wakeAll(); + //mutw.unlock(); + } + //mutr.lock(); + QMutexLocker locker( &mutr ); + while (!rqt_rcnt) + { + condr.wait( &mutr); + if (rqt_stop==true) { + //mutr.unlock(); + return false; + } + } + cbuf = rqt_rptr; //src buffer + rbuf =orbuf = cbuf; + rbuf += rqt_rcnt + 3; // end of src buffer + rqt_rcnt = 0; + 
owbuf = rqt_wptr; // dest buffer + inbytecnt = rqt_inbytes; + outbytecnt = rqt_outbytes; + orim2vsize = rqt_visize; + //mutr.unlock(); + wbuf = owbuf; + if ( fact_x < rqt_fact) { + fact_x=rqt_fact; + initRequant(); + } + fact_x=rqt_fact; + } + + return true; + +} + + + diff --git a/k9vamps/k9requant.h b/k9vamps/k9requant.h new file mode 100644 index 0000000..2832c06 --- /dev/null +++ b/k9vamps/k9requant.h @@ -0,0 +1,278 @@ +// +// C++ Interface: k9requant +// +// Description: A transcription from m2vrequantizer in C++ +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#ifndef K9REQUANT_H +#define K9REQUANT_H +#include "k9common.h" + + +#include <qthread.h> +#include <qmutex.h> +#include <qwaitcondition.h> + + +// user defined types +//typedef unsigned int uint; +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +typedef char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + + +typedef signed int sint; +typedef signed char sint8; +typedef signed short sint16; +typedef signed int sint32; +#ifdef WIN +typedef __int64 sint64; +#else +typedef signed long long sint64; +#endif + +#define BITS_IN_BUF (8) + +// block data +typedef struct { + uint8 run; + short level; +} +RunLevel; +/* +#define BUF_SIZE (16*1024*1024) +#define MIN_READ (4*1024*1024) +#define MIN_WRITE (8*1024*1024) +#define MAX_READ (10*1024*1024) +*/ + + +// keep gcc happy +#define WRITE \ + orbuf = orbuf; \ + mloka1 = mloka1; \ + mloka2 = mloka2; \ + eof = eof; + +// meaningless +#define MIN_WRITE 0 +#define MAX_READ 0 +#define MOV_READ + +// this is where we switch threads +#define LOCK(x) if (! 
lock(x)) {rqt_run=false;return;} + +#define COPY(x) \ + tc_memcpy (wbuf, cbuf, x); \ + cbuf += x; \ + wbuf += x; + +#define SEEKR(x) cbuf += x; + +#define SEEKW(x) wbuf += x; + + +/** + @author Jean-Michel PETIT <[email protected]> +*/ +class k9requant : public QThread { +private: + + int inbitcnt, outbitcnt; + uint32 inbitbuf, outbitbuf; + uint64 inbytecnt, outbytecnt; + float fact_x; + int mloka1, mloka2, eof; + + int64 orim2vsize; + int64 bytediff; + double stress_factor; // from 0.0 to 1.0 + + int i_factor; + int p_factor; + int b_factor; + double i_min_stress; + double p_min_stress; + double b_min_stress; + + short quant_table_id_data[4096]; + short *quant_table_id ; + +#ifdef USE_FD + + FILE *ifd, *ofd; +#endif + +#ifdef STAT + + uint64 ori_i, ori_p, ori_b; + uint64 new_i, new_p, new_b; + uint64 cnt_i, cnt_p, cnt_b; + uint64 cnt_p_i, cnt_p_ni; + uint64 cnt_b_i, cnt_b_ni; +#endif + +#ifdef DEMO + + int gopCount; +#endif + +#ifdef LOG_RATE_CONTROL + + FILE* LOG_FILE; +#endif + +#ifdef CHANGE_BRIGHTNESS + + int delta_bright; + int dc_reset; + int old_dc_pred, new_dc_pred; +#endif + + // mpeg2 state + // seq header + uint horizontal_size_value; + uint vertical_size_value; + + // pic header + uint picture_coding_type; + + // pic code ext + uint f_code[2][2]; + uint intra_dc_precision; + uint picture_structure; + uint frame_pred_frame_dct; + uint concealment_motion_vectors; + uint q_scale_type; + uint intra_vlc_format; + uint alternate_scan; + + // error + int validPicHeader; + int validSeqHeader; + int validExtHeader; + int sliceError; + + // slice or mb + uint quantizer_scale; + uint new_quantizer_scale; + uint last_coded_scale; + int h_offset, v_offset; + int mb_skip, mb_add; + int mb_out; + + int mb_sav_run, mb_sav_lev, mb_sav_c; + short *curTable; + + RunLevel block[6][65]; // terminated by level = 0, so we need 64+1 +private: + void putbits(uint val, int n); + void Refill_bits(void); + + void Flush_Bits(uint n); + uint Show_Bits(uint n); + uint Get_Bits(uint 
n); + + uint Copy_Bits(uint n); + void flush_read_buffer(); + void flush_write_buffer(); + int scale_quant(double quant ); + int increment_quant(int quant); + int intmax( register int x, register int y ); + + int intmin( register int x, register int y ); + void putmbtype(int mb_type); + + int getNewQuant(int curQuant, int intra); + + int isNotEmpty(RunLevel *blk); + + + // return != 0 if error + int putAC(int run, int signed_level, int vlcformat); + // return != 0 if error + int putACfirst(int run, int val); + void putnonintrablk(RunLevel *blk); + void putcbp(int cbp); + + int get_macroblock_modes (); + int get_quantizer_scale (); + void get_motion_delta (const int f_code); + void get_dmv (); + int get_coded_block_pattern (); + int get_luma_dc_dct_diff (); + int get_chroma_dc_dct_diff (); + void get_intra_block_B14 (); + void get_intra_block_B15 (); + int get_non_intra_block_rq (RunLevel *blk); + int get_non_intra_block_sav (RunLevel *blk, int cc); + +#ifdef P_FRAME_NON_INTRA_DROP + + int get_non_intra_block_drop (RunLevel *blk, int cc); +#endif + +#ifdef CHANGE_BRIGHTNESS + + void putDC(const sVLCtable *tab, int val); +#endif + + void slice_intra_DCT (const int cc); + void slice_non_intra_DCT (int cur_block); + void motion_fr_frame ( uint f_code[2] ); + void motion_fr_field ( uint f_code[2] ); + void motion_fr_dmv ( uint f_code[2] ); + void motion_fr_conceal ( ); + void motion_fi_field ( uint f_code[2] ); + void motion_fi_16x8 ( uint f_code[2] ); + void motion_fi_dmv ( uint f_code[2] ); + void motion_fi_conceal (); + void putmbdata(int macroblock_modes); + void put_quantiser(int quantiser); + void putaddrinc(int addrinc); + int slice_init (int code); + void mpeg2_slice ( const int code ); + void initRequant(); + bool lock(int64 x); + +protected: + void run (); + +public: + uint8 *cbuf, *rbuf, *wbuf, *orbuf, *owbuf; + // global data for inter thread com + float rqt_fact; + uint32_t rqt_rcnt; + uint32_t rqt_wcnt; + uint64_t rqt_inbytes; + uint64_t rqt_outbytes; + 
uint64_t rqt_visize; + uchar *rqt_rptr; + uchar *rqt_wptr; + QWaitCondition condr; + QWaitCondition condw; + QMutex mutr; + QMutex mutw; + bool rqt_stop; + bool rqt_run; + + +public: + k9requant(); + void initvar(); +}; + + + +#endif diff --git a/k9vamps/k9requant2.h b/k9vamps/k9requant2.h new file mode 100755 index 0000000..e28336f --- /dev/null +++ b/k9vamps/k9requant2.h @@ -0,0 +1,126 @@ +#include <qbuffer.h> +#include "k9fifo.h" + +#ifndef K9REQUANT2_H +#define K9REQUANT2_H + + +class k9Requant2 +{ +public: + void setInput(char *data,int size); + void setOutput(char*data,int size); + uint64_t getOutByteCnt() { return outbytecnt-outbytecnt1;}; + void setFactor(float fact); + void run(); + k9Requant2(); + ~k9Requant2(); +private: + QBuffer m_buffIn,m_buffOut; + QByteArray m_baIn,m_baOut; + + char *m_dataIn,*m_dataOut; + int m_sizeIn,m_sizeOut; + + // mpeg2 state + // seq header + uint horizontal_size_value; + uint vertical_size_value; + + // pic header + uint picture_coding_type; + + // pic code ext + uint f_code[2][2]; + uint intra_dc_precision; + uint picture_structure; + uint frame_pred_frame_dct; + uint concealment_motion_vectors; + uint q_scale_type; + uint intra_vlc_format; + uint alternate_scan; + + // error + int validPicHeader; + int validSeqHeader; + int validExtHeader; + int sliceError; + + // slice or mb + uint quantizer_scale; + uint new_quantizer_scale; + uint last_coded_scale; + int h_offset, v_offset; + + // rate + double quant_corr; + + // block data + typedef struct + { + uint8_t run; + short level; + } RunLevel; + RunLevel block[6][65]; // terminated by level = 0, so we need 64+1 + // end mpeg2 state + // global variables + uint8_t *cbuf, *rbuf, *wbuf, *orbuf, *owbuf; + int inbitcnt, outbitcnt; + uint32_t inbitbuf, outbitbuf; + uint64_t inbytecnt, outbytecnt,outbytecnt1; + float fact_x; + int mloka1; + + /////---- begin ext mpeg code + +private: + void putbits(uint val, int n); + void Refill_bits(void); + uint Show_Bits(uint n); + uint 
Get_Bits(uint n); + uint Copy_Bits(uint n); + void flush_read_buffer(); + void flush_write_buffer(); + int scale_quant(double quant); + int increment_quant(int quant); + int intmax( register int x, register int y ); + int intmin( register int x, register int y ); + int getNewQuant(int curQuant); + int isNotEmpty(RunLevel *blk); + int putAC(int run, int signed_level, int vlcformat); + int putACfirst(int run, int val); + void putnonintrablk(RunLevel *blk); + void putcbp(int cbp); + void putmbtype(int mb_type); + int get_macroblock_modes (); + int get_quantizer_scale (); + int get_motion_delta (const int f_code); + int get_dmv (); + int get_coded_block_pattern (); + int get_luma_dc_dct_diff (); + int get_chroma_dc_dct_diff (); + void get_intra_block_B14 (); + void get_intra_block_B15 (); + int get_non_intra_block_drop (RunLevel *blk); + int get_non_intra_block_rq (RunLevel *blk); + void slice_intra_DCT (const int cc); + void slice_non_intra_DCT (int cur_block); + void motion_fr_frame ( uint f_code[2] ); + void motion_fr_field ( uint f_code[2] ); + void motion_fr_dmv ( uint f_code[2] ); + void motion_fr_conceal ( ); + void motion_fi_field ( uint f_code[2] ); + void motion_fi_16x8 ( uint f_code[2] ); + void motion_fi_dmv ( uint f_code[2] ); + void motion_fi_conceal (); + + void putmbdata(int macroblock_modes); + void put_quantiser(int quantiser); + int slice_init (int code); + void mpeg2_slice ( const int code ); + void Flush_Bits(uint); + void init(); +protected: +}; + +#endif diff --git a/k9vamps/k9vamps.cpp b/k9vamps/k9vamps.cpp new file mode 100755 index 0000000..b077fb8 --- /dev/null +++ b/k9vamps/k9vamps.cpp @@ -0,0 +1,1100 @@ +// +// C++ Interface: k9vamps +// +// Description: A transcription from Vamps in C++ +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#include "k9vamps.h" +#include <qapplication.h> +#include "ac.h" + + +void k9vamps::setNoData() { + 
noData=true; + wDataRead.wakeAll(); + wDataReady.wakeAll(); +} + +void k9vamps::addData(uchar *data,uint size) { + while (1) { + if (m_fifo.freespace()>=size) { + m_fifo.enqueue(data,size); + wDataReady.wakeAll(); + break; + } else + wDataRead.wait(); + } +} + + +int k9vamps::readData(uchar * data,uint size) { + uint size2=size; + uint32_t readSize=0,s=0; + + while (1) { + // is there data in the buffer? + if (m_fifo.count() >0) { + // s= size of data that we will read (maximum = size) + s=(m_fifo.count()) <size2 ? (m_fifo.count()) : size2; + // increments the number of readen bytes + readSize+=s; + // decrements the number of max bytes to read + size2-=s; + //moves bytes from buffer to output + m_fifo.dequeue(data,s); + //moves the position of output buffer to receive next bytes + data+=s; + //there's now free space in input buffer, we can wake the injection thread + wDataRead.wakeAll(); + } + // break the loop if injection thread terminated or we got what we want (size bytes) + // otherwise, we're waiting for datas + if(noData || (m_fifo.count() >=size2)) { + break; + } else + wDataReady.wait(); + } + // if there's datas in input buffer and we did not get all what we wanted, we take them. + s= (m_fifo.count()) <size2 ? 
(m_fifo.count()) : size2; + readSize+=s; + if (s>0 ) + m_fifo.dequeue(data,s); + + wDataRead.wakeAll(); + return readSize; +} + +void k9vamps::addSubpicture(uint id) { + int cpt=1; + for (uint i=0;i<32;i++) + if (spu_track_map[i]!=0) cpt++; + spu_track_map[id-1]=cpt; +} + +void k9vamps::addAudio(uint id) { + int cpt=1; + for (uint i=0;i <8;i++) + if (audio_track_map[i] !=0) cpt++; + + audio_track_map[id-1]=cpt; +} + +void k9vamps::addAudio(uint id,uint newId) { + if (newId==0) + addAudio(id); + else + audio_track_map[id-1]=newId; +} + + +void k9vamps::setInputSize(uint64_t size) { + ps_size=size; +} + +void k9vamps::setVapFactor(float factor) { + vap_fact=factor; +} + +void k9vamps::setSaveImage(k9SaveImage *m_save) { + m_saveImage=m_save; +} + +void k9vamps::reset() { + m_preserve=true; + bytes_read =0; + bytes_written=0; + padding_bytes=0; + total_packs=0; + video_packs=0; + skipped_video_packs=0; + aux_packs=0; + skipped_aux_packs=0; + sequence_headers=0; + nav_packs=0; + + rptr = rbuf; + rhwp = rbuf; + wptr = wbuf; + vbuf_size = VBUF_SIZE; + vap_fact= 1.0f; + + // inbuffw=inbuff; + for (uint i=0; i<8;i++) { + audio_track_map[i]=0; + } + for (uint i=0; i<32;i++) { + spu_track_map[i]=0; + } + + calc_ps_vap = 1; + vap_fact=1.0; + ps_size=0; + noData=false; + + avgdiff=1; + m_totfact=m_nbfact=m_avgfact=0; + + vin_bytes=0; + vout_bytes=0; + +} + +k9vamps::k9vamps(k9DVDBackup *dvdbackup) { + m_saveImage=NULL; + m_dvdbackup=dvdbackup; + reset(); + m_requant=NULL; + if (dvdbackup !=NULL) + m_bgUpdate = new k9bgUpdate(dvdbackup); + else + m_bgUpdate=NULL; + rbuf_size= RBUF_SIZE; + rbuf = (uchar*) malloc(rbuf_size);; + m_output=NULL; +} + + +void k9vamps::setPreserve(bool _value) { + m_preserve = _value; +} +void k9vamps::setOutput(QFile *_output) { + m_output=_output; +} + +k9vamps::~k9vamps() { + if (m_bgUpdate !=NULL) + delete m_bgUpdate; + free (rbuf); +} + + +void k9vamps::run () { + m_error=false; + m_errMsg=""; + m_requant=new k9requant(); + eof=0; + + // allocate 
video buffers + vibuf =(uchar*) malloc (vbuf_size); + vobuf = (uchar*) malloc (vbuf_size); + + if (vibuf == NULL || vobuf == NULL) + fatal (QString("Allocation of video buffers failed: %1").arg(strerror (errno))); + + + // actually do vaporization + vaporize (); + + + flush(); + + if (m_requant !=NULL) { + m_requant->rqt_stop=true; + while(m_requant->running()) { + m_requant->condr.wakeAll(); + m_requant->condw.wakeAll(); + m_requant->wait(10); + } +// m_requant->mutr.unlock(); +// m_requant->mutw.unlock(); + } + delete m_requant; + m_requant=NULL; + free (vibuf); + free(vobuf); + if (m_bgUpdate!=NULL) + m_bgUpdate->wait(); + //mutex.unlock(); +} + + + +// lock `size' bytes in read buffer +// i.e. ensure the next `size' input bytes are available in buffer +// returns nonzero on EOF +int k9vamps::lock (int size) { + int avail, n; + + avail = rhwp - rptr; + + if (avail >= size) + return 0; + + if (avail) { + tc_memcpy (rbuf, rptr, avail); + rptr = rbuf; + rhwp = rptr + avail; + } + + if (rbuf_size -avail <=0) { + uchar *buffer =(uchar*) malloc (rbuf_size+20480); + tc_memcpy (buffer,rbuf,rbuf_size); + rptr = buffer +(rptr-rbuf); + rhwp=buffer+(rhwp-rbuf); + rbuf_size+=20480; + free(rbuf); + rbuf=buffer; + } + + n = readData(rhwp,rbuf_size - avail); + + if (n % SECT_SIZE) + fatal ("Premature EOF"); + + rhwp += n; + bytes_read += n; + + return !n; +} + + +// copy `size' bytes from rbuf to wbuf +void k9vamps::copy (int size) { + if (!size) + return; + + if ((wptr - wbuf) + size > WBUF_SIZE) + fatal ("Write buffer overflow"); + + tc_memcpy (wptr, rptr, size); + rptr += size; + wptr += size; +} + + +// skip `size' bytes in rbuf +void k9vamps::skip (int size) { + rptr += size; +} + + +// flush wbuf +void k9vamps::flush (void) { + int size; + mutex.lock(); + size = wptr - wbuf; + + if (!size) { + mutex.unlock(); + return; + } + //m_dvdbackup->getOutput(wbuf,size); + // wait for a preceding update to finish + if (m_bgUpdate!=NULL) { + m_bgUpdate->wait(); + m_bgUpdate->update( 
wbuf,size); + } + if (m_output != NULL) + m_output->writeBlock((const char*) wbuf,size); + if (m_saveImage !=NULL) + m_saveImage->addData(wbuf,size); + wptr = wbuf; + bytes_written += size; + mutex.unlock(); +} + + +// returns no. bytes read up to where `ptr' points +uint64_t k9vamps::rtell (uchar *ptr) { + return bytes_read - (rhwp - ptr); +} + + +// returns no. bytes written up to where `ptr' points +// (including those in buffer which are not actually written yet) +uint64_t k9vamps::wtell (uchar *ptr) { + return bytes_written + (ptr - wbuf); +} + + +// some pack header consistency checking +bool k9vamps::check_pack (uchar *ptr) { + uint32_t pack_start_code; + int pack_stuffing_length; + + pack_start_code = (uint32_t) (ptr [0]) << 24; + pack_start_code |= (uint32_t) (ptr [1]) << 16; + pack_start_code |= (uint32_t) (ptr [2]) << 8; + pack_start_code |= (uint32_t) (ptr [3]); + + if (pack_start_code != 0x000001ba) { +// fatal ("Bad pack start code at %llu: %08lx", rtell (ptr), pack_start_code); + return false; + } + + if ((ptr [4] & 0xc0) != 0x40) { + // fatal ("Not an MPEG2 program stream pack at %llu", rtell (ptr)); + return false; + } + + // we rely on a fixed pack header size of 14 + // so better to ensure this is true + pack_stuffing_length = ptr [13] & 7; + + if (pack_stuffing_length) { + //fatal ("Non-zero pack stuffing length at %llu: %d\n", rtell (ptr), pack_stuffing_length); + return false; + } + + return true; +} + + +// video packet consistency checking +int k9vamps::check_video_packet (uchar *ptr) { + int vid_packet_length, pad_packet_length, rc = 0; + uint32_t vid_packet_start_code, pad_packet_start_code, sequence_header_code; + + vid_packet_start_code = (uint32_t) (ptr [0]) << 24; + vid_packet_start_code |= (uint32_t) (ptr [1]) << 16; + vid_packet_start_code |= (uint32_t) (ptr [2]) << 8; + vid_packet_start_code |= (uint32_t) (ptr [3]); + + if (vid_packet_start_code != 0x000001e0) + fatal(QString ("Bad video packet start code at %1: 
%2").arg(rtell(ptr)).arg(vid_packet_start_code,0,16)); + + vid_packet_length = ptr [4] << 8; + vid_packet_length |= ptr [5]; + vid_packet_length += 6; + + if ((ptr [6] & 0xc0) != 0x80) + fatal (QString("Not an MPEG2 video packet at %1").arg(rtell (ptr))); + + if (ptr [7]) { + if ((ptr [7] & 0xc0) != 0xc0) + qDebug (QString("First video packet in sequence starting at %1 misses PTS or DTS, flags=%2").arg(rtell (ptr)).arg(ptr [7])); + else { + sequence_header_code = (uint32_t) (ptr [6 + 3 + ptr [8] + 0]) << 24; + sequence_header_code |= (uint32_t) (ptr [6 + 3 + ptr [8] + 1]) << 16; + sequence_header_code |= (uint32_t) (ptr [6 + 3 + ptr [8] + 2]) << 8; + sequence_header_code |= (uint32_t) (ptr [6 + 3 + ptr [8] + 3]); + + if (sequence_header_code == 0x000001b3) { + rc = 1; + } else { + //fprintf (stderr, "Start of GOP at %llu not on sector boundary\n", + // rtell (ptr + 6 + 3 + ptr [8])); + sequence_headers++; + } + } + + } + + pad_packet_length = 0; + + if (14 + vid_packet_length < SECT_SIZE - 6) { + // video packet does not fill whole sector + // check for padding packet + ptr += vid_packet_length; + + pad_packet_start_code = (uint32_t) (ptr [0]) << 24; + pad_packet_start_code |= (uint32_t) (ptr [1]) << 16; + pad_packet_start_code |= (uint32_t) (ptr [2]) << 8; + pad_packet_start_code |= (uint32_t) (ptr [3]); + + if (pad_packet_start_code != 0x000001be) + qDebug (QString("Bad padding packet start code at %1: %2").arg(rtell (ptr + vid_packet_length)).arg(pad_packet_start_code)); + else { + pad_packet_length = ptr [4] << 8; + pad_packet_length |= ptr [5]; + pad_packet_length += 6; + } + } + + // length of video packet plus padding packet must always match sector size + if (14 + vid_packet_length + pad_packet_length != SECT_SIZE) + qDebug (QString("Bad video packet length at %1: %2").arg(rtell (ptr)).arg(vid_packet_length)); + + return rc; +} + + +// here we go +// this is where we switch to the requantization thread +// note that this and the requant thread never run 
concurrently (apart +// from a very short time) so a dual CPU box does not give an advantage +// returns size of evaporated GOP +int k9vamps::requant (uchar *dst, uchar *src, int n, float fact) { + if (n==0) return 0; + int rv; + if (! m_requant->running()) { + m_requant->initvar(); + } + m_requant->rqt_stop=false; + // this ensures for the requant thread to stop at this GOP's end + tc_memcpy (src + n, "\0\0\1", 3); + + m_requant->mutr.lock(); + + m_requant->rqt_rptr = src; + m_requant->rqt_wptr = dst; + m_requant->rqt_rcnt = n; + m_requant->rqt_wcnt = 0; + m_requant->rqt_fact = fact ; + m_requant->rqt_inbytes = vin_bytes; + m_requant->rqt_outbytes = vout_bytes; + m_requant->rqt_visize = (uint64_t) ((float) ps_size * (float) vin_bytes / ((float) total_packs * (float) SECT_SIZE)); + + // create requantization thread + if (! m_requant->running()) { + m_requant->start(); + m_requant->rqt_run=true; + } + + m_requant->condr.wakeAll(); + m_requant->mutr.unlock(); + + // now the requant thread should be running + + m_requant->mutw.lock(); + + // wait for requant thread to finish + while (!m_requant->rqt_wcnt) + m_requant->condw.wait( &m_requant->mutw); + + rv = m_requant->rqt_wcnt; + + m_requant->mutw.unlock(); +/* if ((m_requant->rbuf-m_requant->cbuf -3) >0 ) { + tc_memcpy(dst+m_requant->rqt_wcnt,m_requant->cbuf,m_requant->rbuf-m_requant->cbuf -3); + rv +=m_requant->rbuf-m_requant->cbuf -3; + } +/*/ + if ((m_requant->rbuf-m_requant->cbuf -2) >0 ) { + tc_memcpy(dst+m_requant->rqt_wcnt,m_requant->cbuf,m_requant->rbuf-m_requant->cbuf -2); + rv +=m_requant->rbuf-m_requant->cbuf -2; + } + + + +// if (rv>n) +// qDebug("requant error"); + + double realrqtfact=(double)(vin_bytes) / (double)(vout_bytes+rv); + avgdiff = ((m_avgfact) /realrqtfact); + + //qDebug ("factor : " +QString::number(m_avgfact) +" --> " +QString::number((float)n/(float)rv) +" avgdiff : " + QString::number(avgdiff) +" rqt_visize :" +QString::number(m_requant->rqt_visize) +" ps_size :" 
+QString::number(ps_size) + " vin_bytes :" + QString::number(vin_bytes)) ; + + return rv; + +} + + +// translate type of private stream 1 packet +// according to the track translation maps +// returns new track type (e.g. 0x80 for first AC3 audio +// track in cmd line) or zero if track is not to be copied +int k9vamps::new_private_1_type (uchar *ptr) { + int type, track, abase; + + type = ptr [6 + 3 + ptr [8]]; + //fprintf (stderr, "type=%02x\n", type); + + if (type >= 0x20 && type <= 0x3f) { + // subpicture + + track = spu_track_map [type - 0x20]; + + return track ? track - 1 + 0x20 : 0; + } + + if (type >= 0x80 && type <= 0x87) { + // AC3 audio + abase = 0x80; + } else if (type >= 0x88 && type <= 0x8f) { + // DTS audio + abase = 0x88; + } else if (type >= 0xa0 && type <= 0xa7) { + // LPCM audio + abase = 0xa0; + } else { +// fatal ("Unknown private stream 1 type at %llu: %02x", rtell (ptr), type); + abase = 0; + } + + track = audio_track_map [type - abase]; + + return track ? track - 1 + abase : 0; +} + + +// selectivly copy private stream 1 packs +// patches track type to reflect new track +// mapping unless user opted to preserve them +void k9vamps::copy_private_1 (uchar *ptr) { + int type; + + type = new_private_1_type (ptr); + + if (type) { + if (!m_preserve) + ptr [6 + 3 + ptr [8]] = type; + + copy (SECT_SIZE); + + return; + } + + skip (SECT_SIZE); +} + + +// translate ID of MPEG audio packet +// according to the audio track translation map +// returns new ID (e.g. 0xc0 for first MPEG audio +// track in cmd line) or zero if track is not to be copied +int k9vamps::new_mpeg_audio_id (int id) { + int track; + + track = audio_track_map [id - 0xc0]; + + return track ? 
track - 1 + 0xc0 : 0; +} + + +// selectivly copy MPEG audio packs +// patches ID to reflect new track mapping unless user opted to preserve them +void k9vamps::copy_mpeg_audio (uchar *ptr) { + int id; + + id = new_mpeg_audio_id (ptr [3]); + + if (id) { + if (!m_preserve) + ptr [3] = id; + + copy (SECT_SIZE); + + return; + } + + skip (SECT_SIZE); +} + + +// process beginning of program stream up to +// - but not including - first sequence header +// this PS leader is NOT shrunk since the PS may not +// necessarily begin at a GOP boundary (although it should?) +// nevertheless the unwanted private stream 1 and MPEG audio +// packs are skipped since some players could get confused otherwise +void k9vamps::vap_leader () { + uchar *ptr; + int id, data_length; + + while (!lock (SECT_SIZE)) { + ptr = rptr; + if (check_pack (ptr)) { + ptr += 14; + id = ptr [3]; + } else { + ptr +=14; + id = 0; + } + + switch (id) { + case 0xe0: + // video + if (check_video_packet (ptr)) + // sequence header + return; + + copy (SECT_SIZE); + break; + + case 0xbd: + // private 1: audio/subpicture + copy_private_1 (ptr); + break; + + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + // MPEG audio + copy_mpeg_audio (ptr); + break; + + case 0xbb: + // system header/private 2: PCI/DSI + copy (SECT_SIZE); + break; + + case 0xbe: + // padding + data_length = ptr [4] << 8; + data_length |= ptr [5]; + + if (14 + data_length != SECT_SIZE - 6) + fatal (QString("Bad padding packet length at %1: %2").arg(rtell (ptr)).arg(data_length)); + //JMP:à vérifier + skip (SECT_SIZE); + + break; + + default: + // fatal("Encountered stream ID %02x at %llu, " + // "probably bad MPEG2 program stream", id, rtell (ptr)); + copy (SECT_SIZE); + } + + if (wptr == wbuf + WBUF_SIZE) + flush (); + } + + eof = 1; + flush (); + + return; +} + + +// process end of program stream +// the same counts here as for the PS' beginning +void k9vamps::vap_trailer (int length) { + 
uchar *ptr; + int i, id, data_length; + + for (i = 0; i < length; i += SECT_SIZE) { + ptr = rptr + 14; + id = ptr [3]; + + if (id == 0xbd) { + // private 1: audio/subpicture + copy_private_1 (ptr); + } else if (id >= 0xc0 && id <= 0xc7) { + // MPEG audio + copy_mpeg_audio (ptr); + } else if (id == 0xbe) { + // padding + data_length = ptr [4] << 8; + data_length |= ptr [5]; + + if (14 + data_length != SECT_SIZE - 6) + fatal (QString("Bad padding packet length at %1: %2").arg(rtell (ptr)).arg(data_length)); + skip (SECT_SIZE); + } else { + copy (SECT_SIZE); + } + + if (wptr == wbuf + WBUF_SIZE) + flush (); + } + + flush (); +} + + +// vaporization is split in two phases - this is phase 1 +// PS packs are read into rbuf until a sequence header is found. +// All video packs are unpacketized and the contained video ES +// GOP copied to vibuf. In the same course the private stream 1 +// and MPEG audio packs are inspected and the number of packs +// not to be copied are counted. This is to forecast the video +// vaporization factor in case the user specified a PS shrink factor. 
+// returns GOP length in bytes +int k9vamps::vap_phase1 (void) { + uchar *ptr, *viptr = vibuf; + int seq_length, id, data_length, opt_length, seqhdr; + + for (seq_length = 0; + !lock (seq_length + SECT_SIZE); seq_length += SECT_SIZE) { + ptr = rptr + seq_length; + if (check_pack (ptr)) { + ptr += 14; + id = ptr [3]; + } else { + ptr += 14; + id = 0; + } + + + // avoid duplicate counts for sequence headers + if (seq_length) + total_packs++; + + switch (id) { + case 0xe0: + // video + seqhdr = check_video_packet (ptr); + + if (seq_length) { + video_packs++; + + if (seqhdr) { + sequence_headers++; + vilen = viptr - vibuf; + + return seq_length; + } + } + + // copy contained video ES fragment to vibuf + data_length = ptr [4] << 8; + data_length |= ptr [5]; + opt_length = 3 + ptr [8]; + data_length -= opt_length; + + if ((viptr - vibuf) + data_length > vbuf_size - 3) { + // reallocate video buffers + int i = viptr - vibuf; + + // grow by another VBUF_SIZE bytes + vbuf_size += VBUF_SIZE; + vibuf = (uchar*)realloc (vibuf, vbuf_size); + vobuf = (uchar*)realloc (vobuf, vbuf_size); + + if (vibuf == NULL || vobuf == NULL) + fatal ("Reallocation of video buffers failed"); + + viptr = vibuf + i; + } + + //fprintf (stderr, "data_length=%d\n", data_length); + tc_memcpy (viptr, ptr + 6 + opt_length, data_length); + viptr += data_length; + break; + + case 0xbd: + // private 1: audio/subpicture + aux_packs++; + + if (!new_private_1_type (ptr)) + skipped_aux_packs++; + + break; + + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + // MPEG audio + aux_packs++; + + if (!new_mpeg_audio_id (id)) + skipped_aux_packs++; + + break; + + case 0xbb: + // system header/private 2: PCI/DSI + nav_packs++; + break; + + case 0xbe: + // padding + skipped_aux_packs++; + data_length = ptr [4] << 8; + data_length |= ptr [5]; + + if (14 + data_length != SECT_SIZE - 6) + fatal (QString("Bad padding packet length at %1: %2").arg(rtell 
(ptr)).arg(data_length)); + + break; + + default: +// fatal("Encountered stream ID %02x at %llu, " +// "probably bad MPEG2 program stream", id, rtell (ptr)); + break; + } + + } + + eof = 1; + + return seq_length; +} + + +// re-packetize the video ES +// `ptr' points to original PES packet where to put the video data +// `voptr' points to first unpacketized byte in vobuf +// `avail' specifies number of bytes remaining in vobuf +// returns number of ES bytes in generated PES packet +int k9vamps::gen_video_packet (uchar *ptr, uchar *voptr, int avail) { + int i, header_data_length, data_length, padding_length; + + // if original PES holds optional data (e.g. DTS/PTS) we must keep it + header_data_length = (ptr [7] & 0xc0) == 0xc0 ? ptr [8] : 0; + data_length = SECT_SIZE - (14 + 6 + 3 + header_data_length); + + if (avail >= data_length) { + // write out a full video packet (usually 2025 byte) + tc_memcpy (ptr + 6 + 3 + header_data_length, voptr, data_length); + ptr [4] = (SECT_SIZE - (14 + 6)) >> 8; + ptr [5] = (SECT_SIZE - (14 + 6)) & 0xff; + ptr [8] = header_data_length; + + return data_length; + } + + if (avail < data_length - 6) { + // write a short video packet and a padding packet + tc_memcpy (ptr + 6 + 3 + header_data_length, voptr, avail); + ptr [4] = (3 + header_data_length + avail) >> 8; + ptr [5] = 3 + header_data_length + avail; + ptr [8] = header_data_length; + + // generate padding packet + ptr += 6 + 3 + header_data_length + avail; + padding_length = data_length - (avail + 6); + padding_bytes += padding_length + 6; + ptr [0] = 0; + ptr [1] = 0; + ptr [2] = 1; + ptr [3] = 0xbe; + ptr [4] = padding_length >> 8; + ptr [5] = padding_length; + + for (i = 0; i < padding_length; i++) + ptr [6+i] = 0xff; + + return avail; + } + + // write a padded video packet (1 to 6 padding bytes) + padding_length = data_length - avail; + padding_bytes += padding_length; + memset (ptr + 6 + 3 + header_data_length, 0xff, padding_length); + header_data_length += padding_length; + 
tc_memcpy (ptr + 6 + 3 + header_data_length, voptr, avail); + ptr [4] = (SECT_SIZE - (14 + 6)) >> 8; + ptr [5] = (SECT_SIZE - (14 + 6)) & 0xff; + ptr [8] = header_data_length; + + return avail; +} + + +// this is phase 2 of vaporization +// the shrunk video ES is re-packetized by using the source PES packets +// unused PS packs are skipped +// only wanted private stream 1 and MPEG audio packs are copied +// all nav packs are copied +void k9vamps::vap_phase2 (int seq_length) { + int i, id, avail, data_length; + uchar *ptr, *voptr = vobuf, *vohwp = vobuf + volen; + + for (i = 0; i < seq_length; i += SECT_SIZE) { + ptr = rptr + 14; + id = ptr [3]; + + switch (id) { + case 0xe0: + // video + avail = vohwp - voptr; + + if (avail) { + // still some video output data left + voptr += gen_video_packet (ptr, voptr, avail); + copy (SECT_SIZE); + } else { + // no video output data left - skip input sector + skip (SECT_SIZE); + skipped_video_packs++; + } + + break; + + case 0xbd: + // private 1: audio/subpicture + copy_private_1 (ptr); + break; + + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0xc6: + case 0xc7: + // MPEG audio + copy_mpeg_audio (ptr); + break; + + case 0xbb: + // system header/private 2: PCI/DSI + copy (SECT_SIZE); + break; + + case 0xbe: + // padding + data_length = ptr [4] << 8; + data_length |= ptr [5]; + + if (14 + data_length != SECT_SIZE - 6) + fatal (QString("Bad padding packet length at %1: %2").arg(rtell (ptr)).arg(data_length)); + //JMP: à vérifier + skip (SECT_SIZE); + break; + + default: + copy (SECT_SIZE); +// fatal("Encountered stream ID %02x at %llu, " +// "probably bad MPEG2 program stream", id, rtell (ptr)); + } + + if (wptr == wbuf + WBUF_SIZE) + // end of write buffer reached --> flush it to disk + flush (); + } +} + +QString & k9vamps::geterrMsg() { + return m_errMsg; +} + +bool k9vamps::geterror() { + return m_error; +} + +// entry point from main() +// the requant thread already has been started +void 
k9vamps::vaporize (void) { + int seq_length; + float fact = vap_fact; + + // process PS up to but not including first sequence header + vap_leader (); + + // just in case - maybe should spit out a warning/error here + if (eof) + return; + + total_packs++; + nav_packs++; + total_packs++; + video_packs++; + + // main loop + while (1) { + // do phase 1 of vaporization + seq_length = vap_phase1 (); + + if (eof) { + // EOF on source PS + // process packs after and including last sequence header + vap_trailer (seq_length); + + // only exit point from main loop + return; + } + + //fprintf (stderr, "seq_length=%d\n", seq_length); + + if (calc_ps_vap && vap_fact > 1.0f) { + // forecast video ES vaporization factor + // the basic formulars look like: + // vap_fact = total_packs/(restpacks+vop) + // restpacks = total_packs-(video_packs+skipped_aux_packs) + // fact = (video_packs*net-(gops*net/2+10))/(vop*net-(gops*net/2+10)) + // net = SECT_SIZE-(14+9) + // 14: pack header size + // 9: PES header size + // 10: PTS+DTS size in PES header of sequence header + // You are welcome to double check everything here! 
+ float vop, net; + net = (float) (SECT_SIZE - (14+9)); + vop = video_packs + skipped_aux_packs - + (float) total_packs * (1.0f-1.0f/vap_fact); + fact = ((float) video_packs * net - + ((float) sequence_headers * net/2.0f + 10.0f)) / + (vop * net - ((float) sequence_headers * net/2.0f + 10.0f)); + + //JMP + m_totfact+=fact ; + m_nbfact++; + m_avgfact=m_totfact/m_nbfact; + + // requant seems to get stuck on factors < 1 + if (fact < 1.0f) + fact = 1.0f; + + if (verbose >= 2) + fprintf (stderr, "Info: Target video ES vaporization factor: %.3f\n", + fact); + } + + vin_bytes += vilen; + + if (fact > 1.0f) { + // do requantization + volen = requant (vobuf, vibuf, vilen, fact); + } else { + // don't do requantization + tc_memcpy (vobuf, vibuf, vilen); + volen = vilen; + } + + vout_bytes += volen; + + // do phase 2 of vaporization + vap_phase2 (seq_length); + + //fprintf (stderr, + // "tot=%d, vid=%d, ps1=%d, nav=%d, sv=%d, sp1=%d, fact=%.3f\n", + // total_packs, video_packs, aux_packs, nav_packs, + // skipped_video_packs, skipped_aux_packs, fact); + } +} + +uint64_t k9vamps::getOutputBytes() { + return bytes_written; +} + +void k9vamps::abort() { + //fatal("vamps stopped"); + setNoData(); + if (m_requant !=NULL) + m_requant->wait(); + if (m_bgUpdate!=NULL) + m_bgUpdate->wait(); +} + +// this is a *very* sophisticated kind of error handling :-) +void +k9vamps::fatal (QString msg) { + m_errMsg=msg; + m_error=true; + if (m_requant !=NULL) + m_requant->terminate(); + if (m_bgUpdate !=NULL) + m_bgUpdate->terminate(); + terminate(); +} + +/**************************** BACKGROUND UPDATE **********************/ + +k9bgUpdate::k9bgUpdate(k9DVDBackup * _backup) { + m_backup = _backup; + +} + +void k9bgUpdate::update(uchar *_buffer,uint32_t _size) { + mutex.lock(); + m_buffer=(uchar*)malloc(_size); + tc_memcpy(m_buffer,_buffer,_size); + m_size=_size; + start(); + mutex.unlock(); +} + +void k9bgUpdate::run() { + m_backup->getOutput(m_buffer,m_size); + free(m_buffer); +} diff --git 
a/k9vamps/k9vamps.h b/k9vamps/k9vamps.h new file mode 100755 index 0000000..8f0b945 --- /dev/null +++ b/k9vamps/k9vamps.h @@ -0,0 +1,168 @@ +// +// C++ Interface: k9vamps +// +// Description: A transcription from Vamps in C++ +// +// +// Author: Jean-Michel PETIT <[email protected]>, (C) 2006 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#ifndef K9VAMPS_H +#define K9VAMPS_H + +#include "k9common.h" +#include <qfile.h> +#include <qthread.h> +#include <qobject.h> +#include <stdio.h> +#include <errno.h> +#include <stdarg.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <getopt.h> +#include <pthread.h> +#include <sys/stat.h> +#include <qmutex.h> +#include <qwaitcondition.h> +#include "k9dvdbackup.h" +#include "k9requant.h" +#include "k9fifo.h" +#include "k9saveimage.h" + +// DVD sector size +#define SECT_SIZE 2048 + +// read buffer size (4MB) +#define RBUF_SIZE (0x1000*1024) + +// write buffer size (4MB) +#define WBUF_SIZE (0x1000*1024) + +// initial video buffer size (1MB) +#define VBUF_SIZE (1024*1024) + + + + +class k9bgUpdate : public QThread +{ +private: + uchar * m_buffer; + k9DVDBackup *m_backup; + uint32_t m_size; + QMutex mutex; +public: + k9bgUpdate(k9DVDBackup * _backup); + void update(uchar *_buffer, uint32_t size); +protected: + void run(); + +}; + + + + +class k9vamps:public QThread +{ +private: + uchar *rbuf;// [RBUF_SIZE]; // the PS read buffer + uchar wbuf [WBUF_SIZE]; // the PS write buffer + uchar *vibuf; // the video ES requant input buffer + uchar *vobuf; // the video ES requant output buffer + uchar *rptr ; // pointer to current char in read buf + uchar *rhwp ; // read buffer high water pointer + uchar *wptr ; // pointer to first unused char in wbuf + uint64_t bytes_read; // total PS bytes read + uint64_t bytes_written; // total PS bytes written + uint64_t padding_bytes; // total padding bytes written + uint64_t vin_bytes; // total unshrinked video ES bytes + uint64_t vout_bytes; // total 
shrinked video ES bytes + uint64_t ps_size; // total PS size in bytes + uint32_t vbuf_size; // the video ES requant buffers' size + uint32_t rbuf_size; + uint32_t vilen; // current GOP's unshrinked vidES bytes + uint32_t volen; // current GOP's shrinked vidES bytes + int total_packs; // total no. PS packs + int video_packs; // no. video packs in PS + int skipped_video_packs; // skipped thereof + int aux_packs; // no. audio and subpicture packs in PS + int skipped_aux_packs; // skipped thereof + int sequence_headers; // no. sequence headers (== #GOPs) + int nav_packs; // no. nav packs + int eof; // end of file flag + int spu_track_map [32]; // subpicture track# translation map + int audio_track_map [8]; // audio track# translation map + int verbose; // level of verbosity + int calc_ps_vap; // calc vaporization based on PS size + bool m_preserve; // preserve audio/spu track numbers + float vap_fact; // vaporization factor from cmd line + bool noData; + QMutex mutex; + + k9bgUpdate *m_bgUpdate; + k9fifo m_fifo; + QString m_errMsg; + bool m_error; + + double avgdiff; + double m_totfact,m_nbfact,m_avgfact; + QFile *m_output; +private: + // prototypes + void vaporize (void); + void fatal (QString _msg); + int lock(int size); + void copy(int size); + void skip (int size); + void flush(); + uint64_t wtell (uchar *ptr); + + uint64_t rtell (uchar *ptr); + bool check_pack (uchar *ptr); + int check_video_packet (uchar *ptr); + int requant (uchar *dst, uchar *src, int n, float fact); + int new_private_1_type (uchar *ptr); + void copy_private_1 (uchar *ptr); + int new_mpeg_audio_id (int id); + void copy_mpeg_audio (uchar *ptr); + void vap_leader (); + void vap_trailer (int length); + int vap_phase1 (void); + int gen_video_packet (uchar *ptr, uchar *voptr, int avail); + void vap_phase2 (int seq_length); + pthread_t thread; + + int readData(uchar *data,uint size); + + QWaitCondition wDataRead; + QWaitCondition wDataReady; + k9DVDBackup *m_dvdbackup; + k9requant *m_requant; + 
k9SaveImage *m_saveImage; +protected: + void run(); +public: + k9vamps(k9DVDBackup *dvdbackup); + void addData(uchar* data,uint size); + void setNoData(); + void addSubpicture(uint id); + void addAudio(uint id); + void addAudio(uint id,uint newId); + void reset(); + void setInputSize(uint64_t size); + void setVapFactor(float factor); + void setSaveImage(k9SaveImage*); + void setOutput(QFile *_output); + uint64_t getOutputBytes(); + QString & geterrMsg(); + bool geterror(); + void abort(); + void setPreserve(bool _value); + ~k9vamps(); +}; + + +#endif diff --git a/k9vamps/putvlc.h b/k9vamps/putvlc.h new file mode 100644 index 0000000..d5af62a --- /dev/null +++ b/k9vamps/putvlc.h @@ -0,0 +1,250 @@ +// put blk + +/* type definitions for variable length code table entries */ + +typedef struct +{ + unsigned char code; /* right justified */ + char len; +} VLCtable; + +/* for codes longer than 8 bits (excluding leading zeroes) */ +typedef struct +{ + unsigned short code; /* right justified */ + char len; +} sVLCtable; + + +/* Table B-2, B-3, B-4 variable length codes for macroblock_type + * + * indexed by [macroblock_type] + */ + +const static VLCtable mbtypetab[3][32]= +{ + /* I */ + { + {0,0}, {1,1}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {0,0}, {1,2}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} + }, + /* P */ + { + {0,0}, {3,5}, {1,2}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {1,3}, {0,0}, {1,1}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {0,0}, {1,6}, {1,5}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {2,5}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} + }, + /* B */ + { + {0,0}, {3,5}, {0,0}, {0,0}, {2,3}, {0,0}, {3,3}, {0,0}, + {2,4}, {0,0}, {3,4}, {0,0}, {2,2}, {0,0}, {3,2}, {0,0}, + {0,0}, {1,6}, {0,0}, {0,0}, {0,0}, {0,0}, {2,6}, {0,0}, + {0,0}, {0,0}, {3,6}, {0,0}, {0,0}, {0,0}, {2,5}, {0,0} + } +}; + + +/* Table B-5 ... 
B-8 variable length codes for macroblock_type in + * scalable sequences + * + * not implemented + */ + +/* Table B-9, variable length codes for coded_block_pattern + * + * indexed by [coded_block_pattern] + */ + +const static VLCtable cbptable[64]= +{ + {0x01,9}, {0x0b,5}, {0x09,5}, {0x0d,6}, + {0x0d,4}, {0x17,7}, {0x13,7}, {0x1f,8}, + {0x0c,4}, {0x16,7}, {0x12,7}, {0x1e,8}, + {0x13,5}, {0x1b,8}, {0x17,8}, {0x13,8}, + {0x0b,4}, {0x15,7}, {0x11,7}, {0x1d,8}, + {0x11,5}, {0x19,8}, {0x15,8}, {0x11,8}, + {0x0f,6}, {0x0f,8}, {0x0d,8}, {0x03,9}, + {0x0f,5}, {0x0b,8}, {0x07,8}, {0x07,9}, + {0x0a,4}, {0x14,7}, {0x10,7}, {0x1c,8}, + {0x0e,6}, {0x0e,8}, {0x0c,8}, {0x02,9}, + {0x10,5}, {0x18,8}, {0x14,8}, {0x10,8}, + {0x0e,5}, {0x0a,8}, {0x06,8}, {0x06,9}, + {0x12,5}, {0x1a,8}, {0x16,8}, {0x12,8}, + {0x0d,5}, {0x09,8}, {0x05,8}, {0x05,9}, + {0x0c,5}, {0x08,8}, {0x04,8}, {0x04,9}, + {0x07,3}, {0x0a,5}, {0x08,5}, {0x0c,6} +}; + + + +/* Table B-14, DCT coefficients table zero + * + * indexed by [run][level-1] + * split into two tables (dct_code_tab1, dct_code_tab2) to reduce size + * 'first DCT coefficient' condition and 'End of Block' are treated elsewhere + * codes do not include s (sign bit) + */ + +const static VLCtable dct_code_tab1[2][40]= +{ + /* run = 0, level = 1...40 */ + { + {0x03, 2}, {0x04, 4}, {0x05, 5}, {0x06, 7}, + {0x26, 8}, {0x21, 8}, {0x0a,10}, {0x1d,12}, + {0x18,12}, {0x13,12}, {0x10,12}, {0x1a,13}, + {0x19,13}, {0x18,13}, {0x17,13}, {0x1f,14}, + {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14}, + {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14}, + {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14}, + {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15}, + {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15}, + {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15} + }, + /* run = 1, level = 1...18 */ + { + {0x03, 3}, {0x06, 6}, {0x25, 8}, {0x0c,10}, + {0x1b,12}, {0x16,13}, {0x15,13}, {0x1f,15}, + {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15}, + {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16}, + {0x11,16}, {0x10,16}, 
{0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0} + } +}; + +const static VLCtable dct_code_tab2[30][5]= +{ + /* run = 2...31, level = 1...5 */ + {{0x05, 4}, {0x04, 7}, {0x0b,10}, {0x14,12}, {0x14,13}}, + {{0x07, 5}, {0x24, 8}, {0x1c,12}, {0x13,13}, {0x00, 0}}, + {{0x06, 5}, {0x0f,10}, {0x12,12}, {0x00, 0}, {0x00, 0}}, + {{0x07, 6}, {0x09,10}, {0x12,13}, {0x00, 0}, {0x00, 0}}, + {{0x05, 6}, {0x1e,12}, {0x14,16}, {0x00, 0}, {0x00, 0}}, + {{0x04, 6}, {0x15,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x07, 7}, {0x11,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x05, 7}, {0x11,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x27, 8}, {0x10,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x23, 8}, {0x1a,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x22, 8}, {0x19,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x20, 8}, {0x18,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x0e,10}, {0x17,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x0d,10}, {0x16,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x08,10}, {0x15,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1a,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x19,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x17,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x16,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1e,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1d,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1c,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1b,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1e,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1d,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + 
{{0x1c,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1b,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}} +}; + + +/* Table B-15, DCT coefficients table one + * + * indexed by [run][level-1] + * split into two tables (dct_code_tab1a, dct_code_tab2a) to reduce size + * 'End of Block' is treated elsewhere + * codes do not include s (sign bit) + */ + +const static VLCtable dct_code_tab1a[2][40]= +{ + /* run = 0, level = 1...40 */ + { + {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5}, + {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7}, + {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8}, + {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14}, + {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14}, + {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14}, + {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14}, + {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15}, + {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15}, + {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15} + }, + /* run = 1, level = 1...18 */ + { + {0x02, 3}, {0x06, 5}, {0x79, 7}, {0x27, 8}, + {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15}, + {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15}, + {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16}, + {0x11,16}, {0x10,16}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}, + {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0} + } +}; + +const static VLCtable dct_code_tab2a[30][5]= +{ + /* run = 2...31, level = 1...5 */ + {{0x05, 5}, {0x07, 7}, {0xfc, 8}, {0x0c,10}, {0x14,13}}, + {{0x07, 5}, {0x26, 8}, {0x1c,12}, {0x13,13}, {0x00, 0}}, + {{0x06, 6}, {0xfd, 8}, {0x12,12}, {0x00, 0}, {0x00, 0}}, + {{0x07, 6}, {0x04, 9}, {0x12,13}, {0x00, 0}, {0x00, 0}}, + {{0x06, 7}, {0x1e,12}, {0x14,16}, {0x00, 0}, {0x00, 0}}, + {{0x04, 7}, {0x15,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x05, 7}, {0x11,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x78, 7}, {0x11,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x7a, 7}, {0x10,13}, 
{0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x21, 8}, {0x1a,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x25, 8}, {0x19,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x24, 8}, {0x18,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x05, 9}, {0x17,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x07, 9}, {0x16,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x0d,10}, {0x15,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1a,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x19,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x17,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x16,12}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1e,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1d,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1c,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1b,13}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1f,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1e,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1d,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1c,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}, + {{0x1b,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}} +}; + + +const static VLCtable addrinctab[33]= +{ + {0x01,1}, {0x03,3}, {0x02,3}, {0x03,4}, + {0x02,4}, {0x03,5}, {0x02,5}, {0x07,7}, + {0x06,7}, {0x0b,8}, {0x0a,8}, {0x09,8}, + {0x08,8}, {0x07,8}, {0x06,8}, {0x17,10}, + {0x16,10}, {0x15,10}, {0x14,10}, {0x13,10}, + {0x12,10}, {0x23,11}, {0x22,11}, {0x21,11}, + {0x20,11}, {0x1f,11}, {0x1e,11}, {0x1d,11}, + {0x1c,11}, {0x1b,11}, {0x1a,11}, {0x19,11}, + {0x18,11} +}; + +const static sVLCtable DClumtab[12]= +{ + {0x0004,3}, {0x0000,2}, {0x0001,2}, {0x0005,3}, {0x0006,3}, {0x000e,4}, + {0x001e,5}, {0x003e,6}, {0x007e,7}, {0x00fe,8}, {0x01fe,9}, {0x01ff,9} +}; + + diff --git a/k9vamps/qTable.h b/k9vamps/qTable.h new file mode 100644 index 0000000..8b0741f --- /dev/null +++ 
b/k9vamps/qTable.h @@ -0,0 +1,1141 @@ +static short quant_equ[113] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, + 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, + 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, + 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, + 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, + 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, + 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, + 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, + 41, 41 +}; + +static short quant_table_1_to_2[4096]; +static short quant_table_1_to_3[4096]; +static short quant_table_1_to_4[4096]; +static short quant_table_1_to_5[4096]; +static short quant_table_1_to_6[4096]; +static short quant_table_1_to_7[4096]; +static short quant_table_1_to_8[4096]; +static short quant_table_1_to_10[4096]; +static short quant_table_1_to_12[4096]; +static short quant_table_1_to_14[4096]; +static short quant_table_1_to_16[4096]; +static short quant_table_1_to_18[4096]; +static short quant_table_1_to_20[4096]; +static short quant_table_1_to_22[4096]; +static short quant_table_1_to_24[4096]; +static short quant_table_1_to_26[4096]; +static short quant_table_1_to_28[4096]; +static short quant_table_1_to_30[4096]; +static short quant_table_1_to_32[4096]; +static short quant_table_1_to_34[4096]; +static short quant_table_1_to_36[4096]; +static short quant_table_1_to_38[4096]; +static short quant_table_1_to_40[4096]; +static short quant_table_1_to_42[4096]; +static short quant_table_1_to_44[4096]; +static short quant_table_1_to_46[4096]; +static short quant_table_1_to_48[4096]; +static short quant_table_1_to_50[4096]; +static short quant_table_1_to_52[4096]; +static short quant_table_1_to_54[4096]; +static short quant_table_1_to_56[4096]; +static short quant_table_1_to_58[4096]; +static short quant_table_1_to_60[4096]; +static short quant_table_1_to_62[4096]; +static short quant_table_1_to_64[4096]; +static short 
quant_table_1_to_72[4096]; +static short quant_table_1_to_80[4096]; +static short quant_table_1_to_88[4096]; +static short quant_table_1_to_96[4096]; +static short quant_table_1_to_104[4096]; +static short quant_table_1_to_112[4096]; +static short quant_table_2_to_3[4096]; +static short quant_table_2_to_4[4096]; +static short quant_table_2_to_5[4096]; +static short quant_table_2_to_6[4096]; +static short quant_table_2_to_7[4096]; +static short quant_table_2_to_8[4096]; +static short quant_table_2_to_10[4096]; +static short quant_table_2_to_12[4096]; +static short quant_table_2_to_14[4096]; +static short quant_table_2_to_16[4096]; +static short quant_table_2_to_18[4096]; +static short quant_table_2_to_20[4096]; +static short quant_table_2_to_22[4096]; +static short quant_table_2_to_24[4096]; +static short quant_table_2_to_26[4096]; +static short quant_table_2_to_28[4096]; +static short quant_table_2_to_30[4096]; +static short quant_table_2_to_32[4096]; +static short quant_table_2_to_34[4096]; +static short quant_table_2_to_36[4096]; +static short quant_table_2_to_38[4096]; +static short quant_table_2_to_40[4096]; +static short quant_table_2_to_42[4096]; +static short quant_table_2_to_44[4096]; +static short quant_table_2_to_46[4096]; +static short quant_table_2_to_48[4096]; +static short quant_table_2_to_50[4096]; +static short quant_table_2_to_52[4096]; +static short quant_table_2_to_54[4096]; +static short quant_table_2_to_56[4096]; +static short quant_table_2_to_58[4096]; +static short quant_table_2_to_60[4096]; +static short quant_table_2_to_62[4096]; +static short quant_table_2_to_64[4096]; +static short quant_table_2_to_72[4096]; +static short quant_table_2_to_80[4096]; +static short quant_table_2_to_88[4096]; +static short quant_table_2_to_96[4096]; +static short quant_table_2_to_104[4096]; +static short quant_table_2_to_112[4096]; +static short quant_table_3_to_4[4096]; +static short quant_table_3_to_5[4096]; +static short quant_table_3_to_6[4096]; +static 
short quant_table_3_to_7[4096]; +static short quant_table_3_to_8[4096]; +static short quant_table_3_to_10[4096]; +static short quant_table_3_to_12[4096]; +static short quant_table_3_to_14[4096]; +static short quant_table_3_to_16[4096]; +static short quant_table_3_to_18[4096]; +static short quant_table_3_to_20[4096]; +static short quant_table_3_to_22[4096]; +static short quant_table_3_to_24[4096]; +static short quant_table_3_to_26[4096]; +static short quant_table_3_to_28[4096]; +static short quant_table_3_to_30[4096]; +static short quant_table_3_to_32[4096]; +static short quant_table_3_to_34[4096]; +static short quant_table_3_to_36[4096]; +static short quant_table_3_to_38[4096]; +static short quant_table_3_to_40[4096]; +static short quant_table_3_to_42[4096]; +static short quant_table_3_to_44[4096]; +static short quant_table_3_to_46[4096]; +static short quant_table_3_to_48[4096]; +static short quant_table_3_to_50[4096]; +static short quant_table_3_to_52[4096]; +static short quant_table_3_to_54[4096]; +static short quant_table_3_to_56[4096]; +static short quant_table_3_to_58[4096]; +static short quant_table_3_to_60[4096]; +static short quant_table_3_to_62[4096]; +static short quant_table_3_to_64[4096]; +static short quant_table_3_to_72[4096]; +static short quant_table_3_to_80[4096]; +static short quant_table_3_to_88[4096]; +static short quant_table_3_to_96[4096]; +static short quant_table_3_to_104[4096]; +static short quant_table_3_to_112[4096]; +static short quant_table_4_to_5[4096]; +static short quant_table_4_to_6[4096]; +static short quant_table_4_to_7[4096]; +static short quant_table_4_to_8[4096]; +static short quant_table_4_to_10[4096]; +static short quant_table_4_to_12[4096]; +static short quant_table_4_to_14[4096]; +static short quant_table_4_to_16[4096]; +static short quant_table_4_to_18[4096]; +static short quant_table_4_to_20[4096]; +static short quant_table_4_to_22[4096]; +static short quant_table_4_to_24[4096]; +static short quant_table_4_to_26[4096]; 
+static short quant_table_4_to_28[4096]; +static short quant_table_4_to_30[4096]; +static short quant_table_4_to_32[4096]; +static short quant_table_4_to_34[4096]; +static short quant_table_4_to_36[4096]; +static short quant_table_4_to_38[4096]; +static short quant_table_4_to_40[4096]; +static short quant_table_4_to_42[4096]; +static short quant_table_4_to_44[4096]; +static short quant_table_4_to_46[4096]; +static short quant_table_4_to_48[4096]; +static short quant_table_4_to_50[4096]; +static short quant_table_4_to_52[4096]; +static short quant_table_4_to_54[4096]; +static short quant_table_4_to_56[4096]; +static short quant_table_4_to_58[4096]; +static short quant_table_4_to_60[4096]; +static short quant_table_4_to_62[4096]; +static short quant_table_4_to_64[4096]; +static short quant_table_4_to_72[4096]; +static short quant_table_4_to_80[4096]; +static short quant_table_4_to_88[4096]; +static short quant_table_4_to_96[4096]; +static short quant_table_4_to_104[4096]; +static short quant_table_4_to_112[4096]; +static short quant_table_5_to_6[4096]; +static short quant_table_5_to_7[4096]; +static short quant_table_5_to_8[4096]; +static short quant_table_5_to_10[4096]; +static short quant_table_5_to_12[4096]; +static short quant_table_5_to_14[4096]; +static short quant_table_5_to_16[4096]; +static short quant_table_5_to_18[4096]; +static short quant_table_5_to_20[4096]; +static short quant_table_5_to_22[4096]; +static short quant_table_5_to_24[4096]; +static short quant_table_5_to_26[4096]; +static short quant_table_5_to_28[4096]; +static short quant_table_5_to_30[4096]; +static short quant_table_5_to_32[4096]; +static short quant_table_5_to_34[4096]; +static short quant_table_5_to_36[4096]; +static short quant_table_5_to_38[4096]; +static short quant_table_5_to_40[4096]; +static short quant_table_5_to_42[4096]; +static short quant_table_5_to_44[4096]; +static short quant_table_5_to_46[4096]; +static short quant_table_5_to_48[4096]; +static short 
quant_table_5_to_50[4096]; +static short quant_table_5_to_52[4096]; +static short quant_table_5_to_54[4096]; +static short quant_table_5_to_56[4096]; +static short quant_table_5_to_58[4096]; +static short quant_table_5_to_60[4096]; +static short quant_table_5_to_62[4096]; +static short quant_table_5_to_64[4096]; +static short quant_table_5_to_72[4096]; +static short quant_table_5_to_80[4096]; +static short quant_table_5_to_88[4096]; +static short quant_table_5_to_96[4096]; +static short quant_table_5_to_104[4096]; +static short quant_table_5_to_112[4096]; +static short quant_table_6_to_7[4096]; +static short quant_table_6_to_8[4096]; +static short quant_table_6_to_10[4096]; +static short quant_table_6_to_12[4096]; +static short quant_table_6_to_14[4096]; +static short quant_table_6_to_16[4096]; +static short quant_table_6_to_18[4096]; +static short quant_table_6_to_20[4096]; +static short quant_table_6_to_22[4096]; +static short quant_table_6_to_24[4096]; +static short quant_table_6_to_26[4096]; +static short quant_table_6_to_28[4096]; +static short quant_table_6_to_30[4096]; +static short quant_table_6_to_32[4096]; +static short quant_table_6_to_34[4096]; +static short quant_table_6_to_36[4096]; +static short quant_table_6_to_38[4096]; +static short quant_table_6_to_40[4096]; +static short quant_table_6_to_42[4096]; +static short quant_table_6_to_44[4096]; +static short quant_table_6_to_46[4096]; +static short quant_table_6_to_48[4096]; +static short quant_table_6_to_50[4096]; +static short quant_table_6_to_52[4096]; +static short quant_table_6_to_54[4096]; +static short quant_table_6_to_56[4096]; +static short quant_table_6_to_58[4096]; +static short quant_table_6_to_60[4096]; +static short quant_table_6_to_62[4096]; +static short quant_table_6_to_64[4096]; +static short quant_table_6_to_72[4096]; +static short quant_table_6_to_80[4096]; +static short quant_table_6_to_88[4096]; +static short quant_table_6_to_96[4096]; +static short quant_table_6_to_104[4096]; 
+static short quant_table_6_to_112[4096]; +static short quant_table_7_to_8[4096]; +static short quant_table_7_to_10[4096]; +static short quant_table_7_to_12[4096]; +static short quant_table_7_to_14[4096]; +static short quant_table_7_to_16[4096]; +static short quant_table_7_to_18[4096]; +static short quant_table_7_to_20[4096]; +static short quant_table_7_to_22[4096]; +static short quant_table_7_to_24[4096]; +static short quant_table_7_to_26[4096]; +static short quant_table_7_to_28[4096]; +static short quant_table_7_to_30[4096]; +static short quant_table_7_to_32[4096]; +static short quant_table_7_to_34[4096]; +static short quant_table_7_to_36[4096]; +static short quant_table_7_to_38[4096]; +static short quant_table_7_to_40[4096]; +static short quant_table_7_to_42[4096]; +static short quant_table_7_to_44[4096]; +static short quant_table_7_to_46[4096]; +static short quant_table_7_to_48[4096]; +static short quant_table_7_to_50[4096]; +static short quant_table_7_to_52[4096]; +static short quant_table_7_to_54[4096]; +static short quant_table_7_to_56[4096]; +static short quant_table_7_to_58[4096]; +static short quant_table_7_to_60[4096]; +static short quant_table_7_to_62[4096]; +static short quant_table_7_to_64[4096]; +static short quant_table_7_to_72[4096]; +static short quant_table_7_to_80[4096]; +static short quant_table_7_to_88[4096]; +static short quant_table_7_to_96[4096]; +static short quant_table_7_to_104[4096]; +static short quant_table_7_to_112[4096]; +static short quant_table_8_to_10[4096]; +static short quant_table_8_to_12[4096]; +static short quant_table_8_to_14[4096]; +static short quant_table_8_to_16[4096]; +static short quant_table_8_to_18[4096]; +static short quant_table_8_to_20[4096]; +static short quant_table_8_to_22[4096]; +static short quant_table_8_to_24[4096]; +static short quant_table_8_to_26[4096]; +static short quant_table_8_to_28[4096]; +static short quant_table_8_to_30[4096]; +static short quant_table_8_to_32[4096]; +static short 
quant_table_8_to_34[4096]; +static short quant_table_8_to_36[4096]; +static short quant_table_8_to_38[4096]; +static short quant_table_8_to_40[4096]; +static short quant_table_8_to_42[4096]; +static short quant_table_8_to_44[4096]; +static short quant_table_8_to_46[4096]; +static short quant_table_8_to_48[4096]; +static short quant_table_8_to_50[4096]; +static short quant_table_8_to_52[4096]; +static short quant_table_8_to_54[4096]; +static short quant_table_8_to_56[4096]; +static short quant_table_8_to_58[4096]; +static short quant_table_8_to_60[4096]; +static short quant_table_8_to_62[4096]; +static short quant_table_8_to_64[4096]; +static short quant_table_8_to_72[4096]; +static short quant_table_8_to_80[4096]; +static short quant_table_8_to_88[4096]; +static short quant_table_8_to_96[4096]; +static short quant_table_8_to_104[4096]; +static short quant_table_8_to_112[4096]; +static short quant_table_10_to_12[4096]; +static short quant_table_10_to_14[4096]; +static short quant_table_10_to_16[4096]; +static short quant_table_10_to_18[4096]; +static short quant_table_10_to_20[4096]; +static short quant_table_10_to_22[4096]; +static short quant_table_10_to_24[4096]; +static short quant_table_10_to_26[4096]; +static short quant_table_10_to_28[4096]; +static short quant_table_10_to_30[4096]; +static short quant_table_10_to_32[4096]; +static short quant_table_10_to_34[4096]; +static short quant_table_10_to_36[4096]; +static short quant_table_10_to_38[4096]; +static short quant_table_10_to_40[4096]; +static short quant_table_10_to_42[4096]; +static short quant_table_10_to_44[4096]; +static short quant_table_10_to_46[4096]; +static short quant_table_10_to_48[4096]; +static short quant_table_10_to_50[4096]; +static short quant_table_10_to_52[4096]; +static short quant_table_10_to_54[4096]; +static short quant_table_10_to_56[4096]; +static short quant_table_10_to_58[4096]; +static short quant_table_10_to_60[4096]; +static short quant_table_10_to_62[4096]; +static short 
quant_table_10_to_64[4096]; +static short quant_table_10_to_72[4096]; +static short quant_table_10_to_80[4096]; +static short quant_table_10_to_88[4096]; +static short quant_table_10_to_96[4096]; +static short quant_table_10_to_104[4096]; +static short quant_table_10_to_112[4096]; +static short quant_table_12_to_14[4096]; +static short quant_table_12_to_16[4096]; +static short quant_table_12_to_18[4096]; +static short quant_table_12_to_20[4096]; +static short quant_table_12_to_22[4096]; +static short quant_table_12_to_24[4096]; +static short quant_table_12_to_26[4096]; +static short quant_table_12_to_28[4096]; +static short quant_table_12_to_30[4096]; +static short quant_table_12_to_32[4096]; +static short quant_table_12_to_34[4096]; +static short quant_table_12_to_36[4096]; +static short quant_table_12_to_38[4096]; +static short quant_table_12_to_40[4096]; +static short quant_table_12_to_42[4096]; +static short quant_table_12_to_44[4096]; +static short quant_table_12_to_46[4096]; +static short quant_table_12_to_48[4096]; +static short quant_table_12_to_50[4096]; +static short quant_table_12_to_52[4096]; +static short quant_table_12_to_54[4096]; +static short quant_table_12_to_56[4096]; +static short quant_table_12_to_58[4096]; +static short quant_table_12_to_60[4096]; +static short quant_table_12_to_62[4096]; +static short quant_table_12_to_64[4096]; +static short quant_table_12_to_72[4096]; +static short quant_table_12_to_80[4096]; +static short quant_table_12_to_88[4096]; +static short quant_table_12_to_96[4096]; +static short quant_table_12_to_104[4096]; +static short quant_table_12_to_112[4096]; +static short quant_table_14_to_16[4096]; +static short quant_table_14_to_18[4096]; +static short quant_table_14_to_20[4096]; +static short quant_table_14_to_22[4096]; +static short quant_table_14_to_24[4096]; +static short quant_table_14_to_26[4096]; +static short quant_table_14_to_28[4096]; +static short quant_table_14_to_30[4096]; +static short 
quant_table_14_to_32[4096]; +static short quant_table_14_to_34[4096]; +static short quant_table_14_to_36[4096]; +static short quant_table_14_to_38[4096]; +static short quant_table_14_to_40[4096]; +static short quant_table_14_to_42[4096]; +static short quant_table_14_to_44[4096]; +static short quant_table_14_to_46[4096]; +static short quant_table_14_to_48[4096]; +static short quant_table_14_to_50[4096]; +static short quant_table_14_to_52[4096]; +static short quant_table_14_to_54[4096]; +static short quant_table_14_to_56[4096]; +static short quant_table_14_to_58[4096]; +static short quant_table_14_to_60[4096]; +static short quant_table_14_to_62[4096]; +static short quant_table_14_to_64[4096]; +static short quant_table_14_to_72[4096]; +static short quant_table_14_to_80[4096]; +static short quant_table_14_to_88[4096]; +static short quant_table_14_to_96[4096]; +static short quant_table_14_to_104[4096]; +static short quant_table_14_to_112[4096]; +static short quant_table_16_to_18[4096]; +static short quant_table_16_to_20[4096]; +static short quant_table_16_to_22[4096]; +static short quant_table_16_to_24[4096]; +static short quant_table_16_to_26[4096]; +static short quant_table_16_to_28[4096]; +static short quant_table_16_to_30[4096]; +static short quant_table_16_to_32[4096]; +static short quant_table_16_to_34[4096]; +static short quant_table_16_to_36[4096]; +static short quant_table_16_to_38[4096]; +static short quant_table_16_to_40[4096]; +static short quant_table_16_to_42[4096]; +static short quant_table_16_to_44[4096]; +static short quant_table_16_to_46[4096]; +static short quant_table_16_to_48[4096]; +static short quant_table_16_to_50[4096]; +static short quant_table_16_to_52[4096]; +static short quant_table_16_to_54[4096]; +static short quant_table_16_to_56[4096]; +static short quant_table_16_to_58[4096]; +static short quant_table_16_to_60[4096]; +static short quant_table_16_to_62[4096]; +static short quant_table_16_to_64[4096]; +static short 
quant_table_16_to_72[4096]; +static short quant_table_16_to_80[4096]; +static short quant_table_16_to_88[4096]; +static short quant_table_16_to_96[4096]; +static short quant_table_16_to_104[4096]; +static short quant_table_16_to_112[4096]; +static short quant_table_18_to_20[4096]; +static short quant_table_18_to_22[4096]; +static short quant_table_18_to_24[4096]; +static short quant_table_18_to_26[4096]; +static short quant_table_18_to_28[4096]; +static short quant_table_18_to_30[4096]; +static short quant_table_18_to_32[4096]; +static short quant_table_18_to_34[4096]; +static short quant_table_18_to_36[4096]; +static short quant_table_18_to_38[4096]; +static short quant_table_18_to_40[4096]; +static short quant_table_18_to_42[4096]; +static short quant_table_18_to_44[4096]; +static short quant_table_18_to_46[4096]; +static short quant_table_18_to_48[4096]; +static short quant_table_18_to_50[4096]; +static short quant_table_18_to_52[4096]; +static short quant_table_18_to_54[4096]; +static short quant_table_18_to_56[4096]; +static short quant_table_18_to_58[4096]; +static short quant_table_18_to_60[4096]; +static short quant_table_18_to_62[4096]; +static short quant_table_18_to_64[4096]; +static short quant_table_18_to_72[4096]; +static short quant_table_18_to_80[4096]; +static short quant_table_18_to_88[4096]; +static short quant_table_18_to_96[4096]; +static short quant_table_18_to_104[4096]; +static short quant_table_18_to_112[4096]; +static short quant_table_20_to_22[4096]; +static short quant_table_20_to_24[4096]; +static short quant_table_20_to_26[4096]; +static short quant_table_20_to_28[4096]; +static short quant_table_20_to_30[4096]; +static short quant_table_20_to_32[4096]; +static short quant_table_20_to_34[4096]; +static short quant_table_20_to_36[4096]; +static short quant_table_20_to_38[4096]; +static short quant_table_20_to_40[4096]; +static short quant_table_20_to_42[4096]; +static short quant_table_20_to_44[4096]; +static short 
quant_table_20_to_46[4096]; +static short quant_table_20_to_48[4096]; +static short quant_table_20_to_50[4096]; +static short quant_table_20_to_52[4096]; +static short quant_table_20_to_54[4096]; +static short quant_table_20_to_56[4096]; +static short quant_table_20_to_58[4096]; +static short quant_table_20_to_60[4096]; +static short quant_table_20_to_62[4096]; +static short quant_table_20_to_64[4096]; +static short quant_table_20_to_72[4096]; +static short quant_table_20_to_80[4096]; +static short quant_table_20_to_88[4096]; +static short quant_table_20_to_96[4096]; +static short quant_table_20_to_104[4096]; +static short quant_table_20_to_112[4096]; +static short quant_table_22_to_24[4096]; +static short quant_table_22_to_26[4096]; +static short quant_table_22_to_28[4096]; +static short quant_table_22_to_30[4096]; +static short quant_table_22_to_32[4096]; +static short quant_table_22_to_34[4096]; +static short quant_table_22_to_36[4096]; +static short quant_table_22_to_38[4096]; +static short quant_table_22_to_40[4096]; +static short quant_table_22_to_42[4096]; +static short quant_table_22_to_44[4096]; +static short quant_table_22_to_46[4096]; +static short quant_table_22_to_48[4096]; +static short quant_table_22_to_50[4096]; +static short quant_table_22_to_52[4096]; +static short quant_table_22_to_54[4096]; +static short quant_table_22_to_56[4096]; +static short quant_table_22_to_58[4096]; +static short quant_table_22_to_60[4096]; +static short quant_table_22_to_62[4096]; +static short quant_table_22_to_64[4096]; +static short quant_table_22_to_72[4096]; +static short quant_table_22_to_80[4096]; +static short quant_table_22_to_88[4096]; +static short quant_table_22_to_96[4096]; +static short quant_table_22_to_104[4096]; +static short quant_table_22_to_112[4096]; +static short quant_table_24_to_26[4096]; +static short quant_table_24_to_28[4096]; +static short quant_table_24_to_30[4096]; +static short quant_table_24_to_32[4096]; +static short 
quant_table_24_to_34[4096]; +static short quant_table_24_to_36[4096]; +static short quant_table_24_to_38[4096]; +static short quant_table_24_to_40[4096]; +static short quant_table_24_to_42[4096]; +static short quant_table_24_to_44[4096]; +static short quant_table_24_to_46[4096]; +static short quant_table_24_to_48[4096]; +static short quant_table_24_to_50[4096]; +static short quant_table_24_to_52[4096]; +static short quant_table_24_to_54[4096]; +static short quant_table_24_to_56[4096]; +static short quant_table_24_to_58[4096]; +static short quant_table_24_to_60[4096]; +static short quant_table_24_to_62[4096]; +static short quant_table_24_to_64[4096]; +static short quant_table_24_to_72[4096]; +static short quant_table_24_to_80[4096]; +static short quant_table_24_to_88[4096]; +static short quant_table_24_to_96[4096]; +static short quant_table_24_to_104[4096]; +static short quant_table_24_to_112[4096]; +static short quant_table_26_to_28[4096]; +static short quant_table_26_to_30[4096]; +static short quant_table_26_to_32[4096]; +static short quant_table_26_to_34[4096]; +static short quant_table_26_to_36[4096]; +static short quant_table_26_to_38[4096]; +static short quant_table_26_to_40[4096]; +static short quant_table_26_to_42[4096]; +static short quant_table_26_to_44[4096]; +static short quant_table_26_to_46[4096]; +static short quant_table_26_to_48[4096]; +static short quant_table_26_to_50[4096]; +static short quant_table_26_to_52[4096]; +static short quant_table_26_to_54[4096]; +static short quant_table_26_to_56[4096]; +static short quant_table_26_to_58[4096]; +static short quant_table_26_to_60[4096]; +static short quant_table_26_to_62[4096]; +static short quant_table_26_to_64[4096]; +static short quant_table_26_to_72[4096]; +static short quant_table_26_to_80[4096]; +static short quant_table_26_to_88[4096]; +static short quant_table_26_to_96[4096]; +static short quant_table_26_to_104[4096]; +static short quant_table_26_to_112[4096]; +static short 
quant_table_28_to_30[4096]; +static short quant_table_28_to_32[4096]; +static short quant_table_28_to_34[4096]; +static short quant_table_28_to_36[4096]; +static short quant_table_28_to_38[4096]; +static short quant_table_28_to_40[4096]; +static short quant_table_28_to_42[4096]; +static short quant_table_28_to_44[4096]; +static short quant_table_28_to_46[4096]; +static short quant_table_28_to_48[4096]; +static short quant_table_28_to_50[4096]; +static short quant_table_28_to_52[4096]; +static short quant_table_28_to_54[4096]; +static short quant_table_28_to_56[4096]; +static short quant_table_28_to_58[4096]; +static short quant_table_28_to_60[4096]; +static short quant_table_28_to_62[4096]; +static short quant_table_28_to_64[4096]; +static short quant_table_28_to_72[4096]; +static short quant_table_28_to_80[4096]; +static short quant_table_28_to_88[4096]; +static short quant_table_28_to_96[4096]; +static short quant_table_28_to_104[4096]; +static short quant_table_28_to_112[4096]; +static short quant_table_30_to_32[4096]; +static short quant_table_30_to_34[4096]; +static short quant_table_30_to_36[4096]; +static short quant_table_30_to_38[4096]; +static short quant_table_30_to_40[4096]; +static short quant_table_30_to_42[4096]; +static short quant_table_30_to_44[4096]; +static short quant_table_30_to_46[4096]; +static short quant_table_30_to_48[4096]; +static short quant_table_30_to_50[4096]; +static short quant_table_30_to_52[4096]; +static short quant_table_30_to_54[4096]; +static short quant_table_30_to_56[4096]; +static short quant_table_30_to_58[4096]; +static short quant_table_30_to_60[4096]; +static short quant_table_30_to_62[4096]; +static short quant_table_30_to_64[4096]; +static short quant_table_30_to_72[4096]; +static short quant_table_30_to_80[4096]; +static short quant_table_30_to_88[4096]; +static short quant_table_30_to_96[4096]; +static short quant_table_30_to_104[4096]; +static short quant_table_30_to_112[4096]; +static short 
quant_table_32_to_34[4096]; +static short quant_table_32_to_36[4096]; +static short quant_table_32_to_38[4096]; +static short quant_table_32_to_40[4096]; +static short quant_table_32_to_42[4096]; +static short quant_table_32_to_44[4096]; +static short quant_table_32_to_46[4096]; +static short quant_table_32_to_48[4096]; +static short quant_table_32_to_50[4096]; +static short quant_table_32_to_52[4096]; +static short quant_table_32_to_54[4096]; +static short quant_table_32_to_56[4096]; +static short quant_table_32_to_58[4096]; +static short quant_table_32_to_60[4096]; +static short quant_table_32_to_62[4096]; +static short quant_table_32_to_64[4096]; +static short quant_table_32_to_72[4096]; +static short quant_table_32_to_80[4096]; +static short quant_table_32_to_88[4096]; +static short quant_table_32_to_96[4096]; +static short quant_table_32_to_104[4096]; +static short quant_table_32_to_112[4096]; +static short quant_table_34_to_36[4096]; +static short quant_table_34_to_38[4096]; +static short quant_table_34_to_40[4096]; +static short quant_table_34_to_42[4096]; +static short quant_table_34_to_44[4096]; +static short quant_table_34_to_46[4096]; +static short quant_table_34_to_48[4096]; +static short quant_table_34_to_50[4096]; +static short quant_table_34_to_52[4096]; +static short quant_table_34_to_54[4096]; +static short quant_table_34_to_56[4096]; +static short quant_table_34_to_58[4096]; +static short quant_table_34_to_60[4096]; +static short quant_table_34_to_62[4096]; +static short quant_table_34_to_64[4096]; +static short quant_table_34_to_72[4096]; +static short quant_table_34_to_80[4096]; +static short quant_table_34_to_88[4096]; +static short quant_table_34_to_96[4096]; +static short quant_table_34_to_104[4096]; +static short quant_table_34_to_112[4096]; +static short quant_table_36_to_38[4096]; +static short quant_table_36_to_40[4096]; +static short quant_table_36_to_42[4096]; +static short quant_table_36_to_44[4096]; +static short 
quant_table_36_to_46[4096]; +static short quant_table_36_to_48[4096]; +static short quant_table_36_to_50[4096]; +static short quant_table_36_to_52[4096]; +static short quant_table_36_to_54[4096]; +static short quant_table_36_to_56[4096]; +static short quant_table_36_to_58[4096]; +static short quant_table_36_to_60[4096]; +static short quant_table_36_to_62[4096]; +static short quant_table_36_to_64[4096]; +static short quant_table_36_to_72[4096]; +static short quant_table_36_to_80[4096]; +static short quant_table_36_to_88[4096]; +static short quant_table_36_to_96[4096]; +static short quant_table_36_to_104[4096]; +static short quant_table_36_to_112[4096]; +static short quant_table_38_to_40[4096]; +static short quant_table_38_to_42[4096]; +static short quant_table_38_to_44[4096]; +static short quant_table_38_to_46[4096]; +static short quant_table_38_to_48[4096]; +static short quant_table_38_to_50[4096]; +static short quant_table_38_to_52[4096]; +static short quant_table_38_to_54[4096]; +static short quant_table_38_to_56[4096]; +static short quant_table_38_to_58[4096]; +static short quant_table_38_to_60[4096]; +static short quant_table_38_to_62[4096]; +static short quant_table_38_to_64[4096]; +static short quant_table_38_to_72[4096]; +static short quant_table_38_to_80[4096]; +static short quant_table_38_to_88[4096]; +static short quant_table_38_to_96[4096]; +static short quant_table_38_to_104[4096]; +static short quant_table_38_to_112[4096]; +static short quant_table_40_to_42[4096]; +static short quant_table_40_to_44[4096]; +static short quant_table_40_to_46[4096]; +static short quant_table_40_to_48[4096]; +static short quant_table_40_to_50[4096]; +static short quant_table_40_to_52[4096]; +static short quant_table_40_to_54[4096]; +static short quant_table_40_to_56[4096]; +static short quant_table_40_to_58[4096]; +static short quant_table_40_to_60[4096]; +static short quant_table_40_to_62[4096]; +static short quant_table_40_to_64[4096]; +static short 
quant_table_40_to_72[4096]; +static short quant_table_40_to_80[4096]; +static short quant_table_40_to_88[4096]; +static short quant_table_40_to_96[4096]; +static short quant_table_40_to_104[4096]; +static short quant_table_40_to_112[4096]; +static short quant_table_42_to_44[4096]; +static short quant_table_42_to_46[4096]; +static short quant_table_42_to_48[4096]; +static short quant_table_42_to_50[4096]; +static short quant_table_42_to_52[4096]; +static short quant_table_42_to_54[4096]; +static short quant_table_42_to_56[4096]; +static short quant_table_42_to_58[4096]; +static short quant_table_42_to_60[4096]; +static short quant_table_42_to_62[4096]; +static short quant_table_42_to_64[4096]; +static short quant_table_42_to_72[4096]; +static short quant_table_42_to_80[4096]; +static short quant_table_42_to_88[4096]; +static short quant_table_42_to_96[4096]; +static short quant_table_42_to_104[4096]; +static short quant_table_42_to_112[4096]; +static short quant_table_44_to_46[4096]; +static short quant_table_44_to_48[4096]; +static short quant_table_44_to_50[4096]; +static short quant_table_44_to_52[4096]; +static short quant_table_44_to_54[4096]; +static short quant_table_44_to_56[4096]; +static short quant_table_44_to_58[4096]; +static short quant_table_44_to_60[4096]; +static short quant_table_44_to_62[4096]; +static short quant_table_44_to_64[4096]; +static short quant_table_44_to_72[4096]; +static short quant_table_44_to_80[4096]; +static short quant_table_44_to_88[4096]; +static short quant_table_44_to_96[4096]; +static short quant_table_44_to_104[4096]; +static short quant_table_44_to_112[4096]; +static short quant_table_46_to_48[4096]; +static short quant_table_46_to_50[4096]; +static short quant_table_46_to_52[4096]; +static short quant_table_46_to_54[4096]; +static short quant_table_46_to_56[4096]; +static short quant_table_46_to_58[4096]; +static short quant_table_46_to_60[4096]; +static short quant_table_46_to_62[4096]; +static short 
quant_table_46_to_64[4096]; +static short quant_table_46_to_72[4096]; +static short quant_table_46_to_80[4096]; +static short quant_table_46_to_88[4096]; +static short quant_table_46_to_96[4096]; +static short quant_table_46_to_104[4096]; +static short quant_table_46_to_112[4096]; +static short quant_table_48_to_50[4096]; +static short quant_table_48_to_52[4096]; +static short quant_table_48_to_54[4096]; +static short quant_table_48_to_56[4096]; +static short quant_table_48_to_58[4096]; +static short quant_table_48_to_60[4096]; +static short quant_table_48_to_62[4096]; +static short quant_table_48_to_64[4096]; +static short quant_table_48_to_72[4096]; +static short quant_table_48_to_80[4096]; +static short quant_table_48_to_88[4096]; +static short quant_table_48_to_96[4096]; +static short quant_table_48_to_104[4096]; +static short quant_table_48_to_112[4096]; +static short quant_table_50_to_52[4096]; +static short quant_table_50_to_54[4096]; +static short quant_table_50_to_56[4096]; +static short quant_table_50_to_58[4096]; +static short quant_table_50_to_60[4096]; +static short quant_table_50_to_62[4096]; +static short quant_table_50_to_64[4096]; +static short quant_table_50_to_72[4096]; +static short quant_table_50_to_80[4096]; +static short quant_table_50_to_88[4096]; +static short quant_table_50_to_96[4096]; +static short quant_table_50_to_104[4096]; +static short quant_table_50_to_112[4096]; +static short quant_table_52_to_54[4096]; +static short quant_table_52_to_56[4096]; +static short quant_table_52_to_58[4096]; +static short quant_table_52_to_60[4096]; +static short quant_table_52_to_62[4096]; +static short quant_table_52_to_64[4096]; +static short quant_table_52_to_72[4096]; +static short quant_table_52_to_80[4096]; +static short quant_table_52_to_88[4096]; +static short quant_table_52_to_96[4096]; +static short quant_table_52_to_104[4096]; +static short quant_table_52_to_112[4096]; +static short quant_table_54_to_56[4096]; +static short 
quant_table_54_to_58[4096]; +static short quant_table_54_to_60[4096]; +static short quant_table_54_to_62[4096]; +static short quant_table_54_to_64[4096]; +static short quant_table_54_to_72[4096]; +static short quant_table_54_to_80[4096]; +static short quant_table_54_to_88[4096]; +static short quant_table_54_to_96[4096]; +static short quant_table_54_to_104[4096]; +static short quant_table_54_to_112[4096]; +static short quant_table_56_to_58[4096]; +static short quant_table_56_to_60[4096]; +static short quant_table_56_to_62[4096]; +static short quant_table_56_to_64[4096]; +static short quant_table_56_to_72[4096]; +static short quant_table_56_to_80[4096]; +static short quant_table_56_to_88[4096]; +static short quant_table_56_to_96[4096]; +static short quant_table_56_to_104[4096]; +static short quant_table_56_to_112[4096]; +static short quant_table_58_to_60[4096]; +static short quant_table_58_to_62[4096]; +static short quant_table_58_to_64[4096]; +static short quant_table_58_to_72[4096]; +static short quant_table_58_to_80[4096]; +static short quant_table_58_to_88[4096]; +static short quant_table_58_to_96[4096]; +static short quant_table_58_to_104[4096]; +static short quant_table_58_to_112[4096]; +static short quant_table_60_to_62[4096]; +static short quant_table_60_to_64[4096]; +static short quant_table_60_to_72[4096]; +static short quant_table_60_to_80[4096]; +static short quant_table_60_to_88[4096]; +static short quant_table_60_to_96[4096]; +static short quant_table_60_to_104[4096]; +static short quant_table_60_to_112[4096]; +static short quant_table_62_to_64[4096]; +static short quant_table_62_to_72[4096]; +static short quant_table_62_to_80[4096]; +static short quant_table_62_to_88[4096]; +static short quant_table_62_to_96[4096]; +static short quant_table_62_to_104[4096]; +static short quant_table_62_to_112[4096]; +static short quant_table_64_to_72[4096]; +static short quant_table_64_to_80[4096]; +static short quant_table_64_to_88[4096]; +static short 
quant_table_64_to_96[4096]; +static short quant_table_64_to_104[4096]; +static short quant_table_64_to_112[4096]; +static short quant_table_72_to_80[4096]; +static short quant_table_72_to_88[4096]; +static short quant_table_72_to_96[4096]; +static short quant_table_72_to_104[4096]; +static short quant_table_72_to_112[4096]; +static short quant_table_80_to_88[4096]; +static short quant_table_80_to_96[4096]; +static short quant_table_80_to_104[4096]; +static short quant_table_80_to_112[4096]; +static short quant_table_88_to_96[4096]; +static short quant_table_88_to_104[4096]; +static short quant_table_88_to_112[4096]; +static short quant_table_96_to_104[4096]; +static short quant_table_96_to_112[4096]; +static short quant_table_104_to_112[4096]; +static short *quant_tables[42][42] = { +{ 0, &quant_table_1_to_2[2048], &quant_table_1_to_3[2048], &quant_table_1_to_4[2048], &quant_table_1_to_5[2048], + &quant_table_1_to_6[2048], &quant_table_1_to_7[2048], &quant_table_1_to_8[2048], &quant_table_1_to_10[2048], + &quant_table_1_to_12[2048], &quant_table_1_to_14[2048], &quant_table_1_to_16[2048], &quant_table_1_to_18[2048], + &quant_table_1_to_20[2048], &quant_table_1_to_22[2048], &quant_table_1_to_24[2048], &quant_table_1_to_26[2048], + &quant_table_1_to_28[2048], &quant_table_1_to_30[2048], &quant_table_1_to_32[2048], &quant_table_1_to_34[2048], + &quant_table_1_to_36[2048], &quant_table_1_to_38[2048], &quant_table_1_to_40[2048], &quant_table_1_to_42[2048], + &quant_table_1_to_44[2048], &quant_table_1_to_46[2048], &quant_table_1_to_48[2048], &quant_table_1_to_50[2048], + &quant_table_1_to_52[2048], &quant_table_1_to_54[2048], &quant_table_1_to_56[2048], &quant_table_1_to_58[2048], + &quant_table_1_to_60[2048], &quant_table_1_to_62[2048], &quant_table_1_to_64[2048], &quant_table_1_to_72[2048], + &quant_table_1_to_80[2048], &quant_table_1_to_88[2048], &quant_table_1_to_96[2048], &quant_table_1_to_104[2048], + &quant_table_1_to_112[2048]}, +{ 0,0, &quant_table_2_to_3[2048], 
&quant_table_2_to_4[2048], &quant_table_2_to_5[2048], + &quant_table_2_to_6[2048], &quant_table_2_to_7[2048], &quant_table_2_to_8[2048], &quant_table_2_to_10[2048], + &quant_table_2_to_12[2048], &quant_table_2_to_14[2048], &quant_table_2_to_16[2048], &quant_table_2_to_18[2048], + &quant_table_2_to_20[2048], &quant_table_2_to_22[2048], &quant_table_2_to_24[2048], &quant_table_2_to_26[2048], + &quant_table_2_to_28[2048], &quant_table_2_to_30[2048], &quant_table_2_to_32[2048], &quant_table_2_to_34[2048], + &quant_table_2_to_36[2048], &quant_table_2_to_38[2048], &quant_table_2_to_40[2048], &quant_table_2_to_42[2048], + &quant_table_2_to_44[2048], &quant_table_2_to_46[2048], &quant_table_2_to_48[2048], &quant_table_2_to_50[2048], + &quant_table_2_to_52[2048], &quant_table_2_to_54[2048], &quant_table_2_to_56[2048], &quant_table_2_to_58[2048], + &quant_table_2_to_60[2048], &quant_table_2_to_62[2048], &quant_table_2_to_64[2048], &quant_table_2_to_72[2048], + &quant_table_2_to_80[2048], &quant_table_2_to_88[2048], &quant_table_2_to_96[2048], &quant_table_2_to_104[2048], + &quant_table_2_to_112[2048]}, +{ 0,0,0, &quant_table_3_to_4[2048], &quant_table_3_to_5[2048], + &quant_table_3_to_6[2048], &quant_table_3_to_7[2048], &quant_table_3_to_8[2048], &quant_table_3_to_10[2048], + &quant_table_3_to_12[2048], &quant_table_3_to_14[2048], &quant_table_3_to_16[2048], &quant_table_3_to_18[2048], + &quant_table_3_to_20[2048], &quant_table_3_to_22[2048], &quant_table_3_to_24[2048], &quant_table_3_to_26[2048], + &quant_table_3_to_28[2048], &quant_table_3_to_30[2048], &quant_table_3_to_32[2048], &quant_table_3_to_34[2048], + &quant_table_3_to_36[2048], &quant_table_3_to_38[2048], &quant_table_3_to_40[2048], &quant_table_3_to_42[2048], + &quant_table_3_to_44[2048], &quant_table_3_to_46[2048], &quant_table_3_to_48[2048], &quant_table_3_to_50[2048], + &quant_table_3_to_52[2048], &quant_table_3_to_54[2048], &quant_table_3_to_56[2048], &quant_table_3_to_58[2048], + &quant_table_3_to_60[2048], 
&quant_table_3_to_62[2048], &quant_table_3_to_64[2048], &quant_table_3_to_72[2048], + &quant_table_3_to_80[2048], &quant_table_3_to_88[2048], &quant_table_3_to_96[2048], &quant_table_3_to_104[2048], + &quant_table_3_to_112[2048]}, +{ 0,0,0,0, &quant_table_4_to_5[2048], + &quant_table_4_to_6[2048], &quant_table_4_to_7[2048], &quant_table_4_to_8[2048], &quant_table_4_to_10[2048], + &quant_table_4_to_12[2048], &quant_table_4_to_14[2048], &quant_table_4_to_16[2048], &quant_table_4_to_18[2048], + &quant_table_4_to_20[2048], &quant_table_4_to_22[2048], &quant_table_4_to_24[2048], &quant_table_4_to_26[2048], + &quant_table_4_to_28[2048], &quant_table_4_to_30[2048], &quant_table_4_to_32[2048], &quant_table_4_to_34[2048], + &quant_table_4_to_36[2048], &quant_table_4_to_38[2048], &quant_table_4_to_40[2048], &quant_table_4_to_42[2048], + &quant_table_4_to_44[2048], &quant_table_4_to_46[2048], &quant_table_4_to_48[2048], &quant_table_4_to_50[2048], + &quant_table_4_to_52[2048], &quant_table_4_to_54[2048], &quant_table_4_to_56[2048], &quant_table_4_to_58[2048], + &quant_table_4_to_60[2048], &quant_table_4_to_62[2048], &quant_table_4_to_64[2048], &quant_table_4_to_72[2048], + &quant_table_4_to_80[2048], &quant_table_4_to_88[2048], &quant_table_4_to_96[2048], &quant_table_4_to_104[2048], + &quant_table_4_to_112[2048]}, +{ 0,0,0,0,0, &quant_table_5_to_6[2048], &quant_table_5_to_7[2048], &quant_table_5_to_8[2048], &quant_table_5_to_10[2048], + &quant_table_5_to_12[2048], &quant_table_5_to_14[2048], &quant_table_5_to_16[2048], &quant_table_5_to_18[2048], + &quant_table_5_to_20[2048], &quant_table_5_to_22[2048], &quant_table_5_to_24[2048], &quant_table_5_to_26[2048], + &quant_table_5_to_28[2048], &quant_table_5_to_30[2048], &quant_table_5_to_32[2048], &quant_table_5_to_34[2048], + &quant_table_5_to_36[2048], &quant_table_5_to_38[2048], &quant_table_5_to_40[2048], &quant_table_5_to_42[2048], + &quant_table_5_to_44[2048], &quant_table_5_to_46[2048], &quant_table_5_to_48[2048], 
&quant_table_5_to_50[2048], + &quant_table_5_to_52[2048], &quant_table_5_to_54[2048], &quant_table_5_to_56[2048], &quant_table_5_to_58[2048], + &quant_table_5_to_60[2048], &quant_table_5_to_62[2048], &quant_table_5_to_64[2048], &quant_table_5_to_72[2048], + &quant_table_5_to_80[2048], &quant_table_5_to_88[2048], &quant_table_5_to_96[2048], &quant_table_5_to_104[2048], + &quant_table_5_to_112[2048]}, +{ 0,0,0,0,0,0, &quant_table_6_to_7[2048], &quant_table_6_to_8[2048], &quant_table_6_to_10[2048], + &quant_table_6_to_12[2048], &quant_table_6_to_14[2048], &quant_table_6_to_16[2048], &quant_table_6_to_18[2048], + &quant_table_6_to_20[2048], &quant_table_6_to_22[2048], &quant_table_6_to_24[2048], &quant_table_6_to_26[2048], + &quant_table_6_to_28[2048], &quant_table_6_to_30[2048], &quant_table_6_to_32[2048], &quant_table_6_to_34[2048], + &quant_table_6_to_36[2048], &quant_table_6_to_38[2048], &quant_table_6_to_40[2048], &quant_table_6_to_42[2048], + &quant_table_6_to_44[2048], &quant_table_6_to_46[2048], &quant_table_6_to_48[2048], &quant_table_6_to_50[2048], + &quant_table_6_to_52[2048], &quant_table_6_to_54[2048], &quant_table_6_to_56[2048], &quant_table_6_to_58[2048], + &quant_table_6_to_60[2048], &quant_table_6_to_62[2048], &quant_table_6_to_64[2048], &quant_table_6_to_72[2048], + &quant_table_6_to_80[2048], &quant_table_6_to_88[2048], &quant_table_6_to_96[2048], &quant_table_6_to_104[2048], + &quant_table_6_to_112[2048]}, +{ 0,0,0,0,0,0,0, &quant_table_7_to_8[2048], &quant_table_7_to_10[2048], + &quant_table_7_to_12[2048], &quant_table_7_to_14[2048], &quant_table_7_to_16[2048], &quant_table_7_to_18[2048], + &quant_table_7_to_20[2048], &quant_table_7_to_22[2048], &quant_table_7_to_24[2048], &quant_table_7_to_26[2048], + &quant_table_7_to_28[2048], &quant_table_7_to_30[2048], &quant_table_7_to_32[2048], &quant_table_7_to_34[2048], + &quant_table_7_to_36[2048], &quant_table_7_to_38[2048], &quant_table_7_to_40[2048], &quant_table_7_to_42[2048], + 
&quant_table_7_to_44[2048], &quant_table_7_to_46[2048], &quant_table_7_to_48[2048], &quant_table_7_to_50[2048], + &quant_table_7_to_52[2048], &quant_table_7_to_54[2048], &quant_table_7_to_56[2048], &quant_table_7_to_58[2048], + &quant_table_7_to_60[2048], &quant_table_7_to_62[2048], &quant_table_7_to_64[2048], &quant_table_7_to_72[2048], + &quant_table_7_to_80[2048], &quant_table_7_to_88[2048], &quant_table_7_to_96[2048], &quant_table_7_to_104[2048], + &quant_table_7_to_112[2048]}, +{ 0,0,0,0,0,0,0,0, &quant_table_8_to_10[2048], + &quant_table_8_to_12[2048], &quant_table_8_to_14[2048], &quant_table_8_to_16[2048], &quant_table_8_to_18[2048], + &quant_table_8_to_20[2048], &quant_table_8_to_22[2048], &quant_table_8_to_24[2048], &quant_table_8_to_26[2048], + &quant_table_8_to_28[2048], &quant_table_8_to_30[2048], &quant_table_8_to_32[2048], &quant_table_8_to_34[2048], + &quant_table_8_to_36[2048], &quant_table_8_to_38[2048], &quant_table_8_to_40[2048], &quant_table_8_to_42[2048], + &quant_table_8_to_44[2048], &quant_table_8_to_46[2048], &quant_table_8_to_48[2048], &quant_table_8_to_50[2048], + &quant_table_8_to_52[2048], &quant_table_8_to_54[2048], &quant_table_8_to_56[2048], &quant_table_8_to_58[2048], + &quant_table_8_to_60[2048], &quant_table_8_to_62[2048], &quant_table_8_to_64[2048], &quant_table_8_to_72[2048], + &quant_table_8_to_80[2048], &quant_table_8_to_88[2048], &quant_table_8_to_96[2048], &quant_table_8_to_104[2048], + &quant_table_8_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0, &quant_table_10_to_12[2048], &quant_table_10_to_14[2048], &quant_table_10_to_16[2048], &quant_table_10_to_18[2048], + &quant_table_10_to_20[2048], &quant_table_10_to_22[2048], &quant_table_10_to_24[2048], &quant_table_10_to_26[2048], + &quant_table_10_to_28[2048], &quant_table_10_to_30[2048], &quant_table_10_to_32[2048], &quant_table_10_to_34[2048], + &quant_table_10_to_36[2048], &quant_table_10_to_38[2048], &quant_table_10_to_40[2048], &quant_table_10_to_42[2048], + 
&quant_table_10_to_44[2048], &quant_table_10_to_46[2048], &quant_table_10_to_48[2048], &quant_table_10_to_50[2048], + &quant_table_10_to_52[2048], &quant_table_10_to_54[2048], &quant_table_10_to_56[2048], &quant_table_10_to_58[2048], + &quant_table_10_to_60[2048], &quant_table_10_to_62[2048], &quant_table_10_to_64[2048], &quant_table_10_to_72[2048], + &quant_table_10_to_80[2048], &quant_table_10_to_88[2048], &quant_table_10_to_96[2048], &quant_table_10_to_104[2048], + &quant_table_10_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0, &quant_table_12_to_14[2048], &quant_table_12_to_16[2048], &quant_table_12_to_18[2048], + &quant_table_12_to_20[2048], &quant_table_12_to_22[2048], &quant_table_12_to_24[2048], &quant_table_12_to_26[2048], + &quant_table_12_to_28[2048], &quant_table_12_to_30[2048], &quant_table_12_to_32[2048], &quant_table_12_to_34[2048], + &quant_table_12_to_36[2048], &quant_table_12_to_38[2048], &quant_table_12_to_40[2048], &quant_table_12_to_42[2048], + &quant_table_12_to_44[2048], &quant_table_12_to_46[2048], &quant_table_12_to_48[2048], &quant_table_12_to_50[2048], + &quant_table_12_to_52[2048], &quant_table_12_to_54[2048], &quant_table_12_to_56[2048], &quant_table_12_to_58[2048], + &quant_table_12_to_60[2048], &quant_table_12_to_62[2048], &quant_table_12_to_64[2048], &quant_table_12_to_72[2048], + &quant_table_12_to_80[2048], &quant_table_12_to_88[2048], &quant_table_12_to_96[2048], &quant_table_12_to_104[2048], + &quant_table_12_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0, &quant_table_14_to_16[2048], &quant_table_14_to_18[2048], + &quant_table_14_to_20[2048], &quant_table_14_to_22[2048], &quant_table_14_to_24[2048], &quant_table_14_to_26[2048], + &quant_table_14_to_28[2048], &quant_table_14_to_30[2048], &quant_table_14_to_32[2048], &quant_table_14_to_34[2048], + &quant_table_14_to_36[2048], &quant_table_14_to_38[2048], &quant_table_14_to_40[2048], &quant_table_14_to_42[2048], + &quant_table_14_to_44[2048], &quant_table_14_to_46[2048], 
&quant_table_14_to_48[2048], &quant_table_14_to_50[2048], + &quant_table_14_to_52[2048], &quant_table_14_to_54[2048], &quant_table_14_to_56[2048], &quant_table_14_to_58[2048], + &quant_table_14_to_60[2048], &quant_table_14_to_62[2048], &quant_table_14_to_64[2048], &quant_table_14_to_72[2048], + &quant_table_14_to_80[2048], &quant_table_14_to_88[2048], &quant_table_14_to_96[2048], &quant_table_14_to_104[2048], + &quant_table_14_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_16_to_18[2048], + &quant_table_16_to_20[2048], &quant_table_16_to_22[2048], &quant_table_16_to_24[2048], &quant_table_16_to_26[2048], + &quant_table_16_to_28[2048], &quant_table_16_to_30[2048], &quant_table_16_to_32[2048], &quant_table_16_to_34[2048], + &quant_table_16_to_36[2048], &quant_table_16_to_38[2048], &quant_table_16_to_40[2048], &quant_table_16_to_42[2048], + &quant_table_16_to_44[2048], &quant_table_16_to_46[2048], &quant_table_16_to_48[2048], &quant_table_16_to_50[2048], + &quant_table_16_to_52[2048], &quant_table_16_to_54[2048], &quant_table_16_to_56[2048], &quant_table_16_to_58[2048], + &quant_table_16_to_60[2048], &quant_table_16_to_62[2048], &quant_table_16_to_64[2048], &quant_table_16_to_72[2048], + &quant_table_16_to_80[2048], &quant_table_16_to_88[2048], &quant_table_16_to_96[2048], &quant_table_16_to_104[2048], + &quant_table_16_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_18_to_20[2048], &quant_table_18_to_22[2048], &quant_table_18_to_24[2048], &quant_table_18_to_26[2048], + &quant_table_18_to_28[2048], &quant_table_18_to_30[2048], &quant_table_18_to_32[2048], &quant_table_18_to_34[2048], + &quant_table_18_to_36[2048], &quant_table_18_to_38[2048], &quant_table_18_to_40[2048], &quant_table_18_to_42[2048], + &quant_table_18_to_44[2048], &quant_table_18_to_46[2048], &quant_table_18_to_48[2048], &quant_table_18_to_50[2048], + &quant_table_18_to_52[2048], &quant_table_18_to_54[2048], &quant_table_18_to_56[2048], &quant_table_18_to_58[2048], + 
&quant_table_18_to_60[2048], &quant_table_18_to_62[2048], &quant_table_18_to_64[2048], &quant_table_18_to_72[2048], + &quant_table_18_to_80[2048], &quant_table_18_to_88[2048], &quant_table_18_to_96[2048], &quant_table_18_to_104[2048], + &quant_table_18_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_20_to_22[2048], &quant_table_20_to_24[2048], &quant_table_20_to_26[2048], + &quant_table_20_to_28[2048], &quant_table_20_to_30[2048], &quant_table_20_to_32[2048], &quant_table_20_to_34[2048], + &quant_table_20_to_36[2048], &quant_table_20_to_38[2048], &quant_table_20_to_40[2048], &quant_table_20_to_42[2048], + &quant_table_20_to_44[2048], &quant_table_20_to_46[2048], &quant_table_20_to_48[2048], &quant_table_20_to_50[2048], + &quant_table_20_to_52[2048], &quant_table_20_to_54[2048], &quant_table_20_to_56[2048], &quant_table_20_to_58[2048], + &quant_table_20_to_60[2048], &quant_table_20_to_62[2048], &quant_table_20_to_64[2048], &quant_table_20_to_72[2048], + &quant_table_20_to_80[2048], &quant_table_20_to_88[2048], &quant_table_20_to_96[2048], &quant_table_20_to_104[2048], + &quant_table_20_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_22_to_24[2048], &quant_table_22_to_26[2048], + &quant_table_22_to_28[2048], &quant_table_22_to_30[2048], &quant_table_22_to_32[2048], &quant_table_22_to_34[2048], + &quant_table_22_to_36[2048], &quant_table_22_to_38[2048], &quant_table_22_to_40[2048], &quant_table_22_to_42[2048], + &quant_table_22_to_44[2048], &quant_table_22_to_46[2048], &quant_table_22_to_48[2048], &quant_table_22_to_50[2048], + &quant_table_22_to_52[2048], &quant_table_22_to_54[2048], &quant_table_22_to_56[2048], &quant_table_22_to_58[2048], + &quant_table_22_to_60[2048], &quant_table_22_to_62[2048], &quant_table_22_to_64[2048], &quant_table_22_to_72[2048], + &quant_table_22_to_80[2048], &quant_table_22_to_88[2048], &quant_table_22_to_96[2048], &quant_table_22_to_104[2048], + &quant_table_22_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
&quant_table_24_to_26[2048], + &quant_table_24_to_28[2048], &quant_table_24_to_30[2048], &quant_table_24_to_32[2048], &quant_table_24_to_34[2048], + &quant_table_24_to_36[2048], &quant_table_24_to_38[2048], &quant_table_24_to_40[2048], &quant_table_24_to_42[2048], + &quant_table_24_to_44[2048], &quant_table_24_to_46[2048], &quant_table_24_to_48[2048], &quant_table_24_to_50[2048], + &quant_table_24_to_52[2048], &quant_table_24_to_54[2048], &quant_table_24_to_56[2048], &quant_table_24_to_58[2048], + &quant_table_24_to_60[2048], &quant_table_24_to_62[2048], &quant_table_24_to_64[2048], &quant_table_24_to_72[2048], + &quant_table_24_to_80[2048], &quant_table_24_to_88[2048], &quant_table_24_to_96[2048], &quant_table_24_to_104[2048], + &quant_table_24_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_26_to_28[2048], &quant_table_26_to_30[2048], &quant_table_26_to_32[2048], &quant_table_26_to_34[2048], + &quant_table_26_to_36[2048], &quant_table_26_to_38[2048], &quant_table_26_to_40[2048], &quant_table_26_to_42[2048], + &quant_table_26_to_44[2048], &quant_table_26_to_46[2048], &quant_table_26_to_48[2048], &quant_table_26_to_50[2048], + &quant_table_26_to_52[2048], &quant_table_26_to_54[2048], &quant_table_26_to_56[2048], &quant_table_26_to_58[2048], + &quant_table_26_to_60[2048], &quant_table_26_to_62[2048], &quant_table_26_to_64[2048], &quant_table_26_to_72[2048], + &quant_table_26_to_80[2048], &quant_table_26_to_88[2048], &quant_table_26_to_96[2048], &quant_table_26_to_104[2048], + &quant_table_26_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_28_to_30[2048], &quant_table_28_to_32[2048], &quant_table_28_to_34[2048], + &quant_table_28_to_36[2048], &quant_table_28_to_38[2048], &quant_table_28_to_40[2048], &quant_table_28_to_42[2048], + &quant_table_28_to_44[2048], &quant_table_28_to_46[2048], &quant_table_28_to_48[2048], &quant_table_28_to_50[2048], + &quant_table_28_to_52[2048], &quant_table_28_to_54[2048], 
&quant_table_28_to_56[2048], &quant_table_28_to_58[2048], + &quant_table_28_to_60[2048], &quant_table_28_to_62[2048], &quant_table_28_to_64[2048], &quant_table_28_to_72[2048], + &quant_table_28_to_80[2048], &quant_table_28_to_88[2048], &quant_table_28_to_96[2048], &quant_table_28_to_104[2048], + &quant_table_28_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_30_to_32[2048], &quant_table_30_to_34[2048], + &quant_table_30_to_36[2048], &quant_table_30_to_38[2048], &quant_table_30_to_40[2048], &quant_table_30_to_42[2048], + &quant_table_30_to_44[2048], &quant_table_30_to_46[2048], &quant_table_30_to_48[2048], &quant_table_30_to_50[2048], + &quant_table_30_to_52[2048], &quant_table_30_to_54[2048], &quant_table_30_to_56[2048], &quant_table_30_to_58[2048], + &quant_table_30_to_60[2048], &quant_table_30_to_62[2048], &quant_table_30_to_64[2048], &quant_table_30_to_72[2048], + &quant_table_30_to_80[2048], &quant_table_30_to_88[2048], &quant_table_30_to_96[2048], &quant_table_30_to_104[2048], + &quant_table_30_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_32_to_34[2048], + &quant_table_32_to_36[2048], &quant_table_32_to_38[2048], &quant_table_32_to_40[2048], &quant_table_32_to_42[2048], + &quant_table_32_to_44[2048], &quant_table_32_to_46[2048], &quant_table_32_to_48[2048], &quant_table_32_to_50[2048], + &quant_table_32_to_52[2048], &quant_table_32_to_54[2048], &quant_table_32_to_56[2048], &quant_table_32_to_58[2048], + &quant_table_32_to_60[2048], &quant_table_32_to_62[2048], &quant_table_32_to_64[2048], &quant_table_32_to_72[2048], + &quant_table_32_to_80[2048], &quant_table_32_to_88[2048], &quant_table_32_to_96[2048], &quant_table_32_to_104[2048], + &quant_table_32_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_34_to_36[2048], &quant_table_34_to_38[2048], &quant_table_34_to_40[2048], &quant_table_34_to_42[2048], + &quant_table_34_to_44[2048], &quant_table_34_to_46[2048], &quant_table_34_to_48[2048], 
&quant_table_34_to_50[2048], + &quant_table_34_to_52[2048], &quant_table_34_to_54[2048], &quant_table_34_to_56[2048], &quant_table_34_to_58[2048], + &quant_table_34_to_60[2048], &quant_table_34_to_62[2048], &quant_table_34_to_64[2048], &quant_table_34_to_72[2048], + &quant_table_34_to_80[2048], &quant_table_34_to_88[2048], &quant_table_34_to_96[2048], &quant_table_34_to_104[2048], + &quant_table_34_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_36_to_38[2048], &quant_table_36_to_40[2048], &quant_table_36_to_42[2048], + &quant_table_36_to_44[2048], &quant_table_36_to_46[2048], &quant_table_36_to_48[2048], &quant_table_36_to_50[2048], + &quant_table_36_to_52[2048], &quant_table_36_to_54[2048], &quant_table_36_to_56[2048], &quant_table_36_to_58[2048], + &quant_table_36_to_60[2048], &quant_table_36_to_62[2048], &quant_table_36_to_64[2048], &quant_table_36_to_72[2048], + &quant_table_36_to_80[2048], &quant_table_36_to_88[2048], &quant_table_36_to_96[2048], &quant_table_36_to_104[2048], + &quant_table_36_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_38_to_40[2048], &quant_table_38_to_42[2048], + &quant_table_38_to_44[2048], &quant_table_38_to_46[2048], &quant_table_38_to_48[2048], &quant_table_38_to_50[2048], + &quant_table_38_to_52[2048], &quant_table_38_to_54[2048], &quant_table_38_to_56[2048], &quant_table_38_to_58[2048], + &quant_table_38_to_60[2048], &quant_table_38_to_62[2048], &quant_table_38_to_64[2048], &quant_table_38_to_72[2048], + &quant_table_38_to_80[2048], &quant_table_38_to_88[2048], &quant_table_38_to_96[2048], &quant_table_38_to_104[2048], + &quant_table_38_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_40_to_42[2048], + &quant_table_40_to_44[2048], &quant_table_40_to_46[2048], &quant_table_40_to_48[2048], &quant_table_40_to_50[2048], + &quant_table_40_to_52[2048], &quant_table_40_to_54[2048], &quant_table_40_to_56[2048], &quant_table_40_to_58[2048], + 
&quant_table_40_to_60[2048], &quant_table_40_to_62[2048], &quant_table_40_to_64[2048], &quant_table_40_to_72[2048], + &quant_table_40_to_80[2048], &quant_table_40_to_88[2048], &quant_table_40_to_96[2048], &quant_table_40_to_104[2048], + &quant_table_40_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_42_to_44[2048], &quant_table_42_to_46[2048], &quant_table_42_to_48[2048], &quant_table_42_to_50[2048], + &quant_table_42_to_52[2048], &quant_table_42_to_54[2048], &quant_table_42_to_56[2048], &quant_table_42_to_58[2048], + &quant_table_42_to_60[2048], &quant_table_42_to_62[2048], &quant_table_42_to_64[2048], &quant_table_42_to_72[2048], + &quant_table_42_to_80[2048], &quant_table_42_to_88[2048], &quant_table_42_to_96[2048], &quant_table_42_to_104[2048], + &quant_table_42_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_44_to_46[2048], &quant_table_44_to_48[2048], &quant_table_44_to_50[2048], + &quant_table_44_to_52[2048], &quant_table_44_to_54[2048], &quant_table_44_to_56[2048], &quant_table_44_to_58[2048], + &quant_table_44_to_60[2048], &quant_table_44_to_62[2048], &quant_table_44_to_64[2048], &quant_table_44_to_72[2048], + &quant_table_44_to_80[2048], &quant_table_44_to_88[2048], &quant_table_44_to_96[2048], &quant_table_44_to_104[2048], + &quant_table_44_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_46_to_48[2048], &quant_table_46_to_50[2048], + &quant_table_46_to_52[2048], &quant_table_46_to_54[2048], &quant_table_46_to_56[2048], &quant_table_46_to_58[2048], + &quant_table_46_to_60[2048], &quant_table_46_to_62[2048], &quant_table_46_to_64[2048], &quant_table_46_to_72[2048], + &quant_table_46_to_80[2048], &quant_table_46_to_88[2048], &quant_table_46_to_96[2048], &quant_table_46_to_104[2048], + &quant_table_46_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_48_to_50[2048], + &quant_table_48_to_52[2048], 
&quant_table_48_to_54[2048], &quant_table_48_to_56[2048], &quant_table_48_to_58[2048], + &quant_table_48_to_60[2048], &quant_table_48_to_62[2048], &quant_table_48_to_64[2048], &quant_table_48_to_72[2048], + &quant_table_48_to_80[2048], &quant_table_48_to_88[2048], &quant_table_48_to_96[2048], &quant_table_48_to_104[2048], + &quant_table_48_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_50_to_52[2048], &quant_table_50_to_54[2048], &quant_table_50_to_56[2048], &quant_table_50_to_58[2048], + &quant_table_50_to_60[2048], &quant_table_50_to_62[2048], &quant_table_50_to_64[2048], &quant_table_50_to_72[2048], + &quant_table_50_to_80[2048], &quant_table_50_to_88[2048], &quant_table_50_to_96[2048], &quant_table_50_to_104[2048], + &quant_table_50_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_52_to_54[2048], &quant_table_52_to_56[2048], &quant_table_52_to_58[2048], + &quant_table_52_to_60[2048], &quant_table_52_to_62[2048], &quant_table_52_to_64[2048], &quant_table_52_to_72[2048], + &quant_table_52_to_80[2048], &quant_table_52_to_88[2048], &quant_table_52_to_96[2048], &quant_table_52_to_104[2048], + &quant_table_52_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_54_to_56[2048], &quant_table_54_to_58[2048], + &quant_table_54_to_60[2048], &quant_table_54_to_62[2048], &quant_table_54_to_64[2048], &quant_table_54_to_72[2048], + &quant_table_54_to_80[2048], &quant_table_54_to_88[2048], &quant_table_54_to_96[2048], &quant_table_54_to_104[2048], + &quant_table_54_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_56_to_58[2048], + &quant_table_56_to_60[2048], &quant_table_56_to_62[2048], &quant_table_56_to_64[2048], &quant_table_56_to_72[2048], + &quant_table_56_to_80[2048], &quant_table_56_to_88[2048], &quant_table_56_to_96[2048], &quant_table_56_to_104[2048], + &quant_table_56_to_112[2048]}, +{ 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_58_to_60[2048], &quant_table_58_to_62[2048], &quant_table_58_to_64[2048], &quant_table_58_to_72[2048], + &quant_table_58_to_80[2048], &quant_table_58_to_88[2048], &quant_table_58_to_96[2048], &quant_table_58_to_104[2048], + &quant_table_58_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_60_to_62[2048], &quant_table_60_to_64[2048], &quant_table_60_to_72[2048], + &quant_table_60_to_80[2048], &quant_table_60_to_88[2048], &quant_table_60_to_96[2048], &quant_table_60_to_104[2048], + &quant_table_60_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_62_to_64[2048], &quant_table_62_to_72[2048], + &quant_table_62_to_80[2048], &quant_table_62_to_88[2048], &quant_table_62_to_96[2048], &quant_table_62_to_104[2048], + &quant_table_62_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_64_to_72[2048], + &quant_table_64_to_80[2048], &quant_table_64_to_88[2048], &quant_table_64_to_96[2048], &quant_table_64_to_104[2048], + &quant_table_64_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_72_to_80[2048], &quant_table_72_to_88[2048], &quant_table_72_to_96[2048], &quant_table_72_to_104[2048], + &quant_table_72_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_80_to_88[2048], &quant_table_80_to_96[2048], &quant_table_80_to_104[2048], + &quant_table_80_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_88_to_96[2048], &quant_table_88_to_104[2048], + &quant_table_88_to_112[2048]}, +{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_96_to_104[2048], + &quant_table_96_to_112[2048]}, +{ 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, &quant_table_104_to_112[2048]},
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+};
+
diff --git a/k9vamps/tcmemcpy.cpp b/k9vamps/tcmemcpy.cpp
new file mode 100644
index 0000000..ceb7c69
--- /dev/null
+++ b/k9vamps/tcmemcpy.cpp
@@ -0,0 +1,483 @@
+/*
+ * tcmemcpy.c - optimized memcpy() routines for transcode
+ * Written by Andrew Church <[email protected]>
+ * Defines CPU-specific copy routines and tc_memcpy, the function pointer that tc_memcpy_init() binds to one of them. */
+
+#include <string.h>
+#include <stdio.h>
+#include "ac.h"
+
+/*************************************************************************/
+
+#if defined(ARCH_X86)
+
+/* MMX-optimized routine, intended for PMMX/PII processors.
+ * Nonstandard instructions used:
+ * (CPUID.MMX) MOVQ
+ * Copies bytes from src to dest in ascending address order and returns dest; counts below 64 run only the MOVS tail. */
+/* NOTE(review): the asm changes ESI, ECX, MM0-MM7 and memory, and moves ESP via push/pop, yet declares only inputs plus EAX/EDX clobbers; GCC extended asm expects modified operands as outputs and a "memory" clobber -- confirm. */
+void *ac_memcpy_mmx(void *dest, const void *src, size_t bytes)
+{
+    asm("\
+PENTIUM_LINE_SIZE = 32 # PMMX/PII cache line size \n\
+PENTIUM_CACHE_SIZE = 8192 # PMMX/PII total cache size \n\
+# Use only half because writes may touch the cache too (PII) \n\
+PENTIUM_CACHE_BLOCK = (PENTIUM_CACHE_SIZE/2 - PENTIUM_LINE_SIZE) \n\
+ \n\
+ push %%ebx # Save PIC register \n\
+ push %%edi # Save destination for return value \n\
+ cld # MOVS* should ascend \n\
+ \n\
+ mov $64, %%ebx # Constant \n\
+ \n\
+ cmp %%ebx, %%ecx \n\
+ jb mmx.memcpy_last # Just use movs if <64 bytes \n\
+ \n\
+ # First align destination address to a multiple of 8 bytes \n\
+ mov $8, %%eax # EAX <- (8-dest) & 7 \n\
+ sub %%edi, %%eax \n\
+ and $0b111, %%eax # ... which is the number of bytes to copy\n\
+ lea 0f, %%edx # Use a computed jump--faster than a loop\n\
+ sub %%eax, %%edx \n\
+ jmp *%%edx # Execute 0-7 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+0: sub %%eax, %%ecx # Update count \n\
+ \n\
+ # Now copy data in blocks \n\
+0: mov %%ecx, %%edx # EDX <- ECX >> 6 (cache lines to copy) \n\
+ shr $6, %%edx \n\
+ jz mmx.memcpy_last # <64 bytes left? Skip to end \n\
+ cmp $PENTIUM_CACHE_BLOCK/64, %%edx \n\
+ jb 1f # Limit size of block \n\
+ mov $PENTIUM_CACHE_BLOCK/64, %%edx \n\
+1: mov %%edx, %%eax # EAX <- EDX << 6 (bytes to copy) \n\
+ shl $6, %%eax \n\
+ sub %%eax, %%ecx # Update remaining count \n\
+ add %%eax, %%esi # Point to end of region to be block-copied\n\
+2: test %%eax, -32(%%esi) # Touch each cache line in reverse order\n\
+ test %%eax, -64(%%esi) \n\
+ sub %%ebx, %%esi # Update pointer \n\
+ sub %%ebx, %%eax # And loop \n\
+ jnz 2b \n\
+ # Note that ESI now points to the beginning of the block \n\
+3: movq (%%esi), %%mm0 # Do the actual copy, 64 bytes at a time\n\
+ movq 8(%%esi), %%mm1 \n\
+ movq 16(%%esi), %%mm2 \n\
+ movq 24(%%esi), %%mm3 \n\
+ movq 32(%%esi), %%mm4 \n\
+ movq 40(%%esi), %%mm5 \n\
+ movq 48(%%esi), %%mm6 \n\
+ movq 56(%%esi), %%mm7 \n\
+ movq %%mm0, (%%edi) \n\
+ movq %%mm1, 8(%%edi) \n\
+ movq %%mm2, 16(%%edi) \n\
+ movq %%mm3, 24(%%edi) \n\
+ movq %%mm4, 32(%%edi) \n\
+ movq %%mm5, 40(%%edi) \n\
+ movq %%mm6, 48(%%edi) \n\
+ movq %%mm7, 56(%%edi) \n\
+ add %%ebx, %%esi # Update pointers \n\
+ add %%ebx, %%edi \n\
+ dec %%edx # And loop \n\
+ jnz 3b \n\
+ jmp 0b \n\
+ \n\
+mmx.memcpy_last: \n\
+ # Copy last <64 bytes, using the computed jump trick \n\
+ mov %%ecx, %%eax # EAX <- ECX>>2 \n\
+ shr $2, %%eax \n\
+ lea 0f, %%edx \n\
+ sub %%eax, %%edx \n\
+ jmp *%%edx # Execute 0-15 MOVSD's \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+0: and $0b11, %%ecx # ECX <- ECX & 3 \n\
+ lea 0f, %%edx \n\
+ sub %%ecx, %%edx \n\
+ jmp *%%edx # Execute 0-3 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+0: \n\
+ # All done! \n\
+ emms # Clean up MMX state \n\
+ pop %%edi # Restore destination (return value) \n\
+ pop %%ebx # Restore PIC register \n\
+ " : /* no outputs */
+        : "D" (dest), "S" (src), "c" (bytes)
+        : "%eax", "%edx"
+    );
+    return dest;
+}
+/* NOTE(review): mmx.memcpy_last is a non-local assembler symbol, so this asm presumably cannot be emitted twice (e.g. if the function were inlined or cloned) -- confirm. */
+#endif /* ARCH_X86 */
+
+/*************************************************************************/
+
+#if defined(ARCH_X86)
+
+/* SSE-optimized routine. Backported from AMD64 routine below.
+ * Nonstandard instructions used:
+ * (CPUID.CMOVE) CMOVA
+ * (CPUID.MMX) MOVQ
+ * (CPUID.SSE) MOVNTQ
+ * Copies bytes from src to dest and returns dest. Below 64 bytes only the MOVS tail runs; at 0x10040 bytes or more (after alignment) the prefetch + MOVNTQ loop is used, otherwise REP MOVS. */
+/* NOTE(review): operands are declared input-only but ESI/ECX, MM0 and memory are modified and ESP moves via push/pop; sse.* labels are non-local symbols. Check against GCC's extended-asm rules. */
+void *ac_memcpy_sse(void *dest, const void *src, size_t bytes)
+{
+    asm("\
+ push %%ebx # Save PIC register \n\
+ push %%edi # Save destination for return value \n\
+ cld # MOVS* should ascend \n\
+ \n\
+ cmp $64, %%ecx # Skip block copy for small blocks \n\
+ jb sse.memcpy_last \n\
+ \n\
+ mov $128, %%ebx # Constant used later \n\
+ \n\
+ # First align destination address to a multiple of 8 bytes \n\
+ mov $8, %%eax # EAX <- (8-dest) & 7 \n\
+ sub %%edi, %%eax \n\
+ and $0b111, %%eax # ... which is the number of bytes to copy\n\
+ lea 0f, %%edx # Use a computed jump--faster than a loop\n\
+ sub %%eax, %%edx \n\
+ jmp *%%edx # Execute 0-7 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+0: sub %%eax, %%ecx # Update count \n\
+ \n\
+ cmp $0x10040, %%ecx # Is this a large block? (0x10040 is an \n\
+ # arbitrary value where prefetching and \n\
+ # write combining seem to start becoming\n\
+ # faster) \n\
+ jae sse.memcpy_bp # Yup, use prefetch copy \n\
+ \n\
+sse.memcpy_small: # Small block copy routine--no prefetch \n"
+#if 0
+" mov %%ecx, %%edx # EDX <- bytes to copy / 8 \n\
+ shr $3, %%edx \n\
+ mov %%edx, %%eax # Leave remainder in ECX for later \n\
+ shl $3, %%eax \n\
+ sub %%eax, %%ecx \n\
+ .align 16 \n\
+0: movq (%%esi), %%mm0 # Copy 8 bytes of data \n\
+ movq %%mm0, (%%edi) \n\
+ add $8, %%esi # Update pointers \n\
+ add $8, %%edi \n\
+ dec %%edx # And loop \n\
+ jg 0b \n\
+ jmp sse.memcpy_last # Copy any remaining bytes \n\
+ \n\
+ nop # Align loops below \n"
+#else
+" # It appears that a simple rep movs is faster than cleverness \n\
+ # with movq... \n\
+ mov %%ecx, %%edx # EDX <- ECX & 3 \n\
+ and $0b11, %%edx \n\
+ shr $2, %%ecx # ECX <- ECX >> 2 \n\
+ rep movsl # Copy away! \n\
+ mov %%edx, %%ecx # Take care of last 0-3 bytes \n\
+ rep movsb \n\
+ jmp sse.memcpy_end # And exit \n\
+ \n\
+ .align 16 \n\
+ nop \n\
+ nop \n"
+#endif
+"sse.memcpy_bp: # Block prefetch copy routine \n\
+0: mov %%ecx, %%edx # EDX: temp counter \n\
+ shr $6, %%edx # Divide by cache line size (64 bytes) \n\
+ cmp %%ebx, %%edx # ... and cap at 128 (8192 bytes) \n\
+ cmova %%ebx, %%edx \n\
+ shl $3, %%edx # EDX <- cache lines to copy * 8 \n\
+ mov %%edx, %%eax # EAX <- cache lines to preload * 8 \n\
+ # (also used as memory offset) \n\
+1: test %%eax, -64(%%esi,%%eax,8) # Preload cache lines in pairs \n\
+ test %%eax, -128(%%esi,%%eax,8) # (going backwards) \n\
+ # (note that test %%eax,... seems to be faster than prefetchnta \n\
+ # on x86) \n\
+ sub $16, %%eax # And loop \n\
+ jg 1b \n\
+ \n\
+ # Then copy--forward, which seems to be faster than reverse for \n\
+ # certain alignments \n\
+ xor %%eax, %%eax \n\
+2: movq (%%esi,%%eax,8), %%mm0 # Copy 8 bytes and loop \n\
+ movntq %%mm0, (%%edi,%%eax,8) \n\
+ inc %%eax \n\
+ cmp %%edx, %%eax \n\
+ jb 2b \n\
+ \n\
+ # Finally, update pointers and count, and loop \n\
+ shl $3, %%edx # EDX <- bytes copied \n\
+ add %%edx, %%esi \n\
+ add %%edx, %%edi \n\
+ sub %%edx, %%ecx \n\
+ cmp $64, %%ecx # At least one cache line left? \n\
+ jae 0b # Yup, loop \n\
+ \n\
+sse.memcpy_last: \n\
+ # Copy last <64 bytes, using the computed jump trick \n\
+ mov %%ecx, %%eax # EAX <- ECX>>2 \n\
+ shr $2, %%eax \n\
+ lea 0f, %%edx \n\
+ sub %%eax, %%edx \n\
+ jmp *%%edx # Execute 0-15 MOVSD's \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+ movsd \n\
+0: and $0b11, %%ecx # ECX <- ECX & 3 \n\
+ lea sse.memcpy_end, %%edx \n\
+ sub %%ecx, %%edx \n\
+ jmp *%%edx # Execute 0-3 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ \n\
+sse.memcpy_end: \n\
+ # All done! \n\
+ emms # Clean up after MMX instructions \n\
+ sfence # Flush the write buffer \n\
+ pop %%edi # Restore destination (return value) \n\
+ pop %%ebx # Restore PIC register \n\
+ " : /* no outputs */
+        : "D" (dest), "S" (src), "c" (bytes)
+        : "%eax", "%edx"
+    );
+    return dest;
+}
+
+#endif /* ARCH_X86 */
+
+/*************************************************************************/
+
+#if defined(ARCH_X86_64)
+
+/* AMD64-optimized routine, using SSE2. Derived from AMD64 optimization
+ * guide section 5.13: Appropriate Memory Copying Routines.
+ * Nonstandard instructions used:
+ * (CPUID.CMOVE) CMOVA
+ * (CPUID.SSE2) MOVDQA, MOVDQU, MOVNTDQ
+ *
+ * Note that this routine will also run more or less as-is (modulo register
+ * names and label(%%rip) references) on x86 CPUs, but tests have shown the
+ * SSE1 version above to be faster.
+ * Copies bytes from src to dest and returns dest; 0x38000 bytes or more (after alignment) take the prefetch + MOVNTDQ path. */
+
+/* The block copying code--macroized because we use two versions of it
+ * depending on whether the source is 16-byte-aligned or not. Pass either
+ * movdqa or movdqu (unquoted) for the parameter. The expansion expects EBX = 128 and RSI/RDI/RCX = source/destination/remaining bytes, and overwrites EAX, EDX and XMM0. */
+#define AMD64_BLOCK_MEMCPY(movdq) \
+" # First prefetch (note that if we end on an odd number of cache \n\
+ # lines, we skip prefetching the last one--faster that way than \n\
+ # prefetching line by line or treating it as a special case) \n\
+0: mov %%ecx, %%edx # EDX: temp counter (always <32 bits) \n\
+ shr $6, %%edx # Divide by cache line size (64 bytes) \n\
+ cmp %%ebx, %%edx # ... and cap at 128 (8192 bytes) \n\
+ cmova %%ebx, %%edx \n\
+ shl $3, %%edx # EDX <- cache lines to copy * 8 \n\
+ mov %%edx, %%eax # EAX <- cache lines to preload * 8 \n\
+ # (also used as memory offset) \n\
+1: prefetchnta -64(%%rsi,%%rax,8) # Preload cache lines in pairs \n\
+ prefetchnta -128(%%rsi,%%rax,8) # (going backwards) \n\
+ sub $16, %%eax # And loop \n\
+ jg 1b \n\
+ \n\
+ # Then copy--forward, which seems to be faster than reverse for \n\
+ # certain alignments \n\
+ xor %%eax, %%eax \n\
+2: " #movdq " (%%rsi,%%rax,8), %%xmm0 # Copy 16 bytes and loop \n\
+ movntdq %%xmm0, (%%rdi,%%rax,8) \n\
+ add $2, %%eax \n\
+ cmp %%edx, %%eax \n\
+ jb 2b \n\
+ \n\
+ # Finally, update pointers and count, and loop \n\
+ shl $3, %%edx # EDX <- bytes copied \n\
+ add %%rdx, %%rsi \n\
+ add %%rdx, %%rdi \n\
+ sub %%rdx, %%rcx \n\
+ cmp $64, %%rcx # At least one cache line left? \n\
+ jae 0b # Yup, loop \n"
+/* NOTE(review): push inside the asm moves RSP without the compiler knowing (presumably unsafe with the x86-64 red zone); XMM0 and memory are modified but not listed as clobbers, and RSI/RCX are input-only operands that change -- confirm. */
+void *ac_memcpy_amd64(void *dest, const void *src, size_t bytes)
+{
+    asm("\
+ push %%rdi # Save destination for return value \n\
+ cld # MOVS* should ascend \n\
+ \n\
+ cmp $64, %%rcx # Skip block copy for small blocks \n\
+ jb amd64.memcpy_last \n\
+ \n\
+ mov $128, %%ebx # Constant used later \n\
+ \n\
+ # First align destination address to a multiple of 16 bytes \n\
+ mov $8, %%eax # EAX <- (8-dest) & 7 \n\
+ sub %%edi, %%eax # (we don't care about the top 32 bits) \n\
+ and $0b111, %%eax # ... which is the number of bytes to copy\n\
+ lea 0f(%%rip), %%rdx # Use a computed jump--faster than a loop\n\
+ sub %%rax, %%rdx \n\
+ jmp *%%rdx # Execute 0-7 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+0: sub %%rax, %%rcx # Update count \n\
+ test $0b1000, %%edi # Is destination not 16-byte aligned? \n\
+ je 1f \n\
+ movsq # Then move 8 bytes to align it \n\
+ sub $8, %%rcx \n\
+ \n\
+1: cmp $0x38000, %%rcx # Is this a large block? (0x38000 is an \n\
+ # arbitrary value where prefetching and \n\
+ # write combining seem to start becoming\n\
+ # faster) \n\
+ jb amd64.memcpy_small # Nope, use small copy (no prefetch/WC) \n\
+ test $0b1111, %%esi # Is source also 16-byte aligned? \n\
+ # (use ESI to save a REX prefix byte) \n\
+ jnz amd64.memcpy_normal_bp # Nope, use slow copy \n\
+ jmp amd64.memcpy_fast_bp # Yup, use fast copy \n\
+ \n\
+amd64.memcpy_small: # Small block copy routine--no prefetch \n\
+ mov %%ecx, %%edx # EDX <- bytes to copy / 16 \n\
+ shr $4, %%edx # (count known to fit in 32 bits) \n\
+ mov %%edx, %%eax # Leave remainder in ECX for later \n\
+ shl $4, %%eax \n\
+ sub %%eax, %%ecx \n\
+ .align 16 \n\
+0: movdqu (%%rsi), %%xmm0 # Copy 16 bytes of data \n\
+ movdqa %%xmm0, (%%rdi) \n\
+ add $16, %%rsi # Update pointers \n\
+ add $16, %%rdi \n\
+ dec %%edx # And loop \n\
+ jnz 0b \n\
+ jmp amd64.memcpy_last # Copy any remaining bytes \n\
+ \n\
+ .align 16 \n\
+ nop \n\
+ nop \n\
+amd64.memcpy_fast_bp: # Fast block prefetch loop \n"
+AMD64_BLOCK_MEMCPY(movdqa)
+" jmp amd64.memcpy_last # Copy any remaining bytes \n\
+ \n\
+ .align 16 \n\
+ nop \n\
+ nop \n\
+amd64.memcpy_normal_bp: # Normal (unaligned) block prefetch loop\n"
+AMD64_BLOCK_MEMCPY(movdqu)
+" \n\
+amd64.memcpy_last: \n\
+ # Copy last <64 bytes, using the computed jump trick \n\
+ mov %%ecx, %%eax # EAX <- ECX>>3 \n\
+ shr $3, %%eax \n\
+ lea 0f(%%rip), %%rdx \n\
+ add %%eax, %%eax # Watch out, MOVSQ is 2 bytes! \n\
+ sub %%rax, %%rdx \n\
+ jmp *%%rdx # Execute 0-7 MOVSQ's \n\
+ movsq \n\
+ movsq \n\
+ movsq \n\
+ movsq \n\
+ movsq \n\
+ movsq \n\
+ movsq \n\
+0: and $0b111, %%ecx # ECX <- ECX & 7 \n\
+ lea 0f(%%rip), %%rdx \n\
+ sub %%rcx, %%rdx \n\
+ jmp *%%rdx # Execute 0-7 MOVSB's \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+ movsb \n\
+0: \n\
+ # All done! \n\
+ emms # Clean up after MMX instructions \n\
+ sfence # Flush the write buffer \n\
+ pop %%rdi # Restore destination (return value) \n\
+ " : /* no outputs */
+        : "D" (dest), "S" (src), "c" (bytes)
+        : "%rax", "%rbx", "%rdx"
+    );
+    return dest;
+}
+
+#endif /* ARCH_X86_64 */
+
+/*************************************************************************/
+/* Currently selected copy routine: libc memcpy by default, replaced by tc_memcpy_init() when an accelerated routine applies. */
+void * (*tc_memcpy)(void *, const void *, size_t) = memcpy;
+/* Bind tc_memcpy to a routine matching the CPU. mmflags: MM_* feature bits, or -1 to query ac_mmflag(); verbose: nonzero prints the chosen method to stderr. */
+void tc_memcpy_init(int verbose, int mmflags)
+{
+    const char * method = "libc";
+    /* accel is the feature bitmask: the caller's mmflags unless it is -1, in which case ac_mmflag() supplies it. */
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+    int accel = mmflags == -1 ? ac_mmflag() : mmflags;
+#endif
+    /* 32-bit x86: prefer the SSE routine (it also needs CMOV because it uses CMOVA), otherwise fall back to MMX. */
+#if defined(ARCH_X86)
+    if((accel & MM_CMOVE) && (accel & MM_SSE))
+    {
+        method = "sse";
+        tc_memcpy = ac_memcpy_sse;
+    }
+    else if(accel & MM_MMX)
+    {
+        method = "mmx";
+        tc_memcpy = ac_memcpy_mmx;
+    }
+#endif
+    /* x86-64: the SSE2 routine is the only accelerated choice and likewise requires CMOV. */
+#if defined(ARCH_X86_64)
+    if((accel & MM_CMOVE) && (accel & MM_SSE2))
+    {
+        method = "amd64";
+        tc_memcpy = ac_memcpy_amd64;
+    }
+#endif
+    /* If no branch above matched (or neither ARCH_* macro is defined) tc_memcpy is left untouched and "libc" is reported. */
+    if(verbose)
+        fprintf(stderr, "tc_memcpy: using %s for memcpy\n", method);
+} |