Browse Source

archive

master
Yak Macker 1 year ago
commit
55620298bb
  1. 19
      comic_compressor/Makefile
  2. 10
      comic_compressor/README.md
  3. 534
      comic_compressor/include/zconf.h
  4. 1912
      comic_compressor/include/zlib.h
  5. BIN
      comic_compressor/lib/libz.a
  6. 3
      comic_compressor/license.txt
  7. 143
      comic_compressor/src/decoder.c
  8. 291
      comic_compressor/src/encoder.c
  9. 11
      comic_compressor/strip_last_bit.c

19
comic_compressor/Makefile

@ -0,0 +1,19 @@
# Builds the comic compressor encoder and decoder against the bundled static
# zlib (lib/libz.a). Override knobs on the command line, e.g. `make CC=gcc`.
CC = clang
CFLAGS = -Ofast -march=native
CFLAGS_TEST = -O0 -ggdb
# Flags every build needs regardless of optimization level.
COMMON_FLAGS = -I./include -L. lib/libz.a -Wall
# Files whose modification must trigger a rebuild of either binary.
COMMON_DEPS = include/zlib.h include/zconf.h lib/libz.a

# These names are commands, not files. The *_test targets deliberately write
# the same `encoder`/`decoder` binaries as the optimized build, so they must
# always run when requested.
.PHONY: all test encoder_test decoder_test clean

all: encoder decoder
test: encoder_test decoder_test

# Optimized binaries; rebuilt whenever the source, headers or zlib change.
encoder: src/encoder.c $(COMMON_DEPS)
	$(CC) src/encoder.c -o $@ $(CFLAGS) $(COMMON_FLAGS)
decoder: src/decoder.c $(COMMON_DEPS)
	$(CC) src/decoder.c -o $@ $(CFLAGS) $(COMMON_FLAGS)

# Debug builds (no optimization, gdb symbols) written over the same binaries.
encoder_test:
	$(CC) src/encoder.c -o encoder $(CFLAGS_TEST) $(COMMON_FLAGS)
decoder_test:
	$(CC) src/decoder.c -o decoder $(CFLAGS_TEST) $(COMMON_FLAGS)

# -f so that cleaning an already-clean tree is not an error.
clean:
	rm -f encoder decoder

10
comic_compressor/README.md

@ -0,0 +1,10 @@
# comiccompressor
A compression algorithm designed to be efficient for simple images with many solid colors and low entropy (like comics).
The compression strategy is pretty much the same as PNG, with some key differences:
- red, green, and blue segments are processed independently
- the last bit of each byte is stripped out and used for storing extra data. If that bit is set to 1, the next byte lists the size of a triangular region which has the same color. The resulting image is practically indistinguishable from the original in most situations.
The program in this repository does only slightly worse than pngcrush on average, and performs significantly better on some inputs. Inputs and outputs are raw RGB streams.
To build, first make a static build of zlib, then copy the resulting libz.a into the lib/ directory of this project and type 'make'.

534
comic_compressor/include/zconf.h

@ -0,0 +1,534 @@
/* zconf.h -- configuration of the zlib compression library
* Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#ifndef ZCONF_H
#define ZCONF_H
/*
* If you *really* need a unique prefix for all types and library functions,
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
* Even better than compiling with -DZ_PREFIX would be to use configure to set
* this permanently in zconf.h using "./configure --zprefix".
*/
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
# define Z_PREFIX_SET
/* all linked symbols and init macros */
# define _dist_code z__dist_code
# define _length_code z__length_code
# define _tr_align z__tr_align
# define _tr_flush_bits z__tr_flush_bits
# define _tr_flush_block z__tr_flush_block
# define _tr_init z__tr_init
# define _tr_stored_block z__tr_stored_block
# define _tr_tally z__tr_tally
# define adler32 z_adler32
# define adler32_combine z_adler32_combine
# define adler32_combine64 z_adler32_combine64
# define adler32_z z_adler32_z
# ifndef Z_SOLO
# define compress z_compress
# define compress2 z_compress2
# define compressBound z_compressBound
# endif
# define crc32 z_crc32
# define crc32_combine z_crc32_combine
# define crc32_combine64 z_crc32_combine64
# define crc32_z z_crc32_z
# define deflate z_deflate
# define deflateBound z_deflateBound
# define deflateCopy z_deflateCopy
# define deflateEnd z_deflateEnd
# define deflateGetDictionary z_deflateGetDictionary
# define deflateInit z_deflateInit
# define deflateInit2 z_deflateInit2
# define deflateInit2_ z_deflateInit2_
# define deflateInit_ z_deflateInit_
# define deflateParams z_deflateParams
# define deflatePending z_deflatePending
# define deflatePrime z_deflatePrime
# define deflateReset z_deflateReset
# define deflateResetKeep z_deflateResetKeep
# define deflateSetDictionary z_deflateSetDictionary
# define deflateSetHeader z_deflateSetHeader
# define deflateTune z_deflateTune
# define deflate_copyright z_deflate_copyright
# define get_crc_table z_get_crc_table
# ifndef Z_SOLO
# define gz_error z_gz_error
# define gz_intmax z_gz_intmax
# define gz_strwinerror z_gz_strwinerror
# define gzbuffer z_gzbuffer
# define gzclearerr z_gzclearerr
# define gzclose z_gzclose
# define gzclose_r z_gzclose_r
# define gzclose_w z_gzclose_w
# define gzdirect z_gzdirect
# define gzdopen z_gzdopen
# define gzeof z_gzeof
# define gzerror z_gzerror
# define gzflush z_gzflush
# define gzfread z_gzfread
# define gzfwrite z_gzfwrite
# define gzgetc z_gzgetc
# define gzgetc_ z_gzgetc_
# define gzgets z_gzgets
# define gzoffset z_gzoffset
# define gzoffset64 z_gzoffset64
# define gzopen z_gzopen
# define gzopen64 z_gzopen64
# ifdef _WIN32
# define gzopen_w z_gzopen_w
# endif
# define gzprintf z_gzprintf
# define gzputc z_gzputc
# define gzputs z_gzputs
# define gzread z_gzread
# define gzrewind z_gzrewind
# define gzseek z_gzseek
# define gzseek64 z_gzseek64
# define gzsetparams z_gzsetparams
# define gztell z_gztell
# define gztell64 z_gztell64
# define gzungetc z_gzungetc
# define gzvprintf z_gzvprintf
# define gzwrite z_gzwrite
# endif
# define inflate z_inflate
# define inflateBack z_inflateBack
# define inflateBackEnd z_inflateBackEnd
# define inflateBackInit z_inflateBackInit
# define inflateBackInit_ z_inflateBackInit_
# define inflateCodesUsed z_inflateCodesUsed
# define inflateCopy z_inflateCopy
# define inflateEnd z_inflateEnd
# define inflateGetDictionary z_inflateGetDictionary
# define inflateGetHeader z_inflateGetHeader
# define inflateInit z_inflateInit
# define inflateInit2 z_inflateInit2
# define inflateInit2_ z_inflateInit2_
# define inflateInit_ z_inflateInit_
# define inflateMark z_inflateMark
# define inflatePrime z_inflatePrime
# define inflateReset z_inflateReset
# define inflateReset2 z_inflateReset2
# define inflateResetKeep z_inflateResetKeep
# define inflateSetDictionary z_inflateSetDictionary
# define inflateSync z_inflateSync
# define inflateSyncPoint z_inflateSyncPoint
# define inflateUndermine z_inflateUndermine
# define inflateValidate z_inflateValidate
# define inflate_copyright z_inflate_copyright
# define inflate_fast z_inflate_fast
# define inflate_table z_inflate_table
# ifndef Z_SOLO
# define uncompress z_uncompress
# define uncompress2 z_uncompress2
# endif
# define zError z_zError
# ifndef Z_SOLO
# define zcalloc z_zcalloc
# define zcfree z_zcfree
# endif
# define zlibCompileFlags z_zlibCompileFlags
# define zlibVersion z_zlibVersion
/* all zlib typedefs in zlib.h and zconf.h */
# define Byte z_Byte
# define Bytef z_Bytef
# define alloc_func z_alloc_func
# define charf z_charf
# define free_func z_free_func
# ifndef Z_SOLO
# define gzFile z_gzFile
# endif
# define gz_header z_gz_header
# define gz_headerp z_gz_headerp
# define in_func z_in_func
# define intf z_intf
# define out_func z_out_func
# define uInt z_uInt
# define uIntf z_uIntf
# define uLong z_uLong
# define uLongf z_uLongf
# define voidp z_voidp
# define voidpc z_voidpc
# define voidpf z_voidpf
/* all zlib structs in zlib.h and zconf.h */
# define gz_header_s z_gz_header_s
# define internal_state z_internal_state
#endif
/* Normalize compiler-specific platform macros to the short names
 * (MSDOS, OS2, WINDOWS, WIN32) that the rest of this header tests.
 */
#if defined(__MSDOS__) && !defined(MSDOS)
# define MSDOS
#endif
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
# define OS2
#endif
#if defined(_WINDOWS) && !defined(WINDOWS)
# define WINDOWS
#endif
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
# ifndef WIN32
# define WIN32
# endif
#endif
/* A DOS, OS/2 or Windows target that is not WIN32 and is not compiled with
 * __GNUC__, __FLAT__ or __386__ defined is flagged as SYS16BIT.
 */
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
# ifndef SYS16BIT
# define SYS16BIT
# endif
# endif
#endif
/*
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more
* than 64k bytes at a time (needed on systems with 16-bit int).
*/
#ifdef SYS16BIT
# define MAXSEG_64K
#endif
#ifdef MSDOS
# define UNALIGNED_OK
#endif
/* Decide whether the compiler is treated as standard C (STDC) and, for
 * __STDC_VERSION__ >= 199901L, as C99 (STDC99). STDC is forced on for a list
 * of known compilers and platforms even when they do not announce it.
 */
#ifdef __STDC_VERSION__
# ifndef STDC
# define STDC
# endif
# if __STDC_VERSION__ >= 199901L
# ifndef STDC99
# define STDC99
# endif
# endif
#endif
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
# define STDC
#endif
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
# define STDC
#endif
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
# define STDC
#endif
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
# define STDC
#endif
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
# define STDC
#endif
/* Without STDC the `const` keyword is defined away to nothing. */
#ifndef STDC
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
# define const /* note: need a more gentle solution here */
# endif
#endif
#if defined(ZLIB_CONST) && !defined(z_const)
# define z_const const
#else
# define z_const
#endif
/* Select z_size_t: unsigned long under Z_SOLO; otherwise `unsigned NO_SIZE_T`
 * when NO_SIZE_T is defined, size_t from <stddef.h> when STDC is defined, and
 * unsigned long as the last resort.
 */
#ifdef Z_SOLO
typedef unsigned long z_size_t;
#else
/* z_longlong exists only while the typedef is chosen and is removed again
 * below -- presumably so NO_SIZE_T can be set to z_longlong; confirm against
 * the upstream zlib documentation.
 */
# define z_longlong long long
# if defined(NO_SIZE_T)
typedef unsigned NO_SIZE_T z_size_t;
# elif defined(STDC)
# include <stddef.h>
typedef size_t z_size_t;
# else
typedef unsigned long z_size_t;
# endif
# undef z_longlong
#endif
/* Maximum value for memLevel in deflateInit2 */
#ifndef MAX_MEM_LEVEL
# ifdef MAXSEG_64K
# define MAX_MEM_LEVEL 8
# else
# define MAX_MEM_LEVEL 9
# endif
#endif
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
* created by gzip. (Files created by minigzip can still be extracted by
* gzip.)
*/
#ifndef MAX_WBITS
# define MAX_WBITS 15 /* 32K LZ77 window */
#endif
/* The memory requirements for deflate are (in bytes):
(1 << (windowBits+2)) + (1 << (memLevel+9))
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
plus a few kilobytes for small objects. For example, if you want to reduce
the default memory requirements from 256K to 128K, compile with
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
Of course this will generally degrade compression (there's no free lunch).
The memory requirements for inflate are (in bytes) 1 << windowBits
that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
for small objects.
*/
/* Type declarations */
#ifndef OF /* function prototypes */
# ifdef STDC
# define OF(args) args
# else
# define OF(args) ()
# endif
#endif
#ifndef Z_ARG /* function prototypes for stdarg */
# if defined(STDC) || defined(Z_HAVE_STDARG_H)
# define Z_ARG(args) args
# else
# define Z_ARG(args) ()
# endif
#endif
/* The following definitions for FAR are needed only for MSDOS mixed
* model programming (small or medium model with some far allocations).
* This was tested only with MSC; for other MSDOS compilers you may have
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
* just define FAR to be empty.
*/
#ifdef SYS16BIT
# if defined(M_I86SM) || defined(M_I86MM)
/* MSC small or medium model */
# define SMALL_MEDIUM
# ifdef _MSC_VER
# define FAR _far
# else
# define FAR far
# endif
# endif
# if (defined(__SMALL__) || defined(__MEDIUM__))
/* Turbo C small or medium model */
# define SMALL_MEDIUM
# ifdef __BORLANDC__
# define FAR _far
# else
# define FAR far
# endif
# endif
#endif
#if defined(WINDOWS) || defined(WIN32)
/* If building or using zlib as a DLL, define ZLIB_DLL.
* This is not mandatory, but it offers a little performance increase.
*/
# ifdef ZLIB_DLL
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
# ifdef ZLIB_INTERNAL
# define ZEXTERN extern __declspec(dllexport)
# else
# define ZEXTERN extern __declspec(dllimport)
# endif
# endif
# endif /* ZLIB_DLL */
/* If building or using zlib with the WINAPI/WINAPIV calling convention,
* define ZLIB_WINAPI.
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
*/
# ifdef ZLIB_WINAPI
# ifdef FAR
# undef FAR
# endif
# include <windows.h>
/* No need for _export, use ZLIB.DEF instead. */
/* For complete Windows compatibility, use WINAPI, not __stdcall. */
# define ZEXPORT WINAPI
# ifdef WIN32
# define ZEXPORTVA WINAPIV
# else
# define ZEXPORTVA FAR CDECL
# endif
# endif
#endif
#if defined (__BEOS__)
# ifdef ZLIB_DLL
# ifdef ZLIB_INTERNAL
# define ZEXPORT __declspec(dllexport)
# define ZEXPORTVA __declspec(dllexport)
# else
# define ZEXPORT __declspec(dllimport)
# define ZEXPORTVA __declspec(dllimport)
# endif
# endif
#endif
#ifndef ZEXTERN
# define ZEXTERN extern
#endif
#ifndef ZEXPORT
# define ZEXPORT
#endif
#ifndef ZEXPORTVA
# define ZEXPORTVA
#endif
#ifndef FAR
# define FAR
#endif
#if !defined(__MACTYPES__)
typedef unsigned char Byte; /* 8 bits */
#endif
typedef unsigned int uInt; /* 16 bits or more */
typedef unsigned long uLong; /* 32 bits or more */
#ifdef SMALL_MEDIUM
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
# define Bytef Byte FAR
#else
typedef Byte FAR Bytef;
#endif
typedef char FAR charf;
typedef int FAR intf;
typedef uInt FAR uIntf;
typedef uLong FAR uLongf;
#ifdef STDC
typedef void const *voidpc;
typedef void FAR *voidpf;
typedef void *voidp;
#else
typedef Byte const *voidpc;
typedef Byte FAR *voidpf;
typedef Byte *voidp;
#endif
/* Pick Z_U4, an unsigned type whose maximum is exactly 0xffffffff, by testing
 * the <limits.h> maxima of unsigned, unsigned long and unsigned short in that
 * order. Skipped when Z_U4 is already defined, under Z_SOLO, or without STDC.
 */
#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
# include <limits.h>
# if (UINT_MAX == 0xffffffffUL)
# define Z_U4 unsigned
# elif (ULONG_MAX == 0xffffffffUL)
# define Z_U4 unsigned long
# elif (USHRT_MAX == 0xffffffffUL)
# define Z_U4 unsigned short
# endif
#endif
/* z_crc_t is Z_U4 when one was found, otherwise unsigned long. */
#ifdef Z_U4
typedef Z_U4 z_crc_t;
#else
typedef unsigned long z_crc_t;
#endif
#if 1 /* was set to #if 1 by ./configure */
# define Z_HAVE_UNISTD_H
#endif
#if 1 /* was set to #if 1 by ./configure */
# define Z_HAVE_STDARG_H
#endif
#ifdef STDC
# ifndef Z_SOLO
# include <sys/types.h> /* for off_t */
# endif
#endif
#if defined(STDC) || defined(Z_HAVE_STDARG_H)
# ifndef Z_SOLO
# include <stdarg.h> /* for va_list */
# endif
#endif
#ifdef _WIN32
# ifndef Z_SOLO
# include <stddef.h> /* for wchar_t */
# endif
#endif
/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
* "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
* though the former does not conform to the LFS document), but considering
* both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
* equivalently requesting no 64-bit operations
*/
#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
# undef _LARGEFILE64_SOURCE
#endif
#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
# define Z_HAVE_UNISTD_H
#endif
#ifndef Z_SOLO
# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
# ifdef VMS
# include <unixio.h> /* for off_t */
# endif
# ifndef z_off_t
# define z_off_t off_t
# endif
# endif
#endif
#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
# define Z_LFS64
#endif
#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
# define Z_LARGE64
#endif
#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
# define Z_WANT64
#endif
#if !defined(SEEK_SET) && !defined(Z_SOLO)
# define SEEK_SET 0 /* Seek from beginning of file. */
# define SEEK_CUR 1 /* Seek from current position. */
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
#endif
#ifndef z_off_t
# define z_off_t long
#endif
#if !defined(_WIN32) && defined(Z_LARGE64)
# define z_off64_t off64_t
#else
# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
# define z_off64_t __int64
# else
# define z_off64_t z_off_t
# endif
#endif
/* MVS linker does not support external names larger than 8 bytes */
#if defined(__MVS__)
#pragma map(deflateInit_,"DEIN")
#pragma map(deflateInit2_,"DEIN2")
#pragma map(deflateEnd,"DEEND")
#pragma map(deflateBound,"DEBND")
#pragma map(inflateInit_,"ININ")
#pragma map(inflateInit2_,"ININ2")
#pragma map(inflateEnd,"INEND")
#pragma map(inflateSync,"INSY")
#pragma map(inflateSetDictionary,"INSEDI")
#pragma map(compressBound,"CMBND")
#pragma map(inflate_table,"INTABL")
#pragma map(inflate_fast,"INFA")
#pragma map(inflate_copyright,"INCOPY")
#endif
#endif /* ZCONF_H */

1912
comic_compressor/include/zlib.h

File diff suppressed because it is too large Load Diff

BIN
comic_compressor/lib/libz.a

Binary file not shown.

3
comic_compressor/license.txt

@ -0,0 +1,3 @@
This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
Author: Jack McKee
Date: 2020

143
comic_compressor/src/decoder.c

@ -0,0 +1,143 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zlib.h"
/* Smaller of two values. Both arguments are fully parenthesized so operands
 * containing low-precedence operators (|, &, ?:) expand correctly. Each
 * argument may still be evaluated twice, so avoid side effects. */
#define min(a,b) (((a) < (b)) ? (a) : (b))
typedef char bool;
/* Decoder-side state of the region anchored at one column of one channel. */
struct region {
    bool is_active;      /* nonzero while the region still covers upcoming rows */
    int l;               /* length field taken from the stream (low 7 bits of the second byte) */
    unsigned char color; /* channel value painted over the region */
    int y0;              /* row on which the region was started */
    int x0;              /* column on which the region was started */
};
/* Mask that removes the low flag bit from a stored byte. */
unsigned char MASK = 0xfe;
//Decodes the value at column x of row y for one color channel.
//regions holds one entry per column; v1 and v2 are the next two input bytes (v2 is 0 when only one was available).
//last_color is the value of the pixel to the left (0 for the first column). output points at this channel's byte for
//column x inside an interleaved RGB row, so consecutive pixels of the channel are 3 bytes apart.
//*output_incr receives the number of columns produced. The return value is the number of input bytes consumed:
//0 when the column is covered by an active region or when v2 is still missing, 1 for a plain pixel, 2 for a region start.
int decode_next(struct region* regions, int rowlen, int x, int y, unsigned char v1, unsigned char v2, unsigned char last_color, unsigned char* output, int* output_incr) {
    struct region* here = &regions[x];

    //an active region covers this column: paint its span without consuming input
    if (here->is_active) {
        int age = y - here->y0;
        int span = age + 1;
        if (rowlen - x <= span) { //clip the span at the end of the row
            span = rowlen - x;
        }
        int col = 0;
        while (col < span) {
            output[3*col] = here->color;
            col++;
        }
        *output_incr = span;
        if (here->l == age) { //this was the last row the region covers
            here->is_active = 0;
        }
        return 0;
    }

    int starts_region = v1 & 0x1;
    unsigned char value = last_color + (v1 & MASK);

    //the flag bit announces a region but the length byte is not available yet: ask the caller for more input
    if (starts_region && !v2) {
        *output_incr = 0;
        return 0;
    }

    //flagged byte followed by a nonzero length: open a new region at this column
    if (starts_region) {
        here->is_active = 1;
        here->l = v2 & 0x7f;
        here->color = value;
        *output = value;
        here->y0 = y;
        here->x0 = x;
        *output_incr = 1;
        return 2;
    }

    //plain pixel stored as an offset from the previous color
    here->is_active = 0;
    *output = value;
    *output_incr = 1;
    return 1;
}
//decode a file stream
//the first four bytes of the (deflated) file should be the rowlen, in little-endian
//in:  gz stream positioned at the start of the encoded data
//out: receives raw interleaved RGB rows, rowlen*3 bytes per row
//If the header is truncated, the row length is not positive, or memory cannot be allocated, a message is
//printed to stderr and nothing is written.
void decode_stream(gzFile in, FILE* out) {
    //read the 4-byte little-endian row length, stopping on a short header
    int rowlen = 0;
    for (int shift = 0; shift < 32; shift += 8) {
        int c = gzgetc(in); //-1 on end of file or error
        if (c < 0) {
            fprintf(stderr,"decode_stream: truncated header\n");
            return;
        }
        rowlen |= (int)((unsigned int)c << shift);
    }
    //reject lengths that cannot be indexed as 3*x with an int
    if (rowlen <= 0 || rowlen > (int)((~0u >> 1) / 3)) {
        fprintf(stderr,"decode_stream: invalid row length %d\n",rowlen);
        return;
    }

    //one region table per channel: 0 = red, 1 = green, 2 = blue
    struct region* regions[3];
    for (int c = 0; c < 3; c++) {
        regions[c] = calloc(rowlen,sizeof(struct region));
    }
    unsigned char* this_row = calloc((size_t)rowlen*3,1);
    if (regions[0] == NULL || regions[1] == NULL || regions[2] == NULL || this_row == NULL) {
        fprintf(stderr,"decode_stream: out of memory\n");
        for (int c = 0; c < 3; c++) {
            free(regions[c]);
        }
        free(this_row);
        return;
    }

    unsigned char v1 = 0;
    unsigned char v2 = 0;
    int y = 0;
    int input_incr = 2;// start with 2, so we get a fresh pair of inputs for the first go
    int output_incr = 1;
    //NOTE(review): the loop ends as soon as zlib reports end of file, so trailing rows that need no
    //further input bytes (fully covered by active regions) may not be emitted - confirm against encoder output.
    while(gzeof(in) == 0) {
        //decode reds, then greens, then blues; each channel writes its own byte of every RGB triple
        for (int c = 0; c < 3; c++) {
            for(int x = 0; x < rowlen;) {
                //keep a two-byte lookahead window (v1,v2) in step with what the previous call consumed
                if(input_incr == 1) {
                    v1 = v2;
                    v2 = gzgetc(in);
                } else if(input_incr == 2) {
                    v1 = gzgetc(in);
                    v2 = gzgetc(in);
                } else if(output_incr == 0) { //handle incomplete reads. Probably impossible but whatever
                    v2 = gzgetc(in);
                }
                //previous pixel of the same channel, or 0 at the start of the row
                unsigned char last_color = x == 0 ? 0 : this_row[3*(x-1)+c];
                input_incr = decode_next(regions[c],rowlen,x,y,v1,v2,last_color,this_row + 3*x + c,&output_incr);
                x += output_incr;
            }
        }
        //output
        fwrite(this_row,1,(size_t)rowlen*3,out);
        y++;
    }
    for (int c = 0; c < 3; c++) {
        free(regions[c]);
    }
    free(this_row);
}
//Decoder entry point.
//With exactly two arguments, reads the compressed file argv[1] and writes raw RGB to argv[2];
//otherwise decodes stdin to stdout. Returns 0 on success and 1 if a stream cannot be opened or closed.
int main(int argc,char** argv) {
    gzFile in;
    FILE* out = stdout;
    if (argc == 3) {
        //binary modes: the data is not text
        in = gzopen(argv[1],"rb");
        if (in == NULL) {
            fprintf(stderr,"cannot open input '%s'\n",argv[1]);
            return 1;
        }
        out = fopen(argv[2],"wb");
        if (out == NULL) {
            fprintf(stderr,"cannot open output '%s'\n",argv[2]);
            gzclose(in);
            return 1;
        }
    } else {
        in = gzdopen(fileno(stdin),"rb");
        if (in == NULL) {
            fprintf(stderr,"cannot read from stdin\n");
            return 1;
        }
    }
    decode_stream(in,out);
    gzclose(in);
    //fclose flushes buffered rows; a failure here means the output is incomplete
    if (fclose(out) != 0) {
        fprintf(stderr,"error while writing output\n");
        return 1;
    }
    return 0;
}

291
comic_compressor/src/encoder.c

@ -0,0 +1,291 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zlib.h"
//Helper macros. Every argument is parenthesized so that operands containing operators expand correctly.
//Arguments may be evaluated more than once, so avoid side effects.
#define max(a,b) (((a) < (b)) ? (b) : (a))
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define square(a) ((a)*(a))
//masked value of element x of a buffer named `row` that must be in scope at the point of use
#define rref(x) (row[(x)] & MASK)
//difference of a and b; when their low bits differ, the low bits are masked off before subtracting
#define diff(a,b) ((((a) & 1) == ((b) & 1)) ? ((a) - (b)) : (((a) & MASK) - ((b) & MASK)))
typedef char bool;
//a struct that contains information about an active region
struct region {
    bool is_active;        //nonzero while the region is still being extended
    int l;                 //number of rows the region has been continued so far
    unsigned char* output; //where the length byte is written once the region ends
    unsigned char color;   //channel value shared by every pixel of the region
};
//diagnostic counters; the code that updates them is currently commented out
long avg_area_num = 0;
long avg_area_denom = 0;
long num_bad_regions = 0;
//the bitmask to apply to all values. The last bit must be unset, but it is possible to get better compression (and a more restricted color space) by making a smaller mask.
unsigned char MASK = 0xfe;
//checks that there is no active edge to the left of the given position
bool no_active_edge(int x, unsigned char this_color, struct region* regions) {
for(int xo = 1; xo <= x; xo++) {
if (regions[x-xo].is_active && (regions[x-xo].l == xo - 1) && (regions[x-xo].color == this_color)) {
return 0;
}
}
return 1;
}
//encodes one component of one row.
//this is implemented as a macro so it takes up less space. Use encode_row_red, encode_row_green, etc.
//diagnostics data has been commented out for speed
//The macro expands to a complete function body: it expects `regions`, `rowlen`, `output`, `row` and `nextrow`
//to be in scope and ends with `return output;`. OFFSET (0, 1 or 2) selects which byte of each interleaved
//RGB triple is read from `row` and `nextrow`. For every column it either continues/ends the active region
//anchored there, starts a new region (two bytes: offset with the low bit set, then a length byte that is
//filled in later through regions[x].output), or emits a single offset byte relative to the previous pixel.
//A `nextrow` of 0 marks the last row, on which no new region is started.
#define encode_row(OFFSET) for(int x = 0; x < rowlen;) {\
if (regions[x].is_active) { /*if this region is active, we need to see if it continues here or not*/\
int i = x;\
int imax = min(rowlen,x+regions[x].l+2);\
for(;i < imax; i++) {\
if(row[3*i+OFFSET] != regions[x].color) {\
break;\
}\
}\
/*case: the region continues through this row*/ \
if (i == imax) {\
regions[x].l++;\
/*case: the region at the imax-1 position is active, so we need to deactivate it and write its contents to the output buffer*/ \
if(regions[i - 1].is_active && (x < (i-1))) {\
regions[i - 1].is_active = 0;\
*(regions[i - 1].output) = regions[i - 1].l | 0x80;\
/*avg_area_denom += square(regions[i - 1].l + 1)/2;\
avg_area_num += 2;*/\
}\
/*case: the region at the x position is at maximum size, so we deactivate & write it */\
if(regions[x].l == 127) {\
regions[x].is_active = 0;\
*(regions[x].output) = 0xff;\
/*avg_area_denom += square(128)/2;\
avg_area_num += 2; */\
}\
/*increment x to skip over the region we just encoded*/\
x = i;\
/*case: the region is no longer active, so we deactivate it*/\
} else {\
regions[x].is_active = 0;\
*(regions[x].output) = regions[x].l | 0x80;\
/*avg_area_denom += square(regions[x].l + 1)/2;\
avg_area_num += 2;\
if(regions[x].l == 0) {\
num_bad_regions++;\
}*/ \
/*case: it is possible to begin a new region starting at this location
* basically this is true if and only if we are not at the last row and the two pixels below this one match
* all the trinary expressions are to handle the case when we are at the last column */\
if((nextrow != 0) && (nextrow[3*x+OFFSET] == row[3*x+OFFSET]) && (nextrow[x < rowlen - 1 ? 3*x + (3+OFFSET) : 3*x+OFFSET] == row[3*x+OFFSET]) && no_active_edge(x,row[3*x+OFFSET],regions)) {\
regions[x].is_active = 1;\
regions[x].output = output+1;\
*output = (row[3*x+OFFSET] - (x > 0 ? row[3*x-(3-OFFSET)] : 0)) | 0x1;\
*(output + 1) = 0;\
regions[x].l = 0;\
regions[x].color = row[3*x+OFFSET];\
output = output + 2;\
/*case: it is not possible to begin a new region, so we simply encode the offset from the last pixel*/\
} else {\
*output = row[3*x+OFFSET] - (x > 0 ? row[3*x-(3-OFFSET)] : 0);\
/*$////fprintf(stderr,"zero-length region at %d, last color was %x, this color = %x, offset = %d\n",x,(x > 0 ? row[x - 1] : 0),row[x],*output);*/\
output = output + 1;\
/*avg_area_num ++;\
avg_area_denom ++;*/\
}\
/*increment x by just 1*/\
x++;\
}\
/*case: there is no active region here*/\
} else {\
/*case: it is possible to begin a new region here*/\
if ((nextrow != 0) && (nextrow[3*x+OFFSET] == row[3*x+OFFSET]) && (nextrow[x < rowlen - 1 ? 3*x + (3+OFFSET) : 3*x+OFFSET] == row[3*x+OFFSET]) && no_active_edge(x,row[3*x+OFFSET],regions)) {\
regions[x].is_active = 1;\
regions[x].output = output+1;\
*output = (row[3*x+OFFSET] - (x > 0 ? row[3*x-(3-OFFSET)] : 0)) | 0x1;\
*(output + 1) = 0;\
regions[x].l = 0;\
regions[x].color = row[3*x+OFFSET];\
output = output + 2;\
/*case: it is not possible to begin a new region here*/\
} else {\
*output = row[3*x+OFFSET] - (x > 0 ? row[3*x-(3-OFFSET)] : 0);\
output = output + 1;\
/*avg_area_num ++;\
avg_area_denom ++;*/\
}\
x++;\
}\
}\
return output;
//encodes the red component (byte 0 of each triple) of `row`; returns the advanced output cursor
unsigned char* encode_row_red(struct region* regions, int rowlen, unsigned char* output, unsigned char* row, unsigned char* nextrow) {
encode_row(0)
}
//encodes the green component (byte 1 of each triple) of `row`; returns the advanced output cursor
unsigned char* encode_row_green(struct region* regions, int rowlen, unsigned char* output, unsigned char* row, unsigned char* nextrow) {
encode_row(1)
}
//encodes the blue component (byte 2 of each triple) of `row`; returns the advanced output cursor
unsigned char* encode_row_blue(struct region* regions, int rowlen, unsigned char* output, unsigned char* row, unsigned char* nextrow) {
encode_row(2)
}
//reads from a file and outputs to a file, calling encode_row repeatedly and feeding the results into a gzip file
//reads and writes only one row at a time, so it can work on streams and can easily be adapted to situations where that is key
//in:     raw interleaved RGB input, rowlen*3 bytes per row
//out:    gz stream that receives the encoded bytes; it is always closed before this function returns
//rowlen: number of pixels per row, must be positive
//On an invalid rowlen or an allocation failure a message is printed to stderr and nothing is encoded.
void encode_stream(FILE* in, gzFile out, int rowlen) {
    //rowlen*256*6 must fit in an int because the buffer offsets below are ints
    if (rowlen <= 0 || rowlen > (int)((~0u >> 1) / (256*6))) {
        fprintf(stderr,"encode_stream: invalid row length %d\n",rowlen);
        gzclose(out);
        return;
    }
    struct region* red_regions = calloc(rowlen,sizeof(struct region));
    struct region* green_regions = calloc(rowlen,sizeof(struct region));
    struct region* blue_regions = calloc(rowlen,sizeof(struct region));
    unsigned char* output_buffer = calloc((size_t)rowlen*256*6,1); //an adversarial example keeps the output buffer locked for an entire 128 rows, so we need quite a lot of memory to guarantee no segfaults.
    unsigned char* this_row_buf = calloc((size_t)rowlen*3,1);
    unsigned char* next_row_buf = calloc((size_t)rowlen*3,1);
    if (red_regions == NULL || green_regions == NULL || blue_regions == NULL ||
        output_buffer == NULL || this_row_buf == NULL || next_row_buf == NULL) {
        fprintf(stderr,"encode_stream: out of memory\n");
        free(red_regions);
        free(green_regions);
        free(blue_regions);
        free(output_buffer);
        free(this_row_buf);
        free(next_row_buf);
        gzclose(out);
        return;
    }
    unsigned char* output_cursor = output_buffer; //next free byte for the row encoders
    int output_begin = 0;                         //offset of the first byte not yet handed to zlib
    int output_len = 0;                           //number of finished bytes found after output_begin
    int output_end = rowlen*256*6;                //end of the valid data after the cursor has wrapped
    //read the first row into next_row to prime the whole process
    //NOTE(review): the result of this read is not checked, so an empty input still encodes one all-zero row - confirm intended.
    fread(next_row_buf,1,3*rowlen,in);
    for(int x = 0; x < rowlen*3;x++) {
        next_row_buf[x] &= MASK;
    }
    int y = 0;
    size_t last_read = 0;
    while(feof(in) == 0) {
        unsigned char* tmp = this_row_buf; //trick: just swap the bufs each time you read a row
        this_row_buf = next_row_buf;
        next_row_buf = tmp;
        last_read = fread(next_row_buf,1,3*rowlen,in);
        for(int x = 0; x < rowlen*3;x++) {
            next_row_buf[x] &= MASK;
        }
        if(last_read == 0) {
            break;
        }
        //encode the different components
        output_cursor = encode_row_red(red_regions,rowlen,output_cursor,this_row_buf,next_row_buf);
        output_cursor = encode_row_green(green_regions,rowlen,output_cursor,this_row_buf,next_row_buf);
        output_cursor = encode_row_blue(blue_regions,rowlen,output_cursor,this_row_buf,next_row_buf);
        //if we can, input some data.
        //scan forward from output_begin over items that are final: plain bytes (low bit clear) and region
        //starts whose length byte already carries the 0x80 "finished" marker. Stop at the first open region.
        //the scan runs up to the cursor, or up to output_end while the data left over from a wrap is drained
        int cursor_off = (int)(output_cursor - output_buffer);
        int scan_limit = output_begin < cursor_off ? cursor_off : output_end;
        while(output_begin + output_len < scan_limit) {
            if((output_buffer[output_begin + output_len] & 0x1) && (output_buffer[output_begin + output_len + 1] & 0x80)) {
                output_buffer[output_begin + output_len + 1] &= 0x7f; //strip the marker before writing
                output_len += 2;
            } else if ((output_buffer[output_begin + output_len] & 0x1) == 0) {
                output_len ++;
            } else {
                break;
            }
        }
        if(output_len > 0) {
            gzwrite(out,output_buffer + output_begin,output_len);
            output_begin += output_len;
            output_len = 0;
            //reset output_begin to 0 if we have just finished reading the region left over after resetting the cursor
            if(output_begin == output_end) {
                output_begin = 0;
                output_end = rowlen*256*6;
            //reset the cursor to 0 so that there is always rowlen*128 space after the cursor
            //have to make sure this doesn't happen twice in a row, so we restrict to the case when cursor > output_buffer + output_begin
            } else if((output_cursor - output_buffer > output_begin) && (output_begin > rowlen*128*6)) {
                output_end = output_cursor - output_buffer;
                output_cursor = output_buffer;
            }
        }
        y++;
    }
    //final row
    output_cursor = encode_row_red(red_regions,rowlen,output_cursor,this_row_buf,0);
    output_cursor = encode_row_green(green_regions,rowlen,output_cursor,this_row_buf,0);
    output_cursor = encode_row_blue(blue_regions,rowlen,output_cursor,this_row_buf,0);
    //finalize all regions that didn't get finalized
    for(int i = 0; i < rowlen; i++) {
        if(red_regions[i].is_active) {
            *(red_regions[i].output) = red_regions[i].l;
        }
        if(green_regions[i].is_active) {
            *(green_regions[i].output) = green_regions[i].l;
        }
        if(blue_regions[i].is_active) {
            *(blue_regions[i].output) = blue_regions[i].l;
        }
    }
    //now 100% of our output should be writeable
    if(output_begin > (output_cursor - output_buffer)) {
        //write the extra bit: the data between output_begin and output_end that predates the cursor wrap
        for(int i = output_begin; i < output_end;) {
            if (output_buffer[i] & 0x1) {
                output_buffer[i+1] &= 0x7f;
                i += 2;
            } else {
                i++;
            }
        }
        gzwrite(out,output_buffer + output_begin,output_end - output_begin);
        output_begin = 0;
    }
    //clear the 0x80 markers and write the last bit
    output_len = output_cursor - (output_buffer + output_begin);
    //output_len is a byte count, so the scan must stop at output_begin + output_len
    for(int i = output_begin; i < output_begin + output_len;) {
        if (output_buffer[i] & 0x1) {
            output_buffer[i+1] &= 0x7f;
            i += 2;
        } else {
            i++;
        }
    }
    gzwrite(out,output_buffer + output_begin,output_len);
    gzclose(out);
    free(red_regions);
    free(green_regions);
    free(blue_regions);
    free(output_buffer);
    free(this_row_buf);
    free(next_row_buf);
}
//Encoder entry point.
//usage: encoder rowlen [input output]
//rowlen is the number of pixels per row. With fewer than three arguments the raw RGB stream is read from
//stdin and the compressed stream is written to stdout; otherwise argv[2] is read and argv[3] is written.
//Returns 0 on success and 1 on a usage error or when a stream cannot be opened.
int main(int argc, char** argv) {
    if(argc < 2) {
        fprintf(stderr,"usage: %s rowlen [input output]\n",argc > 0 ? argv[0] : "encoder");
        return 1;
    }
    int rowlen = atoi(argv[1]);
    if(rowlen <= 0) {
        fprintf(stderr,"rowlen must be a positive integer, got '%s'\n",argv[1]);
        return 1;
    }
    FILE* in = stdin;
    gzFile out;
    if (argc < 4) {
        out = gzdopen(fileno(stdout),"wb");
    } else {
        //binary modes: the data is not text
        in = fopen(argv[2],"rb");
        if(in == NULL) {
            fprintf(stderr,"cannot open input '%s'\n",argv[2]);
            return 1;
        }
        out = gzopen(argv[3],"wb");
    }
    if(out == NULL) {
        fprintf(stderr,"cannot open output\n");
        if(in != stdin) {
            fclose(in);
        }
        return 1;
    }
    gzputc(out,rowlen & 0xff); //output rowlen in little-endian
    gzputc(out,(rowlen >> 8) & 0xff);
    gzputc(out,(rowlen >> 16) & 0xff);
    gzputc(out,(rowlen >> 24) & 0xff);
    encode_stream(in,out,rowlen); //closes out
    fclose(in);
    return 0;
}

11
comic_compressor/strip_last_bit.c

@ -0,0 +1,11 @@
#include <stdio.h>
/* Copies the file argv[1] to argv[2] with the lowest bit of every byte cleared.
 * Returns 0 on success and 1 on a usage error or when a file cannot be opened or written. */
int main(int argc, char** argv) {
    if (argc < 3) {
        fprintf(stderr,"usage: %s input output\n",argc > 0 ? argv[0] : "strip_last_bit");
        return 1;
    }
    FILE* in = fopen(argv[1],"rb");
    if (in == NULL) {
        fprintf(stderr,"cannot open input '%s'\n",argv[1]);
        return 1;
    }
    FILE* out = fopen(argv[2],"wb");
    if (out == NULL) {
        fprintf(stderr,"cannot open output '%s'\n",argv[2]);
        fclose(in);
        return 1;
    }
    /* Test the result of fgetc itself: feof only turns true after a read has
     * failed, so looping on it would write one extra byte at end of file. */
    int c;
    while ((c = fgetc(in)) != EOF) {
        fputc(c & 0xfe,out);
    }
    fclose(in);
    if (fclose(out) != 0) {
        fprintf(stderr,"error while writing '%s'\n",argv[2]);
        return 1;
    }
    return 0;
}
Loading…
Cancel
Save