1 // Copyright 2019 Tero Hänninen. All rights reserved.
2 // SPDX-License-Identifier: BSD-2-Clause
3 //
4 // https://tools.ietf.org/html/rfc2083
5 // https://www.w3.org/TR/2003/REC-PNG-20031110/
6 module imagefmt.png;
7 
8 import etc.c.zlib;
9 import imagefmt;
10 
11 @nogc nothrow package:
12 
// Contents of the IHDR chunk as filled in by read_png_header. The fields are
// listed in the same order as they appear in the chunk's data.
struct PNGHeader {
    int     w;              // width, loaded from a big-endian 32-bit field
    int     h;              // height, loaded from a big-endian 32-bit field
    ubyte   bpc;  // bits per component
    ubyte   colortype;      // compared against the CTYPE values
    ubyte   compression;    // read_png only accepts 0
    ubyte   filter;         // read_png only accepts 0
    ubyte   interlace;      // read_png accepts 0 and 1 (1 selects the Adam7 path)
}
22 
// Values of the IHDR colortype byte that this module understands. See
// channels() for the number of channels each one maps to.
enum CTYPE {
    y    = 0,   // 1 channel
    rgb  = 2,   // 3 channels
    idx  = 3,   // palette indices; 3 channels, 4 when a tRNS chunk is present
    ya   = 4,   // 2 channels
    rgba = 6,   // 4 channels
}
30 
// Scanline filter types. recon() reads this value from the first byte of
// every uncompressed scanline; write_idat always emits FILTER.paeth.
enum FILTER { none, sub, up, average, paeth }
32 
// State shared by the chunk reader and the IDAT decoding functions while a
// single image is being read.
struct PNGDecoder {
    Reader* rc;             // input stream

    int     w;              // image width from IHDR
    int     h;              // image height from IHDR
    ubyte   sbpc;           // bits per component in the file
    ubyte   tbpc;           // bits per component requested by the caller
    ubyte   schans;         // channels in the file (fixed up to 4 for indexed + tRNS)
    ubyte   tchans;         // channels in the result (0 until fixed up in read_chunks)
    bool    indexed;        // colortype is CTYPE.idx
    bool    interlaced;     // IHDR interlace byte is 1

    ubyte[12] chunkmeta;    // [0..4] crc of previous chunk, [4..8] length, [8..12] type
    CRC32   crc;            // running crc of the chunk being read
    union {                 // decoded pixels, set by read_idat8 / read_idat16
        ubyte[] buf8;
        ushort[] buf16;
    }
    ubyte[] palette;        // data of the PLTE chunk (3 bytes per entry)
    ubyte[] transparency;   // 256 alpha values built from the tRNS chunk

    // decompression
    z_stream*   z;              // zlib stream
    uint        avail_idat;     // available bytes in current idat chunk
    ubyte[]     idat_window;    // slice of reader's buffer
}
59 
// The 8 bytes every PNG file starts with: 0x89 "PNG" CR LF 0x1a LF.
immutable ubyte[8] SIGNATURE =
    [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
62 
// Length field (13, big endian) and type field of the IHDR chunk, which must
// directly follow the file signature.
immutable ubyte[8] HEAD_CHUNK_SIG =
    [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];
65 
// Reports whether the stream starts with the PNG file signature. The reader
// is rewound to its start afterwards, whatever the outcome.
bool detect_png(Reader* rc)
{
    ubyte[SIGNATURE.length] magic;
    read_block(rc, magic[]);
    reset2start(rc);
    if (rc.fail)
        return false;
    return magic == SIGNATURE;
}
73 
// Reads only as much of the file as is needed to report its dimensions and
// channel count. For indexed images the chunks before the first IDAT are
// scanned for tRNS, in which case 4 channels are reported. An unknown
// colortype in an otherwise valid header is reported as ERROR.data.
IFInfo read_png_info(Reader* rc)
{
    PNGHeader hdr;
    IFInfo result;

    result.e = read_png_header(rc, hdr);
    result.w = hdr.w;
    result.h = hdr.h;
    result.c = channels(hdr.colortype);

    const bool paletted = hdr.colortype == CTYPE.idx;
    if (paletted && have_tRNS(rc)) {
        result.c = 4;
        return result;
    }

    if (result.c == 0 && !result.e)
        result.e = ERROR.data;
    return result;
}
88 
// Walks the chunks following IHDR and tells whether a tRNS chunk shows up
// before the first IDAT (or IEND). Chunk contents are skipped without any CRC
// checking. Any read problem or an oversized chunk length yields false.
bool have_tRNS(Reader* rc)
{
    ubyte[12] meta;     // [0..4] crc of previous chunk, [4..8] length, [8..12] type
    read_block(rc, meta[4..$]);  // first chunk has no preceding crc to read

    for (; !rc.fail; ) {
        uint remaining = load_u32be(meta[4..8]);
        if (remaining > int.max)
            return false;

        const type = cast(char[]) meta[8..12];
        if (type == "tRNS")
            return true;
        if (type == "IDAT" || type == "IEND")
            return false;

        // some other chunk: consume its data, then its crc and the next header
        while (remaining > 0) {
            ubyte[] piece = read_slice(rc, remaining);
            if (!piece.length)
                return false;
            remaining -= piece.length;
        }
        read_block(rc, meta[0..$]);
    }
    return false;
}
116 
// Reads the file signature and the complete IHDR chunk (33 bytes in total)
// and unpacks the IHDR fields into head.
// Returns ERROR.stream if the bytes could not be read, ERROR.data if the
// signature, the IHDR length/type or the IHDR crc is wrong, and 0 on success.
ubyte read_png_header(Reader* rc, out PNGHeader head)
{
    ubyte[33] raw;  // 8 signature + 4 length + 4 type + 13 data + 4 crc
    read_block(rc, raw[]);
    if (rc.fail)
        return ERROR.stream;

    if (raw[0..8] != SIGNATURE)
        return ERROR.data;
    if (raw[8..16] != HEAD_CHUNK_SIG)
        return ERROR.data;
    // the crc covers the chunk type and the chunk data
    if (raw[29..33] != CRC32.of(raw[12..29]))
        return ERROR.data;

    head.w = load_u32be(raw[16..20]);
    head.h = load_u32be(raw[20..24]);
    head.bpc = raw[24];
    head.colortype = raw[25];
    head.compression = raw[26];
    head.filter = raw[27];
    head.interlace = raw[28];
    return 0;
}
138 
// Decodes a PNG image from rc into image.
//
// reqchans: number of channels wanted in the result, 0..4, where 0 keeps the
//           channel count of the file. Anything else gives ERROR.arg.
// reqbpc:   bits per component wanted in the result: 8, 16, or 0 to keep the
//           depth of the file. Anything else gives ERROR.unsupp.
//
// Only 8 and 16 bit files are handled, and indexed files must be 8 bit.
// Returns 0 on success, otherwise an ERROR code; no pixel buffer is handed
// out (or left allocated by this function) on failure.
ubyte read_png(Reader* rc, out IFImage image, int reqchans, int reqbpc)
{
    if (cast(uint) reqchans > 4)
        return ERROR.arg;
    if (reqbpc != 0 && reqbpc != 8 && reqbpc != 16)
        return ERROR.unsupp;

    PNGHeader head;
    if (ubyte e = read_png_header(rc, head))
        return e;
    // values above int.max show up as negative here and are rejected too
    if (head.w < 1 || head.h < 1 || cast(ulong) head.w * head.h > int.max)
        return ERROR.dim;
    if (head.bpc != 8 && head.bpc != 16)
        return ERROR.unsupp;
    if (head.colortype != CTYPE.y    &&
        head.colortype != CTYPE.rgb  &&
        head.colortype != CTYPE.idx  &&
        head.colortype != CTYPE.ya   &&
        head.colortype != CTYPE.rgba)
        return ERROR.unsupp;
    if (head.colortype == CTYPE.idx && head.bpc != 8)
        return ERROR.unsupp;
    if (head.compression != 0 || head.filter != 0 || head.interlace > 1)
        return ERROR.unsupp;

    PNGDecoder dc = {
        rc         : rc,
        w          : head.w,
        h          : head.h,
        sbpc       : head.bpc,
        tbpc       : cast(ubyte) (reqbpc ? reqbpc : head.bpc),
        schans     : channels(head.colortype),  // +1 for indexed if tRNS found later
        tchans     : cast(ubyte) reqchans,  // adjust later
        indexed    : head.colortype == CTYPE.idx,
        interlaced : head.interlace == 1,
        // init the rest later
    };

    ubyte e = read_chunks(&dc);
    _free(dc.palette.ptr);
    _free(dc.transparency.ptr);
    if (e) {
        // read_chunks can fail after the pixels were already decoded (bad crc
        // on the last IDAT or on IEND, truncated file...). Release them here,
        // nobody else will. buf8 and buf16 share storage, and the pointer is
        // null when nothing was decoded.
        _free(dc.buf8.ptr);
        return e;
    }

    // pick the result buffer, converting the sample depth if asked to
    switch (32 * head.bpc + dc.tbpc) {
        case 32 *  8 +  8: image.buf8 = dc.buf8; break;
        case 32 * 16 + 16: image.buf16 = dc.buf16; break;
        case 32 *  8 + 16: image.buf16 = bpc8to16(dc.buf8); break;
        case 32 * 16 +  8: image.buf8 = bpc16to8(dc.buf16); break;
        default: assert(0);
    }
    if (!image.buf8.ptr)
        return ERROR.oom;

    image.w = dc.w;
    image.h = dc.h;
    image.c = cast(ubyte) dc.tchans;
    image.bpc = cast(ubyte) dc.tbpc;
    image.cinfile = cast(ubyte) dc.schans;
    return e;
}
199 
// Reads all chunks after IHDR up to and including IEND, checking chunk order
// and the crc of every chunk. PLTE and tRNS contents are stored in dc, IDAT
// data is decoded into dc.buf8 / dc.buf16, unknown chunks are skipped.
//
// Returns 0 on success or an ERROR code. Buffers stored in dc (palette,
// transparency, pixels) are left for the caller to release in either case.
ubyte read_chunks(PNGDecoder* dc)
{
    enum STAGE {
        IHDR_done,
        PLTE_done,
        IDAT_done,
        IEND_done,
    }

    auto stage = STAGE.IHDR_done;

    read_block(dc.rc, dc.chunkmeta[4..$]);  // next chunk's len and type

    while (stage != STAGE.IEND_done && !dc.rc.fail) {
        uint len = load_u32be(dc.chunkmeta[4..8]);
        if (len > int.max)
            return ERROR.data;

        // the crc of a chunk covers its type and its data, not the length
        dc.crc.put(dc.chunkmeta[8..12]);  // type
        switch (cast(char[]) dc.chunkmeta[8..12]) {
            case "IDAT":
                if (stage != STAGE.IHDR_done &&
                   (stage != STAGE.PLTE_done || !dc.indexed))
                   return ERROR.data;
                // fixup chans as needed. tRNS only supported for indexed by imagefmt
                dc.schans = dc.indexed && dc.transparency.length ? 4 : dc.schans;
                dc.tchans = dc.tchans ? dc.tchans : dc.schans;
                if (cast(ulong) dc.w * dc.h * dc.tchans > MAXIMUM_IMAGE_SIZE)
                    return ERROR.bigimg;
                // consumes this and all directly following IDAT chunks
                ubyte e = read_idat_chunks(dc, len);
                if (e) return e;
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                stage = STAGE.IDAT_done;
                break;
            case "PLTE":
                if (stage != STAGE.IHDR_done)
                    return ERROR.data;
                const uint entries = len / 3;
                if (entries * 3 != len || entries > 256)
                    return ERROR.data;
                ubyte e;
                dc.palette = new_buffer(len, e);
                if (e) return e;
                read_block(dc.rc, dc.palette[0..$]);
                dc.crc.put(dc.palette);
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                stage = STAGE.PLTE_done;
                break;
            case "tRNS":
                if (! (stage == STAGE.IHDR_done ||
                      (stage == STAGE.PLTE_done && dc.indexed)) )
                    return ERROR.data;
                if (dc.indexed && len * 3 > dc.palette.length || len > 256)
                    return ERROR.data; // that is redundant really --^
                if (!dc.indexed)
                    return ERROR.unsupp;
                // tRNS does not advance the stage, so a repeated chunk has to
                // be caught here; accepting it would leak the first table
                if (dc.transparency.length)
                    return ERROR.data;
                ubyte e;
                dc.transparency = new_buffer(256, e); if (e) return e;
                read_block(dc.rc, dc.transparency[0..len]);
                dc.transparency[len..$] = 255;  // entries without alpha are opaque
                read_block(dc.rc, dc.chunkmeta[0..$]);
                if (dc.rc.fail) return ERROR.stream;
                // only the len bytes that came from the file belong to the
                // crc, not the opaque padding that was added above
                dc.crc.put(dc.transparency[0..len]);
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                break;
            case "IEND":
                if (stage != STAGE.IDAT_done)
                    return ERROR.data;
                // IEND is always empty, so its crc is a constant
                static immutable ubyte[4] IEND_CRC = [0xae, 0x42, 0x60, 0x82];
                read_block(dc.rc, dc.chunkmeta[0..4]);
                if (len != 0 || dc.chunkmeta[0..4] != IEND_CRC)
                    return ERROR.data;
                stage = STAGE.IEND_done;
                break;
            case "IHDR":
                return ERROR.data;
            default:
                // unknown chunk, ignore but check crc
                while (len > 0) {
                    ubyte[] slice = read_slice(dc.rc, len);
                    if (!slice.length)
                        return ERROR.data;
                    len -= slice.length;
                    dc.crc.put(slice[0..$]);
                }
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
        }
    }

    return dc.rc.fail ? ERROR.stream : 0;
}
298 
// Sets up a zlib inflate stream for the run of IDAT chunks that starts with a
// chunk of len bytes and hands over to the decoder matching the sample depth
// of the file. dc.z points at a local of this function and must not be used
// once it has returned.
ubyte read_idat_chunks(PNGDecoder* dc, in uint len)
{
    z_stream stream = { zalloc: null, zfree: null, opaque: null };
    if (inflateInit(&stream) != Z_OK)
        return ERROR.zinit;
    scope(exit)
        inflateEnd(&stream);

    dc.z = &stream;
    dc.avail_idat = len;

    if (dc.sbpc == 8)
        return read_idat8(dc);
    if (dc.sbpc == 16)
        return read_idat16(dc);
    return ERROR.unsupp;
}
316 
// Exchanges two slices (pointer and length); the bytes they refer to are not
// touched.
void swap(ref ubyte[] a, ref ubyte[] b)
{
    ubyte[] held = a;
    a = b;
    b = held;
}
323 
//; these guys are only used by the read_idat functions and their helpers
//
// _png_error holds the first error reported since the last reset.
// sete(e) with a non-zero e records e unless an error is already stored, so
// the first error wins. sete(0) clears the stored error; the decoding and
// encoding functions call it before they start, which is why a zero must be
// let through even when an error is stored.
// gete(e) returns true and stores the error in e if there is one; otherwise
// it returns false and e is 0.
private ubyte _png_error = 0;
private void sete(ubyte e)     { if (!_png_error || !e) _png_error = e; }
private bool gete(out ubyte e) { return _png_error ? (e = _png_error) != 0 : false; }
328 
// Decodes the image data of a file with 8 bits per sample (including indexed
// images) into a newly allocated buffer of dc.w * dc.h * dc.tchans bytes and
// stores it in dc.buf8.
//
// Each scanline is inflated, defiltered, expanded through the palette if the
// image is indexed, and converted to the target channel count. Interlaced
// images are decoded pass by pass and the pixels of each reduced line are
// scattered to their final positions.
//
// Returns 0 on success. On failure the result buffer is released, dc.buf8 is
// set to null and an ERROR code is returned.
ubyte read_idat8(PNGDecoder* dc)
{
    auto convert = cast(conv8) getconv(dc.schans, dc.tchans, 8);

    const size_t filterstep = dc.indexed ? 1 : dc.schans;
    const size_t uclinesz   = dc.w * filterstep + 1; // uncompr, +1 for filter byte
    const size_t xlinesz    = dc.w * dc.schans * dc.indexed;
    const size_t redlinesz  = dc.w * dc.tchans * dc.interlaced;
    const size_t workbufsz  = 2 * uclinesz + xlinesz + redlinesz;

    ubyte e;
    ubyte[] cline;      // current line
    ubyte[] pline;      // previous line
    ubyte[] xline;      // intermediate buffer/slice for depaletting
    ubyte[] redline;    // reduced image line
    ubyte[] result  = new_buffer(dc.w * dc.h * dc.tchans, e);   if (e) return e;
    ubyte[] workbuf = new_buffer(workbufsz, e);                 if (e) goto fail;
    // workbuf layout: cline | pline | xline (indexed only) | redline (interlaced only)
    cline = workbuf[0 .. uclinesz];
    pline = workbuf[uclinesz .. 2*uclinesz];
    xline = dc.indexed ? workbuf[2*uclinesz .. 2*uclinesz + xlinesz] : null;
    redline = dc.interlaced ? workbuf[$-redlinesz .. $] : null;
    workbuf[0..$] = 0;  // the line above the first one counts as all zeros

    sete(0);

    if (!dc.interlaced) {
        const size_t tlinelen = dc.w * dc.tchans;
        size_t ti;
        if (dc.indexed) {
            foreach (_; 0 .. dc.h) {
                uncompress(dc, cline); // cline[0] is the filter type
                recon(cline, pline, filterstep);
                depalette(dc.palette, dc.transparency, cline[1..$], xline);
                convert(xline, result[ti .. ti + tlinelen]);
                ti += tlinelen;
                swap(cline, pline);
            }
        } else {
            foreach (_; 0 .. dc.h) {
                uncompress(dc, cline); // cline[0] is the filter type
                recon(cline, pline, filterstep);
                convert(cline[1..$], result[ti .. ti + tlinelen]);
                ti += tlinelen;
                swap(cline, pline);
            }
        }
    } else {    // Adam7 interlacing
        const size_t[7] redw = a7_init_redw(dc.w);
        const size_t[7] redh = a7_init_redh(dc.h);

        foreach (pass; 0 .. 7) {
            // A pass without pixels has no scanlines in the stream at all, not
            // even filter bytes. This happens for images narrower or lower
            // than five pixels; reading a byte per row here would swallow data
            // that belongs to the next pass.
            if (redw[pass] == 0 || redh[pass] == 0)
                continue;

            const A7Catapult catapult = a7catapults[pass];
            const size_t slinelen = redw[pass] * dc.schans;
            const size_t tlinelen = redw[pass] * dc.tchans;
            ubyte[] cln = cline[0 .. redw[pass] * filterstep + 1];
            ubyte[] pln = pline[0 .. redw[pass] * filterstep + 1];
            pln[] = 0;  // must be done for defiltering (recon)

            if (dc.indexed) {
                foreach (j; 0 .. redh[pass]) {
                    uncompress(dc, cln); // cln[0] is the filter type
                    recon(cln, pln, filterstep);
                    depalette(dc.palette, dc.transparency, cln[1..$], xline);
                    convert(xline[0 .. slinelen], redline[0 .. tlinelen]);
                    sling(redline, result, catapult, redw[pass], j, dc.w, dc.tchans);
                    swap(cln, pln);
                }
            } else {
                foreach (j; 0 .. redh[pass]) {
                    uncompress(dc, cln); // cln[0] is the filter type
                    recon(cln, pln, filterstep);
                    convert(cln[1 .. 1 + slinelen], redline[0 .. tlinelen]);
                    sling(redline, result, catapult, redw[pass], j, dc.w, dc.tchans);
                    swap(cln, pln);
                }
            }
        }
    }

    // the helpers above report problems through sete; collect them here
    if (gete(e)) goto fail;

finish:
    _free(workbuf.ptr);
    dc.buf8 = result[0..$];
    return e;
fail:
    _free(result.ptr);
    result = null;
    goto finish;
}
419 
// Decodes the image data of a file with 16 bits per sample into a newly
// allocated buffer of dc.w * dc.h * dc.tchans ushorts and stores it in
// dc.buf16. Works like read_idat8, except that the defiltered bytes of a
// line are first combined into native 16-bit samples.
//
// Returns 0 on success. On failure the result buffer is released, dc.buf16
// is set to null and an ERROR code is returned.
ubyte read_idat16(PNGDecoder* dc)     // 16-bit is never indexed
{
    auto convert = cast(conv16) getconv(dc.schans, dc.tchans, 16);

    // these are in bytes
    const size_t filterstep = dc.schans * 2;
    const size_t uclinesz   = dc.w * filterstep + 1; // uncompr, +1 for filter byte
    const size_t xlinesz    = dc.w * dc.schans * 2;
    // one target line is all that is ever kept in redline; a pass is never
    // wider than the image itself
    const size_t redlinesz  = dc.w * dc.tchans * 2 * dc.interlaced;
    const size_t workbufsz  = 2 * uclinesz + xlinesz + redlinesz;

    // xline is not quite necessary, it could be avoided if the conversion
    // functions were changed to do what line16_from_bytes does.

    ubyte e;
    ubyte[] cline;      // current line
    ubyte[] pline;      // previous line
    ushort[] xline;     // intermediate buffer to catch 16-bit samples
    ushort[] redline;   // reduced image line
    ushort[] result = new_buffer16(dc.w * dc.h * dc.tchans, e); if (e) return e;
    ubyte[] workbuf = new_buffer(workbufsz, e);                 if (e) goto fail;
    // workbuf layout: cline | pline | xline | redline (interlaced only)
    cline = workbuf[0 .. uclinesz];
    pline = workbuf[uclinesz .. 2*uclinesz];
    xline = cast(ushort[]) workbuf[2*uclinesz .. 2*uclinesz + xlinesz];
    redline = dc.interlaced ? cast(ushort[]) workbuf[$-redlinesz .. $] : null;
    workbuf[0..$] = 0;  // the line above the first one counts as all zeros

    sete(0);

    if (!dc.interlaced) {
        const size_t tlinelen = dc.w * dc.tchans;
        size_t ti;
        foreach (_; 0 .. dc.h) {
            uncompress(dc, cline); // cline[0] is the filter type
            recon(cline, pline, filterstep);
            line16_from_bytes(cline[1..$], xline);
            convert(xline[0..$], result[ti .. ti + tlinelen]);
            ti += tlinelen;
            swap(cline, pline);
        }
    } else {    // Adam7 interlacing
        const size_t[7] redw = a7_init_redw(dc.w);
        const size_t[7] redh = a7_init_redh(dc.h);

        foreach (pass; 0 .. 7) {
            // A pass without pixels has no scanlines in the stream at all, not
            // even filter bytes. This happens for images narrower or lower
            // than five pixels; reading a byte per row here would swallow data
            // that belongs to the next pass.
            if (redw[pass] == 0 || redh[pass] == 0)
                continue;

            const A7Catapult catapult = a7catapults[pass];
            const size_t slinelen = redw[pass] * dc.schans;
            const size_t tlinelen = redw[pass] * dc.tchans;
            ubyte[] cln = cline[0 .. redw[pass] * filterstep + 1];
            ubyte[] pln = pline[0 .. redw[pass] * filterstep + 1];
            pln[] = 0;  // must be done for defiltering (recon)

            foreach (j; 0 .. redh[pass]) {
                uncompress(dc, cln); // cln[0] is the filter type
                recon(cln, pln, filterstep);
                line16_from_bytes(cln[1 .. $], xline[0 .. slinelen]);
                convert(xline[0 .. slinelen], redline[0 .. tlinelen]);
                sling16(redline, result, catapult, redw[pass], j, dc.w, dc.tchans);
                swap(cln, pln);
            }
        }
    }

    // the helpers above report problems through sete; collect them here
    if (gete(e)) goto fail;

finish:
    _free(workbuf.ptr);
    dc.buf16 = result[0..$];
    return e;
fail:
    _free(result.ptr);
    result = null;
    goto finish;
}
494 
// Combines consecutive byte pairs of src (high byte first) into 16-bit
// samples in tgt. src must hold an even number of bytes and tgt at least
// half as many elements.
void line16_from_bytes(in ubyte[] src, ushort[] tgt)
{
    size_t t = 0;
    for (size_t k = 0; k < src.length; k += 2) {
        const ubyte hi = src[k];
        const ubyte lo = src[k+1];
        tgt[t] = cast(ushort) ((hi << 8) | lo);
        t += 1;
    }
}
501 
// Copies the redw pixels of a reduced (interlace pass) line to their places
// in the full image. cata maps the pixel's position in the reduced image
// (column i, row j) to a pixel index in an image that is dcw pixels wide.
void sling(in ubyte[] redline, ubyte[] result, A7Catapult cata, in size_t redw,
                                        in size_t j, in int dcw, in int tchans)
{
    foreach (i; 0 .. redw) {
        const size_t from = i * tchans;
        const size_t to   = cata(i, j, dcw) * tchans;
        result[to .. to + tchans] = redline[from .. from + tchans];
    }
}
510 
// 16-bit counterpart of sling: copies the redw pixels of a reduced (interlace
// pass) line to their places in the full image. cata maps the pixel's
// position in the reduced image (column i, row j) to a pixel index in an
// image that is dcw pixels wide.
void sling16(in ushort[] redline, ushort[] result, A7Catapult cata, in size_t redw,
                                        in size_t j, in int dcw, in int tchans)
{
    foreach (i; 0 .. redw) {
        const size_t from = i * tchans;
        const size_t to   = cata(i, j, dcw) * tchans;
        result[to .. to + tchans] = redline[from .. from + tchans];
    }
}
519 
// Uncompresses a line from the IDAT stream into dst. Calls sete for errors.
//
// Returns once dst has been filled completely. Compressed input is taken from
// the reader one slice at a time; whenever the current IDAT chunk runs out,
// its crc is verified and the following chunk has to be another, non-empty
// IDAT. After an error dst may be filled only partially.
void uncompress(PNGDecoder* dc, ubyte[] dst)
{
    dc.z.avail_out = cast(uint) dst.length;
    dc.z.next_out = dst.ptr;

    while (true) {
        if (!dc.z.avail_in) {
            // zlib has used up everything it was given, fetch more input
            if (!dc.avail_idat) {
                // the current chunk is exhausted: check its crc and move on
                // to the header of the next chunk
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return sete(ERROR.data);
                dc.avail_idat = load_u32be(dc.chunkmeta[4..8]);
                if (dc.rc.fail || !dc.avail_idat) return sete(ERROR.data);
                // more pixels are expected, so anything but IDAT means the
                // image data ended too early
                if (dc.chunkmeta[8..12] != "IDAT") return sete(ERROR.lackdata);
                dc.crc.put(dc.chunkmeta[8..12]);
            }
            // the slice may be shorter than what is left of the chunk
            dc.idat_window = read_slice(dc.rc, dc.avail_idat);
            if (!dc.idat_window) return sete(ERROR.stream);
            dc.crc.put(dc.idat_window);
            dc.avail_idat -= cast(uint) dc.idat_window.length;
            dc.z.avail_in = cast(uint) dc.idat_window.length;
            dc.z.next_in = dc.idat_window.ptr;
        }

        int q = inflate(dc.z, Z_NO_FLUSH);

        // a full line counts as success whatever inflate returned; this
        // includes the end of the zlib stream on the last line
        if (dc.z.avail_out == 0)
            return;
        if (q != Z_OK)
            return sete(ERROR.zstream);
    }
}
553 
// Expands the palette indices in sline to RGB (3 bytes per pixel) or, when a
// transparency table is given, to RGBA (4 bytes per pixel) in dst. An index
// that points past the end of the palette is reported with
// sete(ERROR.data) and stops the expansion of the line.
void depalette(in ubyte[] palette, in ubyte[] trns, in ubyte[] sline, ubyte[] dst)
{
    const bool with_alpha = trns.length != 0;
    const size_t stride = with_alpha ? 4 : 3;

    size_t d = 0;
    foreach (const ubyte index; sline) {
        const size_t p = index * 3;
        if (p + 3 > palette.length)
            return sete(ERROR.data);
        dst[d .. d+3] = palette[p .. p+3];
        if (with_alpha)
            dst[d+3] = trns[index];
        d += stride;
    }
}
574 
// Reverses the scanline filter in place. cline[0] is the filter type and the
// filtered bytes follow it; pline is the previous, already reconstructed line
// in the same layout (all zeros for the first line). fstep is the distance in
// bytes between a byte and the corresponding byte of the pixel to its left.
// An unknown filter type is reported with sete(ERROR.unsupp).
void recon(ubyte[] cline, const(ubyte)[] pline, in size_t fstep)
{
    const ubyte ftype = cline[0];
    ubyte[] cur = cline[1..$];
    const(ubyte)[] above = pline[1..$];
    const size_t n = cur.length;

    if (ftype == FILTER.none) {
        // bytes are stored as they are
    } else if (ftype == FILTER.sub) {
        for (size_t k = fstep; k < n; ++k)
            cur[k] += cur[k-fstep];
    } else if (ftype == FILTER.up) {
        for (size_t k = 0; k < n; ++k)
            cur[k] += above[k];
    } else if (ftype == FILTER.average) {
        // the first pixel has no left neighbour
        for (size_t k = 0; k < fstep; ++k)
            cur[k] += above[k] / 2;
        for (size_t k = fstep; k < n; ++k)
            cur[k] += cast(ubyte)
                ((cast(uint) cur[k-fstep] + cast(uint) above[k]) / 2);
    } else if (ftype == FILTER.paeth) {
        // the first pixel has neither a left nor an upper left neighbour
        for (size_t k = 0; k < fstep; ++k)
            cur[k] += paeth(0, above[k], 0);
        for (size_t k = fstep; k < n; ++k)
            cur[k] += paeth(cur[k-fstep], above[k], above[k-fstep]);
    } else {
        return sete(ERROR.unsupp);
    }
}
608 
// Paeth predictor: of a (left), b (above) and c (upper left), returns the one
// closest to a + b - c. Ties are resolved in the order a, b, c.
ubyte paeth(in ubyte a, in ubyte b, in ubyte c)
{
    // with p = a + b - c: p - a = b - c, p - b = a - c, p - c = their sum
    const int da = b - c;
    const int db = a - c;
    const int dc = da + db;

    const int dist_a = da < 0 ? -da : da;
    const int dist_b = db < 0 ? -db : db;
    const int dist_c = dc < 0 ? -dc : dc;

    if (dist_a <= dist_b && dist_a <= dist_c)
        return a;
    return dist_b <= dist_c ? b : c;
}
626 
// A catapult maps the position of a pixel in the reduced image of an Adam7
// pass (column redx, row redy) to the index of that pixel in the full image,
// which is dstw pixels wide. The table holds one catapult per pass, in pass
// order.
alias A7Catapult = size_t function(size_t redx, size_t redy, size_t dstw);
immutable A7Catapult[7] a7catapults = [
    &a7_red1_to_dst,
    &a7_red2_to_dst,
    &a7_red3_to_dst,
    &a7_red4_to_dst,
    &a7_red5_to_dst,
    &a7_red6_to_dst,
    &a7_red7_to_dst,
];
637 
// Catapults for the seven Adam7 passes. Each computes row * dstw + column of
// the full-image pixel that the pixel (redx, redy) of the pass belongs to.

// pass 1: every 8th column starting at 0, every 8th row starting at 0
size_t a7_red1_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 8;
    const size_t col = redx * 8;
    return row * dstw + col;
}

// pass 2: every 8th column starting at 4, every 8th row starting at 0
size_t a7_red2_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 8;
    const size_t col = redx * 8 + 4;
    return row * dstw + col;
}

// pass 3: every 4th column starting at 0, every 8th row starting at 4
size_t a7_red3_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 8 + 4;
    const size_t col = redx * 4;
    return row * dstw + col;
}

// pass 4: every 4th column starting at 2, every 4th row starting at 0
size_t a7_red4_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 4;
    const size_t col = redx * 4 + 2;
    return row * dstw + col;
}

// pass 5: every 2nd column starting at 0, every 4th row starting at 2
size_t a7_red5_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 4 + 2;
    const size_t col = redx * 2;
    return row * dstw + col;
}

// pass 6: every 2nd column starting at 1, every 2nd row starting at 0
size_t a7_red6_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 2;
    const size_t col = redx * 2 + 1;
    return row * dstw + col;
}

// pass 7: every column, every 2nd row starting at 1
size_t a7_red7_to_dst(size_t redx, size_t redy, size_t dstw)
{
    const size_t row = redy * 2 + 1;
    const size_t col = redx;
    return row * dstw + col;
}
645 
// Widths of the seven reduced images of the Adam7 passes for an image that is
// w pixels wide. A width of 0 means the pass has no pixels.
size_t[7] a7_init_redw(in int w)
{
    // pass p covers every step[p]-th column; lead[p] rounds the division so
    // that the column the pass starts at is taken into account
    static immutable int[7] lead = [7, 3, 3, 1, 1, 0, 0];
    static immutable int[7] step = [8, 8, 4, 4, 2, 2, 1];

    size_t[7] widths;
    foreach (p; 0 .. 7)
        widths[p] = (w + lead[p]) / step[p];
    return widths;
}
657 
// Heights of the seven reduced images of the Adam7 passes for an image that
// is h pixels high. A height of 0 means the pass has no pixels.
size_t[7] a7_init_redh(in int h)
{
    // pass p covers every step[p]-th row; lead[p] rounds the division so
    // that the row the pass starts at is taken into account
    static immutable int[7] lead = [7, 7, 3, 3, 1, 1, 0];
    static immutable int[7] step = [8, 8, 8, 4, 4, 2, 2];

    size_t[7] heights;
    foreach (p; 0 .. 7)
        heights[p] = (h + lead[p]) / step[p];
    return heights;
}
669 
// Assembles a 32-bit unsigned value from four bytes, most significant first.
uint load_u32be(in ubyte[4] s)
{
    uint value = 0;
    foreach (b; s)
        value = (value << 8) | b;
    return value;
}
674 
// Splits a 32-bit unsigned value into four bytes, most significant first.
ubyte[4] u32_to_be(in uint x)
{
    ubyte[4] be;
    be[0] = cast(ubyte) (x >> 24);
    be[1] = cast(ubyte) (x >> 16);
    be[2] = cast(ubyte) (x >> 8);
    be[3] = cast(ubyte) x;
    return be;
}
680 
// Number of channels for a colortype value from IHDR, or 0 if the value is
// not one of the CTYPE members. Indexed images count as 3 channels here; the
// callers raise that to 4 when a tRNS chunk is present.
ubyte channels(in ubyte colortype)
{
    if (colortype == CTYPE.y)    return 1;
    if (colortype == CTYPE.ya)   return 2;
    if (colortype == CTYPE.rgb)  return 3;
    if (colortype == CTYPE.rgba) return 4;
    if (colortype == CTYPE.idx)  return 3;   // +1 if tRNS chunk present
    return 0;
}
692 
// Writes buf as a non-interlaced PNG with 8 bits per sample.
//
// w, h:     image dimensions, both at least 1
// buf:      pixel data; its length must be w * h * n for some n in 1..4, and
//           that n is taken as the number of channels in buf
// reqchans: number of channels to write, 0..4, where 0 means the same as in
//           buf
//
// Returns 0 on success, ERROR.dim for bad dimensions or buffer length,
// ERROR.unsupp for a bad reqchans, ERROR.stream if writing failed, or the
// error from compressing the image data.
ubyte write_png(Writer* wc, int w, int h, in ubyte[] buf, in int reqchans)
{
    if (w < 1 || h < 1)
        return ERROR.dim;

    const uint schans = cast(uint) (buf.length / w / h);
    const bool chans_ok = schans >= 1 && schans <= 4;
    if (!chans_ok || schans * w * h != buf.length)
        return ERROR.dim;
    if (cast(uint) reqchans > 4)
        return ERROR.unsupp;

    const uint tchans = cast(uint) reqchans ? reqchans : schans;

    ubyte colortype;
    if      (tchans == 1) colortype = CTYPE.y;
    else if (tchans == 2) colortype = CTYPE.ya;
    else if (tchans == 3) colortype = CTYPE.rgb;
    else if (tchans == 4) colortype = CTYPE.rgba;
    else assert(0);

    // data part of the IHDR chunk
    ubyte[13] ihdr;
    ihdr[0..4]   = u32_to_be(cast(uint) w);
    ihdr[4..8]   = u32_to_be(cast(uint) h);
    ihdr[8]      = 8;           // bit depth
    ihdr[9]      = colortype;
    ihdr[10..13] = 0;           // compression, filter and interlace methods

    // the chunk crc covers the type and the data
    CRC32 crc;
    crc.put(cast(ubyte[]) "IHDR");
    crc.put(ihdr);

    write_block(wc, SIGNATURE);
    write_block(wc, HEAD_CHUNK_SIG);
    write_block(wc, ihdr);
    write_block(wc, crc.finish_be());
    if (wc.fail)
        return ERROR.stream;

    PNGEncoder ec;
    ec.wc = wc;
    ec.w = w;
    ec.h = h;
    ec.schans = schans;
    ec.tchans = tchans;
    ec.buf = buf;

    ubyte e = write_idat(ec);
    if (e)
        return e;

    // complete IEND chunk: zero length, type, crc
    static immutable ubyte[12] IEND =
        [0, 0, 0, 0, 'I','E','N','D', 0xae, 0x42, 0x60, 0x82];
    write_block(wc, IEND);

    return wc.fail ? ERROR.stream : e;
}
749 
// State shared by write_idat and its helpers while one image is written.
struct PNGEncoder {
    Writer*     wc;         // output stream
    size_t      w;          // image width in pixels
    size_t      h;          // image height in pixels
    uint        schans;     // channels in buf
    uint        tchans;     // channels written to the file
    const(ubyte)[] buf;     // source pixels
    CRC32       crc;        // running crc of the IDAT chunk being written
    z_stream*   z;          // zlib deflate stream, set up by write_idat
    ubyte[]     idatbuf;    // compressed data waiting to be written as a chunk
}
761 
// Size of the buffer that collects compressed data in write_idat, and thereby
// the largest amount of data put into a single IDAT chunk.
enum MAXIMUM_CHUNK_SIZE = 8192;
763 
// Converts, filters and compresses the image in ec.buf line by line and
// writes the result as a sequence of IDAT chunks. Every line is filtered with
// the paeth filter. Returns 0 or an ERROR code.
ubyte write_idat(ref PNGEncoder ec)
{
    // initialize zlib stream
    z_stream zs = { zalloc: null, zfree: null, opaque: null };
    if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK)
        return ERROR.zinit;
    ec.z = &zs;
    scope(exit)
        deflateEnd(ec.z);

    auto convert = cast(conv8) getconv(ec.schans, ec.tchans, 8);

    const size_t fstep   = ec.tchans;               // bytes per target pixel
    const size_t srcline = ec.w * ec.schans;
    const size_t dstline = ec.w * ec.tchans + 1;    // +1 for the filter byte
    const size_t total   = 3 * dstline + MAXIMUM_CHUNK_SIZE;

    ubyte e;
    ubyte[] workbuf = new_buffer(total, e);
    if (e)
        return e;
    scope(exit)
        _free(workbuf.ptr);     // runs before deflateEnd

    // workbuf layout: current line | previous line | filtered line | idat buffer
    workbuf[] = 0;              // the line above the first one counts as zeros
    ubyte[] cline    = workbuf[0 .. dstline];
    ubyte[] pline    = workbuf[dstline .. 2 * dstline];
    ubyte[] filtered = workbuf[2 * dstline .. 3 * dstline];
    ec.idatbuf       = workbuf[$-MAXIMUM_CHUNK_SIZE .. $];
    ec.z.next_out  = ec.idatbuf.ptr;
    ec.z.avail_out = cast(uint) ec.idatbuf.length;

    sete(0);

    const size_t srcend = ec.w * ec.schans * ec.h;
    size_t si = 0;
    while (si < srcend) {
        convert(ec.buf[si .. si + srcline], cline[1..$]);

        filtered[0] = FILTER.paeth;
        // the first pixel has neither a left nor an upper left neighbour
        for (size_t i = 1; i < fstep+1; ++i)
            filtered[i] = cast(ubyte) (cline[i] - paeth(0, pline[i], 0));
        for (size_t i = fstep+1; i < dstline; ++i) {
            const ubyte predicted = paeth(cline[i-fstep], pline[i], pline[i-fstep]);
            filtered[i] = cast(ubyte) (cline[i] - predicted);
        }

        compress(ec, filtered);
        swap(cline, pline);
        si += srcline;
    }

    // flush zlib, unless something has gone wrong already
    for (;;) {
        if (gete(e))
            break;
        const int q = deflate(ec.z, Z_FINISH);
        if (ec.idatbuf.length != ec.z.avail_out)
            flush_idat(ec);
        if (q == Z_STREAM_END)
            break;
        if (q != Z_OK)          // Z_OK only means there was not enough avail_out
            sete(ERROR.zstream);
    }

    return e;
}
823 
// Feeds one filtered line to zlib, writing out an IDAT chunk each time the
// output buffer fills up. A zlib failure is reported with
// sete(ERROR.zstream).
void compress(ref PNGEncoder ec, in ubyte[] line)
{
    ec.z.next_in = line.ptr;
    ec.z.avail_in = cast(uint) line.length;

    for (; ec.z.avail_in != 0; ) {
        const int q = deflate(ec.z, Z_NO_FLUSH);
        if (q != Z_OK) {
            sete(ERROR.zstream);
            return;
        }
        if (!ec.z.avail_out)
            flush_idat(ec);
    }
}
835 
// Writes the compressed data collected in ec.idatbuf as one IDAT chunk
// (length, type, data, crc) and hands the whole buffer back to zlib. Does
// nothing if the writer has failed already; a failure while writing is
// reported with sete(ERROR.stream).
void flush_idat(ref PNGEncoder ec)      // writes an idat chunk
{
    if (ec.wc.fail)
        return;

    const(ubyte)[] tag = cast(const(ubyte)[]) "IDAT";
    const uint used = cast(uint) (ec.idatbuf.length - ec.z.avail_out);
    const(ubyte)[] payload = ec.idatbuf[0 .. used];

    // the crc covers the chunk type and the data
    ec.crc.put(tag);
    ec.crc.put(payload);

    write_block(ec.wc, u32_to_be(used));
    write_block(ec.wc, tag);
    write_block(ec.wc, payload);
    write_block(ec.wc, ec.crc.finish_be());

    ec.z.avail_out = cast(uint) ec.idatbuf.length;
    ec.z.next_out = ec.idatbuf.ptr;

    if (ec.wc.fail)
        sete(ERROR.stream);
}
850 
// Table driven CRC-32 as used for PNG chunks. Feed data with put(), fetch the
// checksum with finish_be().
struct CRC32 {
    uint r = 0xffff_ffff;   // running remainder, not yet inverted

    @nogc nothrow:

    // Updates the checksum with data.
    void put(in ubyte[] data)
    {
        uint acc = r;
        foreach (b; data)
            acc = CRC32TAB[(acc ^ b) & 0xff] ^ (acc >> 8);
        r = acc;
    }

    // Returns the checksum of everything put so far as four bytes, most
    // significant first, and resets the state for the next checksum.
    ubyte[4] finish_be()
    {
        const uint checksum = r ^ 0xffff_ffff;
        r = 0xffff_ffff;
        return u32_to_be(checksum);
    }

    // Checksum of a single block of data.
    static ubyte[4] of(in ubyte[] data)
    {
        CRC32 one_shot;
        one_shot.put(data);
        return one_shot.finish_be();
    }
}
878 
// Lookup table for the byte-wise CRC computation in CRC32.put: entry i is the
// remainder contributed by the byte value i.
immutable uint[256] CRC32TAB = [
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
];