1 // Copyright 2019 Tero Hänninen. All rights reserved.
2 // SPDX-License-Identifier: BSD-2-Clause
3 //
4 // https://tools.ietf.org/html/rfc2083
5 // https://www.w3.org/TR/2003/REC-PNG-20031110/
6 module imagefmt.png;
7 
8 import etc.c.zlib;
9 import imagefmt;
10 
11 @nogc nothrow package:
12 
// The data fields of the IHDR chunk, in the order they appear in the file
// (filled in by read_png_header).
struct PNGHeader {
    int     w;              // image width, loaded from a big-endian 32-bit field
    int     h;              // image height, loaded from a big-endian 32-bit field
    ubyte   bpc;  // bits per component
    ubyte   colortype;      // compared against the CTYPE values by read_png
    ubyte   compression;    // read_png accepts only 0
    ubyte   filter;         // read_png accepts only 0
    ubyte   interlace;      // read_png accepts 0 and 1 (1 selects the Adam7 path)
}
22 
// Values of the IHDR colortype byte that this module understands.
// The channel count for each one is given by channels().
enum CTYPE {
    y    = 0,   // 1 channel
    rgb  = 2,   // 3 channels
    idx  = 3,   // palette indices; 3 channels, or 4 when a tRNS chunk is present
    ya   = 4,   // 2 channels
    rgba = 6,   // 4 channels
}
30 
// Per-line filter types (values 0..4). recon() switches on the first byte of
// each uncompressed line using these values; write_idat always emits paeth.
enum FILTER { none, sub, up, average, paeth }
32 
// Decoding state shared by read_png, read_chunks and the read_idat functions.
struct PNGDecoder {
    Reader* rc;             // input stream

    int     w;              // image width from the header
    int     h;              // image height from the header
    ubyte   sbpc;           // bits per component in the file
    ubyte   tbpc;           // bits per component requested (or sbpc if none requested)
    ubyte   schans;         // channels in the file; raised to 4 for indexed + tRNS
    ubyte   tchans;         // channels of the result; 0 until resolved in read_chunks
    bool    indexed;        // colortype is CTYPE.idx
    bool    interlaced;     // header interlace byte is 1

    ubyte[12] chunkmeta;    // [0..4] crc of the chunk just read, [4..8] len and
                            // [8..12] type of the next chunk
    CRC32   crc;            // running crc over the current chunk's type and data
    union {                 // decoded pixels, set by read_idat8 / read_idat16
        ubyte[] buf8;
        ushort[] buf16;
    }
    ubyte[] palette;        // raw PLTE data (3 bytes per entry)
    ubyte[] transparency;   // 256 bytes: tRNS data padded with 255

    // decompression
    z_stream*   z;              // zlib stream
    uint        avail_idat;     // available bytes in current idat chunk
    ubyte[]     idat_window;    // slice of reader's buffer
}
59 
// The eight bytes every PNG file starts with; checked by detect_png and
// read_png_header, emitted first by write_png.
immutable ubyte[8] SIGNATURE =
    [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
62 
// Length field (13, big-endian) and type of the IHDR chunk, which must follow
// the signature directly.
immutable ubyte[8] HEAD_CHUNK_SIG =
    [0x0, 0x0, 0x0, 0xd, 'I','H','D','R'];
65 
// Returns true if the stream starts with the PNG signature. The reader is
// reset to its start before returning, whatever the outcome.
bool detect_png(Reader* rc)
{
    ubyte[SIGNATURE.length] magic;
    read_block(rc, magic[]);
    reset2start(rc);
    if (rc.fail)
        return false;
    return magic == SIGNATURE;
}
73 
// Reads the header and reports width, height and channel count without
// decoding any pixels. An indexed image reports 4 channels when a tRNS chunk
// precedes the image data, otherwise 3. An unknown colortype is reported as
// ERROR.data unless the header read already failed.
IFInfo read_png_info(Reader* rc)
{
    PNGHeader head;
    IFInfo info;

    info.e = read_png_header(rc, head);
    info.w = head.w;
    info.h = head.h;
    info.c = channels(head.colortype);

    const bool paletted = head.colortype == CTYPE.idx;
    if (paletted && have_tRNS(rc)) {
        info.c = 4;
    } else if (info.c == 0 && !info.e) {
        info.e = ERROR.data;
    }
    return info;
}
88 
// Scans chunks from the current position (just after IHDR) and returns true
// if a tRNS chunk is met before IDAT or IEND. Other chunks are skipped without
// checking their crc. Any read problem yields false.
bool have_tRNS(Reader* rc)
{
    // [0..4] crc of the skipped chunk, [4..8] length, [8..12] type of the next
    ubyte[12] meta;
    read_block(rc, meta[4..$]);

    while (!rc.fail) {
        uint remaining = load_u32be(meta[4..8]);
        if (remaining > int.max)
            return false;

        const char[] type = cast(char[]) meta[8..12];
        if (type == "tRNS")
            return true;
        if (type == "IDAT" || type == "IEND")
            return false;

        // not interesting: skip over the chunk data
        while (remaining > 0) {
            ubyte[] part = read_slice(rc, remaining);
            if (part.length == 0)
                return false;
            remaining -= part.length;
        }
        read_block(rc, meta[0..$]); // crc | len, type
    }
    return false;
}
116 
// Reads the file signature and the complete IHDR chunk (33 bytes in total)
// and stores the IHDR fields in head.
// Returns 0 on success, ERROR.stream if the bytes could not be read, or
// ERROR.data if the signature, the IHDR length/type or the IHDR crc is wrong.
ubyte read_png_header(Reader* rc, out PNGHeader head)
{
    ubyte[33] raw;  // signature (8) + len, type (8) + data (13) + crc (4)
    read_block(rc, raw[]);
    if (rc.fail)
        return ERROR.stream;

    if (raw[0..8] != SIGNATURE)
        return ERROR.data;
    if (raw[8..16] != HEAD_CHUNK_SIG)
        return ERROR.data;
    // the crc covers the chunk type and the chunk data
    if (raw[29..33] != CRC32.of(raw[12..29]))
        return ERROR.data;

    head.w           = load_u32be(raw[16..20]);
    head.h           = load_u32be(raw[20..24]);
    head.bpc         = raw[24];
    head.colortype   = raw[25];
    head.compression = raw[26];
    head.filter      = raw[27];
    head.interlace   = raw[28];
    return 0;
}
138 
// Decodes a PNG image from rc into image.
//
// reqchans: number of channels wanted in the result, 0..4 (0 keeps the
//           channel count of the file); anything else gives ERROR.arg.
// reqbpc:   bits per component wanted, 0, 8 or 16 (0 keeps the file's depth);
//           anything else gives ERROR.unsupp.
//
// Returns 0 on success or an ERROR code. Only 8 and 16 bit images are
// supported, indexed images only with 8 bits. On failure no pixel buffer is
// left allocated.
ubyte read_png(Reader* rc, out IFImage image, int reqchans, int reqbpc)
{
    if (cast(uint) reqchans > 4)
        return ERROR.arg;
    if (reqbpc != 0 && reqbpc != 8 && reqbpc != 16)
        return ERROR.unsupp;

    PNGHeader head;
    if (ubyte e = read_png_header(rc, head))
        return e;
    // the pixel count must fit in an int
    if (head.w < 1 || head.h < 1 || cast(ulong) head.w * head.h > int.max)
        return ERROR.dim;
    if (head.bpc != 8 && head.bpc != 16)
        return ERROR.unsupp;
    if (head.colortype != CTYPE.y    &&
        head.colortype != CTYPE.rgb  &&
        head.colortype != CTYPE.idx  &&
        head.colortype != CTYPE.ya   &&
        head.colortype != CTYPE.rgba)
        return ERROR.unsupp;
    if (head.colortype == CTYPE.idx && head.bpc != 8)
        return ERROR.unsupp;
    if (head.compression != 0 || head.filter != 0 || head.interlace > 1)
        return ERROR.unsupp;

    PNGDecoder dc = {
        rc         : rc,
        w          : head.w,
        h          : head.h,
        sbpc       : head.bpc,
        tbpc       : cast(ubyte) (reqbpc ? reqbpc : head.bpc),
        schans     : channels(head.colortype),  // +1 for indexed if tRNS found later
        tchans     : cast(ubyte) reqchans,  // adjust later
        indexed    : head.colortype == CTYPE.idx,
        interlaced : head.interlace == 1,
        // init the rest later
    };

    ubyte e = read_chunks(&dc);
    _free(dc.palette.ptr);
    _free(dc.transparency.ptr);
    if (e) {
        // read_chunks can fail after the pixels were already decoded (e.g. a
        // bad crc after the image data); release them so they do not leak.
        // The pointer is null when nothing was decoded, and buf8 shares its
        // storage with buf16, so this covers both depths.
        _free(dc.buf8.ptr);
        return e;
    }

    // key: source depth * 32 + target depth
    switch (32 * head.bpc + dc.tbpc) {
        case 32 *  8 +  8: image.buf8 = dc.buf8; break;
        case 32 * 16 + 16: image.buf16 = dc.buf16; break;
        case 32 *  8 + 16: image.buf16 = bpc8to16(dc.buf8); break;
        case 32 * 16 +  8: image.buf8 = bpc16to8(dc.buf16); break;
        default: assert(0);
    }
    if (!image.buf8.ptr)
        return ERROR.oom;

    image.w = dc.w;
    image.h = dc.h;
    image.c = cast(ubyte) dc.tchans;
    image.bpc = cast(ubyte) dc.tbpc;
    image.cinfile = cast(ubyte) dc.schans;
    return e;
}
199 
// Processes the chunks that follow IHDR, up to and including IEND.
//
// PLTE and tRNS data are stored in dc.palette and dc.transparency, the image
// data is decoded through read_idat_chunks, and unknown chunks are skipped.
// The crc of every chunk is verified. Chunk order is enforced with a small
// stage machine: PLTE only right after IHDR, tRNS before IDAT, IEND only
// after IDAT. tRNS is supported for indexed images only.
//
// Returns 0 on success or an ERROR code. Buffers stored in dc are left for
// the caller to release.
ubyte read_chunks(PNGDecoder* dc)
{
    enum STAGE {
        IHDR_done,
        PLTE_done,
        IDAT_done,
        IEND_done,
    }

    auto stage = STAGE.IHDR_done;

    read_block(dc.rc, dc.chunkmeta[4..$]);  // next chunk's len and type

    while (stage != STAGE.IEND_done && !dc.rc.fail) {
        uint len = load_u32be(dc.chunkmeta[4..8]);
        if (len > int.max)
            return ERROR.data;

        dc.crc.put(dc.chunkmeta[8..12]);  // type
        switch (cast(char[]) dc.chunkmeta[8..12]) {
            case "IDAT":
                if (stage != STAGE.IHDR_done &&
                   (stage != STAGE.PLTE_done || !dc.indexed))
                   return ERROR.data;
                // fixup chans as needed. tRNS only supported for indexed by imagefmt
                dc.schans = dc.indexed && dc.transparency.length ? 4 : dc.schans;
                dc.tchans = dc.tchans ? dc.tchans : dc.schans;
                if (cast(ulong) dc.w * dc.h * dc.tchans > MAXIMUM_IMAGE_SIZE)
                    return ERROR.bigimg;
                // consumes this and all directly following IDAT chunks
                ubyte e = read_idat_chunks(dc, len);
                if (e) return e;
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                stage = STAGE.IDAT_done;
                break;
            case "PLTE":
                if (stage != STAGE.IHDR_done)
                    return ERROR.data;
                const uint entries = len / 3;
                if (entries * 3 != len || entries > 256)
                    return ERROR.data;
                ubyte e;
                dc.palette = new_buffer(len, e);
                if (e) return e;
                read_block(dc.rc, dc.palette[0..$]);
                dc.crc.put(dc.palette);
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                stage = STAGE.PLTE_done;
                break;
            case "tRNS":
                if (! (stage == STAGE.IHDR_done ||
                      (stage == STAGE.PLTE_done && dc.indexed)) )
                    return ERROR.data;
                if (dc.indexed && len * 3 > dc.palette.length || len > 256)
                    return ERROR.data; // that is redundant really --^
                if (!dc.indexed)
                    return ERROR.unsupp;
                // tRNS does not advance the stage, so a second tRNS chunk would
                // get here again; reject it instead of overwriting (and
                // leaking) the buffer allocated for the first one.
                if (dc.transparency.length)
                    return ERROR.data;
                ubyte e;
                dc.transparency = new_buffer(256, e); if (e) return e;
                read_block(dc.rc, dc.transparency[0..len]);
                dc.transparency[len..$] = 255;  // entries without alpha are opaque
                read_block(dc.rc, dc.chunkmeta[0..$]);
                if (dc.rc.fail) return ERROR.stream;
                dc.crc.put(dc.transparency[0..len]);
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
                break;
            case "IEND":
                if (stage != STAGE.IDAT_done)
                    return ERROR.data;
                // IEND has no data, so its crc is a constant
                static immutable ubyte[4] IEND_CRC = [0xae, 0x42, 0x60, 0x82];
                read_block(dc.rc, dc.chunkmeta[0..4]);
                if (len != 0 || dc.chunkmeta[0..4] != IEND_CRC)
                    return ERROR.data;
                stage = STAGE.IEND_done;
                break;
            case "IHDR":
                return ERROR.data;
            default:
                // unknown chunk, ignore but check crc
                while (len > 0) {
                    ubyte[] slice = read_slice(dc.rc, len);
                    if (!slice.length)
                        return ERROR.data;
                    len -= slice.length;
                    dc.crc.put(slice[0..$]);
                }
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return ERROR.data;
        }
    }

    return dc.rc.fail ? ERROR.stream : 0;
}
298 
// Sets up a zlib inflate stream and decodes the image data, starting with an
// IDAT chunk whose data length is len. The stream only lives for the duration
// of this call; dc.z points at a local.
// Returns 0, ERROR.zinit if zlib could not be initialized, ERROR.unsupp for a
// source depth other than 8 or 16, or whatever the depth-specific reader
// returns.
ubyte read_idat_chunks(PNGDecoder* dc, in uint len)
{
    z_stream z = { zalloc: null, zfree: null, opaque: null };
    if (inflateInit(&z) != Z_OK)
        return ERROR.zinit;
    scope(exit)
        inflateEnd(&z);

    dc.z = &z;
    dc.avail_idat = len;

    if (dc.sbpc == 8)
        return read_idat8(dc);
    if (dc.sbpc == 16)
        return read_idat16(dc);
    return ERROR.unsupp;
}
316 
// Exchanges two slices (pointer and length); no element data is copied.
void swap(ref ubyte[] a, ref ubyte[] b)
{
    ubyte[] held = a;
    a = b;
    b = held;
}
323 
//; these guys are only used by the read_idat functions and their helpers
//
// _png_error holds the first error reported since the last reset. The
// read_idat functions and write_idat call sete(0) before they start working
// and gete() when they are done.
private ubyte _png_error = 0;

// Records e as the pending error unless one is already pending (the first
// error wins). Passing 0 clears the pending error. Without that, sete(0) did
// nothing and one failed operation would make every later one report the
// same stale error.
private void sete(ubyte e)     { if (!_png_error || !e) _png_error = e; }

// Returns true and stores the pending error in e if there is one; otherwise
// returns false and e is 0.
private bool gete(out ubyte e) { return _png_error ? (e = _png_error) != 0 : false; }
328 
// Decodes the image data of an 8-bit image (indexed or not, interlaced or
// not) into a newly allocated buffer of w * h * tchans bytes and stores it in
// dc.buf8. Lines are uncompressed, defiltered, depaletted if indexed, and
// converted to the target channel count one at a time.
// Returns 0 on success or an ERROR code; on failure the result buffer is
// freed and dc.buf8 is null.
ubyte read_idat8(PNGDecoder* dc)
{
    auto convert = cast(conv8) getconv(dc.schans, dc.tchans, 8);

    const size_t filterstep = dc.indexed ? 1 : dc.schans;
    const size_t uclinesz   = dc.w * filterstep + 1; // uncompr, +1 for filter byte
    const size_t xlinesz    = dc.w * dc.schans * dc.indexed;
    const size_t redlinesz  = dc.w * dc.tchans * dc.interlaced;
    const size_t workbufsz  = 2 * uclinesz + xlinesz + redlinesz;
    const bool flp          = VERTICAL_ORIENTATION_READ == -1;

    ubyte e;
    ubyte[] cline;      // current line
    ubyte[] pline;      // previous line
    ubyte[] xline;      // intermediate buffer/slice for depaletting
    ubyte[] redline;    // reduced image line
    ubyte[] result  = new_buffer(dc.w * dc.h * dc.tchans, e);   if (e) return e;
    ubyte[] workbuf = new_buffer(workbufsz, e);                 if (e) goto fail;
    // all line buffers are carved out of the single work buffer
    cline = workbuf[0 .. uclinesz];
    pline = workbuf[uclinesz .. 2*uclinesz];
    xline = dc.indexed ? workbuf[2*uclinesz .. 2*uclinesz + xlinesz] : null;
    redline = dc.interlaced ? workbuf[$-redlinesz .. $] : null;
    workbuf[0..$] = 0;

    sete(0);

    if (!dc.interlaced) {
        const size_t tlinelen = dc.w * dc.tchans;
        // when flipping, start at the last line and step backwards
        const size_t tstride = flp ? -tlinelen           : tlinelen;
        size_t ti            = flp ? (dc.h-1) * tlinelen : 0;
        if (dc.indexed) {
            foreach (_; 0 .. dc.h) {
                uncompress(dc, cline); // cline[0] is the filter type
                recon(cline, pline, filterstep);
                depalette(dc.palette, dc.transparency, cline[1..$], xline);
                convert(xline, result[ti .. ti + tlinelen]);
                ti += tstride;
                swap(cline, pline);
            }
        } else {
            foreach (_; 0 .. dc.h) {
                uncompress(dc, cline); // cline[0] is the filter type
                recon(cline, pline, filterstep);
                convert(cline[1..$], result[ti .. ti + tlinelen]);
                ti += tstride;
                swap(cline, pline);
            }
        }
    } else {    // Adam7 interlacing
        const size_t[7] redw = a7_init_redw(dc.w);
        const size_t[7] redh = a7_init_redh(dc.h);
        const int dhi = dc.h - 1;   // destination "height index"

        foreach (pass; 0 .. 7) {
            // A pass whose reduced image has no columns is absent from the
            // stream altogether: it has no scanlines and therefore no filter
            // bytes. Reading a "line" for it would consume data that belongs
            // to the next pass.
            if (redw[pass] == 0)
                continue;

            const A7Catapult cata = a7catapults[pass];
            const size_t slinelen = redw[pass] * dc.schans;
            const size_t tlinelen = redw[pass] * dc.tchans;
            ubyte[] cln = cline[0 .. redw[pass] * filterstep + 1];
            ubyte[] pln = pline[0 .. redw[pass] * filterstep + 1];
            pln[] = 0;  // must be done for defiltering (recon)

            if (dc.indexed) {
                foreach (j; 0 .. redh[pass]) {
                    uncompress(dc, cln); // cln[0] is the filter type
                    recon(cln, pln, filterstep);
                    depalette(dc.palette, dc.transparency, cln[1..$], xline);
                    convert(xline[0 .. slinelen], redline[0 .. tlinelen]);
                    sling(redline, result, cata, redw[pass], j, dc.w, dhi, dc.tchans, flp);
                    swap(cln, pln);
                }
            } else {
                foreach (j; 0 .. redh[pass]) {
                    uncompress(dc, cln); // cln[0] is the filter type
                    recon(cln, pln, filterstep);
                    convert(cln[1 .. 1 + slinelen], redline[0 .. tlinelen]);
                    sling(redline, result, cata, redw[pass], j, dc.w, dhi, dc.tchans, flp);
                    swap(cln, pln);
                }
            }
        }
    }

    // errors from the helpers above are collected through sete
    if (gete(e)) goto fail;

finish:
    _free(workbuf.ptr);
    dc.buf8 = result[0..$];
    return e;
fail:
    _free(result.ptr);
    result = null;
    goto finish;
}
422 
// Decodes the image data of a 16-bit image (interlaced or not) into a newly
// allocated buffer of w * h * tchans samples and stores it in dc.buf16.
// Lines are uncompressed, defiltered, assembled into 16-bit samples and
// converted to the target channel count one at a time.
// Returns 0 on success or an ERROR code; on failure the result buffer is
// freed and dc.buf16 is null.
ubyte read_idat16(PNGDecoder* dc)     // 16-bit is never indexed
{
    auto convert = cast(conv16) getconv(dc.schans, dc.tchans, 16);

    // these are in bytes
    const size_t filterstep = dc.schans * 2;
    const size_t uclinesz   = dc.w * filterstep + 1; // uncompr, +1 for filter byte
    const size_t xlinesz    = dc.w * dc.schans * 2;
    // one reduced line of at most w pixels; multiplying by the height here
    // would allocate a whole extra image and could overflow int arithmetic
    const size_t redlinesz  = dc.w * dc.tchans * 2 * dc.interlaced;
    const size_t workbufsz  = 2 * uclinesz + xlinesz + redlinesz;
    const bool flp          = VERTICAL_ORIENTATION_READ == -1;

    // xline is not quite necessary, it could be avoided if the conversion
    // functions were changed to do what line16_from_bytes does.

    ubyte e;
    ubyte[] cline;      // current line
    ubyte[] pline;      // previous line
    ushort[] xline;     // intermediate buffer to catch 16-bit samples
    ushort[] redline;   // reduced image line
    ushort[] result = new_buffer16(dc.w * dc.h * dc.tchans, e); if (e) return e;
    ubyte[] workbuf = new_buffer(workbufsz, e);                 if (e) goto fail;
    // all line buffers are carved out of the single work buffer
    cline = workbuf[0 .. uclinesz];
    pline = workbuf[uclinesz .. 2*uclinesz];
    xline = cast(ushort[]) workbuf[2*uclinesz .. 2*uclinesz + xlinesz];
    redline = dc.interlaced ? cast(ushort[]) workbuf[$-redlinesz .. $] : null;
    workbuf[0..$] = 0;

    sete(0);

    if (!dc.interlaced) {
        const size_t tlinelen = dc.w * dc.tchans;
        // when flipping, start at the last line and step backwards
        const size_t tstride = flp ? -tlinelen           : tlinelen;
        size_t ti            = flp ? (dc.h-1) * tlinelen : 0;
        foreach (_; 0 .. dc.h) {
            uncompress(dc, cline); // cline[0] is the filter type
            recon(cline, pline, filterstep);
            line16_from_bytes(cline[1..$], xline);
            convert(xline[0..$], result[ti .. ti + tlinelen]);
            ti += tstride;
            swap(cline, pline);
        }
    } else {    // Adam7 interlacing
        const size_t[7] redw = a7_init_redw(dc.w);
        const size_t[7] redh = a7_init_redh(dc.h);
        const int dhi = dc.h - 1;   // destination "height index"

        foreach (pass; 0 .. 7) {
            // A pass whose reduced image has no columns is absent from the
            // stream altogether: it has no scanlines and therefore no filter
            // bytes. Reading a "line" for it would consume data that belongs
            // to the next pass.
            if (redw[pass] == 0)
                continue;

            const A7Catapult cata = a7catapults[pass];
            const size_t slinelen = redw[pass] * dc.schans;
            const size_t tlinelen = redw[pass] * dc.tchans;
            ubyte[] cln = cline[0 .. redw[pass] * filterstep + 1];
            ubyte[] pln = pline[0 .. redw[pass] * filterstep + 1];
            pln[] = 0;  // must be done for defiltering (recon)

            foreach (j; 0 .. redh[pass]) {
                uncompress(dc, cln); // cln[0] is the filter type
                recon(cln, pln, filterstep);
                line16_from_bytes(cln[1 .. $], xline[0 .. slinelen]);
                convert(xline[0 .. slinelen], redline[0 .. tlinelen]);
                sling16(redline, result, cata, redw[pass], j, dc.w, dhi, dc.tchans, flp);
                swap(cln, pln);
            }
        }
    }

    // errors from the helpers above are collected through sete
    if (gete(e)) goto fail;

finish:
    _free(workbuf.ptr);
    dc.buf16 = result[0..$];
    return e;
fail:
    _free(result.ptr);
    result = null;
    goto finish;
}
500 
// Combines consecutive byte pairs of src (high byte first) into 16-bit
// samples in tgt. src is expected to hold an even number of bytes and tgt at
// least half as many elements.
void line16_from_bytes(in ubyte[] src, ushort[] tgt)
{
    size_t k = 0;
    size_t t = 0;
    while (k < src.length) {
        tgt[t] = cast(ushort) (src[k] << 8 | src[k+1]);
        k += 2;
        t += 1;
    }
}
507 
// Scatters the redw pixels of one reduced (interlace pass) line into the full
// image. cata maps the reduced coordinates (x, j) to a destination pixel
// index; each pixel is tchans bytes.
void sling(in ubyte[] redline, ubyte[] result, A7Catapult cata, size_t redw,
                            size_t j, int dw, int dhi, int tchans, bool flp)
{
    size_t src = 0;
    foreach (x; 0 .. redw) {
        const size_t dst = cata(x, j, dw, dhi, flp) * tchans;
        result[dst .. dst + tchans] = redline[src .. src + tchans];
        src += tchans;
    }
}
516 
// 16-bit variant of sling: scatters the redw pixels of one reduced (interlace
// pass) line into the full image. cata maps the reduced coordinates (x, j) to
// a destination pixel index; each pixel is tchans samples.
void sling16(in ushort[] redline, ushort[] result, A7Catapult cata, size_t redw,
                                size_t j, int dw, int dhi, int tchans, bool flp)
{
    size_t src = 0;
    foreach (x; 0 .. redw) {
        const size_t dst = cata(x, j, dw, dhi, flp) * tchans;
        result[dst .. dst + tchans] = redline[src .. src + tchans];
        src += tchans;
    }
}
525 
// Uncompresses a line from the IDAT stream into dst. Calls sete for errors.
//
// Fills dst completely (dst.length bytes) unless an error occurs. Input is
// pulled from the reader in slices; when the current IDAT chunk is exhausted
// its crc is verified and the next chunk header is read, which must be another
// non-empty IDAT chunk. All consumed chunk data is fed to dc.crc.
void uncompress(PNGDecoder* dc, ubyte[] dst)
{
    dc.z.avail_out = cast(uint) dst.length;
    dc.z.next_out = dst.ptr;

    while (true) {
        if (!dc.z.avail_in) {
            // zlib has used up the current window: fetch more input
            if (!dc.avail_idat) {
                // current chunk is finished: check its crc, move to the next
                read_block(dc.rc, dc.chunkmeta[0..$]); // crc | len, type
                if (dc.crc.finish_be() != dc.chunkmeta[0..4])
                    return sete(ERROR.data);
                dc.avail_idat = load_u32be(dc.chunkmeta[4..8]);
                if (dc.rc.fail || !dc.avail_idat) return sete(ERROR.data);
                // more data is needed, so anything but IDAT means it is missing
                if (dc.chunkmeta[8..12] != "IDAT") return sete(ERROR.lackdata);
                dc.crc.put(dc.chunkmeta[8..12]);
            }
            // the slice may be shorter than requested; the rest is fetched later
            dc.idat_window = read_slice(dc.rc, dc.avail_idat);
            if (!dc.idat_window) return sete(ERROR.stream);
            dc.crc.put(dc.idat_window);
            dc.avail_idat -= cast(uint) dc.idat_window.length;
            dc.z.avail_in = cast(uint) dc.idat_window.length;
            dc.z.next_in = dc.idat_window.ptr;
        }

        int q = inflate(dc.z, Z_NO_FLUSH);

        // a full line counts as success even if zlib reported the stream end
        if (dc.z.avail_out == 0)
            return;
        if (q != Z_OK)
            return sete(ERROR.zstream);
    }
}
559 
// Expands a line of palette indices into RGB triples, or RGBA quads when
// trns is non-empty (alpha is taken from trns[index]). An index beyond the
// end of the palette stops the expansion and reports ERROR.data through sete.
void depalette(in ubyte[] palette, in ubyte[] trns, in ubyte[] sline, ubyte[] dst)
{
    const bool with_alpha = trns.length != 0;
    const size_t step = with_alpha ? 4 : 3;
    size_t d = 0;

    foreach (const ubyte idx; sline) {
        const size_t p = idx * 3;
        if (p + 3 > palette.length)
            return sete(ERROR.data);
        dst[d .. d+3] = palette[p .. p+3];
        if (with_alpha)
            dst[d+3] = trns[idx];
        d += step;
    }
}
580 
// Undoes the line filter in place. cline[0] is the filter type and the rest
// is the filtered data; pline is the previous reconstructed line in the same
// layout (all zeros for the first line). fstep is the distance in bytes to
// the corresponding byte of the pixel to the left.
// An unknown filter type is reported as ERROR.unsupp through sete.
void recon(ubyte[] cline, const(ubyte)[] pline, in size_t fstep)
{
    const ubyte ftype = cline[0];
    ubyte[] cur = cline[1..$];
    const(ubyte)[] prev = pline[1..$];
    const size_t n = cur.length;

    switch (ftype) {
        case FILTER.none:
            break;

        case FILTER.sub:
            for (size_t k = fstep; k < n; ++k)
                cur[k] += cur[k-fstep];
            break;

        case FILTER.up:
            for (size_t k = 0; k < n; ++k)
                cur[k] += prev[k];
            break;

        case FILTER.average:
            // the first pixel has no left neighbour
            for (size_t k = 0; k < fstep; ++k)
                cur[k] += prev[k] / 2;
            for (size_t k = fstep; k < n; ++k) {
                const uint sum = cast(uint) cur[k-fstep] + cast(uint) prev[k];
                cur[k] += cast(ubyte) (sum / 2);
            }
            break;

        case FILTER.paeth:
            // the first pixel has no left and no upper-left neighbour
            for (size_t k = 0; k < fstep; ++k)
                cur[k] += paeth(0, prev[k], 0);
            for (size_t k = fstep; k < n; ++k)
                cur[k] += paeth(cur[k-fstep], prev[k], prev[k-fstep]);
            break;

        default:
            return sete(ERROR.unsupp);
    }
}
614 
// Paeth predictor: of a (left), b (above) and c (upper left), returns the one
// closest to p = a + b - c, preferring a, then b, then c on ties.
ubyte paeth(in ubyte a, in ubyte b, in ubyte c)
{
    // p - a = b - c,  p - b = a - c,  p - c = (b - c) + (a - c)
    const int to_a = cast(int) b - cast(int) c;
    const int to_b = cast(int) a - cast(int) c;
    const int to_c = to_a + to_b;

    const int dist_a = to_a < 0 ? -to_a : to_a;
    const int dist_b = to_b < 0 ? -to_b : to_b;
    const int dist_c = to_c < 0 ? -to_c : to_c;

    if (dist_a <= dist_b && dist_a <= dist_c)
        return a;
    return dist_b <= dist_c ? b : c;
}
632 
// A function that maps a pixel position (redx, redy) in the reduced image of
// one Adam7 pass to a pixel index in the full image of width dw. dhi is the
// full image height minus one and flp selects vertical flipping.
alias A7Catapult = size_t function(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp);

// The mapping functions for the seven passes, indexed by pass number - 1.
immutable A7Catapult[7] a7catapults = [
    &a7_red1_to_dst,
    &a7_red2_to_dst,
    &a7_red3_to_dst,
    &a7_red4_to_dst,
    &a7_red5_to_dst,
    &a7_red6_to_dst,
    &a7_red7_to_dst,
];
643 
// One mapping per Adam7 pass from reduced image coordinates to a pixel index
// in the full image. Each pass has its own starting offset and step:
//   pass   x = redx*step + off      y = redy*step + off
//    1         8, 0                     8, 0
//    2         8, 4                     8, 0
//    3         4, 0                     8, 4
//    4         4, 2                     4, 0
//    5         2, 0                     4, 2
//    6         2, 1                     2, 0
//    7         1, 0                     2, 1
size_t a7_red1_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 8, dhi, flp);
    return row * dw + redx * 8;
}

size_t a7_red2_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 8, dhi, flp);
    return row * dw + redx * 8 + 4;
}

size_t a7_red3_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 8 + 4, dhi, flp);
    return row * dw + redx * 4;
}

size_t a7_red4_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 4, dhi, flp);
    return row * dw + redx * 4 + 2;
}

size_t a7_red5_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 4 + 2, dhi, flp);
    return row * dw + redx * 2;
}

size_t a7_red6_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 2, dhi, flp);
    return row * dw + redx * 2 + 1;
}

size_t a7_red7_to_dst(size_t redx, size_t redy, size_t dw, size_t dhi, bool flp)
{
    const size_t row = vf(redy * 2 + 1, dhi, flp);
    return row * dw + redx;
}

// Vertical flip: returns dy unchanged, or its mirror image dhi - dy when flp
// is set (dhi being the index of the last row).
size_t vf(size_t dy, size_t dhi, bool flp)
{
    return flp ? dhi - dy : dy;
}
653 
// Returns the widths of the seven Adam7 reduced images for an image of width
// w. A width of zero means the pass is empty.
size_t[7] a7_init_redw(in int w)
{
    // width of pass p is (w + bias[p]) / step[p]
    static immutable int[7] bias = [7, 3, 3, 1, 1, 0, 0];
    static immutable int[7] step = [8, 8, 4, 4, 2, 2, 1];

    size_t[7] redw;
    foreach (p; 0 .. 7)
        redw[p] = (w + bias[p]) / step[p];
    return redw;
}
665 
// Returns the heights of the seven Adam7 reduced images for an image of
// height h. A height of zero means the pass is empty.
size_t[7] a7_init_redh(in int h)
{
    // height of pass p is (h + bias[p]) / step[p]
    static immutable int[7] bias = [7, 7, 3, 3, 1, 1, 0];
    static immutable int[7] step = [8, 8, 8, 4, 4, 2, 2];

    size_t[7] redh;
    foreach (p; 0 .. 7)
        redh[p] = (h + bias[p]) / step[p];
    return redh;
}
677 
// Assembles a 32-bit unsigned integer from four bytes, most significant first.
uint load_u32be(in ubyte[4] s)
{
    uint value = 0;
    foreach (b; s)
        value = (value << 8) | b;
    return value;
}
682 
// Splits a 32-bit unsigned integer into four bytes, most significant first.
ubyte[4] u32_to_be(in uint x)
{
    ubyte[4] be;
    be[0] = cast(ubyte) (x >> 24);
    be[1] = cast(ubyte) (x >> 16);
    be[2] = cast(ubyte) (x >> 8);
    be[3] = cast(ubyte) x;
    return be;
}
688 
// Returns the number of channels for a PNG colortype, or 0 if the colortype
// is not one of the CTYPE values. Indexed images count as 3 channels here;
// callers add one when a tRNS chunk is present.
ubyte channels(in ubyte colortype)
{
    if (colortype == CTYPE.y)
        return 1;
    if (colortype == CTYPE.ya)
        return 2;
    if (colortype == CTYPE.rgb || colortype == CTYPE.idx)
        return 3;
    if (colortype == CTYPE.rgba)
        return 4;
    return 0;
}
700 
// Writes buf as an 8-bit, non-interlaced PNG of w x h pixels.
//
// The channel count of buf is derived from its length, which must be exactly
// w * h * (1..4). reqchans selects the channel count written to the file,
// 0..4, where 0 keeps the channel count of buf.
//
// Returns 0 on success, ERROR.dim for bad dimensions or buffer length,
// ERROR.unsupp for a bad reqchans, ERROR.stream if writing failed, or an
// error from write_idat.
ubyte write_png(Writer* wc, int w, int h, in ubyte[] buf, in int reqchans)
{
    if (w < 1 || h < 1)
        return ERROR.dim;

    const uint schans = cast(uint) (buf.length / w / h);
    const bool chans_ok = schans >= 1 && schans <= 4;
    if (!chans_ok || schans * w * h != buf.length)
        return ERROR.dim;

    if (cast(uint) reqchans > 4)
        return ERROR.unsupp;

    const uint tchans = cast(uint) reqchans != 0 ? reqchans : schans;

    ubyte colortype;
    switch (tchans) {
        case 4: colortype = CTYPE.rgba; break;
        case 3: colortype = CTYPE.rgb; break;
        case 2: colortype = CTYPE.ya; break;
        case 1: colortype = CTYPE.y; break;
        default: assert(0);
    }

    // data part of the IHDR chunk
    ubyte[13] head;
    head[0..4]   = u32_to_be(cast(uint) w);
    head[4..8]   = u32_to_be(cast(uint) h);
    head[8]      = 8;           // bit depth
    head[9]      = colortype;
    head[10..13] = 0;           // compression, filter and interlace methods

    // the chunk crc covers the type and the data
    CRC32 crc;
    crc.put(cast(ubyte[]) "IHDR");
    crc.put(head);

    write_block(wc, SIGNATURE);
    write_block(wc, HEAD_CHUNK_SIG);
    write_block(wc, head);
    write_block(wc, crc.finish_be());
    if (wc.fail)
        return ERROR.stream;

    PNGEncoder ec = {
        wc: wc,
        w: w,
        h: h,
        schans: schans,
        tchans: tchans,
        buf: buf,
    };

    ubyte e = write_idat(ec);
    if (e)
        return e;

    // complete IEND chunk: zero length, type and the constant crc
    static immutable ubyte[12] IEND =
        [0, 0, 0, 0, 'I','E','N','D', 0xae, 0x42, 0x60, 0x82];
    write_block(wc, IEND);

    return wc.fail ? ERROR.stream : e;
}
757 
// Encoding state shared by write_png, write_idat, compress and flush_idat.
struct PNGEncoder {
    Writer*     wc;         // output stream
    size_t      w;          // image width
    size_t      h;          // image height
    uint        schans;     // channels in buf
    uint        tchans;     // channels written to the file
    const(ubyte)[] buf;     // source pixels
    CRC32       crc;        // running crc of the IDAT chunk being written
    z_stream*   z;          // zlib deflate stream, set up by write_idat
    ubyte[]     idatbuf;    // compressed output, written out by flush_idat
}
769 
// Size in bytes of the compressed-data buffer in write_idat, and thereby the
// largest data length of a written IDAT chunk.
enum MAXIMUM_CHUNK_SIZE = 8192;
771 
// Compresses the image in ec.buf and writes it as one or more IDAT chunks.
// Every line is converted to the target channel count, filtered with the
// paeth filter and fed to zlib; full output buffers are written out by
// flush_idat. Returns 0 on success or an ERROR code.
ubyte write_idat(ref PNGEncoder ec)
{
    // initialize zlib stream
    z_stream z = { zalloc: null, zfree: null, opaque: null };
    if (deflateInit(&z, Z_DEFAULT_COMPRESSION) != Z_OK)
        return ERROR.zinit;
    scope(exit)
        deflateEnd(ec.z);   // ec.z is set below, before any exit can happen
    ec.z = &z;

    auto convert = cast(conv8) getconv(ec.schans, ec.tchans, 8);

    const size_t slinesz = ec.w * ec.schans;
    const size_t tlinesz = ec.w * ec.tchans + 1;    // +1 for the filter byte
    const size_t filterstep = ec.tchans;
    const size_t workbufsz = 3 * tlinesz + MAXIMUM_CHUNK_SIZE;

    // one allocation holds the three line buffers and the idat buffer
    ubyte e;
    ubyte[] workbuf  = new_buffer(workbufsz, e);    if (e) return e;
    ubyte[] cline    = workbuf[0 .. tlinesz];
    ubyte[] pline    = workbuf[tlinesz .. 2 * tlinesz];
    ubyte[] filtered = workbuf[2 * tlinesz .. 3 * tlinesz];
    ec.idatbuf       = workbuf[$-MAXIMUM_CHUNK_SIZE .. $];
    workbuf[0..$] = 0;
    ec.z.avail_out = cast(uint) ec.idatbuf.length;
    ec.z.next_out = ec.idatbuf.ptr;

    sete(0);

    // With VERTICAL_ORIENTATION_WRITE == -1 the source is walked from its last
    // line backwards; si then wraps around past zero, which ends the loop.
    const size_t sbufsz = ec.h * slinesz;
    const ptrdiff_t sstride = slinesz * VERTICAL_ORIENTATION_WRITE;
    size_t si = (ec.h - 1) * slinesz * (VERTICAL_ORIENTATION_WRITE == -1);

    for (; cast(size_t) si < sbufsz; si += sstride) {
        convert(ec.buf[si .. si + slinesz], cline[1..$]);

        // these loops could be merged with some extra space...
        // first pixel: no left and no upper-left neighbour
        foreach (i; 1 .. filterstep+1)
            filtered[i] = cast(ubyte) (cline[i] - paeth(0, pline[i], 0));
        foreach (i; filterstep+1 .. tlinesz)
            filtered[i] = cast(ubyte)
            (cline[i] - paeth(cline[i-filterstep], pline[i], pline[i-filterstep]));
        filtered[0] = FILTER.paeth;

        compress(ec, filtered);
        swap(cline, pline);
    }

    // skipped entirely if an error was recorded while compressing the lines
    while (!gete(e)) {  // flush zlib
        int q = deflate(ec.z, Z_FINISH);
        if (ec.idatbuf.length - ec.z.avail_out > 0)
            flush_idat(ec);
        if (q == Z_STREAM_END) break;
        if (q == Z_OK) continue;    // not enough avail_out
        sete(ERROR.zstream);
    }

finish:
    _free(workbuf.ptr);
    return e;
}
833 
// Feeds one filtered line to zlib, writing out an IDAT chunk each time the
// output buffer fills up. A zlib failure is reported as ERROR.zstream through
// sete.
void compress(ref PNGEncoder ec, in ubyte[] line)
{
    ec.z.avail_in = cast(uint) line.length;
    ec.z.next_in = line.ptr;

    while (ec.z.avail_in != 0) {
        const int status = deflate(ec.z, Z_NO_FLUSH);
        if (status != Z_OK) {
            sete(ERROR.zstream);
            return;
        }
        if (ec.z.avail_out == 0)
            flush_idat(ec);
    }
}
845 
// Writes the compressed bytes accumulated in ec.idatbuf as one IDAT chunk
// (length, type, data, crc) and hands the whole buffer back to zlib.
// Does nothing if the writer has already failed; a failure while writing is
// reported as ERROR.stream through sete.
void flush_idat(ref PNGEncoder ec)      // writes an idat chunk
{
    if (ec.wc.fail)
        return;

    const(ubyte)[] type = cast(const(ubyte)[]) "IDAT";
    const uint len = cast(uint) (ec.idatbuf.length - ec.z.avail_out);
    const(ubyte)[] data = ec.idatbuf[0 .. len];

    // the crc covers the type and the data
    ec.crc.put(type);
    ec.crc.put(data);

    write_block(ec.wc, u32_to_be(len));
    write_block(ec.wc, type);
    write_block(ec.wc, data);
    write_block(ec.wc, ec.crc.finish_be());

    ec.z.next_out = ec.idatbuf.ptr;
    ec.z.avail_out = cast(uint) ec.idatbuf.length;

    if (ec.wc.fail)
        sete(ERROR.stream);
}
860 
// Table-driven CRC-32 as used for PNG chunks. Feed data with put(), then take
// the result with finish_be(), which also readies the object for the next
// computation.
struct CRC32 {
    uint r = 0xffff_ffff;   // running remainder

    @nogc nothrow:

    // Updates the crc with data.
    void put(in ubyte[] data)
    {
        uint acc = r;
        foreach (b; data)
            acc = CRC32TAB[(acc ^ b) & 0xff] ^ (acc >> 8);
        r = acc;
    }

    // Returns the finished crc as four big-endian bytes and resets the state.
    ubyte[4] finish_be()
    {
        const uint finished = ~r;
        r = 0xffff_ffff;
        return u32_to_be(finished);
    }

    // Computes the crc of data in one go.
    static ubyte[4] of(in ubyte[] data)
    {
        CRC32 one_shot;
        one_shot.put(data);
        return one_shot.finish_be();
    }
}
888 
// Lookup table for CRC32.put: one entry per possible value of the low byte of
// the running remainder xor-ed with the input byte.
immutable uint[256] CRC32TAB = [
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
    0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
    0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
    0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
    0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
    0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
    0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
    0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
    0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
    0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
    0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
    0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
    0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
    0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
    0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
    0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
    0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
    0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
    0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
    0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
    0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
    0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
    0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
    0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
    0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
    0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
    0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
    0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
    0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
    0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
    0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
    0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
    0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
    0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
    0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
];