1 // Copyright 2019 Tero Hänninen. All rights reserved.
2 // SPDX-License-Identifier: BSD-2-Clause
3 module imagefmt.jpeg;
4 
5 import std.math : ceil;
6 import imagefmt;
7 
8 @nogc nothrow package:
9 
10 struct JPEGDecoder {
11     Reader* rc;
12 
13     ubyte[64][4] qtables;
14     HuffTab[2] ac_tables;
15     HuffTab[2] dc_tables;
16 
17     int bits_left;   // num of unused bits in cb
18     ubyte cb;  // current byte (next bit always at MSB)
19 
20     bool has_frame_header = false;
21     bool eoi_reached = false;
22 
23     bool correct_comp_ids;
24     ubyte[] compsbuf;
25     Component[3] comps;
26     ubyte num_comps;
27     int tchans;
28 
29     int width;
30     int height;
31     int hmax;
32     int vmax;
33     int num_mcu_x;
34     int num_mcu_y;
35 
36     ushort restart_interval;    // number of MCUs in restart interval
37 }
38 
39 // image component
40 struct Component {
41     size_t x;       // total num of samples, without fill samples
42     size_t y;       // total num of samples, without fill samples
43     ubyte[] data;   // reconstructed samples
44     int pred;       // dc prediction
45     ubyte sfx;      // sampling factor, aka. h
46     ubyte sfy;      // sampling factor, aka. v
47     ubyte qtable;
48     ubyte ac_table;
49     ubyte dc_table;
50 }
51 
52 struct HuffTab {
53     ubyte[256] values;
54     ubyte[257] sizes;
55     short[16] mincode;
56     short[16] maxcode;
57     short[16] valptr;
58 }
59 
60 enum MARKER : ubyte {
61     SOI = 0xd8,     // start of image
62     SOF0 = 0xc0,    // start of frame / baseline DCT
63     //SOF1 = 0xc1,    // start of frame / extended seq.
64     //SOF2 = 0xc2,    // start of frame / progressive DCT
65     SOF3 = 0xc3,    // start of frame / lossless
66     SOF9 = 0xc9,    // start of frame / extended seq., arithmetic
67     SOF11 = 0xcb,    // start of frame / lossless, arithmetic
68     DHT = 0xc4,     // define huffman tables
69     DQT = 0xdb,     // define quantization tables
70     DRI = 0xdd,     // define restart interval
71     SOS = 0xda,     // start of scan
72     DNL = 0xdc,     // define number of lines
73     RST0 = 0xd0,    // restart entropy coded data
74     // ...
75     RST7 = 0xd7,    // restart entropy coded data
76     APP0 = 0xe0,    // application 0 segment
77     // ...
78     APPf = 0xef,    // application f segment
79     //DAC = 0xcc,     // define arithmetic conditioning table
80     COM = 0xfe,     // comment
81     EOI = 0xd9,     // end of image
82 }
83 
84 bool detect_jpeg(Reader* rc)
85 {
86     auto info = read_jpeg_info(rc);
87     reset2start(rc);
88     return info.e == 0;
89 }
90 
91 IFInfo infoerror(ubyte e)
92 {
93     IFInfo info = { e: e };
94     return info;
95 }
96 
97 IFInfo read_jpeg_info(Reader* rc)
98 {
99     if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI)
100        return infoerror(rc.fail ? ERROR.stream : ERROR.data);
101 
102     while (true) {
103         if (read_u8(rc) != 0xff)
104             return infoerror(rc.fail ? ERROR.stream : ERROR.data);
105 
106         ubyte marker = read_u8(rc);
107         while (marker == 0xff && !rc.fail)
108             marker = read_u8(rc);
109 
110         if (rc.fail)
111             return infoerror(ERROR.stream);
112 
113         switch (marker) with (MARKER) {
114             case SOF0: .. case SOF3:
115             case SOF9: .. case SOF11:
116                 skip(rc, 3); // len + some byte
117                 IFInfo info;
118                 info.h = read_u16be(rc);
119                 info.w = read_u16be(rc);
120                 info.c = read_u8(rc);
121                 info.e = rc.fail ? ERROR.stream : 0;
122                 return info;
123             case SOS, EOI:
124                 return infoerror(ERROR.data);
125             case DRI, DHT, DQT, COM:
126             case APP0: .. case APPf:
127                 int len = read_u16be(rc) - 2;
128                 skip(rc, len);
129                 break;
130             default:
131                 return infoerror(ERROR.unsupp);
132         }
133     }
134     assert(0);
135 }
136 
137 ubyte read_jpeg(Reader* rc, out IFImage image, in int reqchans, in int reqbpc)
138 {
139     if (cast(uint) reqchans > 4)
140         return ERROR.arg;
141     const ubyte tbpc = cast(ubyte) (reqbpc ? reqbpc : 8);
142     if (tbpc != 8 && tbpc != 16)
143         return ERROR.unsupp;
144     if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI)
145        return rc.fail ? ERROR.stream : ERROR.data;
146     if (rc.fail)
147         return ERROR.stream;
148 
149     JPEGDecoder dc = { rc: rc };
150 
151     ubyte e = read_markers(dc);   // reads until first scan header or eoi
152 
153     if (e) return e;
154     if (dc.eoi_reached) return ERROR.data;
155 
156     dc.tchans = reqchans == 0 ? dc.num_comps : reqchans;
157 
158     if (cast(ulong) dc.width * dc.height * dc.tchans > MAXIMUM_IMAGE_SIZE)
159         return ERROR.bigimg;
160 
161     {
162         size_t[3] csizes;
163         size_t acc;
164         foreach (i, ref comp; dc.comps[0..dc.num_comps]) {
165             csizes[i] = dc.num_mcu_x * comp.sfx*8 * dc.num_mcu_y * comp.sfy*8;
166             acc += csizes[i];
167         }
168         dc.compsbuf = new_buffer(acc, e);
169         if (e) return e;
170         acc = 0;
171         foreach (i, ref comp; dc.comps[0..dc.num_comps]) {
172             comp.data = dc.compsbuf[acc .. acc + csizes[i]];
173             acc += csizes[i];
174         }
175     }
176     scope(exit)
177         _free(dc.compsbuf.ptr);
178 
179     // E.7 -- Multiple scans are for progressive images which are not supported
180     //while (!dc.eoi_reached) {
181         e = decode_scan(dc);    // E.2.3
182         //read_markers(dc);   // reads until next scan header or eoi
183     //}
184     if (e) return e;
185 
186     // throw away fill samples and convert to target format
187     ubyte[] buf = dc.reconstruct(e);
188     if (e) return e;
189 
190     if (VERTICAL_ORIENTATION_READ == -1)
191         e = flip_vertically(dc.width, dc.height, dc.tchans, buf);
192 
193     image.w   = dc.width;
194     image.h   = dc.height;
195     image.c   = cast(ubyte) dc.tchans;
196     image.cinfile = dc.num_comps;
197     image.bpc = tbpc;
198     if (tbpc == 8) {
199         image.buf8 = buf;
200     } else {
201         image.buf16 = bpc8to16(buf);
202         if (!image.buf16.ptr)
203             return ERROR.oom;
204     }
205     return 0;
206 }
207 
208 ubyte read_markers(ref JPEGDecoder dc)
209 {
210     bool has_next_scan_header = false;
211     while (!has_next_scan_header && !dc.eoi_reached) {
212         if (read_u8(dc.rc) != 0xff)
213             return dc.rc.fail ? ERROR.stream : ERROR.data;
214 
215         ubyte marker = read_u8(dc.rc);
216         while (marker == 0xff && !dc.rc.fail)
217             marker = read_u8(dc.rc);
218 
219         if (dc.rc.fail)
220             return ERROR.stream;
221 
222         ubyte e;
223         switch (marker) with (MARKER) {
224             case DHT:
225                 e = read_huffman_tables(dc);
226                 break;
227             case DQT:
228                 e = read_quantization_tables(dc);
229                 break;
230             case SOF0:
231                 if (dc.has_frame_header)
232                     return ERROR.data;
233                 e = read_frame_header(dc);
234                 dc.has_frame_header = true;
235                 break;
236             case SOS:
237                 if (!dc.has_frame_header)
238                     return ERROR.data;
239                 e = read_scan_header(dc);
240                 has_next_scan_header = true;
241                 break;
242             case DRI:
243                 if (read_u16be(dc.rc) != 4)  // len
244                     return dc.rc.fail ? ERROR.stream : ERROR.unsupp;
245                 dc.restart_interval = read_u16be(dc.rc);
246                 break;
247             case EOI:
248                 dc.eoi_reached = true;
249                 break;
250             case APP0: .. case APPf:
251             case COM:
252                 const int len = read_u16be(dc.rc) - 2;
253                 skip(dc.rc, len);
254                 break;
255             default:
256                 return ERROR.unsupp;
257         }
258         if (e)
259             return dc.rc.fail ? ERROR.stream : e;
260     }
261     return 0;
262 }
263 
264 // DHT -- define huffman tables
265 ubyte read_huffman_tables(ref JPEGDecoder dc)
266 {
267     ubyte[17] tmp;
268     int len = read_u16be(dc.rc) - 2;
269     if (dc.rc.fail) return ERROR.stream;
270 
271     ubyte e;
272     while (len > 0) {
273         read_block(dc.rc, tmp[0..17]);        // info byte & the BITS
274         if (dc.rc.fail) return ERROR.stream;
275         const ubyte tableslot  = tmp[0] & 0x0f; // must be 0 or 1 for baseline
276         const ubyte tableclass = tmp[0] >> 4;   // 0 = dc table, 1 = ac table
277         if (tableslot > 1 || tableclass > 1)
278             return ERROR.unsupp;
279 
280         // compute total number of huffman codes
281         int mt = 0;
282         foreach (i; 1..17)
283             mt += tmp[i];
284         if (256 < mt)
285             return ERROR.data;
286 
287         if (tableclass == 0) {
288             read_block(dc.rc, dc.dc_tables[tableslot].values[0..mt]);
289             derive_table(dc.dc_tables[tableslot], tmp[1..17], e);
290         } else {
291             read_block(dc.rc, dc.ac_tables[tableslot].values[0..mt]);
292             derive_table(dc.ac_tables[tableslot], tmp[1..17], e);
293         }
294 
295         len -= 17 + mt;
296     }
297     if (dc.rc.fail) return ERROR.stream;
298     return e;
299 }
300 
301 // num_values is the BITS
302 void derive_table(ref HuffTab table, in ref ubyte[16] num_values, ref ubyte e)
303 {
304     short[256] codes;
305 
306     int k = 0;
307     foreach (i; 0..16) {
308         foreach (j; 0..num_values[i]) {
309             if (k > table.sizes.length) {
310                 e = ERROR.data;
311                 return;
312             }
313             table.sizes[k] = cast(ubyte) (i + 1);
314             ++k;
315         }
316     }
317     table.sizes[k] = 0;
318 
319     k = 0;
320     short code = 0;
321     ubyte si = table.sizes[k];
322     while (true) {
323         do {
324             codes[k] = code;
325             ++code;
326             ++k;
327         } while (si == table.sizes[k]);
328 
329         if (table.sizes[k] == 0)
330             break;
331 
332         assert(si < table.sizes[k]);
333         do {
334             code <<= 1;
335             ++si;
336         } while (si != table.sizes[k]);
337     }
338 
339     derive_mincode_maxcode_valptr(
340         table.mincode, table.maxcode, table.valptr,
341         codes, num_values
342     );
343 }
344 
345 // F.15
346 void derive_mincode_maxcode_valptr(ref short[16] mincode, ref short[16] maxcode,
347      ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values)
348 {
349     mincode[] = -1;
350     maxcode[] = -1;
351     valptr[] = -1;
352 
353     int j = 0;
354     foreach (i; 0..16) {
355         if (num_values[i] != 0) {
356             valptr[i] = cast(short) j;
357             mincode[i] = codes[j];
358             j += num_values[i] - 1;
359             maxcode[i] = codes[j];
360             j += 1;
361         }
362     }
363 }
364 
365 // DQT -- define quantization tables
366 ubyte read_quantization_tables(ref JPEGDecoder dc)
367 {
368     int len = read_u16be(dc.rc);
369     if (len % 65 != 2)
370         return dc.rc.fail ? ERROR.stream : ERROR.data;
371     len -= 2;
372     while (len > 0) {
373         const ubyte info = read_u8(dc.rc);
374         const ubyte tableslot = info & 0x0f;
375         const ubyte precision = info >> 4;  // 0 = 8 bit, 1 = 16 bit
376         if (tableslot > 3 || precision != 0) // only 8 bit for baseline
377             return dc.rc.fail ? ERROR.stream : ERROR.unsupp;
378         read_block(dc.rc, dc.qtables[tableslot][0..64]);
379         len -= 1 + 64;
380     }
381     return dc.rc.fail ? ERROR.stream : 0;
382 }
383 
384 // SOF0 -- start of frame
385 ubyte read_frame_header(ref JPEGDecoder dc)
386 {
387     const int len = read_u16be(dc.rc);  // 8 + num_comps*3
388     const ubyte precision = read_u8(dc.rc);
389     dc.height = read_u16be(dc.rc);
390     dc.width = read_u16be(dc.rc);
391     dc.num_comps = read_u8(dc.rc);
392 
393     if ((dc.num_comps != 1 && dc.num_comps != 3)
394      || precision != 8 || len != 8 + dc.num_comps*3)
395         return ERROR.unsupp;
396 
397     dc.hmax = 0;
398     dc.vmax = 0;
399     int mcu_du = 0; // data units in one mcu
400     ubyte[9] tmp;
401     read_block(dc.rc, tmp[0..dc.num_comps*3]);
402     if (dc.rc.fail) return ERROR.stream;
403     foreach (i; 0..dc.num_comps) {
404         ubyte ci = tmp[i*3];
405         // JFIF says ci should be i+1, but there are images where ci is i. Normalize
406         // ids so that ci == i, always. So much for standards...
407         if (i == 0) { dc.correct_comp_ids = ci == i+1; }
408         if ((dc.correct_comp_ids && ci != i+1)
409         || (!dc.correct_comp_ids && ci != i))
410             return ERROR.data;
411 
412         Component* comp = &dc.comps[i];
413         const ubyte sampling_factors = tmp[i*3 + 1];
414         comp.sfx = sampling_factors >> 4;
415         comp.sfy = sampling_factors & 0xf;
416         comp.qtable = tmp[i*3 + 2];
417         if ( comp.sfy < 1 || 4 < comp.sfy ||
418              comp.sfx < 1 || 4 < comp.sfx ||
419              3 < comp.qtable )
420             return ERROR.unsupp;
421 
422         if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
423         if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
424 
425         mcu_du += comp.sfx * comp.sfy;
426     }
427     if (10 < mcu_du)
428         return ERROR.unsupp;
429 
430     assert(dc.hmax * dc.vmax);
431     foreach (i; 0..dc.num_comps) {
432         dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx /
433                     dc.hmax));
434         dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy /
435                     dc.vmax));
436     }
437 
438     size_t mcu_w = dc.hmax * 8;
439     size_t mcu_h = dc.vmax * 8;
440     dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w);
441     dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h);
442     return 0;
443 }
444 
445 // SOS -- start of scan
446 ubyte read_scan_header(ref JPEGDecoder dc)
447 {
448     const ushort len = read_u16be(dc.rc);
449     const ubyte num_scan_comps = read_u8(dc.rc);
450 
451     if ( num_scan_comps != dc.num_comps ||
452          len != (6+num_scan_comps*2) )
453         return dc.rc.fail ? ERROR.stream : ERROR.unsupp;
454 
455     ubyte[16] buf;
456     read_block(dc.rc, buf[0..len-3]);
457     if (dc.rc.fail) return ERROR.stream;
458 
459     foreach (i; 0..num_scan_comps) {
460         const uint ci = buf[i*2] - (dc.correct_comp_ids ? 1 : 0);
461         if (ci >= dc.num_comps)
462             return ERROR.data;
463 
464         const ubyte tables = buf[i*2 + 1];
465         dc.comps[ci].dc_table = tables >> 4;
466         dc.comps[ci].ac_table = tables & 0x0f;
467         if (dc.comps[ci].dc_table > 1 || dc.comps[ci].ac_table > 1)
468             return ERROR.unsupp;
469     }
470 
471     // ignore these
472     //ubyte spectral_start = buf[$-3];
473     //ubyte spectral_end = buf[$-2];
474     //ubyte approx = buf[$-1];
475     return 0;
476 }
477 
478 // E.2.3 and E.8 and E.9
479 ubyte decode_scan(ref JPEGDecoder dc)
480 {
481     int intervals, mcus;
482     if (dc.restart_interval > 0) {
483         int total_mcus = dc.num_mcu_x * dc.num_mcu_y;
484         intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval;
485         mcus = dc.restart_interval;
486     } else {
487         intervals = 1;
488         mcus = dc.num_mcu_x * dc.num_mcu_y;
489     }
490 
491     ubyte e;
492     foreach (mcu_j; 0 .. dc.num_mcu_y) {
493         foreach (mcu_i; 0 .. dc.num_mcu_x) {
494 
495             // decode mcu
496             foreach (c; 0..dc.num_comps) {
497                 auto comp = &dc.comps[c];
498                 foreach (du_j; 0 .. comp.sfy) {
499                     foreach (du_i; 0 .. comp.sfx) {
500                         // decode entropy, dequantize & dezigzag
501                         short[64] data;
502                         e = decode_block(dc, *comp, dc.qtables[comp.qtable], data);
503                         if (e) return e;
504                         // idct & level-shift
505                         int outx = (mcu_i * comp.sfx + du_i) * 8;
506                         int outy = (mcu_j * comp.sfy + du_j) * 8;
507                         int dst_stride = dc.num_mcu_x * comp.sfx*8;
508                         ubyte* dst = comp.data.ptr + outy*dst_stride + outx;
509                         stbi__idct_block(dst, dst_stride, data);
510                     }
511                 }
512             }
513 
514             --mcus;
515 
516             if (!mcus) {
517                 --intervals;
518                 if (!intervals)
519                     return e;
520 
521                 e = read_restart(dc.rc);    // RSTx marker
522 
523                 if (intervals == 1) {
524                     // last interval, may have fewer MCUs than defined by DRI
525                     mcus = (dc.num_mcu_y - mcu_j - 1)
526                          * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1;
527                 } else {
528                     mcus = dc.restart_interval;
529                 }
530 
531                 // reset decoder
532                 dc.cb = 0;
533                 dc.bits_left = 0;
534                 foreach (k; 0..dc.num_comps)
535                     dc.comps[k].pred = 0;
536             }
537 
538         }
539     }
540     return e;
541 }
542 
543 // RST0-RST7
544 ubyte read_restart(Reader* rc) {
545     ubyte a = read_u8(rc);
546     ubyte b = read_u8(rc);
547     if (rc.fail) return ERROR.stream;
548     if (a != 0xff || b < MARKER.RST0 || b > MARKER.RST7)
549         return ERROR.data;
550     return 0;
551     // the markers should cycle 0 through 7, could check that here...
552 }
553 
554 immutable ubyte[64] dezigzag = [
555      0,  1,  8, 16,  9,  2,  3, 10,
556     17, 24, 32, 25, 18, 11,  4,  5,
557     12, 19, 26, 33, 40, 48, 41, 34,
558     27, 20, 13,  6,  7, 14, 21, 28,
559     35, 42, 49, 56, 57, 50, 43, 36,
560     29, 22, 15, 23, 30, 37, 44, 51,
561     58, 59, 52, 45, 38, 31, 39, 46,
562     53, 60, 61, 54, 47, 55, 62, 63,
563 ];
564 
565 // decode entropy, dequantize & dezigzag (see section F.2)
566 ubyte decode_block(ref JPEGDecoder dc, ref Component comp, in ref ubyte[64] qtable,
567                                                               out short[64] result)
568 {
569     ubyte e;
570     const ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table], e);
571     if (e) return e;
572     const int diff = t ? dc.receive_and_extend(t, e) : 0;
573 
574     comp.pred = comp.pred + diff;
575     result[0] = cast(short) (comp.pred * qtable[0]);
576 
577     int k = 1;
578     do {
579         ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table], e);
580         if (e) return e;
581         ubyte rrrr = rs >> 4;
582         ubyte ssss = rs & 0xf;
583 
584         if (ssss == 0) {
585             if (rrrr != 0xf)
586                 break;      // end of block
587             k += 16;    // run length is 16
588             continue;
589         }
590 
591         k += rrrr;
592 
593         if (63 < k)
594             return ERROR.data;
595         result[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss, e) * qtable[k]);
596         k += 1;
597     } while (k < 64);
598 
599     return 0;
600 }
601 
602 int receive_and_extend(ref JPEGDecoder dc, in ubyte s, ref ubyte e)
603 {
604     // receive
605     int symbol = 0;
606     foreach (_; 0..s)
607         symbol = (symbol << 1) + nextbit(dc, e);
608     // extend
609     int vt = 1 << (s-1);
610     if (symbol < vt)
611         return symbol + (-1 << s) + 1;
612     return symbol;
613 }
614 
615 // F.16 -- the DECODE
616 ubyte decode_huff(ref JPEGDecoder dc, in ref HuffTab tab, ref ubyte e)
617 {
618     short code = nextbit(dc, e);
619 
620     int i = 0;
621     while (tab.maxcode[i] < code) {
622         code = cast(short) ((code << 1) + nextbit(dc, e));
623         i += 1;
624         if (i >= tab.maxcode.length) {
625             e = ERROR.data;
626             return 0;
627         }
628     }
629     const uint j = cast(uint) (tab.valptr[i] + code - tab.mincode[i]);
630     if (j >= tab.values.length) {
631         e = ERROR.data;
632         return 0;
633     }
634     return tab.values[j];
635 }
636 
637 // F.2.2.5 and F.18
638 ubyte nextbit(ref JPEGDecoder dc, ref ubyte e)
639 {
640     if (!dc.bits_left) {
641         dc.cb = read_u8(dc.rc);
642         dc.bits_left = 8;
643 
644         if (dc.cb == 0xff) {
645             if (read_u8(dc.rc) != 0x0) {
646                 e = dc.rc.fail ? ERROR.stream : ERROR.data; // unexpected marker
647                 return 0;
648             }
649         }
650         if (dc.rc.fail) e = ERROR.stream;
651     }
652 
653     ubyte r = dc.cb >> 7;
654     dc.cb <<= 1;
655     dc.bits_left -= 1;
656     return r;
657 }
658 
659 ubyte[] reconstruct(in ref JPEGDecoder dc, ref ubyte e)
660 {
661     ubyte[] result = new_buffer(dc.width * dc.height * dc.tchans, e);
662     if (e) return null;
663 
664     switch (dc.num_comps * 10 + dc.tchans) {
665         case 34, 33:
666             // Use specialized bilinear filtering functions for the frequent cases
667             // where Cb & Cr channels have half resolution.
668             if (dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2 &&
669                 dc.comps[0].sfx + dc.comps[0].sfy >= 3       &&
670                 dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 &&
671                 dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1)
672             {
673                 void function(in ubyte[], in ubyte[], ubyte[]) nothrow resample;
674                 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) {
675                     case 22: resample = &upsample_h2_v2; break;
676                     case 21: resample = &upsample_h2_v1; break;
677                     case 12: resample = &upsample_h1_v2; break;
678                     default: assert(0);
679                 }
680 
681                 ubyte[] comps1n2 = new_buffer(dc.width * 2, e);
682                 if (e) return null;
683                 scope(exit) _free(comps1n2.ptr);
684                 ubyte[] comp1 = comps1n2[0..dc.width];
685                 ubyte[] comp2 = comps1n2[dc.width..$];
686 
687                 size_t s = 0;
688                 size_t di = 0;
689                 foreach (j; 0 .. dc.height) {
690                     const size_t mi = j / dc.comps[0].sfy;
691                     const size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy)
692                                     ? mi : mi - 1 + s * 2;
693                     s = s ^ 1;
694 
695                     const size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8;
696                     const size_t cl0 = mi * cs;
697                     const size_t cl1 = si * cs;
698                     resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x],
699                              dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x],
700                              comp1[]);
701                     resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x],
702                              dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x],
703                              comp2[]);
704 
705                     foreach (i; 0 .. dc.width) {
706                         result[di .. di+3] = ycbcr_to_rgb(
707                             dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i],
708                             comp1[i],
709                             comp2[i],
710                         );
711                         if (dc.tchans == 4)
712                             result[di+3] = 255;
713                         di += dc.tchans;
714                     }
715                 }
716 
717                 return result;
718             }
719 
720             foreach (const ref comp; dc.comps[0..dc.num_comps]) {
721                 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax)
722                     return dc.upsample(result);
723             }
724 
725             size_t si, di;
726             foreach (j; 0 .. dc.height) {
727                 foreach (i; 0 .. dc.width) {
728                     result[di .. di+3] = ycbcr_to_rgb(
729                         dc.comps[0].data[si+i],
730                         dc.comps[1].data[si+i],
731                         dc.comps[2].data[si+i],
732                     );
733                     if (dc.tchans == 4)
734                         result[di+3] = 255;
735                     di += dc.tchans;
736                 }
737                 si += dc.num_mcu_x * dc.comps[0].sfx * 8;
738             }
739             return result;
740         case 32, 12, 31, 11:
741             const comp = &dc.comps[0];
742             if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
743                 size_t si, di;
744                 if (dc.tchans == 2) {
745                     foreach (j; 0 .. dc.height) {
746                         foreach (i; 0 .. dc.width) {
747                             result[di++] = comp.data[si+i];
748                             result[di++] = 255;
749                         }
750                         si += dc.num_mcu_x * comp.sfx * 8;
751                     }
752                 } else {
753                     foreach (j; 0 .. dc.height) {
754                         result[di .. di+dc.width] = comp.data[si .. si+dc.width];
755                         si += dc.num_mcu_x * comp.sfx * 8;
756                         di += dc.width;
757                     }
758                 }
759                 return result;
760             } else {
761                 // need to resample (haven't tested this...)
762                 return dc.upsample_luma(result);
763             }
764         case 14, 13:
765             const comp = &dc.comps[0];
766             size_t si, di;
767             foreach (j; 0 .. dc.height) {
768                 foreach (i; 0 .. dc.width) {
769                     result[di .. di+3] = comp.data[si+i];
770                     if (dc.tchans == 4)
771                         result[di+3] = 255;
772                     di += dc.tchans;
773                 }
774                 si += dc.num_mcu_x * comp.sfx * 8;
775             }
776             return result;
777         default:
778             assert(0);
779     }
780 }
781 
782 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result)
783 {
784     ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss)
785     {
786         return cast(ubyte) (( cast(uint) mm * 3 * 3
787                             + cast(uint) ms * 3 * 1
788                             + cast(uint) sm * 1 * 3
789                             + cast(uint) ss * 1 * 1
790                             + 8) / 16);
791     }
792 
793     result[0] = cast(ubyte) (( cast(uint) line0[0] * 3
794                              + cast(uint) line1[0] * 1
795                              + 2) / 4);
796     if (line0.length == 1)
797         return;
798     result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
799 
800     size_t di = 2;
801     foreach (i; 1 .. line0.length) {
802         result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
803         di += 1;
804         if (i == line0.length-1) {
805             if (di < result.length) {
806                 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3
807                                           + cast(uint) line1[i] * 1
808                                           + 2) / 4);
809             }
810             return;
811         }
812         result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
813         di += 1;
814     }
815 }
816 
817 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result)
818 {
819     result[0] = line0[0];
820     if (line0.length == 1)
821         return;
822     result[1] = cast(ubyte) (( cast(uint) line0[0] * 3
823                              + cast(uint) line0[1] * 1
824                              + 2) / 4);
825     size_t di = 2;
826     foreach (i; 1 .. line0.length) {
827         result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1
828                                   + cast(uint) line0[i+0] * 3
829                                   + 2) / 4);
830         di += 1;
831         if (i == line0.length-1) {
832             if (di < result.length) result[di] = line0[i];
833             return;
834         }
835         result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3
836                                   + cast(uint) line0[i+1] * 1
837                                   + 2) / 4);
838         di += 1;
839     }
840 }
841 
842 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result)
843 {
844     foreach (i; 0 .. result.length) {
845         result[i] = cast(ubyte) (( cast(uint) line0[i] * 3
846                                  + cast(uint) line1[i] * 1
847                                  + 2) / 4);
848     }
849 }
850 
851 // Nearest neighbor
852 ubyte[] upsample_luma(in ref JPEGDecoder dc, ubyte[] result)
853 {
854     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
855     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
856     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
857 
858     float y0 = y_step0 * 0.5;
859     size_t y0i = 0;
860 
861     size_t di;
862 
863     foreach (j; 0 .. dc.height) {
864         float x0 = x_step0 * 0.5;
865         size_t x0i = 0;
866         foreach (i; 0 .. dc.width) {
867             result[di] = dc.comps[0].data[y0i + x0i];
868             if (dc.tchans == 2)
869                 result[di+1] = 255;
870             di += dc.tchans;
871             x0 += x_step0;
872             if (x0 >= 1.0) {
873                 x0 -= 1.0;
874                 x0i += 1;
875             }
876         }
877         y0 += y_step0;
878         if (y0 >= 1.0) {
879             y0 -= 1.0;
880             y0i += stride0;
881         }
882     }
883     return result;
884 }
885 
886 // Nearest neighbor
887 ubyte[] upsample(in ref JPEGDecoder dc, ubyte[] result)
888 {
889     const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8;
890     const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8;
891     const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8;
892 
893     const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax;
894     const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax;
895     const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax;
896     const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax;
897     const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax;
898     const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax;
899 
900     float y0 = y_step0 * 0.5;
901     float y1 = y_step1 * 0.5;
902     float y2 = y_step2 * 0.5;
903     size_t y0i = 0;
904     size_t y1i = 0;
905     size_t y2i = 0;
906 
907     size_t di;
908 
909     foreach (_j; 0 .. dc.height) {
910         float x0 = x_step0 * 0.5;
911         float x1 = x_step1 * 0.5;
912         float x2 = x_step2 * 0.5;
913         size_t x0i = 0;
914         size_t x1i = 0;
915         size_t x2i = 0;
916         foreach (i; 0 .. dc.width) {
917             result[di .. di+3] = ycbcr_to_rgb(
918                 dc.comps[0].data[y0i + x0i],
919                 dc.comps[1].data[y1i + x1i],
920                 dc.comps[2].data[y2i + x2i],
921             );
922             if (dc.tchans == 4)
923                 result[di+3] = 255;
924             di += dc.tchans;
925             x0 += x_step0;
926             x1 += x_step1;
927             x2 += x_step2;
928             if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
929             if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; }
930             if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; }
931         }
932         y0 += y_step0;
933         y1 += y_step1;
934         y2 += y_step2;
935         if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
936         if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; }
937         if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; }
938     }
939     return result;
940 }
941 
942 ubyte[3] ycbcr_to_rgb(in ubyte y, in ubyte cb, in ubyte cr) pure {
943     ubyte[3] rgb = void;
944     rgb[0] = clamp(y + 1.402*(cr-128));
945     rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128));
946     rgb[2] = clamp(y + 1.772*(cb-128));
947     return rgb;
948 }
949 
950 ubyte clamp(in float x) pure {
951     if (x < 0) return 0;
952     if (255 < x) return 255;
953     return cast(ubyte) x;
954 }
955 
956 // ------------------------------------------------------------
957 // The IDCT stuff here (to the next dashed line) is copied and adapted from
958 // stb_image which is released under public domain.  Many thanks to stb_image
959 // author, Sean Barrett.
960 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
961 
962 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); }
963 pure int fsh(int x) { return x << 12; }
964 
965 // from stb_image, derived from jidctint -- DCT_ISLOW
966 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3,
967                         ref int x0, ref int x1, ref int x2, ref int x3,
968         int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
969 {
970    int p1,p2,p3,p4,p5;
971    //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;
972    p2 = s2;
973    p3 = s6;
974    p1 = (p2+p3) * f2f(0.5411961f);
975    t2 = p1 + p3 * f2f(-1.847759065f);
976    t3 = p1 + p2 * f2f( 0.765366865f);
977    p2 = s0;
978    p3 = s4;
979    t0 = fsh(p2+p3);
980    t1 = fsh(p2-p3);
981    x0 = t0+t3;
982    x3 = t0-t3;
983    x1 = t1+t2;
984    x2 = t1-t2;
985    t0 = s7;
986    t1 = s5;
987    t2 = s3;
988    t3 = s1;
989    p3 = t0+t2;
990    p4 = t1+t3;
991    p1 = t0+t3;
992    p2 = t1+t2;
993    p5 = (p3+p4)*f2f( 1.175875602f);
994    t0 = t0*f2f( 0.298631336f);
995    t1 = t1*f2f( 2.053119869f);
996    t2 = t2*f2f( 3.072711026f);
997    t3 = t3*f2f( 1.501321110f);
998    p1 = p5 + p1*f2f(-0.899976223f);
999    p2 = p5 + p2*f2f(-2.562915447f);
1000    p3 = p3*f2f(-1.961570560f);
1001    p4 = p4*f2f(-0.390180644f);
1002    t3 += p1+p4;
1003    t2 += p2+p3;
1004    t1 += p2+p4;
1005    t0 += p1+p3;
1006 }
1007 
1008 // idct and level-shift
1009 pure void stbi__idct_block(ubyte* dst, in int dst_stride, in ref short[64] data)
1010 {
1011    int i;
1012    int[64] val;
1013    int* v = val.ptr;
1014    const(short)* d = data.ptr;
1015 
1016    // columns
1017    for (i=0; i < 8; ++i,++d, ++v) {
1018       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1019       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1020            && d[40]==0 && d[48]==0 && d[56]==0) {
1021          //    no shortcut                 0     seconds
1022          //    (1|2|3|4|5|6|7)==0          0     seconds
1023          //    all separate               -0.047 seconds
1024          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
1025          int dcterm = d[0] << 2;
1026          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1027       } else {
1028          int t0,t1,t2,t3,x0,x1,x2,x3;
1029          STBI__IDCT_1D(
1030              t0, t1, t2, t3,
1031              x0, x1, x2, x3,
1032              d[ 0], d[ 8], d[16], d[24],
1033              d[32], d[40], d[48], d[56]
1034          );
1035          // constants scaled things up by 1<<12; let's bring them back
1036          // down, but keep 2 extra bits of precision
1037          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1038          v[ 0] = (x0+t3) >> 10;
1039          v[56] = (x0-t3) >> 10;
1040          v[ 8] = (x1+t2) >> 10;
1041          v[48] = (x1-t2) >> 10;
1042          v[16] = (x2+t1) >> 10;
1043          v[40] = (x2-t1) >> 10;
1044          v[24] = (x3+t0) >> 10;
1045          v[32] = (x3-t0) >> 10;
1046       }
1047    }
1048 
1049    ubyte* o = dst;
1050    for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) {
1051       // no fast case since the first 1D IDCT spread components out
1052       int t0,t1,t2,t3,x0,x1,x2,x3;
1053       STBI__IDCT_1D(
1054           t0, t1, t2, t3,
1055           x0, x1, x2, x3,
1056           v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
1057       );
1058       // constants scaled things up by 1<<12, plus we had 1<<2 from first
1059       // loop, plus horizontal and vertical each scale by sqrt(8) so together
1060       // we've got an extra 1<<3, so 1<<17 total we need to remove.
1061       // so we want to round that, which means adding 0.5 * 1<<17,
1062       // aka 65536. Also, we'll end up with -128 to 127 that we want
1063       // to encode as 0-255 by adding 128, so we'll add that before the shift
1064       x0 += 65536 + (128<<17);
1065       x1 += 65536 + (128<<17);
1066       x2 += 65536 + (128<<17);
1067       x3 += 65536 + (128<<17);
1068       // tried computing the shifts into temps, or'ing the temps to see
1069       // if any were out of range, but that was slower
1070       o[0] = stbi__clamp((x0+t3) >> 17);
1071       o[7] = stbi__clamp((x0-t3) >> 17);
1072       o[1] = stbi__clamp((x1+t2) >> 17);
1073       o[6] = stbi__clamp((x1-t2) >> 17);
1074       o[2] = stbi__clamp((x2+t1) >> 17);
1075       o[5] = stbi__clamp((x2-t1) >> 17);
1076       o[3] = stbi__clamp((x3+t0) >> 17);
1077       o[4] = stbi__clamp((x3-t0) >> 17);
1078    }
1079 }
1080 
1081 // clamp to 0-255
1082 pure ubyte stbi__clamp(int x) {
1083    if (cast(uint) x > 255) {
1084       if (x < 0) return 0;
1085       if (x > 255) return 255;
1086    }
1087    return cast(ubyte) x;
1088 }
1089 
1090 // ------------------------------------------------------------