1 // Copyright 2019 Tero Hänninen. All rights reserved. 2 // SPDX-License-Identifier: BSD-2-Clause 3 module imagefmt.jpeg; 4 5 import std.math : ceil; 6 import imagefmt; 7 8 @nogc nothrow package: 9 10 struct JPEGDecoder { 11 Reader* rc; 12 13 ubyte[64][4] qtables; 14 HuffTab[2] ac_tables; 15 HuffTab[2] dc_tables; 16 17 int bits_left; // num of unused bits in cb 18 ubyte cb; // current byte (next bit always at MSB) 19 20 bool has_frame_header = false; 21 bool eoi_reached = false; 22 23 bool correct_comp_ids; 24 ubyte[] compsbuf; 25 Component[3] comps; 26 ubyte num_comps; 27 int tchans; 28 29 int width; 30 int height; 31 int hmax; 32 int vmax; 33 int num_mcu_x; 34 int num_mcu_y; 35 36 ushort restart_interval; // number of MCUs in restart interval 37 } 38 39 // image component 40 struct Component { 41 size_t x; // total num of samples, without fill samples 42 size_t y; // total num of samples, without fill samples 43 ubyte[] data; // reconstructed samples 44 int pred; // dc prediction 45 ubyte sfx; // sampling factor, aka. h 46 ubyte sfy; // sampling factor, aka. v 47 ubyte qtable; 48 ubyte ac_table; 49 ubyte dc_table; 50 } 51 52 struct HuffTab { 53 ubyte[256] values; 54 ubyte[257] sizes; 55 short[16] mincode; 56 short[16] maxcode; 57 short[16] valptr; 58 } 59 60 enum MARKER : ubyte { 61 SOI = 0xd8, // start of image 62 SOF0 = 0xc0, // start of frame / baseline DCT 63 //SOF1 = 0xc1, // start of frame / extended seq. 64 //SOF2 = 0xc2, // start of frame / progressive DCT 65 SOF3 = 0xc3, // start of frame / lossless 66 SOF9 = 0xc9, // start of frame / extended seq., arithmetic 67 SOF11 = 0xcb, // start of frame / lossless, arithmetic 68 DHT = 0xc4, // define huffman tables 69 DQT = 0xdb, // define quantization tables 70 DRI = 0xdd, // define restart interval 71 SOS = 0xda, // start of scan 72 DNL = 0xdc, // define number of lines 73 RST0 = 0xd0, // restart entropy coded data 74 // ... 75 RST7 = 0xd7, // restart entropy coded data 76 APP0 = 0xe0, // application 0 segment 77 // ... 78 APPf = 0xef, // application f segment 79 //DAC = 0xcc, // define arithmetic conditioning table 80 COM = 0xfe, // comment 81 EOI = 0xd9, // end of image 82 } 83 84 bool detect_jpeg(Reader* rc) 85 { 86 auto info = read_jpeg_info(rc); 87 reset2start(rc); 88 return info.e == 0; 89 } 90 91 IFInfo infoerror(ubyte e) 92 { 93 IFInfo info = { e: e }; 94 return info; 95 } 96 97 IFInfo read_jpeg_info(Reader* rc) 98 { 99 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 100 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 101 102 while (true) { 103 if (read_u8(rc) != 0xff) 104 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 105 106 ubyte marker = read_u8(rc); 107 while (marker == 0xff && !rc.fail) 108 marker = read_u8(rc); 109 110 if (rc.fail) 111 return infoerror(ERROR.stream); 112 113 switch (marker) with (MARKER) { 114 case SOF0: .. case SOF3: 115 case SOF9: .. case SOF11: 116 skip(rc, 3); // len + some byte 117 IFInfo info; 118 info.h = read_u16be(rc); 119 info.w = read_u16be(rc); 120 info.c = read_u8(rc); 121 info.e = rc.fail ? ERROR.stream : 0; 122 return info; 123 case SOS, EOI: 124 return infoerror(ERROR.data); 125 case DRI, DHT, DQT, COM: 126 case APP0: .. case APPf: 127 int len = read_u16be(rc) - 2; 128 skip(rc, len); 129 break; 130 default: 131 return infoerror(ERROR.unsupp); 132 } 133 } 134 assert(0); 135 } 136 137 ubyte read_jpeg(Reader* rc, out IFImage image, in int reqchans, in int reqbpc) 138 { 139 if (cast(uint) reqchans > 4) 140 return ERROR.arg; 141 const ubyte tbpc = cast(ubyte) (reqbpc ? reqbpc : 8); 142 if (tbpc != 8 && tbpc != 16) 143 return ERROR.unsupp; 144 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 145 return rc.fail ? ERROR.stream : ERROR.data; 146 if (rc.fail) 147 return ERROR.stream; 148 149 JPEGDecoder dc = { rc: rc }; 150 151 ubyte e = read_markers(dc); // reads until first scan header or eoi 152 153 if (e) return e; 154 if (dc.eoi_reached) return ERROR.data; 155 156 dc.tchans = reqchans == 0 ? dc.num_comps : reqchans; 157 158 if (cast(ulong) dc.width * dc.height * dc.tchans > MAXIMUM_IMAGE_SIZE) 159 return ERROR.bigimg; 160 161 { 162 size_t[3] csizes; 163 size_t acc; 164 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 165 csizes[i] = dc.num_mcu_x * comp.sfx*8 * dc.num_mcu_y * comp.sfy*8; 166 acc += csizes[i]; 167 } 168 dc.compsbuf = new_buffer(acc, e); 169 if (e) return e; 170 acc = 0; 171 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 172 comp.data = dc.compsbuf[acc .. acc + csizes[i]]; 173 acc += csizes[i]; 174 } 175 } 176 scope(exit) 177 _free(dc.compsbuf.ptr); 178 179 // E.7 -- Multiple scans are for progressive images which are not supported 180 //while (!dc.eoi_reached) { 181 e = decode_scan(dc); // E.2.3 182 //read_markers(dc); // reads until next scan header or eoi 183 //} 184 if (e) return e; 185 186 // throw away fill samples and convert to target format 187 ubyte[] buf = dc.reconstruct(e); 188 if (e) return e; 189 190 image.w = dc.width; 191 image.h = dc.height; 192 image.c = cast(ubyte) dc.tchans; 193 image.cinfile = dc.num_comps; 194 image.bpc = tbpc; 195 if (tbpc == 8) { 196 image.buf8 = buf; 197 } else { 198 image.buf16 = bpc8to16(buf); 199 if (!image.buf16.ptr) 200 return ERROR.oom; 201 } 202 return 0; 203 } 204 205 ubyte read_markers(ref JPEGDecoder dc) 206 { 207 bool has_next_scan_header = false; 208 while (!has_next_scan_header && !dc.eoi_reached) { 209 if (read_u8(dc.rc) != 0xff) 210 return dc.rc.fail ? ERROR.stream : ERROR.data; 211 212 ubyte marker = read_u8(dc.rc); 213 while (marker == 0xff && !dc.rc.fail) 214 marker = read_u8(dc.rc); 215 216 if (dc.rc.fail) 217 return ERROR.stream; 218 219 ubyte e; 220 switch (marker) with (MARKER) { 221 case DHT: 222 e = read_huffman_tables(dc); 223 break; 224 case DQT: 225 e = read_quantization_tables(dc); 226 break; 227 case SOF0: 228 if (dc.has_frame_header) 229 return ERROR.data; 230 e = read_frame_header(dc); 231 dc.has_frame_header = true; 232 break; 233 case SOS: 234 if (!dc.has_frame_header) 235 return ERROR.data; 236 e = read_scan_header(dc); 237 has_next_scan_header = true; 238 break; 239 case DRI: 240 if (read_u16be(dc.rc) != 4) // len 241 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 242 dc.restart_interval = read_u16be(dc.rc); 243 break; 244 case EOI: 245 dc.eoi_reached = true; 246 break; 247 case APP0: .. case APPf: 248 case COM: 249 const int len = read_u16be(dc.rc) - 2; 250 skip(dc.rc, len); 251 break; 252 default: 253 return ERROR.unsupp; 254 } 255 if (e) 256 return dc.rc.fail ? ERROR.stream : e; 257 } 258 return 0; 259 } 260 261 // DHT -- define huffman tables 262 ubyte read_huffman_tables(ref JPEGDecoder dc) 263 { 264 ubyte[17] tmp; 265 int len = read_u16be(dc.rc) - 2; 266 if (dc.rc.fail) return ERROR.stream; 267 268 ubyte e; 269 while (len > 0) { 270 read_block(dc.rc, tmp[0..17]); // info byte & the BITS 271 if (dc.rc.fail) return ERROR.stream; 272 const ubyte tableslot = tmp[0] & 0x0f; // must be 0 or 1 for baseline 273 const ubyte tableclass = tmp[0] >> 4; // 0 = dc table, 1 = ac table 274 if (tableslot > 1 || tableclass > 1) 275 return ERROR.unsupp; 276 277 // compute total number of huffman codes 278 int mt = 0; 279 foreach (i; 1..17) 280 mt += tmp[i]; 281 if (256 < mt) 282 return ERROR.data; 283 284 if (tableclass == 0) { 285 read_block(dc.rc, dc.dc_tables[tableslot].values[0..mt]); 286 derive_table(dc.dc_tables[tableslot], tmp[1..17], e); 287 } else { 288 read_block(dc.rc, dc.ac_tables[tableslot].values[0..mt]); 289 derive_table(dc.ac_tables[tableslot], tmp[1..17], e); 290 } 291 292 len -= 17 + mt; 293 } 294 if (dc.rc.fail) return ERROR.stream; 295 return e; 296 } 297 298 // num_values is the BITS 299 void derive_table(ref HuffTab table, in ref ubyte[16] num_values, ref ubyte e) 300 { 301 short[256] codes; 302 303 int k = 0; 304 foreach (i; 0..16) { 305 foreach (j; 0..num_values[i]) { 306 if (k > table.sizes.length) { 307 e = ERROR.data; 308 return; 309 } 310 table.sizes[k] = cast(ubyte) (i + 1); 311 ++k; 312 } 313 } 314 table.sizes[k] = 0; 315 316 k = 0; 317 short code = 0; 318 ubyte si = table.sizes[k]; 319 while (true) { 320 do { 321 codes[k] = code; 322 ++code; 323 ++k; 324 } while (si == table.sizes[k]); 325 326 if (table.sizes[k] == 0) 327 break; 328 329 assert(si < table.sizes[k]); 330 do { 331 code <<= 1; 332 ++si; 333 } while (si != table.sizes[k]); 334 } 335 336 derive_mincode_maxcode_valptr( 337 table.mincode, table.maxcode, table.valptr, 338 codes, num_values 339 ); 340 } 341 342 // F.15 343 void derive_mincode_maxcode_valptr(ref short[16] mincode, ref short[16] maxcode, 344 ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) 345 { 346 mincode[] = -1; 347 maxcode[] = -1; 348 valptr[] = -1; 349 350 int j = 0; 351 foreach (i; 0..16) { 352 if (num_values[i] != 0) { 353 valptr[i] = cast(short) j; 354 mincode[i] = codes[j]; 355 j += num_values[i] - 1; 356 maxcode[i] = codes[j]; 357 j += 1; 358 } 359 } 360 } 361 362 // DQT -- define quantization tables 363 ubyte read_quantization_tables(ref JPEGDecoder dc) 364 { 365 int len = read_u16be(dc.rc); 366 if (len % 65 != 2) 367 return dc.rc.fail ? ERROR.stream : ERROR.data; 368 len -= 2; 369 while (len > 0) { 370 const ubyte info = read_u8(dc.rc); 371 const ubyte tableslot = info & 0x0f; 372 const ubyte precision = info >> 4; // 0 = 8 bit, 1 = 16 bit 373 if (tableslot > 3 || precision != 0) // only 8 bit for baseline 374 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 375 read_block(dc.rc, dc.qtables[tableslot][0..64]); 376 len -= 1 + 64; 377 } 378 return dc.rc.fail ? ERROR.stream : 0; 379 } 380 381 // SOF0 -- start of frame 382 ubyte read_frame_header(ref JPEGDecoder dc) 383 { 384 const int len = read_u16be(dc.rc); // 8 + num_comps*3 385 const ubyte precision = read_u8(dc.rc); 386 dc.height = read_u16be(dc.rc); 387 dc.width = read_u16be(dc.rc); 388 dc.num_comps = read_u8(dc.rc); 389 390 if ((dc.num_comps != 1 && dc.num_comps != 3) 391 || precision != 8 || len != 8 + dc.num_comps*3) 392 return ERROR.unsupp; 393 394 dc.hmax = 0; 395 dc.vmax = 0; 396 int mcu_du = 0; // data units in one mcu 397 ubyte[9] tmp; 398 read_block(dc.rc, tmp[0..dc.num_comps*3]); 399 if (dc.rc.fail) return ERROR.stream; 400 foreach (i; 0..dc.num_comps) { 401 ubyte ci = tmp[i*3]; 402 // JFIF says ci should be i+1, but there are images where ci is i. Normalize 403 // ids so that ci == i, always. So much for standards... 404 if (i == 0) { dc.correct_comp_ids = ci == i+1; } 405 if ((dc.correct_comp_ids && ci != i+1) 406 || (!dc.correct_comp_ids && ci != i)) 407 return ERROR.data; 408 409 Component* comp = &dc.comps[i]; 410 const ubyte sampling_factors = tmp[i*3 + 1]; 411 comp.sfx = sampling_factors >> 4; 412 comp.sfy = sampling_factors & 0xf; 413 comp.qtable = tmp[i*3 + 2]; 414 if ( comp.sfy < 1 || 4 < comp.sfy || 415 comp.sfx < 1 || 4 < comp.sfx || 416 3 < comp.qtable ) 417 return ERROR.unsupp; 418 419 if (dc.hmax < comp.sfx) dc.hmax = comp.sfx; 420 if (dc.vmax < comp.sfy) dc.vmax = comp.sfy; 421 422 mcu_du += comp.sfx * comp.sfy; 423 } 424 if (10 < mcu_du) 425 return ERROR.unsupp; 426 427 assert(dc.hmax * dc.vmax); 428 foreach (i; 0..dc.num_comps) { 429 dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / 430 dc.hmax)); 431 dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / 432 dc.vmax)); 433 } 434 435 size_t mcu_w = dc.hmax * 8; 436 size_t mcu_h = dc.vmax * 8; 437 dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w); 438 dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h); 439 return 0; 440 } 441 442 // SOS -- start of scan 443 ubyte read_scan_header(ref JPEGDecoder dc) 444 { 445 const ushort len = read_u16be(dc.rc); 446 const ubyte num_scan_comps = read_u8(dc.rc); 447 448 if ( num_scan_comps != dc.num_comps || 449 len != (6+num_scan_comps*2) ) 450 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 451 452 ubyte[16] buf; 453 ubyte e; 454 read_block(dc.rc, buf[0..len-3]); 455 if (dc.rc.fail) return ERROR.stream; 456 457 foreach (i; 0..num_scan_comps) { 458 const uint ci = buf[i*2] - (dc.correct_comp_ids ? 1 : 0); 459 if (ci >= dc.num_comps) 460 return ERROR.data; 461 462 const ubyte tables = buf[i*2 + 1]; 463 dc.comps[ci].dc_table = tables >> 4; 464 dc.comps[ci].ac_table = tables & 0x0f; 465 if (dc.comps[ci].dc_table > 1 || dc.comps[ci].ac_table > 1) 466 return ERROR.unsupp; 467 } 468 469 // ignore these 470 //ubyte spectral_start = buf[$-3]; 471 //ubyte spectral_end = buf[$-2]; 472 //ubyte approx = buf[$-1]; 473 return 0; 474 } 475 476 // E.2.3 and E.8 and E.9 477 ubyte decode_scan(ref JPEGDecoder dc) 478 { 479 int intervals, mcus; 480 if (dc.restart_interval > 0) { 481 int total_mcus = dc.num_mcu_x * dc.num_mcu_y; 482 intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval; 483 mcus = dc.restart_interval; 484 } else { 485 intervals = 1; 486 mcus = dc.num_mcu_x * dc.num_mcu_y; 487 } 488 489 ubyte e; 490 foreach (mcu_j; 0 .. dc.num_mcu_y) { 491 foreach (mcu_i; 0 .. dc.num_mcu_x) { 492 493 // decode mcu 494 foreach (c; 0..dc.num_comps) { 495 auto comp = &dc.comps[c]; 496 foreach (du_j; 0 .. comp.sfy) { 497 foreach (du_i; 0 .. comp.sfx) { 498 // decode entropy, dequantize & dezigzag 499 short[64] data; 500 e = decode_block(dc, *comp, dc.qtables[comp.qtable], data); 501 if (e) return e; 502 // idct & level-shift 503 int outx = (mcu_i * comp.sfx + du_i) * 8; 504 int outy = (mcu_j * comp.sfy + du_j) * 8; 505 int dst_stride = dc.num_mcu_x * comp.sfx*8; 506 ubyte* dst = comp.data.ptr + outy*dst_stride + outx; 507 stbi__idct_block(dst, dst_stride, data); 508 } 509 } 510 } 511 512 --mcus; 513 514 if (!mcus) { 515 --intervals; 516 if (!intervals) 517 return e; 518 519 e = read_restart(dc.rc); // RSTx marker 520 521 if (intervals == 1) { 522 // last interval, may have fewer MCUs than defined by DRI 523 mcus = (dc.num_mcu_y - mcu_j - 1) 524 * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1; 525 } else { 526 mcus = dc.restart_interval; 527 } 528 529 // reset decoder 530 dc.cb = 0; 531 dc.bits_left = 0; 532 foreach (k; 0..dc.num_comps) 533 dc.comps[k].pred = 0; 534 } 535 536 } 537 } 538 return e; 539 } 540 541 // RST0-RST7 542 ubyte read_restart(Reader* rc) { 543 ubyte a = read_u8(rc); 544 ubyte b = read_u8(rc); 545 if (rc.fail) return ERROR.stream; 546 if (a != 0xff || b < MARKER.RST0 || b > MARKER.RST7) 547 return ERROR.data; 548 return 0; 549 // the markers should cycle 0 through 7, could check that here... 550 } 551 552 immutable ubyte[64] dezigzag = [ 553 0, 1, 8, 16, 9, 2, 3, 10, 554 17, 24, 32, 25, 18, 11, 4, 5, 555 12, 19, 26, 33, 40, 48, 41, 34, 556 27, 20, 13, 6, 7, 14, 21, 28, 557 35, 42, 49, 56, 57, 50, 43, 36, 558 29, 22, 15, 23, 30, 37, 44, 51, 559 58, 59, 52, 45, 38, 31, 39, 46, 560 53, 60, 61, 54, 47, 55, 62, 63, 561 ]; 562 563 // decode entropy, dequantize & dezigzag (see section F.2) 564 ubyte decode_block(ref JPEGDecoder dc, ref Component comp, in ref ubyte[64] qtable, 565 out short[64] result) 566 { 567 ubyte e; 568 const ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table], e); 569 if (e) return e; 570 const int diff = t ? dc.receive_and_extend(t, e) : 0; 571 572 comp.pred = comp.pred + diff; 573 result[0] = cast(short) (comp.pred * qtable[0]); 574 575 int k = 1; 576 do { 577 ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table], e); 578 if (e) return e; 579 ubyte rrrr = rs >> 4; 580 ubyte ssss = rs & 0xf; 581 582 if (ssss == 0) { 583 if (rrrr != 0xf) 584 break; // end of block 585 k += 16; // run length is 16 586 continue; 587 } 588 589 k += rrrr; 590 591 if (63 < k) 592 return ERROR.data; 593 result[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss, e) * qtable[k]); 594 k += 1; 595 } while (k < 64); 596 597 return 0; 598 } 599 600 int receive_and_extend(ref JPEGDecoder dc, in ubyte s, ref ubyte e) 601 { 602 // receive 603 int symbol = 0; 604 foreach (_; 0..s) 605 symbol = (symbol << 1) + nextbit(dc, e); 606 // extend 607 int vt = 1 << (s-1); 608 if (symbol < vt) 609 return symbol + (-1 << s) + 1; 610 return symbol; 611 } 612 613 // F.16 -- the DECODE 614 ubyte decode_huff(ref JPEGDecoder dc, in ref HuffTab tab, ref ubyte e) 615 { 616 short code = nextbit(dc, e); 617 618 int i = 0; 619 while (tab.maxcode[i] < code) { 620 code = cast(short) ((code << 1) + nextbit(dc, e)); 621 i += 1; 622 if (i >= tab.maxcode.length) { 623 e = ERROR.data; 624 return 0; 625 } 626 } 627 const uint j = cast(uint) (tab.valptr[i] + code - tab.mincode[i]); 628 if (j >= tab.values.length) { 629 e = ERROR.data; 630 return 0; 631 } 632 return tab.values[j]; 633 } 634 635 // F.2.2.5 and F.18 636 ubyte nextbit(ref JPEGDecoder dc, ref ubyte e) 637 { 638 if (!dc.bits_left) { 639 dc.cb = read_u8(dc.rc); 640 dc.bits_left = 8; 641 642 if (dc.cb == 0xff) { 643 if (read_u8(dc.rc) != 0x0) { 644 e = dc.rc.fail ? ERROR.stream : ERROR.data; // unexpected marker 645 return 0; 646 } 647 } 648 if (dc.rc.fail) e = ERROR.stream; 649 } 650 651 ubyte r = dc.cb >> 7; 652 dc.cb <<= 1; 653 dc.bits_left -= 1; 654 return r; 655 } 656 657 ubyte[] reconstruct(in ref JPEGDecoder dc, ref ubyte e) 658 { 659 ubyte[] result = new_buffer(dc.width * dc.height * dc.tchans, e); 660 if (e) return null; 661 662 switch (dc.num_comps * 10 + dc.tchans) { 663 case 34, 33: 664 // Use specialized bilinear filtering functions for the frequent cases where 665 // Cb & Cr channels have half resolution. 666 if (dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2 && 667 dc.comps[0].sfx + dc.comps[0].sfy >= 3 && 668 dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 && 669 dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) 670 { 671 void function(in ubyte[], in ubyte[], ubyte[]) nothrow resample; 672 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) { 673 case 22: resample = &upsample_h2_v2; break; 674 case 21: resample = &upsample_h2_v1; break; 675 case 12: resample = &upsample_h1_v2; break; 676 default: assert(0); 677 } 678 679 ubyte[] comps1n2 = new_buffer(dc.width * 2, e); 680 if (e) return null; 681 scope(exit) _free(comps1n2.ptr); 682 ubyte[] comp1 = comps1n2[0..dc.width]; 683 ubyte[] comp2 = comps1n2[dc.width..$]; 684 685 size_t s = 0; 686 size_t di = 0; 687 foreach (j; 0 .. dc.height) { 688 const size_t mi = j / dc.comps[0].sfy; 689 const size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy) 690 ? mi : mi - 1 + s * 2; 691 s = s ^ 1; 692 693 const size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8; 694 const size_t cl0 = mi * cs; 695 const size_t cl1 = si * cs; 696 resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x], 697 dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x], 698 comp1[]); 699 resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x], 700 dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x], 701 comp2[]); 702 703 foreach (i; 0 .. dc.width) { 704 result[di .. di+3] = ycbcr_to_rgb( 705 dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i], 706 comp1[i], 707 comp2[i], 708 ); 709 if (dc.tchans == 4) 710 result[di+3] = 255; 711 di += dc.tchans; 712 } 713 } 714 715 return result; 716 } 717 718 foreach (const ref comp; dc.comps[0..dc.num_comps]) { 719 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax) 720 return dc.upsample(result); 721 } 722 723 size_t si, di; 724 foreach (j; 0 .. dc.height) { 725 foreach (i; 0 .. dc.width) { 726 result[di .. di+3] = ycbcr_to_rgb( 727 dc.comps[0].data[si+i], 728 dc.comps[1].data[si+i], 729 dc.comps[2].data[si+i], 730 ); 731 if (dc.tchans == 4) 732 result[di+3] = 255; 733 di += dc.tchans; 734 } 735 si += dc.num_mcu_x * dc.comps[0].sfx * 8; 736 } 737 return result; 738 case 32, 12, 31, 11: 739 const comp = &dc.comps[0]; 740 if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) { 741 size_t si, di; 742 if (dc.tchans == 2) { 743 foreach (j; 0 .. dc.height) { 744 foreach (i; 0 .. dc.width) { 745 result[di++] = comp.data[si+i]; 746 result[di++] = 255; 747 } 748 si += dc.num_mcu_x * comp.sfx * 8; 749 } 750 } else { 751 foreach (j; 0 .. dc.height) { 752 result[di .. di+dc.width] = comp.data[si .. si+dc.width]; 753 si += dc.num_mcu_x * comp.sfx * 8; 754 di += dc.width; 755 } 756 } 757 return result; 758 } else { 759 // need to resample (haven't tested this...) 760 return dc.upsample_luma(result); 761 } 762 case 14, 13: 763 const comp = &dc.comps[0]; 764 size_t si, di; 765 foreach (j; 0 .. dc.height) { 766 foreach (i; 0 .. dc.width) { 767 result[di .. di+3] = comp.data[si+i]; 768 if (dc.tchans == 4) 769 result[di+3] = 255; 770 di += dc.tchans; 771 } 772 si += dc.num_mcu_x * comp.sfx * 8; 773 } 774 return result; 775 default: 776 assert(0); 777 } 778 } 779 780 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 781 { 782 ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) 783 { 784 return cast(ubyte) (( cast(uint) mm * 3 * 3 785 + cast(uint) ms * 3 * 1 786 + cast(uint) sm * 1 * 3 787 + cast(uint) ss * 1 * 1 788 + 8) / 16); 789 } 790 791 result[0] = cast(ubyte) (( cast(uint) line0[0] * 3 792 + cast(uint) line1[0] * 1 793 + 2) / 4); 794 if (line0.length == 1) 795 return; 796 result[1] = mix(line0[0], line0[1], line1[0], line1[1]); 797 798 size_t di = 2; 799 foreach (i; 1 .. line0.length) { 800 result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]); 801 di += 1; 802 if (i == line0.length-1) { 803 if (di < result.length) { 804 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3 805 + cast(uint) line1[i] * 1 806 + 2) / 4); 807 } 808 return; 809 } 810 result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]); 811 di += 1; 812 } 813 } 814 815 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) 816 { 817 result[0] = line0[0]; 818 if (line0.length == 1) 819 return; 820 result[1] = cast(ubyte) (( cast(uint) line0[0] * 3 821 + cast(uint) line0[1] * 1 822 + 2) / 4); 823 size_t di = 2; 824 foreach (i; 1 .. line0.length) { 825 result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1 826 + cast(uint) line0[i+0] * 3 827 + 2) / 4); 828 di += 1; 829 if (i == line0.length-1) { 830 if (di < result.length) result[di] = line0[i]; 831 return; 832 } 833 result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3 834 + cast(uint) line0[i+1] * 1 835 + 2) / 4); 836 di += 1; 837 } 838 } 839 840 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 841 { 842 foreach (i; 0 .. result.length) { 843 result[i] = cast(ubyte) (( cast(uint) line0[i] * 3 844 + cast(uint) line1[i] * 1 845 + 2) / 4); 846 } 847 } 848 849 // Nearest neighbor 850 ubyte[] upsample_luma(in ref JPEGDecoder dc, ubyte[] result) 851 { 852 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 853 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 854 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 855 856 float y0 = y_step0 * 0.5; 857 size_t y0i = 0; 858 859 size_t di; 860 861 foreach (j; 0 .. dc.height) { 862 float x0 = x_step0 * 0.5; 863 size_t x0i = 0; 864 foreach (i; 0 .. dc.width) { 865 result[di] = dc.comps[0].data[y0i + x0i]; 866 if (dc.tchans == 2) 867 result[di+1] = 255; 868 di += dc.tchans; 869 x0 += x_step0; 870 if (x0 >= 1.0) { 871 x0 -= 1.0; 872 x0i += 1; 873 } 874 } 875 y0 += y_step0; 876 if (y0 >= 1.0) { 877 y0 -= 1.0; 878 y0i += stride0; 879 } 880 } 881 return result; 882 } 883 884 // Nearest neighbor 885 ubyte[] upsample(in ref JPEGDecoder dc, ubyte[] result) 886 { 887 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 888 const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8; 889 const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8; 890 891 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 892 const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax; 893 const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax; 894 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 895 const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax; 896 const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax; 897 898 float y0 = y_step0 * 0.5; 899 float y1 = y_step1 * 0.5; 900 float y2 = y_step2 * 0.5; 901 size_t y0i = 0; 902 size_t y1i = 0; 903 size_t y2i = 0; 904 905 size_t di; 906 907 foreach (_j; 0 .. dc.height) { 908 float x0 = x_step0 * 0.5; 909 float x1 = x_step1 * 0.5; 910 float x2 = x_step2 * 0.5; 911 size_t x0i = 0; 912 size_t x1i = 0; 913 size_t x2i = 0; 914 foreach (i; 0 .. dc.width) { 915 result[di .. di+3] = ycbcr_to_rgb( 916 dc.comps[0].data[y0i + x0i], 917 dc.comps[1].data[y1i + x1i], 918 dc.comps[2].data[y2i + x2i], 919 ); 920 if (dc.tchans == 4) 921 result[di+3] = 255; 922 di += dc.tchans; 923 x0 += x_step0; 924 x1 += x_step1; 925 x2 += x_step2; 926 if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; } 927 if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; } 928 if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; } 929 } 930 y0 += y_step0; 931 y1 += y_step1; 932 y2 += y_step2; 933 if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; } 934 if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; } 935 if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; } 936 } 937 return result; 938 } 939 940 ubyte[3] ycbcr_to_rgb(in ubyte y, in ubyte cb, in ubyte cr) pure { 941 ubyte[3] rgb = void; 942 rgb[0] = clamp(y + 1.402*(cr-128)); 943 rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128)); 944 rgb[2] = clamp(y + 1.772*(cb-128)); 945 return rgb; 946 } 947 948 ubyte clamp(in float x) pure { 949 if (x < 0) return 0; 950 if (255 < x) return 255; 951 return cast(ubyte) x; 952 } 953 954 // ------------------------------------------------------------ 955 // The IDCT stuff here (to the next dashed line) is copied and adapted from 956 // stb_image which is released under public domain. Many thanks to stb_image 957 // author, Sean Barrett. 958 // Link: https://github.com/nothings/stb/blob/master/stb_image.h 959 960 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); } 961 pure int fsh(int x) { return x << 12; } 962 963 // from stb_image, derived from jidctint -- DCT_ISLOW 964 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3, 965 ref int x0, ref int x1, ref int x2, ref int x3, 966 int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7) 967 { 968 int p1,p2,p3,p4,p5; 969 //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; 970 p2 = s2; 971 p3 = s6; 972 p1 = (p2+p3) * f2f(0.5411961f); 973 t2 = p1 + p3 * f2f(-1.847759065f); 974 t3 = p1 + p2 * f2f( 0.765366865f); 975 p2 = s0; 976 p3 = s4; 977 t0 = fsh(p2+p3); 978 t1 = fsh(p2-p3); 979 x0 = t0+t3; 980 x3 = t0-t3; 981 x1 = t1+t2; 982 x2 = t1-t2; 983 t0 = s7; 984 t1 = s5; 985 t2 = s3; 986 t3 = s1; 987 p3 = t0+t2; 988 p4 = t1+t3; 989 p1 = t0+t3; 990 p2 = t1+t2; 991 p5 = (p3+p4)*f2f( 1.175875602f); 992 t0 = t0*f2f( 0.298631336f); 993 t1 = t1*f2f( 2.053119869f); 994 t2 = t2*f2f( 3.072711026f); 995 t3 = t3*f2f( 1.501321110f); 996 p1 = p5 + p1*f2f(-0.899976223f); 997 p2 = p5 + p2*f2f(-2.562915447f); 998 p3 = p3*f2f(-1.961570560f); 999 p4 = p4*f2f(-0.390180644f); 1000 t3 += p1+p4; 1001 t2 += p2+p3; 1002 t1 += p2+p4; 1003 t0 += p1+p3; 1004 } 1005 1006 // idct and level-shift 1007 pure void stbi__idct_block(ubyte* dst, in int dst_stride, in ref short[64] data) 1008 { 1009 int i; 1010 int[64] val; 1011 int* v = val.ptr; 1012 const(short)* d = data.ptr; 1013 1014 // columns 1015 for (i=0; i < 8; ++i,++d, ++v) { 1016 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing 1017 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 1018 && d[40]==0 && d[48]==0 && d[56]==0) { 1019 // no shortcut 0 seconds 1020 // (1|2|3|4|5|6|7)==0 0 seconds 1021 // all separate -0.047 seconds 1022 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds 1023 int dcterm = d[0] << 2; 1024 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; 1025 } else { 1026 int t0,t1,t2,t3,x0,x1,x2,x3; 1027 STBI__IDCT_1D( 1028 t0, t1, t2, t3, 1029 x0, x1, x2, x3, 1030 d[ 0], d[ 8], d[16], d[24], 1031 d[32], d[40], d[48], d[56] 1032 ); 1033 // constants scaled things up by 1<<12; let's bring them back 1034 // down, but keep 2 extra bits of precision 1035 x0 += 512; x1 += 512; x2 += 512; x3 += 512; 1036 v[ 0] = (x0+t3) >> 10; 1037 v[56] = (x0-t3) >> 10; 1038 v[ 8] = (x1+t2) >> 10; 1039 v[48] = (x1-t2) >> 10; 1040 v[16] = (x2+t1) >> 10; 1041 v[40] = (x2-t1) >> 10; 1042 v[24] = (x3+t0) >> 10; 1043 v[32] = (x3-t0) >> 10; 1044 } 1045 } 1046 1047 ubyte* o = dst; 1048 for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) { 1049 // no fast case since the first 1D IDCT spread components out 1050 int t0,t1,t2,t3,x0,x1,x2,x3; 1051 STBI__IDCT_1D( 1052 t0, t1, t2, t3, 1053 x0, x1, x2, x3, 1054 v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] 1055 ); 1056 // constants scaled things up by 1<<12, plus we had 1<<2 from first 1057 // loop, plus horizontal and vertical each scale by sqrt(8) so together 1058 // we've got an extra 1<<3, so 1<<17 total we need to remove. 1059 // so we want to round that, which means adding 0.5 * 1<<17, 1060 // aka 65536. Also, we'll end up with -128 to 127 that we want 1061 // to encode as 0-255 by adding 128, so we'll add that before the shift 1062 x0 += 65536 + (128<<17); 1063 x1 += 65536 + (128<<17); 1064 x2 += 65536 + (128<<17); 1065 x3 += 65536 + (128<<17); 1066 // tried computing the shifts into temps, or'ing the temps to see 1067 // if any were out of range, but that was slower 1068 o[0] = stbi__clamp((x0+t3) >> 17); 1069 o[7] = stbi__clamp((x0-t3) >> 17); 1070 o[1] = stbi__clamp((x1+t2) >> 17); 1071 o[6] = stbi__clamp((x1-t2) >> 17); 1072 o[2] = stbi__clamp((x2+t1) >> 17); 1073 o[5] = stbi__clamp((x2-t1) >> 17); 1074 o[3] = stbi__clamp((x3+t0) >> 17); 1075 o[4] = stbi__clamp((x3-t0) >> 17); 1076 } 1077 } 1078 1079 // clamp to 0-255 1080 pure ubyte stbi__clamp(int x) { 1081 if (cast(uint) x > 255) { 1082 if (x < 0) return 0; 1083 if (x > 255) return 255; 1084 } 1085 return cast(ubyte) x; 1086 } 1087 1088 // ------------------------------------------------------------