1 // Copyright 2019 Tero Hänninen. All rights reserved. 2 // SPDX-License-Identifier: BSD-2-Clause 3 module imagefmt.jpeg; 4 5 import std.math : ceil; 6 import imagefmt; 7 8 @nogc nothrow package: 9 10 struct JPEGDecoder { 11 Reader* rc; 12 13 ubyte[64][4] qtables; 14 HuffTab[2] ac_tables; 15 HuffTab[2] dc_tables; 16 17 int bits_left; // num of unused bits in cb 18 ubyte cb; // current byte (next bit always at MSB) 19 20 bool has_frame_header = false; 21 bool eoi_reached = false; 22 23 bool correct_comp_ids; 24 ubyte[] compsbuf; 25 Component[3] comps; 26 ubyte num_comps; 27 int tchans; 28 29 int width; 30 int height; 31 int hmax; 32 int vmax; 33 int num_mcu_x; 34 int num_mcu_y; 35 36 ushort restart_interval; // number of MCUs in restart interval 37 } 38 39 // image component 40 struct Component { 41 size_t x; // total num of samples, without fill samples 42 size_t y; // total num of samples, without fill samples 43 ubyte[] data; // reconstructed samples 44 int pred; // dc prediction 45 ubyte sfx; // sampling factor, aka. h 46 ubyte sfy; // sampling factor, aka. v 47 ubyte qtable; 48 ubyte ac_table; 49 ubyte dc_table; 50 } 51 52 struct HuffTab { 53 ubyte[256] values; 54 ubyte[257] sizes; 55 short[16] mincode; 56 short[16] maxcode; 57 short[16] valptr; 58 } 59 60 enum MARKER : ubyte { 61 SOI = 0xd8, // start of image 62 SOF0 = 0xc0, // start of frame / baseline DCT 63 //SOF1 = 0xc1, // start of frame / extended seq. 64 //SOF2 = 0xc2, // start of frame / progressive DCT 65 SOF3 = 0xc3, // start of frame / lossless 66 SOF9 = 0xc9, // start of frame / extended seq., arithmetic 67 SOF11 = 0xcb, // start of frame / lossless, arithmetic 68 DHT = 0xc4, // define huffman tables 69 DQT = 0xdb, // define quantization tables 70 DRI = 0xdd, // define restart interval 71 SOS = 0xda, // start of scan 72 DNL = 0xdc, // define number of lines 73 RST0 = 0xd0, // restart entropy coded data 74 // ... 75 RST7 = 0xd7, // restart entropy coded data 76 APP0 = 0xe0, // application 0 segment 77 // ... 78 APPf = 0xef, // application f segment 79 //DAC = 0xcc, // define arithmetic conditioning table 80 COM = 0xfe, // comment 81 EOI = 0xd9, // end of image 82 } 83 84 bool detect_jpeg(Reader* rc) 85 { 86 auto info = read_jpeg_info(rc); 87 reset2start(rc); 88 return info.e == 0; 89 } 90 91 IFInfo infoerror(ubyte e) 92 { 93 IFInfo info = { e: e }; 94 return info; 95 } 96 97 IFInfo read_jpeg_info(Reader* rc) 98 { 99 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 100 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 101 102 while (true) { 103 if (read_u8(rc) != 0xff) 104 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 105 106 ubyte marker = read_u8(rc); 107 while (marker == 0xff && !rc.fail) 108 marker = read_u8(rc); 109 110 if (rc.fail) 111 return infoerror(ERROR.stream); 112 113 switch (marker) with (MARKER) { 114 case SOF0: .. case SOF3: 115 case SOF9: .. case SOF11: 116 skip(rc, 3); // len + some byte 117 IFInfo info; 118 info.h = read_u16be(rc); 119 info.w = read_u16be(rc); 120 info.c = read_u8(rc); 121 info.e = rc.fail ? ERROR.stream : 0; 122 return info; 123 case SOS, EOI: 124 return infoerror(ERROR.data); 125 case DRI, DHT, DQT, COM: 126 case APP0: .. case APPf: 127 int len = read_u16be(rc) - 2; 128 skip(rc, len); 129 break; 130 default: 131 return infoerror(ERROR.unsupp); 132 } 133 } 134 assert(0); 135 } 136 137 ubyte read_jpeg(Reader* rc, out IFImage image, in int reqchans, in int reqbpc) 138 { 139 if (cast(uint) reqchans > 4) 140 return ERROR.arg; 141 const ubyte tbpc = cast(ubyte) (reqbpc ? reqbpc : 8); 142 if (tbpc != 8 && tbpc != 16) 143 return ERROR.unsupp; 144 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 145 return rc.fail ? ERROR.stream : ERROR.data; 146 if (rc.fail) 147 return ERROR.stream; 148 149 JPEGDecoder dc = { rc: rc }; 150 151 ubyte e = read_markers(dc); // reads until first scan header or eoi 152 153 if (e) return e; 154 if (dc.eoi_reached) return ERROR.data; 155 156 dc.tchans = reqchans == 0 ? dc.num_comps : reqchans; 157 158 if (cast(ulong) dc.width * dc.height * dc.tchans > MAXIMUM_IMAGE_SIZE) 159 return ERROR.bigimg; 160 161 { 162 size_t[3] csizes; 163 size_t acc; 164 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 165 csizes[i] = dc.num_mcu_x * comp.sfx*8 * dc.num_mcu_y * comp.sfy*8; 166 acc += csizes[i]; 167 } 168 dc.compsbuf = new_buffer(acc, e); 169 if (e) return e; 170 acc = 0; 171 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 172 comp.data = dc.compsbuf[acc .. acc + csizes[i]]; 173 acc += csizes[i]; 174 } 175 } 176 scope(exit) 177 _free(dc.compsbuf.ptr); 178 179 // E.7 -- Multiple scans are for progressive images which are not supported 180 //while (!dc.eoi_reached) { 181 e = decode_scan(dc); // E.2.3 182 //read_markers(dc); // reads until next scan header or eoi 183 //} 184 if (e) return e; 185 186 // throw away fill samples and convert to target format 187 ubyte[] buf = dc.reconstruct(e); 188 if (e) return e; 189 190 if (VERTICAL_ORIENTATION_READ == -1) 191 e = flip_vertically(dc.width, dc.height, dc.tchans, buf); 192 193 image.w = dc.width; 194 image.h = dc.height; 195 image.c = cast(ubyte) dc.tchans; 196 image.cinfile = dc.num_comps; 197 image.bpc = tbpc; 198 if (tbpc == 8) { 199 image.buf8 = buf; 200 } else { 201 image.buf16 = bpc8to16(buf); 202 if (!image.buf16.ptr) 203 return ERROR.oom; 204 } 205 return 0; 206 } 207 208 ubyte read_markers(ref JPEGDecoder dc) 209 { 210 bool has_next_scan_header = false; 211 while (!has_next_scan_header && !dc.eoi_reached) { 212 if (read_u8(dc.rc) != 0xff) 213 return dc.rc.fail ? ERROR.stream : ERROR.data; 214 215 ubyte marker = read_u8(dc.rc); 216 while (marker == 0xff && !dc.rc.fail) 217 marker = read_u8(dc.rc); 218 219 if (dc.rc.fail) 220 return ERROR.stream; 221 222 ubyte e; 223 switch (marker) with (MARKER) { 224 case DHT: 225 e = read_huffman_tables(dc); 226 break; 227 case DQT: 228 e = read_quantization_tables(dc); 229 break; 230 case SOF0: 231 if (dc.has_frame_header) 232 return ERROR.data; 233 e = read_frame_header(dc); 234 dc.has_frame_header = true; 235 break; 236 case SOS: 237 if (!dc.has_frame_header) 238 return ERROR.data; 239 e = read_scan_header(dc); 240 has_next_scan_header = true; 241 break; 242 case DRI: 243 if (read_u16be(dc.rc) != 4) // len 244 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 245 dc.restart_interval = read_u16be(dc.rc); 246 break; 247 case EOI: 248 dc.eoi_reached = true; 249 break; 250 case APP0: .. case APPf: 251 case COM: 252 const int len = read_u16be(dc.rc) - 2; 253 skip(dc.rc, len); 254 break; 255 default: 256 return ERROR.unsupp; 257 } 258 if (e) 259 return dc.rc.fail ? ERROR.stream : e; 260 } 261 return 0; 262 } 263 264 // DHT -- define huffman tables 265 ubyte read_huffman_tables(ref JPEGDecoder dc) 266 { 267 ubyte[17] tmp; 268 int len = read_u16be(dc.rc) - 2; 269 if (dc.rc.fail) return ERROR.stream; 270 271 ubyte e; 272 while (len > 0) { 273 read_block(dc.rc, tmp[0..17]); // info byte & the BITS 274 if (dc.rc.fail) return ERROR.stream; 275 const ubyte tableslot = tmp[0] & 0x0f; // must be 0 or 1 for baseline 276 const ubyte tableclass = tmp[0] >> 4; // 0 = dc table, 1 = ac table 277 if (tableslot > 1 || tableclass > 1) 278 return ERROR.unsupp; 279 280 // compute total number of huffman codes 281 int mt = 0; 282 foreach (i; 1..17) 283 mt += tmp[i]; 284 if (256 < mt) 285 return ERROR.data; 286 287 if (tableclass == 0) { 288 read_block(dc.rc, dc.dc_tables[tableslot].values[0..mt]); 289 derive_table(dc.dc_tables[tableslot], tmp[1..17], e); 290 } else { 291 read_block(dc.rc, dc.ac_tables[tableslot].values[0..mt]); 292 derive_table(dc.ac_tables[tableslot], tmp[1..17], e); 293 } 294 295 len -= 17 + mt; 296 } 297 if (dc.rc.fail) return ERROR.stream; 298 return e; 299 } 300 301 // num_values is the BITS 302 void derive_table(ref HuffTab table, in ref ubyte[16] num_values, ref ubyte e) 303 { 304 short[256] codes; 305 306 int k = 0; 307 foreach (i; 0..16) { 308 foreach (j; 0..num_values[i]) { 309 if (k > table.sizes.length) { 310 e = ERROR.data; 311 return; 312 } 313 table.sizes[k] = cast(ubyte) (i + 1); 314 ++k; 315 } 316 } 317 table.sizes[k] = 0; 318 319 k = 0; 320 short code = 0; 321 ubyte si = table.sizes[k]; 322 while (true) { 323 do { 324 codes[k] = code; 325 ++code; 326 ++k; 327 } while (si == table.sizes[k]); 328 329 if (table.sizes[k] == 0) 330 break; 331 332 assert(si < table.sizes[k]); 333 do { 334 code <<= 1; 335 ++si; 336 } while (si != table.sizes[k]); 337 } 338 339 derive_mincode_maxcode_valptr( 340 table.mincode, table.maxcode, table.valptr, 341 codes, num_values 342 ); 343 } 344 345 // F.15 346 void derive_mincode_maxcode_valptr(ref short[16] mincode, ref short[16] maxcode, 347 ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) 348 { 349 mincode[] = -1; 350 maxcode[] = -1; 351 valptr[] = -1; 352 353 int j = 0; 354 foreach (i; 0..16) { 355 if (num_values[i] != 0) { 356 valptr[i] = cast(short) j; 357 mincode[i] = codes[j]; 358 j += num_values[i] - 1; 359 maxcode[i] = codes[j]; 360 j += 1; 361 } 362 } 363 } 364 365 // DQT -- define quantization tables 366 ubyte read_quantization_tables(ref JPEGDecoder dc) 367 { 368 int len = read_u16be(dc.rc); 369 if (len % 65 != 2) 370 return dc.rc.fail ? ERROR.stream : ERROR.data; 371 len -= 2; 372 while (len > 0) { 373 const ubyte info = read_u8(dc.rc); 374 const ubyte tableslot = info & 0x0f; 375 const ubyte precision = info >> 4; // 0 = 8 bit, 1 = 16 bit 376 if (tableslot > 3 || precision != 0) // only 8 bit for baseline 377 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 378 read_block(dc.rc, dc.qtables[tableslot][0..64]); 379 len -= 1 + 64; 380 } 381 return dc.rc.fail ? ERROR.stream : 0; 382 } 383 384 // SOF0 -- start of frame 385 ubyte read_frame_header(ref JPEGDecoder dc) 386 { 387 const int len = read_u16be(dc.rc); // 8 + num_comps*3 388 const ubyte precision = read_u8(dc.rc); 389 dc.height = read_u16be(dc.rc); 390 dc.width = read_u16be(dc.rc); 391 dc.num_comps = read_u8(dc.rc); 392 393 if ((dc.num_comps != 1 && dc.num_comps != 3) 394 || precision != 8 || len != 8 + dc.num_comps*3) 395 return ERROR.unsupp; 396 397 dc.hmax = 0; 398 dc.vmax = 0; 399 int mcu_du = 0; // data units in one mcu 400 ubyte[9] tmp; 401 read_block(dc.rc, tmp[0..dc.num_comps*3]); 402 if (dc.rc.fail) return ERROR.stream; 403 foreach (i; 0..dc.num_comps) { 404 ubyte ci = tmp[i*3]; 405 // JFIF says ci should be i+1, but there are images where ci is i. Normalize 406 // ids so that ci == i, always. So much for standards... 407 if (i == 0) { dc.correct_comp_ids = ci == i+1; } 408 if ((dc.correct_comp_ids && ci != i+1) 409 || (!dc.correct_comp_ids && ci != i)) 410 return ERROR.data; 411 412 Component* comp = &dc.comps[i]; 413 const ubyte sampling_factors = tmp[i*3 + 1]; 414 comp.sfx = sampling_factors >> 4; 415 comp.sfy = sampling_factors & 0xf; 416 comp.qtable = tmp[i*3 + 2]; 417 if ( comp.sfy < 1 || 4 < comp.sfy || 418 comp.sfx < 1 || 4 < comp.sfx || 419 3 < comp.qtable ) 420 return ERROR.unsupp; 421 422 if (dc.hmax < comp.sfx) dc.hmax = comp.sfx; 423 if (dc.vmax < comp.sfy) dc.vmax = comp.sfy; 424 425 mcu_du += comp.sfx * comp.sfy; 426 } 427 if (10 < mcu_du) 428 return ERROR.unsupp; 429 430 assert(dc.hmax * dc.vmax); 431 foreach (i; 0..dc.num_comps) { 432 dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / 433 dc.hmax)); 434 dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / 435 dc.vmax)); 436 } 437 438 size_t mcu_w = dc.hmax * 8; 439 size_t mcu_h = dc.vmax * 8; 440 dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w); 441 dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h); 442 return 0; 443 } 444 445 // SOS -- start of scan 446 ubyte read_scan_header(ref JPEGDecoder dc) 447 { 448 const ushort len = read_u16be(dc.rc); 449 const ubyte num_scan_comps = read_u8(dc.rc); 450 451 if ( num_scan_comps != dc.num_comps || 452 len != (6+num_scan_comps*2) ) 453 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 454 455 ubyte[16] buf; 456 read_block(dc.rc, buf[0..len-3]); 457 if (dc.rc.fail) return ERROR.stream; 458 459 foreach (i; 0..num_scan_comps) { 460 const uint ci = buf[i*2] - (dc.correct_comp_ids ? 1 : 0); 461 if (ci >= dc.num_comps) 462 return ERROR.data; 463 464 const ubyte tables = buf[i*2 + 1]; 465 dc.comps[ci].dc_table = tables >> 4; 466 dc.comps[ci].ac_table = tables & 0x0f; 467 if (dc.comps[ci].dc_table > 1 || dc.comps[ci].ac_table > 1) 468 return ERROR.unsupp; 469 } 470 471 // ignore these 472 //ubyte spectral_start = buf[$-3]; 473 //ubyte spectral_end = buf[$-2]; 474 //ubyte approx = buf[$-1]; 475 return 0; 476 } 477 478 // E.2.3 and E.8 and E.9 479 ubyte decode_scan(ref JPEGDecoder dc) 480 { 481 int intervals, mcus; 482 if (dc.restart_interval > 0) { 483 int total_mcus = dc.num_mcu_x * dc.num_mcu_y; 484 intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval; 485 mcus = dc.restart_interval; 486 } else { 487 intervals = 1; 488 mcus = dc.num_mcu_x * dc.num_mcu_y; 489 } 490 491 ubyte e; 492 foreach (mcu_j; 0 .. dc.num_mcu_y) { 493 foreach (mcu_i; 0 .. dc.num_mcu_x) { 494 495 // decode mcu 496 foreach (c; 0..dc.num_comps) { 497 auto comp = &dc.comps[c]; 498 foreach (du_j; 0 .. comp.sfy) { 499 foreach (du_i; 0 .. comp.sfx) { 500 // decode entropy, dequantize & dezigzag 501 short[64] data; 502 e = decode_block(dc, *comp, dc.qtables[comp.qtable], data); 503 if (e) return e; 504 // idct & level-shift 505 int outx = (mcu_i * comp.sfx + du_i) * 8; 506 int outy = (mcu_j * comp.sfy + du_j) * 8; 507 int dst_stride = dc.num_mcu_x * comp.sfx*8; 508 ubyte* dst = comp.data.ptr + outy*dst_stride + outx; 509 stbi__idct_block(dst, dst_stride, data); 510 } 511 } 512 } 513 514 --mcus; 515 516 if (!mcus) { 517 --intervals; 518 if (!intervals) 519 return e; 520 521 e = read_restart(dc.rc); // RSTx marker 522 523 if (intervals == 1) { 524 // last interval, may have fewer MCUs than defined by DRI 525 mcus = (dc.num_mcu_y - mcu_j - 1) 526 * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1; 527 } else { 528 mcus = dc.restart_interval; 529 } 530 531 // reset decoder 532 dc.cb = 0; 533 dc.bits_left = 0; 534 foreach (k; 0..dc.num_comps) 535 dc.comps[k].pred = 0; 536 } 537 538 } 539 } 540 return e; 541 } 542 543 // RST0-RST7 544 ubyte read_restart(Reader* rc) { 545 ubyte a = read_u8(rc); 546 ubyte b = read_u8(rc); 547 if (rc.fail) return ERROR.stream; 548 if (a != 0xff || b < MARKER.RST0 || b > MARKER.RST7) 549 return ERROR.data; 550 return 0; 551 // the markers should cycle 0 through 7, could check that here... 552 } 553 554 immutable ubyte[64] dezigzag = [ 555 0, 1, 8, 16, 9, 2, 3, 10, 556 17, 24, 32, 25, 18, 11, 4, 5, 557 12, 19, 26, 33, 40, 48, 41, 34, 558 27, 20, 13, 6, 7, 14, 21, 28, 559 35, 42, 49, 56, 57, 50, 43, 36, 560 29, 22, 15, 23, 30, 37, 44, 51, 561 58, 59, 52, 45, 38, 31, 39, 46, 562 53, 60, 61, 54, 47, 55, 62, 63, 563 ]; 564 565 // decode entropy, dequantize & dezigzag (see section F.2) 566 ubyte decode_block(ref JPEGDecoder dc, ref Component comp, in ref ubyte[64] qtable, 567 out short[64] result) 568 { 569 ubyte e; 570 const ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table], e); 571 if (e) return e; 572 const int diff = t ? dc.receive_and_extend(t, e) : 0; 573 574 comp.pred = comp.pred + diff; 575 result[0] = cast(short) (comp.pred * qtable[0]); 576 577 int k = 1; 578 do { 579 ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table], e); 580 if (e) return e; 581 ubyte rrrr = rs >> 4; 582 ubyte ssss = rs & 0xf; 583 584 if (ssss == 0) { 585 if (rrrr != 0xf) 586 break; // end of block 587 k += 16; // run length is 16 588 continue; 589 } 590 591 k += rrrr; 592 593 if (63 < k) 594 return ERROR.data; 595 result[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss, e) * qtable[k]); 596 k += 1; 597 } while (k < 64); 598 599 return 0; 600 } 601 602 int receive_and_extend(ref JPEGDecoder dc, in ubyte s, ref ubyte e) 603 { 604 // receive 605 int symbol = 0; 606 foreach (_; 0..s) 607 symbol = (symbol << 1) + nextbit(dc, e); 608 // extend 609 int vt = 1 << (s-1); 610 if (symbol < vt) 611 return symbol + (-1 << s) + 1; 612 return symbol; 613 } 614 615 // F.16 -- the DECODE 616 ubyte decode_huff(ref JPEGDecoder dc, in ref HuffTab tab, ref ubyte e) 617 { 618 short code = nextbit(dc, e); 619 620 int i = 0; 621 while (tab.maxcode[i] < code) { 622 code = cast(short) ((code << 1) + nextbit(dc, e)); 623 i += 1; 624 if (i >= tab.maxcode.length) { 625 e = ERROR.data; 626 return 0; 627 } 628 } 629 const uint j = cast(uint) (tab.valptr[i] + code - tab.mincode[i]); 630 if (j >= tab.values.length) { 631 e = ERROR.data; 632 return 0; 633 } 634 return tab.values[j]; 635 } 636 637 // F.2.2.5 and F.18 638 ubyte nextbit(ref JPEGDecoder dc, ref ubyte e) 639 { 640 if (!dc.bits_left) { 641 dc.cb = read_u8(dc.rc); 642 dc.bits_left = 8; 643 644 if (dc.cb == 0xff) { 645 if (read_u8(dc.rc) != 0x0) { 646 e = dc.rc.fail ? ERROR.stream : ERROR.data; // unexpected marker 647 return 0; 648 } 649 } 650 if (dc.rc.fail) e = ERROR.stream; 651 } 652 653 ubyte r = dc.cb >> 7; 654 dc.cb <<= 1; 655 dc.bits_left -= 1; 656 return r; 657 } 658 659 ubyte[] reconstruct(in ref JPEGDecoder dc, ref ubyte e) 660 { 661 ubyte[] result = new_buffer(dc.width * dc.height * dc.tchans, e); 662 if (e) return null; 663 664 switch (dc.num_comps * 10 + dc.tchans) { 665 case 34, 33: 666 // Use specialized bilinear filtering functions for the frequent cases 667 // where Cb & Cr channels have half resolution. 668 if (dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2 && 669 dc.comps[0].sfx + dc.comps[0].sfy >= 3 && 670 dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 && 671 dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) 672 { 673 void function(in ubyte[], in ubyte[], ubyte[]) nothrow resample; 674 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) { 675 case 22: resample = &upsample_h2_v2; break; 676 case 21: resample = &upsample_h2_v1; break; 677 case 12: resample = &upsample_h1_v2; break; 678 default: assert(0); 679 } 680 681 ubyte[] comps1n2 = new_buffer(dc.width * 2, e); 682 if (e) return null; 683 scope(exit) _free(comps1n2.ptr); 684 ubyte[] comp1 = comps1n2[0..dc.width]; 685 ubyte[] comp2 = comps1n2[dc.width..$]; 686 687 size_t s = 0; 688 size_t di = 0; 689 foreach (j; 0 .. dc.height) { 690 const size_t mi = j / dc.comps[0].sfy; 691 const size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy) 692 ? mi : mi - 1 + s * 2; 693 s = s ^ 1; 694 695 const size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8; 696 const size_t cl0 = mi * cs; 697 const size_t cl1 = si * cs; 698 resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x], 699 dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x], 700 comp1[]); 701 resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x], 702 dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x], 703 comp2[]); 704 705 foreach (i; 0 .. dc.width) { 706 result[di .. di+3] = ycbcr_to_rgb( 707 dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i], 708 comp1[i], 709 comp2[i], 710 ); 711 if (dc.tchans == 4) 712 result[di+3] = 255; 713 di += dc.tchans; 714 } 715 } 716 717 return result; 718 } 719 720 foreach (const ref comp; dc.comps[0..dc.num_comps]) { 721 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax) 722 return dc.upsample(result); 723 } 724 725 size_t si, di; 726 foreach (j; 0 .. dc.height) { 727 foreach (i; 0 .. dc.width) { 728 result[di .. di+3] = ycbcr_to_rgb( 729 dc.comps[0].data[si+i], 730 dc.comps[1].data[si+i], 731 dc.comps[2].data[si+i], 732 ); 733 if (dc.tchans == 4) 734 result[di+3] = 255; 735 di += dc.tchans; 736 } 737 si += dc.num_mcu_x * dc.comps[0].sfx * 8; 738 } 739 return result; 740 case 32, 12, 31, 11: 741 const comp = &dc.comps[0]; 742 if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) { 743 size_t si, di; 744 if (dc.tchans == 2) { 745 foreach (j; 0 .. dc.height) { 746 foreach (i; 0 .. dc.width) { 747 result[di++] = comp.data[si+i]; 748 result[di++] = 255; 749 } 750 si += dc.num_mcu_x * comp.sfx * 8; 751 } 752 } else { 753 foreach (j; 0 .. dc.height) { 754 result[di .. di+dc.width] = comp.data[si .. si+dc.width]; 755 si += dc.num_mcu_x * comp.sfx * 8; 756 di += dc.width; 757 } 758 } 759 return result; 760 } else { 761 // need to resample (haven't tested this...) 762 return dc.upsample_luma(result); 763 } 764 case 14, 13: 765 const comp = &dc.comps[0]; 766 size_t si, di; 767 foreach (j; 0 .. dc.height) { 768 foreach (i; 0 .. dc.width) { 769 result[di .. di+3] = comp.data[si+i]; 770 if (dc.tchans == 4) 771 result[di+3] = 255; 772 di += dc.tchans; 773 } 774 si += dc.num_mcu_x * comp.sfx * 8; 775 } 776 return result; 777 default: 778 assert(0); 779 } 780 } 781 782 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 783 { 784 ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) 785 { 786 return cast(ubyte) (( cast(uint) mm * 3 * 3 787 + cast(uint) ms * 3 * 1 788 + cast(uint) sm * 1 * 3 789 + cast(uint) ss * 1 * 1 790 + 8) / 16); 791 } 792 793 result[0] = cast(ubyte) (( cast(uint) line0[0] * 3 794 + cast(uint) line1[0] * 1 795 + 2) / 4); 796 if (line0.length == 1) 797 return; 798 result[1] = mix(line0[0], line0[1], line1[0], line1[1]); 799 800 size_t di = 2; 801 foreach (i; 1 .. line0.length) { 802 result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]); 803 di += 1; 804 if (i == line0.length-1) { 805 if (di < result.length) { 806 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3 807 + cast(uint) line1[i] * 1 808 + 2) / 4); 809 } 810 return; 811 } 812 result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]); 813 di += 1; 814 } 815 } 816 817 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) 818 { 819 result[0] = line0[0]; 820 if (line0.length == 1) 821 return; 822 result[1] = cast(ubyte) (( cast(uint) line0[0] * 3 823 + cast(uint) line0[1] * 1 824 + 2) / 4); 825 size_t di = 2; 826 foreach (i; 1 .. line0.length) { 827 result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1 828 + cast(uint) line0[i+0] * 3 829 + 2) / 4); 830 di += 1; 831 if (i == line0.length-1) { 832 if (di < result.length) result[di] = line0[i]; 833 return; 834 } 835 result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3 836 + cast(uint) line0[i+1] * 1 837 + 2) / 4); 838 di += 1; 839 } 840 } 841 842 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 843 { 844 foreach (i; 0 .. result.length) { 845 result[i] = cast(ubyte) (( cast(uint) line0[i] * 3 846 + cast(uint) line1[i] * 1 847 + 2) / 4); 848 } 849 } 850 851 // Nearest neighbor 852 ubyte[] upsample_luma(in ref JPEGDecoder dc, ubyte[] result) 853 { 854 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 855 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 856 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 857 858 float y0 = y_step0 * 0.5; 859 size_t y0i = 0; 860 861 size_t di; 862 863 foreach (j; 0 .. dc.height) { 864 float x0 = x_step0 * 0.5; 865 size_t x0i = 0; 866 foreach (i; 0 .. dc.width) { 867 result[di] = dc.comps[0].data[y0i + x0i]; 868 if (dc.tchans == 2) 869 result[di+1] = 255; 870 di += dc.tchans; 871 x0 += x_step0; 872 if (x0 >= 1.0) { 873 x0 -= 1.0; 874 x0i += 1; 875 } 876 } 877 y0 += y_step0; 878 if (y0 >= 1.0) { 879 y0 -= 1.0; 880 y0i += stride0; 881 } 882 } 883 return result; 884 } 885 886 // Nearest neighbor 887 ubyte[] upsample(in ref JPEGDecoder dc, ubyte[] result) 888 { 889 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 890 const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8; 891 const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8; 892 893 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 894 const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax; 895 const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax; 896 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 897 const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax; 898 const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax; 899 900 float y0 = y_step0 * 0.5; 901 float y1 = y_step1 * 0.5; 902 float y2 = y_step2 * 0.5; 903 size_t y0i = 0; 904 size_t y1i = 0; 905 size_t y2i = 0; 906 907 size_t di; 908 909 foreach (_j; 0 .. dc.height) { 910 float x0 = x_step0 * 0.5; 911 float x1 = x_step1 * 0.5; 912 float x2 = x_step2 * 0.5; 913 size_t x0i = 0; 914 size_t x1i = 0; 915 size_t x2i = 0; 916 foreach (i; 0 .. dc.width) { 917 result[di .. di+3] = ycbcr_to_rgb( 918 dc.comps[0].data[y0i + x0i], 919 dc.comps[1].data[y1i + x1i], 920 dc.comps[2].data[y2i + x2i], 921 ); 922 if (dc.tchans == 4) 923 result[di+3] = 255; 924 di += dc.tchans; 925 x0 += x_step0; 926 x1 += x_step1; 927 x2 += x_step2; 928 if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; } 929 if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; } 930 if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; } 931 } 932 y0 += y_step0; 933 y1 += y_step1; 934 y2 += y_step2; 935 if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; } 936 if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; } 937 if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; } 938 } 939 return result; 940 } 941 942 ubyte[3] ycbcr_to_rgb(in ubyte y, in ubyte cb, in ubyte cr) pure { 943 ubyte[3] rgb = void; 944 rgb[0] = clamp(y + 1.402*(cr-128)); 945 rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128)); 946 rgb[2] = clamp(y + 1.772*(cb-128)); 947 return rgb; 948 } 949 950 ubyte clamp(in float x) pure { 951 if (x < 0) return 0; 952 if (255 < x) return 255; 953 return cast(ubyte) x; 954 } 955 956 // ------------------------------------------------------------ 957 // The IDCT stuff here (to the next dashed line) is copied and adapted from 958 // stb_image which is released under public domain. Many thanks to stb_image 959 // author, Sean Barrett. 960 // Link: https://github.com/nothings/stb/blob/master/stb_image.h 961 962 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); } 963 pure int fsh(int x) { return x << 12; } 964 965 // from stb_image, derived from jidctint -- DCT_ISLOW 966 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3, 967 ref int x0, ref int x1, ref int x2, ref int x3, 968 int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7) 969 { 970 int p1,p2,p3,p4,p5; 971 //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; 972 p2 = s2; 973 p3 = s6; 974 p1 = (p2+p3) * f2f(0.5411961f); 975 t2 = p1 + p3 * f2f(-1.847759065f); 976 t3 = p1 + p2 * f2f( 0.765366865f); 977 p2 = s0; 978 p3 = s4; 979 t0 = fsh(p2+p3); 980 t1 = fsh(p2-p3); 981 x0 = t0+t3; 982 x3 = t0-t3; 983 x1 = t1+t2; 984 x2 = t1-t2; 985 t0 = s7; 986 t1 = s5; 987 t2 = s3; 988 t3 = s1; 989 p3 = t0+t2; 990 p4 = t1+t3; 991 p1 = t0+t3; 992 p2 = t1+t2; 993 p5 = (p3+p4)*f2f( 1.175875602f); 994 t0 = t0*f2f( 0.298631336f); 995 t1 = t1*f2f( 2.053119869f); 996 t2 = t2*f2f( 3.072711026f); 997 t3 = t3*f2f( 1.501321110f); 998 p1 = p5 + p1*f2f(-0.899976223f); 999 p2 = p5 + p2*f2f(-2.562915447f); 1000 p3 = p3*f2f(-1.961570560f); 1001 p4 = p4*f2f(-0.390180644f); 1002 t3 += p1+p4; 1003 t2 += p2+p3; 1004 t1 += p2+p4; 1005 t0 += p1+p3; 1006 } 1007 1008 // idct and level-shift 1009 pure void stbi__idct_block(ubyte* dst, in int dst_stride, in ref short[64] data) 1010 { 1011 int i; 1012 int[64] val; 1013 int* v = val.ptr; 1014 const(short)* d = data.ptr; 1015 1016 // columns 1017 for (i=0; i < 8; ++i,++d, ++v) { 1018 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing 1019 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 1020 && d[40]==0 && d[48]==0 && d[56]==0) { 1021 // no shortcut 0 seconds 1022 // (1|2|3|4|5|6|7)==0 0 seconds 1023 // all separate -0.047 seconds 1024 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds 1025 int dcterm = d[0] << 2; 1026 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; 1027 } else { 1028 int t0,t1,t2,t3,x0,x1,x2,x3; 1029 STBI__IDCT_1D( 1030 t0, t1, t2, t3, 1031 x0, x1, x2, x3, 1032 d[ 0], d[ 8], d[16], d[24], 1033 d[32], d[40], d[48], d[56] 1034 ); 1035 // constants scaled things up by 1<<12; let's bring them back 1036 // down, but keep 2 extra bits of precision 1037 x0 += 512; x1 += 512; x2 += 512; x3 += 512; 1038 v[ 0] = (x0+t3) >> 10; 1039 v[56] = (x0-t3) >> 10; 1040 v[ 8] = (x1+t2) >> 10; 1041 v[48] = (x1-t2) >> 10; 1042 v[16] = (x2+t1) >> 10; 1043 v[40] = (x2-t1) >> 10; 1044 v[24] = (x3+t0) >> 10; 1045 v[32] = (x3-t0) >> 10; 1046 } 1047 } 1048 1049 ubyte* o = dst; 1050 for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) { 1051 // no fast case since the first 1D IDCT spread components out 1052 int t0,t1,t2,t3,x0,x1,x2,x3; 1053 STBI__IDCT_1D( 1054 t0, t1, t2, t3, 1055 x0, x1, x2, x3, 1056 v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] 1057 ); 1058 // constants scaled things up by 1<<12, plus we had 1<<2 from first 1059 // loop, plus horizontal and vertical each scale by sqrt(8) so together 1060 // we've got an extra 1<<3, so 1<<17 total we need to remove. 1061 // so we want to round that, which means adding 0.5 * 1<<17, 1062 // aka 65536. Also, we'll end up with -128 to 127 that we want 1063 // to encode as 0-255 by adding 128, so we'll add that before the shift 1064 x0 += 65536 + (128<<17); 1065 x1 += 65536 + (128<<17); 1066 x2 += 65536 + (128<<17); 1067 x3 += 65536 + (128<<17); 1068 // tried computing the shifts into temps, or'ing the temps to see 1069 // if any were out of range, but that was slower 1070 o[0] = stbi__clamp((x0+t3) >> 17); 1071 o[7] = stbi__clamp((x0-t3) >> 17); 1072 o[1] = stbi__clamp((x1+t2) >> 17); 1073 o[6] = stbi__clamp((x1-t2) >> 17); 1074 o[2] = stbi__clamp((x2+t1) >> 17); 1075 o[5] = stbi__clamp((x2-t1) >> 17); 1076 o[3] = stbi__clamp((x3+t0) >> 17); 1077 o[4] = stbi__clamp((x3-t0) >> 17); 1078 } 1079 } 1080 1081 // clamp to 0-255 1082 pure ubyte stbi__clamp(int x) { 1083 if (cast(uint) x > 255) { 1084 if (x < 0) return 0; 1085 if (x > 255) return 255; 1086 } 1087 return cast(ubyte) x; 1088 } 1089 1090 // ------------------------------------------------------------