1 // Copyright 2019 Tero Hänninen. All rights reserved. 2 // SPDX-License-Identifier: BSD-2-Clause 3 module imagefmt.jpeg; 4 5 import std.math : ceil; 6 import imagefmt; 7 8 @nogc nothrow package: 9 10 struct JPEGDecoder { 11 Reader* rc; 12 13 ubyte[64][4] qtables; 14 HuffTab[2] ac_tables; 15 HuffTab[2] dc_tables; 16 17 int bits_left; // num of unused bits in cb 18 ubyte cb; // current byte (next bit always at MSB) 19 20 bool has_frame_header = false; 21 bool eoi_reached = false; 22 23 bool correct_comp_ids; 24 ubyte[] compsbuf; 25 Component[3] comps; 26 ubyte num_comps; 27 int tchans; 28 29 int width; 30 int height; 31 int hmax; 32 int vmax; 33 int num_mcu_x; 34 int num_mcu_y; 35 36 ushort restart_interval; // number of MCUs in restart interval 37 } 38 39 // image component 40 struct Component { 41 size_t x; // total num of samples, without fill samples 42 size_t y; // total num of samples, without fill samples 43 ubyte[] data; // reconstructed samples 44 int pred; // dc prediction 45 ubyte sfx; // sampling factor, aka. h 46 ubyte sfy; // sampling factor, aka. v 47 ubyte qtable; 48 ubyte ac_table; 49 ubyte dc_table; 50 } 51 52 struct HuffTab { 53 ubyte[256] values; 54 ubyte[257] sizes; 55 short[16] mincode; 56 short[16] maxcode; 57 short[16] valptr; 58 } 59 60 enum MARKER : ubyte { 61 SOI = 0xd8, // start of image 62 SOF0 = 0xc0, // start of frame / baseline DCT 63 //SOF1 = 0xc1, // start of frame / extended seq. 64 //SOF2 = 0xc2, // start of frame / progressive DCT 65 SOF3 = 0xc3, // start of frame / lossless 66 SOF9 = 0xc9, // start of frame / extended seq., arithmetic 67 SOF11 = 0xcb, // start of frame / lossless, arithmetic 68 DHT = 0xc4, // define huffman tables 69 DQT = 0xdb, // define quantization tables 70 DRI = 0xdd, // define restart interval 71 SOS = 0xda, // start of scan 72 DNL = 0xdc, // define number of lines 73 RST0 = 0xd0, // restart entropy coded data 74 // ... 75 RST7 = 0xd7, // restart entropy coded data 76 APP0 = 0xe0, // application 0 segment 77 // ... 78 APPf = 0xef, // application f segment 79 //DAC = 0xcc, // define arithmetic conditioning table 80 COM = 0xfe, // comment 81 EOI = 0xd9, // end of image 82 } 83 84 bool detect_jpeg(Reader* rc) 85 { 86 auto info = read_jpeg_info(rc); 87 reset2start(rc); 88 return info.e == 0; 89 } 90 91 IFInfo infoerror(ubyte e) 92 { 93 IFInfo info = { e: e }; 94 return info; 95 } 96 97 IFInfo read_jpeg_info(Reader* rc) 98 { 99 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 100 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 101 102 while (true) { 103 if (read_u8(rc) != 0xff) 104 return infoerror(rc.fail ? ERROR.stream : ERROR.data); 105 106 ubyte marker = read_u8(rc); 107 while (marker == 0xff && !rc.fail) 108 marker = read_u8(rc); 109 110 if (rc.fail) 111 return infoerror(ERROR.stream); 112 113 switch (marker) with (MARKER) { 114 case SOF0: .. case SOF3: 115 case SOF9: .. case SOF11: 116 skip(rc, 3); // len + some byte 117 IFInfo info; 118 info.h = read_u16be(rc); 119 info.w = read_u16be(rc); 120 info.c = read_u8(rc); 121 info.e = rc.fail ? ERROR.stream : 0; 122 return info; 123 case SOS, EOI: 124 return infoerror(ERROR.data); 125 case DRI, DHT, DQT, COM: 126 case APP0: .. case APPf: 127 int len = read_u16be(rc) - 2; 128 skip(rc, len); 129 break; 130 default: 131 return infoerror(ERROR.unsupp); 132 } 133 } 134 assert(0); 135 } 136 137 ubyte read_jpeg(Reader* rc, out IFImage image, in int reqchans, in int reqbpc) 138 { 139 if (cast(uint) reqchans > 4) 140 return ERROR.arg; 141 const ubyte tbpc = cast(ubyte) (reqbpc ? reqbpc : 8); 142 if (tbpc != 8 && tbpc != 16) 143 return ERROR.unsupp; 144 if (read_u8(rc) != 0xff || read_u8(rc) != MARKER.SOI) 145 return rc.fail ? ERROR.stream : ERROR.data; 146 if (rc.fail) 147 return ERROR.stream; 148 149 JPEGDecoder dc = { rc: rc }; 150 151 ubyte e = read_markers(dc); // reads until first scan header or eoi 152 153 if (e) return e; 154 if (dc.eoi_reached) return ERROR.data; 155 156 dc.tchans = reqchans == 0 ? dc.num_comps : reqchans; 157 158 if (cast(ulong) dc.width * dc.height * dc.tchans > MAXIMUM_IMAGE_SIZE) 159 return ERROR.bigimg; 160 161 { 162 size_t[3] csizes; 163 size_t acc; 164 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 165 csizes[i] = dc.num_mcu_x * comp.sfx*8 * dc.num_mcu_y * comp.sfy*8; 166 acc += csizes[i]; 167 } 168 dc.compsbuf = new_buffer(acc, e); 169 if (e) return e; 170 acc = 0; 171 foreach (i, ref comp; dc.comps[0..dc.num_comps]) { 172 comp.data = dc.compsbuf[acc .. acc + csizes[i]]; 173 acc += csizes[i]; 174 } 175 } 176 scope(exit) 177 _free(dc.compsbuf.ptr); 178 179 // E.7 -- Multiple scans are for progressive images which are not supported 180 //while (!dc.eoi_reached) { 181 e = decode_scan(dc); // E.2.3 182 //read_markers(dc); // reads until next scan header or eoi 183 //} 184 if (e) return e; 185 186 // throw away fill samples and convert to target format 187 ubyte[] buf = dc.reconstruct(e); 188 if (e) return e; 189 190 if (VERTICAL_ORIENTATION_READ == -1) 191 e = flip_vertically(dc.width, dc.height, dc.tchans, buf); 192 193 image.w = dc.width; 194 image.h = dc.height; 195 image.c = cast(ubyte) dc.tchans; 196 image.cinfile = dc.num_comps; 197 image.bpc = tbpc; 198 if (tbpc == 8) { 199 image.buf8 = buf; 200 } else { 201 image.buf16 = bpc8to16(buf); 202 if (!image.buf16.ptr) 203 return ERROR.oom; 204 } 205 return 0; 206 } 207 208 ubyte read_markers(ref JPEGDecoder dc) 209 { 210 bool has_next_scan_header = false; 211 while (!has_next_scan_header && !dc.eoi_reached) { 212 if (read_u8(dc.rc) != 0xff) 213 return dc.rc.fail ? ERROR.stream : ERROR.data; 214 215 ubyte marker = read_u8(dc.rc); 216 while (marker == 0xff && !dc.rc.fail) 217 marker = read_u8(dc.rc); 218 219 if (dc.rc.fail) 220 return ERROR.stream; 221 222 ubyte e; 223 switch (marker) with (MARKER) { 224 case DHT: 225 e = read_huffman_tables(dc); 226 break; 227 case DQT: 228 e = read_quantization_tables(dc); 229 break; 230 case SOF0: 231 if (dc.has_frame_header) 232 return ERROR.data; 233 e = read_frame_header(dc); 234 dc.has_frame_header = true; 235 break; 236 case SOS: 237 if (!dc.has_frame_header) 238 return ERROR.data; 239 e = read_scan_header(dc); 240 has_next_scan_header = true; 241 break; 242 case DRI: 243 if (read_u16be(dc.rc) != 4) // len 244 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 245 dc.restart_interval = read_u16be(dc.rc); 246 break; 247 case EOI: 248 dc.eoi_reached = true; 249 break; 250 case APP0: .. case APPf: 251 case COM: 252 const int len = read_u16be(dc.rc) - 2; 253 skip(dc.rc, len); 254 break; 255 default: 256 return ERROR.unsupp; 257 } 258 if (e) 259 return dc.rc.fail ? ERROR.stream : e; 260 } 261 return 0; 262 } 263 264 // DHT -- define huffman tables 265 ubyte read_huffman_tables(ref JPEGDecoder dc) 266 { 267 ubyte[17] tmp; 268 int len = read_u16be(dc.rc) - 2; 269 if (dc.rc.fail) return ERROR.stream; 270 271 ubyte e; 272 while (len > 0) { 273 read_block(dc.rc, tmp[0..17]); // info byte & the BITS 274 if (dc.rc.fail) return ERROR.stream; 275 const ubyte tableslot = tmp[0] & 0x0f; // must be 0 or 1 for baseline 276 const ubyte tableclass = tmp[0] >> 4; // 0 = dc table, 1 = ac table 277 if (tableslot > 1 || tableclass > 1) 278 return ERROR.unsupp; 279 280 // compute total number of huffman codes 281 int mt = 0; 282 foreach (i; 1..17) 283 mt += tmp[i]; 284 if (256 < mt) 285 return ERROR.data; 286 287 if (tableclass == 0) { 288 read_block(dc.rc, dc.dc_tables[tableslot].values[0..mt]); 289 derive_table(dc.dc_tables[tableslot], tmp[1..17], e); 290 } else { 291 read_block(dc.rc, dc.ac_tables[tableslot].values[0..mt]); 292 derive_table(dc.ac_tables[tableslot], tmp[1..17], e); 293 } 294 295 len -= 17 + mt; 296 } 297 if (dc.rc.fail) return ERROR.stream; 298 return e; 299 } 300 301 // num_values is the BITS 302 void derive_table(ref HuffTab table, in ref ubyte[16] num_values, ref ubyte e) 303 { 304 short[256] codes; 305 306 int k = 0; 307 foreach (i; 0..16) { 308 foreach (j; 0..num_values[i]) { 309 if (k > table.sizes.length) { 310 e = ERROR.data; 311 return; 312 } 313 table.sizes[k] = cast(ubyte) (i + 1); 314 ++k; 315 } 316 } 317 table.sizes[k] = 0; 318 319 k = 0; 320 short code = 0; 321 ubyte si = table.sizes[k]; 322 while (true) { 323 do { 324 codes[k] = code; 325 ++code; 326 ++k; 327 } while (si == table.sizes[k]); 328 329 if (table.sizes[k] == 0) 330 break; 331 332 assert(si < table.sizes[k]); 333 do { 334 code <<= 1; 335 ++si; 336 } while (si != table.sizes[k]); 337 } 338 339 derive_mincode_maxcode_valptr( 340 table.mincode, table.maxcode, table.valptr, 341 codes, num_values 342 ); 343 } 344 345 // F.15 346 void derive_mincode_maxcode_valptr(ref short[16] mincode, ref short[16] maxcode, 347 ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) 348 { 349 mincode[] = -1; 350 maxcode[] = -1; 351 valptr[] = -1; 352 353 int j = 0; 354 foreach (i; 0..16) { 355 if (num_values[i] != 0) { 356 valptr[i] = cast(short) j; 357 mincode[i] = codes[j]; 358 j += num_values[i] - 1; 359 maxcode[i] = codes[j]; 360 j += 1; 361 } 362 } 363 } 364 365 // DQT -- define quantization tables 366 ubyte read_quantization_tables(ref JPEGDecoder dc) 367 { 368 int len = read_u16be(dc.rc); 369 if (len % 65 != 2) 370 return dc.rc.fail ? ERROR.stream : ERROR.data; 371 len -= 2; 372 while (len > 0) { 373 const ubyte info = read_u8(dc.rc); 374 const ubyte tableslot = info & 0x0f; 375 const ubyte precision = info >> 4; // 0 = 8 bit, 1 = 16 bit 376 if (tableslot > 3 || precision != 0) // only 8 bit for baseline 377 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 378 read_block(dc.rc, dc.qtables[tableslot][0..64]); 379 len -= 1 + 64; 380 } 381 return dc.rc.fail ? ERROR.stream : 0; 382 } 383 384 // SOF0 -- start of frame 385 ubyte read_frame_header(ref JPEGDecoder dc) 386 { 387 const int len = read_u16be(dc.rc); // 8 + num_comps*3 388 const ubyte precision = read_u8(dc.rc); 389 dc.height = read_u16be(dc.rc); 390 dc.width = read_u16be(dc.rc); 391 dc.num_comps = read_u8(dc.rc); 392 393 if ((dc.num_comps != 1 && dc.num_comps != 3) 394 || precision != 8 || len != 8 + dc.num_comps*3) 395 return ERROR.unsupp; 396 397 dc.hmax = 0; 398 dc.vmax = 0; 399 int mcu_du = 0; // data units in one mcu 400 ubyte[9] tmp; 401 read_block(dc.rc, tmp[0..dc.num_comps*3]); 402 if (dc.rc.fail) return ERROR.stream; 403 foreach (i; 0..dc.num_comps) { 404 ubyte ci = tmp[i*3]; 405 // JFIF says ci should be i+1, but there are images where ci is i. Normalize 406 // ids so that ci == i, always. So much for standards... 407 if (i == 0) { dc.correct_comp_ids = ci == i+1; } 408 if ((dc.correct_comp_ids && ci != i+1) 409 || (!dc.correct_comp_ids && ci != i)) 410 return ERROR.data; 411 412 Component* comp = &dc.comps[i]; 413 const ubyte sampling_factors = tmp[i*3 + 1]; 414 comp.sfx = sampling_factors >> 4; 415 comp.sfy = sampling_factors & 0xf; 416 comp.qtable = tmp[i*3 + 2]; 417 if ( comp.sfy < 1 || 4 < comp.sfy || 418 comp.sfx < 1 || 4 < comp.sfx || 419 3 < comp.qtable ) 420 return ERROR.unsupp; 421 422 if (dc.hmax < comp.sfx) dc.hmax = comp.sfx; 423 if (dc.vmax < comp.sfy) dc.vmax = comp.sfy; 424 425 mcu_du += comp.sfx * comp.sfy; 426 } 427 if (10 < mcu_du) 428 return ERROR.unsupp; 429 430 assert(dc.hmax * dc.vmax); 431 foreach (i; 0..dc.num_comps) { 432 dc.comps[i].x = cast(size_t) ceil(dc.width * (cast(double) dc.comps[i].sfx / 433 dc.hmax)); 434 dc.comps[i].y = cast(size_t) ceil(dc.height * (cast(double) dc.comps[i].sfy / 435 dc.vmax)); 436 } 437 438 size_t mcu_w = dc.hmax * 8; 439 size_t mcu_h = dc.vmax * 8; 440 dc.num_mcu_x = cast(int) ((dc.width + mcu_w-1) / mcu_w); 441 dc.num_mcu_y = cast(int) ((dc.height + mcu_h-1) / mcu_h); 442 return 0; 443 } 444 445 // SOS -- start of scan 446 ubyte read_scan_header(ref JPEGDecoder dc) 447 { 448 const ushort len = read_u16be(dc.rc); 449 const ubyte num_scan_comps = read_u8(dc.rc); 450 451 if ( num_scan_comps != dc.num_comps || 452 len != (6+num_scan_comps*2) ) 453 return dc.rc.fail ? ERROR.stream : ERROR.unsupp; 454 455 ubyte[16] buf; 456 ubyte e; 457 read_block(dc.rc, buf[0..len-3]); 458 if (dc.rc.fail) return ERROR.stream; 459 460 foreach (i; 0..num_scan_comps) { 461 const uint ci = buf[i*2] - (dc.correct_comp_ids ? 1 : 0); 462 if (ci >= dc.num_comps) 463 return ERROR.data; 464 465 const ubyte tables = buf[i*2 + 1]; 466 dc.comps[ci].dc_table = tables >> 4; 467 dc.comps[ci].ac_table = tables & 0x0f; 468 if (dc.comps[ci].dc_table > 1 || dc.comps[ci].ac_table > 1) 469 return ERROR.unsupp; 470 } 471 472 // ignore these 473 //ubyte spectral_start = buf[$-3]; 474 //ubyte spectral_end = buf[$-2]; 475 //ubyte approx = buf[$-1]; 476 return 0; 477 } 478 479 // E.2.3 and E.8 and E.9 480 ubyte decode_scan(ref JPEGDecoder dc) 481 { 482 int intervals, mcus; 483 if (dc.restart_interval > 0) { 484 int total_mcus = dc.num_mcu_x * dc.num_mcu_y; 485 intervals = (total_mcus + dc.restart_interval-1) / dc.restart_interval; 486 mcus = dc.restart_interval; 487 } else { 488 intervals = 1; 489 mcus = dc.num_mcu_x * dc.num_mcu_y; 490 } 491 492 ubyte e; 493 foreach (mcu_j; 0 .. dc.num_mcu_y) { 494 foreach (mcu_i; 0 .. dc.num_mcu_x) { 495 496 // decode mcu 497 foreach (c; 0..dc.num_comps) { 498 auto comp = &dc.comps[c]; 499 foreach (du_j; 0 .. comp.sfy) { 500 foreach (du_i; 0 .. comp.sfx) { 501 // decode entropy, dequantize & dezigzag 502 short[64] data; 503 e = decode_block(dc, *comp, dc.qtables[comp.qtable], data); 504 if (e) return e; 505 // idct & level-shift 506 int outx = (mcu_i * comp.sfx + du_i) * 8; 507 int outy = (mcu_j * comp.sfy + du_j) * 8; 508 int dst_stride = dc.num_mcu_x * comp.sfx*8; 509 ubyte* dst = comp.data.ptr + outy*dst_stride + outx; 510 stbi__idct_block(dst, dst_stride, data); 511 } 512 } 513 } 514 515 --mcus; 516 517 if (!mcus) { 518 --intervals; 519 if (!intervals) 520 return e; 521 522 e = read_restart(dc.rc); // RSTx marker 523 524 if (intervals == 1) { 525 // last interval, may have fewer MCUs than defined by DRI 526 mcus = (dc.num_mcu_y - mcu_j - 1) 527 * dc.num_mcu_x + dc.num_mcu_x - mcu_i - 1; 528 } else { 529 mcus = dc.restart_interval; 530 } 531 532 // reset decoder 533 dc.cb = 0; 534 dc.bits_left = 0; 535 foreach (k; 0..dc.num_comps) 536 dc.comps[k].pred = 0; 537 } 538 539 } 540 } 541 return e; 542 } 543 544 // RST0-RST7 545 ubyte read_restart(Reader* rc) { 546 ubyte a = read_u8(rc); 547 ubyte b = read_u8(rc); 548 if (rc.fail) return ERROR.stream; 549 if (a != 0xff || b < MARKER.RST0 || b > MARKER.RST7) 550 return ERROR.data; 551 return 0; 552 // the markers should cycle 0 through 7, could check that here... 553 } 554 555 immutable ubyte[64] dezigzag = [ 556 0, 1, 8, 16, 9, 2, 3, 10, 557 17, 24, 32, 25, 18, 11, 4, 5, 558 12, 19, 26, 33, 40, 48, 41, 34, 559 27, 20, 13, 6, 7, 14, 21, 28, 560 35, 42, 49, 56, 57, 50, 43, 36, 561 29, 22, 15, 23, 30, 37, 44, 51, 562 58, 59, 52, 45, 38, 31, 39, 46, 563 53, 60, 61, 54, 47, 55, 62, 63, 564 ]; 565 566 // decode entropy, dequantize & dezigzag (see section F.2) 567 ubyte decode_block(ref JPEGDecoder dc, ref Component comp, in ref ubyte[64] qtable, 568 out short[64] result) 569 { 570 ubyte e; 571 const ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table], e); 572 if (e) return e; 573 const int diff = t ? dc.receive_and_extend(t, e) : 0; 574 575 comp.pred = comp.pred + diff; 576 result[0] = cast(short) (comp.pred * qtable[0]); 577 578 int k = 1; 579 do { 580 ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table], e); 581 if (e) return e; 582 ubyte rrrr = rs >> 4; 583 ubyte ssss = rs & 0xf; 584 585 if (ssss == 0) { 586 if (rrrr != 0xf) 587 break; // end of block 588 k += 16; // run length is 16 589 continue; 590 } 591 592 k += rrrr; 593 594 if (63 < k) 595 return ERROR.data; 596 result[dezigzag[k]] = cast(short) (dc.receive_and_extend(ssss, e) * qtable[k]); 597 k += 1; 598 } while (k < 64); 599 600 return 0; 601 } 602 603 int receive_and_extend(ref JPEGDecoder dc, in ubyte s, ref ubyte e) 604 { 605 // receive 606 int symbol = 0; 607 foreach (_; 0..s) 608 symbol = (symbol << 1) + nextbit(dc, e); 609 // extend 610 int vt = 1 << (s-1); 611 if (symbol < vt) 612 return symbol + (-1 << s) + 1; 613 return symbol; 614 } 615 616 // F.16 -- the DECODE 617 ubyte decode_huff(ref JPEGDecoder dc, in ref HuffTab tab, ref ubyte e) 618 { 619 short code = nextbit(dc, e); 620 621 int i = 0; 622 while (tab.maxcode[i] < code) { 623 code = cast(short) ((code << 1) + nextbit(dc, e)); 624 i += 1; 625 if (i >= tab.maxcode.length) { 626 e = ERROR.data; 627 return 0; 628 } 629 } 630 const uint j = cast(uint) (tab.valptr[i] + code - tab.mincode[i]); 631 if (j >= tab.values.length) { 632 e = ERROR.data; 633 return 0; 634 } 635 return tab.values[j]; 636 } 637 638 // F.2.2.5 and F.18 639 ubyte nextbit(ref JPEGDecoder dc, ref ubyte e) 640 { 641 if (!dc.bits_left) { 642 dc.cb = read_u8(dc.rc); 643 dc.bits_left = 8; 644 645 if (dc.cb == 0xff) { 646 if (read_u8(dc.rc) != 0x0) { 647 e = dc.rc.fail ? ERROR.stream : ERROR.data; // unexpected marker 648 return 0; 649 } 650 } 651 if (dc.rc.fail) e = ERROR.stream; 652 } 653 654 ubyte r = dc.cb >> 7; 655 dc.cb <<= 1; 656 dc.bits_left -= 1; 657 return r; 658 } 659 660 ubyte[] reconstruct(in ref JPEGDecoder dc, ref ubyte e) 661 { 662 ubyte[] result = new_buffer(dc.width * dc.height * dc.tchans, e); 663 if (e) return null; 664 665 switch (dc.num_comps * 10 + dc.tchans) { 666 case 34, 33: 667 // Use specialized bilinear filtering functions for the frequent cases 668 // where Cb & Cr channels have half resolution. 669 if (dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2 && 670 dc.comps[0].sfx + dc.comps[0].sfy >= 3 && 671 dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 && 672 dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) 673 { 674 void function(in ubyte[], in ubyte[], ubyte[]) nothrow resample; 675 switch (dc.comps[0].sfx * 10 + dc.comps[0].sfy) { 676 case 22: resample = &upsample_h2_v2; break; 677 case 21: resample = &upsample_h2_v1; break; 678 case 12: resample = &upsample_h1_v2; break; 679 default: assert(0); 680 } 681 682 ubyte[] comps1n2 = new_buffer(dc.width * 2, e); 683 if (e) return null; 684 scope(exit) _free(comps1n2.ptr); 685 ubyte[] comp1 = comps1n2[0..dc.width]; 686 ubyte[] comp2 = comps1n2[dc.width..$]; 687 688 size_t s = 0; 689 size_t di = 0; 690 foreach (j; 0 .. dc.height) { 691 const size_t mi = j / dc.comps[0].sfy; 692 const size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy) 693 ? mi : mi - 1 + s * 2; 694 s = s ^ 1; 695 696 const size_t cs = dc.num_mcu_x * dc.comps[1].sfx * 8; 697 const size_t cl0 = mi * cs; 698 const size_t cl1 = si * cs; 699 resample(dc.comps[1].data[cl0 .. cl0 + dc.comps[1].x], 700 dc.comps[1].data[cl1 .. cl1 + dc.comps[1].x], 701 comp1[]); 702 resample(dc.comps[2].data[cl0 .. cl0 + dc.comps[2].x], 703 dc.comps[2].data[cl1 .. cl1 + dc.comps[2].x], 704 comp2[]); 705 706 foreach (i; 0 .. dc.width) { 707 result[di .. di+3] = ycbcr_to_rgb( 708 dc.comps[0].data[j * dc.num_mcu_x * dc.comps[0].sfx * 8 + i], 709 comp1[i], 710 comp2[i], 711 ); 712 if (dc.tchans == 4) 713 result[di+3] = 255; 714 di += dc.tchans; 715 } 716 } 717 718 return result; 719 } 720 721 foreach (const ref comp; dc.comps[0..dc.num_comps]) { 722 if (comp.sfx != dc.hmax || comp.sfy != dc.vmax) 723 return dc.upsample(result); 724 } 725 726 size_t si, di; 727 foreach (j; 0 .. dc.height) { 728 foreach (i; 0 .. dc.width) { 729 result[di .. di+3] = ycbcr_to_rgb( 730 dc.comps[0].data[si+i], 731 dc.comps[1].data[si+i], 732 dc.comps[2].data[si+i], 733 ); 734 if (dc.tchans == 4) 735 result[di+3] = 255; 736 di += dc.tchans; 737 } 738 si += dc.num_mcu_x * dc.comps[0].sfx * 8; 739 } 740 return result; 741 case 32, 12, 31, 11: 742 const comp = &dc.comps[0]; 743 if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) { 744 size_t si, di; 745 if (dc.tchans == 2) { 746 foreach (j; 0 .. dc.height) { 747 foreach (i; 0 .. dc.width) { 748 result[di++] = comp.data[si+i]; 749 result[di++] = 255; 750 } 751 si += dc.num_mcu_x * comp.sfx * 8; 752 } 753 } else { 754 foreach (j; 0 .. dc.height) { 755 result[di .. di+dc.width] = comp.data[si .. si+dc.width]; 756 si += dc.num_mcu_x * comp.sfx * 8; 757 di += dc.width; 758 } 759 } 760 return result; 761 } else { 762 // need to resample (haven't tested this...) 763 return dc.upsample_luma(result); 764 } 765 case 14, 13: 766 const comp = &dc.comps[0]; 767 size_t si, di; 768 foreach (j; 0 .. dc.height) { 769 foreach (i; 0 .. dc.width) { 770 result[di .. di+3] = comp.data[si+i]; 771 if (dc.tchans == 4) 772 result[di+3] = 255; 773 di += dc.tchans; 774 } 775 si += dc.num_mcu_x * comp.sfx * 8; 776 } 777 return result; 778 default: 779 assert(0); 780 } 781 } 782 783 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 784 { 785 ubyte mix(ubyte mm, ubyte ms, ubyte sm, ubyte ss) 786 { 787 return cast(ubyte) (( cast(uint) mm * 3 * 3 788 + cast(uint) ms * 3 * 1 789 + cast(uint) sm * 1 * 3 790 + cast(uint) ss * 1 * 1 791 + 8) / 16); 792 } 793 794 result[0] = cast(ubyte) (( cast(uint) line0[0] * 3 795 + cast(uint) line1[0] * 1 796 + 2) / 4); 797 if (line0.length == 1) 798 return; 799 result[1] = mix(line0[0], line0[1], line1[0], line1[1]); 800 801 size_t di = 2; 802 foreach (i; 1 .. line0.length) { 803 result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]); 804 di += 1; 805 if (i == line0.length-1) { 806 if (di < result.length) { 807 result[di] = cast(ubyte) (( cast(uint) line0[i] * 3 808 + cast(uint) line1[i] * 1 809 + 2) / 4); 810 } 811 return; 812 } 813 result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]); 814 di += 1; 815 } 816 } 817 818 void upsample_h2_v1(in ubyte[] line0, in ubyte[] _line1, ubyte[] result) 819 { 820 result[0] = line0[0]; 821 if (line0.length == 1) 822 return; 823 result[1] = cast(ubyte) (( cast(uint) line0[0] * 3 824 + cast(uint) line0[1] * 1 825 + 2) / 4); 826 size_t di = 2; 827 foreach (i; 1 .. line0.length) { 828 result[di] = cast(ubyte) (( cast(uint) line0[i-1] * 1 829 + cast(uint) line0[i+0] * 3 830 + 2) / 4); 831 di += 1; 832 if (i == line0.length-1) { 833 if (di < result.length) result[di] = line0[i]; 834 return; 835 } 836 result[di] = cast(ubyte) (( cast(uint) line0[i+0] * 3 837 + cast(uint) line0[i+1] * 1 838 + 2) / 4); 839 di += 1; 840 } 841 } 842 843 void upsample_h1_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) 844 { 845 foreach (i; 0 .. result.length) { 846 result[i] = cast(ubyte) (( cast(uint) line0[i] * 3 847 + cast(uint) line1[i] * 1 848 + 2) / 4); 849 } 850 } 851 852 // Nearest neighbor 853 ubyte[] upsample_luma(in ref JPEGDecoder dc, ubyte[] result) 854 { 855 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 856 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 857 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 858 859 float y0 = y_step0 * 0.5; 860 size_t y0i = 0; 861 862 size_t di; 863 864 foreach (j; 0 .. dc.height) { 865 float x0 = x_step0 * 0.5; 866 size_t x0i = 0; 867 foreach (i; 0 .. dc.width) { 868 result[di] = dc.comps[0].data[y0i + x0i]; 869 if (dc.tchans == 2) 870 result[di+1] = 255; 871 di += dc.tchans; 872 x0 += x_step0; 873 if (x0 >= 1.0) { 874 x0 -= 1.0; 875 x0i += 1; 876 } 877 } 878 y0 += y_step0; 879 if (y0 >= 1.0) { 880 y0 -= 1.0; 881 y0i += stride0; 882 } 883 } 884 return result; 885 } 886 887 // Nearest neighbor 888 ubyte[] upsample(in ref JPEGDecoder dc, ubyte[] result) 889 { 890 const size_t stride0 = dc.num_mcu_x * dc.comps[0].sfx * 8; 891 const size_t stride1 = dc.num_mcu_x * dc.comps[1].sfx * 8; 892 const size_t stride2 = dc.num_mcu_x * dc.comps[2].sfx * 8; 893 894 const y_step0 = cast(float) dc.comps[0].sfy / cast(float) dc.vmax; 895 const y_step1 = cast(float) dc.comps[1].sfy / cast(float) dc.vmax; 896 const y_step2 = cast(float) dc.comps[2].sfy / cast(float) dc.vmax; 897 const x_step0 = cast(float) dc.comps[0].sfx / cast(float) dc.hmax; 898 const x_step1 = cast(float) dc.comps[1].sfx / cast(float) dc.hmax; 899 const x_step2 = cast(float) dc.comps[2].sfx / cast(float) dc.hmax; 900 901 float y0 = y_step0 * 0.5; 902 float y1 = y_step1 * 0.5; 903 float y2 = y_step2 * 0.5; 904 size_t y0i = 0; 905 size_t y1i = 0; 906 size_t y2i = 0; 907 908 size_t di; 909 910 foreach (_j; 0 .. dc.height) { 911 float x0 = x_step0 * 0.5; 912 float x1 = x_step1 * 0.5; 913 float x2 = x_step2 * 0.5; 914 size_t x0i = 0; 915 size_t x1i = 0; 916 size_t x2i = 0; 917 foreach (i; 0 .. dc.width) { 918 result[di .. di+3] = ycbcr_to_rgb( 919 dc.comps[0].data[y0i + x0i], 920 dc.comps[1].data[y1i + x1i], 921 dc.comps[2].data[y2i + x2i], 922 ); 923 if (dc.tchans == 4) 924 result[di+3] = 255; 925 di += dc.tchans; 926 x0 += x_step0; 927 x1 += x_step1; 928 x2 += x_step2; 929 if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; } 930 if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; } 931 if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; } 932 } 933 y0 += y_step0; 934 y1 += y_step1; 935 y2 += y_step2; 936 if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; } 937 if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; } 938 if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; } 939 } 940 return result; 941 } 942 943 ubyte[3] ycbcr_to_rgb(in ubyte y, in ubyte cb, in ubyte cr) pure { 944 ubyte[3] rgb = void; 945 rgb[0] = clamp(y + 1.402*(cr-128)); 946 rgb[1] = clamp(y - 0.34414*(cb-128) - 0.71414*(cr-128)); 947 rgb[2] = clamp(y + 1.772*(cb-128)); 948 return rgb; 949 } 950 951 ubyte clamp(in float x) pure { 952 if (x < 0) return 0; 953 if (255 < x) return 255; 954 return cast(ubyte) x; 955 } 956 957 // ------------------------------------------------------------ 958 // The IDCT stuff here (to the next dashed line) is copied and adapted from 959 // stb_image which is released under public domain. Many thanks to stb_image 960 // author, Sean Barrett. 961 // Link: https://github.com/nothings/stb/blob/master/stb_image.h 962 963 pure int f2f(float x) { return cast(int) (x * 4096 + 0.5); } 964 pure int fsh(int x) { return x << 12; } 965 966 // from stb_image, derived from jidctint -- DCT_ISLOW 967 pure void STBI__IDCT_1D(ref int t0, ref int t1, ref int t2, ref int t3, 968 ref int x0, ref int x1, ref int x2, ref int x3, 969 int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7) 970 { 971 int p1,p2,p3,p4,p5; 972 //int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; 973 p2 = s2; 974 p3 = s6; 975 p1 = (p2+p3) * f2f(0.5411961f); 976 t2 = p1 + p3 * f2f(-1.847759065f); 977 t3 = p1 + p2 * f2f( 0.765366865f); 978 p2 = s0; 979 p3 = s4; 980 t0 = fsh(p2+p3); 981 t1 = fsh(p2-p3); 982 x0 = t0+t3; 983 x3 = t0-t3; 984 x1 = t1+t2; 985 x2 = t1-t2; 986 t0 = s7; 987 t1 = s5; 988 t2 = s3; 989 t3 = s1; 990 p3 = t0+t2; 991 p4 = t1+t3; 992 p1 = t0+t3; 993 p2 = t1+t2; 994 p5 = (p3+p4)*f2f( 1.175875602f); 995 t0 = t0*f2f( 0.298631336f); 996 t1 = t1*f2f( 2.053119869f); 997 t2 = t2*f2f( 3.072711026f); 998 t3 = t3*f2f( 1.501321110f); 999 p1 = p5 + p1*f2f(-0.899976223f); 1000 p2 = p5 + p2*f2f(-2.562915447f); 1001 p3 = p3*f2f(-1.961570560f); 1002 p4 = p4*f2f(-0.390180644f); 1003 t3 += p1+p4; 1004 t2 += p2+p3; 1005 t1 += p2+p4; 1006 t0 += p1+p3; 1007 } 1008 1009 // idct and level-shift 1010 pure void stbi__idct_block(ubyte* dst, in int dst_stride, in ref short[64] data) 1011 { 1012 int i; 1013 int[64] val; 1014 int* v = val.ptr; 1015 const(short)* d = data.ptr; 1016 1017 // columns 1018 for (i=0; i < 8; ++i,++d, ++v) { 1019 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing 1020 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 1021 && d[40]==0 && d[48]==0 && d[56]==0) { 1022 // no shortcut 0 seconds 1023 // (1|2|3|4|5|6|7)==0 0 seconds 1024 // all separate -0.047 seconds 1025 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds 1026 int dcterm = d[0] << 2; 1027 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; 1028 } else { 1029 int t0,t1,t2,t3,x0,x1,x2,x3; 1030 STBI__IDCT_1D( 1031 t0, t1, t2, t3, 1032 x0, x1, x2, x3, 1033 d[ 0], d[ 8], d[16], d[24], 1034 d[32], d[40], d[48], d[56] 1035 ); 1036 // constants scaled things up by 1<<12; let's bring them back 1037 // down, but keep 2 extra bits of precision 1038 x0 += 512; x1 += 512; x2 += 512; x3 += 512; 1039 v[ 0] = (x0+t3) >> 10; 1040 v[56] = (x0-t3) >> 10; 1041 v[ 8] = (x1+t2) >> 10; 1042 v[48] = (x1-t2) >> 10; 1043 v[16] = (x2+t1) >> 10; 1044 v[40] = (x2-t1) >> 10; 1045 v[24] = (x3+t0) >> 10; 1046 v[32] = (x3-t0) >> 10; 1047 } 1048 } 1049 1050 ubyte* o = dst; 1051 for (i=0, v=val.ptr; i < 8; ++i,v+=8,o+=dst_stride) { 1052 // no fast case since the first 1D IDCT spread components out 1053 int t0,t1,t2,t3,x0,x1,x2,x3; 1054 STBI__IDCT_1D( 1055 t0, t1, t2, t3, 1056 x0, x1, x2, x3, 1057 v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] 1058 ); 1059 // constants scaled things up by 1<<12, plus we had 1<<2 from first 1060 // loop, plus horizontal and vertical each scale by sqrt(8) so together 1061 // we've got an extra 1<<3, so 1<<17 total we need to remove. 1062 // so we want to round that, which means adding 0.5 * 1<<17, 1063 // aka 65536. Also, we'll end up with -128 to 127 that we want 1064 // to encode as 0-255 by adding 128, so we'll add that before the shift 1065 x0 += 65536 + (128<<17); 1066 x1 += 65536 + (128<<17); 1067 x2 += 65536 + (128<<17); 1068 x3 += 65536 + (128<<17); 1069 // tried computing the shifts into temps, or'ing the temps to see 1070 // if any were out of range, but that was slower 1071 o[0] = stbi__clamp((x0+t3) >> 17); 1072 o[7] = stbi__clamp((x0-t3) >> 17); 1073 o[1] = stbi__clamp((x1+t2) >> 17); 1074 o[6] = stbi__clamp((x1-t2) >> 17); 1075 o[2] = stbi__clamp((x2+t1) >> 17); 1076 o[5] = stbi__clamp((x2-t1) >> 17); 1077 o[3] = stbi__clamp((x3+t0) >> 17); 1078 o[4] = stbi__clamp((x3-t0) >> 17); 1079 } 1080 } 1081 1082 // clamp to 0-255 1083 pure ubyte stbi__clamp(int x) { 1084 if (cast(uint) x > 255) { 1085 if (x < 0) return 0; 1086 if (x > 255) return 255; 1087 } 1088 return cast(ubyte) x; 1089 } 1090 1091 // ------------------------------------------------------------