From 827d925b4a43f29cfceaabce158baedae040c21f Mon Sep 17 00:00:00 2001 From: zhouhua Date: Thu, 16 Apr 2026 23:27:47 +0800 Subject: [PATCH] Expand JPEG-LS parameter helpers toward NEAR 255 --- fpga/sim/tb_jls_coding_params.sv | 26 +-- fpga/sim/tb_jls_preset_defaults.sv | 28 +-- fpga/verilog/jls_coding_params.sv | 296 +++++++++------------------- fpga/verilog/jls_common_pkg.sv | 4 +- fpga/verilog/jls_near_scale_mul.sv | 51 +++-- fpga/verilog/jls_preset_defaults.sv | 197 ++++++++++-------- 6 files changed, 278 insertions(+), 324 deletions(-) diff --git a/fpga/sim/tb_jls_coding_params.sv b/fpga/sim/tb_jls_coding_params.sv index 5d0d3cf..fdd77d9 100644 --- a/fpga/sim/tb_jls_coding_params.sv +++ b/fpga/sim/tb_jls_coding_params.sv @@ -12,11 +12,11 @@ module tb_jls_coding_params; // NEAR stimulus shared by all parameterized instances. - logic [5:0] near_8; - logic [5:0] near_10; - logic [5:0] near_12; - logic [5:0] near_14; - logic [5:0] near_16; + logic [7:0] near_8; + logic [7:0] near_10; + logic [7:0] near_12; + logic [7:0] near_14; + logic [7:0] near_16; // DUT outputs. logic [16:0] range_8; @@ -81,11 +81,11 @@ module tb_jls_coding_params; ); initial begin - near_8 = 6'd0; - near_10 = 6'd16; - near_12 = 6'd31; - near_14 = 6'd8; - near_16 = 6'd0; + near_8 = 8'd0; + near_10 = 8'd16; + near_12 = 8'd31; + near_14 = 8'd8; + near_16 = 8'd0; #1; if (range_8 !== 17'd256 || qbpp_8 !== 5'd8 || limit_8 !== 7'd32) begin @@ -108,11 +108,11 @@ module tb_jls_coding_params; $fatal(1, "16-bit NEAR=0 coding params mismatch"); end - near_8 = 6'd63; - near_16 = 6'd31; + near_8 = 8'd63; + near_16 = 8'd31; #1; - if (range_8 !== 17'd6 || qbpp_8 !== 5'd3 || limit_8 !== 7'd32) begin + if (range_8 !== 17'd4 || qbpp_8 !== 5'd2 || limit_8 !== 7'd32) begin $fatal(1, "8-bit defensive NEAR clamp mismatch"); end diff --git a/fpga/sim/tb_jls_preset_defaults.sv b/fpga/sim/tb_jls_preset_defaults.sv index b0bbfe1..100c86e 100644 --- a/fpga/sim/tb_jls_preset_defaults.sv +++ b/fpga/sim/tb_jls_preset_defaults.sv @@ -12,12 +12,12 @@ module tb_jls_preset_defaults; // Test NEAR inputs. - logic [5:0] near_8b_0; - logic [5:0] near_8b_31; - logic [5:0] near_10b_4; - logic [5:0] near_12b_1; - logic [5:0] near_14b_2; - logic [5:0] near_16b_63; + logic [7:0] near_8b_0; + logic [7:0] near_8b_31; + logic [7:0] near_10b_4; + logic [7:0] near_12b_1; + logic [7:0] near_14b_2; + logic [7:0] near_16b_63; // Preset outputs for each instance under test. logic [15:0] maxval_8b_0; @@ -118,12 +118,12 @@ module tb_jls_preset_defaults; ); initial begin - near_8b_0 = 6'd0; - near_8b_31 = 6'd31; - near_10b_4 = 6'd4; - near_12b_1 = 6'd1; - near_14b_2 = 6'd2; - near_16b_63 = 6'd63; + near_8b_0 = 8'd0; + near_8b_31 = 8'd31; + near_10b_4 = 8'd4; + near_12b_1 = 8'd1; + near_14b_2 = 8'd2; + near_16b_63 = 8'd63; #1; @@ -157,8 +157,8 @@ module tb_jls_preset_defaults; $fatal(1, "14-bit NEAR=2 defaults mismatch"); end - if (maxval_16b_63 !== 16'hFFFF || t1_16b_63 !== 16'd111 || - t2_16b_63 !== 16'd222 || t3_16b_63 !== 16'd493 || + if (maxval_16b_63 !== 16'hFFFF || t1_16b_63 !== 16'd207 || + t2_16b_63 !== 16'd382 || t3_16b_63 !== 16'd717 || reset_16b_63 !== 16'd64) begin $fatal(1, "16-bit NEAR clamp defaults mismatch"); end diff --git a/fpga/verilog/jls_coding_params.sv b/fpga/verilog/jls_coding_params.sv index 5f4ac7a..a2af940 100644 --- a/fpga/verilog/jls_coding_params.sv +++ b/fpga/verilog/jls_coding_params.sv @@ -6,9 +6,10 @@ // Trace : docs/jls_traceability.md#jls-coding-parameters // Example : PIX_WIDTH=8,NEAR=0 gives RANGE=256,qbpp=8,LIMIT=32. // -// JPEG-LS coding parameter lookup. RANGE and qbpp depend on NEAR, but NEAR is -// limited to 0..31 in this project. A lookup table avoids a synthesized -// runtime divider and keeps this strip-level control path timing friendly. +// JPEG-LS coding parameter helper. RANGE depends on NEAR through +// RANGE = floor((MAXVAL + 2*NEAR)/(2*NEAR+1)) + 1. This is a strip-level +// control path, so the generic arithmetic is acceptable here and keeps the +// pixel pipeline free from runtime division logic. `default_nettype none @@ -17,7 +18,7 @@ module jls_coding_params #( parameter int PIX_WIDTH = 16 ) ( // JPEG-LS NEAR parameter for the current strip frame. - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // JPEG-LS RANGE parameter. output logic [16:0] RANGE, @@ -29,221 +30,120 @@ module jls_coding_params #( output logic [6:0] LIMIT ); - // Defensive clamp even though upstream NEAR is already limited to 31. - logic [5:0] near_clamped; - logic [16:0] range_next; - logic [4:0] qbpp_next; - logic [6:0] limit_next; + integer maximum_sample_value_int; + integer maximum_near_int; + integer near_clamped_int; + integer denominator_int; + integer range_int; + integer qbpp_int; + integer limit_int; always_comb begin - near_clamped = NEAR; - if (NEAR > 6'd31) begin - near_clamped = 6'd31; - end - end - - always_comb begin - range_next = 17'd65536; - qbpp_next = 5'd16; - limit_next = 7'd64; - + maximum_sample_value_int = 65535; + limit_int = 64; case (PIX_WIDTH) 8: begin - limit_next = 7'd32; - case (near_clamped) - 6'd0: begin range_next = 17'd256; qbpp_next = 5'd8; end - 6'd1: begin range_next = 17'd86; qbpp_next = 5'd7; end - 6'd2: begin range_next = 17'd52; qbpp_next = 5'd6; end - 6'd3: begin range_next = 17'd38; qbpp_next = 5'd6; end - 6'd4: begin range_next = 17'd30; qbpp_next = 5'd5; end - 6'd5: begin range_next = 17'd25; qbpp_next = 5'd5; end - 6'd6: begin range_next = 17'd21; qbpp_next = 5'd5; end - 6'd7: begin range_next = 17'd18; qbpp_next = 5'd5; end - 6'd8: begin range_next = 17'd16; qbpp_next = 5'd4; end - 6'd9: begin range_next = 17'd15; qbpp_next = 5'd4; end - 6'd10: begin range_next = 17'd14; qbpp_next = 5'd4; end - 6'd11: begin range_next = 17'd13; qbpp_next = 5'd4; end - 6'd12: begin range_next = 17'd12; qbpp_next = 5'd4; end - 6'd13: begin range_next = 17'd11; qbpp_next = 5'd4; end - 6'd14: begin range_next = 17'd10; qbpp_next = 5'd4; end - 6'd15: begin range_next = 17'd10; qbpp_next = 5'd4; end - 6'd16: begin range_next = 17'd9; qbpp_next = 5'd4; end - 6'd17: begin range_next = 17'd9; qbpp_next = 5'd4; end - 6'd18: begin range_next = 17'd8; qbpp_next = 5'd3; end - 6'd19: begin range_next = 17'd8; qbpp_next = 5'd3; end - 6'd20: begin range_next = 17'd8; qbpp_next = 5'd3; end - 6'd21: begin range_next = 17'd7; qbpp_next = 5'd3; end - 6'd22: begin range_next = 17'd7; qbpp_next = 5'd3; end - 6'd23: begin range_next = 17'd7; qbpp_next = 5'd3; end - 6'd24: begin range_next = 17'd7; qbpp_next = 5'd3; end - 6'd25: begin range_next = 17'd6; qbpp_next = 5'd3; end - 6'd26: begin range_next = 17'd6; qbpp_next = 5'd3; end - 6'd27: begin range_next = 17'd6; qbpp_next = 5'd3; end - 6'd28: begin range_next = 17'd6; qbpp_next = 5'd3; end - 6'd29: begin range_next = 17'd6; qbpp_next = 5'd3; end - 6'd30: begin range_next = 17'd6; qbpp_next = 5'd3; end - default: begin range_next = 17'd6; qbpp_next = 5'd3; end - endcase + maximum_sample_value_int = 255; + limit_int = 32; end 10: begin - limit_next = 7'd40; - case (near_clamped) - 6'd0: begin range_next = 17'd1024; qbpp_next = 5'd10; end - 6'd1: begin range_next = 17'd342; qbpp_next = 5'd9; end - 6'd2: begin range_next = 17'd206; qbpp_next = 5'd8; end - 6'd3: begin range_next = 17'd148; qbpp_next = 5'd8; end - 6'd4: begin range_next = 17'd115; qbpp_next = 5'd7; end - 6'd5: begin range_next = 17'd94; qbpp_next = 5'd7; end - 6'd6: begin range_next = 17'd80; qbpp_next = 5'd7; end - 6'd7: begin range_next = 17'd70; qbpp_next = 5'd7; end - 6'd8: begin range_next = 17'd62; qbpp_next = 5'd6; end - 6'd9: begin range_next = 17'd55; qbpp_next = 5'd6; end - 6'd10: begin range_next = 17'd50; qbpp_next = 5'd6; end - 6'd11: begin range_next = 17'd46; qbpp_next = 5'd6; end - 6'd12: begin range_next = 17'd42; qbpp_next = 5'd6; end - 6'd13: begin range_next = 17'd39; qbpp_next = 5'd6; end - 6'd14: begin range_next = 17'd37; qbpp_next = 5'd6; end - 6'd15: begin range_next = 17'd34; qbpp_next = 5'd6; end - 6'd16: begin range_next = 17'd32; qbpp_next = 5'd5; end - 6'd17: begin range_next = 17'd31; qbpp_next = 5'd5; end - 6'd18: begin range_next = 17'd29; qbpp_next = 5'd5; end - 6'd19: begin range_next = 17'd28; qbpp_next = 5'd5; end - 6'd20: begin range_next = 17'd26; qbpp_next = 5'd5; end - 6'd21: begin range_next = 17'd25; qbpp_next = 5'd5; end - 6'd22: begin range_next = 17'd24; qbpp_next = 5'd5; end - 6'd23: begin range_next = 17'd23; qbpp_next = 5'd5; end - 6'd24: begin range_next = 17'd22; qbpp_next = 5'd5; end - 6'd25: begin range_next = 17'd22; qbpp_next = 5'd5; end - 6'd26: begin range_next = 17'd21; qbpp_next = 5'd5; end - 6'd27: begin range_next = 17'd20; qbpp_next = 5'd5; end - 6'd28: begin range_next = 17'd19; qbpp_next = 5'd5; end - 6'd29: begin range_next = 17'd19; qbpp_next = 5'd5; end - 6'd30: begin range_next = 17'd18; qbpp_next = 5'd5; end - default: begin range_next = 17'd18; qbpp_next = 5'd5; end - endcase + maximum_sample_value_int = 1023; + limit_int = 40; end 12: begin - limit_next = 7'd48; - case (near_clamped) - 6'd0: begin range_next = 17'd4096; qbpp_next = 5'd12; end - 6'd1: begin range_next = 17'd1366; qbpp_next = 5'd11; end - 6'd2: begin range_next = 17'd820; qbpp_next = 5'd10; end - 6'd3: begin range_next = 17'd586; qbpp_next = 5'd10; end - 6'd4: begin range_next = 17'd456; qbpp_next = 5'd9; end - 6'd5: begin range_next = 17'd374; qbpp_next = 5'd9; end - 6'd6: begin range_next = 17'd316; qbpp_next = 5'd9; end - 6'd7: begin range_next = 17'd274; qbpp_next = 5'd9; end - 6'd8: begin range_next = 17'd242; qbpp_next = 5'd8; end - 6'd9: begin range_next = 17'd217; qbpp_next = 5'd8; end - 6'd10: begin range_next = 17'd196; qbpp_next = 5'd8; end - 6'd11: begin range_next = 17'd180; qbpp_next = 5'd8; end - 6'd12: begin range_next = 17'd165; qbpp_next = 5'd8; end - 6'd13: begin range_next = 17'd153; qbpp_next = 5'd8; end - 6'd14: begin range_next = 17'd143; qbpp_next = 5'd8; end - 6'd15: begin range_next = 17'd134; qbpp_next = 5'd8; end - 6'd16: begin range_next = 17'd126; qbpp_next = 5'd7; end - 6'd17: begin range_next = 17'd118; qbpp_next = 5'd7; end - 6'd18: begin range_next = 17'd112; qbpp_next = 5'd7; end - 6'd19: begin range_next = 17'd106; qbpp_next = 5'd7; end - 6'd20: begin range_next = 17'd101; qbpp_next = 5'd7; end - 6'd21: begin range_next = 17'd97; qbpp_next = 5'd7; end - 6'd22: begin range_next = 17'd92; qbpp_next = 5'd7; end - 6'd23: begin range_next = 17'd89; qbpp_next = 5'd7; end - 6'd24: begin range_next = 17'd85; qbpp_next = 5'd7; end - 6'd25: begin range_next = 17'd82; qbpp_next = 5'd7; end - 6'd26: begin range_next = 17'd79; qbpp_next = 5'd7; end - 6'd27: begin range_next = 17'd76; qbpp_next = 5'd7; end - 6'd28: begin range_next = 17'd73; qbpp_next = 5'd7; end - 6'd29: begin range_next = 17'd71; qbpp_next = 5'd7; end - 6'd30: begin range_next = 17'd69; qbpp_next = 5'd7; end - default: begin range_next = 17'd66; qbpp_next = 5'd7; end - endcase + maximum_sample_value_int = 4095; + limit_int = 48; end 14: begin - limit_next = 7'd56; - case (near_clamped) - 6'd0: begin range_next = 17'd16384; qbpp_next = 5'd14; end - 6'd1: begin range_next = 17'd5462; qbpp_next = 5'd13; end - 6'd2: begin range_next = 17'd3278; qbpp_next = 5'd12; end - 6'd3: begin range_next = 17'd2342; qbpp_next = 5'd12; end - 6'd4: begin range_next = 17'd1822; qbpp_next = 5'd11; end - 6'd5: begin range_next = 17'd1491; qbpp_next = 5'd11; end - 6'd6: begin range_next = 17'd1262; qbpp_next = 5'd11; end - 6'd7: begin range_next = 17'd1094; qbpp_next = 5'd11; end - 6'd8: begin range_next = 17'd965; qbpp_next = 5'd10; end - 6'd9: begin range_next = 17'd864; qbpp_next = 5'd10; end - 6'd10: begin range_next = 17'd782; qbpp_next = 5'd10; end - 6'd11: begin range_next = 17'd714; qbpp_next = 5'd10; end - 6'd12: begin range_next = 17'd657; qbpp_next = 5'd10; end - 6'd13: begin range_next = 17'd608; qbpp_next = 5'd10; end - 6'd14: begin range_next = 17'd566; qbpp_next = 5'd10; end - 6'd15: begin range_next = 17'd530; qbpp_next = 5'd10; end - 6'd16: begin range_next = 17'd498; qbpp_next = 5'd9; end - 6'd17: begin range_next = 17'd470; qbpp_next = 5'd9; end - 6'd18: begin range_next = 17'd444; qbpp_next = 5'd9; end - 6'd19: begin range_next = 17'd422; qbpp_next = 5'd9; end - 6'd20: begin range_next = 17'd401; qbpp_next = 5'd9; end - 6'd21: begin range_next = 17'd382; qbpp_next = 5'd9; end - 6'd22: begin range_next = 17'd366; qbpp_next = 5'd9; end - 6'd23: begin range_next = 17'd350; qbpp_next = 5'd9; end - 6'd24: begin range_next = 17'd336; qbpp_next = 5'd9; end - 6'd25: begin range_next = 17'd323; qbpp_next = 5'd9; end - 6'd26: begin range_next = 17'd311; qbpp_next = 5'd9; end - 6'd27: begin range_next = 17'd299; qbpp_next = 5'd9; end - 6'd28: begin range_next = 17'd289; qbpp_next = 5'd9; end - 6'd29: begin range_next = 17'd279; qbpp_next = 5'd9; end - 6'd30: begin range_next = 17'd270; qbpp_next = 5'd9; end - default: begin range_next = 17'd262; qbpp_next = 5'd9; end - endcase + maximum_sample_value_int = 16383; + limit_int = 56; end default: begin - limit_next = 7'd64; - case (near_clamped) - 6'd0: begin range_next = 17'd65536; qbpp_next = 5'd16; end - 6'd1: begin range_next = 17'd21846; qbpp_next = 5'd15; end - 6'd2: begin range_next = 17'd13108; qbpp_next = 5'd14; end - 6'd3: begin range_next = 17'd9364; qbpp_next = 5'd14; end - 6'd4: begin range_next = 17'd7283; qbpp_next = 5'd13; end - 6'd5: begin range_next = 17'd5959; qbpp_next = 5'd13; end - 6'd6: begin range_next = 17'd5043; qbpp_next = 5'd13; end - 6'd7: begin range_next = 17'd4370; qbpp_next = 5'd13; end - 6'd8: begin range_next = 17'd3856; qbpp_next = 5'd12; end - 6'd9: begin range_next = 17'd3451; qbpp_next = 5'd12; end - 6'd10: begin range_next = 17'd3122; qbpp_next = 5'd12; end - 6'd11: begin range_next = 17'd2851; qbpp_next = 5'd12; end - 6'd12: begin range_next = 17'd2623; qbpp_next = 5'd12; end - 6'd13: begin range_next = 17'd2429; qbpp_next = 5'd12; end - 6'd14: begin range_next = 17'd2261; qbpp_next = 5'd12; end - 6'd15: begin range_next = 17'd2116; qbpp_next = 5'd12; end - 6'd16: begin range_next = 17'd1987; qbpp_next = 5'd11; end - 6'd17: begin range_next = 17'd1874; qbpp_next = 5'd11; end - 6'd18: begin range_next = 17'd1773; qbpp_next = 5'd11; end - 6'd19: begin range_next = 17'd1682; qbpp_next = 5'd11; end - 6'd20: begin range_next = 17'd1600; qbpp_next = 5'd11; end - 6'd21: begin range_next = 17'd1526; qbpp_next = 5'd11; end - 6'd22: begin range_next = 17'd1458; qbpp_next = 5'd11; end - 6'd23: begin range_next = 17'd1396; qbpp_next = 5'd11; end - 6'd24: begin range_next = 17'd1339; qbpp_next = 5'd11; end - 6'd25: begin range_next = 17'd1286; qbpp_next = 5'd11; end - 6'd26: begin range_next = 17'd1238; qbpp_next = 5'd11; end - 6'd27: begin range_next = 17'd1193; qbpp_next = 5'd11; end - 6'd28: begin range_next = 17'd1151; qbpp_next = 5'd11; end - 6'd29: begin range_next = 17'd1112; qbpp_next = 5'd11; end - 6'd30: begin range_next = 17'd1076; qbpp_next = 5'd11; end - default: begin range_next = 17'd1042; qbpp_next = 5'd11; end - endcase + maximum_sample_value_int = 65535; + limit_int = 64; end endcase end always_comb begin - RANGE = range_next; - qbpp = qbpp_next; - LIMIT = limit_next; + maximum_near_int = maximum_sample_value_int / 2; + if (maximum_near_int > 255) begin + maximum_near_int = 255; + end + end + + always_comb begin + near_clamped_int = NEAR; + if (near_clamped_int > maximum_near_int) begin + near_clamped_int = maximum_near_int; + end + end + + always_comb begin + denominator_int = (2 * near_clamped_int) + 1; + range_int = ((maximum_sample_value_int + (2 * near_clamped_int)) / denominator_int) + 1; + end + + always_comb begin + qbpp_int = 0; + if (range_int > 1) begin + qbpp_int = 1; + if (range_int > 2) begin + qbpp_int = 2; + end + if (range_int > 4) begin + qbpp_int = 3; + end + if (range_int > 8) begin + qbpp_int = 4; + end + if (range_int > 16) begin + qbpp_int = 5; + end + if (range_int > 32) begin + qbpp_int = 6; + end + if (range_int > 64) begin + qbpp_int = 7; + end + if (range_int > 128) begin + qbpp_int = 8; + end + if (range_int > 256) begin + qbpp_int = 9; + end + if (range_int > 512) begin + qbpp_int = 10; + end + if (range_int > 1024) begin + qbpp_int = 11; + end + if (range_int > 2048) begin + qbpp_int = 12; + end + if (range_int > 4096) begin + qbpp_int = 13; + end + if (range_int > 8192) begin + qbpp_int = 14; + end + if (range_int > 16384) begin + qbpp_int = 15; + end + if (range_int > 32768) begin + qbpp_int = 16; + end + end + end + + always_comb begin + RANGE = range_int[16:0]; + qbpp = qbpp_int[4:0]; + LIMIT = limit_int[6:0]; end endmodule diff --git a/fpga/verilog/jls_common_pkg.sv b/fpga/verilog/jls_common_pkg.sv index a098f26..9fc87b1 100644 --- a/fpga/verilog/jls_common_pkg.sv +++ b/fpga/verilog/jls_common_pkg.sv @@ -20,8 +20,8 @@ package jls_common_pkg; // Output FIFO byte plus original-image-start sideband bit. localparam int JLS_OFIFO_WIDTH = 9; - // JPEG-LS NEAR is clamped to 0..31 in this IP. - localparam int JLS_NEAR_WIDTH = 6; + // JPEG-LS NEAR transport width. The standard valid range is 0..min(255,MAXVAL/2). + localparam int JLS_NEAR_WIDTH = 8; // JPEG marker byte used by SOI/SOF55/LSE/SOS/EOI marker generation. localparam logic [7:0] JLS_MARKER_PREFIX = 8'hFF; diff --git a/fpga/verilog/jls_near_scale_mul.sv b/fpga/verilog/jls_near_scale_mul.sv index f15dd1b..8b35bef 100644 --- a/fpga/verilog/jls_near_scale_mul.sv +++ b/fpga/verilog/jls_near_scale_mul.sv @@ -4,14 +4,11 @@ // Table : N/A // Pseudocode : product = multiplicand * (2 * NEAR + 1) // Trace : docs/jls_traceability.md#regular-error-quantization -// Example : multiplicand=5 and near_scale=5 gives 25. +// Example : multiplicand=5 and near_scale=9 gives 45. // -// The JPEG-LS NEAR scale is always an odd 6-bit positive value in the range -// 1..63. Vivado tended to map these narrow-scale multiplies into cascaded -// DSP48E1 structures, which put PCOUT->PCIN on the top timing path. This -// helper keeps the operation in carry chains with a fixed three-adder shape: -// one partial sum for bits [2:0], one partial sum for bits [5:3], then a final -// add. The caller provides the surrounding pipeline registers. +// The JPEG-LS NEAR scale is always an odd positive value in the range 1..511. +// This helper keeps the multiply in carry-chain style by grouping the odd-scale +// bits into three 3-bit slices: [2:0], [5:3], and [8:6]. `default_nettype none @@ -20,7 +17,7 @@ module jls_near_scale_mul #( parameter int OUTPUT_WIDTH = 41 ) ( input var logic signed [INPUT_WIDTH-1:0] multiplicand_i, - input var logic [5:0] near_scale_i, + input var logic [8:0] near_scale_i, output logic signed [OUTPUT_WIDTH-1:0] product_o ); @@ -32,7 +29,11 @@ module jls_near_scale_mul #( logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_3; logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_4; logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_5; + logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_6; + logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_7; + logic signed [OUTPUT_WIDTH-1:0] multiplicand_shift_8; logic signed [OUTPUT_WIDTH-1:0] partial_lo; + logic signed [OUTPUT_WIDTH-1:0] partial_mid; logic signed [OUTPUT_WIDTH-1:0] partial_hi; always_comb begin @@ -42,6 +43,9 @@ module jls_near_scale_mul #( multiplicand_shift_3 = multiplicand_ext <<< 3; multiplicand_shift_4 = multiplicand_ext <<< 4; multiplicand_shift_5 = multiplicand_ext <<< 5; + multiplicand_shift_6 = multiplicand_ext <<< 6; + multiplicand_shift_7 = multiplicand_ext <<< 7; + multiplicand_shift_8 = multiplicand_ext <<< 8; end always_comb begin @@ -59,19 +63,32 @@ module jls_near_scale_mul #( always_comb begin case (near_scale_i[5:3]) - 3'b000: partial_hi = {OUTPUT_WIDTH{1'b0}}; - 3'b001: partial_hi = multiplicand_shift_3; - 3'b010: partial_hi = multiplicand_shift_4; - 3'b011: partial_hi = multiplicand_shift_3 + multiplicand_shift_4; - 3'b100: partial_hi = multiplicand_shift_5; - 3'b101: partial_hi = multiplicand_shift_3 + multiplicand_shift_5; - 3'b110: partial_hi = multiplicand_shift_4 + multiplicand_shift_5; - default: partial_hi = multiplicand_shift_3 + multiplicand_shift_4 + multiplicand_shift_5; + 3'b000: partial_mid = {OUTPUT_WIDTH{1'b0}}; + 3'b001: partial_mid = multiplicand_shift_3; + 3'b010: partial_mid = multiplicand_shift_4; + 3'b011: partial_mid = multiplicand_shift_3 + multiplicand_shift_4; + 3'b100: partial_mid = multiplicand_shift_5; + 3'b101: partial_mid = multiplicand_shift_3 + multiplicand_shift_5; + 3'b110: partial_mid = multiplicand_shift_4 + multiplicand_shift_5; + default: partial_mid = multiplicand_shift_3 + multiplicand_shift_4 + multiplicand_shift_5; endcase end always_comb begin - product_o = partial_lo + partial_hi; + case (near_scale_i[8:6]) + 3'b000: partial_hi = {OUTPUT_WIDTH{1'b0}}; + 3'b001: partial_hi = multiplicand_shift_6; + 3'b010: partial_hi = multiplicand_shift_7; + 3'b011: partial_hi = multiplicand_shift_6 + multiplicand_shift_7; + 3'b100: partial_hi = multiplicand_shift_8; + 3'b101: partial_hi = multiplicand_shift_6 + multiplicand_shift_8; + 3'b110: partial_hi = multiplicand_shift_7 + multiplicand_shift_8; + default: partial_hi = multiplicand_shift_6 + multiplicand_shift_7 + multiplicand_shift_8; + endcase + end + + always_comb begin + product_o = partial_lo + partial_mid + partial_hi; end endmodule diff --git a/fpga/verilog/jls_preset_defaults.sv b/fpga/verilog/jls_preset_defaults.sv index eef6c71..70a6587 100644 --- a/fpga/verilog/jls_preset_defaults.sv +++ b/fpga/verilog/jls_preset_defaults.sv @@ -2,18 +2,15 @@ // Clause : Annex C.2.4.1.1 preset coding parameters // Figure : C.3 clamping function, referenced by default threshold rules // Table : Table C.1 valid preset parameters, Table C.2 RESET, Table C.3 defaults -// Pseudocode : Default threshold calculation for MAXVAL >= 128 +// Pseudocode : Default threshold calculation with MAXVAL and NEAR // Trace : docs/jls_traceability.md#jls-preset-defaults // Example : PIX_WIDTH=8, NEAR=0 gives MAXVAL=255, T1=3, T2=7, T3=21. // -// JPEG-LS default preset coding parameter helper. The first RTL version only -// supports 8/10/12/14/16-bit grayscale samples and NEAR is clamped to 0..31. -// For all supported sample precisions MAXVAL >= 128. With NEAR <= 31 the -// default thresholds do not hit MAXVAL, so the standard C.2.4.1.1 equations -// reduce to shallow shift-add expressions: -// T1 = FACTOR * 1 + 2 + 3*NEAR -// T2 = FACTOR * 4 + 3 + 5*NEAR -// T3 = FACTOR * 17 + 4 + 7*NEAR +// JPEG-LS default preset coding parameter helper. This version follows the +// full CharLS/ISO formula and supports NEAR up to min(255, MAXVAL/2). The +// work is strip-level control, not per-pixel, so one combinational divide is +// acceptable here and keeps the pixel pipeline independent of the wider NEAR +// range. `default_nettype none @@ -21,8 +18,8 @@ module jls_preset_defaults #( // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16. parameter int PIX_WIDTH = 16 ) ( - // Requested NEAR value. Values above 31 are clamped defensively. - input var logic [5:0] near, + // Requested NEAR value. Values above min(255, MAXVAL/2) are clamped. + input var logic [7:0] near, // JPEG-LS LSE MAXVAL preset coding parameter. output logic [15:0] preset_maxval, @@ -40,97 +37,137 @@ module jls_preset_defaults #( output logic [15:0] preset_reset ); - // Default RESET value from T.87 Table C.2. - localparam logic [15:0] DEFAULT_RESET_VALUE = 16'd64; + localparam int DEFAULT_RESET_VALUE = 64; + localparam int BASIC_T1 = 3; + localparam int BASIC_T2 = 7; + localparam int BASIC_T3 = 21; - // Defensive NEAR clamp for the project maximum. - logic [5:0] near_clamped; - - // Shift-add terms for 3*NEAR, 5*NEAR, and 7*NEAR. - logic [15:0] near_ext; - logic [15:0] near_times_2; - logic [15:0] near_times_3; - logic [15:0] near_times_4; - logic [15:0] near_times_5; - logic [15:0] near_times_7; - - // Base threshold values after applying the standard FACTOR term. - logic [15:0] base_t1; - logic [15:0] base_t2; - logic [15:0] base_t3; + integer maximum_sample_value_int; + integer maximum_near_int; + integer near_clamped_int; + integer factor_int; + integer threshold1_candidate; + integer threshold2_candidate; + integer threshold3_candidate; + integer threshold1_int; + integer threshold2_int; + integer threshold3_int; always_comb begin - near_clamped = near; - if (near > 6'd31) begin - near_clamped = 6'd31; - end - end - - always_comb begin - near_ext = {10'd0, near_clamped}; - near_times_2 = {near_ext[14:0], 1'b0}; - near_times_3 = near_times_2 + near_ext; - near_times_4 = {near_ext[13:0], 2'b00}; - near_times_5 = near_times_4 + near_ext; - near_times_7 = near_times_4 + near_times_2 + near_ext; - end - - always_comb begin - preset_maxval = 16'hFFFF; - base_t1 = 16'd18; - base_t2 = 16'd67; - base_t3 = 16'd276; - + maximum_sample_value_int = 65535; case (PIX_WIDTH) 8: begin - preset_maxval = 16'd255; - base_t1 = 16'd3; - base_t2 = 16'd7; - base_t3 = 16'd21; + maximum_sample_value_int = 255; end 10: begin - preset_maxval = 16'd1023; - base_t1 = 16'd6; - base_t2 = 16'd19; - base_t3 = 16'd72; + maximum_sample_value_int = 1023; end 12: begin - preset_maxval = 16'd4095; - base_t1 = 16'd18; - base_t2 = 16'd67; - base_t3 = 16'd276; + maximum_sample_value_int = 4095; end 14: begin - preset_maxval = 16'd16383; - base_t1 = 16'd18; - base_t2 = 16'd67; - base_t3 = 16'd276; - end - - 16: begin - preset_maxval = 16'hFFFF; - base_t1 = 16'd18; - base_t2 = 16'd67; - base_t3 = 16'd276; + maximum_sample_value_int = 16383; end default: begin - preset_maxval = 16'hFFFF; - base_t1 = 16'd18; - base_t2 = 16'd67; - base_t3 = 16'd276; + maximum_sample_value_int = 65535; end endcase end always_comb begin - preset_t1 = base_t1 + near_times_3; - preset_t2 = base_t2 + near_times_5; - preset_t3 = base_t3 + near_times_7; - preset_reset = DEFAULT_RESET_VALUE; + maximum_near_int = maximum_sample_value_int / 2; + if (maximum_near_int > 255) begin + maximum_near_int = 255; + end + end + + always_comb begin + near_clamped_int = near; + if (near_clamped_int > maximum_near_int) begin + near_clamped_int = maximum_near_int; + end + end + + always_comb begin + factor_int = 1; + if (maximum_sample_value_int >= 128) begin + factor_int = maximum_sample_value_int; + if (factor_int > 4095) begin + factor_int = 4095; + end + factor_int = (factor_int + 128) / 256; + + threshold1_candidate = (factor_int * (BASIC_T1 - 2)) + 2 + (3 * near_clamped_int); + if ((threshold1_candidate > maximum_sample_value_int) || + (threshold1_candidate < (near_clamped_int + 1))) begin + threshold1_int = near_clamped_int + 1; + end else begin + threshold1_int = threshold1_candidate; + end + + threshold2_candidate = (factor_int * (BASIC_T2 - 3)) + 3 + (5 * near_clamped_int); + if ((threshold2_candidate > maximum_sample_value_int) || + (threshold2_candidate < threshold1_int)) begin + threshold2_int = threshold1_int; + end else begin + threshold2_int = threshold2_candidate; + end + + threshold3_candidate = (factor_int * (BASIC_T3 - 4)) + 4 + (7 * near_clamped_int); + if ((threshold3_candidate > maximum_sample_value_int) || + (threshold3_candidate < threshold2_int)) begin + threshold3_int = threshold2_int; + end else begin + threshold3_int = threshold3_candidate; + end + end else begin + factor_int = 256 / (maximum_sample_value_int + 1); + + threshold1_candidate = (BASIC_T1 / factor_int) + (3 * near_clamped_int); + if (threshold1_candidate < 2) begin + threshold1_candidate = 2; + end + if ((threshold1_candidate > maximum_sample_value_int) || + (threshold1_candidate < (near_clamped_int + 1))) begin + threshold1_int = near_clamped_int + 1; + end else begin + threshold1_int = threshold1_candidate; + end + + threshold2_candidate = (BASIC_T2 / factor_int) + (5 * near_clamped_int); + if (threshold2_candidate < 3) begin + threshold2_candidate = 3; + end + if ((threshold2_candidate > maximum_sample_value_int) || + (threshold2_candidate < threshold1_int)) begin + threshold2_int = threshold1_int; + end else begin + threshold2_int = threshold2_candidate; + end + + threshold3_candidate = (BASIC_T3 / factor_int) + (7 * near_clamped_int); + if (threshold3_candidate < 4) begin + threshold3_candidate = 4; + end + if ((threshold3_candidate > maximum_sample_value_int) || + (threshold3_candidate < threshold2_int)) begin + threshold3_int = threshold2_int; + end else begin + threshold3_int = threshold3_candidate; + end + end + end + + always_comb begin + preset_maxval = maximum_sample_value_int[15:0]; + preset_t1 = threshold1_int[15:0]; + preset_t2 = threshold2_int[15:0]; + preset_t3 = threshold3_int[15:0]; + preset_reset = DEFAULT_RESET_VALUE[15:0]; end endmodule