From ac093da000d4bb86335c0cfdbf78ada407265e02 Mon Sep 17 00:00:00 2001 From: zhouhua Date: Fri, 17 Apr 2026 07:52:24 +0800 Subject: [PATCH] Start expanding core JPEG-LS datapath toward NEAR 255 --- fpga/sim/tb_jls_near_ctrl.sv | 8 +- fpga/verilog/jls_context_quantizer.sv | 14 +- fpga/verilog/jls_context_update.sv | 20 +- fpga/verilog/jls_header_writer.sv | 8 +- fpga/verilog/jls_mode_router.sv | 4 +- fpga/verilog/jls_near_ctrl.sv | 145 +++++++-- fpga/verilog/jls_near_reciprocal_magic_lut.sv | 284 ++++++++++++++++++ fpga/verilog/jls_regular_error_quantizer.sv | 178 ++--------- fpga/verilog/jls_run_mode.sv | 179 ++--------- fpga/verilog/jls_scan_ctrl.sv | 12 +- fpga/verilog/jpeg_ls_encoder_top.sv | 14 +- fpga/verilog/jpeg_ls_rtl.f | 1 + 12 files changed, 488 insertions(+), 379 deletions(-) create mode 100644 fpga/verilog/jls_near_reciprocal_magic_lut.sv diff --git a/fpga/sim/tb_jls_near_ctrl.sv b/fpga/sim/tb_jls_near_ctrl.sv index 02b64eb..4e28749 100644 --- a/fpga/sim/tb_jls_near_ctrl.sv +++ b/fpga/sim/tb_jls_near_ctrl.sv @@ -26,7 +26,7 @@ module tb_jls_near_ctrl; logic [31:0] strip_output_bytes; // Dynamic NEAR controller outputs. 
- logic [5:0] current_near; + logic [7:0] current_near; logic [47:0] actual_bits_cumulative; logic [47:0] target_bits_cumulative; logic target_miss_at_max_near; @@ -37,7 +37,7 @@ module tb_jls_near_ctrl; jls_near_ctrl #( .PIX_WIDTH(PIX_WIDTH), - .MAX_NEAR(31) + .MAX_NEAR(255) ) dut ( .clk(clk), .rst(rst), @@ -139,7 +139,7 @@ module tb_jls_near_ctrl; @(posedge clk); image_start_valid = 1'b0; - for (loop_index = 0; loop_index < 8; loop_index = loop_index + 1) begin + for (loop_index = 0; loop_index < 16; loop_index = loop_index + 1) begin @(posedge clk); strip_output_bytes = 32'd1000; strip_done_valid = 1'b1; @@ -149,7 +149,7 @@ module tb_jls_near_ctrl; end #1; - if (current_near !== 6'd31 || target_miss_at_max_near !== 1'b1) begin + if (current_near !== 8'd255 || target_miss_at_max_near !== 1'b1) begin $fatal(1, "MAX_NEAR saturation or target miss flag mismatch"); end diff --git a/fpga/verilog/jls_context_quantizer.sv b/fpga/verilog/jls_context_quantizer.sv index 31f6296..940efca 100644 --- a/fpga/verilog/jls_context_quantizer.sv +++ b/fpga/verilog/jls_context_quantizer.sv @@ -29,14 +29,14 @@ module jls_gradient_quantize_one #( input var logic [15:0] T3, // JPEG-LS NEAR parameter for the current strip frame. - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // Quantized gradient Qi in the range -4..4. output logic signed [3:0] Qi ); // Padding for NEAR into the signed gradient compare width. - localparam int NEAR_PAD_WIDTH = DI_WIDTH - 6; + localparam int NEAR_PAD_WIDTH = DI_WIDTH - 8; // Signed compare constants. T1/T2/T3 are already valid for the configured // PIX_WIDTH, so truncation to DI_WIDTH is safe for the supported precisions. @@ -150,7 +150,7 @@ module jls_context_quantizer #( input var logic [15:0] T1, input var logic [15:0] T2, input var logic [15:0] T3, - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // Quantized context event is valid. 
output logic context_valid, @@ -219,7 +219,7 @@ module jls_context_quantizer #( logic [15:0] stage_T1; logic [15:0] stage_T2; logic [15:0] stage_T3; - logic [5:0] stage_NEAR; + logic [7:0] stage_NEAR; // One-entry input skid slot. predict_ready depends only on this local slot, // not on context_ready from the later context-memory hazard path. If the @@ -239,7 +239,7 @@ module jls_context_quantizer #( logic [15:0] stage_next_T1; logic [15:0] stage_next_T2; logic [15:0] stage_next_T3; - logic [5:0] stage_next_NEAR; + logic [7:0] stage_next_NEAR; // Registered quantized-gradient payload. Splitting Q1/Q2/Q3 from the // threshold compare stage keeps Annex A.3 gradient quantization out of the @@ -485,7 +485,7 @@ module jls_context_quantizer #( stage_T1 <= 16'd0; stage_T2 <= 16'd0; stage_T3 <= 16'd0; - stage_NEAR <= 6'd0; + stage_NEAR <= 8'd0; stage_next_valid <= 1'b0; stage_next_sample <= {PIX_WIDTH{1'b0}}; stage_next_x <= 13'd0; @@ -500,7 +500,7 @@ module jls_context_quantizer #( stage_next_T1 <= 16'd0; stage_next_T2 <= 16'd0; stage_next_T3 <= 16'd0; - stage_next_NEAR <= 6'd0; + stage_next_NEAR <= 8'd0; q_stage_valid <= 1'b0; q_stage_sample <= {PIX_WIDTH{1'b0}}; q_stage_x <= 13'd0; diff --git a/fpga/verilog/jls_context_update.sv b/fpga/verilog/jls_context_update.sv index aa427f5..b76d6f1 100644 --- a/fpga/verilog/jls_context_update.sv +++ b/fpga/verilog/jls_context_update.sv @@ -46,7 +46,7 @@ module jls_context_update ( input var logic [6:0] LIMIT_in, // JPEG-LS NEAR parameter for this strip. - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // JPEG-LS RESET parameter, normally 64. input var logic [15:0] RESET, @@ -86,7 +86,7 @@ module jls_context_update ( logic [32:0] abs_Errval_ext; // Stage-1 update terms from Annex A.6. 
- logic signed [7:0] near_scale; + logic signed [9:0] near_scale; logic signed [40:0] B_delta; logic [31:0] A_accum_next; logic signed [40:0] B_accum_next; @@ -127,7 +127,7 @@ module jls_context_update ( logic s0_strip_last_pixel; logic [4:0] s0_qbpp; logic [6:0] s0_LIMIT; - logic [5:0] s0_NEAR; + logic [7:0] s0_NEAR; logic [15:0] s0_RESET; // One-entry input skid slot. It keeps update_ready dependent only on local @@ -145,7 +145,7 @@ module jls_context_update ( logic update_next_strip_last_pixel; logic [4:0] update_next_qbpp; logic [6:0] update_next_LIMIT; - logic [5:0] update_next_NEAR; + logic [7:0] update_next_NEAR; logic [15:0] update_next_RESET; // Stage-1 registered multiplier operands/update payload. s1_Errval_ext and @@ -166,7 +166,7 @@ module jls_context_update ( logic [6:0] s1_LIMIT; logic s1_map_invert; logic signed [32:0] s1_Errval_ext; - logic signed [7:0] s1_near_scale; + logic signed [9:0] s1_near_scale; // Stage-2 registered product/update payload. s2_B_delta is the registered // scaled Errval term for the Annex A.6 B[Q] update before the following @@ -292,7 +292,7 @@ module jls_context_update ( .OUTPUT_WIDTH(41) ) context_update_near_scale_mul_i ( .multiplicand_i(s1_Errval_ext), - .near_scale_i(s1_near_scale[5:0]), + .near_scale_i(s1_near_scale[8:0]), .product_o(B_delta) ); @@ -412,7 +412,7 @@ module jls_context_update ( always_comb begin k_or_near_is_zero = 1'b0; - if (k_next == 5'd0 && s0_NEAR == 6'd0) begin + if (k_next == 5'd0 && s0_NEAR == 8'd0) begin k_or_near_is_zero = 1'b1; end end @@ -653,7 +653,7 @@ module jls_context_update ( s0_strip_last_pixel <= 1'b0; s0_qbpp <= 5'd0; s0_LIMIT <= 7'd0; - s0_NEAR <= 6'd0; + s0_NEAR <= 8'd0; s0_RESET <= 16'd0; update_next_valid <= 1'b0; update_next_A_in <= 32'd0; @@ -665,7 +665,7 @@ module jls_context_update ( update_next_strip_last_pixel <= 1'b0; update_next_qbpp <= 5'd0; update_next_LIMIT <= 7'd0; - update_next_NEAR <= 6'd0; + update_next_NEAR <= 8'd0; update_next_RESET <= 16'd0; s1_valid <= 1'b0; 
s1_A_accum <= 32'd0; @@ -682,7 +682,7 @@ module jls_context_update ( s1_LIMIT <= 7'd0; s1_map_invert <= 1'b0; s1_Errval_ext <= 33'sd0; - s1_near_scale <= 8'sd1; + s1_near_scale <= 10'sd1; s2_valid <= 1'b0; s2_A_accum <= 32'd0; s2_B_in <= 32'sd0; diff --git a/fpga/verilog/jls_header_writer.sv b/fpga/verilog/jls_header_writer.sv index 7646758..4aa9e3e 100644 --- a/fpga/verilog/jls_header_writer.sv +++ b/fpga/verilog/jls_header_writer.sv @@ -39,7 +39,7 @@ module jls_header_writer #( input var logic [12:0] strip_height, // NEAR parameter written to the JPEG-LS SOS segment. - input var logic [5:0] near, + input var logic [7:0] near, // JPEG-LS LSE MAXVAL preset coding parameter. input var logic [15:0] preset_maxval, @@ -111,7 +111,7 @@ module jls_header_writer #( logic latched_original_image_first_strip; logic [12:0] latched_strip_width; logic [12:0] latched_strip_height; - logic [5:0] latched_near; + logic [7:0] latched_near; logic [15:0] latched_preset_maxval; logic [15:0] latched_preset_t1; logic [15:0] latched_preset_t2; @@ -152,7 +152,7 @@ module jls_header_writer #( strip_width_lo = latched_strip_width[7:0]; strip_height_hi = {3'b000, latched_strip_height[12:8]}; strip_height_lo = latched_strip_height[7:0]; - near_byte = {2'b00, latched_near}; + near_byte = latched_near; end always_comb begin @@ -394,7 +394,7 @@ module jls_header_writer #( latched_original_image_first_strip <= 1'b0; latched_strip_width <= 13'd0; latched_strip_height <= 13'd0; - latched_near <= 6'd0; + latched_near <= 8'd0; latched_preset_maxval <= 16'd0; latched_preset_t1 <= 16'd0; latched_preset_t2 <= 16'd0; diff --git a/fpga/verilog/jls_mode_router.sv b/fpga/verilog/jls_mode_router.sv index f29962a..277c512 100644 --- a/fpga/verilog/jls_mode_router.sv +++ b/fpga/verilog/jls_mode_router.sv @@ -47,7 +47,7 @@ module jls_mode_router #( // Active strip width and NEAR for mode/run decisions. 
input var logic [12:0] strip_width, - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // Regular-mode event forwarded to jls_predictor. output logic regular_valid, @@ -245,7 +245,7 @@ module jls_mode_router #( pixel_Rc_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rc}); pixel_Rd_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rd}); pixel_sample_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, pixel_sample}); - near_ext33 = $signed({27'd0, NEAR}); + near_ext33 = $signed({25'd0, NEAR}); end always_comb begin diff --git a/fpga/verilog/jls_near_ctrl.sv b/fpga/verilog/jls_near_ctrl.sv index 7b23474..8659a88 100644 --- a/fpga/verilog/jls_near_ctrl.sv +++ b/fpga/verilog/jls_near_ctrl.sv @@ -18,7 +18,7 @@ module jls_near_ctrl #( parameter int PIX_WIDTH = 16, // Maximum dynamic NEAR allowed by the first RTL version. - parameter int MAX_NEAR = 31 + parameter int MAX_NEAR = 255 ) ( // Main 250 MHz clock. input var logic clk, @@ -42,7 +42,7 @@ module jls_near_ctrl #( input var logic [31:0] strip_output_bytes, // NEAR value to use for the next strip frame header and coding pipeline. - output logic [5:0] current_near, + output logic [7:0] current_near, // Cumulative actual output bits for verification and reporting. output logic [47:0] actual_bits_cumulative, @@ -66,19 +66,22 @@ module jls_near_ctrl #( // Discrete NEAR ladder used by the dynamic controller: - // level 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 4, 4 -> 8, 5 -> 16, 6 -> 31. + // level 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 4, 4 -> 8, 5 -> 16, 6 -> 32, 7 -> 64, 8 -> 127, 9 -> 255.
- localparam logic [2:0] NEAR_LEVEL_0 = 3'd0; - localparam logic [2:0] NEAR_LEVEL_1 = 3'd1; - localparam logic [2:0] NEAR_LEVEL_2 = 3'd2; - localparam logic [2:0] NEAR_LEVEL_4 = 3'd3; - localparam logic [2:0] NEAR_LEVEL_8 = 3'd4; - localparam logic [2:0] NEAR_LEVEL_16 = 3'd5; - localparam logic [2:0] NEAR_LEVEL_31 = 3'd6; + localparam logic [3:0] NEAR_LEVEL_0 = 4'd0; + localparam logic [3:0] NEAR_LEVEL_1 = 4'd1; + localparam logic [3:0] NEAR_LEVEL_2 = 4'd2; + localparam logic [3:0] NEAR_LEVEL_4 = 4'd3; + localparam logic [3:0] NEAR_LEVEL_8 = 4'd4; + localparam logic [3:0] NEAR_LEVEL_16 = 4'd5; + localparam logic [3:0] NEAR_LEVEL_32 = 4'd6; + localparam logic [3:0] NEAR_LEVEL_64 = 4'd7; + localparam logic [3:0] NEAR_LEVEL_127 = 4'd8; + localparam logic [3:0] NEAR_LEVEL_255 = 4'd9; // Latched ratio for the current original image. logic [3:0] active_ratio; // Internal discrete NEAR level register. current_near is decoded from it. - logic [2:0] current_near_level; + logic [3:0] current_near_level; // Strip-level source and target bit calculations. logic [47:0] strip_pixel_count_ext; @@ -113,7 +116,7 @@ module jls_near_ctrl #( logic [47:0] target_times_two; logic [47:0] target_times_three; logic [47:0] target_times_five; - logic [2:0] first_strip_level_next; + logic [3:0] first_strip_level_next; // Cumulative micro-adjust thresholds for later strips. +/-1/16 is the hold // band; beyond +/-1/4 the controller skips one NEAR rung. @@ -127,11 +130,11 @@ module jls_near_ctrl #( logic step_down_two_levels; // Discrete NEAR ladder movement. - logic [2:0] near_level_plus_one; - logic [2:0] near_level_plus_two; - logic [2:0] near_level_minus_one; - logic [2:0] near_level_minus_two; - logic [2:0] adjusted_near_level_next; + logic [3:0] near_level_plus_one; + logic [3:0] near_level_plus_two; + logic [3:0] near_level_minus_one; + logic [3:0] near_level_minus_two; + logic [3:0] adjusted_near_level_next; // Max-level sticky miss reporting. 
logic near_level_is_max; @@ -142,34 +145,46 @@ module jls_near_ctrl #( end always_comb begin - current_near = 6'd0; + current_near = 8'd0; case (current_near_level) NEAR_LEVEL_1: begin - current_near = 6'd1; + current_near = 8'd1; end NEAR_LEVEL_2: begin - current_near = 6'd2; + current_near = 8'd2; end NEAR_LEVEL_4: begin - current_near = 6'd4; + current_near = 8'd4; end NEAR_LEVEL_8: begin - current_near = 6'd8; + current_near = 8'd8; end NEAR_LEVEL_16: begin - current_near = 6'd16; + current_near = 8'd16; end - NEAR_LEVEL_31: begin - current_near = 6'd31; + NEAR_LEVEL_32: begin + current_near = 8'd32; + end + + NEAR_LEVEL_64: begin + current_near = 8'd64; + end + + NEAR_LEVEL_127: begin + current_near = 8'd127; + end + + NEAR_LEVEL_255: begin + current_near = 8'd255; end default: begin - current_near = 6'd0; + current_near = 8'd0; end endcase end @@ -279,7 +294,7 @@ module jls_near_ctrl #( end always_comb begin - first_strip_level_next = NEAR_LEVEL_31; + first_strip_level_next = NEAR_LEVEL_255; case (active_ratio) RATIO_1_TO_4, RATIO_1_TO_8: begin if (pending_actual_bits_sum <= target_plus_one_eighth) begin @@ -292,6 +307,13 @@ module jls_near_ctrl #( first_strip_level_next = NEAR_LEVEL_4; end else if (pending_actual_bits_sum <= target_times_two_plus_quarter) begin first_strip_level_next = NEAR_LEVEL_8; + end else if (pending_actual_bits_sum <= target_times_three) begin + first_strip_level_next = NEAR_LEVEL_32; + end else if (pending_actual_bits_sum <= {pending_target_bits_sum[45:0], 2'b00}) begin + first_strip_level_next = NEAR_LEVEL_64; + end else if (pending_actual_bits_sum <= ({pending_target_bits_sum[45:0], 2'b00} + + {pending_target_bits_sum[46:0], 1'b0})) begin + first_strip_level_next = NEAR_LEVEL_127; end end @@ -308,6 +330,15 @@ module jls_near_ctrl #( first_strip_level_next = NEAR_LEVEL_8; end else if (pending_actual_bits_sum <= target_times_five) begin first_strip_level_next = NEAR_LEVEL_16; + end else if (pending_actual_bits_sum <= 
({pending_target_bits_sum[45:0], 2'b00} + + {pending_target_bits_sum[46:0], 1'b0})) begin + first_strip_level_next = NEAR_LEVEL_32; + end else if (pending_actual_bits_sum <= ({pending_target_bits_sum[44:0], 3'b000} + + pending_target_bits_sum)) begin + first_strip_level_next = NEAR_LEVEL_64; + end else if (pending_actual_bits_sum <= ({pending_target_bits_sum[44:0], 3'b000} + + {pending_target_bits_sum[46:0], 1'b0})) begin + first_strip_level_next = NEAR_LEVEL_127; end end endcase @@ -391,8 +422,20 @@ module jls_near_ctrl #( near_level_plus_one = NEAR_LEVEL_16; end + NEAR_LEVEL_16: begin + near_level_plus_one = NEAR_LEVEL_32; + end + + NEAR_LEVEL_32: begin + near_level_plus_one = NEAR_LEVEL_64; + end + + NEAR_LEVEL_64: begin + near_level_plus_one = NEAR_LEVEL_127; + end + default: begin - near_level_plus_one = NEAR_LEVEL_31; + near_level_plus_one = NEAR_LEVEL_255; end endcase end @@ -416,8 +459,24 @@ module jls_near_ctrl #( near_level_plus_two = NEAR_LEVEL_16; end + NEAR_LEVEL_8: begin + near_level_plus_two = NEAR_LEVEL_32; + end + + NEAR_LEVEL_16: begin + near_level_plus_two = NEAR_LEVEL_64; + end + + NEAR_LEVEL_32: begin + near_level_plus_two = NEAR_LEVEL_127; + end + + NEAR_LEVEL_64: begin + near_level_plus_two = NEAR_LEVEL_255; + end + default: begin - near_level_plus_two = NEAR_LEVEL_31; + near_level_plus_two = NEAR_LEVEL_255; end endcase end @@ -445,10 +504,22 @@ module jls_near_ctrl #( near_level_minus_one = NEAR_LEVEL_8; end - NEAR_LEVEL_31: begin + NEAR_LEVEL_32: begin near_level_minus_one = NEAR_LEVEL_16; end + NEAR_LEVEL_64: begin + near_level_minus_one = NEAR_LEVEL_32; + end + + NEAR_LEVEL_127: begin + near_level_minus_one = NEAR_LEVEL_64; + end + + NEAR_LEVEL_255: begin + near_level_minus_one = NEAR_LEVEL_127; + end + default: begin near_level_minus_one = NEAR_LEVEL_0; end @@ -482,9 +553,21 @@ module jls_near_ctrl #( near_level_minus_two = NEAR_LEVEL_4; end - default: begin + NEAR_LEVEL_32: begin near_level_minus_two = NEAR_LEVEL_8; end + + 
NEAR_LEVEL_64: begin + near_level_minus_two = NEAR_LEVEL_16; + end + + NEAR_LEVEL_127: begin + near_level_minus_two = NEAR_LEVEL_32; + end + + default: begin + near_level_minus_two = NEAR_LEVEL_64; + end endcase end @@ -503,7 +586,7 @@ module jls_near_ctrl #( always_comb begin near_level_is_max = 1'b0; - if (current_near_level == NEAR_LEVEL_31) begin + if (current_near_level == NEAR_LEVEL_255) begin near_level_is_max = 1'b1; end end diff --git a/fpga/verilog/jls_near_reciprocal_magic_lut.sv b/fpga/verilog/jls_near_reciprocal_magic_lut.sv new file mode 100644 index 0000000..add466f --- /dev/null +++ b/fpga/verilog/jls_near_reciprocal_magic_lut.sv @@ -0,0 +1,284 @@ +// Standard : Helper for JPEG-LS Annex A.5/A.7 reciprocal division pipeline +// Clause : N/A helper used by regular and run-mode NEAR quantization +// Figure : N/A +// Table : N/A +// Pseudocode : reciprocal_magic = ceil(2^24 / (2*NEAR+1)) for NEAR=1..255; 0 for NEAR=0 (2^24 exceeds 23 bits) +// Trace : docs/jls_traceability.md#regular-error-quantization +// Example : NEAR=1 gives reciprocal_magic=5592406. +// +// Shared reciprocal-magic lookup for the exact divide-by-(2*NEAR+1) pipeline. +// A common LUT keeps the large constant table out of the two datapath modules.
+ +`default_nettype none + +module jls_near_reciprocal_magic_lut ( + input var logic [7:0] near_i, + output logic [22:0] reciprocal_magic_o +); + + always_comb begin + reciprocal_magic_o = 23'd0; + case (near_i) + 8'd0: begin reciprocal_magic_o = 23'd0; end + 8'd1: begin reciprocal_magic_o = 23'd5592406; end + 8'd2: begin reciprocal_magic_o = 23'd3355444; end + 8'd3: begin reciprocal_magic_o = 23'd2396746; end + 8'd4: begin reciprocal_magic_o = 23'd1864136; end + 8'd5: begin reciprocal_magic_o = 23'd1525202; end + 8'd6: begin reciprocal_magic_o = 23'd1290556; end + 8'd7: begin reciprocal_magic_o = 23'd1118482; end + 8'd8: begin reciprocal_magic_o = 23'd986896; end + 8'd9: begin reciprocal_magic_o = 23'd883012; end + 8'd10: begin reciprocal_magic_o = 23'd798916; end + 8'd11: begin reciprocal_magic_o = 23'd729445; end + 8'd12: begin reciprocal_magic_o = 23'd671089; end + 8'd13: begin reciprocal_magic_o = 23'd621379; end + 8'd14: begin reciprocal_magic_o = 23'd578525; end + 8'd15: begin reciprocal_magic_o = 23'd541201; end + 8'd16: begin reciprocal_magic_o = 23'd508401; end + 8'd17: begin reciprocal_magic_o = 23'd479350; end + 8'd18: begin reciprocal_magic_o = 23'd453439; end + 8'd19: begin reciprocal_magic_o = 23'd430186; end + 8'd20: begin reciprocal_magic_o = 23'd409201; end + 8'd21: begin reciprocal_magic_o = 23'd390168; end + 8'd22: begin reciprocal_magic_o = 23'd372828; end + 8'd23: begin reciprocal_magic_o = 23'd356963; end + 8'd24: begin reciprocal_magic_o = 23'd342393; end + 8'd25: begin reciprocal_magic_o = 23'd328966; end + 8'd26: begin reciprocal_magic_o = 23'd316552; end + 8'd27: begin reciprocal_magic_o = 23'd305041; end + 8'd28: begin reciprocal_magic_o = 23'd294338; end + 8'd29: begin reciprocal_magic_o = 23'd284360; end + 8'd30: begin reciprocal_magic_o = 23'd275037; end + 8'd31: begin reciprocal_magic_o = 23'd266306; end + 8'd32: begin reciprocal_magic_o = 23'd258112; end + 8'd33: begin reciprocal_magic_o = 23'd250407; end + 8'd34: begin 
reciprocal_magic_o = 23'd243149; end + 8'd35: begin reciprocal_magic_o = 23'd236299; end + 8'd36: begin reciprocal_magic_o = 23'd229825; end + 8'd37: begin reciprocal_magic_o = 23'd223697; end + 8'd38: begin reciprocal_magic_o = 23'd217886; end + 8'd39: begin reciprocal_magic_o = 23'd212370; end + 8'd40: begin reciprocal_magic_o = 23'd207127; end + 8'd41: begin reciprocal_magic_o = 23'd202136; end + 8'd42: begin reciprocal_magic_o = 23'd197380; end + 8'd43: begin reciprocal_magic_o = 23'd192842; end + 8'd44: begin reciprocal_magic_o = 23'd188509; end + 8'd45: begin reciprocal_magic_o = 23'd184366; end + 8'd46: begin reciprocal_magic_o = 23'd180401; end + 8'd47: begin reciprocal_magic_o = 23'd176603; end + 8'd48: begin reciprocal_magic_o = 23'd172961; end + 8'd49: begin reciprocal_magic_o = 23'd169467; end + 8'd50: begin reciprocal_magic_o = 23'd166112; end + 8'd51: begin reciprocal_magic_o = 23'd162886; end + 8'd52: begin reciprocal_magic_o = 23'd159784; end + 8'd53: begin reciprocal_magic_o = 23'd156797; end + 8'd54: begin reciprocal_magic_o = 23'd153920; end + 8'd55: begin reciprocal_magic_o = 23'd151147; end + 8'd56: begin reciprocal_magic_o = 23'd148471; end + 8'd57: begin reciprocal_magic_o = 23'd145889; end + 8'd58: begin reciprocal_magic_o = 23'd143396; end + 8'd59: begin reciprocal_magic_o = 23'd140986; end + 8'd60: begin reciprocal_magic_o = 23'd138655; end + 8'd61: begin reciprocal_magic_o = 23'd136401; end + 8'd62: begin reciprocal_magic_o = 23'd134218; end + 8'd63: begin reciprocal_magic_o = 23'd132105; end + 8'd64: begin reciprocal_magic_o = 23'd130056; end + 8'd65: begin reciprocal_magic_o = 23'd128071; end + 8'd66: begin reciprocal_magic_o = 23'd126145; end + 8'd67: begin reciprocal_magic_o = 23'd124276; end + 8'd68: begin reciprocal_magic_o = 23'd122462; end + 8'd69: begin reciprocal_magic_o = 23'd120700; end + 8'd70: begin reciprocal_magic_o = 23'd118988; end + 8'd71: begin reciprocal_magic_o = 23'd117324; end + 8'd72: begin reciprocal_magic_o = 
23'd115705; end + 8'd73: begin reciprocal_magic_o = 23'd114131; end + 8'd74: begin reciprocal_magic_o = 23'd112599; end + 8'd75: begin reciprocal_magic_o = 23'd111108; end + 8'd76: begin reciprocal_magic_o = 23'd109656; end + 8'd77: begin reciprocal_magic_o = 23'd108241; end + 8'd78: begin reciprocal_magic_o = 23'd106862; end + 8'd79: begin reciprocal_magic_o = 23'd105518; end + 8'd80: begin reciprocal_magic_o = 23'd104207; end + 8'd81: begin reciprocal_magic_o = 23'd102928; end + 8'd82: begin reciprocal_magic_o = 23'd101681; end + 8'd83: begin reciprocal_magic_o = 23'd100463; end + 8'd84: begin reciprocal_magic_o = 23'd99274; end + 8'd85: begin reciprocal_magic_o = 23'd98113; end + 8'd86: begin reciprocal_magic_o = 23'd96979; end + 8'd87: begin reciprocal_magic_o = 23'd95870; end + 8'd88: begin reciprocal_magic_o = 23'd94787; end + 8'd89: begin reciprocal_magic_o = 23'd93728; end + 8'd90: begin reciprocal_magic_o = 23'd92692; end + 8'd91: begin reciprocal_magic_o = 23'd91679; end + 8'd92: begin reciprocal_magic_o = 23'd90688; end + 8'd93: begin reciprocal_magic_o = 23'd89718; end + 8'd94: begin reciprocal_magic_o = 23'd88769; end + 8'd95: begin reciprocal_magic_o = 23'd87839; end + 8'd96: begin reciprocal_magic_o = 23'd86929; end + 8'd97: begin reciprocal_magic_o = 23'd86038; end + 8'd98: begin reciprocal_magic_o = 23'd85164; end + 8'd99: begin reciprocal_magic_o = 23'd84308; end + 8'd100: begin reciprocal_magic_o = 23'd83469; end + 8'd101: begin reciprocal_magic_o = 23'd82647; end + 8'd102: begin reciprocal_magic_o = 23'd81841; end + 8'd103: begin reciprocal_magic_o = 23'd81050; end + 8'd104: begin reciprocal_magic_o = 23'd80274; end + 8'd105: begin reciprocal_magic_o = 23'd79513; end + 8'd106: begin reciprocal_magic_o = 23'd78767; end + 8'd107: begin reciprocal_magic_o = 23'd78034; end + 8'd108: begin reciprocal_magic_o = 23'd77315; end + 8'd109: begin reciprocal_magic_o = 23'd76609; end + 8'd110: begin reciprocal_magic_o = 23'd75916; end + 8'd111: begin 
reciprocal_magic_o = 23'd75235; end + 8'd112: begin reciprocal_magic_o = 23'd74566; end + 8'd113: begin reciprocal_magic_o = 23'd73909; end + 8'd114: begin reciprocal_magic_o = 23'd73263; end + 8'd115: begin reciprocal_magic_o = 23'd72629; end + 8'd116: begin reciprocal_magic_o = 23'd72006; end + 8'd117: begin reciprocal_magic_o = 23'd71393; end + 8'd118: begin reciprocal_magic_o = 23'd70790; end + 8'd119: begin reciprocal_magic_o = 23'd70198; end + 8'd120: begin reciprocal_magic_o = 23'd69616; end + 8'd121: begin reciprocal_magic_o = 23'd69043; end + 8'd122: begin reciprocal_magic_o = 23'd68479; end + 8'd123: begin reciprocal_magic_o = 23'd67924; end + 8'd124: begin reciprocal_magic_o = 23'd67379; end + 8'd125: begin reciprocal_magic_o = 23'd66842; end + 8'd126: begin reciprocal_magic_o = 23'd66314; end + 8'd127: begin reciprocal_magic_o = 23'd65794; end + 8'd128: begin reciprocal_magic_o = 23'd65281; end + 8'd129: begin reciprocal_magic_o = 23'd64777; end + 8'd130: begin reciprocal_magic_o = 23'd64281; end + 8'd131: begin reciprocal_magic_o = 23'd63792; end + 8'd132: begin reciprocal_magic_o = 23'd63311; end + 8'd133: begin reciprocal_magic_o = 23'd62837; end + 8'd134: begin reciprocal_magic_o = 23'd62369; end + 8'd135: begin reciprocal_magic_o = 23'd61909; end + 8'd136: begin reciprocal_magic_o = 23'd61456; end + 8'd137: begin reciprocal_magic_o = 23'd61009; end + 8'd138: begin reciprocal_magic_o = 23'd60568; end + 8'd139: begin reciprocal_magic_o = 23'd60134; end + 8'd140: begin reciprocal_magic_o = 23'd59706; end + 8'd141: begin reciprocal_magic_o = 23'd59284; end + 8'd142: begin reciprocal_magic_o = 23'd58868; end + 8'd143: begin reciprocal_magic_o = 23'd58458; end + 8'd144: begin reciprocal_magic_o = 23'd58053; end + 8'd145: begin reciprocal_magic_o = 23'd57654; end + 8'd146: begin reciprocal_magic_o = 23'd57261; end + 8'd147: begin reciprocal_magic_o = 23'd56872; end + 8'd148: begin reciprocal_magic_o = 23'd56489; end + 8'd149: begin reciprocal_magic_o = 
23'd56112; end + 8'd150: begin reciprocal_magic_o = 23'd55739; end + 8'd151: begin reciprocal_magic_o = 23'd55371; end + 8'd152: begin reciprocal_magic_o = 23'd55008; end + 8'd153: begin reciprocal_magic_o = 23'd54649; end + 8'd154: begin reciprocal_magic_o = 23'd54296; end + 8'd155: begin reciprocal_magic_o = 23'd53947; end + 8'd156: begin reciprocal_magic_o = 23'd53602; end + 8'd157: begin reciprocal_magic_o = 23'd53262; end + 8'd158: begin reciprocal_magic_o = 23'd52925; end + 8'd159: begin reciprocal_magic_o = 23'd52594; end + 8'd160: begin reciprocal_magic_o = 23'd52266; end + 8'd161: begin reciprocal_magic_o = 23'd51942; end + 8'd162: begin reciprocal_magic_o = 23'd51623; end + 8'd163: begin reciprocal_magic_o = 23'd51307; end + 8'd164: begin reciprocal_magic_o = 23'd50995; end + 8'd165: begin reciprocal_magic_o = 23'd50687; end + 8'd166: begin reciprocal_magic_o = 23'd50383; end + 8'd167: begin reciprocal_magic_o = 23'd50082; end + 8'd168: begin reciprocal_magic_o = 23'd49785; end + 8'd169: begin reciprocal_magic_o = 23'd49491; end + 8'd170: begin reciprocal_magic_o = 23'd49201; end + 8'd171: begin reciprocal_magic_o = 23'd48914; end + 8'd172: begin reciprocal_magic_o = 23'd48630; end + 8'd173: begin reciprocal_magic_o = 23'd48350; end + 8'd174: begin reciprocal_magic_o = 23'd48073; end + 8'd175: begin reciprocal_magic_o = 23'd47799; end + 8'd176: begin reciprocal_magic_o = 23'd47528; end + 8'd177: begin reciprocal_magic_o = 23'd47260; end + 8'd178: begin reciprocal_magic_o = 23'd46996; end + 8'd179: begin reciprocal_magic_o = 23'd46734; end + 8'd180: begin reciprocal_magic_o = 23'd46475; end + 8'd181: begin reciprocal_magic_o = 23'd46219; end + 8'd182: begin reciprocal_magic_o = 23'd45965; end + 8'd183: begin reciprocal_magic_o = 23'd45715; end + 8'd184: begin reciprocal_magic_o = 23'd45467; end + 8'd185: begin reciprocal_magic_o = 23'd45222; end + 8'd186: begin reciprocal_magic_o = 23'd44980; end + 8'd187: begin reciprocal_magic_o = 23'd44740; end + 
8'd188: begin reciprocal_magic_o = 23'd44502; end + 8'd189: begin reciprocal_magic_o = 23'd44268; end + 8'd190: begin reciprocal_magic_o = 23'd44035; end + 8'd191: begin reciprocal_magic_o = 23'd43805; end + 8'd192: begin reciprocal_magic_o = 23'd43578; end + 8'd193: begin reciprocal_magic_o = 23'd43352; end + 8'd194: begin reciprocal_magic_o = 23'd43130; end + 8'd195: begin reciprocal_magic_o = 23'd42909; end + 8'd196: begin reciprocal_magic_o = 23'd42691; end + 8'd197: begin reciprocal_magic_o = 23'd42474; end + 8'd198: begin reciprocal_magic_o = 23'd42260; end + 8'd199: begin reciprocal_magic_o = 23'd42049; end + 8'd200: begin reciprocal_magic_o = 23'd41839; end + 8'd201: begin reciprocal_magic_o = 23'd41631; end + 8'd202: begin reciprocal_magic_o = 23'd41426; end + 8'd203: begin reciprocal_magic_o = 23'd41222; end + 8'd204: begin reciprocal_magic_o = 23'd41021; end + 8'd205: begin reciprocal_magic_o = 23'd40821; end + 8'd206: begin reciprocal_magic_o = 23'd40623; end + 8'd207: begin reciprocal_magic_o = 23'd40428; end + 8'd208: begin reciprocal_magic_o = 23'd40234; end + 8'd209: begin reciprocal_magic_o = 23'd40042; end + 8'd210: begin reciprocal_magic_o = 23'd39851; end + 8'd211: begin reciprocal_magic_o = 23'd39663; end + 8'd212: begin reciprocal_magic_o = 23'd39476; end + 8'd213: begin reciprocal_magic_o = 23'd39291; end + 8'd214: begin reciprocal_magic_o = 23'd39108; end + 8'd215: begin reciprocal_magic_o = 23'd38927; end + 8'd216: begin reciprocal_magic_o = 23'd38747; end + 8'd217: begin reciprocal_magic_o = 23'd38569; end + 8'd218: begin reciprocal_magic_o = 23'd38392; end + 8'd219: begin reciprocal_magic_o = 23'd38217; end + 8'd220: begin reciprocal_magic_o = 23'd38044; end + 8'd221: begin reciprocal_magic_o = 23'd37872; end + 8'd222: begin reciprocal_magic_o = 23'd37702; end + 8'd223: begin reciprocal_magic_o = 23'd37533; end + 8'd224: begin reciprocal_magic_o = 23'd37366; end + 8'd225: begin reciprocal_magic_o = 23'd37201; end + 8'd226: begin 
reciprocal_magic_o = 23'd37036; end + 8'd227: begin reciprocal_magic_o = 23'd36874; end + 8'd228: begin reciprocal_magic_o = 23'd36712; end + 8'd229: begin reciprocal_magic_o = 23'd36552; end + 8'd230: begin reciprocal_magic_o = 23'd36394; end + 8'd231: begin reciprocal_magic_o = 23'd36236; end + 8'd232: begin reciprocal_magic_o = 23'd36081; end + 8'd233: begin reciprocal_magic_o = 23'd35926; end + 8'd234: begin reciprocal_magic_o = 23'd35773; end + 8'd235: begin reciprocal_magic_o = 23'd35621; end + 8'd236: begin reciprocal_magic_o = 23'd35470; end + 8'd237: begin reciprocal_magic_o = 23'd35321; end + 8'd238: begin reciprocal_magic_o = 23'd35173; end + 8'd239: begin reciprocal_magic_o = 23'd35026; end + 8'd240: begin reciprocal_magic_o = 23'd34880; end + 8'd241: begin reciprocal_magic_o = 23'd34736; end + 8'd242: begin reciprocal_magic_o = 23'd34593; end + 8'd243: begin reciprocal_magic_o = 23'd34451; end + 8'd244: begin reciprocal_magic_o = 23'd34310; end + 8'd245: begin reciprocal_magic_o = 23'd34170; end + 8'd246: begin reciprocal_magic_o = 23'd34031; end + 8'd247: begin reciprocal_magic_o = 23'd33894; end + 8'd248: begin reciprocal_magic_o = 23'd33757; end + 8'd249: begin reciprocal_magic_o = 23'd33622; end + 8'd250: begin reciprocal_magic_o = 23'd33488; end + 8'd251: begin reciprocal_magic_o = 23'd33355; end + 8'd252: begin reciprocal_magic_o = 23'd33223; end + 8'd253: begin reciprocal_magic_o = 23'd33092; end + 8'd254: begin reciprocal_magic_o = 23'd32962; end + 8'd255: begin reciprocal_magic_o = 23'd32833; end + default: begin reciprocal_magic_o = 23'd0; end + endcase + end + +endmodule + +`default_nettype wire diff --git a/fpga/verilog/jls_regular_error_quantizer.sv b/fpga/verilog/jls_regular_error_quantizer.sv index 4833176..35fcdd8 100644 --- a/fpga/verilog/jls_regular_error_quantizer.sv +++ b/fpga/verilog/jls_regular_error_quantizer.sv @@ -59,7 +59,7 @@ module jls_regular_error_quantizer #( input var logic [16:0] RANGE, input var logic [4:0] qbpp, input 
var logic [6:0] LIMIT, - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, // Quantized error event is valid. output logic err_valid, @@ -98,7 +98,7 @@ module jls_regular_error_quantizer #( localparam int RECIP_SHIFT = 24; localparam int RECIP_MAGIC_WIDTH = 23; localparam int RECIP_PRODUCT_WIDTH = DIV_WIDTH + RECIP_MAGIC_WIDTH; - localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 6; + localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 9; // State for the exact reciprocal-LUT division pipeline when NEAR > 0. typedef enum logic [3:0] { @@ -140,17 +140,17 @@ module jls_regular_error_quantizer #( logic [16:0] RANGE_latched; logic [4:0] qbpp_latched; logic [6:0] LIMIT_latched; - logic [5:0] NEAR_latched; + logic [7:0] NEAR_latched; logic signed [32:0] oriented_error_latched; logic quotient_negative_latched; // Reciprocal-division registers and combinational next values. logic [DIV_WIDTH-1:0] div_dividend; logic [DIV_WIDTH-1:0] div_quotient; - logic [5:0] div_denominator; + logic [8:0] div_denominator; logic [RECIP_MAGIC_WIDTH-1:0] div_magic; logic [RECIP_PRODUCT_WIDTH-1:0] div_product; - logic [5:0] divisor_small_next; + logic [8:0] divisor_small_next; logic [RECIP_MAGIC_WIDTH-1:0] reciprocal_magic_next; logic [RECIP_PRODUCT_WIDTH-1:0] div_dividend_product_ext; logic [RECIP_PRODUCT_WIDTH-1:0] div_magic_product_ext; @@ -190,7 +190,7 @@ module jls_regular_error_quantizer #( // pipeline stage for the odd-scale carry-chain multiplier used below. 
logic signed [32:0] sign_restored_Errval_latched; logic signed [32:0] sign_restored_mul_latched; - logic signed [6:0] near_scale_latched; + logic signed [9:0] near_scale_latched; logic signed [40:0] dequantized_error; logic signed [40:0] dequantized_error_latched; logic signed [40:0] reconstruction_base; @@ -220,7 +220,7 @@ module jls_regular_error_quantizer #( .OUTPUT_WIDTH(41) ) regular_recon_err_mul_i ( .multiplicand_i(sign_restored_mul_latched), - .near_scale_i(near_scale_latched[5:0]), + .near_scale_i(near_scale_latched[8:0]), .product_o(dequantized_error) ); @@ -229,10 +229,15 @@ module jls_regular_error_quantizer #( .OUTPUT_WIDTH(41) ) regular_recon_range_mul_i ( .multiplicand_i($signed({1'b0, RANGE_latched})), - .near_scale_i(near_scale_latched[5:0]), + .near_scale_i(near_scale_latched[8:0]), .product_o(range_scaled) ); + jls_near_reciprocal_magic_lut regular_err_recip_magic_lut_i ( + .near_i(NEAR_latched), + .reciprocal_magic_o(reciprocal_magic_next) + ); + always_comb begin output_slot_open = 1'b0; if (!err_valid || err_ready) begin @@ -277,8 +282,8 @@ module jls_regular_error_quantizer #( always_comb begin quotient_negative_next = 1'b1; - division_numerator_positive = oriented_error_latched + $signed({27'd0, NEAR_latched}); - division_numerator_negative = neg_oriented_error_next + $signed({27'd0, NEAR_latched}); + division_numerator_positive = oriented_error_latched + $signed({25'd0, NEAR_latched}); + division_numerator_negative = neg_oriented_error_next + $signed({25'd0, NEAR_latched}); division_numerator_next = division_numerator_negative[DIV_WIDTH-1:0]; if (oriented_error_latched > 33'sd0) begin quotient_negative_next = 1'b0; @@ -287,142 +292,7 @@ module jls_regular_error_quantizer #( end always_comb begin - divisor_small_next = {NEAR_latched[4:0], 1'b1}; - end - - always_comb begin - // ceil(2^24 / (2*NEAR+1)) for NEAR=1..31. The next pipeline stage - // corrects the possible +1 quotient overshoot by checking q*d > n. 
- reciprocal_magic_next = 23'd0; - case (NEAR_latched[4:0]) - 5'd1: begin - reciprocal_magic_next = 23'd5592406; - end - - 5'd2: begin - reciprocal_magic_next = 23'd3355444; - end - - 5'd3: begin - reciprocal_magic_next = 23'd2396746; - end - - 5'd4: begin - reciprocal_magic_next = 23'd1864136; - end - - 5'd5: begin - reciprocal_magic_next = 23'd1525202; - end - - 5'd6: begin - reciprocal_magic_next = 23'd1290556; - end - - 5'd7: begin - reciprocal_magic_next = 23'd1118482; - end - - 5'd8: begin - reciprocal_magic_next = 23'd986896; - end - - 5'd9: begin - reciprocal_magic_next = 23'd883012; - end - - 5'd10: begin - reciprocal_magic_next = 23'd798916; - end - - 5'd11: begin - reciprocal_magic_next = 23'd729445; - end - - 5'd12: begin - reciprocal_magic_next = 23'd671089; - end - - 5'd13: begin - reciprocal_magic_next = 23'd621379; - end - - 5'd14: begin - reciprocal_magic_next = 23'd578525; - end - - 5'd15: begin - reciprocal_magic_next = 23'd541201; - end - - 5'd16: begin - reciprocal_magic_next = 23'd508401; - end - - 5'd17: begin - reciprocal_magic_next = 23'd479350; - end - - 5'd18: begin - reciprocal_magic_next = 23'd453439; - end - - 5'd19: begin - reciprocal_magic_next = 23'd430186; - end - - 5'd20: begin - reciprocal_magic_next = 23'd409201; - end - - 5'd21: begin - reciprocal_magic_next = 23'd390168; - end - - 5'd22: begin - reciprocal_magic_next = 23'd372828; - end - - 5'd23: begin - reciprocal_magic_next = 23'd356963; - end - - 5'd24: begin - reciprocal_magic_next = 23'd342393; - end - - 5'd25: begin - reciprocal_magic_next = 23'd328966; - end - - 5'd26: begin - reciprocal_magic_next = 23'd316552; - end - - 5'd27: begin - reciprocal_magic_next = 23'd305041; - end - - 5'd28: begin - reciprocal_magic_next = 23'd294338; - end - - 5'd29: begin - reciprocal_magic_next = 23'd284360; - end - - 5'd30: begin - reciprocal_magic_next = 23'd275037; - end - - 5'd31: begin - reciprocal_magic_next = 23'd266306; - end - - default: begin - reciprocal_magic_next = 23'd0; 
- end - endcase + divisor_small_next = {NEAR_latched, 1'b1}; end always_comb begin @@ -439,9 +309,9 @@ module jls_regular_error_quantizer #( end always_comb begin - recip_quotient_est_ext = {{6{1'b0}}, recip_quotient_est}; + recip_quotient_est_ext = {{9{1'b0}}, recip_quotient_est}; recip_divisor_ext = {{DIV_WIDTH{1'b0}}, div_denominator}; - recip_dividend_ext = {{6{1'b0}}, div_dividend}; + recip_dividend_ext = {{9{1'b0}}, div_dividend}; end always_comb begin @@ -504,7 +374,7 @@ module jls_regular_error_quantizer #( always_comb begin maxval_ext = (41'sd1 <<< PIX_WIDTH) - 41'sd1; - near_ext = $signed({35'd0, NEAR_latched}); + near_ext = $signed({33'd0, NEAR_latched}); end always_comb begin @@ -544,12 +414,12 @@ module jls_regular_error_quantizer #( RANGE_latched <= 17'd0; qbpp_latched <= 5'd0; LIMIT_latched <= 7'd0; - NEAR_latched <= 6'd0; + NEAR_latched <= 8'd0; oriented_error_latched <= 33'sd0; quotient_negative_latched <= 1'b0; div_dividend <= {DIV_WIDTH{1'b0}}; div_quotient <= {DIV_WIDTH{1'b0}}; - div_denominator <= 6'd0; + div_denominator <= 9'd0; div_magic <= {RECIP_MAGIC_WIDTH{1'b0}}; div_product <= {RECIP_PRODUCT_WIDTH{1'b0}}; recip_quotient_est_latched <= {DIV_WIDTH{1'b0}}; @@ -559,7 +429,7 @@ module jls_regular_error_quantizer #( modulo_Errval_latched <= 33'sd0; sign_restored_Errval_latched <= 33'sd0; sign_restored_mul_latched <= 33'sd0; - near_scale_latched <= 7'sd1; + near_scale_latched <= 10'sd1; dequantized_error_latched <= 41'sd0; reconstruction_base_latched <= 41'sd0; reconstruction_sum_latched <= 41'sd0; @@ -641,7 +511,7 @@ module jls_regular_error_quantizer #( // the DSP B-input register used by the reciprocal multiply. 
quotient_negative_latched <= quotient_negative_next; - if (NEAR_latched == 6'd0) begin + if (NEAR_latched == 8'd0) begin div_quotient <= division_numerator_next; state <= STATE_ERRVAL_PREP; end else begin @@ -717,7 +587,7 @@ module jls_regular_error_quantizer #( // Stage note : Explicit operand registers give the reconstruction // odd-scale multiplier a clean input boundary before Errval*(2*NEAR+1). sign_restored_mul_latched <= sign_restored_Errval_latched; - near_scale_latched <= $signed({NEAR_latched, 1'b1}); + near_scale_latched <= $signed({1'b0, NEAR_latched, 1'b1}); state <= STATE_RECON_MUL; end diff --git a/fpga/verilog/jls_run_mode.sv b/fpga/verilog/jls_run_mode.sv index b0394eb..d9c61aa 100644 --- a/fpga/verilog/jls_run_mode.sv +++ b/fpga/verilog/jls_run_mode.sv @@ -42,7 +42,7 @@ module jls_run_mode #( input var logic [16:0] RANGE, input var logic [4:0] qbpp, input var logic [6:0] LIMIT, - input var logic [5:0] NEAR, + input var logic [7:0] NEAR, input var logic [15:0] RESET, // Run-segment input. run_length is the number of already reconstructed run @@ -95,7 +95,7 @@ module jls_run_mode #( localparam int RECIP_SHIFT = 24; localparam int RECIP_MAGIC_WIDTH = 23; localparam int RECIP_PRODUCT_WIDTH = DIV_WIDTH + RECIP_MAGIC_WIDTH; - localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 6; + localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 9; localparam int SAMPLE_EXT_PAD_WIDTH = 33 - PIX_WIDTH; localparam int RECON_EXT_PAD_WIDTH = 41 - PIX_WIDTH; localparam logic [6:0] MAX_CODE_BITS_VALUE = MAX_CODE_BITS; @@ -145,7 +145,7 @@ module jls_run_mode #( logic [16:0] RANGE_latched; logic [4:0] qbpp_latched; logic [6:0] LIMIT_latched; - logic [5:0] NEAR_latched; + logic [7:0] NEAR_latched; logic [15:0] RESET_latched; // Latched run segment and interruption fields. @@ -171,10 +171,10 @@ module jls_run_mode #( // possible +1 quotient overshoot before Annex A.7 mapping. 
logic [DIV_WIDTH-1:0] div_dividend; logic [DIV_WIDTH-1:0] div_quotient; - logic [5:0] div_denominator; + logic [8:0] div_denominator; logic [RECIP_MAGIC_WIDTH-1:0] div_magic; logic [RECIP_PRODUCT_WIDTH-1:0] div_product; - logic [5:0] divisor_small_next; + logic [8:0] divisor_small_next; logic [RECIP_MAGIC_WIDTH-1:0] reciprocal_magic_next; logic [RECIP_PRODUCT_WIDTH-1:0] div_dividend_product_ext; logic [RECIP_PRODUCT_WIDTH-1:0] div_magic_product_ext; @@ -246,7 +246,7 @@ module jls_run_mode #( // Registered reconstruction operands for sign_restored_Errval * (2*NEAR+1), // matching Annex A.7 interruption handling. logic signed [32:0] sign_restored_mul_latched; - logic signed [6:0] near_scale_latched; + logic signed [9:0] near_scale_latched; logic signed [40:0] dequantized_error; logic signed [40:0] dequantized_error_latched; logic signed [40:0] reconstruction_base; @@ -327,7 +327,7 @@ module jls_run_mode #( .OUTPUT_WIDTH(41) ) run_recon_err_mul_i ( .multiplicand_i(sign_restored_mul_latched), - .near_scale_i(near_scale_latched[5:0]), + .near_scale_i(near_scale_latched[8:0]), .product_o(dequantized_error) ); @@ -336,10 +336,15 @@ module jls_run_mode #( .OUTPUT_WIDTH(41) ) run_recon_range_mul_i ( .multiplicand_i($signed({1'b0, RANGE_latched})), - .near_scale_i(near_scale_latched[5:0]), + .near_scale_i(near_scale_latched[8:0]), .product_o(range_scaled) ); + jls_near_reciprocal_magic_lut run_mode_recip_magic_lut_i ( + .near_i(NEAR_latched), + .reciprocal_magic_o(reciprocal_magic_next) + ); + // Loop index declared outside procedural blocks per project coding style. 
integer run_bit_index; @@ -492,7 +497,7 @@ module jls_run_mode #( sample_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, sample_latched}); Ra_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Ra_latched}); Rb_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rb_latched}); - near_ext33 = $signed({27'd0, NEAR_latched}); + near_ext33 = $signed({25'd0, NEAR_latched}); end always_comb begin @@ -542,8 +547,8 @@ module jls_run_mode #( always_comb begin quotient_negative_next = 1'b1; - division_numerator_positive = oriented_error_next + $signed({27'd0, NEAR_latched}); - division_numerator_negative = neg_oriented_error_next + $signed({27'd0, NEAR_latched}); + division_numerator_positive = oriented_error_next + $signed({25'd0, NEAR_latched}); + division_numerator_negative = neg_oriented_error_next + $signed({25'd0, NEAR_latched}); division_numerator_next = division_numerator_negative[DIV_WIDTH-1:0]; if (oriented_error_next > 33'sd0) begin quotient_negative_next = 1'b0; @@ -552,141 +557,7 @@ module jls_run_mode #( end always_comb begin - divisor_small_next = {NEAR_latched[4:0], 1'b1}; - end - - always_comb begin - // ceil(2^24 / (2*NEAR+1)) for NEAR=1..31. 
- reciprocal_magic_next = 23'd0; - case (NEAR_latched[4:0]) - 5'd1: begin - reciprocal_magic_next = 23'd5592406; - end - - 5'd2: begin - reciprocal_magic_next = 23'd3355444; - end - - 5'd3: begin - reciprocal_magic_next = 23'd2396746; - end - - 5'd4: begin - reciprocal_magic_next = 23'd1864136; - end - - 5'd5: begin - reciprocal_magic_next = 23'd1525202; - end - - 5'd6: begin - reciprocal_magic_next = 23'd1290556; - end - - 5'd7: begin - reciprocal_magic_next = 23'd1118482; - end - - 5'd8: begin - reciprocal_magic_next = 23'd986896; - end - - 5'd9: begin - reciprocal_magic_next = 23'd883012; - end - - 5'd10: begin - reciprocal_magic_next = 23'd798916; - end - - 5'd11: begin - reciprocal_magic_next = 23'd729445; - end - - 5'd12: begin - reciprocal_magic_next = 23'd671089; - end - - 5'd13: begin - reciprocal_magic_next = 23'd621379; - end - - 5'd14: begin - reciprocal_magic_next = 23'd578525; - end - - 5'd15: begin - reciprocal_magic_next = 23'd541201; - end - - 5'd16: begin - reciprocal_magic_next = 23'd508401; - end - - 5'd17: begin - reciprocal_magic_next = 23'd479350; - end - - 5'd18: begin - reciprocal_magic_next = 23'd453439; - end - - 5'd19: begin - reciprocal_magic_next = 23'd430186; - end - - 5'd20: begin - reciprocal_magic_next = 23'd409201; - end - - 5'd21: begin - reciprocal_magic_next = 23'd390168; - end - - 5'd22: begin - reciprocal_magic_next = 23'd372828; - end - - 5'd23: begin - reciprocal_magic_next = 23'd356963; - end - - 5'd24: begin - reciprocal_magic_next = 23'd342393; - end - - 5'd25: begin - reciprocal_magic_next = 23'd328966; - end - - 5'd26: begin - reciprocal_magic_next = 23'd316552; - end - - 5'd27: begin - reciprocal_magic_next = 23'd305041; - end - - 5'd28: begin - reciprocal_magic_next = 23'd294338; - end - - 5'd29: begin - reciprocal_magic_next = 23'd284360; - end - - 5'd30: begin - reciprocal_magic_next = 23'd275037; - end - - 5'd31: begin - reciprocal_magic_next = 23'd266306; - end - - default: begin - reciprocal_magic_next = 23'd0; 
- end - endcase + divisor_small_next = {NEAR_latched, 1'b1}; end always_comb begin @@ -703,9 +574,9 @@ module jls_run_mode #( end always_comb begin - recip_quotient_est_ext = {{6{1'b0}}, recip_quotient_est}; + recip_quotient_est_ext = {{9{1'b0}}, recip_quotient_est}; recip_divisor_ext = {{DIV_WIDTH{1'b0}}, div_denominator}; - recip_dividend_ext = {{6{1'b0}}, div_dividend}; + recip_dividend_ext = {{9{1'b0}}, div_dividend}; end always_comb begin @@ -775,7 +646,7 @@ module jls_run_mode #( always_comb begin maxval_ext = (41'sd1 <<< PIX_WIDTH) - 41'sd1; - near_ext41 = $signed({35'd0, NEAR_latched}); + near_ext41 = $signed({33'd0, NEAR_latched}); end always_comb begin @@ -1010,7 +881,7 @@ module jls_run_mode #( RANGE_latched <= 17'd0; qbpp_latched <= 5'd0; LIMIT_latched <= 7'd0; - NEAR_latched <= 6'd0; + NEAR_latched <= 8'd0; RESET_latched <= 16'd64; run_remaining <= 16'd0; run_end_of_line_latched <= 1'b0; @@ -1036,7 +907,7 @@ module jls_run_mode #( division_numerator_latched <= {DIV_WIDTH{1'b0}}; div_dividend <= {DIV_WIDTH{1'b0}}; div_quotient <= {DIV_WIDTH{1'b0}}; - div_denominator <= 6'd0; + div_denominator <= 9'd0; div_magic <= {RECIP_MAGIC_WIDTH{1'b0}}; div_product <= {RECIP_PRODUCT_WIDTH{1'b0}}; recip_quotient_est_latched <= {DIV_WIDTH{1'b0}}; @@ -1046,7 +917,7 @@ module jls_run_mode #( modulo_Errval_latched <= 33'sd0; sign_restored_Errval_latched <= 33'sd0; sign_restored_mul_latched <= 33'sd0; - near_scale_latched <= 7'sd1; + near_scale_latched <= 10'sd1; dequantized_error_latched <= 41'sd0; reconstruction_base_latched <= 41'sd0; reconstruction_sum_latched <= 41'sd0; @@ -1241,7 +1112,7 @@ module jls_run_mode #( end if (interruption_valid_latched) begin - if (NEAR_latched == 6'd0) begin + if (NEAR_latched == 8'd0) begin div_quotient <= division_numerator_latched; state_after_run_code <= STATE_ERRVAL_PREP; end else begin @@ -1337,7 +1208,7 @@ module jls_run_mode #( // Stage note : Explicit operand registers give the reconstruction // odd-scale multiplier a clean 
input boundary before Errval*(2*NEAR+1). sign_restored_mul_latched <= sign_restored_Errval_latched; - near_scale_latched <= $signed({NEAR_latched, 1'b1}); + near_scale_latched <= $signed({1'b0, NEAR_latched, 1'b1}); state <= STATE_CONTEXT_PREP; end diff --git a/fpga/verilog/jls_scan_ctrl.sv b/fpga/verilog/jls_scan_ctrl.sv index 567bd1b..3d4a2f0 100644 --- a/fpga/verilog/jls_scan_ctrl.sv +++ b/fpga/verilog/jls_scan_ctrl.sv @@ -62,7 +62,7 @@ module jls_scan_ctrl #( input var logic [3:0] active_ratio, // Dynamic NEAR value from jls_near_ctrl for non-first strips. - input var logic [5:0] current_near, + input var logic [7:0] current_near, // Pixel event forwarded to the predictor/context pipeline. output logic enc_pixel_valid, @@ -105,7 +105,7 @@ module jls_scan_ctrl #( output logic [12:0] strip_height, // NEAR value used by this strip frame. - output logic [5:0] strip_near, + output logic [7:0] strip_near, // Strip finish command after the last strip pixel enters the encode pipeline. output logic strip_finish_valid, @@ -145,7 +145,7 @@ module jls_scan_ctrl #( logic slot_image_first_pixel; logic slot_image_last_pixel; logic [12:0] slot_active_pic_col; - logic [5:0] slot_strip_near; + logic [7:0] slot_strip_near; // Independent readiness terms for input loading, strip commands, and encode // pipeline forwarding. @@ -163,7 +163,7 @@ module jls_scan_ctrl #( // First-strip NEAR must be zero even if jls_near_ctrl has not yet reset on // the same SOF pixel cycle. 
- logic [5:0] selected_strip_near; + logic [7:0] selected_strip_near; always_comb begin input_start_path_ready = 1'b1; @@ -275,7 +275,7 @@ module jls_scan_ctrl #( always_comb begin selected_strip_near = current_near; if (image_first_pixel) begin - selected_strip_near = 6'd0; + selected_strip_near = 8'd0; end end @@ -328,7 +328,7 @@ module jls_scan_ctrl #( slot_image_first_pixel <= 1'b0; slot_image_last_pixel <= 1'b0; slot_active_pic_col <= 13'd0; - slot_strip_near <= 6'd0; + slot_strip_near <= 8'd0; strip_pixel_count_running <= 32'd0; end else begin if (forward_slot) begin diff --git a/fpga/verilog/jpeg_ls_encoder_top.sv b/fpga/verilog/jpeg_ls_encoder_top.sv index 575b8a2..34d0bfa 100644 --- a/fpga/verilog/jpeg_ls_encoder_top.sv +++ b/fpga/verilog/jpeg_ls_encoder_top.sv @@ -31,7 +31,7 @@ module jpeg_ls_encoder_top #( parameter int SCAN_ROWS = 16, // Maximum dynamic NEAR value. - parameter int MAX_NEAR = 31, + parameter int MAX_NEAR = 255, // Internal output buffer capacity in bytes. parameter int OUT_BUF_BYTES = 8192, @@ -111,7 +111,7 @@ module jpeg_ls_encoder_top #( logic input_image_active; // Dynamic NEAR controller signals. - logic [5:0] current_near; + logic [7:0] current_near; logic [47:0] actual_bits_cumulative; logic [47:0] target_bits_cumulative; logic target_miss_at_max_near; @@ -138,7 +138,7 @@ module jpeg_ls_encoder_top #( logic scan_original_image_first_strip; logic [12:0] scan_strip_width; logic [12:0] scan_strip_height; - logic [5:0] scan_strip_near; + logic [7:0] scan_strip_near; logic scan_strip_finish_valid; logic scan_strip_finish_ready; logic scan_original_image_last_strip; @@ -148,7 +148,7 @@ module jpeg_ls_encoder_top #( logic strip_open_active; logic strip_start_accepted; logic strip_finish_accepted; - logic [5:0] active_strip_near; + logic [7:0] active_strip_near; logic [31:0] active_strip_pixel_count; // Preset and coding parameters for strip-start header and active pipeline. 
@@ -382,7 +382,7 @@ module jpeg_ls_encoder_top #( logic [16:0] run_mode_RANGE; logic [4:0] run_mode_qbpp; logic [6:0] run_mode_LIMIT; - logic [5:0] run_mode_NEAR; + logic [7:0] run_mode_NEAR; logic [15:0] run_mode_RESET; logic run_code_valid; logic run_code_ready; @@ -531,7 +531,7 @@ module jpeg_ls_encoder_top #( always_comb begin scan_strip_lossless_fast = 1'b0; - if (scan_strip_near == 6'd0) begin + if (scan_strip_near == 8'd0) begin scan_strip_lossless_fast = 1'b1; end end @@ -1520,7 +1520,7 @@ module jpeg_ls_encoder_top #( always_ff @(posedge clk) begin if (rst) begin strip_open_active <= 1'b0; - active_strip_near <= 6'd0; + active_strip_near <= 8'd0; active_strip_lossless_fast <= 1'b1; active_strip_pixel_count <= 32'd0; near_update_pending <= 1'b0; diff --git a/fpga/verilog/jpeg_ls_rtl.f b/fpga/verilog/jpeg_ls_rtl.f index 1172f8d..9618833 100644 --- a/fpga/verilog/jpeg_ls_rtl.f +++ b/fpga/verilog/jpeg_ls_rtl.f @@ -10,6 +10,7 @@ fpga/verilog/jls_context_quantizer.sv fpga/verilog/jls_context_model.sv fpga/verilog/jls_prediction_corrector.sv fpga/verilog/jls_near_scale_mul.sv +fpga/verilog/jls_near_reciprocal_magic_lut.sv fpga/verilog/jls_regular_error_quantizer.sv fpga/verilog/jls_header_writer.sv fpga/verilog/jls_near_ctrl.sv